Created
June 1, 2024 11:49
-
-
Save timfel/0faf07aaf500912c669b0c68f5b60f0f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline | |
from datasets import load_dataset | |
# Pick the device and the dtype from the same CUDA check so they always agree:
# half precision on the first GPU when CUDA is available, full precision on
# the CPU otherwise. (Previously the device was always "cuda:0", which made
# `model.to(device)` fail on machines without CUDA even though the dtype
# already had a CPU fallback.)
_cuda_available = torch.cuda.is_available()
device = "cuda:0" if _cuda_available else "cpu"
torch_dtype = torch.float16 if _cuda_available else torch.float32

model_id = "openai/whisper-large-v3"

# Load the speech-to-text model weights in the selected dtype and move them to
# the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor bundles the tokenizer and the audio feature extractor that the
# pipeline below needs.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline: audio is processed in 30-second chunks, 16
# chunks per batch, and timestamps are returned alongside the text.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)
if __name__ == "__main__":
    # Interactive entry point: ask for the spoken language and an audio file,
    # transcribe the file with `pipe`, then ask where to save the transcript.
    import tkinter
    from tkinter import filedialog, simpledialog

    # Own a hidden root window explicitly instead of relying on tkinter's
    # implicit default root, whose handling by the dialog helpers differs
    # between Python versions.
    root = tkinter.Tk()
    root.withdraw()
    try:
        language = simpledialog.askstring(
            "Language?",
            "Which language (e.g. german, english, ...)",
            parent=root,
        )
        if language:
            filename = filedialog.askopenfilename(
                title="Choose file to transcribe",
                parent=root,
            )
            if filename:
                result = pipe(filename, generate_kwargs={"language": language})
                # Ask for a path rather than an already-open file: a cancelled
                # dialog then yields a falsy value that can be checked, instead
                # of a `None` that breaks the `with` statement.
                save_path = filedialog.asksaveasfilename(
                    title="Choose file to save transcript to",
                    parent=root,
                )
                if save_path:
                    # UTF-8 so transcripts in any language can be written
                    # regardless of the platform's default encoding.
                    with open(save_path, "w", encoding="utf-8") as f:
                        f.write(result["text"])
                else:
                    # Saving was cancelled; print the transcript so the
                    # finished transcription is not silently discarded.
                    print(result["text"])
    finally:
        root.destroy()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment