NOTE(review): this patch was recovered from a copy in which all line breaks
and leading whitespace had been collapsed. Line boundaries were rebuilt so
that each hunk matches its header counts (15 -> 22 lines and 7 -> 7 lines).
The absolute indentation of the lines in the second hunk, and the position of
the blank context lines, are presumed rather than known -- confirm against
transcribe.py before applying.

diff --git a/transcribe.py b/transcribe.py
index 5fee545..4771884 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -1,15 +1,22 @@
+import sys
+from unittest.mock import MagicMock
+
+# Workaround for missing _lzma in some Python builds
+try:
+    import lzma
+except ImportError:
+    sys.modules["_lzma"] = MagicMock()
+
 import mlx_whisper
 import numpy as np
 import sounddevice as sd
 import queue
-import sys
 import torch
 from silero_vad import load_silero_vad, get_speech_timestamps
 from transformers import MarianMTModel, MarianTokenizer
 
 # Parameters
 WHISPER_MODEL = "mlx-community/whisper-small.en-mlx"
-# Dedicated EN-ES translation model (very fast and accurate)
 TRANSLATE_MODEL = "Helsinki-NLP/opus-mt-en-es"
 CHANNELS = 1
 SAMPLERATE = 16000
@@ -77,7 +84,7 @@ def main():
                 original_text = result['text'].strip()
 
                 if original_text:
-                    # 2. Translate using dedicated MarianMT
+                    # 2. Translate
                     inputs = tokenizer(original_text, return_tensors="pt", padding=True).to(device)
                     with torch.no_grad():
                         translated_tokens = model.generate(**inputs)