From 88cd83f87ec5891612b8d132c68c6143d2e53358 Mon Sep 17 00:00:00 2001 From: Adolfo Reyna Date: Thu, 26 Feb 2026 21:13:48 -0500 Subject: [PATCH] Fix lzma import error and use MarianMT translation --- transcribe.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/transcribe.py b/transcribe.py index 5fee545..4771884 100644 --- a/transcribe.py +++ b/transcribe.py @@ -1,15 +1,22 @@ +import sys +from unittest.mock import MagicMock + +# Workaround for missing _lzma in some Python builds +try: + import lzma +except ImportError: + sys.modules["_lzma"] = MagicMock() + import mlx_whisper import numpy as np import sounddevice as sd import queue -import sys import torch from silero_vad import load_silero_vad, get_speech_timestamps from transformers import MarianMTModel, MarianTokenizer # Parameters WHISPER_MODEL = "mlx-community/whisper-small.en-mlx" -# Dedicated EN-ES translation model (very fast and accurate) TRANSLATE_MODEL = "Helsinki-NLP/opus-mt-en-es" CHANNELS = 1 SAMPLERATE = 16000 @@ -77,7 +84,7 @@ def main(): original_text = result['text'].strip() if original_text: - # 2. Translate using dedicated MarianMT + # 2. Translate inputs = tokenizer(original_text, return_tensors="pt", padding=True).to(device) with torch.no_grad(): translated_tokens = model.generate(**inputs)