Fix lzma import error and use MarianMT translation

This commit is contained in:
Adolfo Reyna
2026-02-26 21:13:48 -05:00
parent 8e45daec87
commit 88cd83f87e

View File

@@ -1,15 +1,22 @@
import sys
from unittest.mock import MagicMock
# Workaround: some Python builds lack the _lzma extension; if `import lzma` fails, register a MagicMock stub as sys.modules["_lzma"]
try:
import lzma
except ImportError:
sys.modules["_lzma"] = MagicMock()
import mlx_whisper import mlx_whisper
import numpy as np import numpy as np
import sounddevice as sd import sounddevice as sd
import queue import queue
import sys
import torch import torch
from silero_vad import load_silero_vad, get_speech_timestamps from silero_vad import load_silero_vad, get_speech_timestamps
from transformers import MarianMTModel, MarianTokenizer from transformers import MarianMTModel, MarianTokenizer
# Parameters # Parameters
WHISPER_MODEL = "mlx-community/whisper-small.en-mlx" WHISPER_MODEL = "mlx-community/whisper-small.en-mlx"
# Dedicated EN-ES translation model (very fast and accurate)
TRANSLATE_MODEL = "Helsinki-NLP/opus-mt-en-es" TRANSLATE_MODEL = "Helsinki-NLP/opus-mt-en-es"
CHANNELS = 1 CHANNELS = 1
SAMPLERATE = 16000 SAMPLERATE = 16000
@@ -77,7 +84,7 @@ def main():
original_text = result['text'].strip() original_text = result['text'].strip()
if original_text: if original_text:
# 2. Translate using dedicated MarianMT # 2. Translate
inputs = tokenizer(original_text, return_tensors="pt", padding=True).to(device) inputs = tokenizer(original_text, return_tensors="pt", padding=True).to(device)
with torch.no_grad(): with torch.no_grad():
translated_tokens = model.generate(**inputs) translated_tokens = model.generate(**inputs)