Fix lzma import error and use MarianMT translation
This commit is contained in:
@@ -1,15 +1,22 @@
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
# Workaround for Python builds compiled without the _lzma C extension: if `import lzma` fails, register a MagicMock stub as `_lzma` in sys.modules
|
||||
try:
|
||||
import lzma
|
||||
except ImportError:
|
||||
sys.modules["_lzma"] = MagicMock()
|
||||
|
||||
import mlx_whisper
|
||||
import numpy as np
|
||||
import sounddevice as sd
|
||||
import queue
|
||||
import sys
|
||||
import torch
|
||||
from silero_vad import load_silero_vad, get_speech_timestamps
|
||||
from transformers import MarianMTModel, MarianTokenizer
|
||||
|
||||
# Parameters
|
||||
WHISPER_MODEL = "mlx-community/whisper-small.en-mlx"
|
||||
# Dedicated English-to-Spanish (EN-ES) MarianMT translation model (very fast and accurate)
|
||||
TRANSLATE_MODEL = "Helsinki-NLP/opus-mt-en-es"
|
||||
CHANNELS = 1
|
||||
SAMPLERATE = 16000
|
||||
@@ -77,7 +84,7 @@ def main():
|
||||
original_text = result['text'].strip()
|
||||
|
||||
if original_text:
|
||||
# 2. Translate using dedicated MarianMT
|
||||
# 2. Translate
|
||||
inputs = tokenizer(original_text, return_tensors="pt", padding=True).to(device)
|
||||
with torch.no_grad():
|
||||
translated_tokens = model.generate(**inputs)
|
||||
|
||||
Reference in New Issue
Block a user