From 37cb45c12fcf4aa9121075af6f672447d2d2e46e Mon Sep 17 00:00:00 2001 From: Adolfo Reyna Date: Thu, 26 Feb 2026 21:25:57 -0500 Subject: [PATCH] Add safety limits to transcription length and translation tokens to prevent memory exhaustion --- transcribe.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/transcribe.py b/transcribe.py index f8c7d42..a7405e5 100644 --- a/transcribe.py +++ b/transcribe.py @@ -103,13 +103,18 @@ def main(): original_text = result['text'].strip() if original_text: + # Limit the input length to avoid memory spikes or model glitches + if len(original_text) > 250: + original_text = original_text[:247] + "..." + print(f"\n[EN]: {original_text}") # 2. Translate to all targets for lang_name, (model, tokenizer) in translation_engines.items(): inputs = tokenizer(original_text, return_tensors="pt", padding=True).to(device) with torch.no_grad(): - translated_tokens = model.generate(**inputs) + # Added max_new_tokens to prevent runaway generation + translated_tokens = model.generate(**inputs, max_new_tokens=150) translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True) print(f"[{lang_name[:2].upper()}]: {translated_text}")