"""Live microphone transcription with OpenAI Whisper.

Audio is captured with ``sounddevice`` in fixed-size blocks, accumulated until
at least ``TRANSCRIBE_RATE`` seconds are available, and then handed to Whisper.
Every window is transcribed independently and the buffer is cleared afterwards,
so a word that straddles two windows may be split or dropped.
"""
# Source: transcribe.py as added in commit 0180b1f
# ("Initial commit: basic Whisper live transcription script").

import queue
import sys

import numpy as np

# Parameters
MODEL_TYPE = "tiny.en"
CHANNELS = 1
SAMPLERATE = 16000
BLOCK_SIZE = 8000  # 0.5 seconds of audio per block
TRANSCRIBE_RATE = 2  # Seconds of audio collected before each transcription
QUEUE_TIMEOUT = 0.5  # Seconds to wait for a block before checking again

# Hand-off between the audio callback (producer) and the main loop (consumer).
audio_queue = queue.Queue()


def callback(indata, frames, time, status):
    """Queue one captured audio block for the main loop.

    The signature is the one ``sounddevice.InputStream`` expects of its
    ``callback`` argument; ``frames`` and ``time`` are accepted but unused.

    Args:
        indata: Array holding the captured block.
        frames: Number of frames in ``indata``.
        time: Timing information supplied by the stream.
        status: Stream status flags; printed to stderr when truthy.
    """
    if status:
        print(status, file=sys.stderr)
    # Queue a copy rather than the array itself -- presumably the stream may
    # reuse the buffer after the callback returns (see sounddevice docs).
    audio_queue.put(indata.copy())


def _to_mono(block):
    """Return ``block`` as a 1-D float32 signal, averaging channels if needed."""
    if block.ndim == 1:
        return block.astype(np.float32, copy=False)
    # For a single channel the mean is the sample itself, so mono input is
    # passed through unchanged; for several channels this downmixes instead
    # of interleaving them the way ``flatten()`` would.
    return block.mean(axis=1, dtype=np.float32)


def _drain_queue(source, timeout):
    """Wait up to ``timeout`` seconds for a block, then take all queued ones.

    Returns an empty list when nothing arrived within ``timeout``.
    """
    try:
        # Blocking here (instead of polling ``empty()``) keeps the main loop
        # from spinning while no audio is available; the timeout keeps the
        # wait short so Ctrl+C is handled promptly.
        blocks = [source.get(timeout=timeout)]
    except queue.Empty:
        return []
    while True:
        try:
            blocks.append(source.get_nowait())
        except queue.Empty:
            return blocks


def main():
    """Load the model, open the default input device and transcribe forever.

    Runs until interrupted with Ctrl+C. Errors raised while the stream is
    open are reported on stderr; errors while loading the model propagate.
    """
    # Imported here rather than at module level so that importing this module
    # stays cheap and the capture helpers work without the audio/model stack.
    import sounddevice as sd
    import whisper

    print(f"Loading Whisper model '{MODEL_TYPE}'...")
    model = whisper.load_model(MODEL_TYPE)
    print("Model loaded.")

    print("\nAvailable Audio Devices:")
    print(sd.query_devices())

    # Informational only: the stream below is opened without an explicit
    # device, so sounddevice picks its default input.
    default_device = sd.default.device[0]
    print(f"\nUsing default input device index: {default_device}")

    print("\nStarting live transcription... (Press Ctrl+C to stop)")
    print("Note: On macOS, you may need to grant Microphone permissions to your terminal.\n")

    window_samples = SAMPLERATE * TRANSCRIBE_RATE
    pending = []  # mono blocks not yet transcribed
    pending_samples = 0

    try:
        with sd.InputStream(
            samplerate=SAMPLERATE,
            channels=CHANNELS,
            dtype="float32",  # the buffer handed to Whisper is float32
            callback=callback,
            blocksize=BLOCK_SIZE,
        ):
            while True:
                # Take everything that is queued so a slow transcription is
                # followed by one larger window instead of a growing backlog.
                for block in _drain_queue(audio_queue, QUEUE_TIMEOUT):
                    mono = _to_mono(block)
                    pending.append(mono)
                    pending_samples += len(mono)

                if pending_samples < window_samples:
                    continue

                # One concatenation per window instead of np.append per block.
                audio = np.concatenate(pending)
                pending.clear()
                pending_samples = 0

                # fp16=False is used for CPU execution
                result = model.transcribe(audio, fp16=False, language="en")
                text = result["text"].strip()
                if text:
                    print(f"Transcription: {text}")

    except KeyboardInterrupt:
        print("\nStopped by user.")
    except Exception as e:
        # Top-level boundary: report on stderr, like the callback's status.
        print(f"\nError: {e}", file=sys.stderr)


if __name__ == "__main__":
    main()