From 0180b1f29e5f0a81c7dadfde04f09a49c903ac86 Mon Sep 17 00:00:00 2001 From: Adolfo Reyna Date: Thu, 26 Feb 2026 20:53:39 -0500 Subject: [PATCH] Initial commit: basic Whisper live transcription script --- .gitignore | 1 + __pycache__/transcribe.cpython-310.pyc | Bin 0 -> 1589 bytes transcribe.py | 66 +++++++++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 .gitignore create mode 100644 __pycache__/transcribe.cpython-310.pyc create mode 100644 transcribe.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8b73e14 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/\n*.pyc\n.DS_Store diff --git a/__pycache__/transcribe.cpython-310.pyc b/__pycache__/transcribe.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ddde216e0949b9d27dc0ca731bf825175884b9c GIT binary patch literal 1589 zcmZux&2QT_6elT4mgSE)ZQ8WJu<6jDGqxG%3gl1>Mbpf~f+kKE2Pn#mC!36;M3z*N zYAVAiT4dK?_Z0=4YgIBow zD`HJvA>v(~1t`yd&ZZ49oD%Lx4wLAar5u8c`$j zp_#cE8!*%zfSDz$8c^dy!toiwNLzC{Lv#BaonyEP^--Jj+PI+mVITbz!Mo z6^_zeNLkc^jEBkKMJ6)AtTfXF9VKbrrpyUr9_WBM`!bjaRZtyHggF=JO#Ojm9c=ig zp=A)Okqn2Tljemx3ZiHjjE-8yN(X@ACccJc6Q&~=ADiKsmnq#@1LrlwMdNpfllRss zn6JqkTsy}nhO^i~oR%EK?UK?%dyF(abhvXyPVjVBR}NiW)t;{DI zo+Gsmbm<(O5KZT;8J>~(G6V|FE%@@DzBa?B7|vf`K}WVcn4!a!88+|8YiHJpHM4mA zHk#i69dt(JFJM<=M&F_LQynHT$opSCl|m_hL(Ayv4PPg|(n;E$ z?(`BZ9{4-4KM6)V{crp{$zYoMu@KydjUf}Xza5Tbk{%?n@WGTxs1&GGh7a=6*Jdvc zqK?;AoPjVB9eGGbLna>Uv!%6Zybz-;y>YU*T=<{LtMR z>^*e(Oj4Z})>!Dm3BbIV8_j{%A}*{5U}41H)DvX%u-!cyRSx~^u6scyR@Fw7oU5W z?e7a&Q1hJ&2VOUew7h1l!J_=e3uu@fItjw~2F%nNKnr`=#V&Diix9Gc$zqP(e_M9T zs$)u;xMh*QZeG?`n()&Vmv(;Ff_^1Gf!)4#4c6O`R$uS;qI{O)-?H!Qc~TOm&i_NAC9!j4TpR b3&1NMz(wFw*{MbGqJxjbwcR@&rkDQ#TZ)-T literal 0 HcmV?d00001 diff --git a/transcribe.py b/transcribe.py new file mode 100644 index 0000000..f3ddd85 --- /dev/null +++ b/transcribe.py @@ -0,0 +1,66 @@ +import whisper +import numpy as np +import sounddevice as sd +import queue +import sys + +# Parameters 
MODEL_TYPE = "tiny.en"  # model name handed to whisper.load_model()
CHANNELS = 1  # number of input channels requested from the stream
SAMPLERATE = 16000  # stream sample rate, in Hz
BLOCK_SIZE = 8000  # frames per callback: 0.5 seconds of audio at SAMPLERATE
TRANSCRIBE_RATE = 2  # seconds of audio accumulated before each transcription

# Hand-off between the audio callback (producer) and the loop in main()
# (consumer).
audio_queue = queue.Queue()


def callback(indata, frames, time, status):
    """Queue one captured audio block for the main loop.

    This function is passed as ``callback=`` to ``sd.InputStream`` in
    ``main()``.

    Args:
        indata: The captured block; must provide ``.copy()``.
        frames: Unused here.
        time: Unused here.
        status: Stream status; printed to stderr when truthy.
    """
    if status:
        print(status, file=sys.stderr)
    # Enqueue a copy so the queued data never aliases the caller's buffer.
    audio_queue.put(indata.copy())


def _drain_queue(source, timeout):
    """Return all blocks queued on *source*, waiting up to *timeout* s for the first."""
    try:
        # Blocking (but bounded) wait: the caller sleeps instead of spinning
        # on an empty queue, and still wakes regularly so Ctrl+C is honoured.
        blocks = [source.get(timeout=timeout)]
    except queue.Empty:
        return []
    while True:
        try:
            blocks.append(source.get_nowait())
        except queue.Empty:
            return blocks


def main():
    """Load the Whisper model and transcribe microphone audio until Ctrl+C.

    Audio blocks delivered through ``audio_queue`` are accumulated until at
    least ``SAMPLERATE * TRANSCRIBE_RATE`` samples are available; that chunk
    is transcribed, any non-empty text is printed, and accumulation starts
    over. ``KeyboardInterrupt`` stops the loop cleanly; any other exception
    is reported on stderr and the function returns.
    """
    print(f"Loading Whisper model '{MODEL_TYPE}'...")
    model = whisper.load_model(MODEL_TYPE)
    print("Model loaded.")

    print("\nAvailable Audio Devices:")
    print(sd.query_devices())

    # Informational only: the stream below is opened without an explicit
    # device argument, so this just reports the configured default input.
    default_device = sd.default.device[0]
    print(f"\nUsing default input device index: {default_device}")

    print("\nStarting live transcription... (Press Ctrl+C to stop)")
    print("Note: On macOS, you may need to grant Microphone permissions to your terminal.\n")

    min_samples = SAMPLERATE * TRANSCRIBE_RATE
    poll_timeout = BLOCK_SIZE / SAMPLERATE  # one block's worth of seconds
    pending = []  # flattened blocks awaiting transcription
    pending_samples = 0

    try:
        with sd.InputStream(samplerate=SAMPLERATE, channels=CHANNELS, callback=callback, blocksize=BLOCK_SIZE):
            while True:
                # Pull everything currently available from the queue.
                for block in _drain_queue(audio_queue, poll_timeout):
                    flat = block.ravel()
                    pending.append(flat)
                    pending_samples += flat.size

                if pending_samples < min_samples:
                    continue

                # Join once per chunk instead of re-copying the whole buffer
                # for every incoming block.
                audio_buffer = np.concatenate(pending).astype(np.float32, copy=False)
                pending = []
                pending_samples = 0

                # fp16=False is used for CPU execution
                result = model.transcribe(audio_buffer, fp16=False, language="en")
                text = result['text'].strip()

                if text:
                    print(f"Transcription: {text}")

    except KeyboardInterrupt:
        print("\nStopped by user.")
    except Exception as e:
        # Top-level boundary: report on stderr, like the stream status
        # messages, and fall through to a normal return.
        print(f"\nError: {e}", file=sys.stderr)


if __name__ == "__main__":
    main()