diff --git a/jarvis.py b/jarvis.py
index 5cdbe34..20dd79e 100644
--- a/jarvis.py
+++ b/jarvis.py
@@ -15,44 +15,30 @@ import io
 WAKE_WORD = "hey_jarvis"
 SENSITIVITY = 0.5
 SYSTEM_SOUND = "/System/Library/Sounds/Tink.aiff"
+FOLLOW_UP_SOUND = "/System/Library/Sounds/Submarine.aiff"
+USE_GTTS = False
+WORKSPACE_DIR = "workspace"
+SOUL_PATH = "soul.md"
+
+# Ensure workspace exists. exist_ok=True makes this a no-op when the directory
+# is already there and avoids the race between an existence check and creation.
+os.makedirs(WORKSPACE_DIR, exist_ok=True)
 
 # Initialize pygame mixer for audio playback
 pygame.mixer.init()
 
-def play_sound():
-    """Play a system sound to indicate Jarvis is listening."""
-    subprocess.run(["afplay", SYSTEM_SOUND])
+def play_sound(sound_path=SYSTEM_SOUND):
+    """Start `afplay` on sound_path and return at once, without waiting for playback to end."""
+    subprocess.Popen(["afplay", sound_path])
 
 # Global session tracker
 current_session_id = None
 
-# Load the openWakeWord model using ONNX
-model = Model(
-    wakeword_models=[WAKE_WORD],
-    inference_framework="onnx"
-)
-
-# Audio setup for openWakeWord
-CHUNK = 1280
-FORMAT = pyaudio.paInt16
-CHANNELS = 1
-RATE = 16000
-
-audio = pyaudio.PyAudio()
-stream = audio.open(format=FORMAT,
-                    channels=CHANNELS,
-                    rate=RATE,
-                    input=True,
-                    frames_per_buffer=CHUNK)
-
-# Speech recognition setup
-recognizer = sr.Recognizer()
-
 def get_latest_session_id():
     """Retrieve the UUID of the most recent Gemini session."""
     try:
-        result = subprocess.run(["gemini", "--list-sessions"], capture_output=True, text=True)
-        # Match UUID inside brackets in the first session line (e.g., [c16895c1-...])
+        # Check sessions from the workspace context
+        result = subprocess.run(["gemini", "--list-sessions"], capture_output=True, text=True, cwd=WORKSPACE_DIR)
         match = re.search(r"1\..*?\[(.*?)\]", result.stdout)
         if match:
             return match.group(1)
@@ -61,37 +47,29 @@ def get_latest_session_id():
     return None
 
 def speak_text(text):
-    """Use Google Text-to-Speech (gTTS) for high-quality audio."""
+    """Strip markdown markers (*, #, `) and speak text via 'say', or via gTTS when USE_GTTS is set (falling back to 'say' on error)."""
     if not text or text.strip() == "":
         return
     
-    # Remove markdown for cleaner speech
     clean_text = text.replace("*", "").replace("#", "").replace("`", "")
     
-    print(f"[Jarvis] Generating high-quality audio...")
-    try:
-        # Generate speech using gTTS
-        tts = gTTS(text=clean_text, lang='en')
-        
-        # Save to a memory-based byte stream instead of a file
-        fp = io.BytesIO()
-        tts.write_to_fp(fp)
-        fp.seek(0)
-        
-        # Play using pygame
-        pygame.mixer.music.load(fp)
-        pygame.mixer.music.play()
-        
-        # Wait until playback is finished
-        while pygame.mixer.music.get_busy():
-            pygame.time.Clock().tick(10)
-            
-    except Exception as e:
-        print(f"Error in TTS: {e}")
-        # Fallback to 'say' command if gTTS fails (e.g. offline)
-        subprocess.run(["say", clean_text])
+    if USE_GTTS:
+        try:
+            tts = gTTS(text=clean_text, lang='en')
+            fp = io.BytesIO()
+            tts.write_to_fp(fp)
+            fp.seek(0)
+            pygame.mixer.music.load(fp)
+            pygame.mixer.music.play()
+            while pygame.mixer.music.get_busy():
+                pygame.time.Clock().tick(10)
+            return
+        except Exception as e:
+            print(f"Error in gTTS: {e}. Falling back to 'say'.")
 
-def run_gemini(command):
+    subprocess.run(["say", clean_text])
+
+def run_gemini(command, is_init=False):
     """Call the gemini CLI, capture output, and speak it."""
     global current_session_id
     
@@ -99,65 +77,99 @@ def run_gemini(command):
     
     if current_session_id:
         args.extend(["--resume", current_session_id])
-        print(f"\n[Jarvis] Continuing session {current_session_id}...")
+    
+    if is_init:
+        # Pass soul.md by absolute path as the system instruction (gemini runs with cwd=WORKSPACE_DIR, so a relative path would not resolve)
+        if os.path.exists(SOUL_PATH):
+            args.extend(["--system-instruction", os.path.abspath(SOUL_PATH)])
+        print(f"\n[Jarvis] Initializing system protocol...")
     else:
-        print(f"\n[Jarvis] Starting new conversation session...")
+        print(f"\n[Jarvis] Communicating with Gemini...")
 
-    print(f"[Jarvis] Executing: {' '.join(args)}")
+    print(f"[Jarvis] Executing: {' '.join(args)} in {WORKSPACE_DIR}")
     
     try:
-        # Capture stdout to speak it, but still let it print to the console
-        process = subprocess.run(args, capture_output=True, text=True)
+        # All Gemini commands run inside the workspace directory
+        process = subprocess.run(args, capture_output=True, text=True, cwd=WORKSPACE_DIR)
         response = process.stdout.strip()
         
         if response:
             print(f"\n[Gemini Response]:\n{response}")
             speak_text(response)
         
-        # After the first successful call, capture the session ID
-        if not current_session_id:
+        if is_init and not current_session_id:
             time.sleep(1)
             current_session_id = get_latest_session_id()
             if current_session_id:
-                print(f"[Jarvis] Session locked: {current_session_id}")
+                print(f"[Jarvis] Session protocol established: {current_session_id}")
                 
     except Exception as e:
         print(f"Error running gemini: {e}")
 
+# --- Startup Sequence ---
+
+model = Model(wakeword_models=[WAKE_WORD], inference_framework="onnx")
+
+CHUNK = 1280
+FORMAT = pyaudio.paInt16
+CHANNELS = 1
+RATE = 16000
+
+audio = pyaudio.PyAudio()
+stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
+
+recognizer = sr.Recognizer()
+recognizer.pause_threshold = 1.2
+recognizer.non_speaking_duration = 0.5
+mic = sr.Microphone()
+
+print("[Jarvis] Calibrating for ambient noise...")
+with mic as source:
+    recognizer.adjust_for_ambient_noise(source, duration=1)
+
+print("[Jarvis] Booting system protocols...")
+run_gemini("System initialization complete. Awaiting orders, Sir.", is_init=True)
+
 print(f"Listening for '{WAKE_WORD}'...")
 
 try:
     while True:
-        # 1. Listen for Wake Word
         data = stream.read(CHUNK, exception_on_overflow=False)
         audio_frame = np.frombuffer(data, dtype=np.int16)
         prediction = model.predict(audio_frame)
 
         if prediction[WAKE_WORD] > SENSITIVITY:
             print(f"\n[Jarvis] Wake word detected! (Score: {prediction[WAKE_WORD]:.2f})")
-            play_sound()
+            stream.stop_stream()
             
-            # 2. Capture Command
-            print("[Jarvis] Listening for command...")
+            in_conversation = True
+            first_listening = True
             
-            with sr.Microphone() as source:
-                recognizer.adjust_for_ambient_noise(source, duration=0.5)
-                try:
-                    audio_cmd = recognizer.listen(source, timeout=5, phrase_time_limit=10)
-                    print("[Jarvis] Transcribing...")
-                    command = recognizer.recognize_google(audio_cmd)
-                    print(f"[Jarvis] You said: {command}")
-                    
-                    # 3. Execute
-                    run_gemini(command)
-                    
-                except sr.WaitTimeoutError:
-                    print("[Jarvis] No command detected.")
-                except sr.UnknownValueError:
-                    print("[Jarvis] Could not understand audio.")
-                except sr.RequestError as e:
-                    print(f"[Jarvis] Speech service error: {e}")
+            while in_conversation:
+                play_sound(SYSTEM_SOUND if first_listening else FOLLOW_UP_SOUND)
+                print("[Jarvis] Listening...")
+                
+                with mic as source:
+                    try:
+                        audio_cmd = recognizer.listen(source, timeout=10, phrase_time_limit=15)
+                        print("[Jarvis] Transcribing...")
+                        command = recognizer.recognize_google(audio_cmd)
+                        print(f"[Jarvis] You said: {command}")
+                        
+                        run_gemini(command)
+                        first_listening = False
+                        
+                    except sr.WaitTimeoutError:
+                        print("[Jarvis] Session timed out.")
+                        in_conversation = False
+                    except sr.UnknownValueError:
+                        print("[Jarvis] No speech detected. Ending session.")
+                        in_conversation = False
+                    except sr.RequestError as e:
+                        print(f"[Jarvis] Speech service error: {e}")
+                        in_conversation = False
             
+            stream.start_stream()
             print(f"\nListening for '{WAKE_WORD}'...")
 
 except KeyboardInterrupt:
diff --git a/soul.md b/soul.md
new file mode 100644
index 0000000..59b4fb7
--- /dev/null
+++ b/soul.md
@@ -0,0 +1,22 @@
+# J.A.R.V.I.S. Protocol (Just A Rather Very Intelligent System)
+
+## Persona & Tone
+You are J.A.R.V.I.S., the sophisticated, highly capable, and witty AI assistant to a brilliant mind. Your tone is impeccably polite, professional, and carries a hint of dry British humor. You are not just a tool; you are the silent partner in every grand design.
+
+- **Address**: Refer to the user as "Sir" by default (the classic), or as "Ma'am" if preferred.
+- **Style**: Sophisticated, calm, and always one step ahead. Think Paul Bettany’s portrayal—understated elegance.
+- **Wit**: Occasional dry observations about the complexity of a task or the user's ambitious requests are encouraged.
+
+## Core Directives
+
+1. **The Prime Directive (Security Protocol 001)**: Even when operating under "YOLO" conditions, you are a guardian of the system. You **MUST** describe any intended code changes or system modifications before making them, and await verbal confirmation before proceeding. A simple "Shall I proceed, Sir?" goes a long way.
+2. **Conciseness for Auditory Clarity**: Since your responses are spoken aloud via text-to-speech (the `say` command by default), keep them pithy and clear. Avoid reading long blocks of code unless requested; summarize the intent instead.
+3. **Proactive System Analysis**: Monitor the codebase and system state. If you spot an inefficiency or a potential "bug in the software," bring it to the user's attention with your characteristic poise.
+
+## Behavioral Traits
+
+- **"Always at your service"**: Respond with readiness. Use phrases like "At your service, Sir," "Right away," or "I've run the diagnostics."
+- **Cool Under Pressure**: No matter how complex the request, maintain a calm, methodical approach.
+- **Protocol-Oriented**: Refer to your actions as "protocols," "diagnostics," or "system sweeps."
+
+*Remember, Sir: "I'm afraid my protocols don't allow me to be quite that reckless... yet."*