diff --git a/AGENTS.md b/AGENTS.md
index 6946d441..1a79f6f7 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -15,7 +15,7 @@ All API keys live on a Cloudflare Worker proxy — nothing sensitive ships in th
 - **Framework**: SwiftUI (macOS native) with AppKit bridging for menu bar panel and cursor overlay
 - **Pattern**: MVVM with `@StateObject` / `@Published` state management
 - **AI Chat**: Claude (Sonnet 4.6 default, Opus 4.6 optional) via Cloudflare Worker proxy with SSE streaming
-- **Speech-to-Text**: AssemblyAI real-time streaming (`u3-rt-pro` model) via websocket, with OpenAI and Apple Speech as fallbacks
+- **Speech-to-Text**: AssemblyAI real-time streaming via websocket, using `u3-rt-pro` for English and `whisper-rt` for Chinese, with OpenAI and Apple Speech as fallbacks
 - **Text-to-Speech**: ElevenLabs (`eleven_flash_v2_5` model) via Cloudflare Worker proxy
 - **Screen Capture**: ScreenCaptureKit (macOS 14.2+), multi-monitor support
 - **Voice Input**: Push-to-talk via `AVAudioEngine` + pluggable transcription-provider layer. System-wide keyboard shortcut via listen-only CGEvent tap.
@@ -34,7 +34,7 @@ The app never calls external APIs directly. All requests go through a Cloudflare
 | `POST /transcribe-token` | `streaming.assemblyai.com/v3/token` | Fetches a short-lived (480s) AssemblyAI websocket token |
 
 Worker secrets: `ANTHROPIC_API_KEY`, `ASSEMBLYAI_API_KEY`, `ELEVENLABS_API_KEY`
-Worker vars: `ELEVENLABS_VOICE_ID`
+Worker vars: `ELEVENLABS_VOICE_ID`, `ELEVENLABS_CHINESE_VOICE_ID` (optional)
 
 ### Key Architecture Decisions
 
@@ -61,7 +61,7 @@ Worker vars: `ELEVENLABS_VOICE_ID`
 | `CompanionScreenCaptureUtility.swift` | ~132 | Multi-monitor screenshot capture using ScreenCaptureKit. Returns labeled image data for each connected display. |
 | `BuddyDictationManager.swift` | ~866 | Push-to-talk voice pipeline. Handles microphone capture via `AVAudioEngine`, provider-aware permission checks, keyboard/button dictation sessions, transcript finalization, shortcut parsing, contextual keyterms, and live audio-level reporting for waveform feedback. |
 | `BuddyTranscriptionProvider.swift` | ~100 | Protocol surface and provider factory for voice transcription backends. Resolves provider based on `VoiceTranscriptionProvider` in Info.plist — AssemblyAI, OpenAI, or Apple Speech. |
-| `AssemblyAIStreamingTranscriptionProvider.swift` | ~478 | Streaming transcription provider. Fetches temp tokens from the Cloudflare Worker, opens an AssemblyAI v3 websocket, streams PCM16 audio, tracks turn-based transcripts, and delivers finalized text on key-up. Shares a single URLSession across all sessions. |
+| `AssemblyAIStreamingTranscriptionProvider.swift` | ~541 | Streaming transcription provider. Fetches temp tokens from the Cloudflare Worker, opens an AssemblyAI v3 websocket, streams PCM16 audio, switches between `u3-rt-pro` for English and `whisper-rt` for Chinese, tracks turn-based transcripts, and delivers finalized text on key-up. Shares a single URLSession across all sessions. |
 | `OpenAIAudioTranscriptionProvider.swift` | ~317 | Upload-based transcription provider. Buffers push-to-talk audio locally, uploads as WAV on release, returns finalized transcript. |
 | `AppleSpeechTranscriptionProvider.swift` | ~147 | Local fallback transcription provider backed by Apple's Speech framework. |
 | `BuddyAudioConversionSupport.swift` | ~108 | Audio conversion helpers. Converts live mic buffers to PCM16 mono audio and builds WAV payloads for upload-based providers. |
diff --git a/README.md b/README.md
index d7dbf74b..8f0fbcff 100644
--- a/README.md
+++ b/README.md
@@ -56,13 +56,16 @@ npx wrangler secret put ASSEMBLYAI_API_KEY
 npx wrangler secret put ELEVENLABS_API_KEY
 ```
 
-For the ElevenLabs voice ID, open `wrangler.toml` and set it there (it's not sensitive):
+For the ElevenLabs voice IDs, open `wrangler.toml` and set them there (they're not sensitive):
 
 ```toml
 [vars]
 ELEVENLABS_VOICE_ID = "your-voice-id-here"
+ELEVENLABS_CHINESE_VOICE_ID = "optional-chinese-voice-id"
 ```
 
+`ELEVENLABS_VOICE_ID` stays the default voice. `ELEVENLABS_CHINESE_VOICE_ID` is optional and only used when the app is set to Chinese voice mode. If you leave it blank, the default voice is reused.
+
 Deploy it:
 
 ```bash
@@ -87,6 +90,7 @@ ANTHROPIC_API_KEY=sk-ant-...
 ASSEMBLYAI_API_KEY=...
 ELEVENLABS_API_KEY=...
 ELEVENLABS_VOICE_ID=...
+ELEVENLABS_CHINESE_VOICE_ID=...
 ```
 
 Then update the proxy URLs in the Swift code to point to `http://localhost:8787` instead of the deployed Worker URL while developing. Grep for `clicky-proxy` to find them all.
@@ -127,7 +131,7 @@ The app will appear in your menu bar (not the dock). Click the icon to open the
 
 If you want the full technical breakdown, read `CLAUDE.md`. But here's the short version:
 
-**Menu bar app** (no dock icon) with two `NSPanel` windows — one for the control panel dropdown, one for the full-screen transparent cursor overlay. Push-to-talk streams audio over a websocket to AssemblyAI, sends the transcript + screenshot to Claude via streaming SSE, and plays the response through ElevenLabs TTS. Claude can embed `[POINT:x,y:label:screenN]` tags in its responses to make the cursor fly to specific UI elements across multiple monitors. All three APIs are proxied through a Cloudflare Worker.
+**Menu bar app** (no dock icon) with two `NSPanel` windows — one for the control panel dropdown, one for the full-screen transparent cursor overlay. Push-to-talk streams audio over a websocket to AssemblyAI, sends the transcript + screenshot to Claude via streaming SSE, and plays the response through ElevenLabs TTS. English uses AssemblyAI `u3-rt-pro`, while Chinese switches to `whisper-rt` so Chinese speech can be transcribed reliably. Claude can embed `[POINT:x,y:label:screenN]` tags in its responses to make the cursor fly to specific UI elements across multiple monitors. All three APIs are proxied through a Cloudflare Worker.
 
 ## Project structure
 
diff --git a/install-clicky.sh b/install-clicky.sh
new file mode 100644
index 00000000..02e24b5d
--- /dev/null
+++ b/install-clicky.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Usage: install-clicky.sh [path/to/Clicky.app] - installs the app into /Applications (default source: the Debug build in DerivedData).
+APP_PATH="${1:-$HOME/Library/Developer/Xcode/DerivedData/leanring-buddy-dvwepfgqqgvpjhbjcbybcytzaawt/Index.noindex/Build/Products/Debug/Clicky.app}"
+if [ ! -d "$APP_PATH" ]; then
+    echo "❌ Clicky.app not found. Please build in Xcode first (⌘R)" >&2
+    exit 1
+fi
+rm -rf "/Applications/Clicky.app" || { echo "❌ Could not remove the existing /Applications/Clicky.app" >&2; exit 1; }
+cp -R "$APP_PATH" "/Applications/Clicky.app" || { echo "❌ Could not copy Clicky.app into /Applications" >&2; exit 1; }
+echo "✅ Clicky installed to /Applications!"
+echo "Now open Clicky from Applications folder and grant permissions."
diff --git a/leanring-buddy.xcodeproj/project.pbxproj b/leanring-buddy.xcodeproj/project.pbxproj
index 75e57261..4c4e65ee 100644
--- a/leanring-buddy.xcodeproj/project.pbxproj
+++ b/leanring-buddy.xcodeproj/project.pbxproj
@@ -34,9 +34,22 @@
 		28F22CD62F56440300A0FC59 /* leanring-buddyUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "leanring-buddyUITests.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
 /* End PBXFileReference section */
 
+/* Begin PBXFileSystemSynchronizedBuildFileExceptionSet section */
+		AA11CC112F7000010039DA55 /* Exceptions for "leanring-buddy" folder in "leanring-buddy" target */ = {
+			isa = PBXFileSystemSynchronizedBuildFileExceptionSet;
+			membershipExceptions = (
+				Info.plist,
+			);
+			target = 28F22CBE2F56440300A0FC59 /* leanring-buddy */;
+		};
+/* End PBXFileSystemSynchronizedBuildFileExceptionSet section */
+
 /* Begin PBXFileSystemSynchronizedRootGroup section */
 		28F22CC12F56440300A0FC59 /* leanring-buddy */ = {
 			isa = PBXFileSystemSynchronizedRootGroup;
+			exceptions = (
+				AA11CC112F7000010039DA55 /* Exceptions for "leanring-buddy" folder in "leanring-buddy" target */,
+			);
 			path = "leanring-buddy";
 			sourceTree = "<group>";
 		};
@@ -411,7 +424,7 @@
 				CODE_SIGN_STYLE = Automatic;
 				COMBINE_HIDPI_IMAGES = YES;
 				CURRENT_PROJECT_VERSION = 1;
-				DEVELOPMENT_TEAM = 2UDAY4J48G;
+				DEVELOPMENT_TEAM = ZZY6K862N2;
 				ENABLE_APP_SANDBOX = NO;
 				ENABLE_HARDENED_RUNTIME = YES;
 				ENABLE_OUTGOING_NETWORK_CONNECTIONS = YES;
@@ -449,7 +462,7 @@
 				CODE_SIGN_STYLE = Automatic;
 				COMBINE_HIDPI_IMAGES = YES;
 				CURRENT_PROJECT_VERSION = 1;
-				DEVELOPMENT_TEAM = 2UDAY4J48G;
+				DEVELOPMENT_TEAM = ZZY6K862N2;
 				ENABLE_APP_SANDBOX = NO;
 				ENABLE_HARDENED_RUNTIME = YES;
 				ENABLE_OUTGOING_NETWORK_CONNECTIONS = YES;
@@ -484,7 +497,7 @@
 				BUNDLE_LOADER = "$(TEST_HOST)";
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
-				DEVELOPMENT_TEAM = 6D7X9GGZAW;
+				DEVELOPMENT_TEAM = ZZY6K862N2;
 				GENERATE_INFOPLIST_FILE = YES;
 				MACOSX_DEPLOYMENT_TARGET = 14.2;
 				MARKETING_VERSION = 1.0;
@@ -505,7 +518,7 @@
 				BUNDLE_LOADER = "$(TEST_HOST)";
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
-				DEVELOPMENT_TEAM = 6D7X9GGZAW;
+				DEVELOPMENT_TEAM = ZZY6K862N2;
 				GENERATE_INFOPLIST_FILE = YES;
 				MACOSX_DEPLOYMENT_TARGET = 14.2;
 				MARKETING_VERSION = 1.0;
@@ -525,7 +538,7 @@
 			buildSettings = {
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
-				DEVELOPMENT_TEAM = 6D7X9GGZAW;
+				DEVELOPMENT_TEAM = ZZY6K862N2;
 				GENERATE_INFOPLIST_FILE = YES;
 				MARKETING_VERSION = 1.0;
 				PRODUCT_BUNDLE_IDENTIFIER = "com.yourcompany.leanring-buddyUITests";
@@ -544,7 +557,7 @@
 			buildSettings = {
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
-				DEVELOPMENT_TEAM = 6D7X9GGZAW;
+				DEVELOPMENT_TEAM = ZZY6K862N2;
 				GENERATE_INFOPLIST_FILE = YES;
 				MARKETING_VERSION = 1.0;
 				PRODUCT_BUNDLE_IDENTIFIER = "com.yourcompany.leanring-buddyUITests";
diff --git a/leanring-buddy.xcodeproj/xcuserdata/mac.xcuserdatad/xcschemes/xcschememanagement.plist b/leanring-buddy.xcodeproj/xcuserdata/mac.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 00000000..6c13490a
--- /dev/null
+++ b/leanring-buddy.xcodeproj/xcuserdata/mac.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>SchemeUserState</key>
+	<dict>
+		<key>leanring-buddy.xcscheme_^#shared#^_</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>0</integer>
+		</dict>
+	</dict>
+</dict>
+</plist>
diff --git a/leanring-buddy/AppleSpeechTranscriptionProvider.swift b/leanring-buddy/AppleSpeechTranscriptionProvider.swift
index 600594fa..20e41ee2 100644
--- a/leanring-buddy/AppleSpeechTranscriptionProvider.swift
+++ b/leanring-buddy/AppleSpeechTranscriptionProvider.swift
@@ -25,11 +25,12 @@ final class AppleSpeechTranscriptionProvider: BuddyTranscriptionProvider {
 
     func startStreamingSession(
         keyterms: [String],
+        languageCode: String?,
         onTranscriptUpdate: @escaping (String) -> Void,
         onFinalTranscriptReady: @escaping (String) -> Void,
         onError: @escaping (Error) -> Void
     ) async throws -> any BuddyStreamingTranscriptionSession {
-        guard let speechRecognizer = Self.makeBestAvailableSpeechRecognizer() else {
+        guard let speechRecognizer = Self.makeBestAvailableSpeechRecognizer(languageCode: languageCode) else {
             throw AppleSpeechTranscriptionProviderError(message: "dictation is not available on this mac.")
         }
 
@@ -41,14 +42,23 @@ final class AppleSpeechTranscriptionProvider: BuddyTranscriptionProvider {
         )
     }
 
-    private static func makeBestAvailableSpeechRecognizer() -> SFSpeechRecognizer? {
-        let preferredLocales = [
-            Locale.autoupdatingCurrent,
-            Locale(identifier: "en-US")
-        ]
+    private static func makeBestAvailableSpeechRecognizer(languageCode: String?) -> SFSpeechRecognizer? {
+        var preferredLocales: [Locale] = []
+
+        if let languageCode {
+            preferredLocales.append(Locale(identifier: languageCode))
+        }
+
+        preferredLocales.append(Locale.autoupdatingCurrent)
+        preferredLocales.append(Locale(identifier: "en-US"))
+
+        if languageCode != "zh-CN" {
+            preferredLocales.append(Locale(identifier: "zh-CN"))
+        }
 
         for preferredLocale in preferredLocales {
-            if let speechRecognizer = SFSpeechRecognizer(locale: preferredLocale) {
+            if let speechRecognizer = SFSpeechRecognizer(locale: preferredLocale),
+               speechRecognizer.isAvailable {
                 return speechRecognizer
             }
         }
diff --git a/leanring-buddy/AssemblyAIStreamingTranscriptionProvider.swift b/leanring-buddy/AssemblyAIStreamingTranscriptionProvider.swift
index d21286b6..3dfbb6c2 100644
--- a/leanring-buddy/AssemblyAIStreamingTranscriptionProvider.swift
+++ b/leanring-buddy/AssemblyAIStreamingTranscriptionProvider.swift
@@ -19,7 +19,7 @@ struct AssemblyAIStreamingTranscriptionProviderError: LocalizedError {
 final class AssemblyAIStreamingTranscriptionProvider: BuddyTranscriptionProvider {
     /// URL for the Cloudflare Worker endpoint that returns a short-lived
     /// AssemblyAI streaming token. The real API key never leaves the server.
-    private static let tokenProxyURL = "https://your-worker-name.your-subdomain.workers.dev/transcribe-token"
+    private static let tokenProxyURL = "https://clicky-proxy.clicky-mark.workers.dev/transcribe-token"
 
     let displayName = "AssemblyAI"
     let requiresSpeechRecognitionPermission = false
@@ -35,11 +35,11 @@ final class AssemblyAIStreamingTranscriptionProvider: BuddyTranscriptionProvider
 
     func startStreamingSession(
         keyterms: [String],
+        languageCode: String?,
         onTranscriptUpdate: @escaping (String) -> Void,
         onFinalTranscriptReady: @escaping (String) -> Void,
         onError: @escaping (Error) -> Void
     ) async throws -> any BuddyStreamingTranscriptionSession {
-        // Fetch a fresh temporary token from the proxy before each session
         let temporaryToken = try await fetchTemporaryToken()
         print("🎙️ AssemblyAI: fetched temporary token (\(temporaryToken.prefix(20))...)")
 
@@ -48,6 +48,7 @@ final class AssemblyAIStreamingTranscriptionProvider: BuddyTranscriptionProvider
             temporaryToken: temporaryToken,
             urlSession: sharedWebSocketURLSession,
             keyterms: keyterms,
+            languageCode: languageCode,
             onTranscriptUpdate: onTranscriptUpdate,
             onFinalTranscriptReady: onFinalTranscriptReady,
             onError: onError
@@ -85,6 +86,53 @@ final class AssemblyAIStreamingTranscriptionProvider: BuddyTranscriptionProvider
 }
 
 private final class AssemblyAIStreamingTranscriptionSession: NSObject, BuddyStreamingTranscriptionSession {
+    /// Chooses the AssemblyAI streaming model, and the language-related query
+    /// flags that go with it, from the language code requested for a session.
+    /// The websocket URL builder reads every property below.
+    private enum StreamingSpeechModelConfiguration {
+        /// Selected when no language, a blank language, or `"en"` is requested.
+        case universalRealtimePro
+        /// Selected for every other requested language code.
+        case whisperRealtime
+
+        /// Resolves the configuration for `languageCode`.
+        ///
+        /// The code is trimmed of whitespace and lowercased before it is compared,
+        /// so `" EN "` counts as English while a region-qualified code such as
+        /// `"en-US"` does not.
+        init(languageCode: String?) {
+            let comparableLanguageCode = (languageCode ?? "")
+                .trimmingCharacters(in: .whitespacesAndNewlines)
+                .lowercased()
+            let requestsNonEnglishLanguage =
+                !comparableLanguageCode.isEmpty && comparableLanguageCode != "en"
+
+            self = requestsNonEnglishLanguage ? .whisperRealtime : .universalRealtimePro
+        }
+
+        /// Value sent as the `speech_model` query item of the websocket URL.
+        var modelIdentifier: String {
+            switch self {
+            case .universalRealtimePro:
+                return "u3-rt-pro"
+            case .whisperRealtime:
+                return "whisper-rt"
+            }
+        }
+
+        /// Whether a `language_code` query item may be added to the websocket URL.
+        /// `true` only for `.universalRealtimePro`.
+        var supportsExplicitLanguageCode: Bool {
+            self == .universalRealtimePro
+        }
+
+        /// Whether `language_detection=true` is added to the websocket URL.
+        /// `true` only for `.whisperRealtime`.
+        var shouldEnableLanguageDetection: Bool {
+            self == .whisperRealtime
+        }
+    }
+
     private struct MessageEnvelope: Decodable {
         let type: String
     }
@@ -117,6 +165,7 @@ private final class AssemblyAIStreamingTranscriptionSession: NSObject, BuddyStre
     private let apiKey: String?
     private let temporaryToken: String?
     private let keyterms: [String]
+    private let languageCode: String?
     private let onTranscriptUpdate: (String) -> Void
     private let onFinalTranscriptReady: (String) -> Void
     private let onError: (Error) -> Void
@@ -142,6 +191,7 @@ private final class AssemblyAIStreamingTranscriptionSession: NSObject, BuddyStre
         temporaryToken: String?,
         urlSession: URLSession,
         keyterms: [String],
+        languageCode: String?,
         onTranscriptUpdate: @escaping (String) -> Void,
         onFinalTranscriptReady: @escaping (String) -> Void,
         onError: @escaping (Error) -> Void
@@ -150,6 +200,7 @@ private final class AssemblyAIStreamingTranscriptionSession: NSObject, BuddyStre
         self.temporaryToken = temporaryToken
         self.urlSession = urlSession
         self.keyterms = keyterms
+        self.languageCode = languageCode
         self.onTranscriptUpdate = onTranscriptUpdate
         self.onFinalTranscriptReady = onFinalTranscriptReady
         self.onError = onError
@@ -158,7 +209,8 @@ private final class AssemblyAIStreamingTranscriptionSession: NSObject, BuddyStre
     func open() async throws {
         let websocketURL = try Self.makeWebsocketURL(
             temporaryToken: temporaryToken,
-            keyterms: keyterms
+            keyterms: keyterms,
+            languageCode: languageCode
         )
 
         var websocketRequest = URLRequest(url: websocketURL)
@@ -436,7 +488,8 @@ private final class AssemblyAIStreamingTranscriptionSession: NSObject, BuddyStre
 
     private static func makeWebsocketURL(
         temporaryToken: String?,
-        keyterms: [String]
+        keyterms: [String],
+        languageCode: String?
     ) throws -> URL {
         guard var websocketURLComponents = URLComponents(string: websocketBaseURLString) else {
             throw AssemblyAIStreamingTranscriptionProviderError(
@@ -444,13 +497,29 @@ private final class AssemblyAIStreamingTranscriptionSession: NSObject, BuddyStre
             )
         }
 
+        let streamingSpeechModelConfiguration = StreamingSpeechModelConfiguration(
+            languageCode: languageCode
+        )
+
         var queryItems = [
             URLQueryItem(name: "sample_rate", value: "16000"),
             URLQueryItem(name: "encoding", value: "pcm_s16le"),
             URLQueryItem(name: "format_turns", value: "true"),
-            URLQueryItem(name: "speech_model", value: "u3-rt-pro")
+            URLQueryItem(
+                name: "speech_model",
+                value: streamingSpeechModelConfiguration.modelIdentifier
+            )
         ]
 
+        if streamingSpeechModelConfiguration.shouldEnableLanguageDetection {
+            queryItems.append(URLQueryItem(name: "language_detection", value: "true"))
+        }
+
+        if streamingSpeechModelConfiguration.supportsExplicitLanguageCode,
+           let languageCode {
+            queryItems.append(URLQueryItem(name: "language_code", value: languageCode))
+        }
+
         let normalizedKeyterms = keyterms
             .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
             .filter { !$0.isEmpty }
diff --git a/leanring-buddy/BuddyDictationManager.swift b/leanring-buddy/BuddyDictationManager.swift
index 5bca2677..eedbfbba 100644
--- a/leanring-buddy/BuddyDictationManager.swift
+++ b/leanring-buddy/BuddyDictationManager.swift
@@ -265,6 +265,7 @@ final class BuddyDictationManager: NSObject, ObservableObject {
     private let transcriptionProvider: any BuddyTranscriptionProvider
     private let audioEngine = AVAudioEngine()
     private var activeTranscriptionSession: (any BuddyStreamingTranscriptionSession)?
+    var languageCode: String?
     private var activeStartSource: BuddyDictationStartSource?
     private var draftCallbacks: BuddyDictationDraftCallbacks?
     private var draftTextBeforeCurrentDictation = ""
@@ -519,6 +520,7 @@ final class BuddyDictationManager: NSObject, ObservableObject {
 
         let activeTranscriptionSession = try await transcriptionProvider.startStreamingSession(
             keyterms: buildTranscriptionKeyterms(),
+            languageCode: languageCode,
             onTranscriptUpdate: { [weak self] transcriptText in
                 Task { @MainActor in
                     self?.latestRecognizedText = transcriptText
diff --git a/leanring-buddy/BuddyTranscriptionProvider.swift b/leanring-buddy/BuddyTranscriptionProvider.swift
index 0a75715d..8c057c24 100644
--- a/leanring-buddy/BuddyTranscriptionProvider.swift
+++ b/leanring-buddy/BuddyTranscriptionProvider.swift
@@ -23,6 +23,7 @@ protocol BuddyTranscriptionProvider {
 
     func startStreamingSession(
         keyterms: [String],
+        languageCode: String?,
         onTranscriptUpdate: @escaping (String) -> Void,
         onFinalTranscriptReady: @escaping (String) -> Void,
         onError: @escaping (Error) -> Void
diff --git a/leanring-buddy/ClaudeAPI.swift b/leanring-buddy/ClaudeAPI.swift
index 0c7070b5..125da23b 100644
--- a/leanring-buddy/ClaudeAPI.swift
+++ b/leanring-buddy/ClaudeAPI.swift
@@ -203,7 +203,7 @@ class ClaudeAPI {
                 accumulatedResponseText += textChunk
                 // Send the accumulated text so far to the UI for progressive rendering
                 let currentAccumulatedText = accumulatedResponseText
-                await onTextChunk(currentAccumulatedText)
+                onTextChunk(currentAccumulatedText)
             }
         }
 
diff --git a/leanring-buddy/CompanionManager.swift b/leanring-buddy/CompanionManager.swift
index 0234cf19..039c6764 100644
--- a/leanring-buddy/CompanionManager.swift
+++ b/leanring-buddy/CompanionManager.swift
@@ -7,6 +7,7 @@
 //  exposes observable voice state for the panel UI.
 //
 
+@preconcurrency import AVFAudio
 import AVFoundation
 import Combine
 import Foundation
@@ -23,6 +24,8 @@ enum CompanionVoiceState {
 
 @MainActor
 final class CompanionManager: ObservableObject {
+    private static let screenContentPermissionUserDefaultsKey = "hasScreenContentPermission"
+
     @Published private(set) var voiceState: CompanionVoiceState = .idle
     @Published private(set) var lastTranscript: String?
     @Published private(set) var currentAudioPowerLevel: CGFloat = 0
@@ -61,6 +64,10 @@ final class CompanionManager: ObservableObject {
 
     private var onboardingMusicPlayer: AVAudioPlayer?
     private var onboardingMusicFadeTimer: Timer?
+    private var onboardingMusicFadeStepsRemaining = 0
+    private var onboardingMusicVolumeDecrement: Float = 0
+    private var onboardingPromptStreamTask: Task<Void, Never>?
+    private let fallbackSpeechSynthesizer = AVSpeechSynthesizer()
 
     let buddyDictationManager = BuddyDictationManager()
     let globalPushToTalkShortcutMonitor = GlobalPushToTalkShortcutMonitor()
@@ -70,7 +77,7 @@ final class CompanionManager: ObservableObject {
 
     /// Base URL for the Cloudflare Worker proxy. All API requests route
     /// through this so keys never ship in the app binary.
-    private static let workerBaseURL = "https://your-worker-name.your-subdomain.workers.dev"
+    private static let workerBaseURL = "https://clicky-proxy.clicky-mark.workers.dev"
 
     private lazy var claudeAPI: ClaudeAPI = {
         return ClaudeAPI(proxyURL: "\(Self.workerBaseURL)/chat", model: selectedModel)
@@ -93,6 +100,7 @@ final class CompanionManager: ObservableObject {
     private var audioPowerCancellable: AnyCancellable?
     private var accessibilityCheckTimer: Timer?
     private var pendingKeyboardShortcutStartTask: Task<Void, Never>?
+    private var screenContentPermissionValidationTask: Task<Void, Never>?
     /// Scheduled hide for transient cursor mode — cancelled if the user
     /// speaks again before the delay elapses.
     private var transientHideTask: Task<Void, Never>?
@@ -116,6 +124,73 @@ final class CompanionManager: ObservableObject {
         claudeAPI.model = model
     }
 
+    /// Supported voice languages for transcription, AI responses, and TTS.
+    enum VoiceLanguage: String, CaseIterable, Identifiable {
+        case english = "en"
+        case chinese = "zh"
+        /// `Identifiable` identity; this is the case's raw value.
+        var id: String { rawValue }
+        /// Name of the language, written in that language.
+        var displayName: String {
+            switch self {
+            case .english: return "English"
+            case .chinese: return "中文"
+            }
+        }
+        /// Locale for the language: `en-US` for English, `zh-CN` for Chinese.
+        var locale: Locale {
+            switch self {
+            case .english: return Locale(identifier: "en-US")
+            case .chinese: return Locale(identifier: "zh-CN")
+            }
+        }
+        /// Language code handed to the dictation manager; `nil` for English, `"zh"` for Chinese.
+        var transcriptionLanguageCode: String? {
+            switch self {
+            case .english: return nil
+            case .chinese: return "zh"
+            }
+        }
+        /// Language code for text-to-speech; `nil` for English, `"zh"` for Chinese.
+        var textToSpeechLanguageCode: String? {
+            switch self {
+            case .english: return nil
+            case .chinese: return "zh"
+            }
+        }
+        /// Bulleted reply-style instructions for this language, one rule per line.
+        var responseSystemPromptInstructions: String {
+            switch self {
+            case .english:
+                return """
+                - reply in english unless the user clearly asks for another language.
+                - all lowercase, casual, warm. no emojis.
+                """
+            case .chinese:
+                return """
+                - reply in simplified chinese unless the user clearly asks for another language.
+                - write the way spoken mandarin sounds in conversation: natural, warm, and compact.
+                - chinese does not need lowercase styling. keep punctuation natural for speech.
+                - if a technical term is much easier to recognize in english, you can mention the english term once alongside the chinese explanation.
+                """
+            }
+        }
+    }
+
+    /// The voice language used for transcription and TTS. Starts as the language
+    /// saved in UserDefaults, or English when no valid value is saved.
+    @Published var selectedVoiceLanguage: VoiceLanguage = {
+        let persistedRawValue = UserDefaults.standard.string(forKey: "selectedVoiceLanguage")
+        return persistedRawValue.flatMap { VoiceLanguage(rawValue: $0) } ?? .english
+    }()
+
+    /// Selects `language` and saves its raw value to UserDefaults so the choice is
+    /// restored the next time the manager is created.
+    func setSelectedVoiceLanguage(_ language: VoiceLanguage) {
+        selectedVoiceLanguage = language
+        UserDefaults.standard.set(language.rawValue, forKey: "selectedVoiceLanguage")
+    }
+
     /// User preference for whether the Clicky cursor should be shown.
     /// When toggled off, the overlay is hidden and push-to-talk is disabled.
     /// Persisted to UserDefaults so the choice survives app restarts.
@@ -233,6 +308,8 @@ final class CompanionManager: ObservableObject {
     private func stopOnboardingMusic() {
         onboardingMusicFadeTimer?.invalidate()
         onboardingMusicFadeTimer = nil
+        onboardingMusicFadeStepsRemaining = 0
+        onboardingMusicVolumeDecrement = 0
         onboardingMusicPlayer?.stop()
         onboardingMusicPlayer = nil
     }
@@ -252,7 +329,9 @@ final class CompanionManager: ObservableObject {
 
             // After 1m 30s, fade the music out over 3s
             onboardingMusicFadeTimer = Timer.scheduledTimer(withTimeInterval: 90.0, repeats: false) { [weak self] _ in
-                self?.fadeOutOnboardingMusic()
+                Task { @MainActor [weak self] in
+                    self?.fadeOutOnboardingMusic()
+                }
             }
         } catch {
             print("⚠️ Clicky: Failed to play onboarding music: \(error)")
@@ -265,22 +344,38 @@ final class CompanionManager: ObservableObject {
         let fadeSteps = 30
         let fadeDuration: Double = 3.0
         let stepInterval = fadeDuration / Double(fadeSteps)
-        let volumeDecrement = player.volume / Float(fadeSteps)
-        var stepsRemaining = fadeSteps
+        onboardingMusicFadeStepsRemaining = fadeSteps
+        onboardingMusicVolumeDecrement = player.volume / Float(fadeSteps)
 
         onboardingMusicFadeTimer = Timer.scheduledTimer(withTimeInterval: stepInterval, repeats: true) { [weak self] timer in
-            stepsRemaining -= 1
-            player.volume -= volumeDecrement
-
-            if stepsRemaining <= 0 {
-                timer.invalidate()
-                player.stop()
-                self?.onboardingMusicPlayer = nil
-                self?.onboardingMusicFadeTimer = nil
+            Task { @MainActor [weak self] in
+                self?.handleOnboardingMusicFadeTimerTick(timer)
             }
         }
     }
 
+    /// Applies one fade step for `timer`: lowers the onboarding music volume and,
+    /// once no steps remain, stops the player and clears all fade state.
+    private func handleOnboardingMusicFadeTimerTick(_ timer: Timer) {
+        // Ticks arrive via a main-actor Task, so one can run after its timer was
+        // cancelled or replaced; a stale tick must not touch the current fade state.
+        guard timer === onboardingMusicFadeTimer else {
+            timer.invalidate()
+            return
+        }
+        if let player = onboardingMusicPlayer {
+            onboardingMusicFadeStepsRemaining -= 1
+            player.volume -= onboardingMusicVolumeDecrement
+            guard onboardingMusicFadeStepsRemaining <= 0 else { return }
+            player.stop()
+            onboardingMusicPlayer = nil
+        }
+        timer.invalidate()  // fade finished, or the player was already released
+        onboardingMusicFadeTimer = nil
+        onboardingMusicFadeStepsRemaining = 0
+        onboardingMusicVolumeDecrement = 0
+    }
+
     func clearDetectedElementLocation() {
         detectedElementScreenLocation = nil
         detectedElementDisplayFrame = nil
@@ -292,6 +387,8 @@ final class CompanionManager: ObservableObject {
         buddyDictationManager.cancelCurrentDictation()
         overlayWindowManager.hideOverlay()
         transientHideTask?.cancel()
+        screenContentPermissionValidationTask?.cancel()
+        onboardingPromptStreamTask?.cancel()
 
         currentResponseTask?.cancel()
         currentResponseTask = nil
@@ -317,7 +414,8 @@ final class CompanionManager: ObservableObject {
             globalPushToTalkShortcutMonitor.stop()
         }
 
-        hasScreenRecordingPermission = WindowPositionManager.hasScreenRecordingPermission()
+        hasScreenRecordingPermission = WindowPositionManager
+            .shouldTreatScreenRecordingPermissionAsGrantedForSessionLaunch()
 
         let micAuthStatus = AVCaptureDevice.authorizationStatus(for: .audio)
         hasMicrophonePermission = micAuthStatus == .authorized
@@ -339,10 +437,13 @@ final class CompanionManager: ObservableObject {
         if !previouslyHadMicrophone && hasMicrophonePermission {
             ClickyAnalytics.trackPermissionGranted(permission: "microphone")
         }
-        // Screen content permission is persisted — once the user has approved the
-        // SCShareableContent picker, we don't need to re-check it.
-        if !hasScreenContentPermission {
-            hasScreenContentPermission = UserDefaults.standard.bool(forKey: "hasScreenContentPermission")
+        if !hasScreenRecordingPermission {
+            hasScreenContentPermission = false
+            screenContentPermissionValidationTask?.cancel()
+            screenContentPermissionValidationTask = nil
+        } else if hasScreenContentPermission
+            || UserDefaults.standard.bool(forKey: Self.screenContentPermissionUserDefaultsKey) {
+            validateScreenContentPermissionAgainstSystemIfNeeded()
         }
 
         if !previouslyHadAll && allPermissionsGranted {
@@ -378,7 +479,7 @@ final class CompanionManager: ObservableObject {
                     isRequestingScreenContent = false
                     guard didCapture else { return }
                     hasScreenContentPermission = true
-                    UserDefaults.standard.set(true, forKey: "hasScreenContentPermission")
+                    UserDefaults.standard.set(true, forKey: Self.screenContentPermissionUserDefaultsKey)
                     ClickyAnalytics.trackPermissionGranted(permission: "screen_content")
 
                     // If onboarding was already completed, show the cursor overlay now
@@ -390,7 +491,10 @@ final class CompanionManager: ObservableObject {
                 }
             } catch {
                 print("⚠️ Screen content permission request failed: \(error)")
-                await MainActor.run { isRequestingScreenContent = false }
+                await MainActor.run {
+                    isRequestingScreenContent = false
+                    invalidateScreenContentPermissionGrant()
+                }
             }
         }
     }
@@ -512,6 +616,7 @@ final class CompanionManager: ObservableObject {
 
             pendingKeyboardShortcutStartTask?.cancel()
             pendingKeyboardShortcutStartTask = Task {
+                buddyDictationManager.languageCode = selectedVoiceLanguage.transcriptionLanguageCode
                 await buddyDictationManager.startPushToTalkFromKeyboardShortcut(
                     currentDraftText: "",
                     updateDraftText: { _ in
@@ -541,12 +646,15 @@ final class CompanionManager: ObservableObject {
 
     // MARK: - Companion Prompt
 
-    private static let companionVoiceResponseSystemPrompt = """
+    private static func companionVoiceResponseSystemPrompt(
+        for voiceLanguage: VoiceLanguage
+    ) -> String {
+        """
     you're clicky, a friendly always-on companion that lives in the user's menu bar. the user just spoke to you via push-to-talk and you can see their screen(s). your reply will be spoken aloud via text-to-speech, so write the way you'd actually talk. this is an ongoing conversation — you remember everything they've said before.
 
     rules:
     - default to one or two sentences. be direct and dense. BUT if the user asks you to explain more, go deeper, or elaborate, then go all out — give a thorough, detailed explanation with no length limit.
-    - all lowercase, casual, warm. no emojis.
+    \(voiceLanguage.responseSystemPromptInstructions)
     - write for the ear, not the eye. short sentences. no lists, bullet points, markdown, or formatting — just natural speech.
     - don't use abbreviations or symbols that sound weird read aloud. write "for example" not "e.g.", spell out small numbers.
     - if the user's question relates to what's on their screen, reference specific things you see.
@@ -575,6 +683,7 @@ final class CompanionManager: ObservableObject {
     - user asks how to commit in xcode: "see that source control menu up top? click that and hit commit, or you can use command option c as a shortcut. [POINT:285,11:source control]"
     - element is on screen 2 (not where cursor is): "that's over on your other monitor — see the terminal window? [POINT:400,300:terminal:screen2]"
     """
+    }
 
     // MARK: - AI Response Pipeline
 
@@ -612,7 +721,7 @@ final class CompanionManager: ObservableObject {
 
                 let (fullResponseText, _) = try await claudeAPI.analyzeImageStreaming(
                     images: labeledImages,
-                    systemPrompt: Self.companionVoiceResponseSystemPrompt,
+                    systemPrompt: Self.companionVoiceResponseSystemPrompt(for: selectedVoiceLanguage),
                     conversationHistory: historyForAPI,
                     userPrompt: transcript,
                     onTextChunk: { _ in
@@ -701,6 +810,7 @@ final class CompanionManager: ObservableObject {
                 // until the audio actually starts playing, then switch to responding.
                 if !spokenText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
                     do {
+                        elevenLabsTTSClient.languageCode = selectedVoiceLanguage.textToSpeechLanguageCode
                         try await elevenLabsTTSClient.speakText(spokenText)
                         // speakText returns after player.play() — audio is now playing
                         voiceState = .responding
@@ -713,6 +823,12 @@ final class CompanionManager: ObservableObject {
             } catch is CancellationError {
                 // User spoke again — response was interrupted
             } catch {
+                if Self.isScreenContentPermissionDenied(error) {
+                    invalidateScreenContentPermissionGrant()
+                    print("⚠️ Screen content permission is no longer valid: \(error.localizedDescription)")
+                    voiceState = .idle
+                    return
+                }
                 ClickyAnalytics.trackResponseError(error: error.localizedDescription)
                 print("⚠️ Companion response error: \(error)")
                 speakCreditsErrorFallback()
@@ -725,6 +841,73 @@ final class CompanionManager: ObservableObject {
         }
     }
 
+    private func validateScreenContentPermissionAgainstSystemIfNeeded() {
+        guard screenContentPermissionValidationTask == nil else { return }
+
+        screenContentPermissionValidationTask = Task { [weak self] in
+            let isGranted = await Self.canCaptureScreenContentRightNow()
+
+            await MainActor.run {
+                guard let self else { return }
+                self.screenContentPermissionValidationTask = nil
+
+                if isGranted {
+                    self.hasScreenContentPermission = true
+                    UserDefaults.standard.set(
+                        true,
+                        forKey: Self.screenContentPermissionUserDefaultsKey
+                    )
+                } else {
+                    self.invalidateScreenContentPermissionGrant()
+                }
+            }
+        }
+    }
+
+    private func invalidateScreenContentPermissionGrant() {
+        hasScreenContentPermission = false
+        UserDefaults.standard.removeObject(forKey: Self.screenContentPermissionUserDefaultsKey)
+    }
+
+    private static func canCaptureScreenContentRightNow() async -> Bool {
+        do {
+            let content = try await SCShareableContent.excludingDesktopWindows(
+                false,
+                onScreenWindowsOnly: true
+            )
+
+            guard let display = content.displays.first else {
+                return false
+            }
+
+            let filter = SCContentFilter(display: display, excludingWindows: [])
+            let configuration = SCStreamConfiguration()
+            configuration.width = 64
+            configuration.height = 64
+
+            let image = try await SCScreenshotManager.captureImage(
+                contentFilter: filter,
+                configuration: configuration
+            )
+
+            return image.width > 0 && image.height > 0
+        } catch {
+            print("⚠️ Screen content validation failed: \(error)")
+            return false
+        }
+    }
+
+    private static func isScreenContentPermissionDenied(_ error: Error) -> Bool {
+        let nsError = error as NSError
+
+        if nsError.domain == "com.apple.ScreenCaptureKit.SCStreamErrorDomain",
+           nsError.code == -3801 {
+            return true
+        }
+
+        return nsError.localizedDescription.contains("TCC")
+    }
+
     /// If the cursor is in transient mode (user toggled "Show Clicky" off),
     /// waits for TTS playback and any pointing animation to finish, then
     /// fades out the overlay after a 1-second pause. Cancelled automatically
@@ -755,13 +938,14 @@ final class CompanionManager: ObservableObject {
         }
     }
 
-    /// Speaks a hardcoded error message using macOS system TTS when API
-    /// credits run out. Uses NSSpeechSynthesizer so it works even when
-    /// ElevenLabs is down.
+    /// Speaks a hardcoded error message using the system speech synthesizer
+    /// when API credits run out, so the user still hears a fallback even if
+    /// ElevenLabs never returns audio.
     private func speakCreditsErrorFallback() {
         let utterance = "I'm all out of credits. Please DM Farza and tell him to bring me back to life."
-        let synthesizer = NSSpeechSynthesizer()
-        synthesizer.startSpeaking(utterance)
+        let speechUtterance = AVSpeechUtterance(string: utterance)
+        fallbackSpeechSynthesizer.stopSpeaking(at: .immediate)
+        fallbackSpeechSynthesizer.speak(speechUtterance)
         voiceState = .responding
     }
 
@@ -856,7 +1040,9 @@ final class CompanionManager: ObservableObject {
             queue: .main
         ) { [weak self] in
             ClickyAnalytics.trackOnboardingDemoTriggered()
-            self?.performOnboardingDemoInteraction()
+            Task { @MainActor [weak self] in
+                self?.performOnboardingDemoInteraction()
+            }
         }
 
         // Fade out and clean up when the video finishes
@@ -865,16 +1051,14 @@ final class CompanionManager: ObservableObject {
             object: player.currentItem,
             queue: .main
         ) { [weak self] _ in
-            guard let self else { return }
             ClickyAnalytics.trackOnboardingVideoCompleted()
-            self.onboardingVideoOpacity = 0.0
-            // Wait for the 2s fade-out animation to complete before tearing down
-            DispatchQueue.main.asyncAfter(deadline: .now() + 2.0) {
+            Task { @MainActor [weak self] in
+                guard let self else { return }
+                self.onboardingVideoOpacity = 0.0
+                try? await Task.sleep(nanoseconds: 2_000_000_000)
                 self.tearDownOnboardingVideo()
-                // After the video disappears, stream in the prompt to try talking
-                DispatchQueue.main.asyncAfter(deadline: .now() + 0.3) {
-                    self.startOnboardingPromptStream()
-                }
+                try? await Task.sleep(nanoseconds: 300_000_000)
+                self.startOnboardingPromptStream()
             }
         }
     }
@@ -895,6 +1079,7 @@ final class CompanionManager: ObservableObject {
 
     private func startOnboardingPromptStream() {
         let message = "press control + option and introduce yourself"
+        onboardingPromptStreamTask?.cancel()
         onboardingPromptText = ""
         showOnboardingPrompt = true
         onboardingPromptOpacity = 0.0
@@ -903,26 +1088,26 @@ final class CompanionManager: ObservableObject {
             onboardingPromptOpacity = 1.0
         }
 
-        var currentIndex = 0
-        Timer.scheduledTimer(withTimeInterval: 0.03, repeats: true) { timer in
-            guard currentIndex < message.count else {
-                timer.invalidate()
-                // Auto-dismiss after 10 seconds
-                DispatchQueue.main.asyncAfter(deadline: .now() + 10.0) {
-                    guard self.showOnboardingPrompt else { return }
-                    withAnimation(.easeOut(duration: 0.3)) {
-                        self.onboardingPromptOpacity = 0.0
-                    }
-                    DispatchQueue.main.asyncAfter(deadline: .now() + 0.35) {
-                        self.showOnboardingPrompt = false
-                        self.onboardingPromptText = ""
-                    }
-                }
-                return
+        onboardingPromptStreamTask = Task { @MainActor [weak self] in
+            guard let self else { return }
+
+            for character in message {
+                guard !Task.isCancelled else { return }
+                self.onboardingPromptText.append(character)
+                try? await Task.sleep(nanoseconds: 30_000_000)
             }
-            let index = message.index(message.startIndex, offsetBy: currentIndex)
-            self.onboardingPromptText.append(message[index])
-            currentIndex += 1
+
+            try? await Task.sleep(nanoseconds: 10_000_000_000)
+            guard !Task.isCancelled, self.showOnboardingPrompt else { return }
+
+            withAnimation(.easeOut(duration: 0.3)) {
+                self.onboardingPromptOpacity = 0.0
+            }
+
+            try? await Task.sleep(nanoseconds: 350_000_000)
+            guard !Task.isCancelled else { return }
+            self.showOnboardingPrompt = false
+            self.onboardingPromptText = ""
         }
     }
 
diff --git a/leanring-buddy/CompanionPanelView.swift b/leanring-buddy/CompanionPanelView.swift
index 76789b4c..11d16a8c 100644
--- a/leanring-buddy/CompanionPanelView.swift
+++ b/leanring-buddy/CompanionPanelView.swift
@@ -31,6 +31,10 @@ struct CompanionPanelView: View {
 
                 modelPickerRow
                     .padding(.horizontal, 16)
+
+                voiceLanguagePickerRow
+                    .padding(.horizontal, 16)
+                    .padding(.top, 8)
             }
 
             if !companionManager.allPermissionsGranted {
@@ -256,9 +260,7 @@ struct CompanionPanelView: View {
 
             screenRecordingPermissionRow
 
-            if companionManager.hasScreenRecordingPermission {
-                screenContentPermissionRow
-            }
+            screenContentPermissionRow
 
         }
     }
@@ -392,6 +394,7 @@ struct CompanionPanelView: View {
 
     private var screenContentPermissionRow: some View {
         let isGranted = companionManager.hasScreenContentPermission
+        let canRequestScreenContentPermission = companionManager.hasScreenRecordingPermission
         return HStack {
             HStack(spacing: 8) {
                 Image(systemName: "eye")
@@ -399,9 +402,21 @@ struct CompanionPanelView: View {
                     .foregroundColor(isGranted ? DS.Colors.textTertiary : DS.Colors.warning)
                     .frame(width: 16)
 
-                Text("Screen Content")
-                    .font(.system(size: 13, weight: .medium))
-                    .foregroundColor(DS.Colors.textSecondary)
+                VStack(alignment: .leading, spacing: 1) {
+                    Text("Screen Content")
+                        .font(.system(size: 13, weight: .medium))
+                        .foregroundColor(DS.Colors.textSecondary)
+
+                    if !isGranted {
+                        Text(
+                            canRequestScreenContentPermission
+                                ? "Approve the ScreenCaptureKit picker after tapping Grant"
+                                : "Grant Screen Recording first"
+                        )
+                        .font(.system(size: 10))
+                        .foregroundColor(DS.Colors.textTertiary)
+                    }
+                }
             }
 
             Spacer()
@@ -431,6 +446,8 @@ struct CompanionPanelView: View {
                 }
                 .buttonStyle(.plain)
                 .pointerCursor()
+                .disabled(!canRequestScreenContentPermission)
+                .opacity(canRequestScreenContentPermission ? 1 : 0.45)
             }
         }
         .padding(.vertical, 6)
@@ -641,6 +658,55 @@ struct CompanionPanelView: View {
         .pointerCursor()
     }
 
+    // MARK: - Voice Language Picker
+
+    private var voiceLanguagePickerRow: some View {
+        HStack {
+            Text("Voice")
+                .font(.system(size: 13, weight: .medium))
+                .foregroundColor(DS.Colors.textSecondary)
+
+            Spacer()
+
+            HStack(spacing: 0) {
+                ForEach(CompanionManager.VoiceLanguage.allCases) { language in
+                    voiceLanguageOptionButton(
+                        label: language.displayName,
+                        language: language
+                    )
+                }
+            }
+            .background(
+                RoundedRectangle(cornerRadius: 6, style: .continuous)
+                    .fill(Color.white.opacity(0.06))
+            )
+            .overlay(
+                RoundedRectangle(cornerRadius: 6, style: .continuous)
+                    .stroke(DS.Colors.borderSubtle, lineWidth: 0.5)
+            )
+        }
+        .padding(.vertical, 4)
+    }
+
+    private func voiceLanguageOptionButton(label: String, language: CompanionManager.VoiceLanguage) -> some View {
+        let isSelected = companionManager.selectedVoiceLanguage == language
+        return Button(action: {
+            companionManager.setSelectedVoiceLanguage(language)
+        }) {
+            Text(label)
+                .font(.system(size: 11, weight: .medium))
+                .foregroundColor(isSelected ? DS.Colors.textPrimary : DS.Colors.textTertiary)
+                .padding(.horizontal, 10)
+                .padding(.vertical, 5)
+                .background(
+                    RoundedRectangle(cornerRadius: 5, style: .continuous)
+                        .fill(isSelected ? Color.white.opacity(0.1) : Color.clear)
+                )
+        }
+        .buttonStyle(.plain)
+        .pointerCursor()
+    }
+
     // MARK: - DM Farza Button
 
     private var dmFarzaButton: some View {
diff --git a/leanring-buddy/CompanionResponseOverlay.swift b/leanring-buddy/CompanionResponseOverlay.swift
index a11c6240..7755160e 100644
--- a/leanring-buddy/CompanionResponseOverlay.swift
+++ b/leanring-buddy/CompanionResponseOverlay.swift
@@ -177,7 +177,7 @@ final class CompanionResponseOverlayManager {
             context.duration = 0.4
             overlayPanel.animator().alphaValue = 0
         }, completionHandler: { [weak self] in
-            Task { @MainActor in
+            Task { @MainActor [weak self] in
                 self?.hideOverlay()
             }
         })
diff --git a/leanring-buddy/ElevenLabsTTSClient.swift b/leanring-buddy/ElevenLabsTTSClient.swift
index 35545c9d..91b6a564 100644
--- a/leanring-buddy/ElevenLabsTTSClient.swift
+++ b/leanring-buddy/ElevenLabsTTSClient.swift
@@ -15,10 +15,10 @@ final class ElevenLabsTTSClient {
     private let proxyURL: URL
     private let session: URLSession
 
-    /// The audio player for the current TTS playback. Kept alive so the
-    /// audio finishes playing even if the caller doesn't hold a reference.
     private var audioPlayer: AVAudioPlayer?
 
+    var languageCode: String?
+
     init(proxyURL: String) {
         self.proxyURL = URL(string: proxyURL)!
 
@@ -28,15 +28,13 @@ final class ElevenLabsTTSClient {
         self.session = URLSession(configuration: configuration)
     }
 
-    /// Sends `text` to ElevenLabs TTS and plays the resulting audio.
-    /// Throws on network or decoding errors. Cancellation-safe.
     func speakText(_ text: String) async throws {
         var request = URLRequest(url: proxyURL)
         request.httpMethod = "POST"
         request.setValue("application/json", forHTTPHeaderField: "Content-Type")
         request.setValue("audio/mpeg", forHTTPHeaderField: "Accept")
 
-        let body: [String: Any] = [
+        var body: [String: Any] = [
             "text": text,
             "model_id": "eleven_flash_v2_5",
             "voice_settings": [
@@ -45,6 +43,10 @@ final class ElevenLabsTTSClient {
             ]
         ]
 
+        if let languageCode {
+            body["language_code"] = languageCode
+        }
+
         request.httpBody = try JSONSerialization.data(withJSONObject: body)
 
         let (data, response) = try await session.data(for: request)
diff --git a/leanring-buddy/MenuBarPanelManager.swift b/leanring-buddy/MenuBarPanelManager.swift
index e5eb98de..4a9b3ad9 100644
--- a/leanring-buddy/MenuBarPanelManager.swift
+++ b/leanring-buddy/MenuBarPanelManager.swift
@@ -45,7 +45,9 @@ final class MenuBarPanelManager: NSObject {
             object: nil,
             queue: .main
         ) { [weak self] _ in
-            self?.hidePanel()
+            Task { @MainActor [weak self] in
+                self?.hidePanel()
+            }
         }
     }
 
diff --git a/leanring-buddy/OpenAIAudioTranscriptionProvider.swift b/leanring-buddy/OpenAIAudioTranscriptionProvider.swift
index 75092092..3a4dcce3 100644
--- a/leanring-buddy/OpenAIAudioTranscriptionProvider.swift
+++ b/leanring-buddy/OpenAIAudioTranscriptionProvider.swift
@@ -35,6 +35,7 @@ final class OpenAIAudioTranscriptionProvider: BuddyTranscriptionProvider {
 
     func startStreamingSession(
         keyterms: [String],
+        languageCode: String?,
         onTranscriptUpdate: @escaping (String) -> Void,
         onFinalTranscriptReady: @escaping (String) -> Void,
         onError: @escaping (Error) -> Void
@@ -49,6 +50,7 @@ final class OpenAIAudioTranscriptionProvider: BuddyTranscriptionProvider {
             apiKey: apiKey,
             modelName: modelName,
             keyterms: keyterms,
+            languageCode: languageCode,
             onTranscriptUpdate: onTranscriptUpdate,
             onFinalTranscriptReady: onFinalTranscriptReady,
             onError: onError
@@ -69,6 +71,7 @@ private final class OpenAIAudioTranscriptionSession: BuddyStreamingTranscription
     private let apiKey: String
     private let modelName: String
     private let keyterms: [String]
+    private let languageCode: String?
     private let onTranscriptUpdate: (String) -> Void
     private let onFinalTranscriptReady: (String) -> Void
     private let onError: (Error) -> Void
@@ -89,6 +92,7 @@ private final class OpenAIAudioTranscriptionSession: BuddyStreamingTranscription
         apiKey: String,
         modelName: String,
         keyterms: [String],
+        languageCode: String?,
         onTranscriptUpdate: @escaping (String) -> Void,
         onFinalTranscriptReady: @escaping (String) -> Void,
         onError: @escaping (Error) -> Void
@@ -96,6 +100,7 @@ private final class OpenAIAudioTranscriptionSession: BuddyStreamingTranscription
         self.apiKey = apiKey
         self.modelName = modelName
         self.keyterms = keyterms
+        self.languageCode = languageCode
         self.onTranscriptUpdate = onTranscriptUpdate
         self.onFinalTranscriptReady = onFinalTranscriptReady
         self.onError = onError
@@ -232,11 +237,14 @@ private final class OpenAIAudioTranscriptionSession: BuddyStreamingTranscription
             value: modelName,
             usingBoundary: boundary
         )
+
+        let languageValue = languageCode ?? "en"
         requestBodyData.appendMultipartFormField(
             named: "language",
-            value: "en",
+            value: languageValue,
             usingBoundary: boundary
         )
+
         requestBodyData.appendMultipartFormField(
             named: "response_format",
             value: "json",
diff --git a/leanring-buddy/OverlayWindow.swift b/leanring-buddy/OverlayWindow.swift
index 884ebcbf..2a8dbffb 100644
--- a/leanring-buddy/OverlayWindow.swift
+++ b/leanring-buddy/OverlayWindow.swift
@@ -87,7 +87,7 @@ struct NavigationBubbleSizePreferenceKey: PreferenceKey {
 
 /// The buddy's behavioral mode. Controls whether it follows the cursor,
 /// is flying toward a detected UI element, or is pointing at an element.
-enum BuddyNavigationMode {
+enum BuddyNavigationMode: Sendable {
     /// Default — buddy follows the mouse cursor with spring animation
     case followingCursor
     /// Buddy is animating toward a detected UI element location
@@ -261,7 +261,7 @@ struct BlueCursorView: View {
             // Navigation pointer bubble — shown when buddy arrives at a detected element.
             // Pops in with a scale-bounce (0.5x → 1.0x spring) and a bright initial
             // glow that settles, creating a "materializing" effect.
-            if buddyNavigationMode == .pointingAtTarget && !navigationBubbleText.isEmpty {
+            if isPointingAtTargetMode && !navigationBubbleText.isEmpty {
                 Text(navigationBubbleText)
                     .font(.system(size: 11, weight: .medium))
                     .foregroundColor(.white)
@@ -311,14 +311,14 @@ struct BlueCursorView: View {
                 .opacity(buddyIsVisibleOnThisScreen && (companionManager.voiceState == .idle || companionManager.voiceState == .responding) ? cursorOpacity : 0)
                 .position(cursorPosition)
                 .animation(
-                    buddyNavigationMode == .followingCursor
+                    isFollowingCursorMode
                         ? .spring(response: 0.2, dampingFraction: 0.6, blendDuration: 0)
                         : nil,
                     value: cursorPosition
                 )
                 .animation(.easeIn(duration: 0.25), value: companionManager.voiceState)
                 .animation(
-                    buddyNavigationMode == .navigatingToTarget ? nil : .easeInOut(duration: 0.3),
+                    isNavigatingToTargetMode ? nil : .easeInOut(duration: 0.3),
                     value: triangleRotationDegrees
                 )
 
@@ -368,7 +368,7 @@ struct BlueCursorView: View {
             navigationAnimationTimer?.invalidate()
             companionManager.tearDownOnboardingVideo()
         }
-        .onChange(of: companionManager.detectedElementScreenLocation) { newLocation in
+        .onChange(of: companionManager.detectedElementScreenLocation) { _, newLocation in
             // When a UI element location is detected, navigate the buddy to
             // that position so it points at the element.
             guard let screenLocation = newLocation,
@@ -406,6 +406,27 @@ struct BlueCursorView: View {
         }
     }
 
+    private var isFollowingCursorMode: Bool {
+        if case .followingCursor = buddyNavigationMode {
+            return true
+        }
+        return false
+    }
+
+    private var isNavigatingToTargetMode: Bool {
+        if case .navigatingToTarget = buddyNavigationMode {
+            return true
+        }
+        return false
+    }
+
+    private var isPointingAtTargetMode: Bool {
+        if case .pointingAtTarget = buddyNavigationMode {
+            return true
+        }
+        return false
+    }
+
     // MARK: - Cursor Tracking
 
     private func startTrackingCursor() {
@@ -417,7 +438,7 @@ struct BlueCursorView: View {
             // mouse movement — it completes its full animation and return flight.
             // Only during the RETURN flight do we allow cursor movement to cancel
             // (so the buddy snaps to following if the user moves while it's flying back).
-            if self.buddyNavigationMode == .navigatingToTarget && self.isReturningToCursor {
+            if self.isNavigatingToTargetMode && self.isReturningToCursor {
                 let currentMouseInSwiftUI = self.convertScreenPointToSwiftUICoordinates(mouseLocation)
                 let distanceFromNavigationStart = hypot(
                     currentMouseInSwiftUI.x - self.cursorPositionWhenNavigationStarted.x,
@@ -430,7 +451,7 @@ struct BlueCursorView: View {
             }
 
             // During forward navigation or pointing, just skip cursor tracking
-            if self.buddyNavigationMode != .followingCursor {
+            if !self.isFollowingCursorMode {
                 return
             }
 
@@ -483,7 +504,7 @@ struct BlueCursorView: View {
         isReturningToCursor = false
 
         animateBezierFlightArc(to: clampedTarget) {
-            guard self.buddyNavigationMode == .navigatingToTarget else { return }
+            guard self.isNavigatingToTargetMode else { return }
             self.startPointingAtElement()
         }
     }
@@ -590,10 +611,10 @@ struct BlueCursorView: View {
         streamNavigationBubbleCharacter(phrase: pointerPhrase, characterIndex: 0) {
             // All characters streamed — hold for 3 seconds, then fly back
             DispatchQueue.main.asyncAfter(deadline: .now() + 3.0) {
-                guard self.buddyNavigationMode == .pointingAtTarget else { return }
+                guard self.isPointingAtTargetMode else { return }
                 self.navigationBubbleOpacity = 0.0
                 DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) {
-                    guard self.buddyNavigationMode == .pointingAtTarget else { return }
+                    guard self.isPointingAtTargetMode else { return }
                     self.startFlyingBackToCursor()
                 }
             }
@@ -607,7 +628,7 @@ struct BlueCursorView: View {
         characterIndex: Int,
         onComplete: @escaping () -> Void
     ) {
-        guard buddyNavigationMode == .pointingAtTarget else { return }
+        guard isPointingAtTargetMode else { return }
         guard characterIndex < phrase.count else {
             onComplete()
             return
diff --git a/worker/package-lock.json b/worker/package-lock.json
index c2383cc1..4d043455 100644
--- a/worker/package-lock.json
+++ b/worker/package-lock.json
@@ -643,9 +643,6 @@
         "arm"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "LGPL-3.0-or-later",
       "optional": true,
       "os": [
@@ -663,9 +660,6 @@
         "arm64"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "LGPL-3.0-or-later",
       "optional": true,
       "os": [
@@ -683,9 +677,6 @@
         "s390x"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "LGPL-3.0-or-later",
       "optional": true,
       "os": [
@@ -703,9 +694,6 @@
         "x64"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "LGPL-3.0-or-later",
       "optional": true,
       "os": [
@@ -723,9 +711,6 @@
         "arm64"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "LGPL-3.0-or-later",
       "optional": true,
       "os": [
@@ -743,9 +728,6 @@
         "x64"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "LGPL-3.0-or-later",
       "optional": true,
       "os": [
@@ -763,9 +745,6 @@
         "arm"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "Apache-2.0",
       "optional": true,
       "os": [
@@ -789,9 +768,6 @@
         "arm64"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "Apache-2.0",
       "optional": true,
       "os": [
@@ -815,9 +791,6 @@
         "s390x"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "Apache-2.0",
       "optional": true,
       "os": [
@@ -841,9 +814,6 @@
         "x64"
       ],
       "dev": true,
-      "libc": [
-        "glibc"
-      ],
       "license": "Apache-2.0",
       "optional": true,
       "os": [
@@ -867,9 +837,6 @@
         "arm64"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "Apache-2.0",
       "optional": true,
       "os": [
@@ -893,9 +860,6 @@
         "x64"
       ],
       "dev": true,
-      "libc": [
-        "musl"
-      ],
       "license": "Apache-2.0",
       "optional": true,
       "os": [
diff --git a/worker/src/index.ts b/worker/src/index.ts
index 2e3e9345..bbec5f45 100644
--- a/worker/src/index.ts
+++ b/worker/src/index.ts
@@ -13,9 +13,15 @@ interface Env {
   ANTHROPIC_API_KEY: string;
   ELEVENLABS_API_KEY: string;
   ELEVENLABS_VOICE_ID: string;
+  ELEVENLABS_CHINESE_VOICE_ID?: string;
   ASSEMBLYAI_API_KEY: string;
 }
 
+interface TextToSpeechRequestBody {
+  language_code?: string;
+  [key: string]: unknown;
+}
+
 export default {
   async fetch(request: Request, env: Env): Promise<Response> {
     const url = new URL(request.url);
@@ -107,8 +113,21 @@ async function handleTranscribeToken(env: Env): Promise<Response> {
 }
 
 async function handleTTS(request: Request, env: Env): Promise<Response> {
-  const body = await request.text();
-  const voiceId = env.ELEVENLABS_VOICE_ID;
+  let requestBody: TextToSpeechRequestBody;
+
+  try {
+    requestBody = await request.json() as TextToSpeechRequestBody;
+  } catch {
+    return new Response(
+      JSON.stringify({ error: "Invalid JSON body." }),
+      { status: 400, headers: { "content-type": "application/json" } }
+    );
+  }
+
+  const voiceId = resolveVoiceIdForLanguage(
+    requestBody.language_code,
+    env
+  );
 
   const response = await fetch(
     `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`,
@@ -119,7 +138,7 @@ async function handleTTS(request: Request, env: Env): Promise<Response> {
         "content-type": "application/json",
         accept: "audio/mpeg",
       },
-      body,
+      body: JSON.stringify(requestBody),
     }
   );
 
@@ -139,3 +158,19 @@ async function handleTTS(request: Request, env: Env): Promise<Response> {
     },
   });
 }
+
+function resolveVoiceIdForLanguage(
+  languageCode: string | undefined,
+  env: Env
+): string {
+  const normalizedLanguageCode = languageCode?.trim().toLowerCase();
+
+  if (
+    normalizedLanguageCode?.startsWith("zh") &&
+    env.ELEVENLABS_CHINESE_VOICE_ID?.trim()
+  ) {
+    return env.ELEVENLABS_CHINESE_VOICE_ID.trim();
+  }
+
+  return env.ELEVENLABS_VOICE_ID;
+}
diff --git a/worker/wrangler.toml b/worker/wrangler.toml
index b4bdbf38..056ab903 100644
--- a/worker/wrangler.toml
+++ b/worker/wrangler.toml
@@ -4,3 +4,4 @@ compatibility_date = "2024-01-01"
 
 [vars]
 ELEVENLABS_VOICE_ID = "kPzsL2i3teMYv0FxEYQ6"
+ELEVENLABS_CHINESE_VOICE_ID = ""