From 4441f4046fbad706ae2001ee8cf96ffe1a18557d Mon Sep 17 00:00:00 2001 From: syntaxbullet Date: Wed, 18 Feb 2026 16:18:16 +0100 Subject: [PATCH] chore: update readme.md --- README.md | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index b4d8968..ee4dddf 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ Calliope lives in your menu bar and turns speech into text in any application. P - **Menu bar native** — Runs quietly in the macOS menu bar, always one hotkey away - **Universal text input** — Types transcribed text directly into any focused application via Quartz events or clipboard paste -- **On-device transcription** — Powered by OpenAI Whisper models via Hugging Face Transformers, accelerated with MPS on Apple Silicon +- **On-device transcription** — Powered by OpenAI Whisper models via `mlx-whisper`, natively accelerated on Apple Silicon with no MPS/PyTorch overhead +- **Auto-stop on silence** — Recording stops automatically after a configurable period of silence, so you don't have to press the hotkey again - **LLM post-processing** — Optional grammar and punctuation correction using local MLX language models - **Live waveform overlay** — Floating visual feedback showing audio levels during recording and a pulsing indicator during transcription - **Dual hotkey modes** — Push-to-talk (hold to record) and toggle (tap to start/stop), both fully configurable @@ -38,7 +39,7 @@ pip install -e . calliope # Launch (runs setup wizard on first run) calliope setup # Re-run the setup wizard calliope --debug # Launch with verbose logging -calliope --device 2 --model openai/whisper-large-v3 # Override config for this session +calliope --device 2 --model mlx-community/whisper-large-v3 # Override config for this session calliope --version # Print version ``` @@ -56,22 +57,24 @@ Hotkeys are fully configurable through the setup wizard or by editing the config All settings are stored at `~/.config/calliope/config.yaml`: ```yaml -device: null # Microphone index (null = system default) -model: distil-whisper/distil-large-v3 -language: auto # Language code or "auto" for detection +device: null # Microphone index (null = system default) +model: mlx-community/whisper-large-v3-turbo +language: auto # Language code or "auto" for detection hotkeys: ptt: ctrl+shift toggle: ctrl+space -context: "" # Domain-specific terms to improve accuracy -typing_mode: char # "char" (keystroke simulation) or "clipboard" (Cmd+V paste) -typing_delay: 0.005 # Seconds between keystrokes in char mode -max_recording_seconds: 300 # Maximum recording duration -silence_threshold: 0.005 # RMS energy below which audio is considered silence -notifications: true # macOS notification banners +context: "" # Domain-specific terms to improve accuracy +typing_mode: char # "char" (keystroke simulation) or "clipboard" (Cmd+V paste) +typing_delay: 0.005 # Seconds between keystrokes in char mode +max_recording_seconds: 300 # Maximum recording duration +silence_threshold: 0.005 # RMS energy below which audio is considered silence +auto_stop_silence: true # Automatically stop recording after sustained silence +silence_timeout_seconds: 1.5 # Seconds of silence before auto-stop triggers +notifications: true # macOS notification banners postprocessing: - enabled: false # LLM grammar/punctuation correction - model: null # Active MLX model - system_prompt: "..." # Custom post-processing instructions + enabled: false # LLM grammar/punctuation correction + model: null # Active MLX model + system_prompt: "..." # Custom post-processing instructions debug: false ``` @@ -79,13 +82,15 @@ CLI flags override config values for that session. ## Available Models +All models are sourced from Hugging Face and run natively via `mlx-whisper` on Apple Silicon. + | Model | Size | Speed | Accuracy | |-------|------|-------|----------| -| `openai/whisper-base` | ~150 MB | Fastest | Basic | -| `openai/whisper-small` | ~500 MB | Fast | Good | -| `openai/whisper-medium` | ~1.5 GB | Moderate | Better | -| `distil-whisper/distil-large-v3` | ~1.5 GB | Fast | High (default) | -| `openai/whisper-large-v3` | ~3 GB | Slower | Highest | +| `mlx-community/whisper-base` | ~150 MB | Fastest | Basic | +| `mlx-community/whisper-small` | ~500 MB | Fast | Good | +| `mlx-community/whisper-medium` | ~1.5 GB | Moderate | Better | +| `mlx-community/whisper-large-v3-turbo` | ~1.6 GB | Fast | High (default) | +| `mlx-community/whisper-large-v3` | ~3 GB | Slower | Highest | ## Troubleshooting