feat: add LLM post-processing support.
This commit is contained in:
198
calliope/app.py
198
calliope/app.py
@@ -18,6 +18,7 @@ import rumps
|
||||
from calliope import config as config_mod
|
||||
from calliope.recorder import Recorder
|
||||
from calliope.transcriber import Transcriber
|
||||
from calliope.postprocessor import Postprocessor
|
||||
from calliope.typer import type_text, type_text_clipboard
|
||||
from calliope.hotkeys import HotkeyListener
|
||||
from calliope.overlay import WaveformOverlay
|
||||
@@ -43,6 +44,14 @@ class CalliopeApp(rumps.App):
|
||||
self.transcriber.context = cfg.get("context", "")
|
||||
self.transcriber.language = cfg.get("language", "auto")
|
||||
|
||||
# Post-processing
|
||||
pp_cfg = cfg.get("postprocessing", {})
|
||||
self.postprocessor: Postprocessor | None = None
|
||||
if pp_cfg.get("enabled") and pp_cfg.get("model"):
|
||||
self.postprocessor = Postprocessor(
|
||||
system_prompt=pp_cfg.get("system_prompt", ""),
|
||||
)
|
||||
|
||||
self._recording = False
|
||||
self._rec_lock = threading.Lock()
|
||||
self._rec_start_time: float | None = None
|
||||
@@ -78,6 +87,10 @@ class CalliopeApp(rumps.App):
|
||||
self._mic_menu = rumps.MenuItem("Microphone")
|
||||
self._build_mic_menu()
|
||||
|
||||
# Post-Processing submenu
|
||||
self._pp_menu = rumps.MenuItem("Post-Processing")
|
||||
self._build_pp_menu()
|
||||
|
||||
# Typing mode submenu
|
||||
self._typing_menu = rumps.MenuItem("Typing Mode")
|
||||
current_mode = cfg.get("typing_mode", "char")
|
||||
@@ -98,6 +111,7 @@ class CalliopeApp(rumps.App):
|
||||
self._model_menu,
|
||||
self._mic_menu,
|
||||
self._typing_menu,
|
||||
self._pp_menu,
|
||||
None,
|
||||
quit_item,
|
||||
]
|
||||
@@ -138,6 +152,10 @@ class CalliopeApp(rumps.App):
|
||||
self.status_item.title = self._ready_status()
|
||||
self.hotkeys.start()
|
||||
log.info("Model loaded, hotkeys active")
|
||||
# Load postprocessor if enabled
|
||||
pp_cfg = self.cfg.get("postprocessing", {})
|
||||
if pp_cfg.get("enabled") and pp_cfg.get("model"):
|
||||
self._ensure_postprocessor(pp_cfg["model"])
|
||||
except Exception:
|
||||
log.error("Failed to load model", exc_info=True)
|
||||
self.status_item.title = "Status: Model load failed"
|
||||
@@ -353,6 +371,14 @@ class CalliopeApp(rumps.App):
|
||||
self.status_item.title = self._ready_status()
|
||||
self._notify("Calliope", "", "No speech detected — audio too short or too quiet")
|
||||
return
|
||||
# LLM post-processing
|
||||
pp_cfg = self.cfg.get("postprocessing", {})
|
||||
if pp_cfg.get("enabled") and self.postprocessor and self.postprocessor._model is not None:
|
||||
try:
|
||||
self.status_item.title = "Status: Post-processing..."
|
||||
text = self.postprocessor.process(text)
|
||||
except Exception:
|
||||
log.error("Post-processing failed, using raw transcription", exc_info=True)
|
||||
if text:
|
||||
def _do_type():
|
||||
try:
|
||||
@@ -377,7 +403,179 @@ class CalliopeApp(rumps.App):
|
||||
self.title = "\U0001f3a4" # 🎤
|
||||
self._transcribe_done.set()
|
||||
|
||||
# ── Post-Processing ───────────────────────────────────────────
|
||||
|
||||
def _build_pp_menu(self) -> None:
    """Rebuild the Post-Processing submenu from the current config.

    Layout: enable/disable toggle, a separator, one entry per downloaded
    model (the active one marked with a check), then the download /
    prompt-edit / delete actions.
    """
    # rumps creates the underlying NSMenu lazily; clear() is only valid once it exists.
    if self._pp_menu._menu is not None:
        self._pp_menu.clear()

    settings = self.cfg.get("postprocessing", {})
    is_on = settings.get("enabled", False)
    selected = settings.get("model")
    downloaded = settings.get("models", [])

    # Toggle entry reflects the current enabled state.
    label = "Disable Post-Processing" if is_on else "Enable Post-Processing"
    self._pp_menu.add(rumps.MenuItem(label, callback=self._on_pp_toggle))
    self._pp_menu.add(None)  # separator

    # One entry per downloaded model; the active model gets a check mark.
    if downloaded:
        for repo_id in downloaded:
            name = repo_id.split("/")[-1]
            mark = "\u2713 " if repo_id == selected else "  "
            entry = rumps.MenuItem(f"{mark}{name}", callback=self._on_pp_model_select)
            entry._pp_model_id = repo_id  # stash the repo id for the select callback
            self._pp_menu.add(entry)
        self._pp_menu.add(None)

    self._pp_menu.add(rumps.MenuItem("Download Model...", callback=self._on_pp_download))
    self._pp_menu.add(rumps.MenuItem("Edit System Prompt...", callback=self._on_pp_edit_prompt))
    if downloaded:
        self._pp_menu.add(rumps.MenuItem("Delete Model...", callback=self._on_pp_delete))
|
||||
|
||||
def _on_pp_toggle(self, sender) -> None:
    """Flip the post-processing enabled flag, persist it, and (un)load the LLM."""
    settings = self.cfg.setdefault("postprocessing", {})
    now_enabled = not settings.get("enabled", False)
    settings["enabled"] = now_enabled
    config_mod.save(self.cfg)
    if now_enabled:
        # Only load if a model has actually been selected.
        if settings.get("model"):
            self._ensure_postprocessor(settings["model"])
    else:
        self._release_postprocessor()
    self._build_pp_menu()
    log.info("Post-processing %s", "enabled" if now_enabled else "disabled")
|
||||
|
||||
def _on_pp_model_select(self, sender) -> None:
    """Make the model behind the clicked menu item the active one."""
    chosen = sender._pp_model_id
    settings = self.cfg.setdefault("postprocessing", {})
    if chosen == settings.get("model"):
        # Already the active model; nothing to do.
        return
    settings["model"] = chosen
    config_mod.save(self.cfg)
    # Only (re)load the LLM when post-processing is actually on.
    if settings.get("enabled"):
        self._ensure_postprocessor(chosen)
    self._build_pp_menu()
    log.info("Post-processing model set to %s", chosen)
|
||||
|
||||
def _on_pp_download(self, sender) -> None:
    """Prompt for a HuggingFace repo ID and download the model in the background.

    On success the repo is recorded in the config's model list (and made the
    active model if none was set), the submenu is rebuilt, and a notification
    is posted. Failures are logged and reported via notification.
    """
    self._activate_app()
    response = rumps.Window(
        message="Enter a HuggingFace MLX model repo ID.\n\n"
        "Example: mlx-community/Qwen2.5-0.5B-Instruct-4bit",
        title="Download MLX Model",
        default_text="mlx-community/Qwen2.5-0.5B-Instruct-4bit",
        ok="Download",
        cancel="Cancel",
        dimensions=(320, 24),
    ).run()
    self._deactivate_app()
    if response.clicked != 1:  # anything other than the OK button
        return
    repo = response.text.strip()
    if not repo:
        return
    self._notify("Calliope", "", f"Downloading {repo}...")

    def _do_download():
        # Import BEFORE the try block: if this import were inside the try and
        # failed, the `finally` clause below would reference an unbound
        # `hf_constants` and raise NameError, masking the real error.
        import huggingface_hub.constants as hf_constants

        try:
            # Temporarily lift HF offline mode so the snapshot can be fetched.
            os.environ["HF_HUB_OFFLINE"] = "0"
            hf_constants.HF_HUB_OFFLINE = False
            Postprocessor.download(repo)
            pp_cfg = self.cfg.setdefault("postprocessing", {})
            if repo not in pp_cfg.setdefault("models", []):
                pp_cfg["models"].append(repo)
            if not pp_cfg.get("model"):
                pp_cfg["model"] = repo
            config_mod.save(self.cfg)
            # NOTE(review): this runs on a worker thread; rumps/AppKit menu
            # mutation is normally main-thread-only — confirm this is safe.
            self._build_pp_menu()
            self._notify("Calliope", "", f"Model downloaded: {repo}")
        except Exception:
            log.error("Failed to download %s", repo, exc_info=True)
            self._notify("Calliope", "Error", f"Failed to download {repo}")
        finally:
            # Restore offline mode; the app presumably runs offline by
            # default (it forces "1" rather than restoring the prior value).
            os.environ["HF_HUB_OFFLINE"] = "1"
            hf_constants.HF_HUB_OFFLINE = True

    threading.Thread(target=_do_download, daemon=True).start()
|
||||
|
||||
def _on_pp_edit_prompt(self, sender) -> None:
    """Open a dialog to edit the LLM system prompt and persist the result."""
    settings = self.cfg.setdefault("postprocessing", {})
    existing = settings.get("system_prompt", "")
    self._activate_app()
    result = rumps.Window(
        message="System prompt sent to the LLM before your transcription:",
        title="Edit System Prompt",
        default_text=existing,
        ok="Save",
        cancel="Cancel",
        dimensions=(320, 120),
    ).run()
    self._deactivate_app()
    if result.clicked != 1:
        return
    settings["system_prompt"] = result.text.strip()
    config_mod.save(self.cfg)
    # Push the new prompt into an already-loaded postprocessor right away;
    # an empty prompt falls back to the package default.
    if self.postprocessor:
        from calliope.postprocessor import DEFAULT_SYSTEM_PROMPT

        self.postprocessor.system_prompt = settings["system_prompt"] or DEFAULT_SYSTEM_PROMPT
    log.info("Post-processing system prompt updated")
|
||||
|
||||
def _on_pp_delete(self, sender) -> None:
    """Ask which downloaded model to forget and drop it from the config.

    Only removes the entry from Calliope's config (the HuggingFace disk
    cache is untouched). Disables post-processing when no models remain.
    """
    settings = self.cfg.setdefault("postprocessing", {})
    known = settings.get("models", [])
    if not known:
        return
    self._activate_app()
    result = rumps.Window(
        message="Enter the repo ID of the model to remove from Calliope:\n\n"
        + "\n".join(f"  • {m}" for m in known),
        title="Delete Model",
        default_text="",
        ok="Delete",
        cancel="Cancel",
        dimensions=(320, 24),
    ).run()
    self._deactivate_app()
    if result.clicked != 1:
        return
    target = result.text.strip()
    if target not in known:
        return
    known.remove(target)
    if settings.get("model") == target:
        # Fall back to the first remaining model, or none at all.
        settings["model"] = known[0] if known else None
    if not known:
        settings["enabled"] = False
        self._release_postprocessor()
    config_mod.save(self.cfg)
    self._build_pp_menu()
    log.info("Removed model %s", target)
|
||||
|
||||
def _ensure_postprocessor(self, model_id: str) -> None:
    """Load the postprocessor model in a background thread.

    Creates the Postprocessor wrapper on first use, then swaps in the
    requested model. Failures are logged and surfaced as a notification.
    """

    def _worker():
        try:
            if self.postprocessor is None:
                # First use: build the wrapper with the configured prompt.
                settings = self.cfg.get("postprocessing", {})
                self.postprocessor = Postprocessor(
                    system_prompt=settings.get("system_prompt", ""),
                )
            # Drop any previously loaded weights before loading the new model.
            self.postprocessor.unload()
            self.postprocessor.load(model_id)
        except Exception:
            log.error("Failed to load postprocessor %s", model_id, exc_info=True)
            self._notify("Calliope", "Error", f"Failed to load LLM: {model_id}")

    threading.Thread(target=_worker, daemon=True).start()
|
||||
|
||||
def _release_postprocessor(self) -> None:
    """Unload the LLM (if any) and drop the wrapper so its memory can be reclaimed."""
    pp = self.postprocessor
    if pp is None:
        return
    pp.unload()
    self.postprocessor = None
|
||||
|
||||
def _on_quit(self, sender) -> None:
|
||||
self._release_postprocessor()
|
||||
self.hotkeys.stop()
|
||||
self.recorder.stop()
|
||||
# Stop overlay timers synchronously to avoid retain cycles on quit.
|
||||
|
||||
@@ -26,6 +26,12 @@ DEFAULTS: dict[str, Any] = {
|
||||
"silence_threshold": 0.005, # RMS energy below which audio is considered silence
|
||||
"notifications": True, # show macOS notifications
|
||||
"typing_delay": 0.005, # seconds between keystrokes in char mode
|
||||
"postprocessing": {
|
||||
"enabled": False,
|
||||
"model": None, # active model HF repo id
|
||||
"models": [], # list of downloaded model repo ids
|
||||
"system_prompt": "Fix grammar and punctuation in the following dictated text. Output only the corrected text, nothing else.",
|
||||
},
|
||||
}
|
||||
|
||||
LANGUAGES: dict[str, str] = {
|
||||
|
||||
72
calliope/postprocessor.py
Normal file
72
calliope/postprocessor.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""LLM post-processing of transcriptions using MLX on Apple Silicon."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
log = logging.getLogger(__name__)

DEFAULT_SYSTEM_PROMPT = (
    "You are a speech-to-text post-processor. Your sole job is to clean up "
    "raw transcriptions. Fix punctuation, capitalization, and obvious "
    "mistranscriptions. Do not add, remove, or rephrase any words beyond "
    "what is necessary for correctness. Output ONLY the corrected text with "
    "no commentary, explanations, or prefixes."
)


class Postprocessor:
    """Cleans up raw transcriptions with a local MLX language model.

    Weights are loaded lazily via :meth:`load` and released via
    :meth:`unload`; ``mlx_lm`` is imported inside the methods so the app
    can start (and this class can be constructed) without it installed.
    """

    def __init__(self, system_prompt: str = ""):
        # An empty prompt falls back to the package default.
        self.system_prompt = system_prompt or DEFAULT_SYSTEM_PROMPT
        self._model = None
        self._tokenizer = None
        self._model_id: str | None = None

    def load(self, model_id: str) -> None:
        """Load the model weights and tokenizer for *model_id* (a HF repo id)."""
        from mlx_lm import load

        log.info("Loading MLX model %s", model_id)
        self._model, self._tokenizer = load(model_id)
        self._model_id = model_id
        log.info("MLX model ready")

    def process(self, text: str) -> str:
        """Return *text* cleaned up by the loaded LLM.

        Raises:
            RuntimeError: if :meth:`load` has not been called successfully.
        """
        if self._model is None or self._tokenizer is None:
            raise RuntimeError("Postprocessor model not loaded")

        from mlx_lm import generate

        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": text},
        ]
        prompt = self._tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # Rough character-based budget for the cleaned text itself.
        output_budget = len(text) * 2 + 100
        # Allow extra headroom for reasoning/thinking tokens (stripped later).
        reasoning_budget = 2048
        result = generate(
            self._model, self._tokenizer, prompt=prompt, max_tokens=output_budget + reasoning_budget
        )
        # Strip <think>...</think> reasoning blocks. Also drop an UNCLOSED
        # <think> tail: if generation hit the token budget mid-thought the
        # closing tag never appears, and without this the raw reasoning text
        # would be returned (and typed out) verbatim.
        result = re.sub(r"<think>[\s\S]*?(?:</think>|$)", "", result)
        result = result.strip()
        log.debug("Post-processing input: %s", text)
        log.debug("Post-processing output: %s", result)
        return result

    def unload(self) -> None:
        """Release model/tokenizer references and force a GC pass to free memory."""
        import gc

        self._model = None
        self._tokenizer = None
        self._model_id = None
        gc.collect()
        log.info("MLX model unloaded")

    @staticmethod
    def download(hf_repo: str) -> None:
        """Fetch *hf_repo* into the local HuggingFace cache (no load)."""
        log.info("Downloading MLX model %s", hf_repo)
        snapshot_download(hf_repo)
        log.info("Download complete: %s", hf_repo)
|
||||
@@ -99,6 +99,33 @@ def run() -> dict:
|
||||
text = transcriber.transcribe(audio)
|
||||
console.print(f"[green]Result:[/green] {text or '(no speech detected)'}")
|
||||
|
||||
# ── LLM Post-Processing ─────────────────────────────────────────
|
||||
console.print("\n[bold]LLM Post-Processing (optional)[/bold]")
|
||||
console.print(" Clean up grammar & punctuation using a local MLX language model.")
|
||||
if Confirm.ask("Enable LLM post-processing?", default=False):
|
||||
default_llm = "mlx-community/Qwen2.5-0.5B-Instruct-4bit"
|
||||
llm_repo = Prompt.ask("MLX model repo", default=default_llm)
|
||||
console.print(f"Downloading [cyan]{llm_repo}[/cyan]...")
|
||||
|
||||
from calliope.postprocessor import Postprocessor
|
||||
|
||||
with Progress() as progress:
|
||||
task = progress.add_task("Downloading model...", total=None)
|
||||
Postprocessor.download(llm_repo)
|
||||
progress.update(task, completed=100, total=100)
|
||||
|
||||
console.print("[green]Model downloaded.[/green]")
|
||||
|
||||
pp_cfg = cfg.setdefault("postprocessing", {})
|
||||
pp_cfg["enabled"] = True
|
||||
pp_cfg["model"] = llm_repo
|
||||
pp_cfg["models"] = [llm_repo]
|
||||
|
||||
default_prompt = config.DEFAULTS["postprocessing"]["system_prompt"]
|
||||
current_prompt = pp_cfg.get("system_prompt", default_prompt)
|
||||
if not Confirm.ask(f"Use default system prompt?\n \"{current_prompt}\"", default=True):
|
||||
pp_cfg["system_prompt"] = Prompt.ask("System prompt")
|
||||
|
||||
# ── Save ─────────────────────────────────────────────────────────
|
||||
config.save(cfg)
|
||||
console.print(f"\n[green]Config saved to {config.CONFIG_PATH}[/green]")
|
||||
|
||||
@@ -21,6 +21,9 @@ dependencies = [
|
||||
"rich>=13.0.0",
|
||||
"click>=8.1.0",
|
||||
"pyyaml>=6.0",
|
||||
"mlx>=0.16.0",
|
||||
"mlx-lm>=0.14.0",
|
||||
"huggingface-hub>=0.20.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
Reference in New Issue
Block a user