From e429adca48310ddb2015ad808c127eb860a14df0 Mon Sep 17 00:00:00 2001 From: syntaxbullet Date: Wed, 18 Feb 2026 14:52:21 +0100 Subject: [PATCH] fix: plug leaked-semaphore memory leaks on shutdown. --- calliope/app.py | 8 +++++--- calliope/cli.py | 9 +++++++++ calliope/transcriber.py | 3 ++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/calliope/app.py b/calliope/app.py index 38df317..dafe905 100644 --- a/calliope/app.py +++ b/calliope/app.py @@ -6,8 +6,6 @@ import threading import time from typing import Any -# Disable tokenizers parallelism to avoid leaked semaphore warnings on shutdown. -os.environ.setdefault("TOKENIZERS_PARALLELISM", "false") # Run offline — models are downloaded during setup, no need to hit HuggingFace on every launch. os.environ.setdefault("HF_HUB_OFFLINE", "1") @@ -575,9 +573,13 @@ class CalliopeApp(rumps.App): self.postprocessor = None def _on_quit(self, sender) -> None: - self._release_postprocessor() self.hotkeys.stop() self.recorder.stop() + # Wait for any in-flight transcription so PyTorch isn't killed mid-operation, + # which would cause a SIGTRAP from native threads being torn down uncleanly. + self._transcribe_done.wait(timeout=10) + self._release_transcriber() + self._release_postprocessor() # Stop overlay timers synchronously to avoid retain cycles on quit. self.overlay.cleanup() rumps.quit_application() diff --git a/calliope/cli.py b/calliope/cli.py index 8732aa5..46120a1 100644 --- a/calliope/cli.py +++ b/calliope/cli.py @@ -1,6 +1,15 @@ """CLI entry point using click.""" import logging +import os + +# Set these before any library import so tokenizer/OpenMP threads are never spawned. +# TOKENIZERS_PARALLELISM=false prevents the HF fast-tokenizer from creating a Rust +# thread-pool backed by OS semaphores (which leak on unclean shutdown → trace trap). +# OMP_NUM_THREADS / MKL_NUM_THREADS prevent OpenMP/MKL from spawning worker threads.
+os.environ.setdefault("TOKENIZERS_PARALLELISM", "false") +os.environ.setdefault("OMP_NUM_THREADS", "1") +os.environ.setdefault("MKL_NUM_THREADS", "1") import click diff --git a/calliope/transcriber.py b/calliope/transcriber.py index 309cb36..7858819 100644 --- a/calliope/transcriber.py +++ b/calliope/transcriber.py @@ -71,7 +71,8 @@ class Transcriber: generate_kwargs = {} if self._context: if self._cached_prompt_ids is None: - self._cached_prompt_ids = self._tokenizer.get_prompt_ids(self._context) + device = self._pipe.model.device + self._cached_prompt_ids = torch.tensor(self._tokenizer.get_prompt_ids(self._context), device=device) generate_kwargs["prompt_ids"] = self._cached_prompt_ids pipe_kwargs = {