Files
MurMur/murmur.py

1249 lines
49 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# murmur.py — Whisper-live (ASR) + NLLB-200 distilled 600M (Translation)
# - Two-pane UI (Original + Translation) with dynamic show/hide
# - Source language dropdown (Auto → whisper-live auto-detect)
# - Transcription via whisper-live (FasterWhisper backend)
# - Translation via NLLB-200 distilled 600M (no SeamlessM4T / MMS-LID)
# - Square Record button (grey ↔ red) that records input to MP3 (<timestamp>_record.mp3)
# - Virtual loopback Gain (dB) before the output sink (e.g., BlackHole)
# - English UI; window auto-resizes (no manual resize, no page scrollbar)
import atexit
import json
import locale
import os
import shutil
import subprocess
import sys
import threading
import time
import wave
import queue
from multiprocessing import Process, Manager
from typing import Optional
import numpy as np
import sounddevice as sd
import webview
import socket
import logging
# Whisper-live
from whisper_live.server import TranscriptionServer
from whisper_live.client import TranscriptionClient, TranscriptionTeeClient # noqa: F401
# Translation (NLLB)
import torch # noqa: F401
from huggingface_hub import snapshot_download, HfApi
# Tame noisy websocket logs caused by readiness probing/forced disconnects.
# The three "websockets.*" loggers only emit CRITICAL records; the "websocket"
# logger still reports errors.
logging.getLogger("websockets.server").setLevel(logging.CRITICAL)
logging.getLogger("websockets.sync.server").setLevel(logging.CRITICAL)
logging.getLogger("websockets.client").setLevel(logging.CRITICAL)
logging.getLogger("websocket").setLevel(logging.ERROR)
# Settings file (config.json) located in the same directory as this script.
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "config.json")
# -----------------------------------------------------------------------------
# Language options / mappings
# -----------------------------------------------------------------------------
# One row per supported language: (UI id, display label, Whisper code, NLLB code).
_LANG_FIELDS = ("id", "label", "whisper", "nllb")
_LANG_ROWS = (
    ("eng", "English", "en", "eng_Latn"),
    ("deu", "Deutsch", "de", "deu_Latn"),
    ("spa", "Español", "es", "spa_Latn"),
    ("fra", "Français", "fr", "fra_Latn"),
    ("zho", "中文", "zh", "zho_Hans"),
    ("jpn", "日本語", "ja", "jpn_Jpan"),
    ("por", "Português", "pt", "por_Latn"),
    ("ind", "Bahasa Indonesia", "id", "ind_Latn"),
    ("hin", "हिन्दी", "hi", "hin_Deva"),
    ("arb", "العربية", "ar", "arb_Arab"),
)
LANG_TABLE = [dict(zip(_LANG_FIELDS, row)) for row in _LANG_ROWS]

# Lookup of a full table entry by its UI id.
LANG_BY_ID = {entry["id"]: entry for entry in LANG_TABLE}

# (id, label) pairs for the target dropdown; the source dropdown adds "Auto" first.
LANG_CHOICES_TGT = [(entry["id"], entry["label"]) for entry in LANG_TABLE]
LANG_CHOICES_SRC = [("auto", "Auto"), *LANG_CHOICES_TGT]

# Whisper language code -> NLLB language code.
WL_TO_NLLB = {entry["whisper"]: entry["nllb"] for entry in LANG_TABLE}

# Two-letter system locale prefix -> UI id (same two-letter codes Whisper uses).
SYS2ID = {entry["whisper"]: entry["id"] for entry in LANG_TABLE}
def detect_system_lang_code():
    """Return the UI language id matching the system locale, or "eng".

    The locale name is read from ``locale.getlocale()``; when that yields
    nothing, ``locale.getdefaultlocale()`` is tried. The part of the name
    before the first underscore is lower-cased and looked up in ``SYS2ID``.
    Any error along the way results in "eng".
    """
    try:
        # Avoid deprecated getdefaultlocale where possible: prefer getlocale().
        current = locale.getlocale()
        name = (current[0] or "") if current else ""
        if not name:
            # Fallback for older Pythons.
            name = locale.getdefaultlocale()[0] or ""
        prefix, _, _ = name.partition("_")
        return SYS2ID.get(prefix.lower(), "eng")
    except Exception:
        return "eng"
# -----------------------------------------------------------------------------
# macOS system output switcher (optional)
# -----------------------------------------------------------------------------
class SystemAudioManager:
    """Switch and restore the system output device via ``SwitchAudioSource``.

    The executable is looked up on ``PATH`` once at construction. When it is
    not found, every method is a harmless no-op returning ``None``, ``[]`` or
    ``False``.
    """

    def __init__(self):
        self.exe = shutil.which("SwitchAudioSource")
        # Output that was active before the first switch; restore() goes back to it.
        self.original = None

    def is_available(self):
        """Return True when the SwitchAudioSource executable was found."""
        return bool(self.exe)

    def _run(self, args):
        """Run the CLI with ``args`` and return the CompletedProcess (never raises on exit code)."""
        return subprocess.run([self.exe] + args, capture_output=True, text=True, check=False)

    def get_current_output(self):
        """Return the name of the current output device, or None."""
        if not self.is_available():
            return None
        res = self._run(["-t", "output", "-c"])
        return (res.stdout or "").strip() or None

    def list_outputs(self):
        """Return the names of all output devices reported by the CLI."""
        if not self.is_available():
            return []
        res = self._run(["-a", "-t", "output"])
        return [ln.strip() for ln in (res.stdout or "").splitlines() if ln.strip()]

    def set_output(self, name):
        """Select ``name`` as output device; return True only if the CLI succeeded."""
        if not (self.is_available() and name):
            return False
        res = self._run(["-t", "output", "-s", name])
        return res.returncode == 0

    def maybe_switch_to(self, preferred_name):
        """Switch to the output matching ``preferred_name`` if one exists.

        An exact name match wins; otherwise the first output whose name starts
        with ``preferred_name`` (case-insensitive) is used. Returns False when
        nothing matches or the switch fails, True otherwise.
        """
        if not (self.is_available() and preferred_name):
            return False
        outs = self.list_outputs()
        target = preferred_name if preferred_name in outs else None
        if target is None:
            low = preferred_name.lower()
            target = next((n for n in outs if n.lower().startswith(low)), None)
        if target is None:
            return False
        cur = self.get_current_output()
        # Remember the pre-switch device only once. Overwriting it on a later
        # call would make restore() return to a device this class selected.
        if self.original is None:
            self.original = cur
        if cur != target:
            return self.set_output(target)
        return True

    def restore(self):
        """Switch back to the remembered original output if it is not already active."""
        if self.is_available() and self.original and self.get_current_output() != self.original:
            self.set_output(self.original)
# -----------------------------------------------------------------------------
# Audio loopback (monitoring) with virtual gain
# -----------------------------------------------------------------------------
class AudioRouter:
    """Loop audio from an input device to an output device, applying a gain.

    A background thread keeps a duplex ``sd.Stream`` open; the stream callback
    copies each input block to the output, scaled by the current linear gain.
    """

    def __init__(self):
        # Defaults shared by every sounddevice stream opened afterwards.
        sd.default.samplerate = 44100
        sd.default.channels = 2
        sd.default.latency = 'high'
        sd.default.blocksize = 512
        self.thread = None
        self.running = False
        # gain (in dB and linear)
        self.gain_db = 0.0
        self.gain = 1.0

    def set_gain_db(self, db):
        """Set loopback gain in dB (applied to input before sending to output).

        Values that cannot be converted to float count as 0 dB; the result is
        clamped to the range -60..30 dB.
        """
        try:
            db = float(db)
        except Exception:
            db = 0.0
        db = max(-60.0, min(30.0, db))  # clamp
        self.gain_db = db
        self.gain = 10.0 ** (db / 20.0)
        print(f"[AudioRouter] Gain set to {self.gain_db:.1f} dB (x{self.gain:.2f})", file=sys.stderr)

    def _cb(self, indata, outdata, frames, t, status):
        """Stream callback: copy ``indata`` to ``outdata``, scaled and hard-clipped."""
        if status:
            print(f"[Stream-Status] {status}", file=sys.stderr)
        if self.gain != 1.0:
            out = indata * self.gain
            np.clip(out, -1.0, 1.0, out=out)  # hard-clip for safety
            outdata[:] = out
        else:
            outdata[:] = indata

    def _loop(self, inp, outp, channels):
        """Thread body: keep the duplex stream open until ``self.running`` is cleared."""
        try:
            with sd.Stream(device=(inp, outp), samplerate=sd.default.samplerate, channels=channels,
                           latency=sd.default.latency, blocksize=sd.default.blocksize, callback=self._cb):
                while self.running:
                    time.sleep(0.1)
        except Exception as e:
            print(f"[AudioRouter] {e}", file=sys.stderr)

    def start(self, inp, outp):
        """Start (or restart) the loopback between device indices ``inp`` and ``outp``.

        The stream uses as many channels as both devices support. If either
        side offers no channels, an error is logged and nothing is started.
        """
        devs = sd.query_devices()
        in_ch = int(devs[inp]['max_input_channels'])
        out_ch = int(devs[outp]['max_output_channels'])
        # No lower bound here: forcing at least 1 would make the check below
        # unreachable and try to open a stream the hardware cannot provide.
        common = min(in_ch, out_ch)
        if common <= 0:
            print(f"[AudioRouter Error] no common channels (in={in_ch}, out={out_ch})", file=sys.stderr)
            return
        self.stop()
        self.running = True
        self.thread = threading.Thread(target=self._loop, args=(inp, outp, common), daemon=True)
        self.thread.start()
        print(f"[AudioRouter] Loopback: {inp} -> {outp} with {common} channel(s)", file=sys.stderr)

    def stop(self):
        """Stop the loopback thread if it is running (waits up to one second)."""
        if self.running:
            self.running = False
            if self.thread is not None:
                self.thread.join(timeout=1.0)
            print("[AudioRouter] stopped", file=sys.stderr)
# -----------------------------------------------------------------------------
# Simple input recorder → WAV (stream) → MP3 via ffmpeg
# -----------------------------------------------------------------------------
class InputRecorder:
    """Record an input device to a temporary WAV and convert it to MP3 on stop.

    The stream callback converts each block to 16-bit PCM and queues it; a
    writer thread drains the queue into the WAV file. ``stop_and_save`` runs
    ffmpeg (when found on PATH) to produce ``<timestamp>_record.mp3``.
    """

    def __init__(self):
        self._stream = None
        self._writer_thread = None
        self._q = queue.Queue(maxsize=64)
        self._running = False
        self._wav = None
        self._wav_path = None
        self._mp3_path = None
        self._channels = 1
        self._rate = 44100
        self._start_ts = None
        self._input_index = None

    def is_recording(self):
        """Return True while a recording is in progress."""
        return self._running

    def _writer_loop(self):
        """Drain queued PCM chunks into the WAV file until stopped or a None sentinel arrives."""
        # Bind this session's queue and file locally so that a later start(),
        # which replaces both attributes, cannot redirect a still-draining writer.
        q, wav = self._q, self._wav
        try:
            while self._running or not q.empty():
                try:
                    chunk = q.get(timeout=0.25)
                except queue.Empty:
                    continue
                if chunk is None:
                    break
                wav.writeframes(chunk)
        finally:
            try:
                wav.close()
            except Exception:
                pass

    def _abort_start(self):
        """Undo a partially completed start(): stop threads, close and delete the temp WAV."""
        self._running = False
        if self._stream is not None:
            try:
                self._stream.stop()
                self._stream.close()
            except Exception:
                pass
            self._stream = None
        writer = self._writer_thread
        if writer is not None and writer.is_alive():
            try:
                self._q.put_nowait(None)
            except queue.Full:
                pass
            writer.join(timeout=2.0)
        if self._wav is not None:
            try:
                self._wav.close()
            except Exception:
                pass
        if self._wav_path:
            try:
                os.remove(self._wav_path)
            except OSError:
                pass

    def start(self, input_index: int):
        """Begin recording from device ``input_index``.

        Returns True when recording is (or already was) running, False when
        the device index is invalid or the WAV file / input stream could not
        be opened.
        """
        if self._running:
            return True
        devs = sd.query_devices()
        if input_index is None or input_index < 0 or input_index >= len(devs):
            print("[Recorder] invalid input device", file=sys.stderr)
            return False
        self._input_index = input_index
        self._channels = max(1, min(2, int(devs[input_index].get("max_input_channels", 1))))
        self._rate = int(sd.default.samplerate or 44100)
        self._start_ts = time.strftime("%Y-%m-%d_%H-%M-%S")
        base = os.path.dirname(__file__)
        # temporary WAV, will convert to MP3 on stop
        self._wav_path = os.path.join(base, f"{self._start_ts}_record_temp.wav")
        self._mp3_path = None
        # Fresh queue per session: a stop sentinel left over from a previous
        # recording must not terminate this session's writer immediately.
        self._q = queue.Queue(maxsize=64)
        self._stream = None
        self._writer_thread = None
        self._wav = None
        session_q = self._q

        def cb(indata, frames, time_info, status):
            if status:
                print(f"[Recorder] Status: {status}", file=sys.stderr)
            pcm16 = np.clip(indata, -1.0, 1.0)
            pcm16 = (pcm16 * 32767.0).astype(np.int16).tobytes()
            try:
                session_q.put_nowait(pcm16)
            except queue.Full:
                pass  # drop if writer is briefly behind

        try:
            # open WAV sink
            self._wav = wave.open(self._wav_path, "wb")
            self._wav.setnchannels(self._channels)
            self._wav.setsampwidth(2)  # int16
            self._wav.setframerate(self._rate)
            self._running = True
            self._writer_thread = threading.Thread(target=self._writer_loop, daemon=True)
            self._writer_thread.start()
            self._stream = sd.InputStream(
                device=input_index,
                channels=self._channels,
                samplerate=self._rate,
                dtype="float32",
                blocksize=sd.default.blocksize or 512,
                latency=sd.default.latency or 'high',
                callback=cb
            )
            self._stream.start()
        except Exception as e:
            # Without this rollback _running would stay True, the WAV would stay
            # open and the writer thread would spin forever.
            print(f"[Recorder] could not start: {e}", file=sys.stderr)
            self._abort_start()
            return False
        print(f"[Recorder] started (dev #{input_index}, {self._channels}ch @ {self._rate} Hz)", file=sys.stderr)
        return True

    def stop_and_save(self):
        """Stop recording and return the path of the saved file.

        Returns None when no recording was running, the MP3 path when the
        ffmpeg conversion succeeded, and the WAV path otherwise.
        """
        if not self._running:
            return None
        self._running = False
        try:
            if self._stream:
                self._stream.stop()
                self._stream.close()
        except Exception:
            pass
        try:
            # Bounded wait: if the writer died, a full queue must not hang the caller.
            self._q.put(None, timeout=1.0)
        except Exception:
            pass
        try:
            if self._writer_thread:
                self._writer_thread.join(timeout=2.0)
        except Exception:
            pass
        # Transcode to MP3 via ffmpeg (if present), else keep WAV
        mp3_name = f"{time.strftime('%Y-%m-%d_%H-%M-%S')}_record.mp3"
        base = os.path.dirname(__file__)
        mp3_path = os.path.join(base, mp3_name)
        ffmpeg = shutil.which("ffmpeg")
        if ffmpeg:
            cmd = [ffmpeg, "-y", "-i", self._wav_path, "-vn", "-acodec", "libmp3lame", "-b:a", "192k", mp3_path]
            try:
                subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                try:
                    os.remove(self._wav_path)
                except Exception:
                    pass
                self._mp3_path = mp3_path
                print(f"[Recorder] saved: {mp3_path}", file=sys.stderr)
                return mp3_path
            except Exception as e:
                print(f"[Recorder] ffmpeg failed ({e}), keeping WAV", file=sys.stderr)
        self._mp3_path = None
        print(f"[Recorder] WAV saved (no ffmpeg): {self._wav_path}", file=sys.stderr)
        return self._wav_path
# -----------------------------------------------------------------------------
# Whisper-live server (daemon)
# -----------------------------------------------------------------------------
def _run_wl_server(host="127.0.0.1", port=9090):
    """Run the whisper-live transcription server with the faster_whisper backend.

    The call blocks inside ``TranscriptionServer.run``; it is meant to be used
    as a thread target.

    The default bind address is the loopback interface: the readiness probe
    and the STT clients in this file connect to 127.0.0.1/localhost only, so
    listening on every interface would needlessly expose the server to the
    network. Pass ``host="0.0.0.0"`` explicitly if remote clients are wanted.
    """
    srv = TranscriptionServer()
    srv.run(host, port, backend="faster_whisper")
def _wait_for_port(host="127.0.0.1", port=9090, timeout=15.0) -> bool:
t0 = time.time(); delay = 0.2
while time.time() - t0 < timeout:
try:
with socket.create_connection((host, port), timeout=0.5):
return True
except OSError:
time.sleep(delay)
delay = min(1.0, delay * 1.5)
return False
# -----------------------------------------------------------------------------
# STT worker: whisper-live client + optional English translation (no NLLB)
# -----------------------------------------------------------------------------
def _stt_worker(input_index, queue_to_main, translate_flag, src_lang_id):
    """Process entry point: stream one input device to the whisper-live server.

    Starts a transcription client and, when ``translate_flag`` is true, a
    second client with Whisper's translate-to-English mode. Results are put
    on ``queue_to_main`` as ``{"asr": text, "trans": None}`` or
    ``{"asr": None, "trans": text}``; ``{"ready": True}`` is sent once the
    client threads have been launched. The function then idles until the
    process is terminated.

    Args:
        input_index: audio input device index the clients should capture from.
        queue_to_main: queue shared with the parent process.
        translate_flag: whether to start the translating client as well.
        src_lang_id: UI language id, or "auto" to let Whisper detect it.
    """
    # Force TranscriptionClient to use the chosen input device: after the
    # regular constructor ran, replace its audio stream with one opened on
    # ``input_index``.
    # NOTE(review): relies on the client exposing .stream/.p/.format/.channels/
    # .rate/.chunk — confirm against the installed whisper-live version.
    _orig_init = TranscriptionClient.__init__

    def _patched_init(self, *args, **kwargs):
        _orig_init(self, *args, **kwargs)
        try:
            self.stream.stop_stream()
            self.stream.close()
        except Exception:
            pass
        self.stream = self.p.open(
            format=self.format,
            channels=self.channels,
            rate=self.rate,
            input=True,
            output=False,
            frames_per_buffer=self.chunk,
            input_device_index=input_index
        )

    TranscriptionClient.__init__ = _patched_init

    wl_lang = None if (src_lang_id == "auto") else LANG_BY_ID.get(src_lang_id, {}).get("whisper")
    devs = sd.query_devices()
    print(f"[Whisper-live] listening on input #{input_index} ({devs[input_index]['name']})", file=sys.stderr)

    last_asr = ""
    last_trans = ""

    def _cb_asr(text, segments):
        nonlocal last_asr
        asr_text = text or ""
        if asr_text == last_asr:
            return  # unchanged text: nothing new to show
        last_asr = asr_text
        queue_to_main.put({"asr": asr_text, "trans": None})

    def _cb_eng(text, segments):
        # Same duplicate suppression as the transcript callback, so repeated
        # identical results do not flood the queue.
        nonlocal last_trans
        trans_text = text or ""
        if trans_text == last_trans:
            return
        last_trans = trans_text
        queue_to_main.put({"asr": None, "trans": trans_text})

    # Settings shared by both clients.
    common = dict(
        host="localhost", port=9090,
        lang=wl_lang,
        model="small",
        use_vad=True,
        max_connection_time=86400 * 30,
    )
    client_asr = TranscriptionClient(translate=False, transcription_callback=_cb_asr, **common)
    client_eng = None
    if translate_flag:
        # Whisper translate → English
        client_eng = TranscriptionClient(translate=True, transcription_callback=_cb_eng, **common)

    threading.Thread(target=client_asr, daemon=True).start()
    if client_eng is not None:
        threading.Thread(target=client_eng, daemon=True).start()

    # Notify parent: STT clients launched (server reachable / sockets starting)
    try:
        queue_to_main.put({"ready": True})
    except Exception:
        pass

    # Keep the process alive; the client threads are daemons.
    try:
        while True:
            time.sleep(0.25)
    except KeyboardInterrupt:
        pass
# -----------------------------------------------------------------------------
# Backend API for the GUI
# -----------------------------------------------------------------------------
class DeviceAPI:
    """Backend API object driven by the web UI.

    Owns the loopback router, the input recorder, the STT worker process and
    the persisted settings, and pushes updates into the page by evaluating
    JavaScript on ``gui_window``.
    """

    # Window object used for evaluate_js()/resize(). While it is None, all UI
    # updates are skipped.
    gui_window = None

    def cleanup(self):
        """Best-effort shutdown: stop STT, loopback and recording, restore system output."""
        try:
            self._stop_client()
        except Exception:
            pass
        try:
            if hasattr(self, "router") and self.router:
                self.router.stop()
        except Exception:
            pass
        try:
            if hasattr(self, "recorder") and self.recorder and self.recorder.is_recording():
                self.recorder.stop_and_save()
        except Exception as e:
            print(f"[Recorder] save-on-exit error: {e}", file=sys.stderr)
        try:
            if hasattr(self, "sys_audio") and self.sys_audio:
                self.sys_audio.restore()
                print("[SystemAudio] restored original default output", file=sys.stderr)
        except Exception as e:
            print(f"[SystemAudio] restore error: {e}", file=sys.stderr)

    def _stop_client(self):
        """Terminate the STT worker process, if any, and wait up to one second for it."""
        proc = getattr(self, "client_proc", None)
        if proc:
            proc.terminate()
            proc.join(timeout=1.0)

    def __init__(self):
        self.router = AudioRouter()
        self.recorder = InputRecorder()
        mgr = Manager()
        self.queue = mgr.Queue()
        self.transcribing = False
        self.client_proc = None
        self.input_index = None
        self.output_index = None
        self.translate_enabled = True
        self.tgt_lang_id = detect_system_lang_code()
        self.src_lang_id = "auto"
        self.models_ready = False
        self.waiting_first_result = False

        # Saved settings override the defaults above.
        cfg = self._load_config()
        self.input_index = cfg.get("input_index")
        self.output_index = cfg.get("output_index")
        self.translate_enabled = cfg.get("translate", True)
        self.tgt_lang_id = cfg.get("tgt_lang", self.tgt_lang_id)
        self.src_lang_id = cfg.get("src_lang", self.src_lang_id)
        try:
            self.gain_db = float(cfg.get("gain_db", 0.0))
        except (TypeError, ValueError):
            # A null or non-numeric value in config.json must not abort start-up.
            self.gain_db = 0.0

        # Switch the system output to the device named like the saved input device.
        self.sys_audio = SystemAudioManager()
        try:
            if isinstance(self.input_index, int):
                devs = sd.query_devices()
                if 0 <= self.input_index < len(devs):
                    preferred = devs[self.input_index]["name"]
                    if self.sys_audio.is_available():
                        self.sys_audio.maybe_switch_to(preferred)
        except Exception as e:
            print(f"[SystemAudio] init error: {e}", file=sys.stderr)

        if isinstance(self.input_index, int) and isinstance(self.output_index, int) and self.output_index != -1:
            try:
                self.router.start(self.input_index, self.output_index)
                self.router.set_gain_db(self.gain_db)
            except Exception as e:
                print(f"[AudioRouter init] {e}", file=sys.stderr)
        else:
            self.router.set_gain_db(self.gain_db)

        threading.Thread(target=self._poll, daemon=True).start()

    def _poll(self):
        """Forward payloads from the STT worker queue to the page (runs forever in a thread)."""
        while True:
            try:
                payload = self.queue.get(timeout=0.1)
            except Exception:
                continue
            try:
                # Hide spinner as soon as STT clients report readiness
                if payload.get("ready"):
                    self.waiting_first_result = False
                    self._overlay("hide")
                    continue
                # Back-compat: if we were waiting and first text arrives, also hide
                if self.waiting_first_result and (payload.get("asr") or payload.get("trans")):
                    self.waiting_first_result = False
                    self._overlay("hide")
                js = f"appendTranscripts({json.dumps(payload.get('asr',''))}, {json.dumps(payload.get('trans',''))});"
                if DeviceAPI.gui_window:
                    DeviceAPI.gui_window.evaluate_js(js)
            except Exception as e:
                print(f"[JS Eval Error] {e}", file=sys.stderr)

    def app_ready(self):
        """Show the init overlay, start model prefetch in the background and apply the layout."""
        self._overlay("progress", "Initializing…", 1)
        threading.Thread(target=self._prefetch_models, daemon=True).start()
        # Toggle visibility & resize native window
        self._apply_layout()
        return True

    def _overlay(self, action: str, message: Optional[str] = None, progress: Optional[float] = None):
        """Call ``overlayUpdate(action, message, progress)`` in the page; errors are ignored."""
        if not DeviceAPI.gui_window:
            return
        msg_js = json.dumps(message) if message is not None else "null"
        prog_js = ("null" if progress is None else str(int(progress)))
        js = f"overlayUpdate('{action}', {msg_js}, {prog_js});"
        try:
            DeviceAPI.gui_window.evaluate_js(js)
        except Exception:
            pass

    @staticmethod
    def _fmt_bytes(b):
        """Format a byte count with one decimal and a B/KB/MB/GB/TB unit (1024 steps)."""
        size = float(b)
        for unit in ("B", "KB", "MB", "GB"):
            if size < 1024:
                return f"{size:.1f} {unit}"
            size /= 1024
        return f"{size:.1f} TB"

    @staticmethod
    def _fmt_duration(s):
        """Format whole seconds as "Ns", "Mm Ss" or "Hh Mm"."""
        if s < 60:
            return f"{s}s"
        m, s = divmod(s, 60)
        if m < 60:
            return f"{m}m {s}s"
        h, m = divmod(m, 60)
        return f"{h}h {m}m"

    def _overlay_progress_bytes(self, label: str, current: int, total: int):
        """Show a byte-based download progress line (percent, sizes, speed, ETA) in the overlay."""
        now = time.time()
        st = getattr(self, "_dl_state", None)
        if not st or st.get("label") != label:
            # New download: reset the rate-tracking state.
            st = self._dl_state = {"label": label, "t0": now, "last_t": now, "last_b": current, "total": total}
        # Speed since the previous call.
        dt = max(1e-3, now - st["last_t"])
        db = max(0, current - st["last_b"])
        speed = db / dt
        st["last_t"], st["last_b"] = now, current
        pct = int(100 * current / max(1, total))
        # ETA is based on the average speed since the download started.
        avg_speed = max(1e-3, current / max(1e-3, now - st["t0"]))
        remaining = max(0, total - current)
        eta_s = int(remaining / avg_speed)
        rate = f" • {self._fmt_bytes(speed)}/s" if speed else ""
        msg = (
            f"{label} {pct}% • {self._fmt_bytes(current)} / {self._fmt_bytes(total)}"
            f"{rate} • ETA {self._fmt_duration(eta_s)}"
        )
        self._overlay("progress", msg, pct)

    def _make_tqdm_class(self, label: str, total_bytes: int):
        """Return a minimal tqdm-like class that reports accumulated bytes to the overlay.

        NOTE(review): only __init__/update/close and the context-manager
        methods are provided; confirm that this is all the installed
        huggingface_hub version calls on its ``tqdm_class``.
        """
        outer = self

        class OverlayTqdm:
            def __init__(self, *args, **kwargs):
                self.total = kwargs.get("total") or 0

            def update(self, n=1):
                # One shared counter per label, across all instances.
                st = getattr(outer, "_agg", None)
                if st is None or st.get("label") != label:
                    outer._agg = st = {"label": label, "cur": 0}
                st["cur"] += int(n or 0)
                outer._overlay_progress_bytes(label, min(st["cur"], total_bytes), total_bytes)

            def close(self):
                pass

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                pass

        return OverlayTqdm

    def _download_with_pulse(self, desc: str, start: int, end: int, func):
        """Run ``func`` in a thread while the progress bar creeps from ``start`` towards ``end``.

        Returns ``func``'s result, or None when it raised (the error is logged).
        """
        self._overlay("progress", desc, start)
        done = False
        err = None
        ret = None

        def runner():
            nonlocal done, err, ret
            try:
                ret = func()
            except Exception as e:
                err = e
            finally:
                done = True

        t = threading.Thread(target=runner, daemon=True)
        t.start()
        val = start
        while not done:
            val = min(end - 1, val + 1)  # never reaches `end` before completion
            self._overlay("progress", desc, val)
            time.sleep(0.3)
        self._overlay("progress", desc, end)
        if err:
            print(f"[Prefetch] {desc} failed: {err}", file=sys.stderr)
        return ret

    def _prefetch_models(self):
        """Download the Whisper model with progress display, then start the transcription server."""
        api = HfApi()
        fw_total = 0
        try:
            info = api.repo_info(repo_id="Systran/faster-whisper-small", files_metadata=True)
            if getattr(info, "siblings", None):
                fw_total = sum(getattr(s, "size", 0) or 0 for s in info.siblings)
        except Exception as e:
            print(f"[Prefetch] repo_info FW failed: {e}", file=sys.stderr)

        used_byte_progress = False
        label = "Downloading Whisper model (small)"
        try:
            if fw_total > 0:
                self._overlay_progress_bytes(label, 0, fw_total)
                tqdm_cls = self._make_tqdm_class(label, fw_total)
                snapshot_download(repo_id="Systran/faster-whisper-small", allow_patterns=None, tqdm_class=tqdm_cls)
                self._overlay_progress_bytes(label, fw_total, fw_total)
                used_byte_progress = True
        except TypeError:
            # Raised e.g. when tqdm_class is not accepted; the pulse fallback below handles it.
            pass
        except Exception as e:
            print(f"[Prefetch] FW tqdm download failed: {e}", file=sys.stderr)

        if not used_byte_progress:
            def _dl_fw():
                return snapshot_download(repo_id="Systran/faster-whisper-small", allow_patterns=None)
            self._download_with_pulse(f"{label}", 10, 95, _dl_fw)

        self._overlay("progress", "Starting transcription server…", 95)
        threading.Thread(target=_run_wl_server, daemon=True).start()
        if _wait_for_port("127.0.0.1", 9090, timeout=20.0):
            self._overlay("progress", "Server ready", 100)
        else:
            self._overlay("progress", "Server pending…", 98)
        self.models_ready = True
        self._overlay("hide")

    # ---- UI API ----
    def get_config(self):
        """Return the current settings plus the language choice lists for the page."""
        return {
            "input_index": self.input_index,
            "output_index": self.output_index,
            "translate": self.translate_enabled,
            "translate_lang": self.tgt_lang_id,
            "src_lang": self.src_lang_id,
            "sys_lang": detect_system_lang_code(),
            "lang_choices": [(x["id"], x["label"]) for x in LANG_TABLE],
            "src_choices": LANG_CHOICES_SRC,
            "is_recording": self.recorder.is_recording(),
            "gain_db": self.gain_db,
        }

    def _list_devices(self, channel_key):
        """Return ``{"name", "index"}`` for each device with channels under ``channel_key``.

        Only the first device of each name is listed.
        """
        seen, out = set(), []
        for i, d in enumerate(sd.query_devices()):
            if d.get(channel_key, 0) > 0 and d["name"] not in seen:
                seen.add(d["name"])
                out.append({"name": d["name"], "index": i})
        return out

    def get_input_devices(self):
        """List devices that have input channels."""
        return self._list_devices("max_input_channels")

    def get_output_devices(self):
        """List devices that have output channels, preceded by a "No output" entry (index -1)."""
        out = self._list_devices("max_output_channels")
        out.insert(0, {"name": "No output", "index": -1})
        return out

    def set_devices(self, inp, outp):
        """Apply a new input/output selection; restarts loopback and, if active, STT."""
        inp, outp = int(inp), int(outp)
        if self.recorder.is_recording():
            self.recorder.stop_and_save()
        self.input_index, self.output_index = inp, outp
        if outp != -1:
            self.router.start(inp, outp)
            self.router.set_gain_db(self.gain_db)
        else:
            self.router.stop()
        self._persist()
        if self.transcribing and self.client_proc:
            self._overlay("show", "Restarting…", None)
            self.waiting_first_result = True
            self._stop_client()
            time.sleep(0.3)
            self._start_stt()
        return True

    def set_translate(self, v):
        """Enable/disable translation; restarts STT when it is running."""
        self.translate_enabled = bool(v)
        self._persist()
        if self.transcribing:
            self._restart_stt()
        self._apply_layout()  # toggle visibility + resize
        return True

    def set_translate_lang(self, code):
        """Set the target language id (system language when empty)."""
        self.tgt_lang_id = str(code or detect_system_lang_code())
        self._persist()
        if self.transcribing:
            self._restart_stt()
        return True

    def set_src_lang(self, code):
        """Set the source language id ("auto" when empty)."""
        self.src_lang_id = str(code or "auto")
        self._persist()
        if self.transcribing:
            self._restart_stt()
        return True

    def set_gain_db(self, db):
        """Set and persist the loopback gain in dB (0 dB when not numeric)."""
        try:
            self.gain_db = float(db)
        except Exception:
            self.gain_db = 0.0
        self.router.set_gain_db(self.gain_db)
        self._persist()
        return True

    def toggle_transcription(self):
        """Start or stop transcription; returns the new state (False if no input is selected)."""
        if not self.transcribing:
            if self.input_index is None:
                return False
            self._overlay("show", "Starting…", None)
            self.waiting_first_result = True
            self._start_stt()
            self.transcribing = True
        else:
            self._stop_client()
            self._overlay("hide")
            self.transcribing = False
        self._apply_layout()  # toggle visibility + resize
        return self.transcribing

    # Recording control
    def toggle_recording(self):
        """Start or stop recording; returns True when recording is running afterwards."""
        if not self.recorder.is_recording():
            if self.input_index is None:
                return False
            ok = self.recorder.start(self.input_index)
            return bool(ok)
        self.recorder.stop_and_save()
        return False  # now "not recording"

    def _find_matching_input_for_output(self) -> Optional[int]:
        """Find an input device with the same name as the selected output device.

        This is typical for loopback drivers, which expose one device in both
        directions. Matching is tried in three passes: exact name,
        case-insensitive exact name, then case-insensitive prefix. Returns the
        input device index, or None when nothing matches or on any error.
        """
        try:
            if self.output_index is None or self.output_index < 0:
                return None
            devs = sd.query_devices()
            out_name = devs[self.output_index]["name"]
            low = out_name.lower()
            matchers = (
                lambda name: name == out_name,           # 1) exact name
                lambda name: name.lower() == low,        # 2) case-insensitive exact name
                lambda name: name.lower().startswith(low),  # 3) prefix match
            )
            for matches in matchers:
                for i, d in enumerate(devs):
                    if d.get("max_input_channels", 0) > 0 and matches(d["name"]):
                        return i
            return None
        except Exception:
            return None

    def _start_stt(self):
        """Start the STT worker process.

        Prefers the post-gain loopback as STT source when an input device
        matching the selected output device exists; otherwise falls back to
        the raw input device.
        """
        # Default: raw input device
        capture_index = self.input_index
        # Try to find a loopback input matching the current output
        loop_idx = self._find_matching_input_for_output()
        if loop_idx is not None:
            capture_index = loop_idx
            try:
                devs = sd.query_devices()
                print(f"[ASR] capturing POST-GAIN from loopback input #{loop_idx} ({devs[loop_idx]['name']})",
                      file=sys.stderr)
            except Exception:
                print(f"[ASR] capturing POST-GAIN from loopback input #{loop_idx}", file=sys.stderr)
        else:
            try:
                devs = sd.query_devices()
                print(f"[ASR] capturing RAW from input #{self.input_index} ({devs[self.input_index]['name']})",
                      file=sys.stderr)
            except Exception:
                print(f"[ASR] capturing RAW from input #{self.input_index}", file=sys.stderr)
        self.client_proc = Process(
            target=_stt_worker,
            args=(capture_index, self.queue, self.translate_enabled, self.src_lang_id),
            daemon=True
        )
        self.client_proc.start()

    def _restart_stt(self):
        """Stop and relaunch the STT worker with the current settings; errors are logged."""
        try:
            self._overlay("show", "Restarting…", None)
            self.waiting_first_result = True
            self._stop_client()
            time.sleep(0.2)
            self._start_stt()
        except Exception as e:
            print(f"[ASR] restart failed: {e}", file=sys.stderr)

    def _persist(self):
        """Write the current settings to CONFIG_PATH as JSON; errors are logged."""
        data = {
            "input_index": self.input_index,
            "output_index": self.output_index,
            "translate": self.translate_enabled,
            "tgt_lang": self.tgt_lang_id,
            "src_lang": self.src_lang_id,
            "gain_db": self.gain_db,
        }
        try:
            with open(CONFIG_PATH, "w") as f:
                json.dump(data, f, indent=2)
        except Exception as e:
            print(f"[Config Error] {e}", file=sys.stderr)

    def _load_config(self):
        """Return the settings dict read from CONFIG_PATH, or {} if missing/unreadable/not a dict."""
        if os.path.isfile(CONFIG_PATH):
            try:
                with open(CONFIG_PATH, "r") as f:
                    data = json.load(f)
                if isinstance(data, dict):
                    return data
            except Exception:
                pass
        return {}

    # ---- layout helper: toggle visibility AND resize the native window ----
    def _apply_layout(self):
        """Show/hide the transcript panes in the page and resize the native window to fit."""
        show_orig = self.transcribing
        show_trans = self.transcribing and self.translate_enabled
        try:
            if DeviceAPI.gui_window:
                # Toggle visibility in the DOM
                js = f"updateLayout({str(show_orig).lower()}, {str(show_trans).lower()});"
                DeviceAPI.gui_window.evaluate_js(js)
                # Resize native window (no manual resize; no page scrollbar)
                # Tuned heights for this layout:
                # - compact (controls only) : 333
                # - one pane (original)     : 595
                # - two panes (orig+trans)  : 865
                height = 865 if show_trans else (595 if show_orig else 333)
                DeviceAPI.gui_window.resize(730, height)
        except Exception as e:
            print(f"[Layout] update failed: {e}", file=sys.stderr)
# -----------------------------------------------------------------------------
# HTML UI (English, compact device rows, top-aligned buttons, fixed button width)
# -----------------------------------------------------------------------------
HTML = """
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8">
<title>MurMur - Audio Bridge / Transcribe / Translate</title>
<style>
:root{
--bg:#f7f7fb; --card:#ffffff; --ink:#1c1d22; --muted:#71727a; --line:#e6e7ef;
--accent:#3a7afe; --accent-ink:#0e2a6b;
--radius:12px; --shadow:0 6px 24px rgba(0,0,0,.08);
}
html, body { height:100%; overflow:hidden; } /* no page scrollbar */
body{
margin:0; background:var(--bg); color:var(--ink);
font-family: system-ui, -apple-system, Segoe UI, Roboto, "Helvetica Neue", Arial, "Noto Sans", "Apple Color Emoji", "Segoe UI Emoji";
}
.wrap{ max-width:770px; margin:0 auto; padding:24px; }
.hstack{ display:flex; gap:12px; align-items:center; flex-wrap:wrap; }
.grid-2{ display:grid; grid-template-columns:1fr 1fr; gap:12px; }
.card{ background:var(--card); border:1px solid var(--line); border-radius:var(--radius); box-shadow:var(--shadow); }
.card.pad{ padding:16px; }
.title{ font-weight:600; font-size:14px; margin-bottom:8px; color:var(--muted); }
/* Device dropdowns (more compact) */
.dropdown{ position:relative; user-select:none; }
.dropdown .label{
padding:.45em .6em; border:1px solid var(--line); background:#fff;
border-radius:10px; cursor:pointer; min-height:34px; /* was 40px */
display:flex; align-items:center;
}
.dropdown .list{
position:absolute; top:100%; left:0; right:0; max-height:220px; overflow:auto;
border:1px solid var(--line); background:#fff; z-index:10; display:none; border-radius:10px; margin-top:6px;
box-shadow:var(--shadow);
}
.dropdown-item{ padding:.5em .75em; cursor:pointer; }
.dropdown-item:hover{ background:#f1f3f9; }
/* Controls toolbar */
.toolbar{ display:grid; grid-template-columns:1fr auto; gap:12px; align-items:flex-start; } /* top-align both sides */
.leftControls{ display:flex; gap:14px; align-items:center; flex-wrap:wrap; }
select, button, input[type="range"]{ font: inherit; }
select{
padding:.45em .6em; border:1px solid var(--line); border-radius:10px; background:#fff; min-height:34px;
}
.gain-wrap{ display:flex; align-items:center; gap:8px; }
#gainSlider{ width:200px; }
/* Action buttons */
.actions{ align-self:flex-start; } /* ensures top edge lines up with the select */
.primary{
background:linear-gradient(180deg, #3a7afe, #2f6de9);
color:white; border:none; border-radius:10px; padding:.45em .8em; min-height:36px;
box-shadow:0 2px 10px rgba(58,122,254,.2); cursor:pointer; font-weight:600;
min-width:170px; /* prevents layout shift when text changes */
}
.primary:active{ transform:translateY(1px); }
.record-btn{
width:36px; height:36px; display:inline-flex; align-items:center; justify-content:center;
border:1px solid var(--line); border-radius:10px; background:#fff; cursor:pointer;
}
.record-btn .rec-dot{ width:14px; height:14px; border-radius:50%; background:#b9bdc7; display:block; }
.record-btn.on .rec-dot{ background:#e53935; box-shadow:0 0 0 4px rgba(229,57,53,.15); }
/* Transcript areas */
.stack{ display:grid; gap:12px; }
.transcript{
height:170px; overflow:auto; padding:12px 14px; border:1px solid var(--line); border-radius:10px;
background:#fff; line-height:1.4; white-space:pre-wrap; word-break:break-word;
}
.fade-in{opacity:0; animation:fadeIn .35s ease-out forwards}
@keyframes fadeIn{to{opacity:1}}
.ghost{visibility:hidden; user-select:none}
.hint{ color:var(--muted); font-size:.88em; }
/* Overlay */
.overlay{position:fixed; inset:0; background:rgba(20,22,30,.35); display:none; align-items:center; justify-content:center; z-index:9999}
.overlay.show{display:flex}
.panel{background:#fff; padding:16px 18px; border-radius:12px; box-shadow:var(--shadow); min-width:280px; max-width:420px; text-align:center}
.spinner{width:32px;height:32px;border:3px solid #e5e7f1;border-top-color:#59627a;border-radius:50%;margin:0 auto 10px auto;animation:spin .9s linear infinite}
@keyframes spin{to{transform:rotate(360deg)}}
.msg{font-size:14px;color:#333;margin:0 0 8px 0}
.bar{width:100%}
</style>
</head><body>
<div class="wrap">
<!-- Devices -->
<div class="card pad">
<div class="title">Devices</div>
<div class="grid-2">
<div>
<div class="hint">Input device</div>
<div class="dropdown">
<div class="label" id="inLbl" onclick="toggleList('in', event)">— loading… —</div>
<div class="list" id="inList"></div>
</div>
</div>
<div>
<div class="hint">Output device</div>
<div class="dropdown">
<div class="label" id="outLbl" onclick="toggleList('out', event)">— loading… —</div>
<div class="list" id="outList"></div>
</div>
</div>
</div>
</div>
<!-- Controls -->
<div class="card pad" style="margin-top:12px;">
<div class="title">Controls</div>
<div class="toolbar">
<div class="leftControls">
<label class="hstack" style="gap:8px;">
<span class="hint">Source language</span>
<select id="srcSelect" onchange="chgSrc()"></select>
</label>
<div class="gain-wrap" title="Virtual amplifier for the loopback (before the output sink)">
<span class="hint">Gain</span>
<input id="gainSlider" type="range" min="-12" max="24" step="1" value="0" oninput="chgGain(this.value)">
<span id="gainVal" class="hint">0 dB</span>
</div>
<label class="hstack" style="gap:8px; margin-left:8px;">
<input type="checkbox" id="txBox" onchange="chgTx()">
<span class="hint">Translate</span>
</label>
</div>
<div class="hstack actions" style="margin-left:auto;">
<button id="trBtn" class="primary" onclick="tog()">Transcribe</button>
<button id="recBtn" class="record-btn" title="Start/stop recording" onclick="togRec()">
<span class="rec-dot"></span>
</button>
</div>
</div>
</div>
<!-- Transcript panes (visibility toggled by Python via updateLayout) -->
<div class="stack" style="margin-top:12px;">
<div id="orig_card" class="card pad" style="display:none;">
<div class="hstack" style="justify-content:space-between;">
<div class="title">Live transcript</div>
</div>
<div>
<div id="transcript_orig" class="transcript" aria-live="polite" contenteditable="false"></div>
</div>
</div>
<div id="trans_card" class="card pad" style="display:none;">
<div class="hstack" style="justify-content:space-between;">
<div class="title">Live translation</div>
</div>
<div>
<div id="transcript_trans" class="transcript" aria-live="polite"></div>
</div>
</div>
</div>
<!-- Overlay -->
<div id="overlay" class="overlay">
<div class="panel">
<div class="spinner"></div>
<div id="overlayMsg" class="msg">Loading…</div>
<progress id="overlayProg" class="bar" value="0" max="100" style="display:none"></progress>
</div>
</div>
<script>
function overlayUpdate(action, msg, prog){
const ov=document.getElementById('overlay');
const m=document.getElementById('overlayMsg');
const p=document.getElementById('overlayProg');
if(typeof msg==='string' && msg.length) m.textContent=msg;
if(action==='show'){
p.style.display='none'; ov.classList.add('show');
} else if(action==='progress'){
ov.classList.add('show'); p.style.display='block';
if(typeof prog==='number'){ p.value=Math.max(0,Math.min(100,prog)); }
} else if(action==='hide'){
ov.classList.remove('show'); p.style.display='none';
}
}
// show/hide transcript sections from Python (kept)
function updateLayout(showOrig, showTrans){
const oCard = document.getElementById('orig_card');
const tCard = document.getElementById('trans_card');
oCard.style.display = showOrig ? '' : 'none';
tCard.style.display = showTrans ? '' : 'none';
}
let cfg={}, inCache=[], outCache=[];
let prevOrig = '', prevTrans = '';
let userEditing = false, userEditHoldTimer = null;
function sanitizeTranscriptBox(el){
el.querySelectorAll('.ghost,[data-ghost],[aria-hidden="true"],[hidden]').forEach(n => n.remove());
el.querySelectorAll('[style]').forEach(n => {
const s = (n.getAttribute('style') || '').toLowerCase();
if (s.includes('display:none') || s.includes('visibility:hidden')) n.remove();
});
const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, null);
const toUpdate = [];
while (walker.nextNode()){
const node = walker.currentNode;
if (node.nodeValue){
const nv = node.nodeValue.replace(/[\\u200B-\\u200D\\u2060]/g, '');
if (nv !== node.nodeValue) toUpdate.push([node, nv]);
}
}
toUpdate.forEach(([n,v]) => n.nodeValue = v);
const walker2 = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, null);
while (walker2.nextNode()){
const n = walker2.currentNode;
if (n.nodeValue && n.nodeValue.trim().length){
n.nodeValue = n.nodeValue.replace(/^\\s+/, '');
break;
}
}
}
function applySmartUpdate(el, prev, now){
now = (now || '').replace(/^\\s+/, '');
if (prev === now) return now;
let i=0, lim=Math.min(prev.length, now.length);
while(i<lim && prev.charCodeAt(i)===now.charCodeAt(i)) i++;
function overlapSuffixPrefix(a,b){
const m=Math.min(256, Math.min(a.length,b.length));
for(let k=m;k>0;k--){ if(a.slice(a.length-k)===b.slice(0,k)) return k; }
return 0;
}
el.innerHTML='';
if(i>0){
el.appendChild(document.createTextNode(now.slice(0,i)));
const s=document.createElement('span'); s.className='fade-in'; s.textContent=now.slice(i);
el.appendChild(s);
} else {
const k=overlapSuffixPrefix(prev, now);
const removed=Math.max(0, prev.length-k);
if(removed){
const g=document.createElement('span');
g.className='ghost'; g.setAttribute('data-ghost','1'); g.setAttribute('aria-hidden','true');
g.textContent=prev.slice(0,removed);
el.appendChild(g);
}
const s=document.createElement('span'); s.className='fade-in'; s.textContent=now; el.appendChild(s);
}
sanitizeTranscriptBox(el);
el.scrollTop = el.scrollHeight;
return now;
}
document.addEventListener('click', ()=>closeAll());
document.addEventListener('keydown', e=>{ if(e.key==='Escape') closeAll(); });
function closeAll(){ document.querySelectorAll('.dropdown .list').forEach(l=>l.style.display='none'); }
async function toggleList(which, ev){
ev.stopPropagation(); const list=document.getElementById(which+'List');
if(list.style.display==='block'){ list.style.display='none'; return; }
closeAll(); list.style.display='block';
const devs = which==='in' ? await pywebview.api.get_input_devices() : await pywebview.api.get_output_devices();
if(which==='in') inCache=devs; else outCache=devs;
list.innerHTML='';
devs.forEach(d=>{
const it=document.createElement('div'); it.className='dropdown-item'; it.textContent=`${d.name} [${d.index}]`;
it.onclick=()=>{ if(which==='in'){ cfg.input_index=d.index; document.getElementById('inLbl').textContent=d.name; } else { cfg.output_index=d.index; document.getElementById('outLbl').textContent=d.name; }
closeAll(); if(cfg.input_index!=null && cfg.output_index!=null){ pywebview.api.set_devices(cfg.input_index,cfg.output_index); } };
list.appendChild(it);
});
}
function tog(){
pywebview.api.toggle_transcription().then(a=>{
document.getElementById('trBtn').textContent = a ? 'Stop transcribing' : 'Transcribe';
if(!a){
prevOrig=''; prevTrans='';
document.getElementById('transcript_orig').textContent='';
document.getElementById('transcript_trans').textContent='';
}
});
}
function chgTx(){ pywebview.api.set_translate(document.getElementById('txBox').checked); }
function chgSrc(){
const code=document.getElementById('srcSelect').value; pywebview.api.set_src_lang(code);
}
function chgGain(val){
const db = parseFloat(val)||0;
const label = (db>0? '+'+db: db) + ' dB';
document.getElementById('gainVal').textContent = label;
pywebview.api.set_gain_db(db);
}
function togRec(){
pywebview.api.toggle_recording().then(isRecording=>{
const btn = document.getElementById('recBtn');
if(isRecording){ btn.classList.add('on'); } else { btn.classList.remove('on'); }
});
}
function appendTranscripts(asr, trans){
if(typeof asr==='string'){
const el=document.getElementById('transcript_orig');
if(!userEditing){ prevOrig=applySmartUpdate(el, prevOrig, asr); }
}
if(typeof trans==='string'){
const el2=document.getElementById('transcript_trans');
prevTrans=applySmartUpdate(el2, prevTrans, trans);
}
}
document.addEventListener('DOMContentLoaded', ()=>{
const el = document.getElementById('transcript_orig');
el.addEventListener('input', ()=>{
userEditing = true;
clearTimeout(userEditHoldTimer);
userEditHoldTimer = setTimeout(()=>{ userEditing = false; }, 3000);
});
el.addEventListener('keydown', (e)=>{
if ((e.ctrlKey || e.metaKey) && e.key === 'Enter'){
e.preventDefault();
userEditing = true;
clearTimeout(userEditHoldTimer);
userEditHoldTimer = setTimeout(()=>{ userEditing = false; }, 1500);
}
});
});
function buildMenu(selectId, sysCode, choices, current){
const sel=document.getElementById(selectId); sel.innerHTML=''; const seen=new Set();
function add(code,label){ if(seen.has(code)) return; const o=document.createElement('option'); o.value=code; o.textContent=`${label} (${code})`; sel.appendChild(o); seen.add(code); }
const sys = choices.find(([c,_])=>c===sysCode);
if(sys) add(sys[0], sys[1]);
choices.forEach(([c,l])=>{ if(!seen.has(c)) add(c,l); });
sel.value = current || sysCode || (choices[0]&&choices[0][0]);
}
window.addEventListener('pywebviewready', async () => {
overlayUpdate('show', 'Initializing…', null);
cfg = await pywebview.api.get_config();
const inDevs = await pywebview.api.get_input_devices();
const outDevs = await pywebview.api.get_output_devices();
const inSel = inDevs.find(d=>d.index===cfg.input_index);
const outSel = outDevs.find(d=>d.index===cfg.output_index);
document.getElementById('inLbl').textContent = inSel? inSel.name : '— select —';
document.getElementById('outLbl').textContent = outSel? outSel.name : '— select —';
document.getElementById('txBox').checked = !!cfg.translate;
// Build source menu
buildMenu('srcSelect', 'auto', cfg.src_choices, cfg.src_lang);
await pywebview.api.app_ready();
// Init record state
const recBtn = document.getElementById('recBtn');
if (cfg.is_recording) recBtn.classList.add('on'); else recBtn.classList.remove('on');
// Init gain UI
const g = (typeof cfg.gain_db === 'number') ? cfg.gain_db : 0;
const slider = document.getElementById('gainSlider');
const label = document.getElementById('gainVal');
slider.value = g;
label.textContent = (g>0? '+'+g: g) + ' dB';
});
</script>
</body></html>
"""
# -----------------------------------------------------------------------------
# App bootstrap
# -----------------------------------------------------------------------------
def start_gui():
    """Create the MurMur window, wire up cleanup hooks, and start the GUI.

    Steps, in order:
      1. Instantiate ``DeviceAPI`` and pass it to the window as ``js_api``.
      2. Create a fixed-size (non-resizable) window rendering ``HTML``.
      3. Store the window on ``DeviceAPI.gui_window`` (class attribute).
      4. Hook ``api.cleanup`` to the window's ``closed`` event (best effort)
         and register it with ``atexit`` as well.
      5. Call ``webview.start()``.

    NOTE(review): ``api.cleanup`` is registered on two paths (window closed
    and interpreter exit), so it may be invoked more than once -- confirm it
    is idempotent.
    """
    api = DeviceAPI()
    w = webview.create_window(
        "MurMur - Audio Bridge / Transcribe / Translate",
        html=HTML,
        js_api=api,
        width=730, height=333,  # will be resized programmatically; no manual resize
        resizable=False
    )
    DeviceAPI.gui_window = w
    try:
        w.events.closed += api.cleanup
    except Exception:
        # Best effort: the atexit hook below still runs cleanup, so a failure
        # here must not abort startup -- but report it instead of hiding it.
        logging.getLogger(__name__).warning(
            "Could not attach cleanup to the window 'closed' event; "
            "relying on the atexit hook only.",
            exc_info=True,
        )
    atexit.register(api.cleanup)
    webview.start()
# Script entry point. Launch the GUI only when this file is executed directly,
# not when it is imported. The module imports multiprocessing.Process, so the
# guard presumably also keeps spawned child processes that re-import this
# module from opening a second window -- TODO confirm against the Process usage.
if __name__ == "__main__":
    start_gui()