Add audio transcription route to backend/main.py

2026-04-16 22:07:45 +02:00
parent 7225980fb9
commit a0c2b28591
1 changed files with 34 additions and 0 deletions
--- a/backend/main.py
+++ b/backend/main.py
@@ -137,6 +137,40 @@ def get_db():
 def health():
    return {"ok": True}

+
+@app.post("/audio/transcribe", response_model=schemas.AudioTranscriptionResponse)
+async def transcribe_audio_route(req: schemas.AudioTranscriptionRequest):
+    """Decode a base64 audio payload and return its transcription.
+
+    Responds 400 on bad input or a transcriber RuntimeError, 500 on any other failure.
+    """
+    mime_type = str(req.mime_type or "").split(";", 1)[0].strip().lower()
+    if not mime_type.startswith("audio/"):
+        raise HTTPException(status_code=400, detail="An audio mime type is required.")
+
+    payload = re.sub(r"\s+", "", str(req.audio_base64 or ""))  # validate=True rejects whitespace
+    if not payload:
+        raise HTTPException(status_code=400, detail="Audio payload is required.")
+    try:
+        audio_bytes = base64.b64decode(payload, validate=True)
+    except ValueError as exc:  # binascii.Error is a ValueError subclass
+        raise HTTPException(status_code=400, detail="Invalid base64 audio payload.") from exc
+
+    try:
+        result = await asyncio.to_thread(
+            transcribe_audio_bytes, audio_bytes, mime_type, req.model or DEFAULT_WHISPER_MODEL
+        )
+    except RuntimeError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Audio transcription failed: {exc}") from exc
+
+    return {
+        "text": str(result.get("text") or "").strip(),
+        "language": str(result.get("language") or "").strip() or None,
+        "model": str(result.get("model") or req.model or DEFAULT_WHISPER_MODEL),
+    }
+
 @app.get("/models")
 async def get_models():
    try: