From 55974b90fd8136e96005a738444fbde40ad813cd Mon Sep 17 00:00:00 2001
From: Victor Giers
Date: Fri, 20 Mar 2026 15:43:00 +0100
Subject: [PATCH] Add startup model preparation endpoint and update Whisper
 handling

---
 backend/main.py               | 12 +++++++++++-
 backend/rag/corpus_builder.py | 11 +++++------
 src/App.jsx                   | 35 ++++++++++++++++++++---------------
 3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/backend/main.py b/backend/main.py
index 35e4e98..bc1f657 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -9,7 +9,7 @@ import json
 from . import models, schemas
 from .database import Base, engine, SessionLocal, ensure_sources_column
 from .local_rag import router as local_rag_router
-from .ollama_admin import inspect_ollama_startup, pull_local_model, start_local_ollama
+from .ollama_admin import inspect_ollama_startup, prepare_startup_models, pull_local_model, start_local_ollama
 from .ollama_client import list_models as ollama_list, chat as ollama_chat, chat_stream as ollama_chat_stream
 from .websearch import enrich_prompt
 
@@ -73,6 +73,16 @@ async def ollama_pull_route(req: schemas.OllamaPullRequest):
     except RuntimeError as exc:
         raise HTTPException(status_code=400, detail=str(exc)) from exc
 
+
+@app.post("/startup/prepare-models")
+async def startup_prepare_models_route():
+    try:
+        return await prepare_startup_models()
+    except FileNotFoundError as exc:
+        raise HTTPException(status_code=404, detail=str(exc)) from exc
+    except RuntimeError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+
 @app.get("/sessions", response_model=schemas.SessionsResponse)
 def get_sessions(db: Session = Depends(get_db)):
     sessions = db.query(models.ChatSession).order_by(models.ChatSession.created_at.desc()).all()
diff --git a/backend/rag/corpus_builder.py b/backend/rag/corpus_builder.py
index 57f90fb..4e5f6d1 100644
--- a/backend/rag/corpus_builder.py
+++ b/backend/rag/corpus_builder.py
@@ -69,6 +69,8 @@ import faulthandler, signal
 import multiprocessing as mp_context
 import time
 
+from backend.whisper_admin import ensure_whisper_model_downloaded, whisper_runtime_error as whisper_admin_runtime_error
+
 os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
 os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
 
@@ -1431,12 +1433,7 @@ def _load_torch_module():
     return None
 
 def whisper_runtime_error() -> Optional[str]:
-    if importlib.util.find_spec("whisper") is None:
-        return (
-            "Audio/video transcription requires the optional 'openai-whisper' package. "
-            "Install it in backend/.venv, for example: pip install -U openai-whisper"
-        )
-    return None
+    return whisper_admin_runtime_error()
 
 def _resolve_whisper_device(flag: str) -> Optional[str]:
     if flag and flag != "auto":
@@ -1495,6 +1492,8 @@ def process_media(path: Path, args) -> List[Record]:
     if whisper_error:
         raise RuntimeError(whisper_error)
 
+    ensure_whisper_model_downloaded(args.whisper_model)
+
     ffmpeg_bin = resolve_binary_path(args.ffmpeg, "HEIMGEIST_FFMPEG_PATH", "ffmpeg", "/usr/bin/ffmpeg")
     ffprobe_bin = resolve_binary_path(args.ffprobe, "HEIMGEIST_FFPROBE_PATH", "ffprobe", "/usr/bin/ffprobe")
     missing_tools = []
diff --git a/src/App.jsx b/src/App.jsx
index db93809..8ca2ecd 100644
--- a/src/App.jsx
+++ b/src/App.jsx
@@ -282,6 +282,11 @@ export default function App() {
     return expectBackendJson(response)
   }
 
+  async function prepareStartupModels() {
+    const response = await fetch(`${backendApiUrl}/startup/prepare-models`, { method: 'POST' })
+    return expectBackendJson(response)
+  }
+
   async function fetchLocalLibraryContext(slug, prompt, signal) {
     if (!slug) return { contextBlock: null, sources: [] }
 
@@ -781,23 +786,23 @@ async function regenerateFromIndex(index, overrideUserText = null) {
         }
       }
 
-      if (status?.ollama_running && !status?.embedding_model_available && status?.can_manage_locally) {
-        const confirmed = window.confirm(
-          `The selected embedding model "${status.selected_embed_model}" is not installed in Ollama. Pull it now?`
-        )
-        if (cancelled) return
-        if (confirmed) {
-          actionStarted = true
-          setStartupTaskBusy(true)
+      const needsWhisper = !status?.whisper_model_available
+      const needsEmbedding = Boolean(status?.ollama_running && status?.can_manage_locally && !status?.embedding_model_available)
+
+      if (needsWhisper || needsEmbedding) {
+        actionStarted = true
+        setStartupTaskBusy(true)
+        if (needsWhisper && needsEmbedding) {
+          setStartupTaskMessage(
+            `Downloading Whisper ${status?.whisper_model || 'base'} and ${status.selected_embed_model}. This can take a while on first install.`
+          )
+        } else if (needsWhisper) {
+          setStartupTaskMessage(`Downloading Whisper ${status?.whisper_model || 'base'}. This can take a while on first install.`)
+        } else {
           setStartupTaskMessage(`Downloading ${status.selected_embed_model} from Ollama. This can take a while on first install.`)
-          const response = await fetch(`${backendApiUrl}/ollama/pull`, {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({ model: status.selected_embed_model })
-          })
-          await expectBackendJson(response)
-          if (cancelled) return
         }
+        await prepareStartupModels()
+        if (cancelled) return
      }
    } catch (error) {
      if (!cancelled) {