Add startup model preparation endpoint and update Whisper handling

2026-03-20 15:43:00 +01:00
parent 13f2fb9306
commit 55974b90fd
3 changed files with 36 additions and 22 deletions

View File

@@ -9,7 +9,7 @@ import json
from . import models, schemas
from .database import Base, engine, SessionLocal, ensure_sources_column
from .local_rag import router as local_rag_router
from .ollama_admin import inspect_ollama_startup, pull_local_model, start_local_ollama
from .ollama_admin import inspect_ollama_startup, prepare_startup_models, pull_local_model, start_local_ollama
from .ollama_client import list_models as ollama_list, chat as ollama_chat, chat_stream as ollama_chat_stream
from .websearch import enrich_prompt
@@ -73,6 +73,16 @@ async def ollama_pull_route(req: schemas.OllamaPullRequest):
    except RuntimeError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@app.post("/startup/prepare-models")
async def startup_prepare_models_route():
    try:
        return await prepare_startup_models()
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except RuntimeError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@app.get("/sessions", response_model=schemas.SessionsResponse)
def get_sessions(db: Session = Depends(get_db)):
    sessions = db.query(models.ChatSession).order_by(models.ChatSession.created_at.desc()).all()

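For context, here is a minimal sketch of what prepare_startup_models in ollama_admin might do. The helper itself is not part of this diff; the body below is an assumption stitched together from the primitives this commit already imports (ensure_whisper_model_downloaded, pull_local_model) and from the exceptions the route maps to HTTP errors. The model-name literals are placeholders, not the real configuration.

# Sketch only, not from this commit. Assumes pull_local_model is awaitable
# and that the real helper reads the configured model names from settings.
async def prepare_startup_models() -> dict:
    whisper_model = "base"            # placeholder for the configured Whisper model
    embed_model = "nomic-embed-text"  # placeholder for the selected embedding model
    ensure_whisper_model_downloaded(whisper_model)   # RuntimeError -> HTTP 400
    await pull_local_model(embed_model)              # FileNotFoundError -> HTTP 404
    return {"whisper_model": whisper_model, "embedding_model": embed_model}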
View File

@@ -69,6 +69,8 @@ import faulthandler, signal
import multiprocessing as mp_context
import time
from backend.whisper_admin import ensure_whisper_model_downloaded, whisper_runtime_error as whisper_admin_runtime_error

os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
@@ -1431,12 +1433,7 @@ def _load_torch_module():
    return None


def whisper_runtime_error() -> Optional[str]:
    if importlib.util.find_spec("whisper") is None:
        return (
            "Audio/video transcription requires the optional 'openai-whisper' package. "
            "Install it in backend/.venv, for example: pip install -U openai-whisper"
        )
    return None
    return whisper_admin_runtime_error()


def _resolve_whisper_device(flag: str) -> Optional[str]:
    if flag and flag != "auto":
@@ -1495,6 +1492,8 @@ def process_media(path: Path, args) -> List[Record]:
    if whisper_error:
        raise RuntimeError(whisper_error)
    ensure_whisper_model_downloaded(args.whisper_model)
    ffmpeg_bin = resolve_binary_path(args.ffmpeg, "HEIMGEIST_FFMPEG_PATH", "ffmpeg", "/usr/bin/ffmpeg")
    ffprobe_bin = resolve_binary_path(args.ffprobe, "HEIMGEIST_FFPROBE_PATH", "ffprobe", "/usr/bin/ffprobe")
    missing_tools = []

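The ingest script now delegates its Whisper checks to backend/whisper_admin, which this diff does not show. A plausible shape for those two helpers, assuming the availability check moved there unchanged and the download piggybacks on whisper.load_model (openai-whisper caches checkpoints under ~/.cache/whisper on first load):

# Assumed contents of backend/whisper_admin.py; only the two names come
# from the import above, the bodies are illustrative.
import importlib.util
from typing import Optional


def whisper_runtime_error() -> Optional[str]:
    # Same availability check the script previously performed inline.
    if importlib.util.find_spec("whisper") is None:
        return (
            "Audio/video transcription requires the optional 'openai-whisper' package. "
            "Install it in backend/.venv, for example: pip install -U openai-whisper"
        )
    return None


def ensure_whisper_model_downloaded(model_name: str) -> None:
    # whisper.load_model downloads the checkpoint on first use, so loading
    # once up front guarantees process_media finds it later.
    import whisper

    whisper.load_model(model_name)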
View File

@@ -282,6 +282,11 @@ export default function App() {
    return expectBackendJson(response)
  }

  async function prepareStartupModels() {
    const response = await fetch(`${backendApiUrl}/startup/prepare-models`, { method: 'POST' })
    return expectBackendJson(response)
  }

  async function fetchLocalLibraryContext(slug, prompt, signal) {
    if (!slug) return { contextBlock: null, sources: [] }
@@ -781,23 +786,23 @@ async function regenerateFromIndex(index, overrideUserText = null) {
        }
      }
      if (status?.ollama_running && !status?.embedding_model_available && status?.can_manage_locally) {
        const confirmed = window.confirm(
          `The selected embedding model "${status.selected_embed_model}" is not installed in Ollama. Pull it now?`
        )
        if (cancelled) return
        if (confirmed) {
          actionStarted = true
          setStartupTaskBusy(true)
      const needsWhisper = !status?.whisper_model_available
      const needsEmbedding = Boolean(status?.ollama_running && status?.can_manage_locally && !status?.embedding_model_available)
      if (needsWhisper || needsEmbedding) {
        actionStarted = true
        setStartupTaskBusy(true)
        if (needsWhisper && needsEmbedding) {
          setStartupTaskMessage(
            `Downloading Whisper ${status?.whisper_model || 'base'} and ${status.selected_embed_model}. This can take a while on first install.`
          )
        } else if (needsWhisper) {
          setStartupTaskMessage(`Downloading Whisper ${status?.whisper_model || 'base'}. This can take a while on first install.`)
        } else {
          setStartupTaskMessage(`Downloading ${status.selected_embed_model} from Ollama. This can take a while on first install.`)
          const response = await fetch(`${backendApiUrl}/ollama/pull`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model: status.selected_embed_model })
          })
          await expectBackendJson(response)
          if (cancelled) return
        }
        await prepareStartupModels()
        if (cancelled) return
      }
    } catch (error) {
      if (!cancelled) {
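
The new route is also easy to exercise without the frontend; a small client sketch, assuming the backend listens on the local dev default:

# Example call against POST /startup/prepare-models; host and port are
# assumptions, not part of this commit.
import httpx

def trigger_prepare(base_url: str = "http://127.0.0.1:8000") -> dict:
    # timeout=None: first-run Whisper/Ollama downloads can take minutes.
    response = httpx.post(f"{base_url}/startup/prepare-models", timeout=None)
    response.raise_for_status()  # 404/400 carry the error text in "detail"
    return response.json()

if __name__ == "__main__":
    print(trigger_prepare())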