# Heimgeist/backend/ollama_admin.py
from __future__ import annotations
import asyncio
import os
import shutil
import subprocess
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
import httpx
from .app_settings import get_embed_model_preference, get_ollama_api_url, normalize_embed_model

LOCAL_OLLAMA_HOSTS = {"127.0.0.1", "localhost", "::1"}


def _ollama_binary() -> Optional[str]:
    """Locate the `ollama` executable on PATH, if installed."""
    return shutil.which("ollama")


def _is_local_ollama_url(url: str) -> bool:
    """Return True if `url` points at this machine (a loopback host)."""
    try:
        parsed = urlparse(url)
    except Exception:
        return False
    return (parsed.hostname or "").strip().lower() in LOCAL_OLLAMA_HOSTS


def _model_aliases(model: str) -> set[str]:
    """Return the model name plus its `:latest`-tagged/untagged twin.

    Ollama lists models with an explicit `:latest` tag, so both the tagged
    and untagged spellings of the configured model must match.
    """
    normalized = normalize_embed_model(model)
    aliases = {normalized}
    if normalized.endswith(":latest"):
        aliases.add(normalized[: -len(":latest")])
    else:
        aliases.add(f"{normalized}:latest")
    return aliases


async def _list_model_names(ollama_url: str, *, timeout: float = 5.0) -> List[str]:
    """Fetch the names of all models known to the Ollama server."""
    async with httpx.AsyncClient(timeout=timeout) as client:
        # GET /api/tags returns the models available on the server.
        response = await client.get(f"{ollama_url.rstrip('/')}/api/tags")
        response.raise_for_status()
        payload = response.json()
    out: List[str] = []
    for item in payload.get("models", []) or []:
        name = str((item or {}).get("name") or "").strip()
        if name:
            out.append(name)
    return out


async def inspect_ollama_startup() -> Dict[str, Any]:
    """Probe the configured Ollama server and summarize its state.

    Never raises: connection failures are reported via the `error` field.
    """
    ollama_url = get_ollama_api_url()
    embed_model = get_embed_model_preference()
    ollama_bin = _ollama_binary()
    is_local = _is_local_ollama_url(ollama_url)

    available_models: List[str] = []
    error = ""
    running = False
    try:
        available_models = await _list_model_names(ollama_url)
        running = True
    except Exception as exc:
        error = str(exc)

    available = bool(set(available_models) & _model_aliases(embed_model))
    return {
        "ollama_url": ollama_url,
        "ollama_running": running,
        "ollama_binary_found": bool(ollama_bin),
        "can_manage_locally": bool(ollama_bin) and is_local,
        "selected_embed_model": embed_model,
        "embedding_model_available": available,
        "available_models": available_models,
        "error": error,
    }


async def start_local_ollama() -> Dict[str, Any]:
    """Start `ollama serve` in the background and wait until it is reachable."""
    status = await inspect_ollama_startup()
    if status["ollama_running"]:
        return status
    if not status["can_manage_locally"]:
        raise RuntimeError(
            "Ollama can only be started automatically when the configured "
            "Ollama URL points to this machine."
        )
    ollama_bin = _ollama_binary()
    if not ollama_bin:
        raise FileNotFoundError("Could not find the 'ollama' executable in PATH.")

    # Detach the server from our process group so it survives backend restarts.
    subprocess.Popen(
        [ollama_bin, "serve"],
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        start_new_session=True,
        env=os.environ.copy(),
    )

    # Poll for up to 10 seconds (20 x 0.5 s) until the server answers.
    for _ in range(20):
        await asyncio.sleep(0.5)
        status = await inspect_ollama_startup()
        if status["ollama_running"]:
            return status
    raise RuntimeError("Started 'ollama serve', but Ollama did not become reachable in time.")


async def pull_local_model(model: Optional[str] = None) -> Dict[str, Any]:
    """Pull `model` (default: the configured embed model) via the local CLI."""
    status = await inspect_ollama_startup()
    if not status["can_manage_locally"]:
        raise RuntimeError(
            "Heimgeist can only pull models automatically when the configured "
            "Ollama URL points to this machine."
        )
    if not status["ollama_running"]:
        raise RuntimeError("Ollama must be running before Heimgeist can pull a model.")
    ollama_bin = _ollama_binary()
    if not ollama_bin:
        raise FileNotFoundError("Could not find the 'ollama' executable in PATH.")

    model_name = normalize_embed_model(model or status["selected_embed_model"])
    process = await asyncio.create_subprocess_exec(
        ollama_bin,
        "pull",
        model_name,
        stdin=asyncio.subprocess.DEVNULL,
        stdout=asyncio.subprocess.DEVNULL,
        stderr=asyncio.subprocess.PIPE,
    )
    _stdout, stderr = await process.communicate()
    if process.returncode != 0:
        detail = (stderr or b"").decode("utf-8", errors="ignore").strip()
        raise RuntimeError(
            detail or f"'ollama pull {model_name}' failed with exit code {process.returncode}."
        )

    # Re-inspect so the caller sees the freshly pulled model in the listing.
    status = await inspect_ollama_startup()
    return {
        "model": model_name,
        "status": status,
    }
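

if __name__ == "__main__":
    # Minimal manual check, not part of the application flow: dump the startup
    # report as JSON. Assumption: the package-relative imports above resolve,
    # i.e. run this as a module from the repo root, e.g.
    # `python -m backend.ollama_admin`.
    import json

    print(json.dumps(asyncio.run(inspect_ollama_startup()), indent=2))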