Heimgeist/backend/ollama_client.py

import httpx
import json
import re
import time
from typing import Dict, Any, List, AsyncGenerator, Tuple

from .app_settings import get_ollama_api_url

# Short-lived cache of /api/show responses, keyed by (base URL, model name).
_MODEL_DETAILS_CACHE: Dict[Tuple[str, str], Tuple[float, Dict[str, Any]]] = {}
_MODEL_DETAILS_TTL_S = 15.0

async def list_models() -> Dict[str, Any]:
    ollama_url = get_ollama_api_url()
    async with httpx.AsyncClient(timeout=30.0) as client:
        r = await client.get(f"{ollama_url}/api/tags")
        r.raise_for_status()
        data = r.json()
        # Normalize to a simple list of names
        models = [m.get('name') for m in data.get('models', [])]
        return {"models": models}

async def show_model(model: str, *, refresh: bool = False) -> Dict[str, Any]:
    ollama_url = get_ollama_api_url()
    cache_key = (ollama_url.rstrip('/'), str(model or '').strip())
    cached = _MODEL_DETAILS_CACHE.get(cache_key)
    now = time.monotonic()
    # Serve from the cache unless the entry is stale or a refresh is forced.
    if not refresh and cached and (now - cached[0]) < _MODEL_DETAILS_TTL_S:
        return cached[1]
    async with httpx.AsyncClient(timeout=30.0) as client:
        r = await client.post(f"{ollama_url}/api/show", json={"model": model})
        r.raise_for_status()
        data = r.json()
        _MODEL_DETAILS_CACHE[cache_key] = (now, data)
        return data
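
# Usage sketch: repeated calls within the 15 s TTL are served from the
# in-process cache; refresh=True forces a new round trip. The model name is
# illustrative.
#
#     details = await show_model("llama3.2:latest")                # network call
#     details = await show_model("llama3.2:latest")                # cached
#     details = await show_model("llama3.2:latest", refresh=True)  # forced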

def supports_vision(model_data: Dict[str, Any]) -> bool:
    # Prefer the explicit capability flag reported by newer Ollama versions.
    capabilities = model_data.get("capabilities") or []
    if any(str(item).strip().lower() == "vision" for item in capabilities):
        return True
    # Fall back to heuristics over model_info keys: vision-related metadata
    # (e.g. "*.vision.*"), image-token counts, or keys containing a
    # standalone "mm" (multimodal) token.
    model_info = model_data.get("model_info") or {}
    if isinstance(model_info, dict):
        for key in model_info.keys():
            lowered = str(key).strip().lower()
            if ".vision." in lowered or lowered.endswith(".vision"):
                return True
            if lowered.endswith("tokens_per_image") or re.search(r"\bmm\b", lowered):
                return True
    return False
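
# Combined sketch: fetch /api/show details and apply the heuristic above.
# Whether a given model reports a "vision" capability depends on the model
# and the Ollama version; "llava:latest" is only an example name.
#
#     data = await show_model("llava:latest")
#     if supports_vision(data):
#         ...  # e.g. attach base64-encoded images to the chat messages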

async def chat(model: str, messages: List[Dict[str, Any]]) -> str:
    ollama_url = get_ollama_api_url()
    payload = {
        "model": model,
        "messages": messages,
        "stream": False
    }
    async with httpx.AsyncClient(timeout=600.0) as client:
        r = await client.post(f"{ollama_url}/api/chat", json=payload)
        r.raise_for_status()
        data = r.json()
    # Non-streaming /api/chat returns a single assistant message object,
    # not the full conversation.
    try:
        return data["message"]["content"]
    except Exception:
        # Tolerate alternative response shapes (a messages list or a
        # top-level content field) rather than failing hard.
        msgs = data.get("messages") or []
        if msgs:
            return msgs[-1].get("content", "")
        return data.get("content", "")
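
# For reference, a non-streaming /api/chat response looks roughly like:
#
#     {"model": "llama3.2:latest", "created_at": "...",
#      "message": {"role": "assistant", "content": "Hello!"},
#      "done": true}
#
# so data["message"]["content"] is the normal extraction path.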

async def chat_stream(model: str, messages: List[Dict[str, Any]]) -> AsyncGenerator[str, None]:
    ollama_url = get_ollama_api_url()
    payload = {
        "model": model,
        "messages": messages,
        "stream": True
    }
    async with httpx.AsyncClient(timeout=600.0) as client:
        async with client.stream("POST", f"{ollama_url}/api/chat", json=payload) as r:
            r.raise_for_status()
            async for line in r.aiter_lines():
                if not line:
                    continue
                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    continue  # Ignore invalid JSON lines
                # Standard /api/chat stream chunks carry message.content;
                # also accept a bare top-level content field as a fallback.
                if "message" in chunk and "content" in chunk["message"]:
                    yield chunk["message"]["content"]
                elif "content" in chunk:
                    yield chunk["content"]
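
# Minimal end-to-end sketch (assumptions: an Ollama server is reachable at
# the configured URL and the model name below is installed). Because this
# module uses a relative import, run it as a package module, e.g.
# `python -m Heimgeist.backend.ollama_client`, not as a bare script.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        model = "llama3.2:latest"  # illustrative model name
        messages = [{"role": "user", "content": "Say hi in one sentence."}]
        # Non-streaming: one awaited string.
        print(await chat(model, messages))
        # Streaming: consume tokens as they arrive.
        async for token in chat_stream(model, messages):
            print(token, end="", flush=True)
        print()

    asyncio.run(_demo())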