added embedding model selection

2026-03-20 12:00:44 +01:00
parent afaaa1cfb2
commit 6be438519c
18 changed files with 513 additions and 100 deletions

View File

@@ -37,7 +37,7 @@ Requirements:
- Node.js 18+
- Python 3.13
- Ollama running locally
- Optional: SearXNG on `http://localhost:8888`
- Optional: SearXNG on `http://127.0.0.1:8888`
Quick start:

View File

@@ -10,9 +10,12 @@ from typing import Any, Dict
APP_NAME = "Heimgeist"
DEFAULT_BACKEND_API_URL = "http://127.0.0.1:8000"
DEFAULT_OLLAMA_API_URL = "http://127.0.0.1:11434"
DEFAULT_EMBED_MODEL = "nomic-embed-text:latest"
BGE_EMBED_MODEL = "bge-m3:latest"
DEFAULT_SETTINGS: Dict[str, Any] = {
"backendApiUrl": DEFAULT_BACKEND_API_URL,
"ollamaApiUrl": DEFAULT_OLLAMA_API_URL,
"embedModel": DEFAULT_EMBED_MODEL,
}
@@ -56,6 +59,16 @@ def _normalize_url(value: Any, fallback: str) -> str:
return trimmed or fallback
def normalize_embed_model(value: Any) -> str:
if not isinstance(value, str):
return DEFAULT_EMBED_MODEL
trimmed = value.strip().lower()
if trimmed in {"bge", "bge-m3", BGE_EMBED_MODEL}:
return BGE_EMBED_MODEL
return DEFAULT_EMBED_MODEL
def load_app_settings() -> Dict[str, Any]:
path = settings_path()
try:
@@ -79,6 +92,7 @@ def load_app_settings() -> Dict[str, Any]:
else:
settings["backendApiUrl"] = _normalize_url(settings.get("backendApiUrl"), DEFAULT_BACKEND_API_URL)
settings["ollamaApiUrl"] = _normalize_url(settings.get("ollamaApiUrl"), DEFAULT_OLLAMA_API_URL)
settings["embedModel"] = normalize_embed_model(settings.get("embedModel"))
return settings
@@ -86,3 +100,8 @@ def load_app_settings() -> Dict[str, Any]:
def get_ollama_api_url() -> str:
settings = load_app_settings()
return _normalize_url(settings.get("ollamaApiUrl"), DEFAULT_OLLAMA_API_URL)
def get_embed_model_preference() -> str:
settings = load_app_settings()
return normalize_embed_model(settings.get("embedModel"))
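The new normalizer is intentionally binary: any recognized spelling of bge-m3 maps to the full "bge-m3:latest" tag, while non-strings and unknown names fall back to the nomic default. A standalone sketch of that contract, restating the constants and function from this file:

    # Sketch of the normalization contract; mirrors the constants above.
    DEFAULT_EMBED_MODEL = "nomic-embed-text:latest"
    BGE_EMBED_MODEL = "bge-m3:latest"

    def normalize_embed_model(value):
        if not isinstance(value, str):
            return DEFAULT_EMBED_MODEL
        trimmed = value.strip().lower()
        if trimmed in {"bge", "bge-m3", BGE_EMBED_MODEL}:
            return BGE_EMBED_MODEL
        return DEFAULT_EMBED_MODEL

    # Recognized bge spellings normalize to the tagged name; everything else falls back.
    assert normalize_embed_model("  BGE-M3  ") == BGE_EMBED_MODEL
    assert normalize_embed_model("bge-m3:latest") == BGE_EMBED_MODEL
    assert normalize_embed_model("mxbai-embed-large") == DEFAULT_EMBED_MODEL
    assert normalize_embed_model(None) == DEFAULT_EMBED_MODEL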

View File

@@ -19,6 +19,11 @@ from urllib.parse import quote
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from .app_settings import (
DEFAULT_EMBED_MODEL as DEFAULT_EMBED_MODEL_SETTING,
get_embed_model_preference,
get_ollama_api_url,
)
router = APIRouter(tags=["local-rag"])
@@ -27,7 +32,7 @@ LIB_ROOT.mkdir(parents=True, exist_ok=True)
RAW_CORPUS_PROFILE = "per-file-default-v1"
PREPARE_PROFILE = "selective-enrich-v2"
DEFAULT_EMBED_MODEL = "bge-m3:latest"
DEFAULT_EMBED_MODEL = DEFAULT_EMBED_MODEL_SETTING
DEFAULT_ENRICH_MODEL = "qwen3:4b"
DEFAULT_ENRICH_MIN_CHARS = 240
DEFAULT_ENRICH_MAX_TEXT = 6000
@@ -61,7 +66,7 @@ class UpdateFileEnrichmentRequest(BaseModel):
class EmbedLibraryRequest(BaseModel):
embed_model: Optional[str] = None
ollama: str = "http://localhost:11434"
ollama: Optional[str] = None
target_chars: int = 2000
overlap_chars: int = 200
concurrency: int = 6
@@ -70,11 +75,25 @@ class EmbedLibraryRequest(BaseModel):
class LibraryContextRequest(BaseModel):
prompt: str
top_k: int = 5
ollama: str = "http://localhost:11434"
ollama: Optional[str] = None
embed_model: Optional[str] = None
gen_model: str = "qwen3:4b"
def _default_ollama_url() -> str:
return get_ollama_api_url()
def _default_embed_model() -> str:
return get_embed_model_preference()
def _resolve_ollama_url(value: Optional[str] = None) -> str:
if isinstance(value, str) and value.strip():
return value.strip().rstrip("/")
return _default_ollama_url()
def now_iso() -> str:
return datetime.utcnow().isoformat(timespec="seconds") + "Z"
@@ -745,7 +764,7 @@ def _run_selected_enrichment(slug: str, on_progress=None, **opts) -> Dict[str, A
out=paths["enhanced"],
shadow_out=paths["shadow_partial"],
on_progress=on_progress,
ollama=opts.get("ollama", "http://localhost:11434"),
ollama=_resolve_ollama_url(opts.get("ollama")),
model=opts.get("enrich_model", DEFAULT_ENRICH_MODEL),
summary_lang=opts.get("summary_lang", "auto"),
concurrency=opts.get("enrich_concurrency", DEFAULT_ENRICH_CONCURRENCY),
@@ -777,7 +796,7 @@ def _run_prepare_pipeline(slug: str, on_progress=None, **opts):
build_runner = _load_pipeline_fn("corpus_builder", "run_build")
index_runner = _load_pipeline_fn("index_builder", "run_index")
embed_model = opts.get("embed_model") or pipeline.get("embed_model") or DEFAULT_EMBED_MODEL
embed_model = opts.get("embed_model") or _default_embed_model() or pipeline.get("embed_model") or DEFAULT_EMBED_MODEL
if on_progress:
on_progress("prepare", 0.01, "Preparing database for chat...")
@@ -807,7 +826,7 @@ def _run_prepare_pipeline(slug: str, on_progress=None, **opts):
results["enrich"] = _run_selected_enrichment(
slug,
on_progress=enrich_progress,
ollama=opts.get("ollama", "http://localhost:11434"),
ollama=_resolve_ollama_url(opts.get("ollama")),
enrich_model=opts.get("enrich_model", DEFAULT_ENRICH_MODEL),
summary_lang=opts.get("summary_lang", "auto"),
enrich_concurrency=opts.get("enrich_concurrency", DEFAULT_ENRICH_CONCURRENCY),
@@ -837,7 +856,7 @@ def _run_prepare_pipeline(slug: str, on_progress=None, **opts):
out_dir=paths["indexes"],
on_progress=index_progress,
embed_model=embed_model,
ollama=opts.get("ollama", "http://localhost:11434"),
ollama=_resolve_ollama_url(opts.get("ollama")),
target_chars=opts.get("target_chars", 2000),
overlap_chars=opts.get("overlap_chars", 200),
concurrency=opts.get("concurrency", 6),
@@ -1189,7 +1208,7 @@ async def embed_library(slug: str, req: EmbedLibraryRequest):
paths = _collect_library_paths(slug)
if not payload["states"].get("has_corpus"):
raise HTTPException(status_code=400, detail="Build the corpus before indexing.")
embed_model = req.embed_model or pipeline.get("embed_model") or DEFAULT_EMBED_MODEL
embed_model = req.embed_model or _default_embed_model() or pipeline.get("embed_model") or DEFAULT_EMBED_MODEL
lock = LIB_LOCKS.setdefault(slug, asyncio.Lock())
async with lock:
if _has_active_job(slug):
@@ -1202,7 +1221,7 @@ async def embed_library(slug: str, req: EmbedLibraryRequest):
shadow=paths["shadow"] if paths["shadow"].exists() else None,
out_dir=paths["indexes"],
embed_model=embed_model,
ollama=req.ollama,
ollama=_resolve_ollama_url(req.ollama),
target_chars=req.target_chars,
overlap_chars=req.overlap_chars,
concurrency=req.concurrency,
@@ -1247,7 +1266,7 @@ def library_context(slug: str, req: LibraryContextRequest):
paths = _collect_library_paths(slug)
if not payload["states"].get("is_indexed"):
raise HTTPException(status_code=400, detail="Prepare the library before using it in chat.")
embed_model = req.embed_model or pipeline.get("embed_model") or DEFAULT_EMBED_MODEL
embed_model = req.embed_model or pipeline.get("embed_model") or _default_embed_model() or DEFAULT_EMBED_MODEL
try:
run_query = _load_pipeline_fn("unified_rag", "run_query")
result = run_query(
@@ -1257,7 +1276,7 @@ def library_context(slug: str, req: LibraryContextRequest):
content_store=paths["content_store"],
query=req.prompt,
answer=False,
ollama=req.ollama,
ollama=_resolve_ollama_url(req.ollama),
embed_model=embed_model,
gen_model=req.gen_model,
no_rerank=True,
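Both request models now default their ollama field to None: an explicit URL still wins (trimmed, trailing slash stripped), otherwise the URL stored in the app settings is used. Note the embed-model precedence differs by route: embed_library puts the settings preference ahead of the pipeline's recorded model, while library_context checks the pipeline first, presumably so queries keep using the model the existing index was built with. A minimal sketch of the URL fallback, with the settings lookup stubbed as a literal:

    # Sketch of _resolve_ollama_url; get_ollama_api_url is stubbed for illustration.
    from typing import Optional

    def get_ollama_api_url() -> str:
        return "http://127.0.0.1:11434"  # stand-in for the value read from app settings

    def _resolve_ollama_url(value: Optional[str] = None) -> str:
        if isinstance(value, str) and value.strip():
            return value.strip().rstrip("/")
        return get_ollama_api_url()

    assert _resolve_ollama_url("http://192.168.1.50:11434/") == "http://192.168.1.50:11434"
    assert _resolve_ollama_url("   ") == "http://127.0.0.1:11434"  # blank falls back to settings
    assert _resolve_ollama_url(None) == "http://127.0.0.1:11434"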

View File

@@ -9,6 +9,7 @@ import json
from . import models, schemas
from .database import Base, engine, SessionLocal, ensure_sources_column
from .local_rag import router as local_rag_router
from .ollama_admin import inspect_ollama_startup, pull_local_model, start_local_ollama
from .ollama_client import list_models as ollama_list, chat as ollama_chat, chat_stream as ollama_chat_stream
from .websearch import enrich_prompt
@@ -47,6 +48,31 @@ async def get_models():
except Exception as e:
raise HTTPException(status_code=502, detail=f"Ollama not available: {e}")
@app.get("/ollama/startup-status")
async def ollama_startup_status():
return await inspect_ollama_startup()
@app.post("/ollama/start")
async def ollama_start_route():
try:
return await start_local_ollama()
except FileNotFoundError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
except RuntimeError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
@app.post("/ollama/pull")
async def ollama_pull_route(req: schemas.OllamaPullRequest):
try:
return await pull_local_model(req.model)
except FileNotFoundError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
except RuntimeError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
@app.get("/sessions", response_model=schemas.SessionsResponse)
def get_sessions(db: Session = Depends(get_db)):
sessions = db.query(models.ChatSession).order_by(models.ChatSession.created_at.desc()).all()
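These three routes back the renderer's new startup check. A hedged sketch of driving them directly with httpx, assuming the backend is reachable at its default address; the response keys come from inspect_ollama_startup in the new backend/ollama_admin.py below:

    # Sketch: call the new Ollama admin routes on a locally running backend.
    import httpx

    BASE = "http://127.0.0.1:8000"  # DEFAULT_BACKEND_API_URL

    with httpx.Client(timeout=None) as client:  # model pulls can take a while
        status = client.get(f"{BASE}/ollama/startup-status").json()
        if not status["ollama_running"] and status["can_manage_locally"]:
            resp = client.post(f"{BASE}/ollama/start")  # spawns "ollama serve" and waits
            resp.raise_for_status()
            status = resp.json()
        if status["ollama_running"] and not status["embedding_model_available"]:
            resp = client.post(f"{BASE}/ollama/pull",
                               json={"model": status["selected_embed_model"]})
            resp.raise_for_status()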

backend/ollama_admin.py (new file, 140 lines)
View File

@@ -0,0 +1,140 @@
from __future__ import annotations
import asyncio
import os
import shutil
import subprocess
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
import httpx
from .app_settings import get_embed_model_preference, get_ollama_api_url, normalize_embed_model
LOCAL_OLLAMA_HOSTS = {"127.0.0.1", "localhost", "::1"}
def _ollama_binary() -> Optional[str]:
return shutil.which("ollama")
def _is_local_ollama_url(url: str) -> bool:
try:
parsed = urlparse(url)
except Exception:
return False
return (parsed.hostname or "").strip().lower() in LOCAL_OLLAMA_HOSTS
def _model_aliases(model: str) -> set[str]:
normalized = normalize_embed_model(model)
aliases = {normalized}
if normalized.endswith(":latest"):
aliases.add(normalized[:-7])
else:
aliases.add(f"{normalized}:latest")
return aliases
async def _list_model_names(ollama_url: str, *, timeout: float = 5.0) -> List[str]:
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.get(f"{ollama_url.rstrip('/')}/api/tags")
response.raise_for_status()
payload = response.json()
out: List[str] = []
for item in payload.get("models", []) or []:
name = str((item or {}).get("name") or "").strip()
if name:
out.append(name)
return out
async def inspect_ollama_startup() -> Dict[str, Any]:
ollama_url = get_ollama_api_url()
embed_model = get_embed_model_preference()
ollama_bin = _ollama_binary()
is_local = _is_local_ollama_url(ollama_url)
available_models: List[str] = []
error = ""
running = False
try:
available_models = await _list_model_names(ollama_url)
running = True
except Exception as exc:
error = str(exc)
available = bool(set(available_models) & _model_aliases(embed_model))
return {
"ollama_url": ollama_url,
"ollama_running": running,
"ollama_binary_found": bool(ollama_bin),
"can_manage_locally": bool(ollama_bin) and is_local,
"selected_embed_model": embed_model,
"embedding_model_available": available,
"available_models": available_models,
"error": error,
}
async def start_local_ollama() -> Dict[str, Any]:
status = await inspect_ollama_startup()
if status["ollama_running"]:
return status
if not status["can_manage_locally"]:
raise RuntimeError("Ollama can only be started automatically when the configured Ollama URL points to this machine.")
ollama_bin = _ollama_binary()
if not ollama_bin:
raise FileNotFoundError("Could not find the 'ollama' executable in PATH.")
subprocess.Popen(
[ollama_bin, "serve"],
stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True,
env=os.environ.copy(),
)
for _ in range(20):
await asyncio.sleep(0.5)
status = await inspect_ollama_startup()
if status["ollama_running"]:
return status
raise RuntimeError("Started 'ollama serve', but Ollama did not become reachable in time.")
async def pull_local_model(model: Optional[str] = None) -> Dict[str, Any]:
status = await inspect_ollama_startup()
if not status["can_manage_locally"]:
raise RuntimeError("Heimgeist can only pull models automatically when the configured Ollama URL points to this machine.")
if not status["ollama_running"]:
raise RuntimeError("Ollama must be running before Heimgeist can pull a model.")
ollama_bin = _ollama_binary()
if not ollama_bin:
raise FileNotFoundError("Could not find the 'ollama' executable in PATH.")
model_name = normalize_embed_model(model or status["selected_embed_model"])
process = await asyncio.create_subprocess_exec(
ollama_bin,
"pull",
model_name,
stdin=asyncio.subprocess.DEVNULL,
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.PIPE,
)
_stdout, stderr = await process.communicate()
if process.returncode != 0:
detail = (stderr or b"").decode("utf-8", errors="ignore").strip()
raise RuntimeError(detail or f"'ollama pull {model_name}' failed with exit code {process.returncode}.")
status = await inspect_ollama_startup()
return {
"model": model_name,
"status": status,
}
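The embedding_model_available flag is a plain set intersection between the names Ollama reports under /api/tags and the tagged/untagged aliases of the selected model. A standalone sketch of that check, with the settings-backed normalization omitted:

    # Sketch: how inspect_ollama_startup decides the selected embed model is installed.
    def model_aliases(model: str) -> set:
        aliases = {model}
        if model.endswith(":latest"):
            aliases.add(model[:-7])          # "bge-m3:latest" -> "bge-m3"
        else:
            aliases.add(f"{model}:latest")   # "bge-m3" -> "bge-m3:latest"
        return aliases

    installed = ["qwen3:4b", "bge-m3:latest"]   # names as returned by GET /api/tags
    assert bool(set(installed) & model_aliases("bge-m3")) is True
    assert bool(set(installed) & model_aliases("nomic-embed-text:latest")) is False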

View File

@@ -6,8 +6,8 @@ import requests
DEFAULT_EMBED_CANDIDATES = (
"bge-m3:latest",
"nomic-embed-text:latest",
"bge-m3:latest",
"dengcao/Qwen3-Embedding-0.6B:F16",
)
@@ -21,7 +21,17 @@ def _cache_key(ollama_url: str, preferred_model: Optional[str]) -> Tuple[str, st
def _candidate_models(preferred_model: Optional[str]) -> List[str]:
out: List[str] = []
seen = set()
for model in [preferred_model, *DEFAULT_EMBED_CANDIDATES]:
if preferred_model:
primary = str(preferred_model).strip()
candidates = [primary]
if primary.endswith(":latest"):
candidates.append(primary[:-7])
else:
candidates.append(f"{primary}:latest")
else:
candidates = list(DEFAULT_EMBED_CANDIDATES)
for model in candidates:
name = str(model or "").strip()
if not name or name in seen:
continue
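With this change an explicit preference no longer falls through to the built-in candidate list: only the preferred name and its tagged/untagged twin are tried, and the defaults apply solely when no preference is set. A standalone sketch of the resulting candidate order (the default tuple here is an illustrative subset):

    # Sketch of _candidate_models after the change.
    from typing import List, Optional

    DEFAULT_EMBED_CANDIDATES = ("nomic-embed-text:latest", "bge-m3:latest")  # illustrative subset

    def candidate_models(preferred_model: Optional[str]) -> List[str]:
        if preferred_model:
            primary = str(preferred_model).strip()
            candidates = [primary]
            # Also try the tagged/untagged twin of the preferred name.
            if primary.endswith(":latest"):
                candidates.append(primary[:-7])
            else:
                candidates.append(f"{primary}:latest")
        else:
            candidates = list(DEFAULT_EMBED_CANDIDATES)
        out, seen = [], set()
        for name in candidates:              # de-duplicate while preserving order
            name = str(name or "").strip()
            if name and name not in seen:
                seen.add(name)
                out.append(name)
        return out

    assert candidate_models("bge-m3") == ["bge-m3", "bge-m3:latest"]
    assert candidate_models(None) == list(DEFAULT_EMBED_CANDIDATES)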

View File

@@ -67,3 +67,7 @@ class WebSearchResponse(BaseModel):
enriched_prompt: str
sources: List[str] = []
context_block: str = ""
class OllamaPullRequest(BaseModel):
model: Optional[str] = None

View File

@@ -7,10 +7,11 @@ import json
import traceback
import hashlib
from .app_settings import get_embed_model_preference, get_ollama_api_url
from .ollama_client import chat as ollama_chat
# Configure your local SearXNG instance URL (no trailing slash)
SEARX_URL = "http://localhost:8888"
SEARX_URL = "http://127.0.0.1:8888"
# ----- Utilities ----------------------------------------------------------------
@@ -263,7 +264,7 @@ async def rerank(
docs: List[Tuple[str, str]],
model: str, # kept for signature compatibility (unused here)
context_excerpt: str,
embed_model: str = "bge-m3:latest" # prefer explicit tag; we will auto-fallback
embed_model: Optional[str] = None,
) -> List[Tuple[str, str, float]]:
"""
Embedding-based reranker (bge-m3 via Ollama) using cosine similarity.
@@ -277,6 +278,8 @@ async def rerank(
"""
import time
t0 = time.perf_counter()
embed_model = (embed_model or get_embed_model_preference()).strip()
ollama_url = get_ollama_api_url().rstrip("/")
# --- optional fast cosine via NumPy ---------------------------------------
try:
@@ -357,9 +360,9 @@ async def rerank(
async def _one(text: str) -> Tuple[List[float], Optional[str]]:
payload = {"model": model_name, "prompt": text}
try:
async with sem:
async with httpx.AsyncClient(timeout=timeout) as client:
r = await client.post("http://localhost:11434/api/embeddings", json=payload)
async with sem:
async with httpx.AsyncClient(timeout=timeout) as client:
r = await client.post(f"{ollama_url}/api/embeddings", json=payload)
r.raise_for_status()
data = r.json()
except httpx.HTTPStatusError as e:
@@ -639,4 +642,4 @@ async def enrich_prompt(
except Exception:
print("[web] ERROR in build_enriched_prompt:\n" + traceback.format_exc())
print(f"[web] enrich_prompt total: {time.perf_counter() - start_all:.3f}s")
return _no_results_enriched("build_enriched_failed", queries)
return _no_results_enriched("build_enriched_failed", queries)
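The reranker now reads both its embedding model and the Ollama URL from the saved settings instead of hard-coding localhost:11434. A minimal sketch of the per-document embedding call it issues, with the settings lookups replaced by literals:

    # Sketch: one embedding request as the reranker issues it (settings stubbed as literals).
    import asyncio
    import httpx

    OLLAMA_URL = "http://127.0.0.1:11434"    # stand-in for get_ollama_api_url()
    EMBED_MODEL = "nomic-embed-text:latest"  # stand-in for get_embed_model_preference()

    async def embed(text: str) -> list:
        payload = {"model": EMBED_MODEL, "prompt": text}
        async with httpx.AsyncClient(timeout=60.0) as client:
            r = await client.post(f"{OLLAMA_URL}/api/embeddings", json=payload)
            r.raise_for_status()
            return r.json().get("embedding", [])

    # vector = asyncio.run(embed("query text to rerank against"))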

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

dist/assets/index-Cl_WYrJF.css (vendored, new file, 1 line)

File diff suppressed because one or more lines are too long

dist/assets/index-u1-aH6b-.js (vendored, new file, 73 lines)

File diff suppressed because one or more lines are too long

dist/index.html (vendored, 4 lines changed)
View File

@@ -5,8 +5,8 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>LLM Desktop</title>
<script type="module" crossorigin src="/assets/index-CEHqzDAN.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-BIbxZDtU.css">
<script type="module" crossorigin src="/assets/index-u1-aH6b-.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-Cl_WYrJF.css">
</head>
<body>
<div id="root"></div>

View File

@@ -11,6 +11,8 @@ const execFileAsync = promisify(execFile)
const DEFAULT_BACKEND_API_URL = 'http://127.0.0.1:8000'
const DEFAULT_OLLAMA_API_URL = 'http://127.0.0.1:11434'
const DEFAULT_EMBED_MODEL = 'nomic-embed-text:latest'
const BGE_EMBED_MODEL = 'bge-m3:latest'
const REPO_ROOT = path.resolve(__dirname, '..')
const UPDATE_REMOTE_URL = 'https://giers10.uber.space/giers10/Heimgeist.git'
const UPDATE_BRANCH = 'master'
@@ -27,11 +29,20 @@ const MAX_UI_SCALE = 1.3
const defaultSettings = {
backendApiUrl: DEFAULT_BACKEND_API_URL,
ollamaApiUrl: DEFAULT_OLLAMA_API_URL,
embedModel: DEFAULT_EMBED_MODEL,
colorScheme: 'Default',
uiScale: DEFAULT_UI_SCALE,
chatModel: 'llama3',
}
function normalizeEmbedModel(value) {
const trimmed = String(value || '').trim().toLowerCase()
if (trimmed === 'bge' || trimmed === 'bge-m3' || trimmed === BGE_EMBED_MODEL) {
return BGE_EMBED_MODEL
}
return DEFAULT_EMBED_MODEL
}
function looksLikeOllamaUrl(value) {
if (typeof value !== 'string') {
return false
@@ -67,6 +78,7 @@ function migrateSettings(rawSettings) {
nextSettings.backendApiUrl = String(nextSettings.backendApiUrl || '').trim()
nextSettings.ollamaApiUrl = String(nextSettings.ollamaApiUrl || '').trim()
nextSettings.embedModel = normalizeEmbedModel(nextSettings.embedModel)
return { nextSettings, migrated }
}
@@ -457,7 +469,13 @@ ipcMain.handle('get-update-status', () => lastUpdateCheckResult)
ipcMain.handle('check-for-updates', () => checkForUpdates('manual'))
ipcMain.handle('set-setting', (event, key, value) => {
appSettings[key] = key === 'uiScale' ? normalizeUiScale(value) : value
if (key === 'uiScale') {
appSettings[key] = normalizeUiScale(value)
} else if (key === 'embedModel') {
appSettings[key] = normalizeEmbedModel(value)
} else {
appSettings[key] = value
}
saveSettings()
if (key === 'uiScale') {
applyUiScaleToAllWindows()
@@ -468,6 +486,7 @@ ipcMain.handle('set-setting', (event, key, value) => {
ipcMain.handle('update-settings', (event, settings) => {
appSettings = { ...appSettings, ...settings }
appSettings.uiScale = normalizeUiScale(appSettings.uiScale)
appSettings.embedModel = normalizeEmbedModel(appSettings.embedModel)
saveSettings()
if (Object.prototype.hasOwnProperty.call(settings, 'uiScale')) {
applyUiScaleToAllWindows()

View File

@@ -127,6 +127,7 @@ const COLOR_SCHEME_KEY = 'colorScheme';
const WEBSEARCH_URL_KEY = 'websearch.searxUrl';
const WEBSEARCH_ENGINES_KEY = 'websearch.engines';
const CHAT_LIBRARY_MAP_KEY = 'chat.libraryBySession';
const DEFAULT_SEARX_URL = 'http://127.0.0.1:8888';
// Initial API value will be set by useEffect after settings are loaded
let API = import.meta.env.VITE_API_URL ?? 'http://127.0.0.1:8000';
@@ -137,6 +138,13 @@ function resolveBackendApiUrl(settings) {
return settings.backendApiUrl || settings.ollamaApiUrl || API;
}
function migrateLegacySearxUrl(value) {
const trimmed = typeof value === 'string' ? value.trim() : '';
if (!trimmed) return DEFAULT_SEARX_URL;
if (trimmed === 'http://localhost:8888') return DEFAULT_SEARX_URL;
return trimmed;
}
export default function App() {
const [chatSessions, setChatSessions] = useState([])
const [activeSessionId, setActiveSessionId] = useState(null)
@@ -169,7 +177,8 @@ export default function App() {
const [backendApiUrl, setBackendApiUrl] = useState(API); // State for Heimgeist backend URL
const [colorScheme, setColorScheme] = useState('Default'); // State for color scheme
const [streamOutput, setStreamOutput] = useState(false);
const [searxUrl, setSearxUrl] = useState(localStorage.getItem(WEBSEARCH_URL_KEY) || 'http://localhost:8888');
const [startupTaskMessage, setStartupTaskMessage] = useState('');
const [searxUrl, setSearxUrl] = useState(() => migrateLegacySearxUrl(localStorage.getItem(WEBSEARCH_URL_KEY)));
const [searxEngines, setSearxEngines] = useState(() => {
try {
const raw = localStorage.getItem(WEBSEARCH_ENGINES_KEY);
@@ -191,6 +200,8 @@ export default function App() {
const [loading, setLoading] = useState(true); // Loading state for initial session fetch
const [unreadSessions, setUnreadSessions] = useState([]); // Track unread messages
const [scrollPositions, setScrollPositions] = useState({}); // Store scroll positions for each session
const [settingsLoaded, setSettingsLoaded] = useState(false);
const startupOllamaCheckRanRef = useRef(false);
// Editing state for user messages
const [editingMessageIndex, setEditingMessageIndex] = useState(null);
const [editText, setEditText] = useState('');
@@ -253,6 +264,20 @@ export default function App() {
return String(error)
}
async function expectBackendJson(response) {
const data = await response.json().catch(() => null)
if (response.ok) return data
const detail = typeof data?.detail === 'string'
? data.detail
: (typeof data?.message === 'string' ? data.message : '')
throw new Error(detail || `HTTP ${response.status}`)
}
async function fetchStartupOllamaStatus() {
const response = await fetch(`${backendApiUrl}/ollama/startup-status`)
return expectBackendJson(response)
}
async function fetchLocalLibraryContext(slug, prompt, signal) {
if (!slug) return { contextBlock: null, sources: [] }
@@ -705,6 +730,8 @@ async function regenerateFromIndex(index, overrideUserText = null) {
setStreamOutput(settings.streamOutput || false);
setScrollPositions(settings.scrollPositions || {}); // Load scroll positions
applyColorScheme(settings.colorScheme || 'Default'); // Apply initial scheme
}).finally(() => {
setSettingsLoaded(true);
});
const handleFocus = () => {
@@ -724,6 +751,68 @@ async function regenerateFromIndex(index, overrideUserText = null) {
};
}, [activeSidebarMode]);
useEffect(() => {
if (!settingsLoaded || !backendApiUrl || startupOllamaCheckRanRef.current) return
startupOllamaCheckRanRef.current = true
let cancelled = false
;(async () => {
let actionStarted = false
try {
let status = await fetchStartupOllamaStatus()
if (cancelled) return
if (!status?.ollama_running && status?.can_manage_locally) {
const confirmed = window.confirm(
`Ollama is not running at ${status.ollama_url}. Start it in the background now with "ollama serve"?`
)
if (cancelled) return
if (confirmed) {
actionStarted = true
setStartupTaskMessage('Starting Ollama in the background...')
const response = await fetch(`${backendApiUrl}/ollama/start`, { method: 'POST' })
status = await expectBackendJson(response)
if (cancelled) return
}
}
if (status?.ollama_running && !status?.embedding_model_available && status?.can_manage_locally) {
const confirmed = window.confirm(
`The selected embedding model "${status.selected_embed_model}" is not installed in Ollama. Pull it now?`
)
if (cancelled) return
if (confirmed) {
actionStarted = true
setStartupTaskMessage(`Pulling ${status.selected_embed_model} in Ollama...`)
const response = await fetch(`${backendApiUrl}/ollama/pull`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model: status.selected_embed_model })
})
await expectBackendJson(response)
if (cancelled) return
}
}
} catch (error) {
if (!cancelled) {
console.warn('startup Ollama check failed', error)
if (actionStarted) {
window.alert(`Startup action failed: ${getErrorText(error)}`)
}
}
} finally {
if (!cancelled) {
setStartupTaskMessage('')
}
}
})()
return () => {
cancelled = true
}
}, [backendApiUrl, settingsLoaded]);
// Apply color scheme whenever it changes
useEffect(() => {
applyColorScheme(colorScheme);
@@ -1785,6 +1874,11 @@ async function createNewChat() {
<div className="resizer" onMouseDown={startResizing}></div>
</div>
<div className="main-content">
{startupTaskMessage && (
<div className="startup-task-banner" role="status" aria-live="polite">
{startupTaskMessage}
</div>
)}
{activeSidebarMode === 'chats' && (
<>
<div className="header">

View File

@@ -2,10 +2,13 @@ import React, { useState, useEffect } from 'react';
const BACKEND_API_URL_KEY = 'backendApiUrl';
const OLLAMA_API_URL_KEY = 'ollamaApiUrl';
const EMBED_MODEL_KEY = 'embedModel';
const MODEL_KEY = 'chatModel';
const STREAM_KEY = 'streamOutput';
const DEFAULT_BACKEND_API_URL = 'http://127.0.0.1:8000';
const DEFAULT_OLLAMA_API_URL = 'http://127.0.0.1:11434';
const DEFAULT_EMBED_MODEL = 'nomic-embed-text:latest';
const BGE_EMBED_MODEL = 'bge-m3:latest';
const DEFAULT_UPDATE_STATUS = {
state: 'idle',
message: '',
@@ -32,6 +35,7 @@ function getStatusTone(state) {
export default function GeneralSettings({ onModelChange, onStreamOutputChange, onLibrariesPurged }) {
const [backendApiUrl, setBackendApiUrl] = useState('');
const [ollamaApiUrl, setOllamaApiUrl] = useState('');
const [embedModel, setEmbedModel] = useState(DEFAULT_EMBED_MODEL);
const [models, setModels] = useState([]);
const [selectedModel, setSelectedModel] = useState('');
const [streamOutput, setStreamOutput] = useState(false);
@@ -53,6 +57,7 @@ export default function GeneralSettings({ onModelChange, onStreamOutputChange, o
setBackendApiUrl(resolveBackendApiUrl(settings));
setOllamaApiUrl(settings.ollamaApiUrl || DEFAULT_OLLAMA_API_URL);
setEmbedModel(settings.embedModel || DEFAULT_EMBED_MODEL);
setSelectedModel(settings.chatModel || '');
setStreamOutput(settings.streamOutput || false);
setUpdateStatus(status || DEFAULT_UPDATE_STATUS);
@@ -102,6 +107,12 @@ export default function GeneralSettings({ onModelChange, onStreamOutputChange, o
}
};
const handleEmbedModelToggle = () => {
const nextModel = embedModel === BGE_EMBED_MODEL ? DEFAULT_EMBED_MODEL : BGE_EMBED_MODEL;
setEmbedModel(nextModel);
window.electronAPI.setSetting(EMBED_MODEL_KEY, nextModel);
};
const handleStreamToggle = () => {
const newStreamValue = !streamOutput;
setStreamOutput(newStreamValue);
@@ -199,6 +210,28 @@ export default function GeneralSettings({ onModelChange, onStreamOutputChange, o
/>
<p className="setting-description">Heimgeist uses this URL to talk to Ollama for models and chat generation.</p>
</div>
<div className="setting-section">
<h3>Embedding Model</h3>
<div className="setting-switch-row">
<span className={"setting-switch-label" + (embedModel !== BGE_EMBED_MODEL ? " active" : "")}>
nomic
</span>
<label className="toggle-switch toggle-switch--binary-select">
<input
type="checkbox"
checked={embedModel === BGE_EMBED_MODEL}
onChange={handleEmbedModelToggle}
/>
<span className="slider"></span>
</label>
<span className={"setting-switch-label" + (embedModel === BGE_EMBED_MODEL ? " active" : "")}>
bge-m3
</span>
</div>
<p className="setting-description">
Heimgeist uses this model for web-search reranking and for building or rebuilding local database embeddings.
</p>
</div>
<div className="setting-section">
<h3>Chat Model</h3>
<select

View File

@@ -38,7 +38,7 @@ return (
className="input"
value={searxUrl}
onChange={e => setSearxUrl(e.target.value)}
placeholder="e.g., http://localhost:8888"
placeholder="e.g., http://127.0.0.1:8888"
/>
</div>
@@ -59,4 +59,4 @@ return (
</div>
</div>
);
}
}

View File

@@ -546,6 +546,42 @@ textarea.input {
flex-wrap: wrap;
}
.setting-switch-row {
display: inline-flex;
align-items: center;
gap: 12px;
flex-wrap: wrap;
}
.setting-switch-label {
color: var(--muted);
font-weight: 600;
letter-spacing: 0.01em;
transition: color 0.2s ease;
}
.setting-switch-label.active {
color: var(--text);
}
.toggle-switch--binary-select .slider {
background-color: var(--input-bg);
border-color: var(--border);
}
.toggle-switch--binary-select .slider:before {
background-color: var(--text);
}
.toggle-switch--binary-select input:checked + .slider {
background-color: var(--input-bg);
border-color: var(--border);
}
.toggle-switch--binary-select input:checked + .slider:before {
background-color: var(--text);
}
.range-input {
width: min(360px, 100%);
accent-color: var(--accent);
@@ -798,6 +834,16 @@ input:checked + .slider:before {
background-color: var(--panel);
}
.startup-task-banner {
margin: 16px 16px 0;
padding: 12px 14px;
border: 1px solid var(--border);
border-radius: 10px;
background: var(--panel);
color: var(--text);
line-height: 1.5;
}
/* Spinner Styles */
.spinner {
border: 3px solid rgba(255, 255, 255, 0.3);