diff --git a/README.md b/README.md new file mode 100644 index 0000000..1bc783b --- /dev/null +++ b/README.md @@ -0,0 +1,89 @@ +# Heimgeist + +Heimgeist is a local desktop chat client for Ollama. It combines an Electron + React renderer with a FastAPI backend, stores chat history in SQLite, supports optional SearXNG-backed web search, and can enrich prompts with context from local library indexes. + +## Features + +- Local desktop chat UI with Electron +- Ollama-backed chat with streaming and non-streaming replies +- Persistent chat sessions and automatic title generation +- Edit-and-regenerate flow for earlier user messages +- Optional web search enrichment with source chips +- Local library management for RAG-style prompt enrichment +- Theme selection and UI scale controls + +## Local Libraries + +The `DBs` tab is no longer a placeholder. You can: + +- create and rename libraries +- register files and folders +- build, enrich, and index library content +- mark one library as active for chat context +- open or remove registered files from the UI + +When a chat library is active, Heimgeist queries it before sending a message and appends the returned context block to the prompt. + +## Stack + +- Frontend: Electron, React, Vite +- Backend: FastAPI, SQLAlchemy, SQLite +- Search enrichment: SearXNG + page fetching/reranking +- Local RAG pipeline: corpus build, enrichment, embedding, and retrieval helpers under `backend/rag/` + +## Development + +Requirements: + +- Node.js 18+ +- Python 3.13 +- Ollama running locally +- Optional: SearXNG on `http://localhost:8888` + +Quick start: + +```bash +./run.sh +``` + +This creates or refreshes `backend/.venv`, installs Python dependencies, installs npm dependencies, and starts the dev stack. + +Manual startup: + +```bash +python3.13 -m venv backend/.venv +backend/.venv/bin/python -m pip install -r backend/requirements.txt +npm install +npm run dev +``` + +## Project Layout + +```text +. 
+├── backend/ +│ ├── main.py +│ ├── local_rag.py +│ ├── rag/ +│ ├── websearch.py +│ ├── ollama_client.py +│ ├── models.py +│ ├── database.py +│ ├── schemas.py +│ └── requirements.txt +├── electron/ +│ ├── main.cjs +│ └── preload.cjs +├── src/ +│ ├── App.jsx +│ ├── LibraryManager.jsx +│ ├── GeneralSettings.jsx +│ ├── InterfaceSettings.jsx +│ ├── WebsearchSettings.jsx +│ ├── markdown.js +│ ├── colorSchemes.js +│ └── styles.css +├── package.json +├── run.sh +└── vite.config.js +``` diff --git a/backend/libraries/punk/library.json b/backend/libraries/punk/library.json new file mode 100644 index 0000000..0c5fce7 --- /dev/null +++ b/backend/libraries/punk/library.json @@ -0,0 +1,32 @@ +{ + "id": "f5194228933140b68625347333749baf", + "name": "Punk", + "slug": "punk", + "created_at": "2026-03-19T20:02:20Z", + "files": [ + { + "sha256": "e816ca61aebd84159747d248fedd6d5ff318c471c36bcc31b1ac6bf9aebcd3c1", + "path": "/Users/giers/Documents/The Evolution of Cooperation_Robert Axelrod_liber3.pdf", + "rel": "e816ca61aebd84159747d248fedd6d5ff318c471c36bcc31b1ac6bf9aebcd3c1--The_Evolution_of_Cooperation_Robert_Axelrod_liber3.pdf", + "name": "The Evolution of Cooperation_Robert Axelrod_liber3.pdf", + "size": 1208035, + "added_at": "2026-03-19T20:02:53Z" + }, + { + "sha256": "19f1e5d2ceaab5fd1f1dc58ff07422388f156610d16dfdea2bdb35a5b9e70813", + "path": "/Users/giers/Documents/GeorgeJordac-TheVoiceOfHumanJustice.pdf", + "rel": "19f1e5d2ceaab5fd1f1dc58ff07422388f156610d16dfdea2bdb35a5b9e70813--GeorgeJordac-TheVoiceOfHumanJustice.pdf", + "name": "GeorgeJordac-TheVoiceOfHumanJustice.pdf", + "size": 849816, + "added_at": "2026-03-19T20:04:17Z" + }, + { + "sha256": "85fce554ff7685f7bccb136aff5768e54b9ba8361672fe45dbce599598c4be4b", + "path": "/Users/giers/Music/4 Strings - Take Me Away (Into The Night) (Vocal Radio Mix).mp3", + "rel": "85fce554ff7685f7bccb136aff5768e54b9ba8361672fe45dbce599598c4be4b--4_Strings_-_Take_Me_Away_Into_The_Night_Vocal_Radio_Mix_.mp3", + "name": "4 Strings - Take 
Me Away (Into The Night) (Vocal Radio Mix).mp3", + "size": 7994108, + "added_at": "2026-03-19T20:06:30Z" + } + ] +} \ No newline at end of file diff --git a/backend/libraries/punk/stage/19f1e5d2ceaab5fd1f1dc58ff07422388f156610d16dfdea2bdb35a5b9e70813--GeorgeJordac-TheVoiceOfHumanJustice.pdf b/backend/libraries/punk/stage/19f1e5d2ceaab5fd1f1dc58ff07422388f156610d16dfdea2bdb35a5b9e70813--GeorgeJordac-TheVoiceOfHumanJustice.pdf new file mode 120000 index 0000000..44e3f4c --- /dev/null +++ b/backend/libraries/punk/stage/19f1e5d2ceaab5fd1f1dc58ff07422388f156610d16dfdea2bdb35a5b9e70813--GeorgeJordac-TheVoiceOfHumanJustice.pdf @@ -0,0 +1 @@ +/Users/giers/Documents/GeorgeJordac-TheVoiceOfHumanJustice.pdf \ No newline at end of file diff --git a/backend/libraries/punk/stage/85fce554ff7685f7bccb136aff5768e54b9ba8361672fe45dbce599598c4be4b--4_Strings_-_Take_Me_Away_Into_The_Night_Vocal_Radio_Mix_.mp3 b/backend/libraries/punk/stage/85fce554ff7685f7bccb136aff5768e54b9ba8361672fe45dbce599598c4be4b--4_Strings_-_Take_Me_Away_Into_The_Night_Vocal_Radio_Mix_.mp3 new file mode 120000 index 0000000..a460770 --- /dev/null +++ b/backend/libraries/punk/stage/85fce554ff7685f7bccb136aff5768e54b9ba8361672fe45dbce599598c4be4b--4_Strings_-_Take_Me_Away_Into_The_Night_Vocal_Radio_Mix_.mp3 @@ -0,0 +1 @@ +/Users/giers/Music/4 Strings - Take Me Away (Into The Night) (Vocal Radio Mix).mp3 \ No newline at end of file diff --git a/backend/libraries/punk/stage/e816ca61aebd84159747d248fedd6d5ff318c471c36bcc31b1ac6bf9aebcd3c1--The_Evolution_of_Cooperation_Robert_Axelrod_liber3.pdf b/backend/libraries/punk/stage/e816ca61aebd84159747d248fedd6d5ff318c471c36bcc31b1ac6bf9aebcd3c1--The_Evolution_of_Cooperation_Robert_Axelrod_liber3.pdf new file mode 120000 index 0000000..06db110 --- /dev/null +++ b/backend/libraries/punk/stage/e816ca61aebd84159747d248fedd6d5ff318c471c36bcc31b1ac6bf9aebcd3c1--The_Evolution_of_Cooperation_Robert_Axelrod_liber3.pdf @@ -0,0 +1 @@ +/Users/giers/Documents/The Evolution of 
from __future__ import annotations

import asyncio
import functools
import hashlib
import importlib
import json
import os
import re
import shutil
import threading
import uuid
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
from urllib.parse import quote

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel


router = APIRouter(tags=["local-rag"])

# Every library lives in its own directory below LIB_ROOT.
LIB_ROOT = Path(__file__).parent / "libraries"
LIB_ROOT.mkdir(parents=True, exist_ok=True)

JOB_EXECUTOR = ThreadPoolExecutor(max_workers=2)
# In-memory job registry keyed by job id.
JOBS: Dict[str, Dict[str, Any]] = {}
# One asyncio lock per library slug, guarding "check active job + start job".
LIB_LOCKS: Dict[str, asyncio.Lock] = {}
# Strong references to running job tasks. The event loop only keeps weak
# references, so a task whose result is discarded may be garbage-collected
# before it finishes.
_JOB_TASKS: Set["asyncio.Task[Any]"] = set()


class CreateLibraryRequest(BaseModel):
    name: str


class RenameLibraryRequest(BaseModel):
    name: str


class RegisterPathsRequest(BaseModel):
    paths: List[str]


class RemoveFileRequest(BaseModel):
    rel: str


class EmbedLibraryRequest(BaseModel):
    embed_model: str = "dengcao/Qwen3-Embedding-0.6B:F16"
    ollama: str = "http://localhost:11434"
    target_chars: int = 2000
    overlap_chars: int = 200
    concurrency: int = 6


class LibraryContextRequest(BaseModel):
    prompt: str
    top_k: int = 5
    ollama: str = "http://localhost:11434"
    embed_model: str = "dengcao/Qwen3-Embedding-0.6B:F16"
    gen_model: str = "qwen3:4b"


def now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
    # datetime.utcnow() is deprecated; build the same string from an aware value.
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def slugify(name: str) -> str:
    """Turn a display name into a lowercase, dash-separated directory slug.

    Falls back to a random ``lib-xxxxxxxx`` slug when nothing usable remains.
    """
    cleaned = re.sub(r"[^a-zA-Z0-9\- ]+", "", name).strip().lower()
    cleaned = re.sub(r"\s+", "-", cleaned)
    return cleaned or f"lib-{uuid.uuid4().hex[:8]}"


def _validate_slug(slug: str) -> str:
    """Reject slugs that could address anything other than a child of LIB_ROOT.

    Slugs arrive from URL path parameters, so ``..`` or an embedded separator
    must never reach a filesystem join.

    Raises:
        HTTPException: 404 when the slug is empty, ``.``/``..`` or contains a
            path separator or NUL byte.
    """
    if (
        not slug
        or slug in {".", ".."}
        or any(ch in slug for ch in ("/", "\\", "\x00"))
    ):
        raise HTTPException(status_code=404, detail="Library not found")
    return slug


def lib_dir(slug: str) -> Path:
    """Return the directory of library ``slug`` (validated, not created)."""
    return LIB_ROOT / _validate_slug(slug)


def lib_json(slug: str) -> Path:
    """Return the path of the library's ``library.json`` manifest."""
    return lib_dir(slug) / "library.json"


def stage_dir(slug: str) -> Path:
    """Return the library's ``stage`` directory, creating it if needed."""
    path = lib_dir(slug) / "stage"
    path.mkdir(parents=True, exist_ok=True)
    return path


def indexes_dir(slug: str) -> Path:
    """Return the library's ``indexes`` directory, creating it if needed."""
    path = lib_dir(slug) / "indexes"
    path.mkdir(parents=True, exist_ok=True)
    return path


def default_library_data(name: str, slug: str) -> Dict[str, Any]:
    """Build the manifest for a brand-new, empty library."""
    return {
        "id": uuid.uuid4().hex,
        "name": name,
        "slug": slug,
        "created_at": now_iso(),
        "files": [],
    }


def _read_json(path: Path) -> Dict[str, Any]:
    return json.loads(path.read_text(encoding="utf-8"))


def read_library(slug: str) -> Dict[str, Any]:
    """Load a library manifest.

    Raises:
        HTTPException: 404 when the slug is invalid or no manifest exists.
    """
    path = lib_json(slug)
    if not path.exists():
        raise HTTPException(status_code=404, detail="Library not found")
    return _read_json(path)


def write_library(slug: str, data: Dict[str, Any]) -> None:
    """Persist a manifest via a temp file + rename so readers never see a partial write."""
    path = lib_json(slug)
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_suffix(".tmp")
    tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
    tmp.replace(path)


def _line_count(path: Path) -> int:
    """Count non-blank lines of ``path``; 0 when the file does not exist."""
    if not path.exists():
        return 0
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        return sum(1 for line in handle if line.strip())


def _file_uri(path_value: str) -> str:
    return f"file://{quote(path_value)}"


def _collect_library_paths(slug: str) -> Dict[str, Path]:
    """Return every well-known artifact path of a library.

    Note: this creates the ``stage`` and ``indexes`` directories as a side
    effect, so callers should confirm the library exists first.
    """
    base = lib_dir(slug)
    return {
        "base": base,
        "stage": stage_dir(slug),
        "corpus": base / "corpus.jsonl",
        "enhanced": base / "corpus.enhanced.jsonl",
        "shadow": base / "corpus.shadow.jsonl",
        "indexes": indexes_dir(slug),
        "shadow_index": indexes_dir(slug) / "shadow.index.faiss",
        "shadow_store": indexes_dir(slug) / "shadow.meta.jsonl",
        "content_index": indexes_dir(slug) / "content.index.faiss",
        "content_store": indexes_dir(slug) / "content.meta.jsonl",
    }


def library_payload(data: Dict[str, Any]) -> Dict[str, Any]:
    """Extend a manifest with derived pipeline state and artifact counts."""
    paths = _collect_library_paths(data["slug"])
    files = list(data.get("files", []))
    stages = {
        "has_files": len(files) > 0,
        "has_corpus": paths["corpus"].exists(),
        "is_enriched": paths["enhanced"].exists() and paths["shadow"].exists(),
        "is_indexed": paths["shadow_index"].exists() and paths["content_index"].exists(),
    }
    artifacts = {
        "corpus_records": _line_count(paths["corpus"]),
        "enhanced_records": _line_count(paths["enhanced"]),
        "shadow_records": _line_count(paths["shadow"]),
    }
    return {
        **data,
        "files": files,
        "states": stages,
        "artifacts": artifacts,
    }


def _walk_input_paths(paths: List[str]) -> List[Path]:
    """Expand files and directories (recursively) into resolved file paths.

    Paths that do not exist are skipped silently.
    """
    out: List[Path] = []
    for raw in paths:
        current = Path(raw).expanduser().resolve()
        if not current.exists():
            continue
        if current.is_file():
            out.append(current)
            continue
        for child in current.rglob("*"):
            if child.is_file():
                out.append(child.resolve())
    return out


def _sha256_file(path: Path) -> str:
    """Return the SHA-256 hex digest of a file, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _stage_name(sha: str, path: Path) -> str:
    """Build the staging file name ``<sha>--<sanitised original name>``."""
    safe_name = re.sub(r"[^A-Za-z0-9._-]+", "_", path.name).strip("._") or "file"
    return f"{sha}--{safe_name}"


def _is_present(path: Path) -> bool:
    """True when ``path`` exists *or* is a dangling symlink.

    ``Path.exists()`` follows symlinks and reports False once the target is
    gone, which would leave stale stage links behind.
    """
    return path.is_symlink() or path.exists()


def _job_public(job: Dict[str, Any]) -> Dict[str, Any]:
    """Return the client-facing view of a job record."""
    return {
        "id": job["id"],
        "slug": job["slug"],
        "type": job["type"],
        "status": job["status"],
        "phase": job.get("phase"),
        "progress": job.get("progress", 0.0),
        "detail": job.get("detail", ""),
        "error": job.get("error"),
        "result": job.get("result"),
        "created_at": job["created_at"],
        "finished_at": job.get("finished_at"),
    }


def _has_active_job(slug: str) -> bool:
    """True when the library has a queued or running job."""
    return any(
        job["slug"] == slug and job["status"] in {"queued", "running"}
        for job in JOBS.values()
    )


def _load_pipeline_fn(module_name: str, attr: str):
    """Import ``backend.rag.<module_name>`` (or the relative equivalent) and return ``attr``."""
    try:
        module = importlib.import_module(f"backend.rag.{module_name}")
    except ModuleNotFoundError:
        module = importlib.import_module(f".rag.{module_name}", package=__package__)
    return getattr(module, attr)


async def _run_job(job_id: str, fn_name: str, **kwargs):
    """Run one pipeline stage in the job executor and record its outcome in JOBS."""
    loop = asyncio.get_running_loop()
    job = JOBS[job_id]

    def on_progress(phase: str, pct: float, detail: str):
        # Called from the worker thread; ``pct`` is a 0..1 fraction.
        job["phase"] = phase
        job["progress"] = round(float(pct) * 100.0, 1)
        job["detail"] = detail

    job["status"] = "running"
    try:
        if fn_name == "build":
            runner = _load_pipeline_fn("corpus_builder", "run_build")
        elif fn_name == "enrich":
            runner = _load_pipeline_fn("corpus_enricher", "run_enrich")
        elif fn_name == "embed":
            runner = _load_pipeline_fn("index_builder", "run_index")
        else:
            raise RuntimeError(f"Unknown job type: {fn_name}")

        call = functools.partial(runner, on_progress=on_progress, **kwargs)
        result = await loop.run_in_executor(JOB_EXECUTOR, call)
        job["status"] = "succeeded"
        job["progress"] = 100.0
        job["phase"] = "done"
        job["detail"] = "Completed."
        job["result"] = result
    except (Exception, SystemExit) as exc:
        # SystemExit is caught too: corpus_builder calls sys.exit(1) at import
        # when a required dependency is missing, and that must fail the job
        # rather than take the whole backend down.
        job["status"] = "failed"
        job["error"] = f"{type(exc).__name__}: {exc}"
    finally:
        job["finished_at"] = now_iso()


def _start_job(slug: str, job_type: str, **kwargs) -> str:
    """Register a queued job, schedule it on the running loop, and return its id."""
    job_id = uuid.uuid4().hex
    JOBS[job_id] = {
        "id": job_id,
        "slug": slug,
        "type": job_type,
        "status": "queued",
        "phase": "queued",
        "progress": 0.0,
        "detail": "",
        "created_at": now_iso(),
        "finished_at": None,
        "result": None,
        "error": None,
    }
    task = asyncio.create_task(_run_job(job_id, job_type, **kwargs))
    # Hold a strong reference until the task completes.
    _JOB_TASKS.add(task)
    task.add_done_callback(_JOB_TASKS.discard)
    return job_id


def _build_local_context(prompt: str, results: Dict[str, Any], top_k: int = 5) -> Dict[str, Any]:
    """Format retrieval results into a prompt context block plus file:// source URIs.

    ``prompt`` is accepted for signature stability but is not used here.
    """
    # NOTE(review): the empty-string pieces below look like placeholders for
    # delimiter lines around the context; confirm against the intended prompt
    # format before relying on them.
    sources = results.get("sources") or []
    selected = sources[: max(1, top_k)]
    if not selected:
        context_block = (
            "\n"
            "No useful results were found in the selected local knowledge base.\n"
            ""
        )
        return {"context_block": context_block, "sources": []}

    blocks: List[str] = [""]
    file_sources: List[str] = []
    for idx, source in enumerate(selected, start=1):
        title = (source.get("title") or Path(source.get("url") or source.get("doc_id") or f"Source {idx}").name).strip()
        snippet = re.sub(r"\s+", " ", (source.get("snippet") or "")).strip()
        if len(snippet) > 1400:
            snippet = snippet[:1400].rstrip() + "..."
        raw_path = source.get("url") or source.get("doc_id") or ""
        # Only absolute local paths become clickable file:// sources.
        if raw_path and os.path.isabs(raw_path):
            file_sources.append(_file_uri(raw_path))
        blocks.append(f"[L{idx}] {title}\n{snippet}")
    blocks.append("")
    blocks.append(
        "Use the local knowledge base context when it is relevant. "
        "If it does not answer the question, say so clearly instead of inventing details."
    )
    return {"context_block": "\n".join(blocks), "sources": file_sources}


@router.get("/libraries")
def list_libraries():
    """List all libraries, newest first; unreadable manifests are skipped."""
    libraries: List[Dict[str, Any]] = []
    for path in LIB_ROOT.iterdir():
        if not path.is_dir():
            continue
        meta = path / "library.json"
        if not meta.exists():
            continue
        try:
            libraries.append(library_payload(_read_json(meta)))
        except Exception:
            # Best effort: one corrupt manifest must not hide the others.
            continue
    libraries.sort(key=lambda item: item.get("created_at", ""), reverse=True)
    return {"libraries": libraries}


@router.post("/libraries")
def create_library(req: CreateLibraryRequest):
    """Create a library, suffixing the slug with ``-2``, ``-3``, ... on collision."""
    slug = slugify(req.name)
    base_slug = slug
    idx = 2
    while lib_dir(slug).exists():
        slug = f"{base_slug}-{idx}"
        idx += 1
    data = default_library_data(req.name, slug)
    stage_dir(slug)
    indexes_dir(slug)
    write_library(slug, data)
    return library_payload(data)


@router.get("/libraries/{slug}")
def get_library(slug: str):
    return library_payload(read_library(slug))


@router.patch("/libraries/{slug}")
def rename_library(slug: str, req: RenameLibraryRequest):
    """Change the display name; a blank name keeps the current one."""
    data = read_library(slug)
    data["name"] = req.name.strip() or data["name"]
    write_library(slug, data)
    return library_payload(data)


@router.delete("/libraries/{slug}")
def delete_library(slug: str):
    """Delete a library directory and everything in it."""
    path = lib_dir(slug)  # validates the slug, so this can never leave LIB_ROOT
    if not path.is_dir():
        raise HTTPException(status_code=404, detail="Library not found")
    shutil.rmtree(path)
    return {"ok": True}


@router.post("/libraries/{slug}/files/register")
def register_paths(slug: str, req: RegisterPathsRequest):
    """Register files (or whole folders) by symlinking them into the stage directory.

    Files are de-duplicated by SHA-256. Duplicates and unreadable files are
    reported under ``skipped``.
    """
    data = read_library(slug)
    stage = stage_dir(slug)
    existing = {entry.get("sha256"): entry for entry in data.get("files", [])}
    added: List[Dict[str, Any]] = []
    skipped: List[str] = []

    for file_path in _walk_input_paths(req.paths):
        try:
            sha = _sha256_file(file_path)
            size = file_path.stat().st_size
        except OSError:
            # Unreadable or vanished file: skip it instead of failing the batch.
            skipped.append(str(file_path))
            continue
        if sha in existing:
            skipped.append(str(file_path))
            continue
        stage_name = _stage_name(sha, file_path)
        symlink_path = stage / stage_name
        # Also replaces dangling links, which exists() alone would miss and
        # which would make symlink_to() raise FileExistsError.
        if _is_present(symlink_path):
            symlink_path.unlink()
        symlink_path.symlink_to(file_path)
        entry = {
            "sha256": sha,
            "path": str(file_path),
            "rel": stage_name,
            "name": file_path.name,
            "size": size,
            "added_at": now_iso(),
        }
        data.setdefault("files", []).append(entry)
        added.append(entry)
        existing[sha] = entry

    write_library(slug, data)
    return {
        "added": added,
        "skipped": skipped,
        "library": library_payload(data),
    }


@router.delete("/libraries/{slug}/files")
def remove_file(slug: str, req: RemoveFileRequest):
    """Unregister one staged file, identified by its ``rel`` stage name.

    Raises:
        HTTPException: 404 when ``rel`` is not a plain file name or is not
            registered in this library. Nothing is modified in that case.
    """
    data = read_library(slug)
    rel = req.rel
    # ``rel`` comes from the request body; only a bare file name may be joined
    # onto the stage directory.
    if not rel or rel in {".", ".."} or "\\" in rel or Path(rel).name != rel:
        raise HTTPException(status_code=404, detail="File not found")

    files = data.get("files", [])
    remaining = [entry for entry in files if entry.get("rel") != rel]
    if len(remaining) == len(files):
        raise HTTPException(status_code=404, detail="File not found")

    data["files"] = remaining
    symlink_path = stage_dir(slug) / rel
    if _is_present(symlink_path):
        symlink_path.unlink()
    write_library(slug, data)
    return {"ok": True, "library": library_payload(data)}


@router.post("/libraries/{slug}/jobs/build")
async def build_library(slug: str):
    """Start a corpus build job for the library's staged files."""
    data = read_library(slug)
    if not data.get("files"):
        raise HTTPException(status_code=400, detail="Add files before building a library.")
    lock = LIB_LOCKS.setdefault(slug, asyncio.Lock())
    async with lock:
        if _has_active_job(slug):
            raise HTTPException(status_code=409, detail="This library already has an active job.")
        job_id = _start_job(
            slug,
            "build",
            root=stage_dir(slug),
            out=_collect_library_paths(slug)["corpus"],
        )
    return {"job_id": job_id}


@router.post("/libraries/{slug}/jobs/enrich")
async def enrich_library(slug: str):
    """Start an enrichment job; requires an existing corpus."""
    read_library(slug)  # 404 first, so unknown slugs do not get directories created
    paths = _collect_library_paths(slug)
    if not paths["corpus"].exists():
        raise HTTPException(status_code=400, detail="Build the corpus before enrichment.")
    lock = LIB_LOCKS.setdefault(slug, asyncio.Lock())
    async with lock:
        if _has_active_job(slug):
            raise HTTPException(status_code=409, detail="This library already has an active job.")
        job_id = _start_job(
            slug,
            "enrich",
            inp=paths["corpus"],
            out=paths["enhanced"],
            shadow_out=paths["shadow"],
        )
    return {"job_id": job_id}


@router.post("/libraries/{slug}/jobs/embed")
async def embed_library(slug: str, req: EmbedLibraryRequest):
    """Start an embedding/index job; enriched inputs are used only when present."""
    read_library(slug)  # 404 first, so unknown slugs do not get directories created
    paths = _collect_library_paths(slug)
    if not paths["corpus"].exists():
        raise HTTPException(status_code=400, detail="Build the corpus before indexing.")
    lock = LIB_LOCKS.setdefault(slug, asyncio.Lock())
    async with lock:
        if _has_active_job(slug):
            raise HTTPException(status_code=409, detail="This library already has an active job.")
        job_id = _start_job(
            slug,
            "embed",
            raw=paths["corpus"],
            enhanced=paths["enhanced"] if paths["enhanced"].exists() else None,
            shadow=paths["shadow"] if paths["shadow"].exists() else None,
            out_dir=paths["indexes"],
            embed_model=req.embed_model,
            ollama=req.ollama,
            target_chars=req.target_chars,
            overlap_chars=req.overlap_chars,
            concurrency=req.concurrency,
        )
    return {"job_id": job_id}


@router.get("/jobs")
def list_jobs(slug: Optional[str] = None):
    """List jobs, newest first, optionally filtered by library slug."""
    jobs = [_job_public(job) for job in JOBS.values() if slug is None or job["slug"] == slug]
    jobs.sort(key=lambda item: item.get("created_at", ""), reverse=True)
    return {"jobs": jobs}


@router.get("/jobs/{job_id}")
def get_job(job_id: str):
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    return _job_public(job)


@router.post("/libraries/{slug}/context")
def library_context(slug: str, req: LibraryContextRequest):
    """Query an indexed library and return a context block for the chat prompt.

    Raises:
        HTTPException: 404 for an unknown library, 400 when it is not indexed,
            500 when retrieval itself fails.
    """
    read_library(slug)  # 404 first, so unknown slugs do not get directories created
    paths = _collect_library_paths(slug)
    if not paths["shadow_index"].exists() or not paths["content_index"].exists():
        raise HTTPException(status_code=400, detail="Index the library before using it in chat.")
    try:
        run_query = _load_pipeline_fn("unified_rag", "run_query")
        result = run_query(
            shadow_index=paths["shadow_index"],
            shadow_store=paths["shadow_store"],
            content_index=paths["content_index"],
            content_store=paths["content_store"],
            query=req.prompt,
            answer=False,
            ollama=req.ollama,
            embed_model=req.embed_model,
            gen_model=req.gen_model,
            no_rerank=True,
            k=max(1, req.top_k),
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Local retrieval failed: {type(exc).__name__}: {exc}") from exc

    context = _build_local_context(req.prompt, result, top_k=req.top_k)
    return {
        "context_block": context["context_block"],
        "sources": context["sources"],
        "result": result,
    }
import models, schemas from .database import Base, engine, SessionLocal, ensure_sources_column +from .local_rag import router as local_rag_router from .ollama_client import list_models as ollama_list, chat as ollama_chat, chat_stream as ollama_chat_stream from .websearch import enrich_prompt @@ -25,6 +26,7 @@ app.add_middleware( allow_methods=["*"], allow_headers=["*"], ) +app.include_router(local_rag_router) def get_db(): db = SessionLocal() @@ -331,8 +333,11 @@ async def websearch_route(req: schemas.WebSearchRequest): searx_url=req.searx_url, engines=req.engines, ) - return {"enriched_prompt": enriched, "sources": sources} + context_block = "" + if "" in enriched: + context_block = enriched[enriched.index(""):].strip() + return {"enriched_prompt": enriched, "sources": sources, "context_block": context_block} except Exception: - return {"enriched_prompt": req.prompt, "sources": []} + return {"enriched_prompt": req.prompt, "sources": [], "context_block": ""} # To run standalone: python -m uvicorn backend.main:app --host 127.0.0.1 --port 8000 diff --git a/backend/rag/__init__.py b/backend/rag/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/rag/corpus_builder.py b/backend/rag/corpus_builder.py new file mode 100644 index 0000000..c7ddcaf --- /dev/null +++ b/backend/rag/corpus_builder.py @@ -0,0 +1,1741 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +Build a JSONL corpus from a folder (recurses subdirectories). + +What it does (type-specific): + • PDF: PyMuPDF extraction (multi-column); OCR scanned PDFs via ocrmypdf. + • HTML: strip chrome; split into H1/H2 sections. + • Text: encoding-sniffed read. + • EPUB: extract spine sections (BS4) + OCR embedded images; optional EPUB→PDF fallback. + • Audio/Video: ffmpeg → mono 16k WAV → slice into N overlapping parts → multi-process Whisper (base) → merge. + • Images: detect text-like → Tesseract OCR; otherwise VLM description via Ollama (qwen2.5vl); OCR→VLM fallback if empty. 
# -------------------------
# Async writer (chunked + optional rotation)
# -------------------------

_writer_q: Optional[queue.Queue] = None
_writer_thread: Optional[threading.Thread] = None


def start_writer(out_path: Path, rotate_mb: int, queue_max: int):
    """Start the background JSONL writer thread.

    Args:
        out_path: File to append to (opened in UTF-8 append mode).
        rotate_mb: Close and reopen the file after roughly this many MiB have
            been written; 0 disables rotation.
        queue_max: Maximum number of pending chunks; producers block when the
            queue is full. Values below 1 are treated as 1.

    If a writer is already running it is drained and stopped first, so two
    writer threads never consume from the same queue.
    """
    global _writer_q, _writer_thread
    if _writer_thread is not None:
        stop_writer()

    # Bound locally so the thread keeps draining *its own* queue even after
    # the module globals are reset or replaced.
    chunk_q: queue.Queue = queue.Queue(maxsize=max(1, queue_max))
    rotate_bytes = rotate_mb * 1024 * 1024 if rotate_mb else 0

    def _run():
        bytes_since_rotate = 0
        fh = open(out_path, "a", encoding="utf-8", buffering=1 << 20)  # 1 MiB buffer
        try:
            while True:
                chunk = chunk_q.get()
                if chunk is None:  # sentinel from stop_writer()
                    break
                fh.write(chunk)
                bytes_since_rotate += len(chunk.encode("utf-8", "ignore"))
                if rotate_bytes and bytes_since_rotate >= rotate_bytes:
                    fh.flush()
                    fh.close()
                    fh = open(out_path, "a", encoding="utf-8", buffering=1 << 20)
                    bytes_since_rotate = 0
        finally:
            try:
                fh.flush()
                fh.close()
            except Exception:
                pass

    _writer_q = chunk_q
    _writer_thread = threading.Thread(target=_run, daemon=True)
    _writer_thread.start()


def enqueue_records_chunked(records: List["Record"], chunk_size: int):
    """Serialize records in small batches to keep latency/GC sane.

    Args:
        records: Dataclass instances; each becomes one JSON line.
        chunk_size: Records per queued chunk (values below 1 are treated as 1).

    Raises:
        RuntimeError: If there are records to write but no writer is running.
    """
    if not records:
        return
    chunk_q = _writer_q
    if chunk_q is None:
        raise RuntimeError("writer is not running; call start_writer() first")
    step = max(1, int(chunk_size))
    for i in range(0, len(records), step):
        batch = records[i:i + step]
        chunk = "".join(json.dumps(asdict(r), ensure_ascii=False) + "\n" for r in batch)
        chunk_q.put(chunk)


def stop_writer():
    """Flush pending chunks, stop the writer thread, and reset writer state.

    Safe to call when no writer is running and safe to call twice.
    """
    global _writer_q, _writer_thread
    chunk_q, thread = _writer_q, _writer_thread
    _writer_q = None
    _writer_thread = None
    # Only signal a live consumer; putting into a full queue nobody drains
    # would block forever.
    if chunk_q is not None and thread is not None and thread.is_alive():
        chunk_q.put(None)
    if thread is not None:
        thread.join()

# -------------------------
# Crash diagnostics
# -------------------------

try:
    faulthandler.enable()
    for _sig in (signal.SIGSEGV, signal.SIGBUS, signal.SIGABRT):
        try:
            faulthandler.register(_sig, chain=True)
        except Exception:
            pass
except Exception:
    pass

# -------------------------
# Subprocess isolation helper (for crashy libs)
# -------------------------

def _subproc_entry(conn, func, path, args):
    """Run `func(path, args)` in a clean process and send back (status, payload)."""
    try:
        recs = func(path, args)
        conn.send(("ok", recs))
    except Exception as e:
        conn.send(("err", f"{type(e).__name__}: {e}"))
    finally:
        try:
            conn.close()
        except Exception:
            pass


def run_isolated(func, path, args, *, timeout=900, cancellation_event: Optional[Any] = None):
    """
    Run a CPU/IO-heavy function in a child process.
    If the child segfaults, times out, or crashes, we return a synthetic error.

    Args:
        func: Callable invoked as ``func(path, args)`` in the child.
        path, args: Forwarded to ``func`` unchanged.
        timeout: Seconds to wait before the child is terminated; ``None``
            waits indefinitely.
        cancellation_event: Optional event-like object with ``is_set()``.
            When set, the child is terminated.

    Returns:
        ``(payload, None)`` on success, otherwise ``([], reason)`` where
        ``reason`` is ``"cancelled"`` or starts with ``"isolated-"``.
    """
    ctx = mp_context.get_context("fork" if sys.platform == "darwin" else "spawn")
    parent_conn, child_conn = ctx.Pipe(duplex=False)
    p = ctx.Process(target=_subproc_entry, args=(child_conn, func, path, args), daemon=True)
    p.start()
    try:
        # Drop the parent's copy of the write end so EOF is observable.
        child_conn.close()
        deadline = None if timeout is None else time.monotonic() + float(timeout)
        status, payload = ("err", "crash")
        while True:
            if cancellation_event is not None and cancellation_event.is_set():
                p.terminate()
                p.join()
                return [], "cancelled"
            if parent_conn.poll(0.1):  # doubles as the loop's pacing sleep
                status, payload = parent_conn.recv()
                break
            if not p.is_alive():
                # The child may have written its result just before exiting;
                # drain the pipe once more before declaring a crash.
                if parent_conn.poll(0):
                    status, payload = parent_conn.recv()
                break
            if deadline is not None and time.monotonic() >= deadline:
                p.terminate()
                p.join()
                return [], f"isolated-err: timeout after {timeout}s"

        if status == "ok":
            return payload, None
        else:
            return [], f"isolated-{status}: {payload}"

    except EOFError:
        # Pipe closed without a message: the child died before reporting.
        return [], "isolated-eof"
    except Exception as e:
        return [], f"isolated-exception: {e}"
    finally:
        try:
            parent_conn.close()
        except Exception:
            pass
        if p.is_alive():
            p.terminate()
        p.join()

try:
    mp_context.set_start_method("fork")
except (RuntimeError, ValueError):
    # RuntimeError: start method already set. ValueError: "fork" is not
    # available on this platform.
    pass
# -------------------------
# CLI args
# -------------------------

def parse_args(argv: Optional[List[str]] = None):
    """Parse the corpus-builder command line.

    Args:
        argv: Argument list to parse. ``None`` (the default) reads
            ``sys.argv[1:]``, which is the previous behaviour; passing a list
            lets programmatic callers and tests build the same namespace
            without touching process arguments.

    Returns:
        The populated ``argparse.Namespace``.
    """
    p = argparse.ArgumentParser(description="Build a JSONL corpus from a folder")

    # Root input (recurses)
    p.add_argument("--root", help="Path to input root directory")
    p.add_argument("--mirror", help="(Deprecated) Path to website mirror root (alias of --root)")
    p.add_argument("--out", required=True, help="Output JSONL file path")
    p.add_argument("--workers", type=int, default=os.cpu_count() or 4, help="Concurrent per-file workers")
    p.add_argument("--verbose", action="store_true", help="Verbose logging")

    # Emission granularity
    p.add_argument("--emit", choices=["per-file", "per-page", "per-section", "auto"], default="auto",
                   help="Granularity: per-file, per-page (PDF), per-section (EPUB/HTML), or auto")
    p.add_argument("--emit-av", choices=["joined", "slices", "both"], default="joined",
                   help="For audio/video: emit one joined record, per-slice records, or both")

    # PDF/EPUB/HTML specifics
    p.add_argument("--ocr-page-jobs", type=int, default=1, help="Per-PDF page concurrency for ocrmypdf --jobs")
    p.add_argument("--ocr-lang", default="eng", help="Tesseract language(s), e.g. 'eng+deu'")
    p.add_argument("--max-cols", type=int, default=4, help="Maximum columns to consider per PDF page")
    p.add_argument("--epub-strategy", choices=["direct", "pdf-fallback", "force-pdf"], default="pdf-fallback",
                   help="EPUB handling: try direct, fallback to PDF; or always convert to PDF")
    p.add_argument("--pdf-page-workers", type=int, default=0,
                   help="Threads per PDF for page extraction (0=auto: min(4, cpu)). Only used when emitting per-page/auto.")
    p.add_argument("--html-section-workers", type=int, default=0,
                   help="Threads per HTML for per-section record building (0=auto: min(4, cpu)).")

    # Include/Exclude
    p.add_argument(
        "--include",
        default=(
            r".*\.(?:pdf|html?|txt|md|rst|epub|"
            r"png|jpe?g|gif|bmp|tiff?|webp|heic|"
            r"mp3|wav|m4a|flac|ogg|opus|aac|"
            r"mp4|mkv|mov|webm|avi|ts|"
            r"py|ipynb|js|ts|tsx|jsx|java|c|cpp|rs|go|rb|php|cs|swift|kt|m|sh|bat|ps1|sql)$"
        ),
        help="Regex for files to include"
    )
    p.add_argument(
        "--exclude",
        default=r"(^|[\\/])\.|__MACOSX([\\/]|$)|\.DS_Store$|\.ocr\.txt$",
        help="Regex for files/paths to exclude"
    )

    # ASR (Whisper-base, multi-process slices)
    p.add_argument("--whisper-model", default="base", help="OpenAI Whisper model size (tiny, base, small, …)")
    p.add_argument("--num-slices", type=int, default=8, help="Number of equal slices per media file")
    p.add_argument("--overlap-sec", type=float, default=1.0, help="Overlap seconds between slices")
    p.add_argument("--max-overlap-words", type=int, default=7, help="Max words to align/dedup across slice boundaries")
    p.add_argument("--mp-workers", type=int, default=0, help="Multiprocessing workers (0 -> use num-slices)")
    p.add_argument("--asr-task", choices=["transcribe", "translate"], default="transcribe",
                   help="Whisper task: transcribe (original language) or translate (to English)")
    p.add_argument("--max-av-duration", type=float, default=5*3600, help="Hard cap (seconds) for audio/video")

    # NEW: device control (avoid MPS crash by default)
    p.add_argument("--whisper-device", choices=["auto","cpu","cuda","mps"], default="auto",
                   help="Device for Whisper slices. Default 'auto' prefers CUDA, otherwise CPU (not MPS).")

    # Ollama (images, code)
    p.add_argument("--ollama-host", default="http://localhost:11434", help="Ollama host URL")
    p.add_argument("--vlm-model", default="qwen2.5vl:7b", help="Vision LLM model for image description")
    p.add_argument("--code-llm", default="qwen3:4b", help="Code summarizer model")
    p.add_argument("--llm-parallel", type=int, default=1, help="Parallel LLM calls (Ollama)")

    # Images
    p.add_argument("--image-max-edge", type=int, default=1600, help="Resize longest edge before VLM to save VRAM")

    # Image OCR gate + thresholds
    p.add_argument("--image-text-gate",
                   choices=["tesseract-conf", "vlm-gate", "always-ocr", "always-vlm"],
                   default="tesseract-conf",
                   help="How to decide OCR vs VLM for images.")
    p.add_argument("--ocr-psms", default="6,11",
                   help="Comma-separated PSMs to probe for OCR gating (e.g. '6,11').")
    p.add_argument("--ocr-min-conf", type=int, default=55,
                   help="Minimum median word confidence to accept OCR.")
    p.add_argument("--ocr-min-words", type=int, default=10,
                   help="Minimum word count to accept OCR.")
    p.add_argument("--ocr-min-alnum", type=float, default=0.55,
                   help="Minimum alnum ratio over non-space printable chars to accept OCR.")

    # Code
    p.add_argument("--code-max-bytes", type=int, default=200_000, help="Read at most N bytes from code files")

    # Language hints/detection
    p.add_argument("--lang-hint", default=None, help="Optional language hint for OCR")
    p.add_argument("--lang-detect", action="store_true", default=True, help="Detect language of each record")
    p.add_argument("--no-lang-detect", dest="lang_detect", action="store_false")

    # Writer tuning
    p.add_argument("--writer-queue", type=int, default=64, help="Max queued chunks to the writer thread")
    p.add_argument("--writer-chunk", type=int, default=256, help="Records per JSONL chunk enqueued to writer")
    p.add_argument("--writer-rotate-mb", type=int, default=0, help="Rotate (close/reopen) writer every N MB; 0=off")

    # External tools
    p.add_argument("--ffmpeg", default=shutil.which("ffmpeg") or "/usr/bin/ffmpeg", help="Path to ffmpeg")
    p.add_argument("--ffprobe", default=shutil.which("ffprobe") or "/usr/bin/ffprobe", help="Path to ffprobe")
    p.add_argument("--tesseract", default=shutil.which("tesseract") or "/usr/bin/tesseract", help="Path to tesseract")
    p.add_argument("--ebook-convert", dest="ebook_convert", default=shutil.which("ebook-convert"), help="Path to Calibre's ebook-convert (optional)")
    p.add_argument("--pandoc", default=shutil.which("pandoc"), help="Path to pandoc (optional)")

    return p.parse_args(argv)

# -------------------------
# Utilities
# -------------------------

def log(msg: str, *, verbose: bool = True):
    """Print ``msg`` (flushed immediately) unless ``verbose`` is false."""
    if verbose:
        print(msg, flush=True)

def ensure_parent(path: Path):
    """Create the parent directory of ``path`` (and any ancestors) if missing."""
    path.parent.mkdir(parents=True, exist_ok=True)
def detect_encoding(b: bytes) -> str:
    """Guess the text encoding of ``b``.

    Returns the encoding name reported by ``chardet`` when that optional
    module is available and yields a result; otherwise ``"utf-8"``.
    """
    if chardet is None:
        return "utf-8"
    guess = chardet.detect(b) or {}
    return guess.get("encoding") or "utf-8"


def read_text_file(path: Path) -> str:
    """Read ``path`` and decode it to ``str`` without ever raising on bad bytes.

    The encoding comes from :func:`detect_encoding`; undecodable bytes are
    replaced rather than rejected.
    """
    data = path.read_bytes()
    enc = detect_encoding(data)
    try:
        return data.decode(enc, errors="replace")
    except (LookupError, ValueError):
        # LookupError: the detected codec name is unknown to Python.
        return data.decode("utf-8", errors="replace")


def run_cmd(cmd: List[str], *, cwd: Optional[Path] = None, env: Optional[Dict[str, str]] = None) -> subprocess.CompletedProcess:
    """Run an external command and capture its combined stdout/stderr as text.

    Args:
        cmd: Program and arguments (no shell is involved).
        cwd: Optional working directory.
        env: Optional environment mapping passed to the child.

    Returns:
        A ``CompletedProcess`` whose ``stdout`` holds stdout and stderr
        interleaved. If the program cannot be started at all (missing binary,
        permission denied) a synthetic result with ``returncode == 127`` is
        returned, so callers that only inspect ``returncode`` treat it like
        any other tool failure instead of crashing.
    """
    try:
        return subprocess.run(
            cmd,
            cwd=str(cwd) if cwd else None,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",  # tool output is not guaranteed to be valid in the locale encoding
        )
    except OSError as exc:
        return subprocess.CompletedProcess(cmd, 127, stdout=str(exc))


def ffprobe_json(ffprobe_bin: str, media_path: Path) -> Optional[Dict]:
    """Return ffprobe's format/stream description of ``media_path`` as a dict.

    Returns ``None`` when ffprobe fails or its output is not valid JSON.
    """
    cmd = [ffprobe_bin, "-v", "error", "-print_format", "json", "-show_format", "-show_streams", str(media_path)]
    res = run_cmd(cmd)
    if res.returncode != 0:
        return None
    try:
        return json.loads(res.stdout)
    except (TypeError, ValueError):
        return None


def extract_audio_wav(ffmpeg_bin: str, input_path: Path, out_wav: Path, *, samplerate=16000) -> bool:
    """Convert ``input_path`` to a mono WAV at ``samplerate`` Hz via ffmpeg.

    Overwrites ``out_wav`` if it exists. Returns ``True`` on exit status 0.
    """
    cmd = [ffmpeg_bin, "-y", "-i", str(input_path), "-ac", "1", "-ar", str(samplerate), "-f", "wav", str(out_wav)]
    return run_cmd(cmd).returncode == 0


def try_mutool_clean(in_pdf: Path) -> Optional[Path]:
    """Rewrite ``in_pdf`` with ``mutool clean -gg`` into a temporary file.

    Returns the path of the cleaned copy (the caller is responsible for
    deleting it), or ``None`` when mutool is not installed or the run failed.
    """
    if not shutil.which("mutool"):
        return None
    fd, name = tempfile.mkstemp(suffix=".clean.pdf")
    os.close(fd)  # mkstemp hands back an open descriptor; only the path is needed
    tmp = Path(name)
    res = run_cmd(["mutool", "clean", "-gg", str(in_pdf), str(tmp)])
    if res.returncode == 0 and tmp.exists():
        return tmp
    # Failed run: do not leave the placeholder file behind.
    try:
        tmp.unlink()
    except OSError:
        pass
    return None


def pdftotext_fallback(in_pdf: Path) -> str:
    """Extract layout-preserving text from ``in_pdf`` with ``pdftotext``.

    Returns an empty string when pdftotext is not installed. The temporary
    output file is always removed.
    """
    if not shutil.which("pdftotext"):
        return ""
    fd, name = tempfile.mkstemp(suffix=".txt")
    os.close(fd)  # mkstemp hands back an open descriptor; only the path is needed
    tmp = Path(name)
    try:
        run_cmd(["pdftotext", "-layout", "-enc", "UTF-8", str(in_pdf), str(tmp)])
        return tmp.read_text("utf-8", errors="ignore")
    finally:
        try:
            tmp.unlink()
        except OSError:
            pass


# ---- Ollama HTTP helpers
def ollama_generate(host: str, model: str, prompt: str, images_b64: Optional[List[str]] = None, options: Optional[Dict] = None, stream: bool = False) -> str:
    """Call Ollama's ``/api/generate`` endpoint and return the generated text.

    Args:
        host: Base URL of the Ollama server.
        model: Model name to run.
        prompt: Prompt text.
        images_b64: Optional base64-encoded images for vision models.
        options: Optional model options (sent as ``options``).
        stream: Request a streamed reply. The streamed chunks are consumed
            here and concatenated, so the return value is the full text in
            both modes.

    Raises:
        RuntimeError: ``requests`` is not installed.
        requests.HTTPError: the server answered with an error status.
    """
    try:
        import requests
    except ImportError as e:
        raise RuntimeError("The 'requests' package is required for Ollama calls. Install with: pip install requests") from e
    payload = {"model": model, "prompt": prompt, "stream": stream}
    if images_b64:
        payload["images"] = images_b64
    if options:
        payload["options"] = options
    url = f"{host.rstrip('/')}/api/generate"
    with requests.post(url, json=payload, timeout=600, stream=stream) as resp:
        resp.raise_for_status()
        if not stream:
            return resp.json().get("response", "")
        # Streaming replies are newline-delimited JSON objects, each carrying
        # a fragment in "response"; resp.json() cannot parse that body.
        parts: List[str] = []
        for raw in resp.iter_lines():
            if not raw:
                continue
            chunk = json.loads(raw)
            parts.append(chunk.get("response", ""))
            if chunk.get("done"):
                break
        return "".join(parts)


def encode_image_b64(path: Path, max_edge: int = 1600) -> str:
    """Return ``path`` as a base64 string, downscaled and re-encoded as JPEG.

    When Pillow is unavailable or cannot open the file, the raw file bytes
    are encoded unchanged. Otherwise the image is converted to RGB, shrunk so
    its longest edge is at most ``max_edge`` pixels, and saved as JPEG
    (quality 90) in memory.
    """
    import io

    if Image is None:
        return base64.b64encode(path.read_bytes()).decode("ascii")
    try:
        with Image.open(path) as src:
            img = src.convert("RGB")
    except Exception:
        return base64.b64encode(path.read_bytes()).decode("ascii")
    w, h = img.size
    longest = max(w, h)
    if longest > max_edge:
        ratio = max_edge / float(longest)
        # Clamp to 1px so extreme aspect ratios cannot yield a zero-sized image.
        img = img.resize((max(1, int(w * ratio)), max(1, int(h * ratio))))
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=90)
    return base64.b64encode(buf.getvalue()).decode("ascii")


# ---- LLM hygiene / language detection
def sanitize_llm_text(s: str) -> str:
    """Normalize raw LLM/OCR output.

    Removes reasoning blocks and Markdown code-fence markers, collapses runs
    of spaces/tabs to a single space, limits blank-line runs to one blank
    line, and strips the result.
    """
    # NOTE(review): the pattern under review was a bare r".*?" which, with
    # re.S, matches every character and erases the whole string. The re.S|re.I
    # flags point to a tag pair; restored as <think>…</think> — confirm
    # against the upstream file.
    s = re.sub(r"<think>.*?</think>", "", s, flags=re.S | re.I)
    s = re.sub(r"^\s*```(?:\w+)?\s*|\s*```\s*$", "", s, flags=re.M)
    s = re.sub(r"[ \t]+", " ", s)
    s = re.sub(r"\n{3,}", "\n\n", s)
    return s.strip()


def detect_language(text: str) -> Optional[str]:
    """Return a language code for ``text``, or ``None`` if it cannot be determined.

    Texts longer than 3000 characters are sampled (first, middle and last
    1000 characters). ``langid`` is tried first, then ``langdetect``; a
    failure in one detector does not prevent the other from being tried.
    """
    text = (text or "").strip()
    if not text:
        return None
    n = len(text)
    if n > 3000:
        mid = n // 2
        sample = "\n".join((text[:1000], text[mid:mid + 1000], text[-1000:]))
    else:
        sample = text
    if langid is not None:
        try:
            lang, _ = langid.classify(sample)
            return lang
        except Exception:
            pass  # fall through to the second detector
    if _ld_detect is not None:
        try:
            return _ld_detect(sample)
        except Exception:
            pass
    return None

#
------------------------- +# Image text-likeness detection (optional) +# ------------------------- + +def image_is_textlike(path: Path) -> bool: + try: + if cv2 is not None and np is not None: + data = np.fromfile(str(path), dtype=np.uint8) + img = cv2.imdecode(data, cv2.IMREAD_GRAYSCALE) + if img is None: + return False + h, w = img.shape[:2] + scale = max(h, w) + if scale > 1800: + r = 1800.0 / scale + img = cv2.resize(img, (int(w*r), int(h*r)), interpolation=cv2.INTER_AREA) + thr = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY, 35, 11) + contours, _ = cv2.findContours(thr, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + if not contours: + return False + areas = [cv2.contourArea(c) for c in contours] + small = [a for a in areas if 10 < a < 5000] + density = len(small) / (img.shape[0]*img.shape[1] / 1e5) + return density > 8 + else: + if Image is None or np is None: + return False + img = Image.open(path).convert("L") + w, h = img.size + if max(w, h) > 1800: + r = 1800.0 / max(w, h) + img = img.resize((int(w*r), int(h*r))) + arr = np.array(img, dtype=np.float32) + dx = np.abs(np.diff(arr, axis=1)) + dy = np.abs(np.diff(arr, axis=0)) + edge_ratio = (np.pad((dx[:, :-1]**2 + dy[:-1, :]**2)**0.5, ((0,1),(0,1))) > 25).mean() + thresh = (arr > 200).mean() + (arr < 55).mean() + return (edge_ratio > 0.15) and (thresh > 0.25) + except Exception: + return False + +# ------------------------- +# PDF helpers +# ------------------------- + +def is_probably_scanned(pdf_path: Path, sample_pages: int = 3) -> bool: + try: + with fitz.open(pdf_path) as doc: + n = min(len(doc), max(1, sample_pages)) + text_len = 0 + for i in range(n): + page = doc.load_page(i) + txt = page.get_text("text") + text_len += len(txt.strip()) + return text_len < 50 * n + except Exception: + return True + +def ocrmypdf_searchable(in_pdf: Path, out_pdf: Path, lang: str, page_jobs: int, verbose: bool) -> Tuple[bool, str]: + base_cmd = [ + "ocrmypdf", + "--skip-text", + 
"--optimize", "0", + "--rotate-pages", + "--deskew", + "--jobs", str(max(1, page_jobs)), + "--tesseract-timeout", "120", + "--output-type", "pdf", + "--language", lang, + ] + base_cmd.append("--verbose" if verbose else "-q") + cmd = base_cmd + [str(in_pdf), str(out_pdf)] + res = run_cmd(cmd) + out = res.stdout or "" + if "NotImplementedError: --remove-background" in out or "--remove-background is temporarily not implemented" in out: + log(f"[INFO] {in_pdf.name}: retrying without --remove-background", verbose=verbose) + res = run_cmd(cmd) + out = res.stdout or "" + ok = res.returncode == 0 + if not ok and "NotImplementedError" in out: + log(f"[INFO] {in_pdf.name}: quality retry (psm=3, cleanup=on)", verbose=verbose) + cmd_retry = base_cmd + ["--tesseract-pagesegmode", "3", "--clean-final"] + [str(in_pdf), str(out_pdf)] + res = run_cmd(cmd_retry) + out = res.stdout or "" + ok = res.returncode == 0 + return ok, out + +def segment_columns(blocks: List[Tuple], max_cols: int) -> List[List[Tuple]]: + if not blocks: + return [] + tblocks = [b for b in blocks if isinstance(b[4], str) and b[4].strip()] + if not tblocks: + return [] + xs = [] + for b in tblocks: + x0, y0, x1, y1, txt, *_ = b + xs.append(((x0 + x1) / 2.0, b)) + xs.sort(key=lambda t: t[0]) + centers = [v for v,_ in xs] + gaps = [] + for i in range(1, len(centers)): + gaps.append((centers[i] - centers[i-1], i)) + gaps.sort(reverse=True, key=lambda t: t[0]) + splits = sorted(idx for _, idx in gaps[:max(0, max_cols-1)]) + columns: List[List[Tuple]] = [] + last = 0 + for s in splits: + col = [b for _, b in xs[last:s]] + if col: + columns.append(col) + last = s + col = [b for _, b in xs[last:]] + if col: + columns.append(col) + if len(columns) <= 1: + columns = [[b for _, b in xs]] + for col in columns: + col.sort(key=lambda b: (b[1], b[0])) + return columns + +def extract_pdf_text(pdf_path: Path, max_cols: int, verbose: bool) -> str: + texts: List[str] = [] + with fitz.open(pdf_path) as doc: + for pno in 
range(len(doc)): + page = doc.load_page(pno) + blocks = page.get_text("blocks") + if not blocks: + continue + blocks = [b for b in blocks if isinstance(b[4], str) and b[4].strip()] + if not blocks: + continue + cols = segment_columns(blocks, max_cols=max_cols) + page_lines: List[str] = [] + for col in cols: + for x0,y0,x1,y1,txt,*_ in col: + t = re.sub(r"\s+", " ", txt.strip()) + if t: + page_lines.append(t) + if page_lines: + texts.append("\n".join(page_lines)) + return "\n\n".join(texts).strip() + +# ------------------------- +# HTML helpers +# ------------------------- + +def split_html_sections(html_text: str) -> List[Dict[str, Any]]: + soup = BeautifulSoup(html_text, "html.parser") + for tag in soup(["script", "style", "noscript", "nav", "header", "footer"]): + tag.decompose() + + sections: List[Dict[str, Any]] = [] + current = {"title": None, "parts": []} + + def flush(): + if current["parts"] or current["title"]: + txt = "\n".join(current["parts"]).strip() + sections.append({"title": current["title"] or None, "text": txt}) + current["title"], current["parts"] = None, [] + + for el in soup.find_all(["h1","h2","h3","h4","h5","h6","p","li","blockquote","pre","code"]): + if el.name in {"h1","h2"}: + flush() + t = el.get_text(separator=" ", strip=True) + current["title"] = t or None + else: + t = el.get_text(separator=" ", strip=True) + if t: + current["parts"].append(t) + flush() + return sections + +# ------------------------- +# Records +# ------------------------- + +@dataclass +class Record: + id: str + parent_id: Optional[str] + source_path: str + url: Optional[str] + mime: str + record_type: str # "file" | "page" | "section" | "av" | "image" | "code-summary" | "html-section" + title: Optional[str] + text: str + span: Optional[Dict[str, Any]] = None + lang: Optional[str] = None + meta: Optional[Dict[str, Any]] = None + +# ------------------------- +# Processors +# ------------------------- + +def _extract_single_pdf_page(pdf_path: Path, pno: int, max_cols: 
int) -> Tuple[int, str, str]: + """Open the PDF in THIS thread, extract one page. Returns (page_index, title_guess, text).""" + title = None + text = "" + try: + with fitz.open(pdf_path) as doc: + if pno < 0 or pno >= len(doc): + return (pno, "", "") + page = doc.load_page(pno) + blocks = page.get_text("blocks") or [] + blocks = [b for b in blocks if isinstance(b[4], str) and b[4].strip()] + if not blocks: + return (pno, "", "") + cols = segment_columns(blocks, max_cols=max_cols) + lines: List[str] = [] + for col in cols: + for x0, y0, x1, y1, txt, *_ in col: + t = re.sub(r"\s+", " ", txt.strip()) + if t: + lines.append(t) + text = "\n".join(lines).strip() + for line in text.splitlines(): + if line.strip(): + title = line.strip() + break + return (pno, title or "", text) + except Exception: + return (pno, "", "") + +def process_pdf(path: Path, args) -> List[Record]: + """ + PDF: if emit=per-page/auto → one record per page (with optional page threads); + else single record. + Also uses ocrmypdf --jobs for scanned PDFs (already parallel). 
+ """ + verbose = args.verbose + tmpdir_obj = tempfile.TemporaryDirectory() + tmpdir = Path(tmpdir_obj.name) + records: List[Record] = [] + try: + src = path + work_pdf = src + # (1) Make searchable if scanned + if is_probably_scanned(src): + out_pdf = tmpdir / f"{src.stem}.ocr.pdf" + ok, _ocr_log = ocrmypdf_searchable(src, out_pdf, args.lang_hint or args.ocr_lang, args.ocr_page_jobs, verbose) + if ok: + work_pdf = out_pdf + + per_page = (args.emit in ("per-page", "auto")) + if per_page: + # Determine page worker count + page_workers = args.pdf_page_workers or min(4, (os.cpu_count() or 4)) + try: + # First open once to count pages + with fitz.open(work_pdf) as d: + n_pages = len(d) + if page_workers > 1 and n_pages > 1: + # Threaded per-page extraction (safe: each worker opens the doc) + results: List[Tuple[int, str, str]] = [] + with cf.ThreadPoolExecutor(max_workers=max(1, page_workers)) as ex: + futs = {ex.submit(_extract_single_pdf_page, work_pdf, pno, args.max_cols): pno for pno in range(n_pages)} + for fut in cf.as_completed(futs): + results.append(fut.result()) + results.sort(key=lambda t: t[0]) + else: + # Single-threaded per-page + results = [] + with fitz.open(work_pdf) as d: + for pno in range(len(d)): + page = d.load_page(pno) + blocks = page.get_text("blocks") or [] + blocks = [b for b in blocks if isinstance(b[4], str) and b[4].strip()] + if not blocks: + text = "" + else: + cols = segment_columns(blocks, max_cols=args.max_cols) + lines = [] + for col in cols: + for x0,y0,x1,y1,txt,*_ in col: + t = re.sub(r"\s+", " ", txt.strip()) + if t: lines.append(t) + text = "\n".join(lines).strip() + title = None + for line in text.splitlines(): + if line.strip(): + title = line.strip(); break + results.append((pno, title or "", text)) + for (pno, title, text) in results: + lang = detect_language(text) if args.lang_detect else None + records.append(Record( + id=f"{path.as_posix()}#page={pno+1}", + parent_id=str(path.as_posix()), + 
source_path=str(path.resolve()), + url=None, + mime="application/pdf", + record_type="page", + title=title or f"{path.stem} — p.{pno+1}", + text=text, + span={"page_start": pno+1, "page_end": pno+1}, + lang=lang, + meta=None + )) + return records + except Exception: + pass # fallthrough to file-level + + # (2) File-level extraction + text = extract_pdf_text(work_pdf, max_cols=args.max_cols, verbose=verbose) + title = None + for line in text.splitlines(): + if line.strip(): + title = line.strip() + break + lang = detect_language(text) if args.lang_detect else None + records.append(Record( + id=str(path.as_posix()), + parent_id=None, + source_path=str(path.resolve()), + url=None, + mime="application/pdf", + record_type="file", + title=title, + text=text, + span=None, + lang=lang, + meta=None + )) + return records + finally: + tmpdir_obj.cleanup() + +def process_html(path: Path, args) -> List[Record]: + html = path.read_text(encoding="utf-8", errors="ignore") + per_section = (args.emit in ("per-section", "auto")) + if per_section: + secs = split_html_sections(html) + secs = [s for s in secs if (s.get("text") or "").strip()] + if secs: + sec_workers = args.html_section_workers or min(4, (os.cpu_count() or 4)) + + def _build(idx: int, s: Dict[str, Any]) -> Record: + text = s["text"] + title = s["title"] or f"{path.stem} — section {idx+1}" + lang = detect_language(text) if args.lang_detect else None + return Record( + id=f"{path.as_posix()}#section={idx+1}", + parent_id=str(path.as_posix()), + source_path=str(path.resolve()), + url=None, + mime="text/html", + record_type="html-section", + title=title, + text=text, + span={"section_idx": idx+1, "section_title": s["title"]}, + lang=lang, + meta=None + ) + + records: List[Tuple[int, Record]] = [] + with cf.ThreadPoolExecutor(max_workers=max(1, sec_workers)) as ex: + futs = {ex.submit(_build, i, s): i for i, s in enumerate(secs)} + for fut in cf.as_completed(futs): + i = futs[fut] + records.append((i, fut.result())) + 
records.sort(key=lambda t: t[0]) + return [r for _, r in records] + + # file-level fallback + soup = BeautifulSoup(html, "html.parser") + for tag in soup(["script", "style", "noscript", "nav", "header", "footer"]): + tag.decompose() + texts: List[str] = [] + for el in soup.find_all(["h1","h2","h3","h4","h5","h6","p","li","blockquote","pre","code"]): + t = el.get_text(separator=" ", strip=True) + if t: + texts.append(t) + text = "\n".join(texts).strip() + title = None + h1 = soup.find("h1") + if h1: + title = h1.get_text(strip=True) + if not title: + for line in text.splitlines(): + if line.strip(): + title = line.strip() + break + lang = detect_language(text) if args.lang_detect else None + return [Record( + id=str(path.as_posix()), + parent_id=None, + source_path=str(path.resolve()), + url=None, + mime="text/html", + record_type="file", + title=title or path.stem, + text=text, + span=None, + lang=lang, + meta=None + )] + +def preprocess_image_for_ocr(img_path: Path, upsample_min_edge: int = 900) -> Path: + if Image is None: + return img_path + img = Image.open(img_path).convert("RGB") + w, h = img.size + if ImageChops is not None: + corners = [(0,0), (w-1,0), (0,h-1), (w-1,h-1)] + bboxes = [] + for cx, cy in corners: + try: + bg = Image.new(img.mode, img.size, img.getpixel((cx, cy))) + diff = ImageChops.difference(img, bg) + bbox = diff.getbbox() + if bbox: bboxes.append(bbox) + except Exception: + pass + if bboxes: + left = max(b[0] for b in bboxes) + top = max(b[1] for b in bboxes) + right = min(b[2] for b in bboxes) + bottom= min(b[3] for b in bboxes) + if 0 <= left < right <= w and 0 <= top < bottom <= h: + if (right-left) >= 0.7*w and (bottom-top) >= 0.7*h: + img = img.crop((left, top, right, bottom)) + img = ImageOps.grayscale(img) + try: + img = ImageOps.autocontrast(img, cutoff=1) + except Exception: + pass + W, H = img.size + if max(W, H) < upsample_min_edge: + scale = float(upsample_min_edge) / float(max(W, H)) + img = img.resize((int(W*scale), 
int(H*scale)), Image.LANCZOS) + tmp = Path(tempfile.mkstemp(suffix=".png")[1]) + img.save(tmp) + return Path(tmp) + +def tesseract_ocr_image(tesseract_bin: str, img_path: Path, lang: str, psm: Optional[int] = None) -> str: + pre = preprocess_image_for_ocr(img_path) + try: + cmd = [tesseract_bin, str(pre), "stdout", "-l", lang] + if psm is not None: + cmd += ["--psm", str(psm)] + res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) + if res.returncode != 0: + return "" + return res.stdout.strip() + finally: + if pre != img_path: + try: pre.unlink() + except Exception: pass + +def _alnum_ratio(s: str) -> float: + chars = [c for c in s if c.isprintable() and not c.isspace()] + if not chars: + return 0.0 + alnum = sum(1 for c in chars if c.isalnum()) + return float(alnum) / float(len(chars)) + +def _looks_like_garbage(text: str, *, require_lang: bool, args) -> bool: + t = (text or "").strip() + if len(t) < 20: + return True + toks = re.findall(r"\w+|\S", t) + avg_tok = sum(len(x) for x in toks) / max(1, len(toks)) + uniq_ratio = len(set(t)) / max(1, len(t)) + if uniq_ratio > 0.6 and avg_tok < 2.2: + return True + if re.search(r"[|—\-]{5,}", t): + return True + if require_lang and args.lang_detect and (detect_language(t) is None): + return True + return False + +def _tesseract_probe_tsv(tesseract_bin: str, img_path: Path, lang: str, psm: Optional[int] = None) -> Dict[str, Any]: + pre = preprocess_image_for_ocr(img_path) + tmpdir = Path(tempfile.mkdtemp(prefix="tsv_")) + try: + base = tmpdir / "probe" + cmd = [tesseract_bin, str(pre), str(base), "-l", lang] + if psm is not None: + cmd += ["--psm", str(psm)] + cmd += ["tsv"] + res = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + if res.returncode != 0: + return {"psm": psm, "words": 0, "conf_median": 0.0, "conf_mean": 0.0, "text": "", "alnum_ratio": 0.0} + tsv_path = base.with_suffix(".tsv") + if not tsv_path.exists(): + return {"psm": psm, "words": 0, 
"conf_median": 0.0, "conf_mean": 0.0, "text": "", "alnum_ratio": 0.0} + words, confs, tokens = 0, [], [] + with open(tsv_path, "r", encoding="utf-8", errors="ignore") as fh: + reader = csv.DictReader(fh, delimiter="\t") + for row in reader: + txt = (row.get("text") or "").strip() + try: + conf = float(row.get("conf") or -1) + except Exception: + conf = -1.0 + if txt and conf >= 0: + words += 1 + confs.append(conf) + tokens.append(txt) + text = " ".join(tokens).strip() + conf_median = float(np.median(confs)) if confs else 0.0 + conf_mean = float(np.mean(confs)) if confs else 0.0 + return { + "psm": psm, + "words": words, + "conf_median": conf_median, + "conf_mean": conf_mean, + "text": text, + "alnum_ratio": _alnum_ratio(text), + } + finally: + try: + if pre != img_path: + pre.unlink() + except Exception: + pass + try: + shutil.rmtree(tmpdir) + except Exception: + pass + +def process_image(path: Path, args) -> List[Record]: + def vlm_describe() -> Tuple[str, str, Dict[str, Any]]: + img_b64 = encode_image_b64(path, args.image_max_edge) + prompt = ( + "Decide first if the image is primarily TEXT or not.\n" + "- If TEXT: output exactly:\n" + "TYPE: TEXT\nCONTENT:\n\n" + "- If not: output exactly:\n" + "TYPE: DESCRIPTION\nCONTENT:\n\n" + "Do not add extra headers, markdown, or commentary." 
+ ) + if LLM_SEM is not None: + with LLM_SEM: + resp = ollama_generate(args.ollama_host, args.vlm_model, prompt, images_b64=[img_b64], options={"temperature": 0.2}) + else: + resp = ollama_generate(args.ollama_host, args.vlm_model, prompt, images_b64=[img_b64], options={"temperature": 0.2}) + resp = sanitize_llm_text(resp) + kind = "DESCRIPTION" + content = resp.strip() + m = re.search(r"TYPE:\s*(TEXT|DESCRIPTION)", resp, re.I) + if m: + kind = m.group(1).upper() + m2 = re.search(r"CONTENT:\s*(.*)", resp, re.S) + if m2: + content = m2.group(1).strip() + meta = {"vlm_kind": kind} + return sanitize_llm_text(content), f"vlm:{kind}", meta + + if args.image_text_gate == "always-vlm": + text, mode, meta_extra = vlm_describe() + else: + if args.image_text_gate == "always-ocr": + psms = [int(x) for x in str(args.ocr_psms).split(",") if str(x).strip().isdigit()] + best_txt, best_psm = "", None + for psm in psms or [6]: + txt = tesseract_ocr_image(args.tesseract, path, args.lang_hint or args.ocr_lang, psm=psm).strip() + if len(txt) > len(best_txt): + best_txt, best_psm = txt, psm + text = sanitize_llm_text(best_txt) + if _looks_like_garbage(text, require_lang=True, args=args): + vlm_text, vlm_mode, meta_extra = vlm_describe() + text, mode = vlm_text, vlm_mode + meta_extra = {"fallback": "vlm_garbage_filter"} + else: + mode, meta_extra = "tesseract", {"ocr_psm": best_psm} + elif args.image_text_gate in ("tesseract-conf", "vlm-gate"): + gate_decision = None + gate_meta: Dict[str, Any] = {} + if args.image_text_gate == "vlm-gate": + img_b64 = encode_image_b64(path, args.image_max_edge) + gate_prompt = ( + "Is this image primarily text (documents, slides, screenshots) or not?\n" + "Answer with EXACTLY one word: TEXT or DESCRIPTION." 
+ ) + if LLM_SEM is not None: + with LLM_SEM: + g = ollama_generate(args.ollama_host, args.vlm_model, gate_prompt, images_b64=[img_b64], options={"temperature": 0.0}) + else: + g = ollama_generate(args.ollama_host, args.vlm_model, gate_prompt, images_b64=[img_b64], options={"temperature": 0.0}) + g = sanitize_llm_text(g).split()[0].upper() if g.strip() else "DESCRIPTION" + if g not in {"TEXT", "DESCRIPTION"}: + g = "DESCRIPTION" + gate_decision = g + gate_meta["vlm_gate"] = g + + if gate_decision == "DESCRIPTION": + text, mode, meta_extra = vlm_describe() + meta_extra.update({"image_gate": "vlm-gate"}) + else: + psms = [int(x) for x in str(args.ocr_psms).split(",") if str(x).strip().isdigit()] or [6, 11] + probes = [_tesseract_probe_tsv(args.tesseract, path, args.lang_hint or args.ocr_lang, psm=psm) for psm in psms] + best = max(probes, key=lambda d: (d.get("conf_median", 0.0), d.get("words", 0))) + accept = ( + best.get("conf_median", 0.0) >= float(args.ocr_min_conf) and + best.get("words", 0) >= int(args.ocr_min_words) and + best.get("alnum_ratio", 0.0) >= float(args.ocr_min_alnum) + ) + if accept: + best_psm = best.get("psm") or 6 + text = tesseract_ocr_image(args.tesseract, path, args.lang_hint or args.ocr_lang, psm=best_psm).strip() + text = sanitize_llm_text(text) + if _looks_like_garbage(text, require_lang=True, args=args): + vlm_text, vlm_mode, meta_extra = vlm_describe() + text, mode = vlm_text, vlm_mode + meta_extra = {"fallback": "vlm_garbage_filter", "image_gate": "tesseract-conf"} + meta_extra.update(gate_meta) + else: + mode, meta_extra = "tesseract", {"image_gate": "tesseract-conf", "ocr_psm": best_psm} + meta_extra.update({ + "ocr_words": best.get("words", 0), + "ocr_conf_median": round(best.get("conf_median", 0.0), 2), + "ocr_conf_mean": round(best.get("conf_mean", 0.0), 2), + "alnum_ratio": round(best.get("alnum_ratio", 0.0), 3), + }) + meta_extra.update(gate_meta) + else: + vlm_text, vlm_mode, meta_extra = vlm_describe() + text, mode = vlm_text, 
vlm_mode + meta_extra.update({ + "image_gate": "tesseract-conf", + "fallback": "vlm_conf_too_low", + "ocr_words": best.get("words", 0), + "ocr_conf_median": round(best.get("conf_median", 0.0), 2), + "ocr_conf_mean": round(best.get("conf_mean", 0.0), 2), + "alnum_ratio": round(best.get("alnum_ratio", 0.0), 3), + }) + else: + text, mode, meta_extra = vlm_describe() + + text = sanitize_llm_text(text) + mime = mimetypes.guess_type(str(path))[0] or "image/*" + title = (text.splitlines()[0].strip() if text else path.stem)[:200] + lang = detect_language(text) if args.lang_detect else None + + meta = {"image_mode": mode} + if "meta_extra" in locals() and isinstance(meta_extra, dict): + meta.update(meta_extra) + + return [Record( + id=f"{path.as_posix()}", + parent_id=None, + source_path=str(path.resolve()), + url=None, + mime=mime, + record_type="image", + title=title or path.stem, + text=text, + span=None, + lang=lang, + meta=meta + )] + +def extract_epub_sections(path: Path, args) -> List[Dict[str, Any]]: + sections: List[Dict[str, Any]] = [] + if epub is None: + return sections + book = epub.read_epub(str(path)) + tmpdir = Path(tempfile.mkdtemp(prefix="epub_")) + try: + order = [] + for itemref in book.spine or []: + idref = itemref[0] if isinstance(itemref, (list, tuple)) else itemref + it = book.get_item_with_id(idref) + if it: order.append(it) + if not order: + order = [it for it in book.get_items() if it.get_type() == 9] + for idx, it in enumerate(order): + html = it.get_content().decode("utf-8", errors="ignore") + soup = BeautifulSoup(html, "html.parser") + for tag in soup(["script", "style", "noscript", "nav", "header", "footer"]): + tag.decompose() + texts: List[str] = [] + for el in soup.find_all(["h1","h2","h3","h4","h5","h6","p","li","blockquote","pre","code"]): + t = el.get_text(separator=" ", strip=True) + if t: + texts.append(t) + title = None + for el in soup.find_all(["h1","h2"]): + t = el.get_text(separator=" ", strip=True) + if t: + title = t + break + 
if not title: + title = it.get_id() or f"Section {idx+1}" + sections.append({"idx": idx, "title": title, "text": "\n".join(texts).strip(), "images": []}) + images = [] + for item in book.get_items(): + if item.get_type() == 3: + fp = tmpdir / f"{item.get_id()}" + with open(fp, "wb") as fh: + fh.write(item.get_content()) + images.append(fp) + if sections and images: + sections[0]["images"] = images + return sections + except Exception: + return sections + finally: + pass + +def process_epub(path: Path, args) -> List[Record]: + per_section = (args.emit in ("per-section", "auto")) + if per_section: + secs = extract_epub_sections(path, args) + records: List[Record] = [] + if not secs: + per_section = False + else: + for sec in secs: + texts = sec["text"] + img_texts: List[str] = [] + for img in sec.get("images") or []: + ocr_txt = tesseract_ocr_image(args.tesseract, img, args.lang_hint or args.ocr_lang) + if ocr_txt: + img_texts.append(ocr_txt) + final_text = (texts + ("\n\n" + "\n\n".join(img_texts) if img_texts else "")).strip() + rid = f"{path.as_posix()}#section={sec['idx']+1}" + lang = detect_language(final_text) if args.lang_detect else None + records.append(Record( + id=rid, + parent_id=str(path.as_posix()), + source_path=str(path.resolve()), + url=None, + mime="application/epub+zip", + record_type="section", + title=sec["title"] or f"{path.stem} — section {sec['idx']+1}", + text=final_text, + span={"section_idx": sec['idx']+1, "section_title": sec["title"]}, + lang=lang, + meta={"epub_strategy": "direct"} + )) + if records: + return records + + texts = "" + img_texts: List[str] = [] + tmp_pdf = None + if args.epub_strategy in ("direct", "pdf-fallback"): + secs = extract_epub_sections(path, args) + texts = "\n\n".join([s["text"] for s in secs]) if secs else "" + for s in secs: + for img in s.get("images") or []: + ocr_txt = tesseract_ocr_image(args.tesseract, img, args.lang_hint or args.ocr_lang) + if ocr_txt: + img_texts.append(ocr_txt) + combined = (texts + 
def process_code_llm(path: Path, args) -> List[Record]:
    """Summarize one source-code file with the configured Ollama model.

    At most ``args.code_max_bytes`` bytes (minimum 1) of the file are placed in
    the prompt; if the file is longer, a ``[TRUNCATED]`` marker is appended.

    Args:
        path: File to summarize.
        args: Options namespace; reads ``code_max_bytes``, ``ollama_host``,
            ``code_llm`` and ``lang_detect``.

    Returns:
        A one-element list holding a ``code-summary`` record whose text is the
        sanitized model response.
    """
    import codecs
    from contextlib import nullcontext

    maxb = max(1, args.code_max_bytes)
    # Read one byte past the cap: enough to detect truncation without pulling
    # an arbitrarily large file into memory.
    with open(path, "rb") as fh:
        raw = fh.read(maxb + 1)
    trunc = len(raw) > maxb
    if trunc:
        raw = raw[:maxb]
    try:
        # The cut may land inside a multi-byte UTF-8 sequence. With final=False
        # the incremental decoder drops that incomplete tail instead of raising,
        # so a valid UTF-8 file is no longer misdecoded as latin-1.
        decoder = codecs.getincrementaldecoder("utf-8")()
        content = decoder.decode(raw, final=not trunc)
    except UnicodeDecodeError:
        # Genuinely non-UTF-8 bytes: latin-1 maps every byte, so this cannot fail.
        content = raw.decode("latin-1", errors="replace")

    suffix = path.suffix.lower()
    lang_hint = CODE_SUFFIX_LANG.get(suffix, "Code")
    prompt = (
        f"File: {path.name} (language: {lang_hint})\n"
        "Task: Explain what this file does in 5–10 tight bullet points.\n"
        "Include: purpose, key functions/classes, inputs/outputs, side effects (I/O, network, env), external deps.\n"
        "Avoid: stylistic critique and rewrites. Be precise.\n\n"
        "Code:\n" + content + ("\n\n[TRUNCATED]" if trunc else "")
    )
    # LLM_SEM is only set once run_build has started; without it the call is unthrottled.
    guard = LLM_SEM if LLM_SEM is not None else nullcontext()
    with guard:
        resp = ollama_generate(args.ollama_host, args.code_llm, prompt, options={"temperature": 0.2})
    text = sanitize_llm_text((resp or "").strip())
    title = f"{path.name} — summary"
    lang = detect_language(text) if args.lang_detect else None
    return [Record(
        id=str(path.as_posix()),
        parent_id=None,
        source_path=str(path.resolve()),
        url=None,
        mime="text/x-code-summary",
        record_type="code-summary",
        title=title,
        text=text,
        span=None,
        lang=lang,
        meta={"model": args.code_llm, "truncated": "yes" if trunc else "no", "lang_hint": lang_hint}
    )]
def merge_transcripts(files_idx_text: List[Tuple[int, str]], max_overlap_words: int) -> str:
    """Join per-slice transcripts in index order, dropping words duplicated at slice seams.

    For each slice after the first, the last ``max_overlap_words`` words of the
    previous slice are compared with the first ``max_overlap_words`` words of the
    current one. The longest exact match of at least 5 words is removed from the
    start of the current slice.

    Args:
        files_idx_text: ``(slice_index, text)`` pairs in any order. The list is
            not modified.
        max_overlap_words: Size of the comparison window; values <= 0 disable
            de-duplication.

    Returns:
        The merged transcript as a single space-joined string.
    """
    # sorted() instead of list.sort(): callers keep using their list afterwards
    # and should not have it reordered behind their back.
    ordered = sorted(files_idx_text, key=lambda pair: pair[0])
    # A negative window would turn the slices below into "everything but the
    # first/last k words" and could produce bogus matches.
    window = max(0, max_overlap_words)

    merged_words: List[str] = []
    prev_words: List[str] = []
    for _idx, txt in ordered:
        words = (txt or "").split()
        if window and merged_words and prev_words:
            p_tail = prev_words[-window:]
            c_head = words[:window]
            limit = min(len(p_tail), len(c_head))
            best = 0
            # Longest match first; matches shorter than 5 words are ignored to
            # avoid trimming on coincidental repeats.
            for n in range(limit, 4, -1):
                if p_tail[-n:] == c_head[:n]:
                    best = n
                    break
            if best:
                words = words[best:]
        merged_words += words
        prev_words = words
    return " ".join(merged_words).strip()
args.max_av_duration: + raise RuntimeError(f"Media too long ({duration_s:.1f}s > cap {args.max_av_duration}s)") + + tmpdir = Path(tempfile.mkdtemp(prefix="av_")) + wav_path = tmpdir / "audio.wav" + ok = extract_audio_wav(args.ffmpeg, path, wav_path) + if not ok or not wav_path.exists(): + try: shutil.rmtree(tmpdir) + except Exception: pass + raise RuntimeError("ffmpeg audio extraction failed") + + slice_dir = tmpdir / "slices" + slice_dir.mkdir(parents=True, exist_ok=True) + nslices = max(1, args.num_slices) + slices = slice_audio(wav_path, slice_dir, nslices, args.overlap_sec, args.ffprobe, args.ffmpeg) + + mpw = args.mp_workers or len(slices) + device = _resolve_whisper_device(args.whisper_device) + ctx = mp.get_context("fork") + pool = ctx.Pool(processes=mpw, initializer=_whisper_pool_init, initargs=(args.whisper_model, device)) + try: + jobs = [(fp, i, path.stem) for i, (fp, _s, _e) in enumerate(slices)] + results = pool.starmap(_transcribe_slice, [(args.asr_task, j) for j in jobs]) + except BaseException: + try: + pool.terminate() + finally: + pool.join() + raise + else: + pool.close() + pool.join() + + joined_text = merge_transcripts(results, args.max_overlap_words) + joined_text = sanitize_llm_text(joined_text) + lang = "en" if args.asr_task == "translate" else (detect_language(joined_text) if args.lang_detect else None) + mime = mimetypes.guess_type(str(path))[0] or "audio/wav" + + records: List[Record] = [] + if args.emit_av in ("slices", "both"): + for i, (fp, s, e) in enumerate(slices): + seg_txt = next((t for idx, t in results if idx == i), "") + seg_txt = sanitize_llm_text(seg_txt) + seg_lang = "en" if args.asr_task == "translate" else (detect_language(seg_txt) if args.lang_detect else None) + records.append(Record( + id=f"{path.as_posix()}#slice={i+1}", + parent_id=str(path.as_posix()), + source_path=str(path.resolve()), + url=None, + mime=mime, + record_type="av", + title=f"{path.stem} — slice {i+1}", + text=seg_txt, + span={"time_start": s, 
"time_end": e}, + lang=seg_lang, + meta={"duration_s": f"{duration_s:.1f}" if duration_s else "", "asr_model": f"whisper-{args.whisper_model}", "asr_task": args.asr_task} + )) + if args.emit_av in ("joined", "both"): + records.append(Record( + id=str(path.as_posix()), + parent_id=None, + source_path=str(path.resolve()), + url=None, + mime=mime, + record_type="av", + title=path.stem, + text=joined_text, + span={"duration_s": duration_s}, + lang=lang, + meta={"duration_s": f"{duration_s:.1f}" if duration_s else "", "asr_model": f"whisper-{args.whisper_model}", "asr_task": args.asr_task} + )) + + try: + shutil.rmtree(tmpdir) + except Exception: + pass + + return records + +# ------------------------- +# IO +# ------------------------- + +def iter_files(root: Path, include_rgx: re.Pattern, exclude_rgx: re.Pattern) -> Iterable[Path]: + for p in root.rglob("*"): + if not p.is_file(): + continue + rel = str(p.relative_to(root)) + if exclude_rgx.search(rel): + continue + if include_rgx.search(rel): + yield p + +# ------------------------- +# Main +# ------------------------- + +def run_build(root: Path, out: Path, *, on_progress=None, **opts) -> dict: + global LLM_SEM + # Use provided options or default values + args = argparse.Namespace( + root=root, + out=out, + workers=opts.get("workers", os.cpu_count() or 4), + verbose=opts.get("verbose", False), + emit=opts.get("emit", "auto"), + emit_av=opts.get("emit_av", "joined"), + ocr_page_jobs=opts.get("ocr_page_jobs", 1), + ocr_lang=opts.get("ocr_lang", "eng"), + max_cols=opts.get("max_cols", 4), + epub_strategy=opts.get("epub_strategy", "pdf-fallback"), + pdf_page_workers=opts.get("pdf_page_workers", 0), + html_section_workers=opts.get("html_section_workers", 0), + include=opts.get("include", r".*\.(?:pdf|html?|txt|md|rst|epub|png|jpe?g|gif|bmp|tiff?|webp|heic|mp3|wav|m4a|flac|ogg|opus|aac|mp4|mkv|mov|webm|avi|ts|py|ipynb|js|ts|tsx|jsx|java|c|cpp|rs|go|rb|php|cs|swift|kt|m|sh|bat|ps1|sql)$"), + exclude=opts.get("exclude", 
r"(^|[\\/])\.|__MACOSX([\\/]|$)|\.DS_Store$|\.ocr\.txt$"), + whisper_model=opts.get("whisper_model", "base"), + num_slices=opts.get("num_slices", 8), + overlap_sec=opts.get("overlap_sec", 1.0), + max_overlap_words=opts.get("max_overlap_words", 7), + mp_workers=opts.get("mp_workers", 0), + asr_task=opts.get("asr_task", "transcribe"), + max_av_duration=opts.get("max_av_duration", 5 * 3600), + whisper_device=opts.get("whisper_device", "auto"), + ollama_host=opts.get("ollama_host", "http://localhost:11434"), + vlm_model=opts.get("vlm_model", "qwen2.5vl:7b"), + code_llm=opts.get("code_llm", "qwen3:4b"), + llm_parallel=opts.get("llm_parallel", 1), + image_max_edge=opts.get("image_max_edge", 1600), + image_text_gate=opts.get("image_text_gate", "tesseract-conf"), + ocr_psms=opts.get("ocr_psms", "6,11"), + ocr_min_conf=opts.get("ocr_min_conf", 55), + ocr_min_words=opts.get("ocr_min_words", 10), + ocr_min_alnum=opts.get("ocr_min_alnum", 0.55), + code_max_bytes=opts.get("code_max_bytes", 200_000), + lang_hint=opts.get("lang_hint", None), + lang_detect=opts.get("lang_detect", True), + writer_queue=opts.get("writer_queue", 64), + writer_chunk=opts.get("writer_chunk", 256), + writer_rotate_mb=opts.get("writer_rotate_mb", 0), + ffmpeg=opts.get("ffmpeg", shutil.which("ffmpeg") or "/usr/bin/ffmpeg"), + ffprobe=opts.get("ffprobe", shutil.which("ffprobe") or "/usr/bin/ffprobe"), + tesseract=opts.get("tesseract", shutil.which("tesseract") or "/usr/bin/tesseract"), + ebook_convert=opts.get("ebook_convert", shutil.which("ebook-convert")), + pandoc=opts.get("pandoc", shutil.which("pandoc")), + ) + + out_path = Path(args.out).expanduser() + if not out_path.is_absolute(): + out_path = (Path(__file__).parent / out_path).resolve() + + ensure_parent(out_path) + open(out_path, "w", encoding="utf-8").close() + start_writer(out_path, rotate_mb=args.writer_rotate_mb, queue_max=args.writer_queue) + if on_progress: + on_progress("start", 0.0, f"Writing JSONL to: {out_path}") + + include_rgx = 
re.compile(args.include, flags=re.I) + exclude_rgx = re.compile(args.exclude, flags=re.I) + + if on_progress: + on_progress("scan", 0.05, "Scanning files...") + files = list(iter_files(root, include_rgx, exclude_rgx)) + if not files: + if on_progress: + on_progress("done", 1.0, "No matching files found.") + stop_writer() + return {"status": "warning", "message": "No matching files found."} + + priority = { + ".pdf": 0, ".html": 1, ".htm": 1, ".txt": 2, ".md": 2, ".rst": 2, ".epub": 3, + ".png": 4, ".jpg": 4, ".jpeg": 4, ".gif": 4, ".bmp": 4, ".tif": 4, ".tiff": 4, ".webp": 4, ".heic": 4, + ".mp3": 5, ".wav": 5, ".m4a": 5, ".flac": 5, ".ogg": 5, ".opus": 5, ".aac": 5, + ".mp4": 6, ".mkv": 6, ".mov": 6, ".webm": 6, ".avi": 6, ".ts": 6 + } + priority.update({k: 7 for k in CODE_SUFFIX_LANG.keys()}) + files.sort(key=lambda p: (priority.get(p.suffix.lower(), 9), + (p.stat().st_size if p.exists() else 0), + str(p).lower())) + + LLM_SEM = threading.BoundedSemaphore(max(1, args.llm_parallel)) + + def worker(path: Path) -> Tuple[Path, List[Record], Optional[str]]: + try: + suf = path.suffix.lower() + if suf == ".pdf": + recs, perr = run_isolated(process_pdf, path, args, timeout=1200) + if perr: + cleaned = try_mutool_clean(path) + if cleaned: + recs2, perr2 = run_isolated(process_pdf, cleaned, args, timeout=1200) + try: cleaned.unlink(missing_ok=True) + except Exception: pass + if not perr2: + return (path, recs2, None) + txt = pdftotext_fallback(path) + if txt.strip(): + lang = detect_language(txt) if args.lang_detect else None + return (path, [Record( + id=str(path.as_posix()), + parent_id=None, + source_path=str(path.resolve()), + url=None, + mime="application/pdf", + record_type="file", + title=(txt.splitlines()[0].strip() if txt else path.stem)[:200], + text=txt, + span=None, + lang=lang, + meta={"fallback":"pdftotext"} + )], None) + return (path, [], perr) + else: + return (path, recs, None) + elif suf in {".html", ".htm"}: + recs = process_html(path, args) + elif suf 
def main():
    """CLI entry point: parse arguments, run the corpus build, print progress lines.

    Exits with status 2 when neither ``--root`` nor ``--mirror`` is given.
    """
    args = parse_args()
    root_arg = args.root or args.mirror
    if not root_arg:
        print("[ERROR] Please provide --root .", file=sys.stderr)
        sys.exit(2)

    # root and out are already passed positionally. Forwarding them again through
    # **vars(args) makes the call fail with
    # "TypeError: run_build() got multiple values for argument 'root'".
    reserved = {"root", "out", "on_progress"}
    opts = {key: value for key, value in vars(args).items() if key not in reserved}

    run_build(
        Path(root_arg),
        Path(args.out),
        on_progress=lambda p, pct, d: print(f"[{p}] {pct*100:.1f}%: {d}"),
        **opts,
    )
+#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +RAG Corpus Enrichment (second pass) + +What this does +- Reads a JSONL corpus (one record per line) from build_corpus.py and adds LLM-generated enrichment: + * headline (<= 12 words, language = --summary-lang) + * summary (2–4 sentences, language = --summary-lang) + * keywords (5–12, normalized & deduped) + * entities (name + canonical type: PERSON|ORG|PRODUCT|WORK|PLACE|EVENT|DATE|OTHER) + * 2–4 likely Q/A pairs (language = --summary-lang) + +- Writes two outputs: + 1) --out : original record + enrichment fields + embedding_text_hint (what your indexer should embed) + 2) --shadow-out : compact “shadow” record for retrieval with normalized shadow_text and useful metadata + (includes: parent_id, span, size metrics, quality_flags) + +Design for speed & robustness +- Default local model: phi4:latest (good balance on Apple/M1 Max). Swap with --model if desired. +- Pooled HTTP via requests.Session (one per thread), bounded semaphore on Ollama calls. +- JSON mode with strict schema + robust repair if the model returns non-JSON. +- Head/Mid/Tail sampling for long texts to stay within context quickly. 
# Optional faster JSON if available
try:
    import orjson as _orjson

    def json_dumps(obj) -> str:
        """Serialize ``obj`` to a JSON string using orjson."""
        # orjson has no OPT_SERIALIZABLE flag; referencing it raised
        # AttributeError on every call. OPT_NON_STR_KEYS alone keeps non-string
        # dict keys working, as they do with the stdlib fallback.
        return _orjson.dumps(obj, option=_orjson.OPT_NON_STR_KEYS).decode("utf-8")

    def json_loads(s: str) -> Any:
        """Parse a JSON document using orjson."""
        return _orjson.loads(s)
except Exception:
    def json_dumps(obj) -> str:
        """Serialize ``obj`` to a JSON string (stdlib fallback, non-ASCII kept verbatim)."""
        return json.dumps(obj, ensure_ascii=False)

    def json_loads(s: str) -> Any:
        """Parse a JSON document (stdlib fallback)."""
        return json.loads(s)
"COMPANY": "ORG", "INSTITUTION": "ORG", "COUNTRY": "ORG", + "PRODUCT": "PRODUCT", "TOOL": "PRODUCT", "LIBRARY": "PRODUCT", + "WORK": "WORK", "BOOK": "WORK", "PAPER": "WORK", "ARTICLE": "WORK", "MOVIE": "WORK", + "PLACE": "PLACE", "LOCATION": "PLACE", "CITY": "PLACE", "REGION": "PLACE", "ADDRESS": "PLACE", + "EVENT": "EVENT", "CONFERENCE": "EVENT", "MEETING": "EVENT", + "DATE": "DATE", "TIME": "DATE", "YEAR": "DATE", + "OTHER": "OTHER", +} + +QA_TARGET_DEFAULT = 3 # aim for 3 Q/A pairs for normal docs +QA_TARGET_SHORT = 2 # short docs can have 2 + +# thread-local session pool +_TLS = threading.local() + +def get_session(): + import requests + s = getattr(_TLS, "session", None) + if s is None: + s = requests.Session() + setattr(_TLS, "session", s) + return s + +def log(msg: str, *, verbose: bool = True): + if verbose: + print(msg, flush=True) + +def strip_think(s: str) -> str: + return re.sub(r"<\s*think\s*>.*?<\s*/\s*think\s*>", "", s, flags=re.S | re.I) + +def sanitize_text(s: str) -> str: + if not s: + return "" + s = strip_think(s) + s = re.sub(r"^\s*```(?:\w+)?\s*|\s*```\s*$", "", s, flags=re.M) # strip stray code fences + s = re.sub(r"[ \t]+", " ", s) + s = re.sub(r"\n{3,}", "\n\n", s) + return s.strip() + +def detect_lang_quick(s: str) -> Optional[str]: + s = (s or "").strip() + if not s: + return None + try: + if langid is not None: + lang, _ = langid.classify(s[:4000]) + return lang + except Exception: + pass + return None + +def sentence_split(text: str) -> List[str]: + # very light heuristic splitter + parts = re.split(r"(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9\"'])", text.strip()) + # fall back if we ended up with nothing + if len(parts) == 1 and len(parts[0]) > 0: + return [parts[0]] + return [p.strip() for p in parts if p.strip()] + +def clamp_sentences(text: str, min_s: int = 2, max_s: int = 4) -> str: + sents = sentence_split(text) + if not sents: + return "" + sents = sents[:max_s] + # if after clamp we have < min_s and original had more, pad; else keep as is + 
def head_mid_tail_sample(s: str, max_chars: int) -> str:
    """Shrink a long text to three excerpts: start, middle and end.

    Texts no longer than ``max_chars`` are returned unchanged. Otherwise each
    excerpt is ``max_chars // 3`` characters long and the excerpts are joined
    with ``[...] (sample)`` markers, so the result is slightly longer than
    ``max_chars``.

    Args:
        s: Source text.
        max_chars: Character budget for the sampled content.

    Returns:
        The original text, or the head/mid/tail sample.
    """
    if len(s) <= max_chars:
        return s
    third = max_chars // 3
    if third <= 0:
        # Budget too small to split three ways. Without this guard the tail
        # slice s[-0:] would be the *entire* string, defeating the cap.
        return s[:max(0, max_chars)]
    head = s[:third]
    mid_start = max(0, len(s) // 2 - third // 2)
    mid = s[mid_start:mid_start + third]
    tail = s[-third:]
    return f"{head}\n\n[...] (sample)\n\n{mid}\n\n[...] (sample)\n\n{tail}"
def ollama_generate_json(
    host: str,
    model: str,
    system_prompt: str,
    user_prompt: str,
    *,
    keep_alive: str = "15m",
    timeout: int = 120,
    options: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Call Ollama /api/generate in JSON mode (format='json') and return a dict.

    The response text is sanitized, then parsed. If it is not a JSON object as
    a whole, the outermost ``{...}`` span is tried. If neither yields an
    object, a minimal enrichment-shaped dict is returned with the raw text as
    ``summary``.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` on a non-2xx response.
    """
    session = get_session()
    payload = {
        "model": model,
        "system": system_prompt,
        "prompt": user_prompt,
        "format": "json",
        "stream": False,
        "keep_alive": keep_alive,
    }
    if options:
        payload["options"] = options
    r = session.post(f"{host.rstrip('/')}/api/generate", json=payload, timeout=timeout)
    r.raise_for_status()
    data = r.json()
    raw = sanitize_text(data.get("response", ""))

    candidates = [raw]
    m = re.search(r"\{.*\}", raw, flags=re.S)
    if m:
        candidates.append(m.group(0))
    for candidate in candidates:
        try:
            parsed = json_loads(candidate)
        except Exception:
            continue
        # Valid JSON is not necessarily an object: a bare list or string would
        # break callers that immediately use .get() on the result.
        if isinstance(parsed, dict):
            return parsed
    # last resort minimal structure
    return {"headline": "", "summary": raw, "keywords": [], "entities": [], "qa": []}
def build_system(summary_lang: str) -> str:
    """Compose the system prompt for the main enrichment request.

    The prompt restricts the model to JSON-only output and names
    ``summary_lang`` as the language for headline, summary, keywords and Q&A.
    """
    role = "You are a precise, concise, multilingual document tagger for retrieval-augmented generation (RAG). "
    rules = "Return ONLY JSON matching the schema. Avoid markdown. No extra commentary.\n"
    language = "Output language for headline/summary/keywords/Q&A must be '{}'.".format(summary_lang)
    return role + rules + language
Produce JSON matching this schema strictly:\n" + "{\n" + ' "headline": string, # <= 12 words\n' + ' "summary": string, # 2-4 sentences, faithful and specific\n' + ' "keywords": [string, ...], # 5-12 salient terms; multi-word allowed; no hashtags\n' + ' "entities": [ # up to ~12 unique items\n' + ' {"name": string, "type": "PERSON|ORG|PRODUCT|WORK|PLACE|EVENT|DATE|OTHER"}\n' + " ],\n" + f' "qa": [ # exactly {want_qa} Q&A pairs\n' + ' {"q": string, "a": string}\n' + " ]\n" + "}\n\n" + f"Style Instruction: {fixed_instruction}\n\n" + f"Constraints:\n" + f"- Headline and summary MUST be in {summary_lang}.\n" + "- Extract proper nouns and salient terms as entities; deduplicate by name.\n" + "- Q&A must be answerable ONLY from the TEXT; keep questions <= 16 words; answers concise (<= ~80 words).\n" + "- Be terse and informative; no filler.\n\n" + "TEXT:\n" + text + ) + +def build_user_qa_topup(text: str, summary_lang: str, need: int) -> str: + need = max(1, min(3, int(need))) + return ( + "We have a document TEXT and need ONLY additional Q&A pairs for retrieval. " + "Return STRICT JSON of the form: {\n" + ' "qa": [ {"q": string, "a": string}, ... ]\n' + "}\n" + f"Output language: {summary_lang}. Provide exactly {need} pairs. " + "Questions <= 16 words; answers concise (<= ~80 words).\n\n" + "TEXT:\n" + text + ) + +def build_system_translate(target_lang: str) -> str: + return ( + "You are a translator. Return ONLY JSON of the form {\"text\": \"...\"}. " + "Do not add commentary." + ) + +def build_user_translate(text: str, target_lang: str) -> str: + return ( + f"Translate into {target_lang} preserving meaning and tone.\n" + "Return: {\"text\": \"...\"} only.\n\n" + "TEXT:\n" + text + ) + +# ------------------------- +# Shadow rendering +# ------------------------- + +def render_shadow(rec: Dict[str, Any], enrichment: Dict[str, Any]) -> Dict[str, Any]: + """ + Build a compact record for retrieval. 'shadow_text' concatenates fields in a stable order. 
class Cache:
    """Small on-disk JSON cache: one file per key under ``root/<first two chars>/``.

    Writes from threads sharing one instance are serialized by a lock and
    published atomically, so a concurrent ``get`` sees either the previous
    file or the complete new one.
    """

    def __init__(self, root: Path, prefix: str = ""):
        self.root = root
        self.root.mkdir(parents=True, exist_ok=True)
        self.lock = threading.Lock()
        self.prefix = prefix

    def _path(self, key: str, *, create: bool = True) -> Path:
        """Return the file path for ``key``; create its shard directory unless ``create`` is False."""
        k = (self.prefix + key)
        # NOTE(review): the shard directory comes from the prefixed name, so a
        # non-empty prefix puts every entry in the same directory.
        sub = self.root / k[:2] / (k + ".json")
        if create:
            sub.parent.mkdir(parents=True, exist_ok=True)
        return sub

    def get(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached value for ``key``, or None when absent or unreadable."""
        # Lookups should not leave empty shard directories behind.
        p = self._path(key, create=False)
        try:
            return json_loads(p.read_text(encoding="utf-8"))
        except Exception:
            # Missing file or corrupt JSON both count as a cache miss.
            return None

    def put(self, key: str, value: Dict[str, Any]):
        """Store ``value`` under ``key``, replacing any previous entry."""
        p = self._path(key)
        payload = json_dumps(value)
        # Write to a sibling temp file, then rename: os.replace is atomic, so
        # readers never see a half-written file and a crash mid-write cannot
        # leave a truncated cache entry.
        tmp = p.with_name(f"{p.name}.{os.getpid()}.{threading.get_ident()}.tmp")
        with self.lock:
            try:
                tmp.write_text(payload, encoding="utf-8")
                os.replace(tmp, p)
            finally:
                if tmp.exists():
                    try:
                        tmp.unlink()
                    except OSError:
                        pass
and a and len(a) >= 30: + qas.append({"q": q, "a": a}) + # ensure minimum count target + target = QA_TARGET_SHORT if rec_is_short else QA_TARGET_DEFAULT + if len(qas) < target: + need = target - len(qas) + # ask for a top-up + add = perform_translate("__QATOPUP__", rec_text_sample, need) # overloaded: returns dict {"qa":[...]} + extra = [] + if isinstance(add, dict): + for qa in add.get("qa", []) or []: + if not isinstance(qa, dict): continue + q = clamp_words(sanitize_text(str(qa.get("q", ""))), 16) + a = sanitize_text(str(qa.get("a", ""))) + if q and a and len(a) >= 30: + extra.append({"q": q, "a": a}) + if extra: + qas.extend(extra[:need]) + quality_flags.append("qa_topped_up") + stats["qa_topped_up"] += 1 + + # Language guard (per-field) + def _guard_lang(field_value: str, field_name: str) -> str: + if not field_value: + return field_value + detected = detect_lang_quick(field_value) + if detected and target_lang and detected != target_lang: + tr = perform_translate(field_name, field_value, 0) # 0 = translate exactly this string + if isinstance(tr, dict): + txt = sanitize_text(str(tr.get("text", ""))) + else: + txt = sanitize_text(str(tr) if tr else "") + if txt: + quality_flags.append(f"{field_name}_translated") + stats["translated_fields"] += 1 + return txt + return field_value + + headline = _guard_lang(headline, "headline") + summary = _guard_lang(summary, "summary") + # translate Q&A fields if needed + fixed_qas = [] + for qa in qas: + q = _guard_lang(qa["q"], "qa_q") + a = _guard_lang(qa["a"], "qa_a") + fixed_qas.append({"q": q, "a": a}) + qas = fixed_qas + + return { + "headline": headline, + "summary": summary, + "keywords": kws, + "entities": ents, + "qa": qas, + "quality_flags": quality_flags, + } + +# ------------------------- +# Worker +# ------------------------- + +@dataclass +class Args: + inp: str + out: str + shadow_out: str + ollama: str + model: str + summary_lang: str + concurrency: int + keep_alive: str + timeout: int + min_chars: int + 
def enrich_one(
    rec: Dict[str, Any],
    *,
    args: Args,
    cache_main: Cache,
    cache_tr: Cache,
    sem: threading.BoundedSemaphore,
    stats: Dict[str, int],
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Enrich one corpus record and return (enriched_record, shadow_record).

    Paths, in order:
      1. Short text (< args.min_chars): built locally, no LLM call.
      2. Cache hit in ``cache_main`` (skipped when args.force is set).
      3. LLM call (up to 3 attempts) followed by
         ``enforce_schema_and_language``; the result is stored in ``cache_main``.
      4. Fallback built from the text itself when every attempt failed;
         ``enrichment_meta["ok"]`` is False and ``stats["fallbacks"]`` is bumped.

    Concurrency: ``sem`` is held only for the duration of a single HTTP call.
    The post-processing step calls ``perform_translate``, which needs its own
    permit; holding a permit across post-processing would make each worker
    wait for a second permit while owning one, which blocks forever once all
    permits are owned by workers in that state (always, with one permit).
    Back-off sleeps also happen without a permit so other workers can proceed.
    """
    base_text = sanitize_text(pick_text(rec))
    rec_id = str(rec.get("id") or "")
    rec_type = str(rec.get("record_type") or "")
    doc_hint = build_doc_hint(rec)

    is_short = len(base_text) < args.min_chars
    sampled = base_text if len(base_text) <= args.max_text else head_mid_tail_sample(base_text, args.max_text)
    qa_target = QA_TARGET_SHORT if is_short else QA_TARGET_DEFAULT

    def _assemble(payload: Dict[str, Any], meta: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        # Merge enrichment fields into a copy of the record and render its shadow.
        enriched = dict(rec)
        enriched.update({
            "headline": payload.get("headline"),
            "summary": payload.get("summary"),
            "keywords": payload.get("keywords"),
            "entities": payload.get("entities"),
            "qa": payload.get("qa"),
            "enrichment_meta": meta,
        })
        shadow = render_shadow(rec, payload)
        # embedding hint prefers shadow_text
        enriched["embedding_text_hint"] = shadow["shadow_text"]
        return enriched, shadow

    def _generate(sys_prompt: str, usr_prompt: str, options: Dict[str, Any]) -> Any:
        # The permit covers exactly one request to the model server.
        with sem:
            return ollama_generate_json(args.ollama, args.model, sys_prompt, usr_prompt,
                                        keep_alive=args.keep_alive, timeout=args.timeout, options=options)

    def _pause(attempt: int, tries: int, backoff: float = 1.5) -> None:
        # Exponential back-off between attempts; nothing to wait for after the last one.
        if attempt + 1 < tries:
            time.sleep(backoff ** (attempt + 1))

    # short fast-path (no LLM)
    if is_short:
        enrichment = {
            "headline": (rec.get("title") or base_text[:80]).strip(),
            "summary": clamp_sentences(base_text[:400], 1, 3),
            "keywords": [],
            "entities": [],
            "qa": [],
            "model": None,
            "prompt_version": PROMPT_VERSION,
            "cached": True,
            "strategy": "short-fastpath",
        }
        return _assemble(enrichment, {
            "model": None,
            "prompt_version": PROMPT_VERSION,
            "cached": True,
            "strategy": "short-fastpath",
            "ok": True,
            "error": None,
        })

    # OCR noise guard: nudge the prompt to produce a descriptive summary
    if looks_like_ocr_noise(sampled):
        doc_hint += " The TEXT appears noisy/garbled (possibly OCR). Summarize what the document likely conveys and any clearly legible details; avoid copying garbled strings."

    # caching
    key = stable_hash(sampled, args.model, args.summary_lang, rec_id, rec_type)
    if not args.force:
        hit = cache_main.get(key)
        if hit is not None:
            stats["cache_hits"] += 1
            return _assemble(hit, {
                "model": hit.get("model"),
                "prompt_version": hit.get("prompt_version"),
                "cached": True,
                "strategy": hit.get("strategy"),
                "ok": True,
                "error": None,
            })

    # tiny helper: translation or QA top-up calls (cached for translations)
    def perform_translate(kind: str, payload: str, need_pairs: int) -> Any:
        # Returns {"qa": [...]} for kind "__QATOPUP__", otherwise {"text": ...}.
        if kind == "__QATOPUP__":
            # request exactly need_pairs additional pairs
            sys_prompt = build_system(args.summary_lang)
            usr_prompt = build_user_qa_topup(sampled, args.summary_lang, need_pairs)
            opts = {"temperature": 0.2, "repeat_penalty": 1.1, "top_p": 0.9, "num_predict": 280}
            tries = 2
            for attempt in range(tries):
                try:
                    return _generate(sys_prompt, usr_prompt, opts)
                except Exception:
                    _pause(attempt, tries)
            # failure → empty result
            return {"qa": []}

        # per-field translation caching
        tr_key = stable_hash(payload, args.model, args.summary_lang, kind, "translate")
        if not args.force:
            tr_hit = cache_tr.get(tr_key)
            if tr_hit is not None:
                return tr_hit
        sys_prompt = build_system_translate(args.summary_lang)
        usr_prompt = build_user_translate(payload, args.summary_lang)
        opts = {"temperature": 0.2, "repeat_penalty": 1.05, "top_p": 0.9, "num_predict": 200}
        tries = 2
        for attempt in range(tries):
            try:
                translated = _generate(sys_prompt, usr_prompt, opts)
                # normalize
                if not isinstance(translated, dict):
                    translated = {"text": sanitize_text(str(translated))}
                else:
                    # `or ""` keeps a JSON null from becoming the text "None"
                    translated["text"] = sanitize_text(str(translated.get("text") or ""))
                cache_tr.put(tr_key, translated)
                return translated
            except Exception:
                _pause(attempt, tries)
        return {"text": payload}  # give up: return original

    def _normalise(raw: Any) -> Dict[str, Any]:
        # Coerce the model response into the dict shape the post-processor reads.
        if not isinstance(raw, dict):
            return {"headline": "", "summary": sanitize_text(str(raw)), "keywords": [], "entities": [], "qa": []}
        for field in ("headline", "summary"):
            if isinstance(raw.get(field), str):
                raw[field] = sanitize_text(raw[field])

        # A null or non-list array is treated as empty instead of raising,
        # which would otherwise burn every retry and end in the fallback.
        kws = raw.get("keywords")
        raw["keywords"] = [sanitize_text(str(x)) for x in kws if str(x).strip()] if isinstance(kws, list) else []

        entities = []
        raw_entities = raw.get("entities")
        for ent in raw_entities if isinstance(raw_entities, list) else []:
            if isinstance(ent, dict):
                name = sanitize_text(str(ent.get("name", "")))
                typ = sanitize_text(str(ent.get("type", "OTHER")))
                if name:
                    entities.append({"name": name, "type": typ})
        raw["entities"] = entities

        pairs = []
        raw_pairs = raw.get("qa")
        for qa in raw_pairs if isinstance(raw_pairs, list) else []:
            if isinstance(qa, dict):
                q = sanitize_text(str(qa.get("q", "")))
                a = sanitize_text(str(qa.get("a", "")))
                if q and a:
                    pairs.append({"q": q, "a": a})
        raw["qa"] = pairs
        return raw

    # main call
    system = build_system(args.summary_lang)
    user = build_user_main(sampled, args.summary_lang, doc_hint, qa_target)
    options = {"temperature": 0.2, "repeat_penalty": 1.1, "top_p": 0.9, "num_predict": 320}

    tries, last_exc = 3, None
    for attempt in range(tries):
        try:
            out = _normalise(_generate(system, user, options))

            # post-enforce schema + language (runs WITHOUT a permit, see docstring)
            fixed = enforce_schema_and_language(
                out,
                target_lang=args.summary_lang,
                rec_text_sample=sampled,
                rec_is_short=is_short,
                perform_translate=perform_translate,
                stats=stats,
            )

            result = {
                "headline": fixed["headline"],
                "summary": fixed["summary"],
                "keywords": fixed["keywords"],
                "entities": fixed["entities"],
                "qa": fixed["qa"],
                "quality_flags": fixed["quality_flags"],
                "model": args.model,
                "prompt_version": PROMPT_VERSION,
                "cached": False,
                "strategy": "sampled" if len(base_text) > args.max_text else "full",
            }

            # save to cache
            cache_main.put(key, result)

            return _assemble(result, {
                "model": args.model,
                "prompt_version": PROMPT_VERSION,
                "cached": False,
                "strategy": result["strategy"],
                "ok": True,
                "error": None,
                "quality_flags": result["quality_flags"],
            })
        except Exception as exc:
            # Any failure (request or post-processing) triggers another attempt.
            last_exc = exc
            _pause(attempt, tries)

    # fallback if everything failed
    stats["fallbacks"] += 1
    fallback = {
        "headline": (rec.get("title") or sampled.split("\n", 1)[0][:80]).strip(),
        "summary": clamp_sentences(sampled[:1000], 2, 4),
        "keywords": [],
        "entities": [],
        "qa": [],
        "model": None,
        "prompt_version": PROMPT_VERSION,
        "cached": False,
        "strategy": f"fallback:{type(last_exc).__name__ if last_exc else 'error'}",
        "quality_flags": ["fallback"],
    }
    return _assemble(fallback, {
        "model": None,
        "prompt_version": PROMPT_VERSION,
        "cached": False,
        "strategy": fallback["strategy"],
        "ok": False,
        "error": str(last_exc) if last_exc else "unknown",
        "quality_flags": ["fallback"],
    })
head/mid/tail to this many chars") + p.add_argument("--force", action="store_true", help="Ignore cache and regenerate everything") + p.add_argument("--cache-dir", default=".rag_cache", help="Directory for per-record JSON cache") + p.add_argument("--dry-run", action="store_true", help="Do the work but do not write outputs") + p.add_argument("--verbose", action="store_true", help="Verbose logging") + return p.parse_args() + +# ------------------------- +# Main +# ------------------------- + +def run_enrich(inp: Path, out: Path, shadow_out: Path, *, + summary_lang: str = "auto", + on_progress: Optional[Callable[[str, float, str], None]] = None, + cancellation_event: Optional[threading.Event] = None, **opts) -> dict: + args = Args( + inp=str(inp), + out=str(out), + shadow_out=str(shadow_out), + ollama=opts.get("ollama", "http://localhost:11434"), + model=opts.get("model", "phi4:latest"), + summary_lang=summary_lang, + concurrency=opts.get("concurrency", max(2, (os.cpu_count() or 4)//2)), + keep_alive=opts.get("keep_alive", "15m"), + timeout=opts.get("timeout", 120), + min_chars=opts.get("min_chars", 120), + max_text=opts.get("max_text", 12000), + force=opts.get("force", False), + cache_dir=opts.get("cache_dir", ".rag_cache"), + verbose=opts.get("verbose", False), + ) + + src = Path(args.inp).expanduser().resolve() + if not src.exists(): + return {"status": "error", "message": f"Input not found: {src}"} + + out_path = Path(args.out).expanduser().resolve() + shadow_path = Path(args.shadow_out).expanduser().resolve() + out_path.parent.mkdir(parents=True, exist_ok=True) + shadow_path.parent.mkdir(parents=True, exist_ok=True) + + out_path.write_text("", encoding="utf-8") + shadow_path.write_text("", encoding="utf-8") + + cache_main = Cache(Path(args.cache_dir), prefix="enrich_") + cache_tr = Cache(Path(args.cache_dir), prefix="trans_") + + sem = threading.BoundedSemaphore(max(1, args.concurrency)) + lock_out = threading.Lock() + lock_sh = threading.Lock() + + if 
def main():
    """
    CLI entry point: parse arguments and run the enrichment.

    Raises:
        SystemExit: with the error message when ``run_enrich`` reports
            ``status == "error"`` (for example a missing input file), so the
            process does not exit silently with code 0.
    """
    a = parse_args()

    # These four are passed explicitly below. Forwarding them again through
    # **kwargs makes the call fail with "got multiple values for argument".
    explicit = {"inp", "out", "shadow_out", "summary_lang"}
    extra_opts = {name: value for name, value in vars(a).items() if name not in explicit}

    result = run_enrich(
        Path(a.inp), Path(a.out), Path(a.shadow_out),
        summary_lang=a.summary_lang,
        on_progress=lambda p, pct, d: print(f"[{p}] {pct*100:.1f}%: {d}"),
        **extra_opts
    )
    if isinstance(result, dict) and result.get("status") == "error":
        raise SystemExit(result.get("message") or "Enrichment failed.")
def chunk_text(txt: str, target_chars: int = 2500, overlap_chars: int = 200) -> Iterable[str]:
    """
    Split text into chunks by greedily packing whole paragraphs.

    Paragraphs are the non-blank pieces between "\\n\\n" separators. They are
    appended to the current chunk until adding the next one would push the
    running size past ``target_chars``; the chunk is then emitted and, when
    ``overlap_chars`` > 0 and the chunk is longer than that, its last
    ``overlap_chars`` characters seed the next chunk.

    A paragraph is never split, so one paragraph longer than ``target_chars``
    produces a chunk that exceeds the target.

    Args:
        txt: Text to split. ``None``, empty or whitespace-only input yields nothing.
        target_chars: Soft upper bound for the size of a chunk.
        overlap_chars: Number of trailing characters repeated at the start of
            the following chunk; 0 disables overlap.

    Yields:
        Chunk strings with paragraphs joined by "\\n\\n".
    """
    # paragraph-first greedy pack
    paras = [p.strip() for p in (txt or "").split("\n\n") if p.strip()]
    if not paras:
        # No paragraph survived stripping, so there is no text at all.
        return

    buf: List[str] = []
    size = 0
    for p in paras:
        if buf and size + len(p) + 2 > target_chars:
            chunk = "\n\n".join(buf)
            yield chunk
            if overlap_chars > 0 and len(chunk) > overlap_chars:
                tail = chunk[-overlap_chars:]
                buf, size = [tail], len(tail)
            else:
                buf, size = [], 0
        buf.append(p)
        size += len(p) + 2  # +2 accounts for the "\n\n" joiner
    if buf:
        yield "\n\n".join(buf)
def kind_from_rec(rec: Dict[str, Any]) -> str:
    """
    Classify a record into a coarse kind label.

    The lower-cased "record_type" and "mime" values are checked against the
    rules below in order; the first match wins. Without a match the
    lower-cased record type is returned, or "file" when that is empty.
    """
    record_type = (rec.get("record_type") or "").lower()
    mime_type = (rec.get("mime") or "").lower()

    # (label, matched) pairs in priority order
    rules = (
        ("image", record_type == "image" or mime_type.startswith("image/")),
        ("av", record_type == "av" or mime_type.startswith(("audio/", "video/"))),
        ("html", "html" in mime_type or record_type == "html-section"),
        ("pdf", "pdf" in mime_type or record_type == "page"),
        ("code", record_type == "code-summary" or mime_type.startswith("text/x-code")),
    )
    for label, matched in rules:
        if matched:
            return label

    if record_type:
        return record_type
    return "file"
def build_shadow_any(
    shadow_jsonl: Optional[Path],
    enhanced_jsonl: Optional[Path],
    raw_jsonl: Optional[Path],
    out_index: Path,
    out_meta: Path,
    *,
    ollama: str,
    model: str,
    concurrency: int
) -> Tuple[int, int, int]:
    """
    Embed one shadow text per record and write a FAISS index plus its metadata.

    The first existing input wins, in the order shadow_jsonl, enhanced_jsonl,
    raw_jsonl. Shadow records contribute their "shadow_text" field; for the
    other two sources the text is synthesised with synth_shadow_from_enhanced /
    synth_shadow_from_raw. Records whose shadow text is blank are skipped.

    Vectors are L2-normalised and stored in an inner-product index under
    sequential ids starting at 0; the same id is written into the matching
    line of ``out_meta``.

    Returns:
        (n_input_records, n_indexed, dim)

    Raises:
        SystemExit: no usable input file, empty input, or nothing to embed.
    """
    # Candidate sources in priority order; the label doubles as the mode name.
    candidates = (
        ("shadow", shadow_jsonl),
        ("enhanced->shadow", enhanced_jsonl),
        ("raw->shadow", raw_jsonl),
    )
    mode = ""
    src_records: List[Dict[str, Any]] = []
    for label, candidate in candidates:
        if candidate and candidate.exists():
            mode = label
            src_records = list(read_jsonl(candidate))
            break
    else:
        raise SystemExit("[ERR] No input for shadow index (need --shadow OR --enhanced OR --raw).")

    if not src_records:
        raise SystemExit("[ERR] Empty input for shadow index.")

    def _shadow_text_for(record: Dict[str, Any]) -> str:
        if mode == "shadow":
            return record.get("shadow_text") or ""
        if mode == "enhanced->shadow":
            return synth_shadow_from_enhanced(record)
        return synth_shadow_from_raw(record)

    metas: List[Dict[str, Any]] = []
    for record in src_records:
        shadow_text = _shadow_text_for(record)
        if not shadow_text.strip():
            continue

        record_id = record.get("id") or record.get("record_id") or str(uuid.uuid4())
        metas.append({
            "id": None,  # numeric FAISS id later
            "record_id": record_id,
            "doc_id": derive_doc_id_from_any(record_id, record.get("parent_id")),
            "title": record.get("title"),
            "url": record.get("url") or record.get("source_path"),
            "record_type": record.get("record_type"),
            "mime": record.get("mime"),
            "lang": record.get("lang"),
            "kind": kind_from_rec(record),
            "shadow_text": shadow_text,
        })

    texts: List[str] = [entry["shadow_text"] for entry in metas]
    if not texts:
        raise SystemExit("[ERR] no shadow_text to embed")

    vectors = embed_many(ollama, model, texts, concurrency=concurrency)
    dim = len(vectors[0])
    matrix = norm_f32(np.vstack(vectors))

    index = faiss.IndexIDMap2(faiss.IndexFlatIP(dim))

    out_meta.parent.mkdir(parents=True, exist_ok=True)
    batch = 512
    total = min(len(metas), len(matrix))
    with open(out_meta, "w", encoding="utf-8") as meta_file:
        # Per batch: write the metadata lines first, then add the vectors.
        for start in range(0, total, batch):
            stop = min(start + batch, total)
            for faiss_id in range(start, stop):
                entry = metas[faiss_id]
                entry["id"] = faiss_id
                meta_file.write(json.dumps(entry, ensure_ascii=False) + "\n")
            index.add_with_ids(matrix[start:stop], np.arange(start, stop, dtype="int64"))

    faiss.write_index(index, str(out_index))
    return (len(src_records), index.ntotal, dim)
embed_many(ollama, model, texts, concurrency=concurrency) + d = len(vecs[0]) + mat = norm_f32(np.vstack(vecs)) + + base = faiss.IndexFlatIP(d) + index = faiss.IndexIDMap2(base) + + out_meta.parent.mkdir(parents=True, exist_ok=True) + with open(out_meta, "w", encoding="utf-8") as mf: + buf_vecs, buf_ids = [], [] + next_id = 0 + for m, v in zip(metas, mat): + m["id"] = next_id + mf.write(json.dumps(m, ensure_ascii=False) + "\n") + buf_vecs.append(v) + buf_ids.append(next_id) + next_id += 1 + if len(buf_vecs) >= 512: + index.add_with_ids(np.vstack(buf_vecs), np.array(buf_ids, dtype="int64")) + buf_vecs, buf_ids = [], [] + if buf_vecs: + index.add_with_ids(np.vstack(buf_vecs), np.array(buf_ids, dtype="int64")) + + faiss.write_index(index, str(out_index)) + return (len(src_records), index.ntotal, d) + +# ----------------------------- +# CLI +# ----------------------------- +def run_index(raw: Path|None, enhanced: Path|None, shadow: Path|None, out_dir: Path, *, + on_progress=None, **opts) -> dict: + + args = argparse.Namespace( + raw=raw, + enhanced=enhanced, + shadow=shadow, + out_dir=out_dir, + embed_model=opts.get("embed_model", "dengcao/Qwen3-Embedding-0.6B:F16"), + ollama=opts.get("ollama", "http://localhost:11434"), + target_chars=opts.get("target_chars", 2500), + overlap_chars=opts.get("overlap_chars", 200), + concurrency=opts.get("concurrency", 6), + no_shadow=opts.get("no_shadow", False), + no_content=opts.get("no_content", False), + ) + + ensure_dir(out_dir) + + shadow_index_path = out_dir / "shadow.index.faiss" + shadow_meta_path = out_dir / "shadow.meta.jsonl" + content_index_path = out_dir / "content.index.faiss" + content_meta_path = out_dir / "content.meta.jsonl" + + results = {} + built_any = False + + if not args.no_shadow: + if on_progress: on_progress("shadow", 0.1, "Building shadow index...") + s_tot, s_ix, s_dim = build_shadow_any( + args.shadow, args.enhanced, args.raw, + shadow_index_path, shadow_meta_path, + ollama=args.ollama, 
def main():
    """
    CLI entry point: parse arguments and build the requested indexes.

    Input paths are converted to ``Path`` (or ``None`` when not given) and
    passed positionally; every other option is forwarded to ``run_index``
    by keyword.
    """
    ap = argparse.ArgumentParser(description="Build FAISS indexes (shadow + content) for hybrid RAG with or without enrichment.")
    ap.add_argument("--raw", help="Raw corpus JSONL (from 01_corpus_builder.py)")
    ap.add_argument("--enhanced", help="Enhanced corpus JSONL (from 02_corpus_enricher.py)")
    ap.add_argument("--shadow", help="Shadow corpus JSONL (from 02_corpus_enricher.py)")
    ap.add_argument("--out-dir", default="indexes", help="Output directory for indexes + metadata")
    ap.add_argument("--embed-model", default="dengcao/Qwen3-Embedding-0.6B:F16", help="Ollama embedding model")
    ap.add_argument("--ollama", default="http://localhost:11434", help="Ollama base URL")
    ap.add_argument("--target-chars", type=int, default=2500, help="Chunk size for content index")
    ap.add_argument("--overlap-chars", type=int, default=200, help="Overlap size for content index")
    ap.add_argument("--concurrency", type=int, default=6, help="Parallel HTTP workers for embeddings")
    ap.add_argument("--no-shadow", action="store_true", help="Do not build shadow index")
    ap.add_argument("--no-content", action="store_true", help="Do not build content index")
    args = ap.parse_args()

    # These four are passed positionally below. Forwarding them again through
    # **kwargs makes the call fail with "got multiple values for argument".
    positional = {"raw", "enhanced", "shadow", "out_dir"}
    extra_opts = {name: value for name, value in vars(args).items() if name not in positional}

    run_index(
        Path(args.raw) if args.raw else None,
        Path(args.enhanced) if args.enhanced else None,
        Path(args.shadow) if args.shadow else None,
        Path(args.out_dir),
        on_progress=lambda p, pct, d: print(f"[{p}] {pct*100:.1f}%: {d}"),
        **extra_opts
    )
def derive_doc_id(rec: Dict) -> str:
    """Return the document identifier a metadata record belongs to.

    Resolution order:
      1. an explicit, truthy ``doc_id`` is returned unchanged;
      2. otherwise ``record_id`` (falling back to ``id``) is used, keeping
         only the part before the first ``#``.

    ``id`` may be an integer (the metadata loader keys records by
    ``int(rec["id"])``), so the fallback value is converted to ``str``
    before splitting; an ``id`` of ``0`` is a valid identifier and is not
    treated as missing. Returns ``""`` when no identifier is present.
    """
    did = rec.get("doc_id")
    if did:
        return did

    rid = rec.get("record_id")
    if rid is None or rid == "":
        rid = rec.get("id")
    if rid is None:
        return ""
    # str() guards against integer ids, which have no .split().
    return str(rid).split("#", 1)[0]
def apply_per_source_cap(ordered: List[Dict], per_source_limit: int) -> List[Dict]:
    """Keep at most ``per_source_limit`` records per source, preserving order.

    A record's source is its ``url``, else its ``doc_id``, else its
    ``title``, else the string form of its ``id``. A limit of zero or less
    disables the cap and returns the input list itself.
    """
    if per_source_limit <= 0:
        return ordered

    taken_per_source = {}
    kept = []
    for item in ordered:
        source = item.get("url") or item.get("doc_id") or item.get("title") or str(item.get("id"))
        already_taken = taken_per_source.get(source, 0)
        if already_taken >= per_source_limit:
            # This source has used up its quota; drop the record.
            continue
        taken_per_source[source] = already_taken + 1
        kept.append(item)
    return kept
def faiss_search(index: faiss.Index, qvec: np.ndarray, k: int) -> Tuple[List[int], List[float]]:
    """Search ``index`` for the ``k`` nearest neighbours of one query vector.

    Args:
        index: object exposing ``search(qvec, k) -> (sims, ids)``.
        qvec: query matrix; only the first row of the result is read.
        k: number of neighbours to request.

    Returns:
        ``(ids, sims)``: two equally long lists, in the order the index
        returned them, with every slot whose id is ``-1`` removed.

    A non-positive ``k`` (e.g. ``min(pool, index.ntotal)`` on an empty
    index) returns two empty lists without calling the index.
    """
    if k <= 0:
        return [], []

    sims, ids = index.search(qvec, k)
    # Filter id/similarity pairs together so a similarity can never end up
    # attached to the wrong id, wherever the -1 slots are.
    hits = [(int(i), float(s)) for i, s in zip(ids[0], sims[0]) if i != -1]
    return [i for i, _ in hits], [s for _, s in hits]
def _parse_kind_weights(spec: Optional[str]) -> Dict[str, float]:
    """Parse a ``'kind:weight,kind:weight'`` string into a lower-cased kind -> weight map."""
    weights: Dict[str, float] = {}
    for item in (spec or "").split(","):
        item = item.strip()
        if ":" not in item:
            continue
        kind, raw_weight = item.split(":", 1)
        try:
            weights[kind.strip().lower()] = float(raw_weight)
        except ValueError:
            # Best effort: a malformed weight is skipped, not fatal.
            continue
    return weights


def _rerank_scores(args, pool: List[Dict]) -> Optional[List[float]]:
    """Return one cross-encoder score per entry of ``pool``, or None if reranking is off, unavailable or failed."""
    if args.no_rerank or not pool or not sentence_transformers_available():
        return None

    docs = [truncate_text(pick_any_text(c), args.max_doc_chars) for c in pool]
    pairs = rerank_subprocess(
        args.query, docs,
        worker_path=Path(__file__),
        model=args.rerank_model,
        device=args.rerank_device,
        dtype=args.rerank_dtype,
        batch=args.rerank_batch,
        maxlen=args.rerank_maxlen,
    )
    if pairs is None:
        print("[info] rerank disabled (worker failed).", file=sys.stderr)
        return None

    scores: List[Optional[float]] = [None] * len(pool)
    for local_idx, score in pairs:
        if 0 <= local_idx < len(scores):
            scores[local_idx] = float(score)
    # Entries the worker did not score get the lowest observed score.
    floor = min((s for s in scores if s is not None), default=0.0)
    return [floor if s is None else s for s in scores]


def _blend_rerank(pool: List[Dict], base_scores: List[float], rerank_scores: List[float], alpha: float) -> None:
    """Write ``_score`` (z-score blend of base and rerank) and ``_rerank`` into each record of ``pool``."""
    z_base = zscore(base_scores)
    z_rerank = zscore(rerank_scores)
    for rec, base, rr_z, rr_raw in zip(pool, z_base, z_rerank, rerank_scores):
        rec["_score"] = float((1 - alpha) * base + alpha * rr_z)
        rec["_rerank"] = float(rr_raw)


def run_cli(args, on_stream=None):
    """Run retrieval (plus optional rerank / answer generation) for ``args.query``.

    Modes:
      * HYBRID when all four of ``shadow_index``, ``shadow_store``,
        ``content_index`` and ``content_store`` are set;
      * SINGLE-INDEX otherwise, using the first complete pair among
        legacy (``index``/``store``), content, shadow.

    Args:
        args: namespace of options. ``index``, ``store`` and ``candidates``
            are optional attributes (defaults ``None``, ``None``, ``200``)
            so namespaces that do not define them are accepted.
        on_stream: optional callback forwarded to ``output_or_answer``.

    Returns:
        The dict produced by ``output_or_answer``. When hybrid retrieval
        finds nothing, ``{"done": True, "sources": []}`` is returned (the
        diagnostic messages are still printed).

    Raises:
        ValueError: if no complete index/store pair was supplied.
    """
    hybrid_ok = all([args.shadow_index, args.shadow_store, args.content_index, args.content_store])

    # Not every caller builds its namespace with the argparse parser, so the
    # legacy-only attributes are read defensively.
    legacy_index = getattr(args, "index", None)
    legacy_store = getattr(args, "store", None)
    pool_size = getattr(args, "candidates", 200)

    single_pair: Optional[Tuple[str, str]] = None
    if not hybrid_ok:
        # Prefer legacy if provided
        if legacy_index and legacy_store:
            single_pair = (legacy_index, legacy_store)
        elif args.content_index and args.content_store:
            single_pair = (args.content_index, args.content_store)
        elif args.shadow_index and args.shadow_store:
            single_pair = (args.shadow_index, args.shadow_store)
        else:
            raise ValueError(
                "No usable index/store pair: provide both hybrid pairs or one complete single pair."
            )

    # Embed query
    q = norm_f32(embed_query(args.ollama, args.embed_model, args.query).reshape(1, -1))

    if single_pair:
        # SINGLE-INDEX path (works for legacy/content-only/shadow-only)
        index = faiss.read_index(single_pair[0])
        id2meta = load_meta(single_pair[1])

        n = min(pool_size, index.ntotal)
        ids, sims = faiss_search(index, q, n) if n > 0 else ([], [])
        candidates = []
        for _id, sim in zip(ids, sims):
            rec = dict(id2meta[_id])
            rec["_ann"] = sim
            candidates.append(rec)

        reranked = _rerank_scores(args, candidates)
        if reranked is not None:
            _blend_rerank(candidates, [c["_ann"] for c in candidates], reranked, float(args.blend))
            candidates.sort(key=lambda r: r["_score"], reverse=True)
        else:
            for rec in candidates:
                rec["_score"] = rec["_ann"]

        top = candidates[: max(1, min(args.k, len(candidates)))]
        return output_or_answer(top, args, on_stream=on_stream)

    # HYBRID path
    shadow_index = faiss.read_index(args.shadow_index)
    shadow_meta = load_meta(args.shadow_store)
    content_index = faiss.read_index(args.content_index)
    content_meta = load_meta(args.content_store)

    # Stage A: Shadow search -> doc shortlist
    n_shadow = min(args.shadow_candidates, shadow_index.ntotal)
    sid_list, s_sim = faiss_search(shadow_index, q, n_shadow) if n_shadow > 0 else ([], [])
    s_hits = [{"id": sid, "sim": sim, **shadow_meta[sid]} for sid, sim in zip(sid_list, s_sim)]

    # optional shadow weighting by kind
    kind_weights = _parse_kind_weights(args.shadow_kind_weights)
    if kind_weights:
        for h in s_hits:
            h["sim"] *= float(kind_weights.get((h.get("kind") or "").lower(), 1.0))

    # group to doc_id (max over shadow signals)
    doc_scores: Dict[str, float] = {}
    for h in s_hits:
        did = derive_doc_id(h)
        doc_scores[did] = max(doc_scores.get(did, 0.0), float(h["sim"]))

    # Stage B: Content search (global)
    n_content = min(args.content_candidates, content_index.ntotal)
    cid_list, c_sim = faiss_search(content_index, q, n_content) if n_content > 0 else ([], [])
    c_hits = [{"id": cid, "sim": sim, **content_meta[cid]} for cid, sim in zip(cid_list, c_sim)]

    # Stage C: filter to doc shortlist
    ordered_docs = sorted(doc_scores.items(), key=lambda kv: kv[1], reverse=True)[: args.doc_top]
    if not ordered_docs:
        # Fallback: derive docs from top content hits
        seen = set()
        for h in c_hits:
            did = derive_doc_id(h)
            if did not in seen:
                seen.add(did)
                ordered_docs.append((did, float(h["sim"])))
            if len(ordered_docs) >= args.doc_top:
                break
    shortlist = {d for d, _ in ordered_docs}

    # keep content hits belonging to shortlist (fallback to global if empty)
    content_for_docs = [h for h in c_hits if derive_doc_id(h) in shortlist] or c_hits

    # per-doc cap
    per_doc = max(1, args.per_doc_chunks)
    doc_buckets: Dict[str, List[Dict]] = {}
    for h in content_for_docs:
        doc_buckets.setdefault(derive_doc_id(h), []).append(h)
    for did, arr in doc_buckets.items():
        arr.sort(key=lambda r: r["sim"], reverse=True)
        doc_buckets[did] = arr[:per_doc]

    # flatten, compute final score as blend of shadow(doc) + content(chunk)
    doc_weight = float(args.doc_blend)      # weight of shadow
    chunk_weight = float(args.chunk_blend)  # weight of chunk ann
    out_candidates: List[Dict] = []
    for did, doc_sim in ordered_docs:
        for ch in doc_buckets.get(did, []):
            merged = dict(ch)
            merged["_shadow"] = float(doc_sim)
            merged["_ann"] = float(ch["sim"])
            merged["_score"] = doc_weight * float(doc_sim) + chunk_weight * float(ch["sim"])
            out_candidates.append(merged)

    if not out_candidates:
        print("No retrieval results.", file=sys.stderr)
        if args.answer:
            print("No results from retrieval; cannot answer.")
        # Return a well-formed empty result instead of None so callers can
        # treat every outcome uniformly.
        return {"done": True, "sources": []}

    out_candidates.sort(key=lambda r: r["_score"], reverse=True)

    # Optional rerank of the first pool
    pool = out_candidates[:pool_size]
    reranked = _rerank_scores(args, pool)
    if reranked is not None:
        _blend_rerank(pool, [c["_score"] for c in pool], reranked, float(args.blend))
        out_candidates[: len(pool)] = sorted(pool, key=lambda r: r["_score"], reverse=True)

    # per-source cap and top-k
    ordered = apply_per_source_cap(out_candidates, args.per_source_limit)
    top = ordered[: max(1, min(args.k, len(ordered)))]
    return output_or_answer(top, args, on_stream=on_stream)
ap.add_argument("--index", help="Single FAISS index (legacy)") + ap.add_argument("--store", help="Single metadata JSONL (legacy)") + ap.add_argument("--candidates", type=int, default=200, help="ANN neighbors to fetch (legacy or rerank pool).") + + # Hybrid I/O + ap.add_argument("--shadow-index", help="FAISS index over shadow_text") + ap.add_argument("--shadow-store", help="Metadata JSONL for shadow index") + ap.add_argument("--content-index", help="FAISS index over content chunks") + ap.add_argument("--content-store", help="Metadata JSONL for content index") + + ap.add_argument("--query", required=False) + ap.add_argument("--ollama", default="http://localhost:11434") + ap.add_argument("--embed-model", default="dengcao/Qwen3-Embedding-0.6B:F16") + + # Shadow/content retrieval sizes (hybrid) + ap.add_argument("--shadow-candidates", type=int, default=400, help="Shadow ANN pool size") + ap.add_argument("--content-candidates", type=int, default=600, help="Content ANN pool size") + ap.add_argument("--doc-top", type=int, default=40, help="Top-N documents from shadow shortlist") + ap.add_argument("--per-doc-chunks", type=int, default=2, help="Max chunks per doc from content pool") + ap.add_argument("--doc-blend", type=float, default=0.6, help="Weight for shadow score in final blend [0..1]") + ap.add_argument("--chunk-blend", type=float, default=0.4, help="Weight for content-ANN score in final blend [0..1]") + ap.add_argument("--shadow-kind-weights", default="image:1.2,code:1.1", help="Comma list 'kind:weight' to bias doc ranking") + + # Rerank knobs + ap.add_argument("--no-rerank", action="store_true", help="Disable reranking.") + ap.add_argument("--blend", type=float, default=0.75, help="Blend weight for reranker in normalized score [0..1].") + ap.add_argument("--rerank-model", default="cross-encoder/ms-marco-MiniLM-L-6-v2") + ap.add_argument("--rerank-device", default="auto", choices=["auto", "mps", "cpu"]) + ap.add_argument("--rerank-dtype", default="auto", 
def run_query(shadow_index: Path, shadow_store: Path,
              content_index: Path, content_store: Path,
              query: str, *, answer: bool = False,
              on_stream: Optional[Callable[[Dict], None]] = None, **opts) -> dict:
    """Programmatic entry point: build an options namespace and run retrieval.

    Args:
        shadow_index, shadow_store, content_index, content_store: index and
            metadata paths; falsy values become ``None``.
        query: the user question.
        answer: when True an answer is generated from the retrieved sources.
        on_stream: optional callback receiving streamed events.
        **opts: overrides for the defaults listed below; unknown keys are
            ignored. Note reranking is OFF unless ``no_rerank=False`` is
            passed.

    Returns:
        Whatever ``run_cli`` returns for the assembled namespace.
    """
    # Ensure all paths are strings for argparse.Namespace
    _shadow_index = str(shadow_index) if shadow_index else None
    _shadow_store = str(shadow_store) if shadow_store else None
    _content_index = str(content_index) if content_index else None
    _content_store = str(content_store) if content_store else None

    args = argparse.Namespace(
        # run_cli also reads the legacy pair and the rerank pool size; they
        # must exist on the namespace or attribute access fails.
        index=opts.get("index"),
        store=opts.get("store"),
        candidates=opts.get("candidates", 200),
        shadow_index=_shadow_index,
        shadow_store=_shadow_store,
        content_index=_content_index,
        content_store=_content_store,
        query=query,
        answer=answer,
        ollama=opts.get("ollama", "http://localhost:11434"),
        embed_model=opts.get("embed_model", "dengcao/Qwen3-Embedding-0.6B:F16"),
        shadow_candidates=opts.get("shadow_candidates", 400),
        content_candidates=opts.get("content_candidates", 600),
        doc_top=opts.get("doc_top", 40),
        per_doc_chunks=opts.get("per_doc_chunks", 2),
        doc_blend=opts.get("doc_blend", 0.6),
        chunk_blend=opts.get("chunk_blend", 0.4),
        shadow_kind_weights=opts.get("shadow_kind_weights", "image:1.2,code:1.1"),
        no_rerank=opts.get("no_rerank", True),  # Reranker OFF by default
        blend=opts.get("blend", 0.75),
        rerank_model=opts.get("rerank_model", "cross-encoder/ms-marco-MiniLM-L-6-v2"),
        rerank_device=opts.get("rerank_device", "auto"),
        rerank_dtype=opts.get("rerank_dtype", "auto"),
        rerank_batch=opts.get("rerank_batch", 64),
        rerank_maxlen=opts.get("rerank_maxlen", 256),
        gen_model=opts.get("gen_model", "qwen3:4b"),
        temperature=opts.get("temperature", 0.2),
        k=opts.get("k", 10),
        max_doc_chars=opts.get("max_doc_chars", 4000),
        per_source_limit=opts.get("per_source_limit", 3),
        json=True  # Force JSON-like output dict
    )

    # Only pass the callback when one was supplied, so a plain
    # (non-streaming) query does not rely on the extra keyword.
    if on_stream is None:
        return run_cli(args)
    return run_cli(args, on_stream=on_stream)

def main():
    """CLI entry point: dispatch to the rerank worker or run a hybrid query.

    In ``cli`` mode ``--query`` and all four hybrid index/store paths are
    required. With ``--answer`` every streamed event is printed as one JSON
    line; otherwise the final result is printed once as JSON.
    """
    ap = build_parser()
    args = ap.parse_args()

    if args.mode == "rerank-worker":
        return run_rerank_worker(args)

    if not args.query:
        ap.error("--query is required in cli mode")

    hybrid_ok = all([args.shadow_index, args.shadow_store, args.content_index, args.content_store])
    if not hybrid_ok:
        ap.error("For CLI use, all four index/store paths are required for hybrid retrieval.")

    # These are passed explicitly below; repeating them via **vars(args)
    # raises "TypeError: got multiple values for keyword argument".
    explicit = {"shadow_index", "shadow_store", "content_index", "content_store", "query", "answer"}
    extra_opts = {key: value for key, value in vars(args).items() if key not in explicit}

    # Events are only printed in answer mode, so no callback is needed otherwise.
    stream_printer = None
    if args.answer:
        stream_printer = lambda d: print(json.dumps(d, ensure_ascii=False), flush=True)

    result = run_query(
        shadow_index=Path(args.shadow_index),
        shadow_store=Path(args.shadow_store),
        content_index=Path(args.content_index),
        content_store=Path(args.content_store),
        query=args.query,
        answer=args.answer,
        on_stream=stream_printer,
        **extra_opts,
    )

    if not args.answer:
        print(json.dumps(result, ensure_ascii=False))
a/backend/requirements.txt b/backend/requirements.txt index 0d395cc..9109252 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -3,9 +3,24 @@ fastapi==0.111.0 uvicorn[standard]==0.30.1 SQLAlchemy==2.0.32 httpx==0.27.0 -pydantic==2.7.4 +pydantic==2.11.7 +requests>=2.32.0 # Web search enrichment dependencies beautifulsoup4==4.12.3 httpx[http2]>=0.27.0 -numpy \ No newline at end of file +numpy + +# Local RAG pipeline dependencies +faiss-cpu>=1.8.0 +PyMuPDF>=1.24.0 +ebooklib>=0.18 +chardet>=5.2.0 +Pillow>=10.0.0 +langid>=1.1.6 +langdetect>=1.0.9 + +# Optional but recommended for richer ingestion / reranking: +# openai-whisper +# opencv-python-headless +# sentence-transformers diff --git a/backend/schemas.py b/backend/schemas.py index 0cd9a88..b8c81ff 100644 --- a/backend/schemas.py +++ b/backend/schemas.py @@ -1,4 +1,4 @@ -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from typing import List, Optional from datetime import datetime @@ -38,8 +38,7 @@ class ChatSession(BaseModel): name: str created_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class SessionsResponse(BaseModel): sessions: List[ChatSession] @@ -67,3 +66,4 @@ class WebSearchRequest(BaseModel): class WebSearchResponse(BaseModel): enriched_prompt: str sources: List[str] = [] + context_block: str = "" diff --git a/dist/assets/index-Cc0DLWqA.css b/dist/assets/index-Cc0DLWqA.css new file mode 100644 index 0000000..0502232 --- /dev/null +++ b/dist/assets/index-Cc0DLWqA.css @@ -0,0 +1 @@ +:root{--bg: #0b1020;--panel: #141b34;--text: #e6e8ef;--muted: #9aa3b2;--accent: #6ea8fe;--border: #24304f}*{box-sizing:border-box}html,body,#root{height:100%;margin:0}body{background:var(--bg);color:var(--text);font-family:ui-sans-serif,system-ui,-apple-system,Segoe 
UI,Roboto,Inter,Helvetica,Arial}.no-select{-webkit-user-select:none;user-select:none;cursor:ew-resize!important}.app{display:grid;grid-template-columns:var(--sidebar-width, 230px) 1fr;grid-template-rows:1fr;height:100%}.sidebar{display:grid;grid-template-rows:auto 1fr auto;background:var(--panel);border-right:1px solid var(--border);height:100vh;overflow:hidden;position:relative}.sidebar span{font-size:14px}.resizer{width:13px;cursor:ew-resize;background:transparent;position:absolute;top:0;right:-5px;bottom:0;z-index:1}.sidebar-header{display:flex;justify-content:space-around;padding:0;background:var(--panel)}.sidebar-tab{flex-grow:1;text-align:center;padding:11px 16px;cursor:pointer;border-bottom:3px solid transparent;transition:background-color .2s ease}.sidebar-tab:hover{background-color:var(--hover-bg)}.sidebar-tab.active{background-color:var(--active-bg);border-bottom-color:var(--accent)}.sidebar-tab.active:hover{background-color:var(--active-bg)}.sidebar-content{flex-grow:1;overflow-y:auto}.db-list,.settings-list{padding:8px 0}.empty-list-message{padding:10px 16px;color:var(--muted);text-align:center}.settings-item{padding:10px 16px;cursor:pointer;border-left:3px solid transparent}.settings-item.active{background:var(--active-bg);border-left-color:var(--accent)}.settings-item.active:hover{background:var(--active-bg)}.settings-item:hover{background:var(--hover-bg)}.settings-footer-placeholder{height:40px;padding:12px 16px;border-top:1px solid var(--border);background:var(--panel)}.new-db-button{width:100%;padding:10px;background:var(--accent);border-color:var(--accent);color:var(--bg);font-weight:700}.new-db-button:hover{opacity:.9}.chat-list{overflow-y:auto;padding:8px 0}.chat-item{display:flex;justify-content:space-between;align-items:center;padding:10px 16px;cursor:pointer;border-left:3px solid transparent;overflow:hidden;white-space:nowrap}.chat-item 
span{flex-grow:1;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.chat-item-buttons{display:flex;gap:8px;align-items:center}.unread-dot{width:8px;height:8px;background-color:red;border-radius:50%;margin-right:4px}.icon-button{background:none;border:none;color:var(--muted);cursor:pointer;font-size:16px;padding:0;display:flex;align-items:center;justify-content:center}.icon-button svg{width:16px;height:16px;stroke:var(--muted);transition:stroke .2s ease}.icon-button:hover svg{stroke:var(--accent)}.icon-button:hover{color:var(--accent)}.chat-item.active{background:var(--active-bg);border-left-color:var(--accent)}.chat-item:hover{background:var(--hover-bg)}.chat-item.active:hover{background:var(--active-bg)}.rename-input{background:var(--input-bg);border:1px solid var(--accent);color:var(--text);border-radius:10px;padding:8px 12px;outline:none;width:100%}::-webkit-scrollbar{width:8px}::-webkit-scrollbar-track{background:transparent}::-webkit-scrollbar-thumb{background:var(--muted);border-radius:4px}::-webkit-scrollbar-thumb:hover{background:var(--accent)}.sidebar-footer{padding:12px 16px;background:var(--panel)}.new-db-form,.library-inline-form{display:grid;gap:8px}.new-db-actions{display:flex;gap:8px}.form-error{color:#ff9aa8;font-size:12px}.new-chat-button{width:100%;padding:10px;background:var(--accent);border-color:var(--accent);color:var(--bg);font-weight:700}.new-chat-button:hover{opacity:.9}.main-content{position:relative;display:grid;grid-template-rows:auto 1fr auto;height:100vh;overflow:hidden}.header{display:flex;align-items:center;gap:12px;padding:12px 16px;border-bottom:1px solid var(--border);background:var(--panel)}.select,.input,.button{background:var(--input-bg);border:1px solid var(--border);color:var(--text);border-radius:10px;padding:8px 12px;outline:none}.footer-content-wrapper 
.button{flex-shrink:0}.select{min-width:220px}.button{cursor:pointer}.button:hover{border-color:var(--accent)}.button.ghost{background:transparent}.button.danger{border-color:#8f3d49;color:#ffb8c2}.button.danger:hover{border-color:#d86a79}.header-subtle{color:var(--muted);font-size:13px}.chat{display:grid;grid-template-columns:1fr minmax(auto,1000px) 1fr;align-content:start;gap:8px;padding:16px;overflow:auto}.chat>*{grid-column:2}.msg{padding:12px 14px;border-radius:12px;line-height:1.5;white-space:wrap;font-family:ui-sans-serif,system-ui,-apple-system,Segoe UI,Roboto,Inter,Helvetica,Arial}textarea.input{resize:none;font-family:ui-sans-serif,system-ui,-apple-system,Segoe UI,Roboto,Inter,Helvetica,Arial;overflow-y:auto;flex-grow:1}.msg.user{background:var(--user-msg-bg);margin-left:auto;max-width:80%;border:1px solid var(--border);margin-right:5px;margin-bottom:15px;transition:border-color .15s ease,box-shadow .15s ease}.msg.user.editing{border-color:var(--accent);box-shadow:0 0 0 2px color-mix(in srgb,var(--accent) 35%,transparent)}.msg.user .msg-content--user{white-space:pre-wrap;overflow-wrap:anywhere;word-break:break-word;position:relative}.msg.user .user-msg-expand{margin-top:8px;padding:0;background:transparent;border:none;color:var(--muted);font-weight:700;cursor:pointer;align-self:flex-start;margin-left:0;text-align:left}.msg.assistant{background:transparent;border:none;max-width:none;animation:fadeIn .3s ease-in-out;margin-bottom:30px}.user-message-wrapper{display:flex;flex-direction:column;align-items:flex-end;position:relative}.assistant-message-wrapper{display:flex;flex-direction:column;align-items:flex-start;position:relative}.message-options-bar{display:flex;gap:2px;padding:0;border-radius:8px;background-color:transparent;border:none;position:absolute;bottom:-30px}.user-options{right:-10px;bottom:-40px}.assistant-options{left:0}.message-options-bar .icon-button{padding:4px;border-radius:4px;border:none}.message-options-bar 
.icon-button:hover{background-color:var(--hover-bg)}.new-msg-tip{position:absolute;right:24px;bottom:84px;padding:8px 12px;border-radius:9999px;background:var(--accent);color:var(--bg);border:none;cursor:pointer;font-weight:600;box-shadow:0 6px 20px #00000059}.new-msg-tip:hover{opacity:.9}@keyframes fadeIn{0%{opacity:0}to{opacity:1}}.footer{display:flex;justify-content:center;padding:12px 16px;border-top:1px solid var(--border);background:var(--panel);align-items:flex-end}.footer-content-wrapper{display:flex;gap:8px;width:100%;max-width:1000px;align-items:flex-end}.settings-content-panel{padding:20px;overflow-y:auto;height:100%}.settings-category{margin-bottom:30px}.settings-category h2{color:var(--accent);margin-bottom:15px;font-size:1.3em}.setting-section{margin-bottom:20px;padding:15px;border-bottom:1px solid var(--border);background-color:var(--panel);border-radius:8px}.setting-section:last-child{border-bottom:none;margin-bottom:0}.setting-section h3{color:var(--text);margin-top:0;margin-bottom:10px;font-size:1.1em}.settings-content-panel .input,.settings-content-panel .select{width:100%;max-width:400px;padding:10px 12px;border-radius:8px;border:1px solid var(--border);background-color:var(--input-bg);color:var(--text);font-size:1em}.settings-content-panel .select{min-width:unset}.setting-control-row{display:flex;align-items:center;gap:12px;flex-wrap:wrap}.range-input{width:min(360px,100%);accent-color:var(--accent)}.setting-value{min-width:48px;color:var(--text);font-variant-numeric:tabular-nums}.setting-description{margin:10px 0 0;color:var(--muted);line-height:1.5}.msg h1,.msg h2,.msg h3,.msg h4{margin:10px 0;color:var(--accent)}.msg blockquote{border-left:4px solid var(--accent);padding-left:15px;margin-left:0;color:var(--muted)}.msg ul{padding-left:20px}.msg li{margin-bottom:5px}.msg code{background-color:var(--input-bg);padding:2px 4px;border-radius:4px;font-family:monospace}.msg 
pre{background-color:var(--input-bg);padding:10px;border-radius:8px;overflow-x:auto;white-space:pre-wrap}.msg pre code{padding:0;background-color:transparent}.msg a{color:var(--text);display:inline-flex;align-items:center;gap:4px;position:relative;text-decoration:underline dotted;text-underline-offset:3px}.msg a .tooltip{visibility:hidden;width:auto;background-color:var(--panel);color:var(--text);text-align:center;border-radius:6px;padding:5px 10px;position:absolute;z-index:1;bottom:110%;left:50%;transform:translate(-50%);opacity:0;transition:opacity .3s;white-space:nowrap}.msg a:hover .tooltip{visibility:visible;opacity:1}.msg a:hover{color:var(--accent)}.msg a svg{width:14px;height:14px;stroke:var(--text);transition:stroke .2s ease}.msg a:hover svg{stroke:var(--accent)}.msg hr{border:none;border-top:1px solid var(--border);margin:20px 0}:root{--outline-w: 1px;--grid-w: 1px;--grid: var(--border)}.msg table.nice{border-collapse:separate;border-spacing:0;width:100%;margin:1rem 0;border-radius:12px;box-shadow:0 0 0 var(--outline-w) var(--border)}.msg table.nice th,.msg table.nice td{border:var(--grid-w) solid var(--grid);border-width:.5px;padding:.6rem .75rem;vertical-align:top}.msg table.nice thead tr:first-child th{border-top:0}.msg table.nice tr th:first-child,.msg table.nice tr td:first-child{border-left:0}.msg table.nice tr th:last-child,.msg table.nice tr td:last-child{border-right:0}.msg table.nice tbody tr:last-child td{border-bottom:0}.msg table.nice thead tr:first-child th{background-color:var(--panel)}.msg table.nice thead tr:first-child th:first-child{border-top-left-radius:12px;background-clip:padding-box}.msg table.nice thead tr:first-child th:last-child{border-top-right-radius:12px;background-clip:padding-box}.msg table.nice tbody tr:last-child td:first-child{border-bottom-left-radius:12px;background-clip:padding-box}.msg table.nice tbody tr:last-child 
td:last-child{border-bottom-right-radius:12px;background-clip:padding-box}.toggle-switch{position:relative;display:inline-block;width:50px;height:28px}.toggle-switch input{opacity:0;width:0;height:0}.slider{position:absolute;cursor:pointer;top:0;left:0;right:0;bottom:0;background-color:var(--input-bg);border:1px solid var(--border);transition:.4s;border-radius:28px}.slider:before{position:absolute;content:"";height:20px;width:20px;left:3px;bottom:3px;background-color:var(--muted);transition:.4s;border-radius:50%}input:checked+.slider{background-color:var(--accent)}input:checked+.slider:before{transform:translate(22px);background-color:var(--panel)}.spinner{border:3px solid rgba(255,255,255,.3);border-radius:50%;border-top:3px solid var(--accent);width:20px;height:20px;animation:spin 1s linear infinite;margin:0 auto}@keyframes spin{0%{transform:rotate(0)}to{transform:rotate(360deg)}}.codeblock{border:1px solid var(--grid, #e5e7eb);border-radius:12px;overflow:hidden;margin:1rem 0}.codeblock__header{display:flex;align-items:center;justify-content:space-between;padding:.5rem .75rem;background:var(--panel);border-bottom:1px solid var(--grid);font-weight:600;font-size:.875rem}.codeblock__lang{opacity:.9}.codeblock__copy{display:inline-flex;align-items:center;gap:.25rem;border:0;background:transparent;padding:.25rem;border-radius:8px;cursor:pointer}.codeblock__copy:hover{background:#0000000f}.codeblock__copy.copied .icon-copy{transform:scale(1.05)}.codeblock__pre{margin:0;padding:.75rem;overflow:auto}.codeblock__code{display:block;white-space:pre}.assistant-message{display:block}.assistant-thoughts{margin-bottom:8px}.think-toggle{background:var(--active-bg);color:var(--muted);border:1px solid var(--border);border-radius:10px;font-size:12px;padding:4px 8px;cursor:pointer}.think-toggle:hover{color:var(--text);border-color:var(--accent)}.think-toggle-icon{display:inline-block;margin-right:6px}.think-content{margin-top:8px;padding:10px 12px;border:1px solid 
var(--border);border-radius:12px;background:var(--panel);color:var(--muted);font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace;font-size:13px;line-height:1.5;overflow-x:auto;white-space:pre-wrap}.edit-message-input{display:block;width:100%;height:100%;background:transparent;border:0;padding:0;margin:0;color:inherit;font:inherit;line-height:inherit;letter-spacing:inherit;white-space:pre-wrap;overflow-wrap:anywhere;word-break:break-word;resize:none;overflow:hidden;outline:none}.edit-message-input:focus{border-color:var(--accent);box-shadow:0 0 0 2px color-mix(in srgb,var(--accent) 35%,transparent)}.user-edit-shadow{visibility:hidden;white-space:pre-wrap;overflow-wrap:anywhere;word-break:break-word}.edit-message-input.edit-overlay{position:absolute;top:0;right:0;bottom:0;left:0;width:100%;height:100%;background:transparent;border:0;padding:0;margin:0;color:inherit;font:inherit;line-height:inherit;letter-spacing:inherit;white-space:pre-wrap;overflow-wrap:anywhere;word-break:break-word;resize:none;overflow:hidden;outline:none;box-shadow:none}.websearch-toggle{background:transparent;border:0;padding:0;margin:0 .1rem 0 0;line-height:0;display:inline-flex;align-items:center;justify-content:center;cursor:pointer;position:relative;top:-7.5px;color:var(--muted);box-shadow:none;outline:none;-webkit-tap-highlight-color:transparent}.websearch-toggle svg{width:20px;height:20px;pointer-events:none}.websearch-toggle.active{color:var(--accent)}.websearch-toggle:focus,.websearch-toggle:focus-visible{outline:none;box-shadow:none}.msg-sources{margin-top:8px;font-size:12px;color:var(--muted)}.msg-sources a{color:var(--accent);text-decoration:none;margin-right:8px}.msg-sources a:hover{text-decoration:underline}.msg-sources.chips{display:flex;flex-wrap:wrap;margin:.5rem 0}.msg-sources.chips .chip{display:inline-flex;align-items:center;padding:.25rem .6rem;border-radius:9999px;border:1px solid 
var(--border);text-decoration:none;font-size:.85rem;line-height:1;white-space:nowrap;margin-top:.5rem}.db-active-badge{margin-left:8px;padding:2px 8px;border-radius:999px;background:color-mix(in srgb,var(--accent) 20%,transparent);color:var(--accent);font-size:11px}.placeholder-view,.library-panel{overflow:auto;padding:20px}.placeholder-view h1{margin-top:0}.library-toolbar{display:flex;flex-wrap:wrap;gap:10px;margin-bottom:18px}.library-states{display:flex;flex-wrap:wrap;gap:10px;margin-bottom:14px}.state-pill{padding:6px 10px;border-radius:999px;border:1px solid var(--border);color:var(--muted);font-size:13px}.state-pill.ready{color:var(--text);border-color:color-mix(in srgb,var(--accent) 45%,var(--border))}.library-chat-note,.job-card{margin-bottom:12px;padding:12px 14px;border-radius:12px;background:color-mix(in srgb,var(--panel) 82%,black);border:1px solid var(--border)}.library-inline-form{margin-bottom:14px;padding:12px 14px;border-radius:12px;border:1px solid var(--border);background:color-mix(in srgb,var(--panel) 88%,black)}.danger-zone{border-color:#8f3d49}.library-files h2{margin:18px 0 12px;font-size:16px}.library-file-list{display:grid;gap:10px}.library-file-row{display:flex;justify-content:space-between;gap:14px;align-items:flex-start;padding:12px 14px;border-radius:12px;border:1px solid var(--border);background:color-mix(in srgb,var(--panel) 88%,black)}.library-file-meta{min-width:0}.library-file-name{font-weight:600;margin-bottom:4px}.library-file-path,.muted-copy{color:var(--muted);font-size:13px;word-break:break-word}.library-file-actions{display:flex;gap:8px;flex-shrink:0} diff --git a/dist/assets/index-DKAz6gtp.js b/dist/assets/index-DKAz6gtp.js new file mode 100644 index 0000000..a17bbd0 --- /dev/null +++ b/dist/assets/index-DKAz6gtp.js @@ -0,0 +1,69 @@ +(function(){const t=document.createElement("link").relList;if(t&&t.supports&&t.supports("modulepreload"))return;for(const l of document.querySelectorAll('link[rel="modulepreload"]'))r(l);new 
MutationObserver(l=>{for(const o of l)if(o.type==="childList")for(const i of o.addedNodes)i.tagName==="LINK"&&i.rel==="modulepreload"&&r(i)}).observe(document,{childList:!0,subtree:!0});function n(l){const o={};return l.integrity&&(o.integrity=l.integrity),l.referrerPolicy&&(o.referrerPolicy=l.referrerPolicy),l.crossOrigin==="use-credentials"?o.credentials="include":l.crossOrigin==="anonymous"?o.credentials="omit":o.credentials="same-origin",o}function r(l){if(l.ep)return;l.ep=!0;const o=n(l);fetch(l.href,o)}})();function op(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e}var tc={exports:{}},ho={},nc={exports:{}},Q={};/** + * @license React + * react.production.min.js + * + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */var Yr=Symbol.for("react.element"),ip=Symbol.for("react.portal"),sp=Symbol.for("react.fragment"),ap=Symbol.for("react.strict_mode"),up=Symbol.for("react.profiler"),cp=Symbol.for("react.provider"),fp=Symbol.for("react.context"),dp=Symbol.for("react.forward_ref"),pp=Symbol.for("react.suspense"),hp=Symbol.for("react.memo"),mp=Symbol.for("react.lazy"),ja=Symbol.iterator;function gp(e){return e===null||typeof e!="object"?null:(e=ja&&e[ja]||e["@@iterator"],typeof e=="function"?e:null)}var rc={isMounted:function(){return!1},enqueueForceUpdate:function(){},enqueueReplaceState:function(){},enqueueSetState:function(){}},lc=Object.assign,oc={};function tr(e,t,n){this.props=e,this.context=t,this.refs=oc,this.updater=n||rc}tr.prototype.isReactComponent={};tr.prototype.setState=function(e,t){if(typeof e!="object"&&typeof e!="function"&&e!=null)throw Error("setState(...): takes an object of state variables to update or a function which returns an object of state 
variables.");this.updater.enqueueSetState(this,e,t,"setState")};tr.prototype.forceUpdate=function(e){this.updater.enqueueForceUpdate(this,e,"forceUpdate")};function ic(){}ic.prototype=tr.prototype;function ms(e,t,n){this.props=e,this.context=t,this.refs=oc,this.updater=n||rc}var gs=ms.prototype=new ic;gs.constructor=ms;lc(gs,tr.prototype);gs.isPureReactComponent=!0;var Ta=Array.isArray,sc=Object.prototype.hasOwnProperty,vs={current:null},ac={key:!0,ref:!0,__self:!0,__source:!0};function uc(e,t,n){var r,l={},o=null,i=null;if(t!=null)for(r in t.ref!==void 0&&(i=t.ref),t.key!==void 0&&(o=""+t.key),t)sc.call(t,r)&&!ac.hasOwnProperty(r)&&(l[r]=t[r]);var a=arguments.length-2;if(a===1)l.children=n;else if(1>>1,R=$[te];if(0>>1;tel(Fe,U))Cel(Mt,Fe)?($[te]=Mt,$[Ce]=U,te=Ce):($[te]=Fe,$[fe]=U,te=fe);else if(Cel(Mt,U))$[te]=Mt,$[Ce]=U,te=Ce;else break e}}return A}function l($,A){var U=$.sortIndex-A.sortIndex;return U!==0?U:$.id-A.id}if(typeof performance=="object"&&typeof performance.now=="function"){var o=performance;e.unstable_now=function(){return o.now()}}else{var i=Date,a=i.now();e.unstable_now=function(){return i.now()-a}}var s=[],u=[],f=1,m=null,v=3,w=!1,S=!1,E=!1,N=typeof setTimeout=="function"?setTimeout:null,h=typeof clearTimeout=="function"?clearTimeout:null,c=typeof setImmediate<"u"?setImmediate:null;typeof navigator<"u"&&navigator.scheduling!==void 0&&navigator.scheduling.isInputPending!==void 0&&navigator.scheduling.isInputPending.bind(navigator.scheduling);function d($){for(var A=n(u);A!==null;){if(A.callback===null)r(u);else if(A.startTime<=$)r(u),A.sortIndex=A.expirationTime,t(s,A);else break;A=n(u)}}function x($){if(E=!1,d($),!S)if(n(s)!==null)S=!0,gt(P);else{var A=n(u);A!==null&&$t(x,A.startTime-$)}}function P($,A){S=!1,E&&(E=!1,h(M),M=-1),w=!0;var U=v;try{for(d(A),m=n(s);m!==null&&(!(m.expirationTime>A)||$&&!pe());){var te=m.callback;if(typeof te=="function"){m.callback=null,v=m.priorityLevel;var R=te(m.expirationTime<=A);A=e.unstable_now(),typeof 
R=="function"?m.callback=R:m===n(s)&&r(s),d(A)}else r(s);m=n(s)}if(m!==null)var W=!0;else{var fe=n(u);fe!==null&&$t(x,fe.startTime-A),W=!1}return W}finally{m=null,v=U,w=!1}}var T=!1,j=null,M=-1,B=5,O=-1;function pe(){return!(e.unstable_now()-O$||125<$?console.error("forceFrameRate takes a positive int between 0 and 125, forcing frame rates higher than 125 fps is not supported"):B=0<$?Math.floor(1e3/$):5},e.unstable_getCurrentPriorityLevel=function(){return v},e.unstable_getFirstCallbackNode=function(){return n(s)},e.unstable_next=function($){switch(v){case 1:case 2:case 3:var A=3;break;default:A=v}var U=v;v=A;try{return $()}finally{v=U}},e.unstable_pauseExecution=function(){},e.unstable_requestPaint=function(){},e.unstable_runWithPriority=function($,A){switch($){case 1:case 2:case 3:case 4:case 5:break;default:$=3}var U=v;v=$;try{return A()}finally{v=U}},e.unstable_scheduleCallback=function($,A,U){var te=e.unstable_now();switch(typeof U=="object"&&U!==null?(U=U.delay,U=typeof U=="number"&&0te?($.sortIndex=U,t(u,$),n(s)===null&&$===n(u)&&(E?(h(M),M=-1):E=!0,$t(x,U-te))):($.sortIndex=R,t(s,$),S||w||(S=!0,gt(P))),$},e.unstable_shouldYield=pe,e.unstable_wrapCallback=function($){var A=v;return function(){var U=v;v=A;try{return $.apply(this,arguments)}finally{v=U}}}})(hc);pc.exports=hc;var Pp=pc.exports;/** + * @license React + * react-dom.production.min.js + * + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */var Lp=g,Ke=Pp;function _(e){for(var t="https://reactjs.org/docs/error-decoder.html?invariant="+e,n=1;n"u"||typeof window.document>"u"||typeof window.document.createElement>"u"),wi=Object.prototype.hasOwnProperty,Rp=/^[:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD][:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*$/,Ma={},za={};function jp(e){return wi.call(za,e)?!0:wi.call(Ma,e)?!1:Rp.test(e)?za[e]=!0:(Ma[e]=!0,!1)}function Tp(e,t,n,r){if(n!==null&&n.type===0)return!1;switch(typeof t){case"function":case"symbol":return!0;case"boolean":return r?!1:n!==null?!n.acceptsBooleans:(e=e.toLowerCase().slice(0,5),e!=="data-"&&e!=="aria-");default:return!1}}function $p(e,t,n,r){if(t===null||typeof t>"u"||Tp(e,t,n,r))return!0;if(r)return!1;if(n!==null)switch(n.type){case 3:return!t;case 4:return t===!1;case 5:return isNaN(t);case 6:return isNaN(t)||1>t}return!1}function Ie(e,t,n,r,l,o,i){this.acceptsBooleans=t===2||t===3||t===4,this.attributeName=r,this.attributeNamespace=l,this.mustUseProperty=n,this.propertyName=e,this.type=t,this.sanitizeURL=o,this.removeEmptyString=i}var Ee={};"children dangerouslySetInnerHTML defaultValue defaultChecked innerHTML suppressContentEditableWarning suppressHydrationWarning style".split(" ").forEach(function(e){Ee[e]=new Ie(e,0,!1,e,null,!1,!1)});[["acceptCharset","accept-charset"],["className","class"],["htmlFor","for"],["httpEquiv","http-equiv"]].forEach(function(e){var t=e[0];Ee[t]=new Ie(t,1,!1,e[1],null,!1,!1)});["contentEditable","draggable","spellCheck","value"].forEach(function(e){Ee[e]=new Ie(e,2,!1,e.toLowerCase(),null,!1,!1)});["autoReverse","externalResourcesRequired","focusable","preserveAlpha"].forEach(function(e){Ee[e]=new Ie(e,2,!1,e,null,!1,!1)});"allowFullScreen async autoFocus 
autoPlay controls default defer disabled disablePictureInPicture disableRemotePlayback formNoValidate hidden loop noModule noValidate open playsInline readOnly required reversed scoped seamless itemScope".split(" ").forEach(function(e){Ee[e]=new Ie(e,3,!1,e.toLowerCase(),null,!1,!1)});["checked","multiple","muted","selected"].forEach(function(e){Ee[e]=new Ie(e,3,!0,e,null,!1,!1)});["capture","download"].forEach(function(e){Ee[e]=new Ie(e,4,!1,e,null,!1,!1)});["cols","rows","size","span"].forEach(function(e){Ee[e]=new Ie(e,6,!1,e,null,!1,!1)});["rowSpan","start"].forEach(function(e){Ee[e]=new Ie(e,5,!1,e.toLowerCase(),null,!1,!1)});var ws=/[\-:]([a-z])/g;function Ss(e){return e[1].toUpperCase()}"accent-height alignment-baseline arabic-form baseline-shift cap-height clip-path clip-rule color-interpolation color-interpolation-filters color-profile color-rendering dominant-baseline enable-background fill-opacity fill-rule flood-color flood-opacity font-family font-size font-size-adjust font-stretch font-style font-variant font-weight glyph-name glyph-orientation-horizontal glyph-orientation-vertical horiz-adv-x horiz-origin-x image-rendering letter-spacing lighting-color marker-end marker-mid marker-start overline-position overline-thickness paint-order panose-1 pointer-events rendering-intent shape-rendering stop-color stop-opacity strikethrough-position strikethrough-thickness stroke-dasharray stroke-dashoffset stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity stroke-width text-anchor text-decoration text-rendering underline-position underline-thickness unicode-bidi unicode-range units-per-em v-alphabetic v-hanging v-ideographic v-mathematical vector-effect vert-adv-y vert-origin-x vert-origin-y word-spacing writing-mode xmlns:xlink x-height".split(" ").forEach(function(e){var t=e.replace(ws,Ss);Ee[t]=new Ie(t,1,!1,e,null,!1,!1)});"xlink:actuate xlink:arcrole xlink:role xlink:show xlink:title xlink:type".split(" ").forEach(function(e){var 
t=e.replace(ws,Ss);Ee[t]=new Ie(t,1,!1,e,"http://www.w3.org/1999/xlink",!1,!1)});["xml:base","xml:lang","xml:space"].forEach(function(e){var t=e.replace(ws,Ss);Ee[t]=new Ie(t,1,!1,e,"http://www.w3.org/XML/1998/namespace",!1,!1)});["tabIndex","crossOrigin"].forEach(function(e){Ee[e]=new Ie(e,1,!1,e.toLowerCase(),null,!1,!1)});Ee.xlinkHref=new Ie("xlinkHref",1,!1,"xlink:href","http://www.w3.org/1999/xlink",!0,!1);["src","href","action","formAction"].forEach(function(e){Ee[e]=new Ie(e,1,!1,e.toLowerCase(),null,!0,!0)});function xs(e,t,n,r){var l=Ee.hasOwnProperty(t)?Ee[t]:null;(l!==null?l.type!==0:r||!(2a||l[i]!==o[a]){var s=` +`+l[i].replace(" at new "," at ");return e.displayName&&s.includes("")&&(s=s.replace("",e.displayName)),s}while(1<=i&&0<=a);break}}}finally{Ko=!1,Error.prepareStackTrace=n}return(e=e?e.displayName||e.name:"")?mr(e):""}function Mp(e){switch(e.tag){case 5:return mr(e.type);case 16:return mr("Lazy");case 13:return mr("Suspense");case 19:return mr("SuspenseList");case 0:case 2:case 15:return e=Yo(e.type,!1),e;case 11:return e=Yo(e.type.render,!1),e;case 1:return e=Yo(e.type,!0),e;default:return""}}function Ei(e){if(e==null)return null;if(typeof e=="function")return e.displayName||e.name||null;if(typeof e=="string")return e;switch(e){case Tn:return"Fragment";case jn:return"Portal";case Si:return"Profiler";case ks:return"StrictMode";case xi:return"Suspense";case ki:return"SuspenseList"}if(typeof e=="object")switch(e.$$typeof){case vc:return(e.displayName||"Context")+".Consumer";case gc:return(e._context.displayName||"Context")+".Provider";case Es:var t=e.render;return e=e.displayName,e||(e=t.displayName||t.name||"",e=e!==""?"ForwardRef("+e+")":"ForwardRef"),e;case Cs:return t=e.displayName||null,t!==null?t:Ei(e.type)||"Memo";case Ot:t=e._payload,e=e._init;try{return Ei(e(t))}catch{}}return null}function zp(e){var t=e.type;switch(e.tag){case 24:return"Cache";case 9:return(t.displayName||"Context")+".Consumer";case 
10:return(t._context.displayName||"Context")+".Provider";case 18:return"DehydratedFragment";case 11:return e=t.render,e=e.displayName||e.name||"",t.displayName||(e!==""?"ForwardRef("+e+")":"ForwardRef");case 7:return"Fragment";case 5:return t;case 4:return"Portal";case 3:return"Root";case 6:return"Text";case 16:return Ei(t);case 8:return t===ks?"StrictMode":"Mode";case 22:return"Offscreen";case 12:return"Profiler";case 21:return"Scope";case 13:return"Suspense";case 19:return"SuspenseList";case 25:return"TracingMarker";case 1:case 0:case 17:case 2:case 14:case 15:if(typeof t=="function")return t.displayName||t.name||null;if(typeof t=="string")return t}return null}function Xt(e){switch(typeof e){case"boolean":case"number":case"string":case"undefined":return e;case"object":return e;default:return""}}function wc(e){var t=e.type;return(e=e.nodeName)&&e.toLowerCase()==="input"&&(t==="checkbox"||t==="radio")}function Ip(e){var t=wc(e)?"checked":"value",n=Object.getOwnPropertyDescriptor(e.constructor.prototype,t),r=""+e[t];if(!e.hasOwnProperty(t)&&typeof n<"u"&&typeof n.get=="function"&&typeof n.set=="function"){var l=n.get,o=n.set;return Object.defineProperty(e,t,{configurable:!0,get:function(){return l.call(this)},set:function(i){r=""+i,o.call(this,i)}}),Object.defineProperty(e,t,{enumerable:n.enumerable}),{getValue:function(){return r},setValue:function(i){r=""+i},stopTracking:function(){e._valueTracker=null,delete e[t]}}}}function fl(e){e._valueTracker||(e._valueTracker=Ip(e))}function Sc(e){if(!e)return!1;var t=e._valueTracker;if(!t)return!0;var n=t.getValue(),r="";return e&&(r=wc(e)?e.checked?"true":"false":e.value),e=r,e!==n?(t.setValue(e),!0):!1}function Bl(e){if(e=e||(typeof document<"u"?document:void 0),typeof e>"u")return null;try{return e.activeElement||e.body}catch{return e.body}}function Ci(e,t){var n=t.checked;return ue({},t,{defaultChecked:void 0,defaultValue:void 0,value:void 0,checked:n??e._wrapperState.initialChecked})}function Oa(e,t){var 
n=t.defaultValue==null?"":t.defaultValue,r=t.checked!=null?t.checked:t.defaultChecked;n=Xt(t.value!=null?t.value:n),e._wrapperState={initialChecked:r,initialValue:n,controlled:t.type==="checkbox"||t.type==="radio"?t.checked!=null:t.value!=null}}function xc(e,t){t=t.checked,t!=null&&xs(e,"checked",t,!1)}function Ni(e,t){xc(e,t);var n=Xt(t.value),r=t.type;if(n!=null)r==="number"?(n===0&&e.value===""||e.value!=n)&&(e.value=""+n):e.value!==""+n&&(e.value=""+n);else if(r==="submit"||r==="reset"){e.removeAttribute("value");return}t.hasOwnProperty("value")?_i(e,t.type,n):t.hasOwnProperty("defaultValue")&&_i(e,t.type,Xt(t.defaultValue)),t.checked==null&&t.defaultChecked!=null&&(e.defaultChecked=!!t.defaultChecked)}function Fa(e,t,n){if(t.hasOwnProperty("value")||t.hasOwnProperty("defaultValue")){var r=t.type;if(!(r!=="submit"&&r!=="reset"||t.value!==void 0&&t.value!==null))return;t=""+e._wrapperState.initialValue,n||t===e.value||(e.value=t),e.defaultValue=t}n=e.name,n!==""&&(e.name=""),e.defaultChecked=!!e._wrapperState.initialChecked,n!==""&&(e.name=n)}function _i(e,t,n){(t!=="number"||Bl(e.ownerDocument)!==e)&&(n==null?e.defaultValue=""+e._wrapperState.initialValue:e.defaultValue!==""+n&&(e.defaultValue=""+n))}var gr=Array.isArray;function Bn(e,t,n,r){if(e=e.options,t){t={};for(var l=0;l"+t.valueOf().toString()+"",t=dl.firstChild;e.firstChild;)e.removeChild(e.firstChild);for(;t.firstChild;)e.appendChild(t.firstChild)}});function Rr(e,t){if(t){var n=e.firstChild;if(n&&n===e.lastChild&&n.nodeType===3){n.nodeValue=t;return}}e.textContent=t}var 
wr={animationIterationCount:!0,aspectRatio:!0,borderImageOutset:!0,borderImageSlice:!0,borderImageWidth:!0,boxFlex:!0,boxFlexGroup:!0,boxOrdinalGroup:!0,columnCount:!0,columns:!0,flex:!0,flexGrow:!0,flexPositive:!0,flexShrink:!0,flexNegative:!0,flexOrder:!0,gridArea:!0,gridRow:!0,gridRowEnd:!0,gridRowSpan:!0,gridRowStart:!0,gridColumn:!0,gridColumnEnd:!0,gridColumnSpan:!0,gridColumnStart:!0,fontWeight:!0,lineClamp:!0,lineHeight:!0,opacity:!0,order:!0,orphans:!0,tabSize:!0,widows:!0,zIndex:!0,zoom:!0,fillOpacity:!0,floodOpacity:!0,stopOpacity:!0,strokeDasharray:!0,strokeDashoffset:!0,strokeMiterlimit:!0,strokeOpacity:!0,strokeWidth:!0},Op=["Webkit","ms","Moz","O"];Object.keys(wr).forEach(function(e){Op.forEach(function(t){t=t+e.charAt(0).toUpperCase()+e.substring(1),wr[t]=wr[e]})});function Nc(e,t,n){return t==null||typeof t=="boolean"||t===""?"":n||typeof t!="number"||t===0||wr.hasOwnProperty(e)&&wr[e]?(""+t).trim():t+"px"}function _c(e,t){e=e.style;for(var n in t)if(t.hasOwnProperty(n)){var r=n.indexOf("--")===0,l=Nc(n,t[n],r);n==="float"&&(n="cssFloat"),r?e.setProperty(n,l):e[n]=l}}var Fp=ue({menuitem:!0},{area:!0,base:!0,br:!0,col:!0,embed:!0,hr:!0,img:!0,input:!0,keygen:!0,link:!0,meta:!0,param:!0,source:!0,track:!0,wbr:!0});function Ri(e,t){if(t){if(Fp[e]&&(t.children!=null||t.dangerouslySetInnerHTML!=null))throw Error(_(137,e));if(t.dangerouslySetInnerHTML!=null){if(t.children!=null)throw Error(_(60));if(typeof t.dangerouslySetInnerHTML!="object"||!("__html"in t.dangerouslySetInnerHTML))throw Error(_(61))}if(t.style!=null&&typeof t.style!="object")throw Error(_(62))}}function ji(e,t){if(e.indexOf("-")===-1)return typeof t.is=="string";switch(e){case"annotation-xml":case"color-profile":case"font-face":case"font-face-src":case"font-face-uri":case"font-face-format":case"font-face-name":case"missing-glyph":return!1;default:return!0}}var Ti=null;function Ns(e){return 
e=e.target||e.srcElement||window,e.correspondingUseElement&&(e=e.correspondingUseElement),e.nodeType===3?e.parentNode:e}var $i=null,Wn=null,Vn=null;function Ua(e){if(e=Xr(e)){if(typeof $i!="function")throw Error(_(280));var t=e.stateNode;t&&(t=wo(t),$i(e.stateNode,e.type,t))}}function Pc(e){Wn?Vn?Vn.push(e):Vn=[e]:Wn=e}function Lc(){if(Wn){var e=Wn,t=Vn;if(Vn=Wn=null,Ua(e),t)for(e=0;e>>=0,e===0?32:31-(Jp(e)/Gp|0)|0}var pl=64,hl=4194304;function vr(e){switch(e&-e){case 1:return 1;case 2:return 2;case 4:return 4;case 8:return 8;case 16:return 16;case 32:return 32;case 64:case 128:case 256:case 512:case 1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:case 262144:case 524288:case 1048576:case 2097152:return e&4194240;case 4194304:case 8388608:case 16777216:case 33554432:case 67108864:return e&130023424;case 134217728:return 134217728;case 268435456:return 268435456;case 536870912:return 536870912;case 1073741824:return 1073741824;default:return e}}function Kl(e,t){var n=e.pendingLanes;if(n===0)return 0;var r=0,l=e.suspendedLanes,o=e.pingedLanes,i=n&268435455;if(i!==0){var a=i&~l;a!==0?r=vr(a):(o&=i,o!==0&&(r=vr(o)))}else i=n&~l,i!==0?r=vr(i):o!==0&&(r=vr(o));if(r===0)return 0;if(t!==0&&t!==r&&!(t&l)&&(l=r&-r,o=t&-t,l>=o||l===16&&(o&4194240)!==0))return t;if(r&4&&(r|=n&16),t=e.entangledLanes,t!==0)for(e=e.entanglements,t&=r;0n;n++)t.push(e);return t}function Jr(e,t,n){e.pendingLanes|=t,t!==536870912&&(e.suspendedLanes=0,e.pingedLanes=0),e=e.eventTimes,t=31-it(t),e[t]=n}function qp(e,t){var n=e.pendingLanes&~t;e.pendingLanes=t,e.suspendedLanes=0,e.pingedLanes=0,e.expiredLanes&=t,e.mutableReadLanes&=t,e.entangledLanes&=t,t=e.entanglements;var r=e.eventTimes;for(e=e.expirationTimes;0=xr),Ga=" ",Xa=!1;function Jc(e,t){switch(e){case"keyup":return Ph.indexOf(t.keyCode)!==-1;case"keydown":return t.keyCode!==229;case"keypress":case"mousedown":case"focusout":return!0;default:return!1}}function Gc(e){return e=e.detail,typeof e=="object"&&"data"in 
e?e.data:null}var $n=!1;function Rh(e,t){switch(e){case"compositionend":return Gc(t);case"keypress":return t.which!==32?null:(Xa=!0,Ga);case"textInput":return e=t.data,e===Ga&&Xa?null:e;default:return null}}function jh(e,t){if($n)return e==="compositionend"||!Ms&&Jc(e,t)?(e=Kc(),jl=js=Ut=null,$n=!1,e):null;switch(e){case"paste":return null;case"keypress":if(!(t.ctrlKey||t.altKey||t.metaKey)||t.ctrlKey&&t.altKey){if(t.char&&1=t)return{node:n,offset:t-e};e=r}e:{for(;n;){if(n.nextSibling){n=n.nextSibling;break e}n=n.parentNode}n=void 0}n=eu(n)}}function qc(e,t){return e&&t?e===t?!0:e&&e.nodeType===3?!1:t&&t.nodeType===3?qc(e,t.parentNode):"contains"in e?e.contains(t):e.compareDocumentPosition?!!(e.compareDocumentPosition(t)&16):!1:!1}function ef(){for(var e=window,t=Bl();t instanceof e.HTMLIFrameElement;){try{var n=typeof t.contentWindow.location.href=="string"}catch{n=!1}if(n)e=t.contentWindow;else break;t=Bl(e.document)}return t}function zs(e){var t=e&&e.nodeName&&e.nodeName.toLowerCase();return t&&(t==="input"&&(e.type==="text"||e.type==="search"||e.type==="tel"||e.type==="url"||e.type==="password")||t==="textarea"||e.contentEditable==="true")}function Ah(e){var t=ef(),n=e.focusedElem,r=e.selectionRange;if(t!==n&&n&&n.ownerDocument&&qc(n.ownerDocument.documentElement,n)){if(r!==null&&zs(n)){if(t=r.start,e=r.end,e===void 0&&(e=t),"selectionStart"in n)n.selectionStart=t,n.selectionEnd=Math.min(e,n.value.length);else if(e=(t=n.ownerDocument||document)&&t.defaultView||window,e.getSelection){e=e.getSelection();var l=n.textContent.length,o=Math.min(r.start,l);r=r.end===void 0?o:Math.min(r.end,l),!e.extend&&o>r&&(l=r,r=o,o=l),l=tu(n,o);var 
i=tu(n,r);l&&i&&(e.rangeCount!==1||e.anchorNode!==l.node||e.anchorOffset!==l.offset||e.focusNode!==i.node||e.focusOffset!==i.offset)&&(t=t.createRange(),t.setStart(l.node,l.offset),e.removeAllRanges(),o>r?(e.addRange(t),e.extend(i.node,i.offset)):(t.setEnd(i.node,i.offset),e.addRange(t)))}}for(t=[],e=n;e=e.parentNode;)e.nodeType===1&&t.push({element:e,left:e.scrollLeft,top:e.scrollTop});for(typeof n.focus=="function"&&n.focus(),n=0;n=document.documentMode,Mn=null,Di=null,Er=null,Ai=!1;function nu(e,t,n){var r=n.window===n?n.document:n.nodeType===9?n:n.ownerDocument;Ai||Mn==null||Mn!==Bl(r)||(r=Mn,"selectionStart"in r&&zs(r)?r={start:r.selectionStart,end:r.selectionEnd}:(r=(r.ownerDocument&&r.ownerDocument.defaultView||window).getSelection(),r={anchorNode:r.anchorNode,anchorOffset:r.anchorOffset,focusNode:r.focusNode,focusOffset:r.focusOffset}),Er&&Ir(Er,r)||(Er=r,r=Gl(Di,"onSelect"),0On||(e.current=Qi[On],Qi[On]=null,On--)}function ne(e,t){On++,Qi[On]=e.current,e.current=t}var bt={},Re=qt(bt),Ue=qt(!1),pn=bt;function Gn(e,t){var n=e.type.contextTypes;if(!n)return bt;var r=e.stateNode;if(r&&r.__reactInternalMemoizedUnmaskedChildContext===t)return r.__reactInternalMemoizedMaskedChildContext;var l={},o;for(o in n)l[o]=t[o];return r&&(e=e.stateNode,e.__reactInternalMemoizedUnmaskedChildContext=t,e.__reactInternalMemoizedMaskedChildContext=l),l}function He(e){return e=e.childContextTypes,e!=null}function bl(){le(Ue),le(Re)}function uu(e,t,n){if(Re.current!==bt)throw Error(_(168));ne(Re,t),ne(Ue,n)}function cf(e,t,n){var r=e.stateNode;if(t=t.childContextTypes,typeof r.getChildContext!="function")return n;r=r.getChildContext();for(var l in r)if(!(l in t))throw Error(_(108,zp(e)||"Unknown",l));return ue({},n,r)}function Zl(e){return e=(e=e.stateNode)&&e.__reactInternalMemoizedMergedChildContext||bt,pn=Re.current,ne(Re,e),ne(Ue,Ue.current),!0}function cu(e,t,n){var r=e.stateNode;if(!r)throw 
Error(_(169));n?(e=cf(e,t,pn),r.__reactInternalMemoizedMergedChildContext=e,le(Ue),le(Re),ne(Re,e)):le(Ue),ne(Ue,n)}var St=null,So=!1,si=!1;function ff(e){St===null?St=[e]:St.push(e)}function bh(e){So=!0,ff(e)}function en(){if(!si&&St!==null){si=!0;var e=0,t=q;try{var n=St;for(q=1;e>=i,l-=i,xt=1<<32-it(t)+l|n<M?(B=j,j=null):B=j.sibling;var O=v(h,j,d[M],x);if(O===null){j===null&&(j=B);break}e&&j&&O.alternate===null&&t(h,j),c=o(O,c,M),T===null?P=O:T.sibling=O,T=O,j=B}if(M===d.length)return n(h,j),oe&&on(h,M),P;if(j===null){for(;MM?(B=j,j=null):B=j.sibling;var pe=v(h,j,O.value,x);if(pe===null){j===null&&(j=B);break}e&&j&&pe.alternate===null&&t(h,j),c=o(pe,c,M),T===null?P=pe:T.sibling=pe,T=pe,j=B}if(O.done)return n(h,j),oe&&on(h,M),P;if(j===null){for(;!O.done;M++,O=d.next())O=m(h,O.value,x),O!==null&&(c=o(O,c,M),T===null?P=O:T.sibling=O,T=O);return oe&&on(h,M),P}for(j=r(h,j);!O.done;M++,O=d.next())O=w(j,h,M,O.value,x),O!==null&&(e&&O.alternate!==null&&j.delete(O.key===null?M:O.key),c=o(O,c,M),T===null?P=O:T.sibling=O,T=O);return e&&j.forEach(function(ve){return t(h,ve)}),oe&&on(h,M),P}function N(h,c,d,x){if(typeof d=="object"&&d!==null&&d.type===Tn&&d.key===null&&(d=d.props.children),typeof d=="object"&&d!==null){switch(d.$$typeof){case cl:e:{for(var P=d.key,T=c;T!==null;){if(T.key===P){if(P=d.type,P===Tn){if(T.tag===7){n(h,T.sibling),c=l(T,d.props.children),c.return=h,h=c;break e}}else if(T.elementType===P||typeof P=="object"&&P!==null&&P.$$typeof===Ot&&pu(P)===T.type){n(h,T.sibling),c=l(T,d.props),c.ref=fr(h,T,d),c.return=h,h=c;break e}n(h,T);break}else t(h,T);T=T.sibling}d.type===Tn?(c=dn(d.props.children,h.mode,x,d.key),c.return=h,h=c):(x=Dl(d.type,d.key,d.props,null,h.mode,x),x.ref=fr(h,c,d),x.return=h,h=x)}return i(h);case jn:e:{for(T=d.key;c!==null;){if(c.key===T)if(c.tag===4&&c.stateNode.containerInfo===d.containerInfo&&c.stateNode.implementation===d.implementation){n(h,c.sibling),c=l(c,d.children||[]),c.return=h,h=c;break e}else{n(h,c);break}else 
t(h,c);c=c.sibling}c=mi(d,h.mode,x),c.return=h,h=c}return i(h);case Ot:return T=d._init,N(h,c,T(d._payload),x)}if(gr(d))return S(h,c,d,x);if(ir(d))return E(h,c,d,x);xl(h,d)}return typeof d=="string"&&d!==""||typeof d=="number"?(d=""+d,c!==null&&c.tag===6?(n(h,c.sibling),c=l(c,d),c.return=h,h=c):(n(h,c),c=hi(d,h.mode,x),c.return=h,h=c),i(h)):n(h,c)}return N}var bn=mf(!0),gf=mf(!1),to=qt(null),no=null,An=null,Ds=null;function As(){Ds=An=no=null}function Us(e){var t=to.current;le(to),e._currentValue=t}function Ji(e,t,n){for(;e!==null;){var r=e.alternate;if((e.childLanes&t)!==t?(e.childLanes|=t,r!==null&&(r.childLanes|=t)):r!==null&&(r.childLanes&t)!==t&&(r.childLanes|=t),e===n)break;e=e.return}}function Kn(e,t){no=e,Ds=An=null,e=e.dependencies,e!==null&&e.firstContext!==null&&(e.lanes&t&&(Ae=!0),e.firstContext=null)}function et(e){var t=e._currentValue;if(Ds!==e)if(e={context:e,memoizedValue:t,next:null},An===null){if(no===null)throw Error(_(308));An=e,no.dependencies={lanes:0,firstContext:e}}else An=An.next=e;return t}var un=null;function Hs(e){un===null?un=[e]:un.push(e)}function vf(e,t,n,r){var l=t.interleaved;return l===null?(n.next=n,Hs(t)):(n.next=l.next,l.next=n),t.interleaved=n,Pt(e,r)}function Pt(e,t){e.lanes|=t;var n=e.alternate;for(n!==null&&(n.lanes|=t),n=e,e=e.return;e!==null;)e.childLanes|=t,n=e.alternate,n!==null&&(n.childLanes|=t),n=e,e=e.return;return n.tag===3?n.stateNode:null}var Ft=!1;function Bs(e){e.updateQueue={baseState:e.memoizedState,firstBaseUpdate:null,lastBaseUpdate:null,shared:{pending:null,interleaved:null,lanes:0},effects:null}}function yf(e,t){e=e.updateQueue,t.updateQueue===e&&(t.updateQueue={baseState:e.baseState,firstBaseUpdate:e.firstBaseUpdate,lastBaseUpdate:e.lastBaseUpdate,shared:e.shared,effects:e.effects})}function Et(e,t){return{eventTime:e,lane:t,tag:0,payload:null,callback:null,next:null}}function Kt(e,t,n){var r=e.updateQueue;if(r===null)return null;if(r=r.shared,X&2){var l=r.pending;return 
l===null?t.next=t:(t.next=l.next,l.next=t),r.pending=t,Pt(e,n)}return l=r.interleaved,l===null?(t.next=t,Hs(r)):(t.next=l.next,l.next=t),r.interleaved=t,Pt(e,n)}function $l(e,t,n){if(t=t.updateQueue,t!==null&&(t=t.shared,(n&4194240)!==0)){var r=t.lanes;r&=e.pendingLanes,n|=r,t.lanes=n,Ps(e,n)}}function hu(e,t){var n=e.updateQueue,r=e.alternate;if(r!==null&&(r=r.updateQueue,n===r)){var l=null,o=null;if(n=n.firstBaseUpdate,n!==null){do{var i={eventTime:n.eventTime,lane:n.lane,tag:n.tag,payload:n.payload,callback:n.callback,next:null};o===null?l=o=i:o=o.next=i,n=n.next}while(n!==null);o===null?l=o=t:o=o.next=t}else l=o=t;n={baseState:r.baseState,firstBaseUpdate:l,lastBaseUpdate:o,shared:r.shared,effects:r.effects},e.updateQueue=n;return}e=n.lastBaseUpdate,e===null?n.firstBaseUpdate=t:e.next=t,n.lastBaseUpdate=t}function ro(e,t,n,r){var l=e.updateQueue;Ft=!1;var o=l.firstBaseUpdate,i=l.lastBaseUpdate,a=l.shared.pending;if(a!==null){l.shared.pending=null;var s=a,u=s.next;s.next=null,i===null?o=u:i.next=u,i=s;var f=e.alternate;f!==null&&(f=f.updateQueue,a=f.lastBaseUpdate,a!==i&&(a===null?f.firstBaseUpdate=u:a.next=u,f.lastBaseUpdate=s))}if(o!==null){var m=l.baseState;i=0,f=u=s=null,a=o;do{var v=a.lane,w=a.eventTime;if((r&v)===v){f!==null&&(f=f.next={eventTime:w,lane:0,tag:a.tag,payload:a.payload,callback:a.callback,next:null});e:{var S=e,E=a;switch(v=t,w=n,E.tag){case 1:if(S=E.payload,typeof S=="function"){m=S.call(w,m,v);break e}m=S;break e;case 3:S.flags=S.flags&-65537|128;case 0:if(S=E.payload,v=typeof S=="function"?S.call(w,m,v):S,v==null)break e;m=ue({},m,v);break e;case 2:Ft=!0}}a.callback!==null&&a.lane!==0&&(e.flags|=64,v=l.effects,v===null?l.effects=[a]:v.push(a))}else 
w={eventTime:w,lane:v,tag:a.tag,payload:a.payload,callback:a.callback,next:null},f===null?(u=f=w,s=m):f=f.next=w,i|=v;if(a=a.next,a===null){if(a=l.shared.pending,a===null)break;v=a,a=v.next,v.next=null,l.lastBaseUpdate=v,l.shared.pending=null}}while(!0);if(f===null&&(s=m),l.baseState=s,l.firstBaseUpdate=u,l.lastBaseUpdate=f,t=l.shared.interleaved,t!==null){l=t;do i|=l.lane,l=l.next;while(l!==t)}else o===null&&(l.shared.lanes=0);gn|=i,e.lanes=i,e.memoizedState=m}}function mu(e,t,n){if(e=t.effects,t.effects=null,e!==null)for(t=0;tn?n:4,e(!0);var r=ui.transition;ui.transition={};try{e(!1),t()}finally{q=n,ui.transition=r}}function If(){return tt().memoizedState}function tm(e,t,n){var r=Jt(e);if(n={lane:r,action:n,hasEagerState:!1,eagerState:null,next:null},Of(e))Ff(t,n);else if(n=vf(e,t,n,r),n!==null){var l=Me();st(n,e,r,l),Df(n,t,r)}}function nm(e,t,n){var r=Jt(e),l={lane:r,action:n,hasEagerState:!1,eagerState:null,next:null};if(Of(e))Ff(t,l);else{var o=e.alternate;if(e.lanes===0&&(o===null||o.lanes===0)&&(o=t.lastRenderedReducer,o!==null))try{var i=t.lastRenderedState,a=o(i,n);if(l.hasEagerState=!0,l.eagerState=a,at(a,i)){var s=t.interleaved;s===null?(l.next=l,Hs(t)):(l.next=s.next,s.next=l),t.interleaved=l;return}}catch{}finally{}n=vf(e,t,l,r),n!==null&&(l=Me(),st(n,e,r,l),Df(n,t,r))}}function Of(e){var t=e.alternate;return e===se||t!==null&&t===se}function Ff(e,t){Cr=oo=!0;var n=e.pending;n===null?t.next=t:(t.next=n.next,n.next=t),e.pending=t}function Df(e,t,n){if(n&4194240){var r=t.lanes;r&=e.pendingLanes,n|=r,t.lanes=n,Ps(e,n)}}var io={readContext:et,useCallback:_e,useContext:_e,useEffect:_e,useImperativeHandle:_e,useInsertionEffect:_e,useLayoutEffect:_e,useMemo:_e,useReducer:_e,useRef:_e,useState:_e,useDebugValue:_e,useDeferredValue:_e,useTransition:_e,useMutableSource:_e,useSyncExternalStore:_e,useId:_e,unstable_isNewReconciler:!1},rm={readContext:et,useCallback:function(e,t){return ft().memoizedState=[e,t===void 
0?null:t],e},useContext:et,useEffect:vu,useImperativeHandle:function(e,t,n){return n=n!=null?n.concat([e]):null,zl(4194308,4,jf.bind(null,t,e),n)},useLayoutEffect:function(e,t){return zl(4194308,4,e,t)},useInsertionEffect:function(e,t){return zl(4,2,e,t)},useMemo:function(e,t){var n=ft();return t=t===void 0?null:t,e=e(),n.memoizedState=[e,t],e},useReducer:function(e,t,n){var r=ft();return t=n!==void 0?n(t):t,r.memoizedState=r.baseState=t,e={pending:null,interleaved:null,lanes:0,dispatch:null,lastRenderedReducer:e,lastRenderedState:t},r.queue=e,e=e.dispatch=tm.bind(null,se,e),[r.memoizedState,e]},useRef:function(e){var t=ft();return e={current:e},t.memoizedState=e},useState:gu,useDebugValue:Xs,useDeferredValue:function(e){return ft().memoizedState=e},useTransition:function(){var e=gu(!1),t=e[0];return e=em.bind(null,e[1]),ft().memoizedState=e,[t,e]},useMutableSource:function(){},useSyncExternalStore:function(e,t,n){var r=se,l=ft();if(oe){if(n===void 0)throw Error(_(407));n=n()}else{if(n=t(),we===null)throw Error(_(349));mn&30||kf(r,t,n)}l.memoizedState=n;var o={value:n,getSnapshot:t};return l.queue=o,vu(Cf.bind(null,r,o,e),[e]),r.flags|=2048,Wr(9,Ef.bind(null,r,o,n,t),void 0,null),n},useId:function(){var e=ft(),t=we.identifierPrefix;if(oe){var n=kt,r=xt;n=(r&~(1<<32-it(r)-1)).toString(32)+n,t=":"+t+"R"+n,n=Hr++,0<\/script>",e=e.removeChild(e.firstChild)):typeof r.is=="string"?e=i.createElement(n,{is:r.is}):(e=i.createElement(n),n==="select"&&(i=e,r.multiple?i.multiple=!0:r.size&&(i.size=r.size))):e=i.createElementNS(e,n),e[dt]=t,e[Dr]=r,Jf(e,t,!1,!1),t.stateNode=e;e:{switch(i=ji(n,r),n){case"dialog":re("cancel",e),re("close",e),l=r;break;case"iframe":case"object":case"embed":re("load",e),l=r;break;case"video":case"audio":for(l=0;ler&&(t.flags|=128,r=!0,dr(o,!1),t.lanes=4194304)}else{if(!r)if(e=lo(i),e!==null){if(t.flags|=128,r=!0,n=e.updateQueue,n!==null&&(t.updateQueue=n,t.flags|=4),dr(o,!0),o.tail===null&&o.tailMode==="hidden"&&!i.alternate&&!oe)return 
Pe(t),null}else 2*de()-o.renderingStartTime>er&&n!==1073741824&&(t.flags|=128,r=!0,dr(o,!1),t.lanes=4194304);o.isBackwards?(i.sibling=t.child,t.child=i):(n=o.last,n!==null?n.sibling=i:t.child=i,o.last=i)}return o.tail!==null?(t=o.tail,o.rendering=t,o.tail=t.sibling,o.renderingStartTime=de(),t.sibling=null,n=ie.current,ne(ie,r?n&1|2:n&1),t):(Pe(t),null);case 22:case 23:return na(),r=t.memoizedState!==null,e!==null&&e.memoizedState!==null!==r&&(t.flags|=8192),r&&t.mode&1?We&1073741824&&(Pe(t),t.subtreeFlags&6&&(t.flags|=8192)):Pe(t),null;case 24:return null;case 25:return null}throw Error(_(156,t.tag))}function fm(e,t){switch(Os(t),t.tag){case 1:return He(t.type)&&bl(),e=t.flags,e&65536?(t.flags=e&-65537|128,t):null;case 3:return Zn(),le(Ue),le(Re),Qs(),e=t.flags,e&65536&&!(e&128)?(t.flags=e&-65537|128,t):null;case 5:return Vs(t),null;case 13:if(le(ie),e=t.memoizedState,e!==null&&e.dehydrated!==null){if(t.alternate===null)throw Error(_(340));Xn()}return e=t.flags,e&65536?(t.flags=e&-65537|128,t):null;case 19:return le(ie),null;case 4:return Zn(),null;case 10:return Us(t.type._context),null;case 22:case 23:return na(),null;case 24:return null;default:return null}}var El=!1,Le=!1,dm=typeof WeakSet=="function"?WeakSet:Set,z=null;function Un(e,t){var n=e.ref;if(n!==null)if(typeof n=="function")try{n(null)}catch(r){ce(e,t,r)}else n.current=null}function rs(e,t,n){try{n()}catch(r){ce(e,t,r)}}var Lu=!1;function pm(e,t){if(Ui=Yl,e=ef(),zs(e)){if("selectionStart"in e)var n={start:e.selectionStart,end:e.selectionEnd};else e:{n=(n=e.ownerDocument)&&n.defaultView||window;var r=n.getSelection&&n.getSelection();if(r&&r.rangeCount!==0){n=r.anchorNode;var l=r.anchorOffset,o=r.focusNode;r=r.focusOffset;try{n.nodeType,o.nodeType}catch{n=null;break e}var i=0,a=-1,s=-1,u=0,f=0,m=e,v=null;t:for(;;){for(var w;m!==n||l!==0&&m.nodeType!==3||(a=i+l),m!==o||r!==0&&m.nodeType!==3||(s=i+r),m.nodeType===3&&(i+=m.nodeValue.length),(w=m.firstChild)!==null;)v=m,m=w;for(;;){if(m===e)break 
t;if(v===n&&++u===l&&(a=i),v===o&&++f===r&&(s=i),(w=m.nextSibling)!==null)break;m=v,v=m.parentNode}m=w}n=a===-1||s===-1?null:{start:a,end:s}}else n=null}n=n||{start:0,end:0}}else n=null;for(Hi={focusedElem:e,selectionRange:n},Yl=!1,z=t;z!==null;)if(t=z,e=t.child,(t.subtreeFlags&1028)!==0&&e!==null)e.return=t,z=e;else for(;z!==null;){t=z;try{var S=t.alternate;if(t.flags&1024)switch(t.tag){case 0:case 11:case 15:break;case 1:if(S!==null){var E=S.memoizedProps,N=S.memoizedState,h=t.stateNode,c=h.getSnapshotBeforeUpdate(t.elementType===t.type?E:rt(t.type,E),N);h.__reactInternalSnapshotBeforeUpdate=c}break;case 3:var d=t.stateNode.containerInfo;d.nodeType===1?d.textContent="":d.nodeType===9&&d.documentElement&&d.removeChild(d.documentElement);break;case 5:case 6:case 4:case 17:break;default:throw Error(_(163))}}catch(x){ce(t,t.return,x)}if(e=t.sibling,e!==null){e.return=t.return,z=e;break}z=t.return}return S=Lu,Lu=!1,S}function Nr(e,t,n){var r=t.updateQueue;if(r=r!==null?r.lastEffect:null,r!==null){var l=r=r.next;do{if((l.tag&e)===e){var o=l.destroy;l.destroy=void 0,o!==void 0&&rs(t,n,o)}l=l.next}while(l!==r)}}function Eo(e,t){if(t=t.updateQueue,t=t!==null?t.lastEffect:null,t!==null){var n=t=t.next;do{if((n.tag&e)===e){var r=n.create;n.destroy=r()}n=n.next}while(n!==t)}}function ls(e){var t=e.ref;if(t!==null){var n=e.stateNode;switch(e.tag){case 5:e=n;break;default:e=n}typeof t=="function"?t(e):t.current=e}}function bf(e){var t=e.alternate;t!==null&&(e.alternate=null,bf(t)),e.child=null,e.deletions=null,e.sibling=null,e.tag===5&&(t=e.stateNode,t!==null&&(delete t[dt],delete t[Dr],delete t[Vi],delete t[Gh],delete t[Xh])),e.stateNode=null,e.return=null,e.dependencies=null,e.memoizedProps=null,e.memoizedState=null,e.pendingProps=null,e.stateNode=null,e.updateQueue=null}function Zf(e){return e.tag===5||e.tag===3||e.tag===4}function Ru(e){e:for(;;){for(;e.sibling===null;){if(e.return===null||Zf(e.return))return 
null;e=e.return}for(e.sibling.return=e.return,e=e.sibling;e.tag!==5&&e.tag!==6&&e.tag!==18;){if(e.flags&2||e.child===null||e.tag===4)continue e;e.child.return=e,e=e.child}if(!(e.flags&2))return e.stateNode}}function os(e,t,n){var r=e.tag;if(r===5||r===6)e=e.stateNode,t?n.nodeType===8?n.parentNode.insertBefore(e,t):n.insertBefore(e,t):(n.nodeType===8?(t=n.parentNode,t.insertBefore(e,n)):(t=n,t.appendChild(e)),n=n._reactRootContainer,n!=null||t.onclick!==null||(t.onclick=Xl));else if(r!==4&&(e=e.child,e!==null))for(os(e,t,n),e=e.sibling;e!==null;)os(e,t,n),e=e.sibling}function is(e,t,n){var r=e.tag;if(r===5||r===6)e=e.stateNode,t?n.insertBefore(e,t):n.appendChild(e);else if(r!==4&&(e=e.child,e!==null))for(is(e,t,n),e=e.sibling;e!==null;)is(e,t,n),e=e.sibling}var xe=null,lt=!1;function It(e,t,n){for(n=n.child;n!==null;)qf(e,t,n),n=n.sibling}function qf(e,t,n){if(pt&&typeof pt.onCommitFiberUnmount=="function")try{pt.onCommitFiberUnmount(mo,n)}catch{}switch(n.tag){case 5:Le||Un(n,t);case 6:var r=xe,l=lt;xe=null,It(e,t,n),xe=r,lt=l,xe!==null&&(lt?(e=xe,n=n.stateNode,e.nodeType===8?e.parentNode.removeChild(n):e.removeChild(n)):xe.removeChild(n.stateNode));break;case 18:xe!==null&&(lt?(e=xe,n=n.stateNode,e.nodeType===8?ii(e.parentNode,n):e.nodeType===1&&ii(e,n),Mr(e)):ii(xe,n.stateNode));break;case 4:r=xe,l=lt,xe=n.stateNode.containerInfo,lt=!0,It(e,t,n),xe=r,lt=l;break;case 0:case 11:case 14:case 15:if(!Le&&(r=n.updateQueue,r!==null&&(r=r.lastEffect,r!==null))){l=r=r.next;do{var o=l,i=o.destroy;o=o.tag,i!==void 0&&(o&2||o&4)&&rs(n,t,i),l=l.next}while(l!==r)}It(e,t,n);break;case 1:if(!Le&&(Un(n,t),r=n.stateNode,typeof r.componentWillUnmount=="function"))try{r.props=n.memoizedProps,r.state=n.memoizedState,r.componentWillUnmount()}catch(a){ce(n,t,a)}It(e,t,n);break;case 21:It(e,t,n);break;case 22:n.mode&1?(Le=(r=Le)||n.memoizedState!==null,It(e,t,n),Le=r):It(e,t,n);break;default:It(e,t,n)}}function ju(e){var t=e.updateQueue;if(t!==null){e.updateQueue=null;var 
n=e.stateNode;n===null&&(n=e.stateNode=new dm),t.forEach(function(r){var l=km.bind(null,e,r);n.has(r)||(n.add(r),r.then(l,l))})}}function nt(e,t){var n=t.deletions;if(n!==null)for(var r=0;rl&&(l=i),r&=~o}if(r=l,r=de()-r,r=(120>r?120:480>r?480:1080>r?1080:1920>r?1920:3e3>r?3e3:4320>r?4320:1960*mm(r/1960))-r,10e?16:e,Ht===null)var r=!1;else{if(e=Ht,Ht=null,uo=0,X&6)throw Error(_(331));var l=X;for(X|=4,z=e.current;z!==null;){var o=z,i=o.child;if(z.flags&16){var a=o.deletions;if(a!==null){for(var s=0;sde()-ea?fn(e,0):qs|=n),Be(e,t)}function sd(e,t){t===0&&(e.mode&1?(t=hl,hl<<=1,!(hl&130023424)&&(hl=4194304)):t=1);var n=Me();e=Pt(e,t),e!==null&&(Jr(e,t,n),Be(e,n))}function xm(e){var t=e.memoizedState,n=0;t!==null&&(n=t.retryLane),sd(e,n)}function km(e,t){var n=0;switch(e.tag){case 13:var r=e.stateNode,l=e.memoizedState;l!==null&&(n=l.retryLane);break;case 19:r=e.stateNode;break;default:throw Error(_(314))}r!==null&&r.delete(t),sd(e,n)}var ad;ad=function(e,t,n){if(e!==null)if(e.memoizedProps!==t.pendingProps||Ue.current)Ae=!0;else{if(!(e.lanes&n)&&!(t.flags&128))return Ae=!1,um(e,t,n);Ae=!!(e.flags&131072)}else Ae=!1,oe&&t.flags&1048576&&df(t,eo,t.index);switch(t.lanes=0,t.tag){case 2:var r=t.type;Il(e,t),e=t.pendingProps;var l=Gn(t,Re.current);Kn(t,n),l=Ys(null,t,r,e,l,n);var o=Js();return t.flags|=1,typeof l=="object"&&l!==null&&typeof l.render=="function"&&l.$$typeof===void 0?(t.tag=1,t.memoizedState=null,t.updateQueue=null,He(r)?(o=!0,Zl(t)):o=!1,t.memoizedState=l.state!==null&&l.state!==void 0?l.state:null,Bs(t),l.updater=ko,t.stateNode=l,l._reactInternals=t,Xi(t,r,e,n),t=qi(null,t,r,!0,o,n)):(t.tag=0,oe&&o&&Is(t),$e(null,t,l,n),t=t.child),t;case 16:r=t.elementType;e:{switch(Il(e,t),e=t.pendingProps,l=r._init,r=l(r._payload),t.type=r,l=t.tag=Cm(r),e=rt(r,e),l){case 0:t=Zi(null,t,r,e,n);break e;case 1:t=Nu(null,t,r,e,n);break e;case 11:t=Eu(null,t,r,e,n);break e;case 14:t=Cu(null,t,r,rt(r.type,e),n);break e}throw Error(_(306,r,""))}return t;case 0:return 
r=t.type,l=t.pendingProps,l=t.elementType===r?l:rt(r,l),Zi(e,t,r,l,n);case 1:return r=t.type,l=t.pendingProps,l=t.elementType===r?l:rt(r,l),Nu(e,t,r,l,n);case 3:e:{if(Qf(t),e===null)throw Error(_(387));r=t.pendingProps,o=t.memoizedState,l=o.element,yf(e,t),ro(t,r,null,n);var i=t.memoizedState;if(r=i.element,o.isDehydrated)if(o={element:r,isDehydrated:!1,cache:i.cache,pendingSuspenseBoundaries:i.pendingSuspenseBoundaries,transitions:i.transitions},t.updateQueue.baseState=o,t.memoizedState=o,t.flags&256){l=qn(Error(_(423)),t),t=_u(e,t,r,n,l);break e}else if(r!==l){l=qn(Error(_(424)),t),t=_u(e,t,r,n,l);break e}else for(Ve=Qt(t.stateNode.containerInfo.firstChild),Qe=t,oe=!0,ot=null,n=gf(t,null,r,n),t.child=n;n;)n.flags=n.flags&-3|4096,n=n.sibling;else{if(Xn(),r===l){t=Lt(e,t,n);break e}$e(e,t,r,n)}t=t.child}return t;case 5:return wf(t),e===null&&Yi(t),r=t.type,l=t.pendingProps,o=e!==null?e.memoizedProps:null,i=l.children,Bi(r,l)?i=null:o!==null&&Bi(r,o)&&(t.flags|=32),Vf(e,t),$e(e,t,i,n),t.child;case 6:return e===null&&Yi(t),null;case 13:return Kf(e,t,n);case 4:return Ws(t,t.stateNode.containerInfo),r=t.pendingProps,e===null?t.child=bn(t,null,r,n):$e(e,t,r,n),t.child;case 11:return r=t.type,l=t.pendingProps,l=t.elementType===r?l:rt(r,l),Eu(e,t,r,l,n);case 7:return $e(e,t,t.pendingProps,n),t.child;case 8:return $e(e,t,t.pendingProps.children,n),t.child;case 12:return $e(e,t,t.pendingProps.children,n),t.child;case 10:e:{if(r=t.type._context,l=t.pendingProps,o=t.memoizedProps,i=l.value,ne(to,r._currentValue),r._currentValue=i,o!==null)if(at(o.value,i)){if(o.children===l.children&&!Ue.current){t=Lt(e,t,n);break e}}else for(o=t.child,o!==null&&(o.return=t);o!==null;){var a=o.dependencies;if(a!==null){i=o.child;for(var s=a.firstContext;s!==null;){if(s.context===r){if(o.tag===1){s=Et(-1,n&-n),s.tag=2;var u=o.updateQueue;if(u!==null){u=u.shared;var 
f=u.pending;f===null?s.next=s:(s.next=f.next,f.next=s),u.pending=s}}o.lanes|=n,s=o.alternate,s!==null&&(s.lanes|=n),Ji(o.return,n,t),a.lanes|=n;break}s=s.next}}else if(o.tag===10)i=o.type===t.type?null:o.child;else if(o.tag===18){if(i=o.return,i===null)throw Error(_(341));i.lanes|=n,a=i.alternate,a!==null&&(a.lanes|=n),Ji(i,n,t),i=o.sibling}else i=o.child;if(i!==null)i.return=o;else for(i=o;i!==null;){if(i===t){i=null;break}if(o=i.sibling,o!==null){o.return=i.return,i=o;break}i=i.return}o=i}$e(e,t,l.children,n),t=t.child}return t;case 9:return l=t.type,r=t.pendingProps.children,Kn(t,n),l=et(l),r=r(l),t.flags|=1,$e(e,t,r,n),t.child;case 14:return r=t.type,l=rt(r,t.pendingProps),l=rt(r.type,l),Cu(e,t,r,l,n);case 15:return Bf(e,t,t.type,t.pendingProps,n);case 17:return r=t.type,l=t.pendingProps,l=t.elementType===r?l:rt(r,l),Il(e,t),t.tag=1,He(r)?(e=!0,Zl(t)):e=!1,Kn(t,n),Af(t,r,l),Xi(t,r,l,n),qi(null,t,r,!0,e,n);case 19:return Yf(e,t,n);case 22:return Wf(e,t,n)}throw Error(_(156,t.tag))};function ud(e,t){return Ic(e,t)}function Em(e,t,n,r){this.tag=e,this.key=n,this.sibling=this.child=this.return=this.stateNode=this.type=this.elementType=null,this.index=0,this.ref=null,this.pendingProps=t,this.dependencies=this.memoizedState=this.updateQueue=this.memoizedProps=null,this.mode=r,this.subtreeFlags=this.flags=0,this.deletions=null,this.childLanes=this.lanes=0,this.alternate=null}function Ze(e,t,n,r){return new Em(e,t,n,r)}function la(e){return e=e.prototype,!(!e||!e.isReactComponent)}function Cm(e){if(typeof e=="function")return la(e)?1:0;if(e!=null){if(e=e.$$typeof,e===Es)return 11;if(e===Cs)return 14}return 2}function Gt(e,t){var n=e.alternate;return 
n===null?(n=Ze(e.tag,t,e.key,e.mode),n.elementType=e.elementType,n.type=e.type,n.stateNode=e.stateNode,n.alternate=e,e.alternate=n):(n.pendingProps=t,n.type=e.type,n.flags=0,n.subtreeFlags=0,n.deletions=null),n.flags=e.flags&14680064,n.childLanes=e.childLanes,n.lanes=e.lanes,n.child=e.child,n.memoizedProps=e.memoizedProps,n.memoizedState=e.memoizedState,n.updateQueue=e.updateQueue,t=e.dependencies,n.dependencies=t===null?null:{lanes:t.lanes,firstContext:t.firstContext},n.sibling=e.sibling,n.index=e.index,n.ref=e.ref,n}function Dl(e,t,n,r,l,o){var i=2;if(r=e,typeof e=="function")la(e)&&(i=1);else if(typeof e=="string")i=5;else e:switch(e){case Tn:return dn(n.children,l,o,t);case ks:i=8,l|=8;break;case Si:return e=Ze(12,n,t,l|2),e.elementType=Si,e.lanes=o,e;case xi:return e=Ze(13,n,t,l),e.elementType=xi,e.lanes=o,e;case ki:return e=Ze(19,n,t,l),e.elementType=ki,e.lanes=o,e;case yc:return No(n,l,o,t);default:if(typeof e=="object"&&e!==null)switch(e.$$typeof){case gc:i=10;break e;case vc:i=9;break e;case Es:i=11;break e;case Cs:i=14;break e;case Ot:i=16,r=null;break e}throw Error(_(130,e==null?e:typeof e,""))}return t=Ze(i,n,t,l),t.elementType=e,t.type=r,t.lanes=o,t}function dn(e,t,n,r){return e=Ze(7,e,r,t),e.lanes=n,e}function No(e,t,n,r){return e=Ze(22,e,r,t),e.elementType=yc,e.lanes=n,e.stateNode={isHidden:!1},e}function hi(e,t,n){return e=Ze(6,e,null,t),e.lanes=n,e}function mi(e,t,n){return t=Ze(4,e.children!==null?e.children:[],e.key,t),t.lanes=n,t.stateNode={containerInfo:e.containerInfo,pendingChildren:null,implementation:e.implementation},t}function 
Nm(e,t,n,r,l){this.tag=t,this.containerInfo=e,this.finishedWork=this.pingCache=this.current=this.pendingChildren=null,this.timeoutHandle=-1,this.callbackNode=this.pendingContext=this.context=null,this.callbackPriority=0,this.eventTimes=Go(0),this.expirationTimes=Go(-1),this.entangledLanes=this.finishedLanes=this.mutableReadLanes=this.expiredLanes=this.pingedLanes=this.suspendedLanes=this.pendingLanes=0,this.entanglements=Go(0),this.identifierPrefix=r,this.onRecoverableError=l,this.mutableSourceEagerHydrationData=null}function oa(e,t,n,r,l,o,i,a,s){return e=new Nm(e,t,n,a,s),t===1?(t=1,o===!0&&(t|=8)):t=0,o=Ze(3,null,null,t),e.current=o,o.stateNode=e,o.memoizedState={element:r,isDehydrated:n,cache:null,transitions:null,pendingSuspenseBoundaries:null},Bs(o),e}function _m(e,t,n){var r=3"u"||typeof __REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE!="function"))try{__REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE(pd)}catch(e){console.error(e)}}pd(),dc.exports=Ye;var hd=dc.exports,md,Du=hd;md=Du.createRoot,Du.hydrateRoot;/** + * react-router v7.8.2 + * + * Copyright (c) Remix Software Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE.md file in the root directory of this source tree. 
+ * + * @license MIT + */var Au="popstate";function Tm(e={}){function t(l,o){let{pathname:i="/",search:a="",hash:s=""}=Sn(l.location.hash.substring(1));return!i.startsWith("/")&&!i.startsWith(".")&&(i="/"+i),fs("",{pathname:i,search:a,hash:s},o.state&&o.state.usr||null,o.state&&o.state.key||"default")}function n(l,o){let i=l.document.querySelector("base"),a="";if(i&&i.getAttribute("href")){let s=l.location.href,u=s.indexOf("#");a=u===-1?s:s.slice(0,u)}return a+"#"+(typeof o=="string"?o:Qr(o))}function r(l,o){ut(l.pathname.charAt(0)==="/",`relative pathnames are not supported in hash history.push(${JSON.stringify(o)})`)}return Mm(t,n,r,e)}function ae(e,t){if(e===!1||e===null||typeof e>"u")throw new Error(t)}function ut(e,t){if(!e){typeof console<"u"&&console.warn(t);try{throw new Error(t)}catch{}}}function $m(){return Math.random().toString(36).substring(2,10)}function Uu(e,t){return{usr:e.state,key:e.key,idx:t}}function fs(e,t,n=null,r){return{pathname:typeof e=="string"?e:e.pathname,search:"",hash:"",...typeof t=="string"?Sn(t):t,state:n,key:t&&t.key||r||$m()}}function Qr({pathname:e="/",search:t="",hash:n=""}){return t&&t!=="?"&&(e+=t.charAt(0)==="?"?t:"?"+t),n&&n!=="#"&&(e+=n.charAt(0)==="#"?n:"#"+n),e}function Sn(e){let t={};if(e){let n=e.indexOf("#");n>=0&&(t.hash=e.substring(n),e=e.substring(0,n));let r=e.indexOf("?");r>=0&&(t.search=e.substring(r),e=e.substring(0,r)),e&&(t.pathname=e)}return t}function Mm(e,t,n,r={}){let{window:l=document.defaultView,v5Compat:o=!1}=r,i=l.history,a="POP",s=null,u=f();u==null&&(u=0,i.replaceState({...i.state,idx:u},""));function f(){return(i.state||{idx:null}).idx}function m(){a="POP";let N=f(),h=N==null?null:N-u;u=N,s&&s({action:a,location:E.location,delta:h})}function v(N,h){a="PUSH";let c=fs(E.location,N,h);n&&n(c,N),u=f()+1;let d=Uu(c,u),x=E.createHref(c);try{i.pushState(d,"",x)}catch(P){if(P instanceof DOMException&&P.name==="DataCloneError")throw 
P;l.location.assign(x)}o&&s&&s({action:a,location:E.location,delta:1})}function w(N,h){a="REPLACE";let c=fs(E.location,N,h);n&&n(c,N),u=f();let d=Uu(c,u),x=E.createHref(c);i.replaceState(d,"",x),o&&s&&s({action:a,location:E.location,delta:0})}function S(N){return zm(N)}let E={get action(){return a},get location(){return e(l,i)},listen(N){if(s)throw new Error("A history only accepts one active listener");return l.addEventListener(Au,m),s=N,()=>{l.removeEventListener(Au,m),s=null}},createHref(N){return t(l,N)},createURL:S,encodeLocation(N){let h=S(N);return{pathname:h.pathname,search:h.search,hash:h.hash}},push:v,replace:w,go(N){return i.go(N)}};return E}function zm(e,t=!1){let n="http://localhost";typeof window<"u"&&(n=window.location.origin!=="null"?window.location.origin:window.location.href),ae(n,"No window.location.(origin|href) available to create URL");let r=typeof e=="string"?e:Qr(e);return r=r.replace(/ $/,"%20"),!t&&r.startsWith("//")&&(r=n+r),new URL(r,n)}function gd(e,t,n="/"){return Im(e,t,n,!1)}function Im(e,t,n,r){let l=typeof t=="string"?Sn(t):t,o=Rt(l.pathname||"/",n);if(o==null)return null;let i=vd(e);Om(i);let a=null;for(let s=0;a==null&&s{let f={relativePath:u===void 0?i.path||"":u,caseSensitive:i.caseSensitive===!0,childrenIndex:a,route:i};if(f.relativePath.startsWith("/")){if(!f.relativePath.startsWith(r)&&s)return;ae(f.relativePath.startsWith(r),`Absolute route path "${f.relativePath}" nested under path "${r}" is not valid. An absolute child route path must start with the combined path of all its parent routes.`),f.relativePath=f.relativePath.slice(r.length)}let m=Ct([r,f.relativePath]),v=n.concat(f);i.children&&i.children.length>0&&(ae(i.index!==!0,`Index routes must not have child routes. 
Please remove all child routes from route path "${m}".`),vd(i.children,t,v,m,s)),!(i.path==null&&!i.index)&&t.push({path:m,score:Wm(m,i.index),routesMeta:v})};return e.forEach((i,a)=>{var s;if(i.path===""||!((s=i.path)!=null&&s.includes("?")))o(i,a);else for(let u of yd(i.path))o(i,a,!0,u)}),t}function yd(e){let t=e.split("/");if(t.length===0)return[];let[n,...r]=t,l=n.endsWith("?"),o=n.replace(/\?$/,"");if(r.length===0)return l?[o,""]:[o];let i=yd(r.join("/")),a=[];return a.push(...i.map(s=>s===""?o:[o,s].join("/"))),l&&a.push(...i),a.map(s=>e.startsWith("/")&&s===""?"/":s)}function Om(e){e.sort((t,n)=>t.score!==n.score?n.score-t.score:Vm(t.routesMeta.map(r=>r.childrenIndex),n.routesMeta.map(r=>r.childrenIndex)))}var Fm=/^:[\w-]+$/,Dm=3,Am=2,Um=1,Hm=10,Bm=-2,Hu=e=>e==="*";function Wm(e,t){let n=e.split("/"),r=n.length;return n.some(Hu)&&(r+=Bm),t&&(r+=Am),n.filter(l=>!Hu(l)).reduce((l,o)=>l+(Fm.test(o)?Dm:o===""?Um:Hm),r)}function Vm(e,t){return e.length===t.length&&e.slice(0,-1).every((r,l)=>r===t[l])?e[e.length-1]-t[t.length-1]:0}function Qm(e,t,n=!1){let{routesMeta:r}=e,l={},o="/",i=[];for(let a=0;a{if(f==="*"){let S=a[v]||"";i=o.slice(0,o.length-S.length).replace(/(.)\/+$/,"$1")}const w=a[v];return m&&!w?u[f]=void 0:u[f]=(w||"").replace(/%2F/g,"/"),u},{}),pathname:o,pathnameBase:i,pattern:e}}function Km(e,t=!1,n=!0){ut(e==="*"||!e.endsWith("*")||e.endsWith("/*"),`Route path "${e}" will be treated as if it were "${e.replace(/\*$/,"/*")}" because the \`*\` character must always follow a \`/\` in the pattern. 
To get rid of this warning, please change the route path to "${e.replace(/\*$/,"/*")}".`);let r=[],l="^"+e.replace(/\/*\*?$/,"").replace(/^\/*/,"/").replace(/[\\.*+^${}|()[\]]/g,"\\$&").replace(/\/:([\w-]+)(\?)?/g,(i,a,s)=>(r.push({paramName:a,isOptional:s!=null}),s?"/?([^\\/]+)?":"/([^\\/]+)")).replace(/\/([\w-]+)\?(\/|$)/g,"(/$1)?$2");return e.endsWith("*")?(r.push({paramName:"*"}),l+=e==="*"||e==="/*"?"(.*)$":"(?:\\/(.+)|\\/*)$"):n?l+="\\/*$":e!==""&&e!=="/"&&(l+="(?:(?=\\/|$))"),[new RegExp(l,t?void 0:"i"),r]}function Ym(e){try{return e.split("/").map(t=>decodeURIComponent(t).replace(/\//g,"%2F")).join("/")}catch(t){return ut(!1,`The URL path "${e}" could not be decoded because it is a malformed URL segment. This is probably due to a bad percent encoding (${t}).`),e}}function Rt(e,t){if(t==="/")return e;if(!e.toLowerCase().startsWith(t.toLowerCase()))return null;let n=t.endsWith("/")?t.length-1:t.length,r=e.charAt(n);return r&&r!=="/"?null:e.slice(n)||"/"}function Jm(e,t="/"){let{pathname:n,search:r="",hash:l=""}=typeof e=="string"?Sn(e):e;return{pathname:n?n.startsWith("/")?n:Gm(n,t):t,search:Zm(r),hash:qm(l)}}function Gm(e,t){let n=t.replace(/\/+$/,"").split("/");return e.split("/").forEach(l=>{l===".."?n.length>1&&n.pop():l!=="."&&n.push(l)}),n.length>1?n.join("/"):"/"}function gi(e,t,n,r){return`Cannot include a '${e}' character in a manually specified \`to.${t}\` field [${JSON.stringify(r)}]. Please separate it out to the \`to.${n}\` field. 
Alternatively you may provide the full path as a string in and the router will parse it for you.`}function Xm(e){return e.filter((t,n)=>n===0||t.route.path&&t.route.path.length>0)}function wd(e){let t=Xm(e);return t.map((n,r)=>r===t.length-1?n.pathname:n.pathnameBase)}function Sd(e,t,n,r=!1){let l;typeof e=="string"?l=Sn(e):(l={...e},ae(!l.pathname||!l.pathname.includes("?"),gi("?","pathname","search",l)),ae(!l.pathname||!l.pathname.includes("#"),gi("#","pathname","hash",l)),ae(!l.search||!l.search.includes("#"),gi("#","search","hash",l)));let o=e===""||l.pathname==="",i=o?"/":l.pathname,a;if(i==null)a=n;else{let m=t.length-1;if(!r&&i.startsWith("..")){let v=i.split("/");for(;v[0]==="..";)v.shift(),m-=1;l.pathname=v.join("/")}a=m>=0?t[m]:"/"}let s=Jm(l,a),u=i&&i!=="/"&&i.endsWith("/"),f=(o||i===".")&&n.endsWith("/");return!s.pathname.endsWith("/")&&(u||f)&&(s.pathname+="/"),s}var Ct=e=>e.join("/").replace(/\/\/+/g,"/"),bm=e=>e.replace(/\/+$/,"").replace(/^\/*/,"/"),Zm=e=>!e||e==="?"?"":e.startsWith("?")?e:"?"+e,qm=e=>!e||e==="#"?"":e.startsWith("#")?e:"#"+e;function eg(e){return e!=null&&typeof e.status=="number"&&typeof e.statusText=="string"&&typeof e.internal=="boolean"&&"data"in e}var xd=["POST","PUT","PATCH","DELETE"];new Set(xd);var tg=["GET",...xd];new Set(tg);var lr=g.createContext(null);lr.displayName="DataRouter";var jo=g.createContext(null);jo.displayName="DataRouterState";g.createContext(!1);var kd=g.createContext({isTransitioning:!1});kd.displayName="ViewTransition";var ng=g.createContext(new Map);ng.displayName="Fetchers";var rg=g.createContext(null);rg.displayName="Await";var mt=g.createContext(null);mt.displayName="Navigation";var Zr=g.createContext(null);Zr.displayName="Location";var Tt=g.createContext({outlet:null,matches:[],isDataRoute:!1});Tt.displayName="Route";var ua=g.createContext(null);ua.displayName="RouteError";function lg(e,{relative:t}={}){ae(qr(),"useHref() may be used only in the context of a 
component.");let{basename:n,navigator:r}=g.useContext(mt),{hash:l,pathname:o,search:i}=el(e,{relative:t}),a=o;return n!=="/"&&(a=o==="/"?n:Ct([n,o])),r.createHref({pathname:a,search:i,hash:l})}function qr(){return g.useContext(Zr)!=null}function xn(){return ae(qr(),"useLocation() may be used only in the context of a component."),g.useContext(Zr).location}var Ed="You should call navigate() in a React.useEffect(), not when your component is first rendered.";function Cd(e){g.useContext(mt).static||g.useLayoutEffect(e)}function og(){let{isDataRoute:e}=g.useContext(Tt);return e?yg():ig()}function ig(){ae(qr(),"useNavigate() may be used only in the context of a component.");let e=g.useContext(lr),{basename:t,navigator:n}=g.useContext(mt),{matches:r}=g.useContext(Tt),{pathname:l}=xn(),o=JSON.stringify(wd(r)),i=g.useRef(!1);return Cd(()=>{i.current=!0}),g.useCallback((s,u={})=>{if(ut(i.current,Ed),!i.current)return;if(typeof s=="number"){n.go(s);return}let f=Sd(s,JSON.parse(o),l,u.relative==="path");e==null&&t!=="/"&&(f.pathname=f.pathname==="/"?t:Ct([t,f.pathname])),(u.replace?n.replace:n.push)(f,u.state,u)},[t,n,o,l,e])}g.createContext(null);function el(e,{relative:t}={}){let{matches:n}=g.useContext(Tt),{pathname:r}=xn(),l=JSON.stringify(wd(n));return g.useMemo(()=>Sd(e,JSON.parse(l),r,t==="path"),[e,l,r,t])}function sg(e,t){return Nd(e,t)}function Nd(e,t,n,r,l){var c;ae(qr(),"useRoutes() may be used only in the context of a component.");let{navigator:o}=g.useContext(mt),{matches:i}=g.useContext(Tt),a=i[i.length-1],s=a?a.params:{},u=a?a.pathname:"/",f=a?a.pathnameBase:"/",m=a&&a.route;{let d=m&&m.path||"";_d(u,!m||d.endsWith("*")||d.endsWith("*?"),`You rendered descendant (or called \`useRoutes()\`) at "${u}" (under ) but the parent route path has no trailing "*". This means if you navigate deeper, the parent won't match anymore and therefore the child routes will never render. 
+ +Please change the parent to .`)}let v=xn(),w;if(t){let d=typeof t=="string"?Sn(t):t;ae(f==="/"||((c=d.pathname)==null?void 0:c.startsWith(f)),`When overriding the location using \`\` or \`useRoutes(routes, location)\`, the location pathname must begin with the portion of the URL pathname that was matched by all parent routes. The current pathname base is "${f}" but pathname "${d.pathname}" was given in the \`location\` prop.`),w=d}else w=v;let S=w.pathname||"/",E=S;if(f!=="/"){let d=f.replace(/^\//,"").split("/");E="/"+S.replace(/^\//,"").split("/").slice(d.length).join("/")}let N=gd(e,{pathname:E});ut(m||N!=null,`No routes matched location "${w.pathname}${w.search}${w.hash}" `),ut(N==null||N[N.length-1].route.element!==void 0||N[N.length-1].route.Component!==void 0||N[N.length-1].route.lazy!==void 0,`Matched leaf route at location "${w.pathname}${w.search}${w.hash}" does not have an element or Component. This means it will render an with a null value by default resulting in an "empty" page.`);let h=dg(N&&N.map(d=>Object.assign({},d,{params:Object.assign({},s,d.params),pathname:Ct([f,o.encodeLocation?o.encodeLocation(d.pathname).pathname:d.pathname]),pathnameBase:d.pathnameBase==="/"?f:Ct([f,o.encodeLocation?o.encodeLocation(d.pathnameBase).pathname:d.pathnameBase])})),i,n,r,l);return t&&h?g.createElement(Zr.Provider,{value:{location:{pathname:"/",search:"",hash:"",state:null,key:"default",...w},navigationType:"POP"}},h):h}function ag(){let e=vg(),t=eg(e)?`${e.status} ${e.statusText}`:e instanceof Error?e.message:JSON.stringify(e),n=e instanceof Error?e.stack:null,r="rgba(200,200,200, 0.5)",l={padding:"0.5rem",backgroundColor:r},o={padding:"2px 4px",backgroundColor:r},i=null;return console.error("Error handled by React Router default ErrorBoundary:",e),i=g.createElement(g.Fragment,null,g.createElement("p",null,"💿 Hey developer 👋"),g.createElement("p",null,"You can provide a way better UX than this when your app throws errors by providing your own 
",g.createElement("code",{style:o},"ErrorBoundary")," or"," ",g.createElement("code",{style:o},"errorElement")," prop on your route.")),g.createElement(g.Fragment,null,g.createElement("h2",null,"Unexpected Application Error!"),g.createElement("h3",{style:{fontStyle:"italic"}},t),n?g.createElement("pre",{style:l},n):null,i)}var ug=g.createElement(ag,null),cg=class extends g.Component{constructor(e){super(e),this.state={location:e.location,revalidation:e.revalidation,error:e.error}}static getDerivedStateFromError(e){return{error:e}}static getDerivedStateFromProps(e,t){return t.location!==e.location||t.revalidation!=="idle"&&e.revalidation==="idle"?{error:e.error,location:e.location,revalidation:e.revalidation}:{error:e.error!==void 0?e.error:t.error,location:t.location,revalidation:e.revalidation||t.revalidation}}componentDidCatch(e,t){this.props.unstable_onError?this.props.unstable_onError(e,t):console.error("React Router caught the following error during render",e)}render(){return this.state.error!==void 0?g.createElement(Tt.Provider,{value:this.props.routeContext},g.createElement(ua.Provider,{value:this.state.error,children:this.props.component})):this.props.children}};function fg({routeContext:e,match:t,children:n}){let r=g.useContext(lr);return r&&r.static&&r.staticContext&&(t.route.errorElement||t.route.ErrorBoundary)&&(r.staticContext._deepestRenderedBoundaryId=t.route.id),g.createElement(Tt.Provider,{value:e},n)}function dg(e,t=[],n=null,r=null,l=null){if(e==null){if(!n)return null;if(n.errors)e=n.matches;else if(t.length===0&&!n.initialized&&n.matches.length>0)e=n.matches;else return null}let o=e,i=n==null?void 0:n.errors;if(i!=null){let u=o.findIndex(f=>f.route.id&&(i==null?void 0:i[f.route.id])!==void 0);ae(u>=0,`Could not find a matching route for errors on route IDs: ${Object.keys(i).join(",")}`),o=o.slice(0,Math.min(o.length,u+1))}let a=!1,s=-1;if(n)for(let u=0;u=0?o=o.slice(0,s+1):o=[o[0]];break}}}return o.reduceRight((u,f,m)=>{let 
v,w=!1,S=null,E=null;n&&(v=i&&f.route.id?i[f.route.id]:void 0,S=f.route.errorElement||ug,a&&(s<0&&m===0?(_d("route-fallback",!1,"No `HydrateFallback` element provided to render during initial hydration"),w=!0,E=null):s===m&&(w=!0,E=f.route.hydrateFallbackElement||null)));let N=t.concat(o.slice(0,m+1)),h=()=>{let c;return v?c=S:w?c=E:f.route.Component?c=g.createElement(f.route.Component,null):f.route.element?c=f.route.element:c=u,g.createElement(fg,{match:f,routeContext:{outlet:u,matches:N,isDataRoute:n!=null},children:c})};return n&&(f.route.ErrorBoundary||f.route.errorElement||m===0)?g.createElement(cg,{location:n.location,revalidation:n.revalidation,component:S,error:v,children:h(),routeContext:{outlet:null,matches:N,isDataRoute:!0},unstable_onError:r}):h()},null)}function ca(e){return`${e} must be used within a data router. See https://reactrouter.com/en/main/routers/picking-a-router.`}function pg(e){let t=g.useContext(lr);return ae(t,ca(e)),t}function hg(e){let t=g.useContext(jo);return ae(t,ca(e)),t}function mg(e){let t=g.useContext(Tt);return ae(t,ca(e)),t}function fa(e){let t=mg(e),n=t.matches[t.matches.length-1];return ae(n.route.id,`${e} can only be used on routes that contain a unique "id"`),n.route.id}function gg(){return fa("useRouteId")}function vg(){var r;let e=g.useContext(ua),t=hg("useRouteError"),n=fa("useRouteError");return e!==void 0?e:(r=t.errors)==null?void 0:r[n]}function yg(){let{router:e}=pg("useNavigate"),t=fa("useNavigate"),n=g.useRef(!1);return Cd(()=>{n.current=!0}),g.useCallback(async(l,o={})=>{ut(n.current,Ed),n.current&&(typeof l=="number"?e.navigate(l):await e.navigate(l,{fromRouteId:t,...o}))},[e,t])}var Bu={};function _d(e,t,n){!t&&!Bu[e]&&(Bu[e]=!0,ut(!1,n))}g.memo(wg);function wg({routes:e,future:t,state:n,unstable_onError:r}){return Nd(e,void 0,n,r,t)}function Pd(e){ae(!1,"A is only ever to be used as the child of element, never rendered directly. 
Please wrap your in a .")}function Sg({basename:e="/",children:t=null,location:n,navigationType:r="POP",navigator:l,static:o=!1}){ae(!qr(),"You cannot render a inside another . You should never have more than one in your app.");let i=e.replace(/^\/*/,"/"),a=g.useMemo(()=>({basename:i,navigator:l,static:o,future:{}}),[i,l,o]);typeof n=="string"&&(n=Sn(n));let{pathname:s="/",search:u="",hash:f="",state:m=null,key:v="default"}=n,w=g.useMemo(()=>{let S=Rt(s,i);return S==null?null:{location:{pathname:S,search:u,hash:f,state:m,key:v},navigationType:r}},[i,s,u,f,m,v,r]);return ut(w!=null,` is not able to match the URL "${s}${u}${f}" because it does not start with the basename, so the won't render anything.`),w==null?null:g.createElement(mt.Provider,{value:a},g.createElement(Zr.Provider,{children:t,value:w}))}function xg({children:e,location:t}){return sg(ds(e),t)}function ds(e,t=[]){let n=[];return g.Children.forEach(e,(r,l)=>{if(!g.isValidElement(r))return;let o=[...t,l];if(r.type===g.Fragment){n.push.apply(n,ds(r.props.children,o));return}ae(r.type===Pd,`[${typeof r.type=="string"?r.type:r.type.name}] is not a component. 
All component children of must be a or `),ae(!r.props.index||!r.props.children,"An index route cannot have child routes.");let i={id:r.props.id||o.join("-"),caseSensitive:r.props.caseSensitive,element:r.props.element,Component:r.props.Component,index:r.props.index,path:r.props.path,loader:r.props.loader,action:r.props.action,hydrateFallbackElement:r.props.hydrateFallbackElement,HydrateFallback:r.props.HydrateFallback,errorElement:r.props.errorElement,ErrorBoundary:r.props.ErrorBoundary,hasErrorBoundary:r.props.hasErrorBoundary===!0||r.props.ErrorBoundary!=null||r.props.errorElement!=null,shouldRevalidate:r.props.shouldRevalidate,handle:r.props.handle,lazy:r.props.lazy};r.props.children&&(i.children=ds(r.props.children,o)),n.push(i)}),n}var Al="get",Ul="application/x-www-form-urlencoded";function To(e){return e!=null&&typeof e.tagName=="string"}function kg(e){return To(e)&&e.tagName.toLowerCase()==="button"}function Eg(e){return To(e)&&e.tagName.toLowerCase()==="form"}function Cg(e){return To(e)&&e.tagName.toLowerCase()==="input"}function Ng(e){return!!(e.metaKey||e.altKey||e.ctrlKey||e.shiftKey)}function _g(e,t){return e.button===0&&(!t||t==="_self")&&!Ng(e)}var _l=null;function Pg(){if(_l===null)try{new FormData(document.createElement("form"),0),_l=!1}catch{_l=!0}return _l}var Lg=new Set(["application/x-www-form-urlencoded","multipart/form-data","text/plain"]);function vi(e){return e!=null&&!Lg.has(e)?(ut(!1,`"${e}" is not a valid \`encType\` for \`
\`/\`\` and will default to "${Ul}"`),null):e}function Rg(e,t){let n,r,l,o,i;if(Eg(e)){let a=e.getAttribute("action");r=a?Rt(a,t):null,n=e.getAttribute("method")||Al,l=vi(e.getAttribute("enctype"))||Ul,o=new FormData(e)}else if(kg(e)||Cg(e)&&(e.type==="submit"||e.type==="image")){let a=e.form;if(a==null)throw new Error('Cannot submit a `,S=`
${v}
`;return`
${w}${S}
`}),i=i.replace(/
\s*(?=
]*>[\s\S]*?<\/div>)\s*
/g,"$1"),i}function _v(e){const t=e.split(/\r?\n/);let n=null;for(let r=0;r/i,n=/<\/think(?:ing)?>/i,r=e.match(t);if(!r)return{think:null,answer:e};const l=r.index,o=r[0].length,i=e.substring(0,l).trim();let a=e.substring(l+o);const s=a.match(n);let u=null,f=i;return s?(u=a.substring(0,s.index).trim(),f+=a.substring(s.index+s[0].length)):u=a.trim(),{think:u||null,answer:f.trim()}}function Pv({content:e,streamOutput:t,sources:n}){const{think:r,answer:l}=Od(e||""),[o,i]=Te.useState(!1),a=!!r;return y.jsxs("div",{className:"assistant-message",children:[a&&y.jsxs("div",{className:"assistant-thoughts",children:[y.jsxs("button",{className:"think-toggle",onClick:()=>i(s=>!s),"aria-expanded":o?"true":"false","aria-controls":"think-content",children:[y.jsx("span",{className:"think-toggle-icon","aria-hidden":"true",children:o?"▾":"▸"}),"Thoughts"]}),o&&y.jsx("div",{id:"think-content",className:"think-content",dangerouslySetInnerHTML:{__html:Zu(r)}})]}),y.jsx("div",{className:"msg-content",dangerouslySetInnerHTML:{__html:Zu(l||e||"")}}),Array.isArray(n)&&n.length>0&&y.jsx("div",{className:"msg-sources chips",children:n.map((s,u)=>{let f=s,m=!1;try{const v=new URL(s);if(v.protocol==="file:"){m=!0;const w=v.pathname.split("/").filter(Boolean);f=decodeURIComponent(w[w.length-1]||s)}else f=(v.hostname||s).replace(/^www\./i,"")}catch{}return y.jsx("a",{className:"chip",href:s,target:"_blank",rel:"noreferrer",title:s,onClick:v=>{var w,S;if(m){v.preventDefault();try{const E=new URL(s);(S=(w=window.electronAPI)==null?void 0:w.openPath)==null||S.call(w,decodeURIComponent(E.pathname))}catch{}}},children:f},s+u)})})]})}const qu="websearch.searxUrl",ec="websearch.engines",yi="chat.librarySlug";let Lv="http://127.0.0.1:8000";const Rv=48,jv=24;function Tv(){var 
Ra;const[e,t]=g.useState([]),[n,r]=g.useState(null),[l,o]=g.useState("chats"),[i,a]=g.useState("General"),[s,u]=g.useState(null),[f,m]=g.useState([]),[v,w]=g.useState([]),[S,E]=g.useState(null),[N,h]=g.useState(localStorage.getItem(yi)||null),[c,d]=g.useState(!1),[x,P]=g.useState(""),[T,j]=g.useState(""),[M,B]=g.useState(""),[O,pe]=g.useState(""),ve=g.useRef(null),Oe=g.useRef(null),[J,kn]=g.useState(Lv),[gt,$t]=g.useState("Default"),[$,A]=g.useState(!1),[U,te]=g.useState(localStorage.getItem(qu)||"http://localhost:8888"),[R,W]=g.useState(()=>{try{const p=localStorage.getItem(ec);if(p)return JSON.parse(p)}catch{}return["duckduckgo","bing","wikipedia","github","stack_overflow"]});g.useEffect(()=>{localStorage.setItem(qu,U||"")},[U]),g.useEffect(()=>{try{localStorage.setItem(ec,JSON.stringify(R||[]))}catch{}},[R]);const[fe,Fe]=g.useState(!1),[Ce,Mt]=g.useState(!1),[zv,$o]=g.useState(!0),[Fd,En]=g.useState([]),[tl,ma]=g.useState({}),[Dd,Mo]=g.useState(null),[zo,nl]=g.useState("");function Ad(p){const k=p.content||"";if(p.role==="assistant")try{const{answer:C}=Od(k);return(C||k).trim()}catch{return k.trim()}return k}async function ga(p){try{await navigator.clipboard.writeText(Ad(p))}catch(k){console.error("Failed to copy message:",k)}}function tn(p,k,C,L={}){const{removeIfEmpty:H=!1}=L;t(K=>K.map(I=>{if(I.session_id!==p)return I;const F=[];for(const G of I.messages||[]){if(G.id!==k){F.push(G);continue}H&&!C||F.push({...G,content:C})}return{...I,messages:F}}))}function nn(p){return(p==null?void 0:p.name)==="AbortError"}function Io(p){return p instanceof Error&&p.message?p.message:String(p)}function Ud(p,k){Mo(p),nl(k||"")}function rl(){Mo(null),nl("")}async function Hd(p){var K;const k=(((K=Pn[p])==null?void 0:K.content)||"").trim(),L=(zo??"").trim();if(L.length===0){rl();return}if(L===k){rl();return}const H=n;if(H){t(I=>I.map(F=>{if(F.session_id!==H)return F;const 
Se=(F.messages||[]).slice(0,p+1).map((V,Y)=>Y===p?{...V,content:L}:V);return{...F,messages:Se}})),Mo(null),nl(""),requestAnimationFrame(()=>Ho("auto",H));try{const I=await fetch(`${J}/sessions/${H}/messages/${p}`,{method:"PUT",headers:{"Content-Type":"application/json"},body:JSON.stringify({message:L})});if(!I.ok)throw new Error(`HTTP ${I.status}`)}catch(I){console.error("Failed to update message:",I),t(F=>F.map(G=>{if(G.session_id!==H)return G;const V=(G.messages||[]).map((Y,D)=>D===p?{...Y,content:k}:Y);return{...G,messages:V}}));return}await va(p,L)}}async function va(p,k=null){var G,Se,V,Y;const C=n;if(Ce||!C||typeof p!="number")return;const L=((G=e.find(D=>D.session_id===C))==null?void 0:G.messages)||[];let H=p;for(let D=p;D>=0;D--)if(((Se=L[D])==null?void 0:Se.role)==="user"){H=D;break}t(D=>D.map(b=>b.session_id===C?{...b,messages:(b.messages||[]).slice(0,H+1)}:b));const K=Ea(C);let I=null,F=[];try{if(fe)try{const D=k??(((V=L[H])==null?void 0:V.content)||""),b=L.slice(Math.max(0,H-7),H+1).map(Ge=>({role:Ge.role,content:Ge.content||""}));b.length>0&&(b[b.length-1]={role:"user",content:D});const ee=await(await fetch(`${J}/websearch`,{method:"POST",headers:{"Content-Type":"application/json"},signal:K.signal,body:JSON.stringify({prompt:D,model:M,messages:b,history_limit:8,searx_url:U||null,engines:Array.isArray(R)?R:null})})).json();ee&&typeof ee.enriched_prompt=="string"&&(I=ee.enriched_prompt,F=Array.isArray(ee.sources)?ee.sources:[])}catch(D){if(nn(D))throw D;console.warn("web search enrichment (regenerate) failed",D)}if($){const D=`msg-${Date.now()}-${Math.random()}`;let b="";t(Z=>Z.map(ee=>ee.session_id===C?{...ee,messages:[...ee.messages||[],{id:D,role:"assistant",content:"",sources:F}]}:ee));try{const Z=await fetch(`${J}/sessions/${C}/regenerate`,{method:"POST",headers:{"Content-Type":"application/json"},signal:K.signal,body:JSON.stringify({index:p,model:M,stream:!0,enriched_message:I,sources:F||[]})});if(!Z.ok)throw new Error(`HTTP ${Z.status}`);const 
ee=(Y=Z.body)==null?void 0:Y.getReader();if(!ee)throw new Error("Missing response body");const Ge=new TextDecoder;let al=!1;for(;;){const{value:vt,done:Bo}=await ee.read();if(Bo)break;const yt=Ge.decode(vt,{stream:!0});b+=yt,tn(C,D,b),!al&&Ne.current!==C&&(al=!0,rn(Wo=>({...Wo,[C]:D})),En(Wo=>[...new Set([...Wo,C])]))}Ne.current!==C?(rn(vt=>({...vt,[C]:D})),En(vt=>[...new Set([...vt,C])])):Cn.current[C]?Nn(vt=>({...vt,[C]:D})):requestAnimationFrame(()=>Ln(D,"smooth",C))}catch(Z){if(nn(Z)){tn(C,D,b,{removeIfEmpty:!0});return}console.error(Z),tn(C,D,`Error: ${Io(Z)}`,{removeIfEmpty:!0});return}}else{const D=await fetch(`${J}/sessions/${C}/regenerate`,{method:"POST",headers:{"Content-Type":"application/json"},signal:K.signal,body:JSON.stringify({index:p,model:M,stream:!1,enriched_message:I,sources:F||[]})});if(!D.ok)throw new Error(`HTTP ${D.status}`);const b=await D.json(),Z=`msg-${Date.now()}`;t(ee=>ee.map(Ge=>Ge.session_id===C?{...Ge,messages:[...Ge.messages||[],{role:"assistant",content:b.reply,id:Z,sources:F}]}:Ge)),Ne.current!==C?(rn(ee=>({...ee,[C]:Z})),En(ee=>[...new Set([...ee,C])])):Cn.current[C]?Nn(ee=>({...ee,[C]:Z})):requestAnimationFrame(()=>Ln(Z,"smooth",C))}}catch(D){nn(D)||console.error(D)}finally{Oo(K)}}const[ya,Bd]=g.useState({}),Cn=g.useRef({}),[Wd,rn]=g.useState({}),ln=g.useRef({}),wa=g.useRef({}),[Sa,Nn]=g.useState({}),[Vd,xa]=g.useState({}),ka=(p,k,C)=>p!=null&&p.id?p.id:`${C}:${k}`;g.useEffect(()=>{var k;if(!n)return;const p=((k=e.find(C=>C.session_id===n))==null?void 0:k.messages)||[];xa(C=>{const L={};return p.forEach((H,K)=>{if(H.role!=="user")return;const I=ka(H,K,n),G=(H.content||"").split(/\r\n|\r|\n/).length>30;L[I]=G?C[I]??!0:!1}),L})},[e,n]);function Qd(p){xa(k=>({...k,[p]:!(k[p]??!0)}))}const or=Te.useCallback((p,k)=>{Bd(C=>{const L={...C,[p]:k};return Cn.current=L,L})},[]),_n=g.useRef(null),Kd=g.useRef(!1),Yd=g.useRef(null),Ne=g.useRef(n);g.useEffect(()=>{Ne.current=n},[n]);const Ea=Te.useCallback(p=>{const k=new 
AbortController;return _n.current={controller:k,sessionId:p},Mt(!0),k},[]),Oo=Te.useCallback(p=>{var k;((k=_n.current)==null?void 0:k.controller)===p&&(_n.current=null,Mt(!1))},[]),Jd=Te.useCallback(()=>{const p=_n.current;p&&(_n.current=null,p.controller.abort(),Mt(!1))},[]);g.useEffect(()=>()=>{var p;(p=_n.current)==null||p.controller.abort()},[]);const zt=g.useRef(null),[Gd,Xd]=g.useState(230),[ll,Ca]=g.useState(!1),bd=Te.useCallback(p=>{Ca(!0)},[]),Fo=Te.useCallback(()=>{Ca(!1)},[]),Do=Te.useCallback(p=>{if(ll){const k=Math.max(230,Math.min(500,p.clientX));Xd(k)}},[ll]);Te.useEffect(()=>(window.addEventListener("mousemove",Do),window.addEventListener("mouseup",Fo),()=>{window.removeEventListener("mousemove",Do),window.removeEventListener("mouseup",Fo)}),[Do,Fo]),Te.useEffect(()=>{ll?document.body.classList.add("no-select"):document.body.classList.remove("no-select")},[ll]),Te.useEffect(()=>{const p=async k=>{const C=k.target.closest(".codeblock__copy");if(!C)return;const L=C.closest(".codeblock"),H=L==null?void 0:L.querySelector("pre > code");if(H)try{await navigator.clipboard.writeText(H.textContent||""),C.classList.add("copied"),setTimeout(()=>C.classList.remove("copied"),800)}catch(K){console.error("Copy failed:",K)}};return document.addEventListener("click",p),()=>document.removeEventListener("click",p)},[]),g.useEffect(()=>{window.electronAPI.getSettings().then(k=>{kn(k.ollamaApiUrl),$t(k.colorScheme||"Default"),B(k.chatModel||""),A(k.streamOutput||!1),ma(k.scrollPositions||{}),Kr(k.colorScheme||"Default")});const p=()=>{var k;l==="chats"&&((k=Oe.current)==null||k.focus())};return window.electronAPI.onWindowFocus(p),()=>{}},[l]),g.useEffect(()=>{Kr(gt)},[gt]);const Zd=p=>{!p||!J||fetch(`${J}/history?session_id=${encodeURIComponent(p)}`).then(k=>k.json()).then(k=>{t(C=>C.map(L=>L.session_id===p?{...L,messages:k.messages||[]}:L))}).catch(()=>{})};async function ol(){if(J)try{const k=await(await 
fetch(`${J}/libraries`)).json(),C=Array.isArray(k.libraries)?k.libraries:[];if(m(C),C.length===0){E(null),h(null);return}C.some(L=>L.slug===S)||E(C[0].slug),N&&!C.some(L=>L.slug===N)&&h(null)}catch(p){console.warn("Failed to load libraries",p)}}async function Ao(){if(J)try{const k=await(await fetch(`${J}/jobs`)).json();w(Array.isArray(k.jobs)?k.jobs:[])}catch(p){console.warn("Failed to load library jobs",p)}}async function Na(p=null){const C=(typeof p=="string"?p:x).trim();if(!C){j("Name is required.");return}try{j("");const L=await fetch(`${J}/libraries`,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({name:C})});if(!L.ok){const K=await L.text();throw new Error(K||`HTTP ${L.status}`)}const H=await L.json();d(!1),P(""),await ol(),H!=null&&H.slug&&E(H.slug)}catch(L){console.error("Failed to create library",L),j(String((L==null?void 0:L.message)||L))}}g.useEffect(()=>{J&&($o(!0),fetch(`${J}/sessions`).then(p=>p.json()).then(p=>{const k=p.sessions.map(C=>({...C,messages:[]}));t(k),k.length>0?r(k[0].session_id):r(null),$o(!1)}).catch(()=>{$o(!1)}))},[J]),g.useEffect(()=>{J&&(ol(),Ao())},[J]),g.useEffect(()=>{try{N?localStorage.setItem(yi,N):localStorage.removeItem(yi)}catch{}},[N]),g.useEffect(()=>{if(!J)return;const p=setInterval(()=>{ol(),Ao()},3e3);return()=>clearInterval(p)},[J,l,S,N]),g.useEffect(()=>{Zd(n)},[n]);const Uo=p=>{o(p)},qd=p=>{np(p)},Pn=g.useMemo(()=>{var p;return((p=e.find(k=>k.session_id===n))==null?void 0:p.messages)||[]},[n,e]),il=g.useMemo(()=>f.find(p=>p.slug===S)||null,[S,f]),sl=g.useMemo(()=>f.find(p=>p.slug===N)||null,[N,f]);g.useEffect(()=>{const p=n,k=l;return()=>{if(k==="chats"&&p){const C=typeof ln.current[p]=="number"?ln.current[p]:ve.current?ve.current.scrollTop:0;ma(L=>{const H={...L,[p]:C};return window.electronAPI.updateSettings({scrollPositions:H}),H})}}},[n,l]),g.useEffect(()=>{const p=ve.current;if(!p)return;const k=()=>{const{scrollTop:C,scrollHeight:L,clientHeight:H}=p,K=L-C-H<=jv;if(n){const 
I=wa.current[n],F=typeof I=="number"&&Cp.removeEventListener("scroll",k)},[n,or]),g.useEffect(()=>{const p=n;p&&ya[p]===!1&&Nn(k=>{if(!(p in k))return k;const C={...k};return delete C[p],C})},[n,ya]),g.useLayoutEffect(()=>{if(l!=="chats"||!n)return;const p=ve.current;if(!p)return;zt.current=null;const k=()=>{if(zt.current===n)return;const I=typeof ln.current[n]=="number"?ln.current[n]:void 0,F=typeof I=="number"?I:tl[n];if(typeof F=="number"){p.scrollTop=F,zt.current=n;return}Pn.length>0&&(p.scrollTop=p.scrollHeight,zt.current=n)};k();const C=requestAnimationFrame(k),L=()=>{zt.current!==n&&requestAnimationFrame(k)},H=new MutationObserver(L);H.observe(p,{childList:!0,subtree:!0});const K=new ResizeObserver(L);return K.observe(p),()=>{cancelAnimationFrame(C),H.disconnect(),K.disconnect()}},[n,l,Pn.length,tl]),g.useEffect(()=>{if(l!=="chats"||!n||zt.current===n)return;const p=typeof ln.current[n]=="number"?ln.current[n]:void 0;typeof(typeof p=="number"?p:tl[n])!="number"&&Pn.length>0&&requestAnimationFrame(()=>{const C=ve.current;C&&(C.scrollTop=C.scrollHeight,zt.current=n)})},[Pn.length,n,l,tl]);const Ho=(p="smooth",k=null)=>{const C=ve.current;if(!C)return;const L=k??Ne.current;Ne.current===L&&(C.scrollTo({top:C.scrollHeight,behavior:p}),or(L,!1))},Ln=(p,k="auto",C=null)=>{const L=ve.current;if(!L)return;const H=C??Ne.current;if(Ne.current!==H)return;const K=document.getElementById(p);if(K){const I=Math.max(0,K.offsetTop-Rv);L.scrollTo({top:I,behavior:k})}},ep=()=>{const p=Ne.current,k=Sa[p];k&&(Ln(k,"smooth",p),Nn(C=>{const{[p]:L,...H}=C;return H}))};async function _a(){var H,K;if(Ce||!O.trim()||!M)return;let p=n,k=!1;if(p){const I=e.find(F=>F.session_id===p);k=I&&I.name==="New Chat"&&I.messages.length===0}else{const I=await Pa();await new Promise(F=>setTimeout(F,200)),p=I.session_id,k=!0}const 
C={role:"user",content:O.trim(),id:`msg-${Date.now()}-${Math.random()}`};Kd.current=!0,Yd.current=p,or(p,!1),Ne.current===p&&(zt.current=Ne.current),hd.flushSync(()=>{t(I=>I.map(F=>F.session_id===p?{...F,messages:[...F.messages||[],C]}:F)),pe("")}),requestAnimationFrame(()=>Ho("auto",p));const L=Ea(p);try{let I=[];try{I=[...(((H=e.find(D=>D.session_id===p))==null?void 0:H.messages)||[]).slice(-8).map(D=>({role:D.role,content:D.content||""})),{role:"user",content:C.content}]}catch{}let F=C.content,G=[];const Se=[];if(N)try{const Y=await(await fetch(`${J}/libraries/${N}/context`,{method:"POST",headers:{"Content-Type":"application/json"},signal:L.signal,body:JSON.stringify({prompt:C.content,top_k:5})})).json();Y&&typeof Y.context_block=="string"&&Y.context_block.trim()&&Se.push(Y.context_block.trim()),Array.isArray(Y==null?void 0:Y.sources)&&G.push(...Y.sources)}catch(V){if(nn(V))throw V;console.warn("local library enrichment failed",V)}if(fe)try{const Y=await(await fetch(`${J}/websearch`,{method:"POST",headers:{"Content-Type":"application/json"},signal:L.signal,body:JSON.stringify({prompt:C.content,model:M,messages:I,history_limit:8,searx_url:U||null,engines:Array.isArray(R)?R:null})})).json();Y&&typeof Y.context_block=="string"&&Y.context_block.trim()&&Se.push(Y.context_block.trim()),Array.isArray(Y==null?void 0:Y.sources)&&G.push(...Y.sources)}catch(V){if(nn(V))throw V;console.warn("web search enrichment failed",V)}if(G=[...new Set(G)],Se.length>0&&(F=`${C.content} + +${Se.join(` + +`)}`),$){const V=`msg-${Date.now()}-${Math.random()}`;let Y="";const D={role:"assistant",content:"",id:V,sources:G};t(b=>b.map(Z=>Z.session_id===p?{...Z,messages:[...Z.messages||[],D]}:Z));try{const b=await fetch(`${J}/chat`,{method:"POST",headers:{"Content-Type":"application/json"},signal:L.signal,body:JSON.stringify({session_id:p,model:M,message:C.content,enriched_message:Se.length>0?F:null,stream:!0,sources:G||[]})});if(!b.ok)throw new Error(`HTTP ${b.status}`);const 
Z=(K=b.body)==null?void 0:K.getReader();if(!Z)throw new Error("Missing response body");const ee=new TextDecoder;let Ge=!1;for(;;){const{value:al,done:vt}=await Z.read();if(vt){tn(p,V,Y),Ne.current===p?Cn.current[p]?Nn(yt=>({...yt,[p]:V})):requestAnimationFrame(()=>Ln(V,"smooth",p)):(rn(yt=>({...yt,[p]:V})),En(yt=>[...new Set([...yt,p])]));break}const Bo=ee.decode(al,{stream:!0});Y+=Bo,tn(p,V,Y),Ne.current===p&&!Cn.current[p]&&Ho("auto",p),Ne.current!==p&&!Ge&&(rn(yt=>({...yt,[p]:V})),Ge=!0)}}catch(b){if(nn(b)){tn(p,V,Y,{removeIfEmpty:!0});return}console.error("Failed to send message:",b),tn(p,V,"Error: "+Io(b),{removeIfEmpty:!0});return}}else{const V=await fetch(`${J}/chat`,{method:"POST",headers:{"Content-Type":"application/json"},signal:L.signal,body:JSON.stringify({session_id:p,model:M,message:C.content,enriched_message:Se.length>0?F:null,stream:!1,sources:G||[]})});if(!V.ok)throw new Error(`HTTP ${V.status}`);const Y=await V.json(),D=`msg-${Date.now()}`,b={role:"assistant",content:Y.reply,id:D,sources:G};t(Z=>Z.map(ee=>ee.session_id===p?{...ee,messages:[...ee.messages||[],b]}:ee)),D&&(Ne.current===p?Cn.current[p]?Nn(Z=>({...Z,[p]:D})):requestAnimationFrame(()=>Ln(D,"smooth",p)):rn(Z=>({...Z,[p]:D})))}Ne.current!==p&&En(V=>[...new Set([...V,p])]),k&&fetch(`${J}/generate-title`,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({session_id:p,message:C.content,model:M})}).then(V=>V.json()).then(V=>{const Y=V.title.replace(/[\s\S]*?<\/think(?:ing)?>/i,"").trim();t(D=>D.map(b=>b.session_id===p?{...b,name:Y}:b))})}catch(I){if(nn(I)){Oo(L);return}console.error("Failed to send message:",I);const F={role:"assistant",content:"Error: "+Io(I),id:`msg-${Date.now()}-${Math.random()}`};t(G=>G.map(Se=>Se.session_id===p?{...Se,messages:[...Se.messages,F]}:Se))}finally{Oo(L)}}function tp(){Fe(p=>!p)}async function Pa(){var H;const p="sess-"+Math.random().toString(36).slice(2)+Date.now().toString(36),C=await(await 
fetch(`${J}/sessions`,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({session_id:p})})).json(),L={...C,messages:[]};return t(K=>[L,...K]),r(C.session_id),(H=Oe.current)==null||H.focus(),C}function np(p){r(p),En(C=>C.filter(L=>L!==p));const k=Wd[p];k&&requestAnimationFrame(()=>{let C=12;const L=()=>{if(!ve.current)return;let K=document.getElementById(k);if(!K){const I=e.find(F=>F.session_id===p);if(I&&Array.isArray(I.messages))for(let F=I.messages.length-1;F>=0;F--){const G=I.messages[F];if(G.role==="assistant"&&G.id){K=document.getElementById(G.id);break}}}K?(Ln(K.id,"smooth",p),rn(I=>{const{[p]:F,...G}=I;return G})):C-- >0&&requestAnimationFrame(L)};requestAnimationFrame(L)})}function rp(p,k){fetch(`${J}/sessions/${p}/rename`,{method:"PUT",headers:{"Content-Type":"application/json"},body:JSON.stringify({title:k})}).then(()=>{t(C=>C.map(L=>L.session_id===p?{...L,name:k}:L)),u(null)})}function La(p){fetch(`${J}/sessions/${p}`,{method:"DELETE"}).then(()=>{const k=e.filter(C=>C.session_id!==p);t(k),n===p&&r(k.length>0?k[0].session_id:null)})}g.useEffect(()=>{const p=e.filter(k=>k.name==="New Chat"&&k.session_id!==n&&k.messages.length===0);p.length>0&&p.forEach(k=>{La(k.session_id)})},[n,e,J]);const lp=p=>{var C;window.getSelection().toString().length>0||document.activeElement!==Oe.current&&(p.target.closest(".msg")||(C=Oe.current)==null||C.focus())};return y.jsxs("div",{className:"app",style:{gridTemplateColumns:`${Gd}px 1fr`},children:[y.jsxs("div",{className:"sidebar",children:[y.jsxs("div",{className:"sidebar-header",children:[y.jsx("div",{className:`sidebar-tab ${l==="chats"?"active":""}`,onClick:()=>Uo("chats"),children:"Chats"}),y.jsx("div",{className:`sidebar-tab ${l==="dbs"?"active":""}`,onClick:()=>Uo("dbs"),children:"DBs"}),y.jsx("div",{className:`sidebar-tab 
${l==="settings"?"active":""}`,onClick:()=>Uo("settings"),children:"Settings"})]}),y.jsxs("div",{className:"sidebar-content",children:[l==="chats"&&y.jsx("div",{className:"chat-list",children:e.map(p=>y.jsx("div",{className:`chat-item ${p.session_id===n?"active":""}`,onClick:()=>qd(p.session_id),children:s===p.session_id?y.jsx("input",{type:"text",className:"rename-input",defaultValue:p.name,onBlur:()=>u(null),onKeyDown:k=>{k.key==="Enter"?rp(p.session_id,k.target.value):k.key==="Escape"&&u(null)},autoFocus:!0}):y.jsxs(y.Fragment,{children:[y.jsx("span",{children:p.name}),y.jsxs("div",{className:"chat-item-buttons",children:[Fd.includes(p.session_id)&&y.jsx("div",{className:"unread-dot"}),y.jsx("button",{className:"icon-button",onClick:k=>{k.stopPropagation(),u(p.session_id)},children:y.jsx("svg",{xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 24 24",fill:"none",stroke:"currentColor",strokeWidth:"2",strokeLinecap:"round",strokeLinejoin:"round",className:"feather feather-edit-2",children:y.jsx("path",{d:"M17 3a2.828 2.828 0 1 1 4 4L7.5 20.5 2 22l1.5-5.5L17 3z"})})}),y.jsx("button",{className:"icon-button",onClick:k=>{k.stopPropagation(),La(p.session_id)},children:y.jsxs("svg",{xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 24 24",fill:"none",stroke:"currentColor",strokeWidth:"2",strokeLinecap:"round",strokeLinejoin:"round",className:"feather feather-x",children:[y.jsx("line",{x1:"18",y1:"6",x2:"6",y2:"18"}),y.jsx("line",{x1:"6",y1:"6",x2:"18",y2:"18"})]})})]})]})},p.session_id))}),l==="dbs"&&y.jsx("div",{className:"db-list",children:f.length===0?y.jsx("div",{className:"empty-list-message",children:"No databases yet."}):f.map(p=>y.jsxs("div",{className:`chat-item 
${p.slug===S?"active":""}`,onClick:()=>E(p.slug),children:[y.jsx("span",{children:p.name}),N===p.slug&&y.jsx("div",{className:"db-active-badge",children:"Chat"})]},p.slug))}),l==="settings"&&y.jsxs("div",{className:"settings-list",children:[y.jsx("div",{className:`settings-item ${i==="General"?"active":""}`,onClick:()=>a("General"),children:"General"}),y.jsx("div",{className:`settings-item ${i==="Interface"?"active":""}`,onClick:()=>a("Interface"),children:"Interface"}),y.jsx("div",{className:`settings-item ${i==="Websearch"?"active":""}`,onClick:()=>a("Websearch"),children:"Websearch"})]})]}),l!=="settings"&&y.jsxs("div",{className:"sidebar-footer",children:[l==="chats"&&y.jsx("button",{className:"button new-chat-button",onClick:Pa,children:"New Chat"}),l==="dbs"&&(c?y.jsxs("div",{className:"new-db-form",children:[y.jsx("input",{type:"text",className:"rename-input",value:x,onChange:p=>P(p.target.value),onKeyDown:p=>{p.key==="Enter"?Na():p.key==="Escape"&&(d(!1),P(""),j(""))},placeholder:"Database name",autoFocus:!0}),T&&y.jsx("div",{className:"form-error",children:T}),y.jsxs("div",{className:"new-db-actions",children:[y.jsx("button",{className:"button new-db-button",onClick:()=>Na(),children:"Create"}),y.jsx("button",{className:"button ghost",onClick:()=>{d(!1),P(""),j("")},children:"Cancel"})]})]}):y.jsx("button",{className:"button new-db-button",onClick:()=>{d(!0),j("")},children:"New Database"}))]}),y.jsx("div",{className:"resizer",onMouseDown:bd})]}),y.jsxs("div",{className:"main-content",children:[l==="chats"&&y.jsxs(y.Fragment,{children:[y.jsxs("div",{className:"header",children:[y.jsxs("strong",{children:["Chat - ",((Ra=e.find(p=>p.session_id===n))==null?void 0:Ra.name)||"New Chat"]}),sl&&y.jsxs("span",{className:"header-subtle",children:["KB: ",sl.name]})]}),y.jsx("div",{className:"chat",ref:ve,onClick:lp,children:Pn.map((p,k)=>{const C=p.role==="user"&&Dd===k;return y.jsx("div",{id:p.id,className:"msg "+(p.role==="user"?"user":"assistant")+(C?" 
editing":""),children:p.role==="assistant"?y.jsxs("div",{className:"assistant-message-wrapper",children:[y.jsx(Pv,{content:p.content,streamOutput:$,sources:p.sources}),!Ce&&y.jsxs("div",{className:"message-options-bar assistant-options",children:[y.jsx("button",{className:"icon-button",title:"Copy message",onClick:()=>ga(p),children:y.jsxs("svg",{xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 24 24",fill:"none",stroke:"currentColor",strokeWidth:"2",strokeLinecap:"round",strokeLinejoin:"round",children:[y.jsx("rect",{x:"9",y:"9",width:"13",height:"13",rx:"2",ry:"2"}),y.jsx("path",{d:"M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"})]})}),y.jsx("button",{className:"icon-button",title:"Regenerate response",onClick:()=>va(k),children:y.jsx("svg",{xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 24 24",fill:"none",stroke:"currentColor",strokeWidth:"2",strokeLinecap:"round",strokeLinejoin:"round",children:y.jsx("path",{d:"M21.5 2v6h-6M2.5 22v-6h6M2 11.5a10 10 0 0 1 18.8-4.3M22 12.5a10 10 0 0 1-18.8 4.3"})})})]})]}):y.jsxs("div",{className:"user-message-wrapper",children:[C?y.jsxs("div",{className:"msg-content msg-content--user editing",children:[y.jsx("div",{className:"user-edit-shadow","aria-hidden":"true",children:zo}),y.jsx(Gu,{className:"edit-message-input edit-overlay",value:zo,onChange:L=>nl(L.target.value),onBlur:rl,onKeyDown:L=>{L.key==="Escape"&&(L.preventDefault(),rl()),L.key==="Enter"&&!L.shiftKey&&(L.preventDefault(),Hd(k))},autoFocus:!0,minRows:1})]}):(()=>{const L=p.content||"",H=L.split(/\r\n|\r|\n/),K=H.length>30,I=ka(p,k,n),F=K?Vd[I]??!0:!1,G=F?H.slice(0,30).join(` +`)+` +…`:L;return y.jsxs(y.Fragment,{children:[y.jsx("div",{className:"msg-content msg-content--user",children:G}),K&&y.jsx("button",{className:"user-msg-expand",onClick:()=>Qd(I),"aria-expanded":F?"false":"true",children:F?"Show entire message":"Collapse"})]})})(),!Ce&&!C&&y.jsxs("div",{className:"message-options-bar 
user-options",children:[y.jsx("button",{className:"icon-button",title:"Edit message",onClick:()=>Ud(k,p.content),children:y.jsxs("svg",{xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 24 24",fill:"none",stroke:"currentColor",strokeWidth:"2",strokeLinecap:"round",strokeLinejoin:"round",children:[y.jsx("path",{d:"M12 20h9"}),y.jsx("path",{d:"M16.5 3.5a2.121 2.121 0 0 1 3 3L7 19l-4 1 1-4L16.5 3.5z"})]})}),y.jsx("button",{className:"icon-button",title:"Copy message",onClick:()=>ga(p),children:y.jsxs("svg",{xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 24 24",fill:"none",stroke:"currentColor",strokeWidth:"2",strokeLinecap:"round",strokeLinejoin:"round",children:[y.jsx("rect",{x:"9",y:"9",width:"13",height:"13",rx:"2",ry:"2"}),y.jsx("path",{d:"M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"})]})})]})]})},p.id||k)})},n),Sa[n]&&y.jsxs("button",{className:"new-msg-tip",onClick:ep,title:"Jump to the new message","aria-label":"Jump to the new message",children:["New message",y.jsx("span",{style:{marginLeft:6},children:"↓"})]}),y.jsx("div",{className:"footer",children:y.jsxs("div",{className:"footer-content-wrapper",children:[y.jsx(Gu,{ref:Oe,className:"input",value:O,onChange:p=>pe(p.target.value),onKeyDown:p=>{p.key==="Enter"&&!p.shiftKey&&(p.preventDefault(),_a())},placeholder:"Ask any question...",maxRows:13}),y.jsx("button",{type:"button",className:"websearch-toggle"+(fe?" 
active":""),onClick:tp,title:"Toggle web search","aria-pressed":fe,children:y.jsxs("svg",{xmlns:"http://www.w3.org/2000/svg",viewBox:"0 0 24 24",fill:"none",stroke:"currentColor",strokeWidth:"2",strokeLinecap:"round",strokeLinejoin:"round","aria-hidden":"true",children:[y.jsx("circle",{cx:"12",cy:"12",r:"10"}),y.jsx("line",{x1:"2",y1:"12",x2:"22",y2:"12"}),y.jsx("path",{d:"M12 2a15.3 15.3 0 0 1 4 10 15.3 15.3 0 0 1-4 10 15.3 15.3 0 0 1-4-10 15.3 15.3 0 0 1 4-10z"})]})}),y.jsx("button",{className:"button",onClick:Ce?Jd:_a,title:Ce?"Cancel generation":"Send","aria-label":Ce?"Cancel generation":"Send",children:Ce?y.jsx("div",{className:"spinner"}):"Send"})]})})]}),l==="dbs"&&y.jsxs(y.Fragment,{children:[y.jsxs("div",{className:"header",children:[y.jsx("strong",{children:(il==null?void 0:il.name)||"Databases"}),sl&&y.jsxs("span",{className:"header-subtle",children:["Chat KB: ",sl.name]})]}),y.jsx(Cv,{apiBase:J,library:il,jobs:v,chatLibrarySlug:N,onRefresh:async()=>{await ol(),await Ao()},onToggleChatLibrary:h,onDeleted:p=>{if(S===p){const k=f.find(C=>C.slug!==p);E((k==null?void 0:k.slug)||null)}N===p&&h(null)}})]}),l==="settings"&&y.jsxs(y.Fragment,{children:[y.jsx("div",{className:"header",children:y.jsxs("strong",{children:[i," Settings"]})}),i==="General"&&y.jsx(yv,{onModelChange:B,streamOutput:$,onStreamOutputChange:A}),i==="Interface"&&y.jsx(kv,{}),i==="Websearch"&&y.jsx(Nv,{searxUrl:U,setSearxUrl:te,engines:R,setEngines:W})]})]})]})}function $v(){return g.useEffect(()=>{window.electronAPI.getSettings().then(e=>{Kr(e.colorScheme||"Default")})},[]),y.jsx(Te.StrictMode,{children:y.jsx(Vg,{children:y.jsx(xg,{children:y.jsx(Pd,{path:"/",element:y.jsx(Tv,{})})})})})}const Mv=md(document.getElementById("root"));Mv.render(y.jsx($v,{})); diff --git a/dist/index.html b/dist/index.html new file mode 100644 index 0000000..cbacc08 --- /dev/null +++ b/dist/index.html @@ -0,0 +1,14 @@ + + + + + + + LLM Desktop + + + + +
+ + diff --git a/electron/main.cjs b/electron/main.cjs index cdc8cf9..e8b2d24 100644 --- a/electron/main.cjs +++ b/electron/main.cjs @@ -1,5 +1,4 @@ - -const { app, BrowserWindow, Menu, ipcMain, shell } = require('electron') +const { app, BrowserWindow, Menu, dialog, ipcMain, shell } = require('electron') const path = require('path') const { is } = require('@electron-toolkit/utils') const fs = require('fs') @@ -9,12 +8,36 @@ let settingsWindow = null const settingsFilePath = path.join(app.getPath('userData'), 'settings.json') let appSettings = {} +const DEFAULT_UI_SCALE = 1 +const MIN_UI_SCALE = 0.7 +const MAX_UI_SCALE = 1.3 -// Default settings const defaultSettings = { ollamaApiUrl: 'http://127.0.0.1:8000', colorScheme: 'Default', - chatModel: 'llama3' // Set a default model here + uiScale: DEFAULT_UI_SCALE, + chatModel: 'llama3', +} + +function normalizeUiScale(value) { + const numericValue = Number(value) + if (!Number.isFinite(numericValue)) { + return DEFAULT_UI_SCALE + } + + return Math.min(MAX_UI_SCALE, Math.max(MIN_UI_SCALE, Math.round(numericValue * 100) / 100)) +} + +function applyUiScaleToWindow(window) { + if (!window || window.isDestroyed()) { + return + } + + window.webContents.setZoomFactor(normalizeUiScale(appSettings.uiScale)) +} + +function applyUiScaleToAllWindows() { + BrowserWindow.getAllWindows().forEach(applyUiScaleToWindow) } function loadSettings() { @@ -24,8 +47,9 @@ function loadSettings() { appSettings = { ...defaultSettings, ...JSON.parse(data) } } else { appSettings = { ...defaultSettings } - saveSettings() // Create the file with default settings + saveSettings() } + appSettings.uiScale = normalizeUiScale(appSettings.uiScale) } catch (error) { console.error('Failed to load settings:', error) appSettings = { ...defaultSettings } @@ -40,7 +64,7 @@ function saveSettings() { } } -async function createMainWindow () { +async function createMainWindow() { mainWindow = new BrowserWindow({ width: 1000, height: 720, @@ -50,17 +74,23 @@ async 
function createMainWindow () { webPreferences: { preload: path.join(__dirname, 'preload.cjs'), contextIsolation: true, - nodeIntegration: false - } + nodeIntegration: false, + }, }) + applyUiScaleToWindow(mainWindow) + mainWindow.on('ready-to-show', () => { mainWindow.show() }) + mainWindow.webContents.on('did-finish-load', () => { + applyUiScaleToWindow(mainWindow) + }) + mainWindow.on('focus', () => { - mainWindow.webContents.send('window-focused'); - }); + mainWindow.webContents.send('window-focused') + }) if (is.dev && process.env.VITE_DEV_SERVER_URL) { await mainWindow.loadURL(process.env.VITE_DEV_SERVER_URL) @@ -70,12 +100,12 @@ async function createMainWindow () { } mainWindow.webContents.setWindowOpenHandler(({ url }) => { - shell.openExternal(url); - return { action: 'deny' }; - }); + shell.openExternal(url) + return { action: 'deny' } + }) } -async function createSettingsWindow () { +async function createSettingsWindow() { if (settingsWindow) { settingsWindow.focus() return @@ -91,14 +121,20 @@ async function createSettingsWindow () { webPreferences: { preload: path.join(__dirname, 'preload.cjs'), contextIsolation: true, - nodeIntegration: false - } + nodeIntegration: false, + }, }) + applyUiScaleToWindow(settingsWindow) + settingsWindow.on('ready-to-show', () => { settingsWindow.show() }) + settingsWindow.webContents.on('did-finish-load', () => { + applyUiScaleToWindow(settingsWindow) + }) + settingsWindow.on('closed', () => { settingsWindow = null }) @@ -112,7 +148,7 @@ async function createSettingsWindow () { } app.whenReady().then(() => { - loadSettings() // Load settings when the app is ready + loadSettings() createMainWindow() const menuTemplate = [ @@ -122,11 +158,11 @@ app.whenReady().then(() => { { label: 'Settings', accelerator: 'CmdOrCtrl+,', - click: createSettingsWindow + click: createSettingsWindow, }, { type: 'separator' }, - { role: 'quit' } - ] + { role: 'quit' }, + ], }, { label: 'Edit', @@ -139,8 +175,8 @@ app.whenReady().then(() => { { 
role: 'paste' }, { role: 'delete' }, { type: 'separator' }, - { role: 'selectAll' } - ] + { role: 'selectAll' }, + ], }, { label: 'View', @@ -153,9 +189,9 @@ app.whenReady().then(() => { { role: 'zoomin' }, { role: 'zoomout' }, { type: 'separator' }, - { role: 'togglefullscreen' } - ] - } + { role: 'togglefullscreen' }, + ], + }, ] const menu = Menu.buildFromTemplate(menuTemplate) @@ -166,23 +202,40 @@ app.whenReady().then(() => { }) }) -// IPC handlers for settings -ipcMain.handle('get-settings', () => { - return appSettings -}) +ipcMain.handle('get-settings', () => appSettings) ipcMain.handle('set-setting', (event, key, value) => { - appSettings[key] = value + appSettings[key] = key === 'uiScale' ? normalizeUiScale(value) : value saveSettings() + if (key === 'uiScale') { + applyUiScaleToAllWindows() + } return true }) ipcMain.handle('update-settings', (event, settings) => { appSettings = { ...appSettings, ...settings } + appSettings.uiScale = normalizeUiScale(appSettings.uiScale) saveSettings() + if (Object.prototype.hasOwnProperty.call(settings, 'uiScale')) { + applyUiScaleToAllWindows() + } return true }) +ipcMain.handle('pick-paths', async () => { + const result = await dialog.showOpenDialog(mainWindow, { + properties: ['openFile', 'openDirectory', 'multiSelections'], + }) + return result.canceled ? 
[] : result.filePaths +}) + +ipcMain.handle('open-path', async (event, filePath) => { + if (!filePath) return false + const err = await shell.openPath(filePath) + return err === '' +}) + ipcMain.on('open-external-link', (event, url) => { shell.openExternal(url) }) diff --git a/electron/preload.cjs b/electron/preload.cjs index 0eb27d4..6ad4ce3 100644 --- a/electron/preload.cjs +++ b/electron/preload.cjs @@ -6,6 +6,8 @@ contextBridge.exposeInMainWorld('electronAPI', { getSettings: () => ipcRenderer.invoke('get-settings'), setSetting: (key, value) => ipcRenderer.invoke('set-setting', key, value), updateSettings: (settings) => ipcRenderer.invoke('update-settings', settings), + pickPaths: () => ipcRenderer.invoke('pick-paths'), + openPath: (filePath) => ipcRenderer.invoke('open-path', filePath), openExternalLink: (event) => { event.preventDefault(); const url = event.currentTarget.href; diff --git a/package.json b/package.json index c8fc3e7..55a1c2b 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ "type": "module", "scripts": { "dev": "concurrently -k \"npm:dev:backend\" \"npm:dev:renderer\" \"npm:dev:electron\"", - "dev:backend": "python3 -m uvicorn backend.main:app --host 127.0.0.1 --port 8000 --reload", + "dev:backend": "backend/.venv/bin/python -m uvicorn backend.main:app --host 127.0.0.1 --port 8000 --reload", "dev:renderer": "vite --port 5173 --strictPort", "dev:electron": "wait-on http://localhost:5173 tcp:8000 && cross-env VITE_DEV_SERVER_URL=http://localhost:5173 electron .", "build": "vite build", diff --git a/run.sh b/run.sh index ab0eba8..9d2b691 100755 --- a/run.sh +++ b/run.sh @@ -1,6 +1,19 @@ #!/bin/sh -python -m venv backend/.venv -source backend/.venv/bin/activate -pip install -r backend/requirements.txt +set -eu + +PYTHON_BIN="${PYTHON_BIN:-python3.13}" +VENV_DIR="backend/.venv" + +if ! command -v "$PYTHON_BIN" >/dev/null 2>&1; then + echo "Python 3.13 is required. Set PYTHON_BIN to a Python 3.13 executable if needed." 
>&2 + exit 1 +fi + +if [ ! -x "$VENV_DIR/bin/python" ] || ! "$VENV_DIR/bin/python" -c 'import sys; raise SystemExit(0 if sys.version_info[:2] == (3, 13) else 1)'; then + rm -rf "$VENV_DIR" + "$PYTHON_BIN" -m venv "$VENV_DIR" +fi + +"$VENV_DIR/bin/python" -m pip install -r backend/requirements.txt npm install -npm run dev \ No newline at end of file +npm run dev diff --git a/src/App.jsx b/src/App.jsx index c608341..ca86d1c 100644 --- a/src/App.jsx +++ b/src/App.jsx @@ -4,8 +4,10 @@ import { flushSync } from 'react-dom'; import TextareaAutosize from 'react-textarea-autosize'; import GeneralSettings from './GeneralSettings' import InterfaceSettings from './InterfaceSettings' +import LibraryManager from './LibraryManager' import WebsearchSettings from './WebsearchSettings' import { markdownToHTML } from './markdown'; +import { applyColorScheme } from './colorSchemes' // Extract or block (first occurrence) and return { think, answer } function splitThinkBlocks(text) { if (!text) return { think: null, answer: '' }; @@ -81,12 +83,35 @@ function AssistantMessageContent({ content, streamOutput, sources }) {
{sources.map((u, i) => { let label = u; + let isFile = false; try { - const host = new URL(u).hostname || u; - label = host.replace(/^www\./i, ''); + const parsed = new URL(u); + if (parsed.protocol === 'file:') { + isFile = true; + const parts = parsed.pathname.split('/').filter(Boolean); + label = decodeURIComponent(parts[parts.length - 1] || u); + } else { + const host = parsed.hostname || u; + label = host.replace(/^www\./i, ''); + } } catch {} return ( - + { + if (!isFile) return; + event.preventDefault(); + try { + const parsed = new URL(u); + window.electronAPI?.openPath?.(decodeURIComponent(parsed.pathname)); + } catch {} + }} + > {label} ); @@ -101,6 +126,7 @@ const API_URL_KEY = 'ollamaApiUrl'; const COLOR_SCHEME_KEY = 'colorScheme'; const WEBSEARCH_URL_KEY = 'websearch.searxUrl'; const WEBSEARCH_ENGINES_KEY = 'websearch.engines'; +const CHAT_LIBRARY_KEY = 'chat.librarySlug'; // Initial API value will be set by useEffect after settings are loaded let API = import.meta.env.VITE_API_URL ?? 
'http://127.0.0.1:8000'; @@ -113,6 +139,13 @@ export default function App() { const [activeSidebarMode, setActiveSidebarMode] = useState('chats') // 'chats', 'dbs', 'settings' const [activeSettingsSubmenu, setActiveSettingsSubmenu] = useState('General'); // 'General', 'Interface' const [editingSessionId, setEditingSessionId] = useState(null); // ID of the session being edited + const [libraries, setLibraries] = useState([]) + const [libraryJobs, setLibraryJobs] = useState([]) + const [activeLibrarySlug, setActiveLibrarySlug] = useState(null) + const [chatLibrarySlug, setChatLibrarySlug] = useState(localStorage.getItem(CHAT_LIBRARY_KEY) || null) + const [isCreatingLibrary, setIsCreatingLibrary] = useState(false) + const [newLibraryName, setNewLibraryName] = useState('') + const [libraryCreateError, setLibraryCreateError] = useState('') // Use currentSessionId for the actual chat operations const [model, setModel] = useState('') @@ -174,6 +207,38 @@ export default function App() { } } + function setAssistantMessageContent(sessionId, messageId, content, options = {}) { + const { removeIfEmpty = false } = options + + setChatSessions(prevSessions => + prevSessions.map(session => { + if (session.session_id !== sessionId) return session + + const nextMessages = [] + for (const message of session.messages || []) { + if (message.id !== messageId) { + nextMessages.push(message) + continue + } + + if (removeIfEmpty && !content) continue + nextMessages.push({ ...message, content }) + } + + return { ...session, messages: nextMessages } + }) + ) + } + + function isAbortError(error) { + return error?.name === 'AbortError' + } + + function getErrorText(error) { + if (error instanceof Error && error.message) return error.message + return String(error) + } + function startEditMessage(index, content) { setEditingMessageIndex(index); setEditText(content || ''); @@ -251,132 +316,134 @@ export default function App() { } async function regenerateFromIndex(index, overrideUserText = null) 
{ - const sessionId = activeSessionId; - if (!sessionId || typeof index !== 'number') return; + const sessionId = activeSessionId + if (isSending || !sessionId || typeof index !== 'number') return - const msgs = (chatSessions.find(s => s.session_id === sessionId)?.messages) || []; - let lastUserIdx = index; + const msgs = (chatSessions.find(s => s.session_id === sessionId)?.messages) || [] + let lastUserIdx = index for (let i = index; i >= 0; i--) { - if (msgs[i]?.role === 'user') { lastUserIdx = i; break; } + if (msgs[i]?.role === 'user') { + lastUserIdx = i + break + } } - // Prune UI to lastUserIdx setChatSessions(prev => prev.map(s => s.session_id === sessionId ? { ...s, messages: (s.messages || []).slice(0, lastUserIdx + 1) } : s ) - ); + ) - setIsSending(true); + const requestController = beginCancelableRequest(sessionId) - // --- optional websearch enrichment for regenerate --- - let enrichedPrompt = null; - let citationSources = []; - if (webSearchEnabled) { - try { - // Use the freshly edited user text when provided - const promptText = (overrideUserText != null ? overrideUserText : (msgs[lastUserIdx]?.content || '')); + let enrichedPrompt = null + let citationSources = [] + try { + if (webSearchEnabled) { + try { + const promptText = overrideUserText != null ? 
overrideUserText : (msgs[lastUserIdx]?.content || '') + const historyForSearch = msgs + .slice(Math.max(0, lastUserIdx - 7), lastUserIdx + 1) + .map(m => ({ role: m.role, content: m.content || '' })) + if (historyForSearch.length > 0) { + historyForSearch[historyForSearch.length - 1] = { role: 'user', content: promptText } + } - // Build compact recent history and overwrite the last user turn with promptText - const historyForSearch = msgs - .slice(Math.max(0, lastUserIdx - 7), lastUserIdx + 1) - .map(m => ({ role: m.role, content: m.content || '' })); - if (historyForSearch.length > 0) { - historyForSearch[historyForSearch.length - 1] = { role: 'user', content: promptText }; - } - - const resp = await fetch(`${ollamaApiUrl}/websearch`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - prompt: promptText, - model, - messages: historyForSearch, - history_limit: 8, - searx_url: searxUrl || null, - engines: Array.isArray(searxEngines) ? searxEngines : null, + const resp = await fetch(`${ollamaApiUrl}/websearch`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + signal: requestController.signal, + body: JSON.stringify({ + prompt: promptText, + model, + messages: historyForSearch, + history_limit: 8, + searx_url: searxUrl || null, + engines: Array.isArray(searxEngines) ? searxEngines : null, + }) }) - }); - const data = await resp.json(); - if (data && typeof data.enriched_prompt === 'string') { - enrichedPrompt = data.enriched_prompt; - citationSources = Array.isArray(data.sources) ? data.sources : []; + const data = await resp.json() + if (data && typeof data.enriched_prompt === 'string') { + enrichedPrompt = data.enriched_prompt + citationSources = Array.isArray(data.sources) ? 
data.sources : [] + } + } catch (error) { + if (isAbortError(error)) throw error + console.warn('web search enrichment (regenerate) failed', error) } - } catch (e) { - console.warn('web search enrichment (regenerate) failed', e); } - } - if (streamOutput) { - const assistantMsgId = `msg-${Date.now()}-${Math.random()}`; - // add placeholder assistant message (keep sources on the placeholder) - setChatSessions(prev => - prev.map(s => s.session_id === sessionId - ? { ...s, messages: [...(s.messages || []), { id: assistantMsgId, role: 'assistant', content: '', sources: citationSources }] } - : s + if (streamOutput) { + const assistantMsgId = `msg-${Date.now()}-${Math.random()}` + let full = '' + + setChatSessions(prev => + prev.map(s => s.session_id === sessionId + ? { ...s, messages: [...(s.messages || []), { id: assistantMsgId, role: 'assistant', content: '', sources: citationSources }] } + : s + ) ) - ); - try { - const res = await fetch(`${ollamaApiUrl}/sessions/${sessionId}/regenerate`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - index, - model, - stream: true, - enriched_message: enrichedPrompt, - sources: citationSources || [] + try { + const res = await fetch(`${ollamaApiUrl}/sessions/${sessionId}/regenerate`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + signal: requestController.signal, + body: JSON.stringify({ + index, + model, + stream: true, + enriched_message: enrichedPrompt, + sources: citationSources || [] + }) }) - }); - const reader = res.body.getReader(); - const decoder = new TextDecoder(); - let full = ''; - let unreadMarked = false; + if (!res.ok) throw new Error(`HTTP ${res.status}`) - while (true) { - const { value, done } = await reader.read(); - if (done) break; + const reader = res.body?.getReader() + if (!reader) throw new Error('Missing response body') - const chunk = decoder.decode(value, { stream: true }); - full += chunk; + const decoder = new TextDecoder() 
+ let unreadMarked = false - // Update the growing assistant message (sources remain intact) - setChatSessions(prev => - prev.map(s => s.session_id === sessionId - ? { ...s, messages: (s.messages || []).map(m => m.id === assistantMsgId ? { ...m, content: full } : m) } - : s - ) - ); + while (true) { + const { value, done } = await reader.read() + if (done) break - if (!unreadMarked && activeSessionIdRef.current !== sessionId) { - unreadMarked = true; - setPendingScrollToLastUser(prev => ({ ...prev, [sessionId]: assistantMsgId })); - setUnreadSessions(prev => [...new Set([...prev, sessionId])]); + const chunk = decoder.decode(value, { stream: true }) + full += chunk + setAssistantMessageContent(sessionId, assistantMsgId, full) + + if (!unreadMarked && activeSessionIdRef.current !== sessionId) { + unreadMarked = true + setPendingScrollToLastUser(prev => ({ ...prev, [sessionId]: assistantMsgId })) + setUnreadSessions(prev => [...new Set([...prev, sessionId])]) + } } - } - if (activeSessionIdRef.current !== sessionId) { - setPendingScrollToLastUser(prev => ({ ...prev, [sessionId]: assistantMsgId })); - setUnreadSessions(prev => [...new Set([...prev, sessionId])]); - } else { - if (!userScrolledUpRef.current[sessionId]) { - requestAnimationFrame(() => scrollMessageToTop(assistantMsgId, 'smooth', sessionId)); + if (activeSessionIdRef.current !== sessionId) { + setPendingScrollToLastUser(prev => ({ ...prev, [sessionId]: assistantMsgId })) + setUnreadSessions(prev => [...new Set([...prev, sessionId])]) + } else if (!userScrolledUpRef.current[sessionId]) { + requestAnimationFrame(() => scrollMessageToTop(assistantMsgId, 'smooth', sessionId)) } else { - setNewMsgTip(prev => ({ ...prev, [sessionId]: assistantMsgId })); + setNewMsgTip(prev => ({ ...prev, [sessionId]: assistantMsgId })) } + } catch (error) { + if (isAbortError(error)) { + setAssistantMessageContent(sessionId, assistantMsgId, full, { removeIfEmpty: true }) + return + } + + console.error(error) + 
setAssistantMessageContent(sessionId, assistantMsgId, `Error: ${getErrorText(error)}`, { removeIfEmpty: true }) + return } - } catch (e) { - console.error(e); - } finally { - setIsSending(false); - } - } else { - try { + } else { const res = await fetch(`${ollamaApiUrl}/sessions/${sessionId}/regenerate`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, + signal: requestController.signal, body: JSON.stringify({ index, model, @@ -384,31 +451,33 @@ async function regenerateFromIndex(index, overrideUserText = null) { enriched_message: enrichedPrompt, sources: citationSources || [] }) - }); - const data = await res.json(); - const assistantMsgId = `msg-${Date.now()}`; + }) + if (!res.ok) throw new Error(`HTTP ${res.status}`) + + const data = await res.json() + const assistantMsgId = `msg-${Date.now()}` setChatSessions(prev => prev.map(s => s.session_id === sessionId ? { ...s, messages: [...(s.messages || []), { role: 'assistant', content: data.reply, id: assistantMsgId, sources: citationSources }] } : s ) - ); + ) if (activeSessionIdRef.current !== sessionId) { - setPendingScrollToLastUser(prev => ({ ...prev, [sessionId]: assistantMsgId })); - setUnreadSessions(prev => [...new Set([...prev, sessionId])]); + setPendingScrollToLastUser(prev => ({ ...prev, [sessionId]: assistantMsgId })) + setUnreadSessions(prev => [...new Set([...prev, sessionId])]) + } else if (!userScrolledUpRef.current[sessionId]) { + requestAnimationFrame(() => scrollMessageToTop(assistantMsgId, 'smooth', sessionId)) } else { - if (!userScrolledUpRef.current[sessionId]) { - requestAnimationFrame(() => scrollMessageToTop(assistantMsgId, 'smooth', sessionId)); - } else { - setNewMsgTip(prev => ({ ...prev, [sessionId]: assistantMsgId })); - } + setNewMsgTip(prev => ({ ...prev, [sessionId]: assistantMsgId })) } - } catch (e) { - console.error(e); - } finally { - setIsSending(false); } + } catch (error) { + if (!isAbortError(error)) { + console.error(error) + } + } finally { + 
finishCancelableRequest(requestController) } } @@ -468,7 +537,7 @@ async function regenerateFromIndex(index, overrideUserText = null) { }); }, []); - const activeRequestSessionId = useRef(null); + const activeRequestRef = useRef(null); const justSentMessage = useRef(false); const lastSentSessionRef = useRef(null); const activeSessionIdRef = useRef(activeSessionId); @@ -476,6 +545,33 @@ async function regenerateFromIndex(index, overrideUserText = null) { activeSessionIdRef.current = activeSessionId; }, [activeSessionId]); + const beginCancelableRequest = React.useCallback((sessionId) => { + const controller = new AbortController() + activeRequestRef.current = { controller, sessionId } + setIsSending(true) + return controller + }, []) + + const finishCancelableRequest = React.useCallback((controller) => { + if (activeRequestRef.current?.controller !== controller) return + activeRequestRef.current = null + setIsSending(false) + }, []) + + const cancelActiveRequest = React.useCallback(() => { + const activeRequest = activeRequestRef.current + if (!activeRequest) return + activeRequestRef.current = null + activeRequest.controller.abort() + setIsSending(false) + }, []) + + useEffect(() => { + return () => { + activeRequestRef.current?.controller.abort() + } + }, []) + // Flag to ensure we only restore once per open of a chat const restoredForRef = useRef(null); @@ -544,11 +640,11 @@ async function regenerateFromIndex(index, overrideUserText = null) { useEffect(() => { window.electronAPI.getSettings().then(settings => { setOllamaApiUrl(settings.ollamaApiUrl); - setColorScheme(settings.colorScheme); + setColorScheme(settings.colorScheme || 'Default'); setModel(settings.chatModel || ''); // Load the selected model, with a fallback setStreamOutput(settings.streamOutput || false); setScrollPositions(settings.scrollPositions || {}); // Load scroll positions - applyColorScheme(settings.colorScheme); // Apply initial scheme + applyColorScheme(settings.colorScheme || 'Default'); 
// Apply initial scheme }); const handleFocus = () => { @@ -573,19 +669,6 @@ async function regenerateFromIndex(index, overrideUserText = null) { applyColorScheme(colorScheme); }, [colorScheme]); - // Function to apply color scheme - const colorSchemes = { - }; - - function applyColorScheme(schemeName) { - const scheme = colorSchemes[schemeName]; - if (scheme) { - for (const [key, value] of Object.entries(scheme)) { - document.documentElement.style.setProperty(key, value); - } - } - } - const fetchHistory = (sessionId) => { if (!sessionId || !ollamaApiUrl) return; fetch(`${ollamaApiUrl}/history?session_id=${encodeURIComponent(sessionId)}`) @@ -602,6 +685,73 @@ async function regenerateFromIndex(index, overrideUserText = null) { .catch(() => {}); }; + async function refreshLibraries() { + if (!ollamaApiUrl) return; + try { + const response = await fetch(`${ollamaApiUrl}/libraries`); + const data = await response.json(); + const nextLibraries = Array.isArray(data.libraries) ? data.libraries : []; + setLibraries(nextLibraries); + + if (nextLibraries.length === 0) { + setActiveLibrarySlug(null); + setChatLibrarySlug(null); + return; + } + + if (!nextLibraries.some(lib => lib.slug === activeLibrarySlug)) { + setActiveLibrarySlug(nextLibraries[0].slug); + } + if (chatLibrarySlug && !nextLibraries.some(lib => lib.slug === chatLibrarySlug)) { + setChatLibrarySlug(null); + } + } catch (error) { + console.warn('Failed to load libraries', error); + } + } + + async function refreshLibraryJobs() { + if (!ollamaApiUrl) return; + try { + const response = await fetch(`${ollamaApiUrl}/jobs`); + const data = await response.json(); + setLibraryJobs(Array.isArray(data.jobs) ? data.jobs : []); + } catch (error) { + console.warn('Failed to load library jobs', error); + } + } + + async function createLibrary(nameOverride = null) { + const rawName = typeof nameOverride === 'string' ? 
nameOverride : newLibraryName + const name = rawName.trim() + if (!name) { + setLibraryCreateError('Name is required.') + return + } + try { + setLibraryCreateError('') + const response = await fetch(`${ollamaApiUrl}/libraries`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name }) + }); + if (!response.ok) { + const detail = await response.text() + throw new Error(detail || `HTTP ${response.status}`) + } + const data = await response.json(); + setIsCreatingLibrary(false) + setNewLibraryName('') + await refreshLibraries(); + if (data?.slug) { + setActiveLibrarySlug(data.slug); + } + } catch (error) { + console.error('Failed to create library', error); + setLibraryCreateError(String(error?.message || error)) + } + } + // Load chat sessions from backend on initial render useEffect(() => { if (!ollamaApiUrl) return; @@ -623,6 +773,31 @@ async function regenerateFromIndex(index, overrideUserText = null) { }); }, [ollamaApiUrl]); + useEffect(() => { + if (!ollamaApiUrl) return; + refreshLibraries(); + refreshLibraryJobs(); + }, [ollamaApiUrl]); + + useEffect(() => { + try { + if (chatLibrarySlug) { + localStorage.setItem(CHAT_LIBRARY_KEY, chatLibrarySlug); + } else { + localStorage.removeItem(CHAT_LIBRARY_KEY); + } + } catch {} + }, [chatLibrarySlug]); + + useEffect(() => { + if (!ollamaApiUrl) return; + const interval = setInterval(() => { + refreshLibraries(); + refreshLibraryJobs(); + }, 3000); + return () => clearInterval(interval); + }, [ollamaApiUrl, activeSidebarMode, activeLibrarySlug, chatLibrarySlug]); + // Load messages for the active session useEffect(() => { fetchHistory(activeSessionId); @@ -642,6 +817,14 @@ async function regenerateFromIndex(index, overrideUserText = null) { return chatSessions.find(s => s.session_id === activeSessionId)?.messages || []; }, [activeSessionId, chatSessions]); + const activeLibrary = useMemo(() => { + return libraries.find(lib => lib.slug === activeLibrarySlug) || null; + }, 
[activeLibrarySlug, libraries]); + + const chatLibrary = useMemo(() => { + return libraries.find(lib => lib.slug === chatLibrarySlug) || null; + }, [chatLibrarySlug, libraries]); + // Persist the scrollTop of the session we are LEAVING (on chat change or when leaving the chat view) useEffect(() => { const leavingSessionId = activeSessionId; @@ -821,27 +1004,27 @@ async function regenerateFromIndex(index, overrideUserText = null) { async function sendMessage() { - if (!input.trim() || !model) return; + if (isSending || !input.trim() || !model) return - let targetSessionId = activeSessionId; - let isNewChat = false; + let targetSessionId = activeSessionId + let isNewChat = false if (!targetSessionId) { - const newSession = await createNewChat(); - await new Promise(resolve => setTimeout(resolve, 200)); - targetSessionId = newSession.session_id; - isNewChat = true; + const newSession = await createNewChat() + await new Promise(resolve => setTimeout(resolve, 200)) + targetSessionId = newSession.session_id + isNewChat = true } else { - const currentSession = chatSessions.find(s => s.session_id === targetSessionId); - isNewChat = currentSession && currentSession.name === "New Chat" && currentSession.messages.length === 0; + const currentSession = chatSessions.find(s => s.session_id === targetSessionId) + isNewChat = currentSession && currentSession.name === "New Chat" && currentSession.messages.length === 0 } - const userMsg = { role: 'user', content: input.trim(), id: `msg-${Date.now()}-${Math.random()}` }; - justSentMessage.current = true; - lastSentSessionRef.current = targetSessionId; - setUserScrolledUp(targetSessionId, false); + const userMsg = { role: 'user', content: input.trim(), id: `msg-${Date.now()}-${Math.random()}` } + justSentMessage.current = true + lastSentSessionRef.current = targetSessionId + setUserScrolledUp(targetSessionId, false) if (activeSessionIdRef.current === targetSessionId) { - restoredForRef.current = activeSessionIdRef.current; + 
restoredForRef.current = activeSessionIdRef.current } flushSync(() => { @@ -851,31 +1034,54 @@ async function sendMessage() { ? { ...session, messages: [...(session.messages || []), userMsg] } : session ) - ); - setInput(''); - }); - requestAnimationFrame(() => scrollToBottom('auto', targetSessionId)); + ) + setInput('') + }) + requestAnimationFrame(() => scrollToBottom('auto', targetSessionId)) - setIsSending(true); + const requestController = beginCancelableRequest(targetSessionId) try { - // Build compact recent history for context-aware websearch (resource-friendly). - // We only send the last 8 turns by default, including assistant replies, - // and we also append the *current* user message (same content as `userMsg`). - let historyForSearch = []; + let historyForSearch = [] try { - const existing = (chatSessions.find(s => s.session_id === targetSessionId)?.messages) || []; - const lastFew = existing.slice(-8).map(m => ({ role: m.role, content: m.content || '' })); - historyForSearch = [...lastFew, { role: 'user', content: userMsg.content }]; + const existing = (chatSessions.find(s => s.session_id === targetSessionId)?.messages) || [] + const lastFew = existing.slice(-8).map(m => ({ role: m.role, content: m.content || '' })) + historyForSearch = [...lastFew, { role: 'user', content: userMsg.content }] } catch {} - // Decide on enrichment using the toggle - let enrichedPrompt = userMsg.content; - let citationSources = []; + let enrichedPrompt = userMsg.content + let citationSources = [] + const contextBlocks = [] + + if (chatLibrarySlug) { + try { + const resp = await fetch(`${ollamaApiUrl}/libraries/${chatLibrarySlug}/context`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + signal: requestController.signal, + body: JSON.stringify({ + prompt: userMsg.content, + top_k: 5 + }) + }) + const data = await resp.json() + if (data && typeof data.context_block === 'string' && data.context_block.trim()) { + 
contextBlocks.push(data.context_block.trim()) + } + if (Array.isArray(data?.sources)) { + citationSources.push(...data.sources) + } + } catch (error) { + if (isAbortError(error)) throw error + console.warn('local library enrichment failed', error) + } + } + if (webSearchEnabled) { try { const resp = await fetch(`${ollamaApiUrl}/websearch`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, + signal: requestController.signal, body: JSON.stringify({ prompt: userMsg.content, model, @@ -884,143 +1090,124 @@ async function sendMessage() { searx_url: searxUrl || null, engines: Array.isArray(searxEngines) ? searxEngines : null, }) - }); - const data = await resp.json(); - if (data && typeof data.enriched_prompt === 'string') { - enrichedPrompt = data.enriched_prompt; - citationSources = Array.isArray(data.sources) ? data.sources : []; + }) + const data = await resp.json() + if (data && typeof data.context_block === 'string' && data.context_block.trim()) { + contextBlocks.push(data.context_block.trim()) } - } catch (e) { - console.warn('web search enrichment failed', e); + if (Array.isArray(data?.sources)) { + citationSources.push(...data.sources) + } + } catch (error) { + if (isAbortError(error)) throw error + console.warn('web search enrichment failed', error) } } + citationSources = [...new Set(citationSources)] + if (contextBlocks.length > 0) { + enrichedPrompt = `${userMsg.content}\n\n${contextBlocks.join('\n\n')}` + } + if (streamOutput) { - const assistantMsgId = `msg-${Date.now()}-${Math.random()}`; - const assistantMsg = { role: 'assistant', content: '', id: assistantMsgId, sources: citationSources }; + const assistantMsgId = `msg-${Date.now()}-${Math.random()}` + let fullReply = '' + const assistantMsg = { role: 'assistant', content: '', id: assistantMsgId, sources: citationSources } setChatSessions(prevSessions => prevSessions.map(session => session.session_id === targetSessionId ? 
{ ...session, messages: [...(session.messages || []), assistantMsg] } : session ) - ); + ) - (async () => { - try { - const res = await fetch(`${ollamaApiUrl}/chat`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - session_id: targetSessionId, - model, - message: userMsg.content, - enriched_message: webSearchEnabled ? enrichedPrompt : null, - stream: true, - sources: citationSources || [] - }) - }); + try { + const res = await fetch(`${ollamaApiUrl}/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + signal: requestController.signal, + body: JSON.stringify({ + session_id: targetSessionId, + model, + message: userMsg.content, + enriched_message: contextBlocks.length > 0 ? enrichedPrompt : null, + stream: true, + sources: citationSources || [] + }) + }) + if (!res.ok) throw new Error(`HTTP ${res.status}`) - const reader = res.body.getReader(); - const decoder = new TextDecoder(); - let fullReply = ''; - let pendingMarked = false; + const reader = res.body?.getReader() + if (!reader) throw new Error('Missing response body') - while (true) { - const { value, done } = await reader.read(); - if (done) { - setChatSessions(prevSessions => - prevSessions.map(session => - session.session_id === targetSessionId - ? { - ...session, - messages: session.messages.map(m => - m.id === assistantMsgId ? 
{ ...m, content: fullReply } : m - ) - } - : session - ) - ); + const decoder = new TextDecoder() + let pendingMarked = false - if (activeSessionIdRef.current === targetSessionId) { - if (!userScrolledUpRef.current[targetSessionId]) { - requestAnimationFrame(() => scrollMessageToTop(assistantMsgId, 'smooth', targetSessionId)); - } else { - setNewMsgTip(prev => ({ ...prev, [targetSessionId]: assistantMsgId })); - } + while (true) { + const { value, done } = await reader.read() + if (done) { + setAssistantMessageContent(targetSessionId, assistantMsgId, fullReply) + + if (activeSessionIdRef.current === targetSessionId) { + if (!userScrolledUpRef.current[targetSessionId]) { + requestAnimationFrame(() => scrollMessageToTop(assistantMsgId, 'smooth', targetSessionId)) } else { - setPendingScrollToLastUser(prev => ({ ...prev, [targetSessionId]: assistantMsgId })); - setUnreadSessions(prev => [...new Set([...prev, targetSessionId])]); + setNewMsgTip(prev => ({ ...prev, [targetSessionId]: assistantMsgId })) } + } else { + setPendingScrollToLastUser(prev => ({ ...prev, [targetSessionId]: assistantMsgId })) + setUnreadSessions(prev => [...new Set([...prev, targetSessionId])]) + } - break; - } - const chunk = decoder.decode(value, { stream: true }); - fullReply += chunk; - setChatSessions(prevSessions => - prevSessions.map(session => - session.session_id === targetSessionId - ? { - ...session, - messages: session.messages.map(m => - m.id === assistantMsgId ? 
{ ...m, content: fullReply } : m - ) - } - : session - ) - ); - - if ( - activeSessionIdRef.current === targetSessionId && - !userScrolledUpRef.current[targetSessionId] - ) { - scrollToBottom('auto', targetSessionId); - } - if (activeSessionIdRef.current !== targetSessionId && !pendingMarked) { - setPendingScrollToLastUser(prev => ({ ...prev, [targetSessionId]: assistantMsgId })); - pendingMarked = true; - } + break + } + + const chunk = decoder.decode(value, { stream: true }) + fullReply += chunk + setAssistantMessageContent(targetSessionId, assistantMsgId, fullReply) + + if (activeSessionIdRef.current === targetSessionId && !userScrolledUpRef.current[targetSessionId]) { + scrollToBottom('auto', targetSessionId) + } + if (activeSessionIdRef.current !== targetSessionId && !pendingMarked) { + setPendingScrollToLastUser(prev => ({ ...prev, [targetSessionId]: assistantMsgId })) + pendingMarked = true } - } catch (e) { - console.error('Failed to send message:', e); - const errorMsg = { - role: 'assistant', - content: 'Error: ' + e.message, - id: `msg-${Date.now()}-${Math.random()}`, - sources: citationSources - }; - setChatSessions(prevSessions => - prevSessions.map(session => - session.session_id === targetSessionId - ? 
{ ...session, messages: [...session.messages.slice(0, -1), errorMsg] } - : session - ) - ); - } finally { - setIsSending(false); } - })(); + } catch (error) { + if (isAbortError(error)) { + setAssistantMessageContent(targetSessionId, assistantMsgId, fullReply, { removeIfEmpty: true }) + return + } + + console.error('Failed to send message:', error) + setAssistantMessageContent(targetSessionId, assistantMsgId, 'Error: ' + getErrorText(error), { removeIfEmpty: true }) + return + } } else { const res = await fetch(`${ollamaApiUrl}/chat`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, + signal: requestController.signal, body: JSON.stringify({ session_id: targetSessionId, model, message: userMsg.content, - enriched_message: webSearchEnabled ? enrichedPrompt : null, + enriched_message: contextBlocks.length > 0 ? enrichedPrompt : null, stream: false, sources: citationSources || [] }) - }); - const data = await res.json(); - const assistantMsgId = `msg-${Date.now()}`; + }) + if (!res.ok) throw new Error(`HTTP ${res.status}`) + + const data = await res.json() + const assistantMsgId = `msg-${Date.now()}` const assistantMsg = { role: 'assistant', content: data.reply, id: assistantMsgId, sources: citationSources - }; + } setChatSessions(prevSessions => prevSessions.map(session => @@ -1028,24 +1215,23 @@ async function sendMessage() { ? 
{ ...session, messages: [...(session.messages || []), assistantMsg] } : session ) - ); + ) if (assistantMsgId) { if (activeSessionIdRef.current === targetSessionId) { if (!userScrolledUpRef.current[targetSessionId]) { - requestAnimationFrame(() => scrollMessageToTop(assistantMsgId, 'smooth', targetSessionId)); + requestAnimationFrame(() => scrollMessageToTop(assistantMsgId, 'smooth', targetSessionId)) } else { - setNewMsgTip(prev => ({ ...prev, [targetSessionId]: assistantMsgId })); + setNewMsgTip(prev => ({ ...prev, [targetSessionId]: assistantMsgId })) } } else { - setPendingScrollToLastUser(prev => ({ ...prev, [targetSessionId]: assistantMsgId })); + setPendingScrollToLastUser(prev => ({ ...prev, [targetSessionId]: assistantMsgId })) } } - setIsSending(false); } if (activeSessionIdRef.current !== targetSessionId) { - setUnreadSessions(prev => [...new Set([...prev, targetSessionId])]); + setUnreadSessions(prev => [...new Set([...prev, targetSessionId])]) } if (isNewChat) { @@ -1055,30 +1241,36 @@ async function sendMessage() { body: JSON.stringify({ session_id: targetSessionId, message: userMsg.content, - model: model + model }) }) .then(r => r.json()) .then(data => { - const sanitizedTitle = data.title.replace(/[\s\S]*?<\/think(?:ing)?>/i, '').trim(); + const sanitizedTitle = data.title.replace(/[\s\S]*?<\/think(?:ing)?>/i, '').trim() setChatSessions(prevSessions => prevSessions.map(session => session.session_id === targetSessionId ? 
{ ...session, name: sanitizedTitle } : session ) - ); - }); + ) + }) } - } catch (e) { - console.error("Failed to send message:", e); - const errorMsg = { role: 'assistant', content: 'Error: ' + e.message, id: `msg-${Date.now()}-${Math.random()}` }; + } catch (error) { + if (isAbortError(error)) { + finishCancelableRequest(requestController) + return + } + + console.error('Failed to send message:', error) + const errorMsg = { role: 'assistant', content: 'Error: ' + getErrorText(error), id: `msg-${Date.now()}-${Math.random()}` } setChatSessions(prevSessions => prevSessions.map(session => session.session_id === targetSessionId ? { ...session, messages: [...session.messages, errorMsg] } : session ) - ); - setIsSending(false); + ) + } finally { + finishCancelableRequest(requestController) } } @@ -1267,7 +1459,20 @@ async function createNewChat() { )} {activeSidebarMode === 'dbs' && (
-
No databases yet.
+ {libraries.length === 0 ? ( +
No databases yet.
+ ) : ( + libraries.map(library => ( +
setActiveLibrarySlug(library.slug)} + > + {library.name} + {chatLibrarySlug === library.slug &&
Chat
} +
+ )) + )}
)} {activeSidebarMode === 'settings' && ( @@ -1299,7 +1504,51 @@ async function createNewChat() { )} {activeSidebarMode === 'dbs' && ( - + isCreatingLibrary ? ( +
+ setNewLibraryName(e.target.value)} + onKeyDown={(e) => { + if (e.key === 'Enter') { + createLibrary() + } else if (e.key === 'Escape') { + setIsCreatingLibrary(false) + setNewLibraryName('') + setLibraryCreateError('') + } + }} + placeholder="Database name" + autoFocus + /> + {libraryCreateError &&
{libraryCreateError}
} +
+ + +
+
+ ) : ( + + ) )}
)} @@ -1310,6 +1559,7 @@ async function createNewChat() { <>
Chat - {chatSessions.find(s => s.session_id === activeSessionId)?.name || 'New Chat'} + {chatLibrary && KB: {chatLibrary.name}}
@@ -1445,7 +1695,12 @@ async function createNewChat() { -
@@ -1453,10 +1708,32 @@ async function createNewChat() { )} {activeSidebarMode === 'dbs' && ( -
-

Databases

-

This is a placeholder for the database management view.

-
+ <> +
+ {activeLibrary?.name || 'Databases'} + {chatLibrary && Chat KB: {chatLibrary.name}} +
+ { + await refreshLibraries(); + await refreshLibraryJobs(); + }} + onToggleChatLibrary={setChatLibrarySlug} + onDeleted={(slug) => { + if (activeLibrarySlug === slug) { + const next = libraries.find(lib => lib.slug !== slug); + setActiveLibrarySlug(next?.slug || null); + } + if (chatLibrarySlug === slug) { + setChatLibrarySlug(null); + } + }} + /> + )} {activeSidebarMode === 'settings' && ( <> diff --git a/src/InterfaceSettings.jsx b/src/InterfaceSettings.jsx index 40d88f8..c45fc2d 100644 --- a/src/InterfaceSettings.jsx +++ b/src/InterfaceSettings.jsx @@ -1,27 +1,58 @@ -import React, { useState, useEffect } from 'react'; -import { colorSchemes, applyColorScheme } from './colorSchemes'; +import React, { useEffect, useState } from 'react' +import { colorSchemes, applyColorScheme } from './colorSchemes' -const COLOR_SCHEME_KEY = 'colorScheme'; +const COLOR_SCHEME_KEY = 'colorScheme' +const UI_SCALE_KEY = 'uiScale' +const DEFAULT_UI_SCALE = 1 +const MIN_UI_SCALE = 0.7 +const MAX_UI_SCALE = 1.3 +const UI_SCALE_STEP = 0.05 + +function normalizeUiScale(value) { + const numericValue = Number(value) + if (!Number.isFinite(numericValue)) { + return DEFAULT_UI_SCALE + } + + return Math.min(MAX_UI_SCALE, Math.max(MIN_UI_SCALE, Math.round(numericValue * 100) / 100)) +} export default function InterfaceSettings() { - const [selectedColorScheme, setSelectedColorScheme] = useState('Default'); + const [selectedColorScheme, setSelectedColorScheme] = useState('Default') + const [uiScale, setUiScale] = useState(DEFAULT_UI_SCALE) useEffect(() => { window.electronAPI.getSettings().then(settings => { - setSelectedColorScheme(settings.colorScheme); - applyColorScheme(settings.colorScheme); - }); - }, []); + const schemeName = settings.colorScheme || 'Default' + setSelectedColorScheme(schemeName) + setUiScale(normalizeUiScale(settings.uiScale)) + applyColorScheme(schemeName) + }) + }, []) useEffect(() => { - applyColorScheme(selectedColorScheme); - }, [selectedColorScheme]); + 
applyColorScheme(selectedColorScheme) + }, [selectedColorScheme]) - const handleColorSchemeChange = (e) => { - const newScheme = e.target.value; - setSelectedColorScheme(newScheme); - window.electronAPI.setSetting(COLOR_SCHEME_KEY, newScheme); - }; + const handleColorSchemeChange = (event) => { + const newScheme = event.target.value + setSelectedColorScheme(newScheme) + window.electronAPI.setSetting(COLOR_SCHEME_KEY, newScheme) + } + + const persistUiScale = (value) => { + const nextScale = normalizeUiScale(value) + setUiScale(nextScale) + window.electronAPI.setSetting(UI_SCALE_KEY, nextScale) + } + + const handleUiScaleChange = (event) => { + persistUiScale(event.target.value) + } + + const handleUiScaleReset = () => { + persistUiScale(DEFAULT_UI_SCALE) + } return (
@@ -39,6 +70,32 @@ export default function InterfaceSettings() { ))}
+
+

UI Scale

+
+ + {Math.round(uiScale * 100)}% + +
+

+ Scales the whole interface, including fonts, spacing, and controls. 100% is the default size. +

+
- ); + ) } diff --git a/src/LibraryManager.jsx b/src/LibraryManager.jsx new file mode 100644 index 0000000..412b5cb --- /dev/null +++ b/src/LibraryManager.jsx @@ -0,0 +1,274 @@ +import React, { useEffect, useState } from 'react' + +function statusLabel(job) { + if (!job) return null + const progress = typeof job.progress === 'number' ? `${job.progress.toFixed(0)}%` : null + const detail = job.detail ? ` ${job.detail}` : '' + return `${job.type} · ${job.status}${progress ? ` · ${progress}` : ''}${detail}` +} + +export default function LibraryManager({ + apiBase, + library, + jobs, + chatLibrarySlug, + onRefresh, + onToggleChatLibrary, + onDeleted +}) { + const [busy, setBusy] = useState(false) + const [isRenaming, setIsRenaming] = useState(false) + const [renameValue, setRenameValue] = useState('') + const [confirmDelete, setConfirmDelete] = useState(false) + const [errorMessage, setErrorMessage] = useState('') + + useEffect(() => { + setIsRenaming(false) + setRenameValue(library?.name || '') + setConfirmDelete(false) + setErrorMessage('') + }, [library?.slug, library?.name]) + + async function expectOk(response) { + if (response.ok) return response + const detail = await response.text() + throw new Error(detail || `HTTP ${response.status}`) + } + + async function runAction(fn) { + setBusy(true) + try { + setErrorMessage('') + await fn() + setConfirmDelete(false) + } finally { + setBusy(false) + await onRefresh() + } + } + + async function addPaths() { + if (!library) return + const paths = await window.electronAPI?.pickPaths?.() + if (!Array.isArray(paths) || paths.length === 0) return + try { + await runAction(async () => { + const response = await fetch(`${apiBase}/libraries/${library.slug}/files/register`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ paths }) + }) + await expectOk(response) + }) + } catch (error) { + setErrorMessage(String(error?.message || error)) + } + } + + async function removeFile(rel) { + 
if (!library) return + try { + await runAction(async () => { + const response = await fetch(`${apiBase}/libraries/${library.slug}/files`, { + method: 'DELETE', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ rel }) + }) + await expectOk(response) + }) + } catch (error) { + setErrorMessage(String(error?.message || error)) + } + } + + async function renameLibrary() { + if (!library) return + const name = renameValue.trim() + if (!name || name === library.name) { + setIsRenaming(false) + setRenameValue(library.name || '') + return + } + await runAction(async () => { + const response = await fetch(`${apiBase}/libraries/${library.slug}`, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name }) + }) + await expectOk(response) + }) + setIsRenaming(false) + } + + async function deleteLibrary() { + if (!library) return + await runAction(async () => { + const response = await fetch(`${apiBase}/libraries/${library.slug}`, { method: 'DELETE' }) + await expectOk(response) + }) + onDeleted?.(library.slug) + } + + async function startJob(kind) { + if (!library) return + try { + await runAction(async () => { + const endpoint = `${apiBase}/libraries/${library.slug}/jobs/${kind}` + const options = { + method: 'POST', + headers: { 'Content-Type': 'application/json' } + } + if (kind === 'embed') { + options.body = JSON.stringify({}) + } + const response = await fetch(endpoint, options) + await expectOk(response) + }) + } catch (error) { + setErrorMessage(String(error?.message || error)) + } + } + + if (!library) { + return ( +
+

Create a database, add files or folders, then build and index it for local RAG.

+
+ ) + } + + const activeJobs = (jobs || []).filter(job => job.slug === library.slug && (job.status === 'queued' || job.status === 'running')) + const usingInChat = chatLibrarySlug === library.slug + const canStartRename = () => { + setRenameValue(library.name || '') + setErrorMessage('') + setIsRenaming(true) + setConfirmDelete(false) + } + + return ( +
+ {isRenaming && ( +
+ setRenameValue(e.target.value)} + onKeyDown={(e) => { + if (e.key === 'Enter') { + renameLibrary().catch((error) => setErrorMessage(String(error?.message || error))) + } else if (e.key === 'Escape') { + setIsRenaming(false) + setRenameValue(library.name || '') + } + }} + autoFocus + /> +
+ + +
+
+ )} + + {confirmDelete && ( +
+
Delete "{library.name}"? This removes the local index and metadata for this database.
+
+ + +
+
+ )} + + {errorMessage &&
{errorMessage}
} + +
+ + + + + + + +
+ +
+
Files: {library.files?.length || 0}
+
Corpus: {library.artifacts?.corpus_records || 0}
+
Enriched: {library.artifacts?.enhanced_records || 0}
+
Indexed
+
+ + {usingInChat && ( +
+ This database will be queried before each chat request and its context will be appended to the prompt. +
+ )} + + {activeJobs.length > 0 && ( +
+ {activeJobs.map(job => ( +
+ {statusLabel(job)} +
+ ))} +
+ )} + +
+

Files

+ {library.files?.length ? ( +
+ {library.files.map(file => ( +
+
+
{file.name || file.path}
+
{file.path}
+
+
+ + +
+
+ ))} +
+ ) : ( +

No files registered yet.

+ )} +
+
+ ) +} diff --git a/src/colorSchemes.js b/src/colorSchemes.js index 124617e..f881fe9 100644 --- a/src/colorSchemes.js +++ b/src/colorSchemes.js @@ -1,5 +1,18 @@ const colorSchemes = { - 'Nightsky': { + Default: { + '--bg': '#0b1020', + '--panel': '#141b34', + '--text': '#e6e8ef', + '--muted': '#9aa3b2', + '--accent': '#6ea8fe', + '--border': '#24304f', + '--input-bg': '#121933', + '--user-msg-bg': '#18213d', + '--assistant-msg-bg': '#10172d', + '--active-bg': 'rgba(110, 168, 254, 0.16)', + '--hover-bg': 'rgba(255, 255, 255, 0.06)', + }, + Nightsky: { '--bg': '#0a0e1a', '--panel': '#18203a', '--text': '#ffffff', @@ -12,7 +25,7 @@ const colorSchemes = { '--active-bg': 'rgba(74, 144, 226, 0.15)', '--hover-bg': 'rgba(255, 255, 255, 0.05)', }, - 'Grayscale': { + Grayscale: { '--bg': '#1a1a1a', '--panel': '#2a2a2a', '--text': '#f0f0f0', @@ -25,33 +38,33 @@ const colorSchemes = { '--active-bg': 'rgba(136, 136, 136, 0.15)', '--hover-bg': 'rgba(255, 255, 255, 0.05)', }, - 'Japan': { + Japan: { '--bg': '#ffffff', '--panel': '#f5f5f5', '--text': '#000000', '--muted': '#444444', - '--accent': '#e74c3c', /* Vibrant Red */ + '--accent': '#e74c3c', '--border': '#999999', '--input-bg': '#ffffff', '--user-msg-bg': '#f0f0f0', '--assistant-msg-bg': '#f0f0f0', - '--active-bg': 'rgba(231, 76, 60, 0.15)', /* Light red for active */ - '--hover-bg': 'rgba(231, 76, 60, 0.08)', /* Lighter red for hover */ + '--active-bg': 'rgba(231, 76, 60, 0.15)', + '--hover-bg': 'rgba(231, 76, 60, 0.08)', }, - 'Lime': { + Lime: { '--bg': '#f0fff0', '--panel': '#e0ffe0', '--text': '#1a1a1a', '--muted': '#72a272ff', - '--accent': '#deef88', + '--accent': '#8e9f38ff', '--border': '#a0c0a0', '--input-bg': '#ffffff', - '--user-msg-bg': '#f8f7ad', + '--user-msg-bg': '#f8f7adff', '--assistant-msg-bg': '#f5fff5', '--active-bg': 'rgba(104, 159, 56, 0.2)', '--hover-bg': 'rgba(104, 159, 56, 0.1)', }, - 'Vampire': { + Vampire: { '--bg': '#1a050a', '--panel': '#2a1015', '--text': '#ffefff', @@ -64,15 +77,80 @@ const 
colorSchemes = { '--active-bg': 'rgba(216, 27, 96, 0.15)', '--hover-bg': 'rgba(255, 255, 255, 0.05)', }, -}; + 'Sunset Drive': { + '--bg': '#1f1024', + '--panel': '#2e1632', + '--text': '#fff2ea', + '--muted': '#caa8b7', + '--accent': '#ff8a5b', + '--border': '#593050', + '--input-bg': '#26132a', + '--user-msg-bg': '#442038', + '--assistant-msg-bg': '#32172c', + '--active-bg': 'rgba(255, 138, 91, 0.18)', + '--hover-bg': 'rgba(255, 138, 91, 0.08)', + }, + 'Aurora Pulse': { + '--bg': '#07171d', + '--panel': '#102730', + '--text': '#eafcff', + '--muted': '#9bc8cf', + '--accent': '#54f2c2', + '--border': '#214853', + '--input-bg': '#0b2028', + '--user-msg-bg': '#12313d', + '--assistant-msg-bg': '#0f2530', + '--active-bg': 'rgba(84, 242, 194, 0.18)', + '--hover-bg': 'rgba(84, 242, 194, 0.08)', + }, + 'Sakura Neon': { + '--bg': '#160b1d', + '--panel': '#251331', + '--text': '#fff5fd', + '--muted': '#d4abc7', + '--accent': '#ff4fb6', + '--border': '#52315f', + '--input-bg': '#1d1027', + '--user-msg-bg': '#341844', + '--assistant-msg-bg': '#281534', + '--active-bg': 'rgba(255, 79, 182, 0.18)', + '--hover-bg': 'rgba(255, 79, 182, 0.09)', + }, + 'Cobalt Punch': { + '--bg': '#081527', + '--panel': '#102643', + '--text': '#eef6ff', + '--muted': '#9fb7d0', + '--accent': '#ffb703', + '--border': '#234164', + '--input-bg': '#0d1f37', + '--user-msg-bg': '#162f54', + '--assistant-msg-bg': '#102640', + '--active-bg': 'rgba(255, 183, 3, 0.18)', + '--hover-bg': 'rgba(255, 183, 3, 0.08)', + }, + 'Mango Mojito': { + '--bg': '#fff7ea', + '--panel': '#ffe9c8', + '--text': '#2a1c13', + '--muted': '#7c6150', + '--accent': '#ff6b35', + '--border': '#e6bf91', + '--input-bg': '#fffdf9', + '--user-msg-bg': '#fff0d7', + '--assistant-msg-bg': '#fff8ed', + '--active-bg': 'rgba(255, 107, 53, 0.14)', + '--hover-bg': 'rgba(255, 107, 53, 0.08)', + }, +} function applyColorScheme(schemeName) { - const scheme = colorSchemes[schemeName]; - if (scheme) { - for (const [key, value] of 
Object.entries(scheme)) { - document.documentElement.style.setProperty(key, value); - } + const scheme = colorSchemes[schemeName] || colorSchemes.Default + if (!scheme) return + + for (const [key, value] of Object.entries(scheme)) { + document.documentElement.style.setProperty(key, value) } } -export { colorSchemes, applyColorScheme }; +export { colorSchemes, applyColorScheme } diff --git a/src/main.jsx b/src/main.jsx index de4c1e0..8169910 100644 --- a/src/main.jsx +++ b/src/main.jsx @@ -9,9 +9,7 @@ import { applyColorScheme } from './colorSchemes' function Main() { useEffect(() => { window.electronAPI.getSettings().then(settings => { - if (settings.colorScheme) { - applyColorScheme(settings.colorScheme) - } + applyColorScheme(settings.colorScheme || 'Default') }) }, []) diff --git a/src/styles.css b/src/styles.css index c32cdca..455e1b4 100644 --- a/src/styles.css +++ b/src/styles.css @@ -245,6 +245,22 @@ body { background: var(--bg); color: var(--text); font-family: ui-sans-serif, sy background: var(--panel); } +.new-db-form, +.library-inline-form { + display: grid; + gap: 8px; +} + +.new-db-actions { + display: flex; + gap: 8px; +} + +.form-error { + color: #ff9aa8; + font-size: 12px; +} + .new-chat-button { width: 100%; padding: 10px; @@ -291,6 +307,13 @@ body { background: var(--bg); color: var(--text); font-family: ui-sans-serif, sy .select { min-width: 220px; } .button { cursor: pointer; } .button:hover { border-color: var(--accent); } +.button.ghost { background: transparent; } +.button.danger { border-color: #8f3d49; color: #ffb8c2; } +.button.danger:hover { border-color: #d86a79; } +.header-subtle { + color: var(--muted); + font-size: 13px; +} .chat { display: grid; @@ -510,6 +533,30 @@ textarea.input { min-width: unset; } +.setting-control-row { + display: flex; + align-items: center; + gap: 12px; + flex-wrap: wrap; +} + +.range-input { + width: min(360px, 100%); + accent-color: var(--accent); +} + +.setting-value { + min-width: 48px; + color: 
var(--text); + font-variant-numeric: tabular-nums; +} + +.setting-description { + margin: 10px 0 0; + color: var(--muted); + line-height: 1.5; +} + /* Markdown Styles */ .msg h1, .msg h2, .msg h3, .msg h4 { margin: 10px 0; @@ -972,3 +1019,113 @@ input:checked + .slider:before { white-space: nowrap; margin-top: 0.5rem; } + +.db-active-badge { + margin-left: 8px; + padding: 2px 8px; + border-radius: 999px; + background: color-mix(in srgb, var(--accent) 20%, transparent); + color: var(--accent); + font-size: 11px; +} + +.placeholder-view, +.library-panel { + overflow: auto; + padding: 20px; +} + +.placeholder-view h1 { + margin-top: 0; +} + +.library-toolbar { + display: flex; + flex-wrap: wrap; + gap: 10px; + margin-bottom: 18px; +} + +.library-states { + display: flex; + flex-wrap: wrap; + gap: 10px; + margin-bottom: 14px; +} + +.state-pill { + padding: 6px 10px; + border-radius: 999px; + border: 1px solid var(--border); + color: var(--muted); + font-size: 13px; +} + +.state-pill.ready { + color: var(--text); + border-color: color-mix(in srgb, var(--accent) 45%, var(--border)); +} + +.library-chat-note, +.job-card { + margin-bottom: 12px; + padding: 12px 14px; + border-radius: 12px; + background: color-mix(in srgb, var(--panel) 82%, black); + border: 1px solid var(--border); +} + +.library-inline-form { + margin-bottom: 14px; + padding: 12px 14px; + border-radius: 12px; + border: 1px solid var(--border); + background: color-mix(in srgb, var(--panel) 88%, black); +} + +.danger-zone { + border-color: #8f3d49; +} + +.library-files h2 { + margin: 18px 0 12px; + font-size: 16px; +} + +.library-file-list { + display: grid; + gap: 10px; +} + +.library-file-row { + display: flex; + justify-content: space-between; + gap: 14px; + align-items: flex-start; + padding: 12px 14px; + border-radius: 12px; + border: 1px solid var(--border); + background: color-mix(in srgb, var(--panel) 88%, black); +} + +.library-file-meta { + min-width: 0; +} + +.library-file-name { + font-weight: 
600; + margin-bottom: 4px; +} + +.library-file-path, +.muted-copy { + color: var(--muted); + font-size: 13px; + word-break: break-word; +} + +.library-file-actions { + display: flex; + gap: 8px; + flex-shrink: 0; +} diff --git a/wheelcheck2117/pydantic-2.11.7-py3-none-any.whl b/wheelcheck2117/pydantic-2.11.7-py3-none-any.whl new file mode 100644 index 0000000..a88d95b Binary files /dev/null and b/wheelcheck2117/pydantic-2.11.7-py3-none-any.whl differ diff --git a/wheelcheck274/pydantic-2.7.4-py3-none-any.whl b/wheelcheck274/pydantic-2.7.4-py3-none-any.whl new file mode 100644 index 0000000..4e5638d Binary files /dev/null and b/wheelcheck274/pydantic-2.7.4-py3-none-any.whl differ