diff --git a/backend/local_rag.py b/backend/local_rag.py
index ab5def3..81581dc 100644
--- a/backend/local_rag.py
+++ b/backend/local_rag.py
@@ -25,6 +25,14 @@ router = APIRouter(tags=["local-rag"])
 LIB_ROOT = Path(__file__).parent / "libraries"
 LIB_ROOT.mkdir(parents=True, exist_ok=True)
 
+RAW_CORPUS_PROFILE = "per-file-default-v1"
+PREPARE_PROFILE = "selective-enrich-v1"
+DEFAULT_EMBED_MODEL = "dengcao/Qwen3-Embedding-0.6B:F16"
+DEFAULT_ENRICH_MODEL = "qwen3:4b"
+DEFAULT_ENRICH_MIN_CHARS = 240
+DEFAULT_ENRICH_MAX_TEXT = 6000
+DEFAULT_ENRICH_CONCURRENCY = max(1, min(4, (os.cpu_count() or 4) // 2))
+
 JOB_EXECUTOR = ThreadPoolExecutor(max_workers=2)
 JOBS: Dict[str, Dict[str, Any]] = {}
 LIB_LOCKS: Dict[str, asyncio.Lock] = {}
@@ -46,8 +54,13 @@ class RemoveFileRequest(BaseModel):
     rel: str
 
 
+class UpdateFileEnrichmentRequest(BaseModel):
+    rel: str
+    enabled: bool
+
+
 class EmbedLibraryRequest(BaseModel):
-    embed_model: str = "dengcao/Qwen3-Embedding-0.6B:F16"
+    embed_model: str = DEFAULT_EMBED_MODEL
     ollama: str = "http://localhost:11434"
     target_chars: int = 2000
     overlap_chars: int = 200
@@ -58,7 +71,7 @@ class LibraryContextRequest(BaseModel):
     prompt: str
     top_k: int = 5
     ollama: str = "http://localhost:11434"
-    embed_model: str = "dengcao/Qwen3-Embedding-0.6B:F16"
+    embed_model: str = DEFAULT_EMBED_MODEL
     gen_model: str = "qwen3:4b"