# Patch summary (descriptive preamble only; the diff text below is unchanged).
#
# backend/local_rag.py
#   * Two hunks each add the keyword argument `lang_detect=False` to a call
#     whose argument list appears in the hunk. The hunk headers place these
#     calls in `_run_prepare_pipeline` and `build_library`; the callee name is
#     outside the visible context.
#
# backend/rag/corpus_builder.py
#   * Adds the module-level flag `_LANGID_RUNTIME_SAFE`, which is true only
#     when `langid` is not None and the interpreter is NOT
#     (sys.platform == "darwin" and sys.version_info >= (3, 13)).
#   * Reorders `detect_language`: `_ld_detect(sample)` is now returned first
#     when `_ld_detect` is available; `langid.classify(sample)` is used only
#     when `_ld_detect` is None and `_LANGID_RUNTIME_SAFE` is true. Any
#     exception is still swallowed and the function returns None.
#
# NOTE(review): both detectors share one try/except, so an exception raised by
#   `_ld_detect` returns None without ever trying langid -- confirm intended.
# NOTE(review): `_LANGID_RUNTIME_SAFE` reads `sys`; the import is not visible
#   in this hunk -- confirm corpus_builder.py already imports `sys`.
# NOTE(review): the reason langid is excluded on macOS with Python >= 3.13 is
#   not stated in the patch -- presumably a runtime incompatibility; consider
#   documenting it next to the constant.
diff --git a/backend/local_rag.py b/backend/local_rag.py index cd7be08..4571270 100644 --- a/backend/local_rag.py +++ b/backend/local_rag.py @@ -769,6 +769,7 @@ def _run_prepare_pipeline(slug: str, on_progress=None, **opts): out=paths["corpus"], on_progress=build_progress, emit="per-file", + lang_detect=False, ) _mark_pipeline_stage(slug, "build", corpus_signature) states["has_corpus"] = True @@ -1087,6 +1088,7 @@ async def build_library(slug: str): root=stage_dir(slug), out=_collect_library_paths(slug)["corpus"], emit="per-file", + lang_detect=False, stage_signature=payload.get("corpus_signature"), ) return {"job_id": job_id} diff --git a/backend/rag/corpus_builder.py b/backend/rag/corpus_builder.py index c7ddcaf..3db99f5 100644 --- a/backend/rag/corpus_builder.py +++ b/backend/rag/corpus_builder.py @@ -267,6 +267,11 @@ try: except Exception: _ld_detect = None +_LANGID_RUNTIME_SAFE = ( + langid is not None and + not (sys.platform == "darwin" and sys.version_info >= (3, 13)) +) + # Progress try: from tqdm import tqdm @@ -501,11 +506,11 @@ def detect_language(text: str) -> Optional[str]: else: sample = text try: - if langid is not None: - lang, _ = langid.classify(sample) - return lang if _ld_detect is not None: return _ld_detect(sample) + if _LANGID_RUNTIME_SAFE: + lang, _ = langid.classify(sample) + return lang except Exception: pass return None