#!/usr/bin/env python3
"""
gemini_replicate_batch.py
Batch-generate <ID>.json files for Instagram reels using Replicate's
google/gemini-2.5-flash model with dynamic_thinking enabled.
Input: data/**/<ID>.mp4 (any subfolder under data)
Output: data/**/<ID>.json (parsed JSON, next to video)
data/**/<ID>.raw.txt (raw model output, next to video)
Usage:
python3 gemini_replicate_batch.py --data data
python3 gemini_replicate_batch.py --data data --only-missing
python3 gemini_replicate_batch.py --data data --prompt-file prompt.txt
python3 gemini_replicate_batch.py --data data --max-output-tokens 12000
"""
from __future__ import annotations
import argparse
import json
import os
import re
import sys
import time
from pathlib import Path
from typing import Any, Dict, Optional
import replicate
# Default instruction prompt sent to Gemini for each video.  Override with
# --prompt-file.  Fixed: "Provide 13 questions per item" was a garbled "1-3"
# (the "(depending on usefulness)" qualifier and the per-item guidelines below
# clearly describe a 1-3 question range, not 13 questions per item).
DEFAULT_PROMPT = r"""
You analyze an Instagram-style Japanese language video.
The video is either Japanese-only or English+Japanese.
Goal: Create a compact learning JSON for a custom quiz website.
The website will ask the user questions; if the user is wrong or taps "Don't know",
we will show the explanation from this JSON and the original IG media.
TOKEN BUDGET:
Be concise. Do not duplicate explanations inside questions.
STRICT RULES:
1) Do not invent. Only include words/phrases/grammar that clearly appear in the video.
2) For every Japanese string containing kanji, provide a full hiragana reading in a separate field "kana".
3) Keep it small:
- grammar: max 2
- vocab: max 10
- conversation lines: max 10
- key_phrases: max 10
4) NO timestamps. NO evidence_quote. NO source fields. NO confidence fields.
5) Questions MUST reference items by id (do not repeat long explanations in questions).
6) Provide 1-3 questions per item (depending on usefulness). Prefer: cloze, multiple choice, match, register-choice.
OUTPUT:
Return ONLY valid JSON (no markdown). UTF-8 Japanese.
SCHEMA:
{
"meta": {
"mode": "ja_only|en+ja",
"type": "grammar|vocab|conversation|mixed|unknown",
"title_en": "short title (5-8 words max)"
},
"items": {
"grammar": [
{
"id": "g1",
"pattern": "string",
"meaning_en": "one line",
"use_note_en": "1-2 lines max",
"register": "polite|neutral|casual|slang|formal|unknown",
"example": { "jp": "string", "kana": "string", "en": "string" }
}
],
"vocab": [
{
"id": "v1",
"jp": "surface form",
"kana": "hiragana reading",
"meaning_en": "short",
"register": "polite|neutral|casual|slang|formal|unknown",
"note_en": "optional, 1 line max",
"example": { "jp": "optional", "kana": "optional", "en": "optional" }
}
],
"conversation": [
{
"id": "c1",
"jp": "exact line",
"kana": "hiragana reading",
"en": "translation",
"register": "polite|neutral|casual|slang|mixed|unknown"
}
],
"key_phrases": [
{
"id": "k1",
"jp": "phrase",
"kana": "reading",
"meaning_en": "short",
"when_to_use_en": "1-2 lines max",
"register": "polite|neutral|casual|slang|formal|unknown"
}
]
},
"quiz": [
{
"id": "q1",
"targets": ["k1"],
"type": "mc_meaning|mc_register|cloze|match|choose_best_reply",
"prompt_en": "string",
"payload": {
"sentence_jp": "optional",
"sentence_kana": "optional",
"blanked": "optional",
"options": ["A","B","C","D"],
"pairs": [{"left":"","right":""}]
},
"answer": {
"correct_index": 0,
"correct_text": "optional"
}
}
],
"ui_hints": {
"recommended_order": ["g1","k1","v1"],
"show_first": "quiz",
"explain_on_fail": true
}
}
QUESTION GUIDELINES:
- For each grammar item: at least 1 cloze question + 1 meaning/usage question.
- For vocab/key_phrases: at least 1 meaning MC and optionally 1 register/situation question.
- For conversation lines: optionally what does this mean or best reply.
- Keep prompts short. Do not restate long explanations (shown after fail).
Return ONLY the JSON object. No markdown fences.
""".strip()
def load_env_files(paths: list[Path]) -> None:
    """
    Load simple KEY=VALUE pairs from one or more .env files without
    overriding existing environment variables.

    Best-effort: missing or unreadable files are silently skipped, and a
    file given more than once (or via two paths resolving to the same
    file) is only read once.  Supports ``export KEY=VALUE`` lines,
    ``#`` comments, and single/double-quoted values.
    """
    seen: set[Path] = set()
    for path in paths:
        try:
            resolved = path.resolve()
        except OSError:
            # Path.resolve() raises OSError (not FileNotFoundError) on
            # failure in non-strict mode; the old FileNotFoundError
            # handler was dead code.
            continue
        if resolved in seen or not resolved.exists():
            continue
        seen.add(resolved)
        try:
            text = resolved.read_text(encoding="utf-8")
        except OSError:
            # Unreadable .env (permissions, race with deletion): skip,
            # since this loader is best-effort by design.
            continue
        for line in text.splitlines():
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, value = line.split("=", 1)
            # Accept shell-style "export KEY=VALUE" lines.
            key = key.strip().removeprefix("export ").strip()
            value = value.strip()
            # Strip one matching pair of surrounding quotes.
            if value and len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
                value = value[1:-1]
            if key:
                # setdefault: real environment variables win over .env.
                os.environ.setdefault(key, value)
def strip_code_fences(text: str) -> str:
    """Strip a leading ``` / ```json fence and a trailing ``` fence, if present."""
    body = text.strip()
    if body.startswith("```"):
        body = body[3:]
        # The language tag is matched case-insensitively (```json, ```JSON, ...).
        if body[:4].lower() == "json":
            body = body[4:]
        body = body.lstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()
def extract_json_object(text: str) -> Dict[str, Any]:
    """
    Recover a JSON object from model output that may be wrapped in
    markdown fences or surrounding prose.

    Raises ValueError if no JSON object can be located or parsed.
    """
    # Peel off markdown fences first (``` / ```json, case-insensitive).
    cleaned = text.strip()
    cleaned = re.sub(r"^\s*```(?:json)?\s*", "", cleaned, flags=re.IGNORECASE)
    cleaned = re.sub(r"\s*```\s*$", "", cleaned).strip()

    # Fast path: the whole cleaned text is already valid JSON.
    try:
        return json.loads(cleaned)
    except Exception:
        pass

    # Fallback: parse the substring from the first "{" to the last "}".
    start, end = cleaned.find("{"), cleaned.rfind("}")
    if start < 0 or end <= start:
        raise ValueError("Could not locate JSON object in model output.")
    return json.loads(cleaned[start : end + 1].strip())
def validate_minimal_schema(obj: Dict[str, Any]) -> None:
    """
    Minimal structural checks on the parsed model output (Gemini can be
    slightly variable, so only the essentials are enforced).

    Raises ValueError on any violation; returns None on success.
    """
    if not isinstance(obj, dict):
        raise ValueError("Top-level JSON is not an object.")
    for key in ("meta", "items", "quiz"):
        if key not in obj:
            raise ValueError(f"Missing required top-level key: {key}")
    # Guard the type before the membership test: a non-dict meta (e.g. a
    # number) previously raised TypeError instead of the documented ValueError.
    if not isinstance(obj["meta"], dict):
        raise ValueError("meta must be an object")
    if "title_en" not in obj["meta"]:
        raise ValueError("meta.title_en missing")
    if not isinstance(obj["quiz"], list):
        raise ValueError("quiz must be an array")
def run_gemini_on_video(
    video_path: Path,
    video_url: str,
    prompt: str,
    *,
    top_p: float,
    temperature: float,
    dynamic_thinking: bool,
    max_output_tokens: int,
    client: replicate.Client,
    prefer_wait_seconds: Optional[int] = None,
) -> str:
    """
    Call the Replicate google/gemini-2.5-flash model on one video URL and
    return the raw text output.

    video_path is only used for error messages; the model downloads the
    video from video_url.  prefer_wait_seconds is currently unused (kept
    for interface compatibility).  Raises RuntimeError on any API failure.
    """
    payload = {
        "prompt": prompt,
        "images": [],
        "videos": [video_url],
        "top_p": top_p,
        "temperature": temperature,
        "dynamic_thinking": dynamic_thinking,
        "max_output_tokens": max_output_tokens,
    }
    try:
        result = client.run("google/gemini-2.5-flash", input=payload)
        # Normalize the SDK's possible return shapes to a single string.
        if isinstance(result, str):
            return result
        if isinstance(result, list):
            return "".join(map(str, result))
        return str(result)
    except Exception as exc:
        raise RuntimeError(f"Replicate call failed for {video_path.name}: {exc}") from exc
def main() -> None:
    """
    CLI entry point: find .mp4 files under --data, call Gemini via
    Replicate for each, and write <ID>.json (parsed) plus <ID>.raw.txt
    (raw model output) next to each video.

    Exits with status 2 on configuration errors (missing API token,
    missing base URL, missing data dir).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--data", default="data", help="Data directory containing .mp4 files (default: data)")
    ap.add_argument("--prompt-file", default=None, help="Optional prompt.txt to override the default prompt")
    ap.add_argument("--only-missing", action="store_true", help="Only process videos without an existing .json")
    ap.add_argument("--overwrite", action="store_true", help="Overwrite existing .json outputs")
    ap.add_argument("--sleep", type=float, default=0.0, help="Sleep seconds between requests (default: 0)")
    ap.add_argument("--top-p", type=float, default=0.95)
    ap.add_argument("--temperature", type=float, default=0.7)
    # BooleanOptionalAction accepts both --dynamic-thinking and
    # --no-dynamic-thinking.  The previous store_true + default=True
    # combination made the flag a no-op that could never be disabled.
    ap.add_argument("--dynamic-thinking", action=argparse.BooleanOptionalAction, default=True,
                    help="Enable dynamic_thinking (default: ON)")
    ap.add_argument("--max-output-tokens", type=int, default=12000,
                    help="Max output tokens (default: 12000; raise if you need bigger JSON)")
    ap.add_argument("--remote-base-url", default=None,
                    help="Base URL where the --data tree is mirrored (e.g., https://example.com/data)")
    args = ap.parse_args()

    # Pick up credentials from .env files in CWD and next to this script.
    script_dir = Path(__file__).resolve().parent
    load_env_files([Path.cwd() / ".env", script_dir / ".env"])
    token = os.environ.get("REPLICATE_API_TOKEN") or os.environ.get("REPLICATE_API_KEY")
    if token and not os.environ.get("REPLICATE_API_TOKEN"):
        os.environ["REPLICATE_API_TOKEN"] = token  # replicate library expects this name
    if not token:
        print("ERROR: REPLICATE_API_TOKEN not set.", file=sys.stderr)
        sys.exit(2)

    base_url = args.remote_base_url or os.environ.get("REMOTE_BASE_URL")
    if not base_url:
        print("ERROR: --remote-base-url or REMOTE_BASE_URL env var is required (public URL of mirrored data)", file=sys.stderr)
        sys.exit(2)
    base_url = base_url.rstrip("/")

    client = replicate.Client()

    data_dir = Path(args.data).expanduser().resolve()
    if not data_dir.exists():
        print(f"ERROR: data dir not found: {data_dir}", file=sys.stderr)
        sys.exit(2)

    prompt = DEFAULT_PROMPT
    if args.prompt_file:
        prompt_path = Path(args.prompt_file).expanduser().resolve()
        prompt = prompt_path.read_text(encoding="utf-8").strip()

    mp4s = sorted(data_dir.rglob("*.mp4"))
    if not mp4s:
        print(f"No .mp4 files found under {data_dir}")
        return
    print(f"Found {len(mp4s)} videos under {data_dir}")

    for video_path in mp4s:
        stem = video_path.stem
        out_json = video_path.with_suffix(".json")
        out_raw = video_path.with_suffix(".raw.txt")
        rel_video = video_path.relative_to(data_dir)
        video_url = f"{base_url}/{rel_video.as_posix()}"

        # Skip logic: --only-missing skips any existing output; otherwise
        # an existing output is only replaced with --overwrite.
        if out_json.exists() and args.only_missing:
            print(f"SKIP (exists): {rel_video}")
            continue
        if out_json.exists() and (not args.overwrite) and (not args.only_missing):
            print(f"SKIP (use --overwrite to replace): {rel_video}")
            continue

        # Quick size warning: the model has to fetch the video over HTTP.
        size_mb = video_path.stat().st_size / (1024 * 1024)
        if size_mb > 150:
            print(f"WARNING: {video_path.name} is {size_mb:.1f}MB (>150MB). "
                  f"Downloads from the remote server may be slow.")

        print(f"RUN: {rel_video}")
        try:
            raw = run_gemini_on_video(
                video_path,
                video_url,
                prompt,
                top_p=args.top_p,
                temperature=args.temperature,
                # Previously hardcoded to True, silently ignoring the CLI flag.
                dynamic_thinking=args.dynamic_thinking,
                max_output_tokens=args.max_output_tokens,
                client=client,
            )
            out_raw.write_text(raw, encoding="utf-8")
            obj = extract_json_object(raw)
            validate_minimal_schema(obj)
            out_json.write_text(json.dumps(obj, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
            print(f"OK -> {out_json.relative_to(data_dir)}")
        except Exception as e:
            print(f"FAIL: {video_path.name}: {e}", file=sys.stderr)
            # keep raw if we got it
            if out_raw.exists():
                print(f" Raw output saved: {out_raw.name}", file=sys.stderr)

        if args.sleep > 0:
            time.sleep(args.sleep)
# Script entry point: run the batch only when executed directly, not on import.
if __name__ == "__main__":
    main()