diff --git a/README.md b/README.md new file mode 100644 index 0000000..b5ea13f --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +# IG Japanese Quizzer + +A full-stack web app for drilling Japanese grammar, vocabulary, and phrases from locally stored Instagram posts. The server indexes every `*.mp4` + matching `*.json` pair under `data/` and exposes them to a React/Vite frontend with a quiz wizard. + +## Quick start + +1. Install dependencies (root workspace): + ```bash + npm install + ``` +2. Run both servers (frontend on 5173, backend on 5174): + ```bash + npm run dev + ``` + - Vite proxies `/api` and `/data` to the Express server, so the client can use relative URLs. +3. Open the app at http://localhost:5173. + +### Scripts +- `npm run dev` – concurrently runs `server` (Express + ts-node-dev) and `client` (Vite dev server). +- `npm run build` – builds the server TypeScript and Vite client. +- `npm run dev --workspace server` / `npm run dev --workspace client` – run either side individually. + +## Data layout + +Files live under `data/` (scanned recursively): +``` +data/<subfolder>/<basename>.mp4 +\_ <basename>.json # quiz payload for that video +``` +- Base filenames must match. Extra sidecars (`.raw.txt`, `.mp4.json`, images) are ignored. +- The JSON structure is tolerant but expects keys: `meta`, `items`, `quiz`, `ui_hints` (additional fields are ignored). +- `entry_id` is the path from `data/` to the mp4 **without** the `.mp4` extension (e.g., `C1abc/12345`). It is URL-encoded in routes/query params. +- Videos are served statically at `/data/...` by the backend; JSON is only accessible through the API. + +## API +- `GET /api/entries` → list of entries `{ id, title, mode, type, counts, video_url }`, sorted by title. +- `GET /api/entry?id=<entry_id>` → full entry JSON plus derived fields `{ id, title, video_url, counts }`. + +## Frontend features +- Overview grid of all entries with counts and metadata. +- Entry detail page with embedded video and learning panels. 
+- Quiz Wizard with three modes: + - All entries (up to 10 random questions) + - Selected entries (checkbox picker) + - Single entry (linked from detail page) +- Quiz types: cloze input, multiple-choice variants, match pairs, and best reply. Wrong answers reveal explanations and the source video. + +## Notes +- The server prevents path traversal by validating resolved paths against the data root and only serving scanned entries. +- Update or add new posts by dropping files into `data/` and restarting the server to rescan. diff --git a/client/.gitignore b/client/.gitignore new file mode 100644 index 0000000..a547bf3 --- /dev/null +++ b/client/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? diff --git a/client/README.md b/client/README.md new file mode 100644 index 0000000..d2e7761 --- /dev/null +++ b/client/README.md @@ -0,0 +1,73 @@ +# React + TypeScript + Vite + +This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. + +Currently, two official plugins are available: + +- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh +- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh + +## React Compiler + +The React Compiler is not enabled in this template because of its impact on dev & build performance. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation). 
+ +## Expanding the ESLint configuration + +If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules: + +```js +export default defineConfig([ + globalIgnores(['dist']), + { + files: ['**/*.{ts,tsx}'], + extends: [ + // Other configs... + + // Remove tseslint.configs.recommended and replace with this + tseslint.configs.recommendedTypeChecked, + // Alternatively, use this for stricter rules + tseslint.configs.strictTypeChecked, + // Optionally, add this for stylistic rules + tseslint.configs.stylisticTypeChecked, + + // Other configs... + ], + languageOptions: { + parserOptions: { + project: ['./tsconfig.node.json', './tsconfig.app.json'], + tsconfigRootDir: import.meta.dirname, + }, + // other options... + }, + }, +]) +``` + +You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules: + +```js +// eslint.config.js +import reactX from 'eslint-plugin-react-x' +import reactDom from 'eslint-plugin-react-dom' + +export default defineConfig([ + globalIgnores(['dist']), + { + files: ['**/*.{ts,tsx}'], + extends: [ + // Other configs... + // Enable lint rules for React + reactX.configs['recommended-typescript'], + // Enable lint rules for React DOM + reactDom.configs.recommended, + ], + languageOptions: { + parserOptions: { + project: ['./tsconfig.node.json', './tsconfig.app.json'], + tsconfigRootDir: import.meta.dirname, + }, + // other options... 
+ }, + }, +]) +``` diff --git a/client/eslint.config.js b/client/eslint.config.js new file mode 100644 index 0000000..5e6b472 --- /dev/null +++ b/client/eslint.config.js @@ -0,0 +1,23 @@ +import js from '@eslint/js' +import globals from 'globals' +import reactHooks from 'eslint-plugin-react-hooks' +import reactRefresh from 'eslint-plugin-react-refresh' +import tseslint from 'typescript-eslint' +import { defineConfig, globalIgnores } from 'eslint/config' + +export default defineConfig([ + globalIgnores(['dist']), + { + files: ['**/*.{ts,tsx}'], + extends: [ + js.configs.recommended, + tseslint.configs.recommended, + reactHooks.configs.flat.recommended, + reactRefresh.configs.vite, + ], + languageOptions: { + ecmaVersion: 2020, + globals: globals.browser, + }, + }, +]) diff --git a/client/index.html b/client/index.html new file mode 100644 index 0000000..a16cb6b --- /dev/null +++ b/client/index.html @@ -0,0 +1,13 @@ + + + + + + + client + + +
+ + + diff --git a/client/package.json b/client/package.json new file mode 100644 index 0000000..b0bbbd7 --- /dev/null +++ b/client/package.json @@ -0,0 +1,31 @@ +{ + "name": "client", + "private": true, + "version": "0.0.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc -b && vite build", + "lint": "eslint .", + "preview": "vite preview" + }, + "dependencies": { + "react": "^19.2.0", + "react-dom": "^19.2.0", + "react-router-dom": "^6.28.0" + }, + "devDependencies": { + "@eslint/js": "^9.39.1", + "@types/node": "^24.10.1", + "@types/react": "^19.2.5", + "@types/react-dom": "^19.2.3", + "@vitejs/plugin-react": "^5.1.1", + "eslint": "^9.39.1", + "eslint-plugin-react-hooks": "^7.0.1", + "eslint-plugin-react-refresh": "^0.4.24", + "globals": "^16.5.0", + "typescript": "~5.9.3", + "typescript-eslint": "^8.46.4", + "vite": "^7.2.4" + } +} diff --git a/client/public/vite.svg b/client/public/vite.svg new file mode 100644 index 0000000..e7b8dfb --- /dev/null +++ b/client/public/vite.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/client/src/App.css b/client/src/App.css new file mode 100644 index 0000000..d6e24d3 --- /dev/null +++ b/client/src/App.css @@ -0,0 +1,45 @@ +.app-shell { + min-height: 100vh; +} + +.topbar { + display: flex; + align-items: center; + justify-content: space-between; + padding: 1.25rem 0; + position: sticky; + top: 0; + z-index: 10; + backdrop-filter: blur(12px); +} + +.brand { + font-weight: 800; + letter-spacing: -0.01em; + font-size: 1.1rem; + text-decoration: none; +} + +nav { + display: flex; + gap: 0.5rem; +} + +.link { + padding: 0.55rem 0.85rem; + border-radius: 12px; + text-decoration: none; + color: inherit; + font-weight: 600; +} + +.link.active { + background: rgba(0, 0, 0, 0.06); + color: var(--accent); +} + +.main { + max-width: 1080px; + margin: 0 auto; + padding: 0 1rem 3rem; +} diff --git a/client/src/App.tsx b/client/src/App.tsx new file mode 100644 index 0000000..c4d2e09 --- /dev/null +++ 
b/client/src/App.tsx @@ -0,0 +1,33 @@ +import { NavLink, Route, Routes } from 'react-router-dom'; +import OverviewPage from './pages/OverviewPage'; +import EntryPage from './pages/EntryPage'; +import QuizPage from './pages/QuizPage'; +import './App.css'; + +export default function App() { + return ( +
+
+ + IG Japanese Quizzer + + +
+ +
+ + } /> + } /> + } /> + +
+
+ ); +} diff --git a/client/src/api.ts b/client/src/api.ts new file mode 100644 index 0000000..4ccba8f --- /dev/null +++ b/client/src/api.ts @@ -0,0 +1,57 @@ +import type { EntryDetail, EntryItems, EntrySummary } from './types'; + +const entryCache = new Map(); + +function normalizeItems(items: Partial | undefined): EntryItems { + return { + grammar: Array.isArray(items?.grammar) ? items.grammar : [], + vocab: Array.isArray(items?.vocab) ? items.vocab : [], + conversation: Array.isArray(items?.conversation) ? items.conversation : [], + key_phrases: Array.isArray(items?.key_phrases) ? items.key_phrases : [], + }; +} + +function normalizeDetail(payload: any): EntryDetail { + const items = normalizeItems(payload?.items); + const counts = payload?.counts || { + grammar: items.grammar.length, + vocab: items.vocab.length, + key_phrases: items.key_phrases.length, + conversation: items.conversation.length, + quiz: Array.isArray(payload?.quiz) ? payload.quiz.length : 0, + }; + + return { + id: String(payload?.id ?? ''), + title: payload?.title || payload?.meta?.title_en || String(payload?.id ?? 'Untitled'), + meta: payload?.meta || {}, + items, + quiz: Array.isArray(payload?.quiz) ? 
payload.quiz : [], + ui_hints: payload?.ui_hints || {}, + video_url: payload?.video_url || '', + counts, + }; +} + +export async function fetchEntries(): Promise { + const response = await fetch('/api/entries'); + if (!response.ok) { + throw new Error('Failed to load entries'); + } + return response.json(); +} + +export async function fetchEntry(id: string): Promise { + if (entryCache.has(id)) { + return entryCache.get(id)!; + } + + const response = await fetch(`/api/entry?id=${encodeURIComponent(id)}`); + if (!response.ok) { + throw new Error('Failed to load entry'); + } + const payload = await response.json(); + const detail = normalizeDetail(payload); + entryCache.set(id, detail); + return detail; +} diff --git a/client/src/assets/react.svg b/client/src/assets/react.svg new file mode 100644 index 0000000..6c87de9 --- /dev/null +++ b/client/src/assets/react.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/client/src/components/EntryCard.tsx b/client/src/components/EntryCard.tsx new file mode 100644 index 0000000..1accc87 --- /dev/null +++ b/client/src/components/EntryCard.tsx @@ -0,0 +1,26 @@ +import { Link } from 'react-router-dom'; +import type { EntrySummary } from '../types'; + +interface Props { + entry: EntrySummary; +} + +export default function EntryCard({ entry }: Props) { + const { counts } = entry; + return ( + +
{entry.title}
+
+ {entry.mode || 'mixed'} + {entry.type && {entry.type}} +
+
+ Grammar {counts.grammar} + Vocab {counts.vocab} + Phrases {counts.key_phrases} + Conversation {counts.conversation} + Quiz {counts.quiz} +
+ + ); +} diff --git a/client/src/components/ItemPanels.tsx b/client/src/components/ItemPanels.tsx new file mode 100644 index 0000000..961ad05 --- /dev/null +++ b/client/src/components/ItemPanels.tsx @@ -0,0 +1,146 @@ +import type { ConversationItem, GrammarItem, KeyPhraseItem, VocabItem } from '../types'; + +interface ItemProps { + items?: T[]; +} + +const renderExample = (item: any) => { + const example = item.example || {}; + const hasExample = example.jp || example.en || example.kana || item.example_jp || item.example_en || item.example_kana; + if (!hasExample) return null; + return ( +
+ Example +
+ {example.jp || item.example_jp} + {example.kana || item.example_kana ?
{example.kana || item.example_kana}
: null} + {example.en || item.example_en ?
{example.en || item.example_en}
: null} +
+
+ ); +}; + +const renderNote = (item: any) => { + const note = item.use_note_en || item.note_en || item.when_to_use_en; + if (!note) return null; + return ( +
+ Usage +
{note}
+
+ ); +}; + +const renderRegister = (item: any) => { + if (!item.register) return null; + return ( +
+ Register +
{item.register}
+
+ ); +}; + +export function GrammarPanel({ items }: ItemProps) { + if (!items?.length) return null; + return ( +
+
+

Grammar

+
+
+ {items.map((item) => ( +
+
{item.pattern || item.jp || item.id}
+
+ {(item.meaning_en || item.meaning) &&
{item.meaning_en || item.meaning}
} + {renderRegister(item)} + {renderNote(item)} + {renderExample(item)} +
+
+ ))} +
+
+ ); +} + +export function VocabPanel({ items }: ItemProps) { + if (!items?.length) return null; + return ( +
+
+

Vocabulary

+
+
+ {items.map((item) => ( +
+
{item.jp || item.id}
+ {(item.kana || item.meaning_en || item.meaning) && ( +
+ {item.kana &&
{item.kana}
} + {(item.meaning_en || item.meaning) &&
{item.meaning_en || item.meaning}
} +
+ )} + {renderRegister(item)} + {renderNote(item)} + {renderExample(item)} +
+ ))} +
+
+ ); +} + +export function KeyPhrasePanel({ items }: ItemProps) { + if (!items?.length) return null; + return ( +
+
+

Key Phrases

+
+
+ {items.map((item) => ( +
+
{item.jp || item.id}
+ {(item.kana || item.meaning_en || item.meaning) && ( +
+ {item.kana &&
{item.kana}
} + {(item.meaning_en || item.meaning) &&
{item.meaning_en || item.meaning}
} +
+ )} + {renderRegister(item)} + {renderNote(item)} + {renderExample(item)} +
+ ))} +
+
+ ); +} + +export function ConversationPanel({ items }: ItemProps) { + if (!items?.length) return null; + return ( +
+
+

Conversation

+
+
+ {items.map((item) => ( +
+
{item.jp || item.id}
+ {(item.kana || item.en) && ( +
+ {item.kana &&
{item.kana}
} + {item.en &&
{item.en}
} +
+ )} + {renderRegister(item)} + {renderNote(item)} +
+ ))} +
+
+ ); +} diff --git a/client/src/components/QuizRunner.tsx b/client/src/components/QuizRunner.tsx new file mode 100644 index 0000000..cf50423 --- /dev/null +++ b/client/src/components/QuizRunner.tsx @@ -0,0 +1,463 @@ +import { useEffect, useMemo, useState } from 'react'; +import { fetchEntries, fetchEntry } from '../api'; +import type { EntryItems, EntrySummary, QuizQuestionWithEntry } from '../types'; +import VideoPlayer from './VideoPlayer'; + +type Mode = 'all' | 'selected' | 'single'; + +const TOTAL_QUESTIONS = 10; + +interface QuizRunnerProps { + defaultMode?: Mode; + defaultEntryId?: string; +} + +interface TargetHit { + group: string; + item: Record; +} + +function shuffle(list: T[]) { + const copy = [...list]; + for (let i = copy.length - 1; i > 0; i -= 1) { + const j = Math.floor(Math.random() * (i + 1)); + [copy[i], copy[j]] = [copy[j], copy[i]]; + } + return copy; +} + +const normalize = (val: any) => (val === undefined || val === null ? '' : String(val).trim()); + +function resolveTargets(question: QuizQuestionWithEntry): TargetHit[] { + const targetIds = new Set((question.targets || []).map((t) => normalize(t))); + const groups: { label: string; items: any[] }[] = [ + { label: 'Grammar', items: question.items?.grammar || [] }, + { label: 'Vocabulary', items: question.items?.vocab || [] }, + { label: 'Key Phrases', items: question.items?.key_phrases || [] }, + { label: 'Conversation', items: question.items?.conversation || [] }, + ]; + + const found: TargetHit[] = []; + groups.forEach(({ label, items }) => { + items.forEach((item) => { + if (item?.id && targetIds.has(normalize(item.id))) { + found.push({ group: label, item }); + } + }); + }); + + return found; +} + +function deriveCorrectText(question: QuizQuestionWithEntry) { + const options: any[] = Array.isArray(question.payload?.options) ? 
question.payload?.options : []; + if (typeof question.answer?.correct_index === 'number' && options[question.answer.correct_index]) { + return options[question.answer.correct_index]; + } + if (question.answer?.correct_text) return question.answer.correct_text; + if (question.payload?.blanked) return question.payload.blanked; + const pairs = Array.isArray(question.payload?.pairs) ? question.payload.pairs : []; + if (pairs.length) { + return pairs.map((p: any) => `${p.left} → ${p.right}`).join(' | '); + } + return ''; +} + +function checkClozeAnswer(question: QuizQuestionWithEntry, response: string) { + if (!response) return false; + const expected = [question.answer?.correct_text, question.answer?.correct, question.payload?.blanked].filter(Boolean).map(normalize); + const answer = normalize(response); + return expected.some((val) => val === answer || val.toLowerCase() === answer.toLowerCase()); +} + +function checkMatchAnswer(question: QuizQuestionWithEntry, response: Record | null) { + const pairs: any[] = Array.isArray(question.payload?.pairs) ? question.payload.pairs : []; + if (!pairs.length) return false; + return pairs.every((pair, idx) => { + const expected = normalize(pair.right); + const user = normalize(response?.[idx]); + return expected === user; + }); +} + +function checkMcAnswer(question: QuizQuestionWithEntry, response: number | null) { + if (typeof response !== 'number') return false; + if (typeof question.answer?.correct_index !== 'number') return false; + return response === question.answer.correct_index; +} + +function QuestionRenderer({ + question, + response, + onChange, +}: { + question: QuizQuestionWithEntry; + response: any; + onChange: (val: any) => void; +}) { + const payload = question.payload || {}; + const type = question.type || ''; + + if (type === 'cloze') { + const sentence = payload.sentence_jp || payload.sentence || ''; + return ( +
+ {sentence &&
{sentence.replace(payload.blanked || '', '____')}
} + onChange(e.target.value)} + /> + {Array.isArray(payload.options) && payload.options.length > 0 && ( +
Hints: {payload.options.join(' • ')}
+ )} +
+ ); + } + + if (type === 'match') { + const pairs: any[] = Array.isArray(payload.pairs) ? payload.pairs : []; + const rightOptions = useMemo( + () => shuffle(pairs.map((p) => p.right).filter(Boolean)), + [question.id, question.entryId] + ); + return ( +
+ {pairs.map((pair, idx) => ( +
+
{pair.left}
+ +
+ ))} +
+ ); + } + + const options: any[] = Array.isArray(payload.options) ? payload.options : []; + if (!options.length) { + return
No options provided for this question.
; + } + return ( +
+ {options.map((option, idx) => ( + + ))} +
+ ); +} + +function ExplanationPanel({ question, targets }: { question: QuizQuestionWithEntry; targets: TargetHit[] }) { + return ( +
+

Explanation

+ {targets.length ? ( +
+ {targets.map(({ group, item }) => ( +
+
{item.jp || item.pattern || item.id}
+
{item.meaning_en || item.en || item.when_to_use_en}
+ {item.use_note_en &&
{item.use_note_en}
} + {item.register && {item.register}} +
{group}
+
+ ))} +
+ ) : ( +

No linked study items were found for this question.

+ )} + +
+ ); +} + +export default function QuizRunner({ defaultMode = 'all', defaultEntryId }: QuizRunnerProps) { + const [entries, setEntries] = useState([]); + const [loadingEntries, setLoadingEntries] = useState(true); + const [mode, setMode] = useState(defaultMode); + const [selectedIds, setSelectedIds] = useState(defaultEntryId ? [defaultEntryId] : []); + const [questions, setQuestions] = useState([]); + const [status, setStatus] = useState<'setup' | 'loading' | 'running' | 'finished'>('setup'); + const [currentIndex, setCurrentIndex] = useState(0); + const [score, setScore] = useState(0); + const [response, setResponse] = useState(null); + const [showResult, setShowResult] = useState(false); + const [lastCorrect, setLastCorrect] = useState(false); + const [error, setError] = useState(null); + + useEffect(() => { + setLoadingEntries(true); + fetchEntries() + .then((data) => setEntries(data)) + .catch(() => setError('Could not load entries.')) + .finally(() => setLoadingEntries(false)); + }, []); + + useEffect(() => { + if (defaultEntryId) { + setMode('single'); + setSelectedIds([defaultEntryId]); + } + }, [defaultEntryId]); + + const currentQuestion = useMemo(() => questions[currentIndex], [questions, currentIndex]); + + const resetQuestionState = () => { + setResponse(null); + setShowResult(false); + setLastCorrect(false); + }; + + const startQuiz = async () => { + const ids: string[] = + mode === 'all' + ? entries.map((e) => e.id) + : mode === 'selected' + ? 
selectedIds + : selectedIds.slice(0, 1); + + if (!ids.length) { + setError('Pick at least one entry to quiz on.'); + return; + } + + setError(null); + setStatus('loading'); + + try { + const uniqueIds = Array.from(new Set(ids)); + const details = await Promise.all(uniqueIds.map((id) => fetchEntry(id))); + const pool: QuizQuestionWithEntry[] = details.flatMap((entry) => { + const safeItems: EntryItems = entry.items || { grammar: [], vocab: [], conversation: [], key_phrases: [] }; + return (entry.quiz || []).map((q) => ({ + ...q, + entryId: entry.id, + entryTitle: entry.title, + items: safeItems, + video_url: entry.video_url, + targets: q.targets || [], + type: q.type || 'unknown', + payload: q.payload || {}, + answer: q.answer || {}, + })); + }); + + if (!pool.length) { + setError('No quiz questions found in the selected entries.'); + setStatus('setup'); + return; + } + + const chosen = shuffle(pool).slice(0, Math.min(TOTAL_QUESTIONS, pool.length)); + setQuestions(chosen); + setCurrentIndex(0); + setScore(0); + resetQuestionState(); + setStatus('running'); + } catch (err: any) { + setError(err?.message || 'Could not start quiz.'); + setStatus('setup'); + } + }; + + const handleSubmit = (skip = false) => { + if (!currentQuestion || showResult) return; + let correct = false; + if (!skip) { + if ((currentQuestion.type || '').startsWith('mc') || currentQuestion.type === 'choose_best_reply') { + correct = checkMcAnswer(currentQuestion, response); + } else if (currentQuestion.type === 'cloze') { + correct = checkClozeAnswer(currentQuestion, response); + } else if (currentQuestion.type === 'match') { + correct = checkMatchAnswer(currentQuestion, response); + } else if (typeof currentQuestion.answer?.correct_index === 'number') { + correct = checkMcAnswer(currentQuestion, response); + } + } + + if (correct) { + setScore((s) => s + 1); + } + setLastCorrect(correct); + setShowResult(true); + }; + + const goNext = () => { + if (currentIndex + 1 >= questions.length) { + 
setStatus('finished'); + } else { + setCurrentIndex((idx) => idx + 1); + resetQuestionState(); + } + }; + + if (loadingEntries) { + return
Loading quiz setup…
; + } + + if (status === 'setup') { + return ( +
+
+
+

Quiz Wizard

+

Pick a mode

+

Build a 10-question run from all entries, a custom set, or a single reel.

+
+
+ +
+ + + +
+ + {mode === 'selected' && ( +
+

Check the entries you want in the pool.

+
+ {entries.map((entry) => ( + + ))} +
+
+ )} + + {mode === 'single' && ( +
+

Pick the entry to drill.

+ +
+ )} + + {error &&
{error}
} + + +
+ ); + } + + if (status === 'loading') { + return
Building your quiz…
; + } + + if (status === 'finished') { + return ( +
+

Nice work!

+

You scored {score} out of {questions.length}.

+
+ +
+
+ ); + } + + if (!currentQuestion) { + return
No questions available.
; + } + + const targets = resolveTargets(currentQuestion); + const correctText = deriveCorrectText(currentQuestion); + + return ( +
+
+
+

{currentQuestion.entryTitle}

+

{currentQuestion.prompt_en || 'Answer the prompt'}

+
+
+
{currentIndex + 1} / {questions.length}
+
Score: {score}
+
+
+ + + + {showResult && ( +
+ {lastCorrect ? 'Correct!' : 'Not quite.'} + {!lastCorrect && correctText &&
Answer: {correctText}
} +
+ )} + +
+ {!showResult && ( + <> + + + + )} + {showResult && ( + + )} +
+ + {showResult && !lastCorrect && } +
+ ); +} diff --git a/client/src/components/VideoPlayer.tsx b/client/src/components/VideoPlayer.tsx new file mode 100644 index 0000000..e8e2e27 --- /dev/null +++ b/client/src/components/VideoPlayer.tsx @@ -0,0 +1,12 @@ +interface Props { + src: string; +} + +export default function VideoPlayer({ src }: Props) { + if (!src) return null; + return ( +
+
+ ); +} diff --git a/client/src/index.css b/client/src/index.css new file mode 100644 index 0000000..2431319 --- /dev/null +++ b/client/src/index.css @@ -0,0 +1,429 @@ +@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;600;700&family=DM+Sans:wght@400;500;700&display=swap'); + +:root { + font-family: 'DM Sans', 'Space Grotesk', sans-serif; + line-height: 1.6; + font-weight: 400; + color: #0f172a; + background: radial-gradient(circle at 10% 20%, rgba(255, 192, 203, 0.25), transparent 35%), + radial-gradient(circle at 80% 0%, rgba(109, 195, 255, 0.2), transparent 32%), + #f6f7fb; + min-height: 100vh; + --accent: #ff5a3c; + --card: #ffffff; + --border: #e5e7eb; + --muted: #4b5563; +} + +* { + box-sizing: border-box; +} + +body { + margin: 0; + padding: 0; + color: inherit; + background: inherit; +} + +a { + color: inherit; + text-decoration: none; +} + +p { + margin: 0; +} + +h1, +h2, +h3, +h4 { + margin: 0; + letter-spacing: -0.01em; +} + +.main { + padding-top: 0.5rem; +} + +.page-header { + display: flex; + align-items: flex-start; + justify-content: space-between; + gap: 1rem; + margin: 1rem 0 1.5rem; +} + +.eyebrow { + text-transform: uppercase; + font-size: 0.75rem; + letter-spacing: 0.25em; + color: var(--muted); + margin-bottom: 0.35rem; +} + +.muted { + color: var(--muted); +} + +.grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(260px, 1fr)); + gap: 1rem; +} + +.entry-card { + display: block; + background: var(--card); + padding: 1rem; + border-radius: 16px; + border: 1px solid var(--border); + box-shadow: 0 10px 30px rgba(0, 0, 0, 0.05); + transition: transform 150ms ease, box-shadow 150ms ease; +} + +.entry-card:hover { + transform: translateY(-4px); + box-shadow: 0 18px 40px rgba(0, 0, 0, 0.08); +} + +.entry-card__title { + font-weight: 700; + margin-bottom: 0.35rem; +} + +.entry-card__meta { + display: flex; + gap: 0.5rem; + margin-bottom: 0.5rem; +} + +.entry-card__counts { + display: grid; + 
grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 0.35rem; + font-size: 0.9rem; + color: var(--muted); +} + +.pill { + display: inline-flex; + align-items: center; + gap: 0.25rem; + padding: 0.25rem 0.6rem; + border-radius: 999px; + background: rgba(0, 0, 0, 0.06); + font-size: 0.85rem; + font-weight: 600; +} + +.pill--ghost { + background: rgba(0, 0, 0, 0.04); + color: var(--muted); +} + +.pill--accent { + background: rgba(255, 90, 60, 0.1); + color: #d9482b; +} + +.button { + border: 1px solid var(--border); + border-radius: 14px; + padding: 0.65rem 1rem; + background: #fff; + cursor: pointer; + font-weight: 700; + transition: all 120ms ease; +} + +.button:hover { + transform: translateY(-1px); +} + +.button--primary, +.button--solid { + background: var(--accent); + color: #fff; + border-color: var(--accent); +} + +.button--ghost { + background: transparent; + color: inherit; +} + +.actions { + display: flex; + gap: 0.75rem; + align-items: center; +} + +.video-shell { + margin: 1rem 0; + background: #000; + border-radius: 16px; + overflow: hidden; + box-shadow: 0 14px 30px rgba(0, 0, 0, 0.2); +} + +.video-shell video { + width: 100%; + height: auto; + display: block; +} + +.meta-box { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); + gap: 1rem; + margin: 1rem 0 2rem; + padding: 1rem; + background: var(--card); + border-radius: 16px; + border: 1px solid var(--border); +} + +.label { + font-size: 0.85rem; + color: var(--muted); +} + +.code { + word-break: break-all; + font-family: 'Space Grotesk', monospace; +} + +.crumbs { + margin: 0.5rem 0; +} + +.link { + background: transparent; +} + +.panel { + margin: 1.5rem 0; +} + +.panel-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); + gap: 0.75rem; +} + +.panel-card { + background: var(--card); + border: 1px solid var(--border); + border-radius: 14px; + padding: 0.85rem; + position: relative; +} + +.panel-card__title { + font-weight: 700; + 
margin-bottom: 0.25rem; +} + +.panel-card__body { + color: var(--muted); + font-size: 0.95rem; +} + +.tag { + position: absolute; + top: 0.65rem; + right: 0.65rem; + font-size: 0.7rem; + color: var(--muted); +} + +.subline { + color: var(--muted); + font-size: 0.9rem; +} + +.item-row { + margin-top: 0.35rem; +} + +.chips { + display: flex; + flex-wrap: wrap; + gap: 0.35rem; + margin-top: 0.5rem; +} + +.loading, +.error { + padding: 1rem; + background: var(--card); + border-radius: 12px; + border: 1px solid var(--border); + color: #111827; +} + +.error { + border-color: #ef4444; +} + +.input { + width: 100%; + padding: 0.65rem 0.75rem; + border-radius: 12px; + border: 1px solid var(--border); + background: #fff; + font-size: 1rem; +} + +.selector { + background: var(--card); + padding: 1rem; + border-radius: 12px; + border: 1px solid var(--border); + margin: 1rem 0; +} + +.selector-grid { + max-height: 260px; + overflow: auto; + display: grid; + grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); + gap: 0.35rem; +} + +.selector-row { + display: flex; + gap: 0.5rem; + align-items: center; + background: #f9fafb; + padding: 0.45rem 0.6rem; + border-radius: 10px; +} + +.mode-switch { + display: flex; + gap: 0.65rem; + flex-wrap: wrap; + margin: 1rem 0; +} + +.quiz-setup { + background: var(--card); + padding: 1.25rem; + border-radius: 14px; + border: 1px solid var(--border); + box-shadow: 0 12px 28px rgba(0, 0, 0, 0.05); +} + +.quiz-runner { + background: var(--card); + padding: 1.25rem; + border-radius: 16px; + border: 1px solid var(--border); + box-shadow: 0 14px 35px rgba(0, 0, 0, 0.05); +} + +.quiz-top { + display: flex; + align-items: center; + justify-content: space-between; + gap: 1rem; +} + +.score-box { + text-align: right; +} + +.score { + font-weight: 800; +} + +.question-block { + margin: 1rem 0; +} + +.option { + display: flex; + gap: 0.5rem; + align-items: center; + background: #f9fafb; + border-radius: 10px; + padding: 0.5rem 0.75rem; + 
margin-bottom: 0.35rem; +} + +.matches { + display: flex; + flex-direction: column; + gap: 0.5rem; +} + +.match-row { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 0.5rem; + align-items: center; +} + +.match-left { + font-weight: 700; +} + +.option-hints { + font-size: 0.9rem; + color: var(--muted); +} + +.quiz-actions { + display: flex; + gap: 0.5rem; + margin: 0.5rem 0 1rem; +} + +.callout { + padding: 0.85rem 1rem; + border-radius: 12px; + border: 1px solid var(--border); + background: #fff6f4; + color: #d9482b; +} + +.callout.success { + background: #e5fbef; + color: #0f9d58; +} + +.explanation { + margin-top: 1rem; + padding-top: 0.5rem; + border-top: 1px solid var(--border); +} + +.explanation-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); + gap: 0.75rem; + margin-bottom: 0.75rem; +} + +.quiz-finished { + background: var(--card); + padding: 1.5rem; + border-radius: 14px; + border: 1px solid var(--border); + text-align: center; +} + +@media (max-width: 720px) { + .page-header, + .quiz-top { + flex-direction: column; + align-items: flex-start; + } + + .match-row { + grid-template-columns: 1fr; + } +} diff --git a/client/src/main.tsx b/client/src/main.tsx new file mode 100644 index 0000000..dde7c4a --- /dev/null +++ b/client/src/main.tsx @@ -0,0 +1,13 @@ +import { StrictMode } from 'react'; +import { createRoot } from 'react-dom/client'; +import { BrowserRouter } from 'react-router-dom'; +import './index.css'; +import App from './App'; + +createRoot(document.getElementById('root')!).render( + + + + + +); diff --git a/client/src/pages/EntryPage.tsx b/client/src/pages/EntryPage.tsx new file mode 100644 index 0000000..eb98fa5 --- /dev/null +++ b/client/src/pages/EntryPage.tsx @@ -0,0 +1,88 @@ +import { useEffect, useMemo, useState } from 'react'; +import { Link, useNavigate, useParams } from 'react-router-dom'; +import { fetchEntry } from '../api'; +import VideoPlayer from '../components/VideoPlayer'; +import { 
ConversationPanel, GrammarPanel, KeyPhrasePanel, VocabPanel } from '../components/ItemPanels'; +import type { EntryDetail } from '../types'; + +export default function EntryPage() { + const { idEncoded } = useParams(); + const navigate = useNavigate(); + const [entry, setEntry] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + + const entryId = useMemo(() => { + try { + return decodeURIComponent(idEncoded || ''); + } catch { + return idEncoded || ''; + } + }, [idEncoded]); + + useEffect(() => { + if (!entryId) return; + setLoading(true); + fetchEntry(entryId) + .then((data) => setEntry(data)) + .catch(() => setError('Entry not found')) + .finally(() => setLoading(false)); + }, [entryId]); + + if (!entryId) { + return
No entry id provided.
; + } + + if (loading) return
Loading entry…
; + if (error || !entry) return
{error || 'Entry not found.'}
; + + const counts = entry.counts || { grammar: 0, vocab: 0, key_phrases: 0, conversation: 0, quiz: 0 }; + const quizLink = `/quiz?mode=entry&id=${encodeURIComponent(entry.id)}`; + + return ( +
+
+ +
+
+
+

{entry.meta?.mode || 'mode not set'}

+

{entry.title}

+

{entry.meta?.type}

+
+ Grammar {counts.grammar} + Vocab {counts.vocab} + Phrases {counts.key_phrases} + Conversation {counts.conversation} + Quiz {counts.quiz} +
+
+
+ Start quiz (this entry) +
+
+ + + +
+
+
Mode
+
{entry.meta?.mode || 'n/a'}
+
+
+
Type
+
{entry.meta?.type || 'n/a'}
+
+
+
Entry ID
+
{entry.id}
+
+
+ + + + + +
+ ); +} diff --git a/client/src/pages/OverviewPage.tsx b/client/src/pages/OverviewPage.tsx new file mode 100644 index 0000000..c8e1fbf --- /dev/null +++ b/client/src/pages/OverviewPage.tsx @@ -0,0 +1,43 @@ +import { useEffect, useState } from 'react'; +import { Link } from 'react-router-dom'; +import EntryCard from '../components/EntryCard'; +import { fetchEntries } from '../api'; +import type { EntrySummary } from '../types'; + +export default function OverviewPage() { + const [entries, setEntries] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + + useEffect(() => { + fetchEntries() + .then((data) => setEntries(data)) + .catch(() => setError('Could not load entries')) + .finally(() => setLoading(false)); + }, []); + + if (loading) return
Loading entries…
; + if (error) return
{error}
; + + return ( +
+
+
+

IG Japanese Quizzer

+

Choose a reel to study

+

Each card bundles grammar, vocab, phrases, and quizzes pulled from your local data folder.

+
+ Jump to Quiz Wizard +
+ {entries.length === 0 ? ( +
No entries detected in data/. Add mp4 + json pairs and restart the server.
+ ) : ( +
+ {entries.map((entry) => ( + + ))} +
+ )} +
+ ); +} diff --git a/client/src/pages/QuizPage.tsx b/client/src/pages/QuizPage.tsx new file mode 100644 index 0000000..718eb9a --- /dev/null +++ b/client/src/pages/QuizPage.tsx @@ -0,0 +1,25 @@ +import { useMemo } from 'react'; +import { useSearchParams } from 'react-router-dom'; +import QuizRunner from '../components/QuizRunner'; + +export default function QuizPage() { + const [params] = useSearchParams(); + + const { mode, entryId } = useMemo(() => { + const modeParam = params.get('mode'); + const idParam = params.get('id'); + let decodedId: string | undefined; + if (idParam) { + try { + decodedId = decodeURIComponent(idParam); + } catch { + decodedId = idParam; + } + } + return { mode: modeParam, entryId: decodedId }; + }, [params]); + + const defaultMode = mode === 'entry' ? 'single' : mode === 'selected' ? 'selected' : 'all'; + + return ; +} diff --git a/client/src/types.ts b/client/src/types.ts new file mode 100644 index 0000000..0c19448 --- /dev/null +++ b/client/src/types.ts @@ -0,0 +1,92 @@ +export interface ExampleBlock { + jp?: string; + kana?: string; + en?: string; +} + +export interface BaseItem { + id?: string; + jp?: string; + kana?: string; + meaning_en?: string; + meaning?: string; + use_note_en?: string; + when_to_use_en?: string; + register?: string; + note_en?: string; + example?: ExampleBlock; + example_jp?: string; + example_kana?: string; + example_en?: string; +} + +export interface GrammarItem extends BaseItem { + pattern?: string; +} + +export interface VocabItem extends BaseItem {} + +export interface ConversationItem extends BaseItem { + en?: string; +} + +export interface KeyPhraseItem extends BaseItem {} + +export interface EntryItems { + grammar: GrammarItem[]; + vocab: VocabItem[]; + conversation: ConversationItem[]; + key_phrases: KeyPhraseItem[]; +} + +export interface EntryCounts { + grammar: number; + vocab: number; + key_phrases: number; + conversation: number; + quiz: number; +} + +export interface EntrySummary { + id: string; 
+ title: string; + mode?: string; + type?: string; + counts: EntryCounts; + video_url: string; +} + +export interface EntryDetail { + id: string; + title: string; + meta?: { + mode?: string; + type?: string; + title_en?: string; + }; + items: EntryItems; + quiz: QuizQuestion[]; + ui_hints?: { + recommended_order?: (string | number)[]; + show_first?: string; + explain_on_fail?: boolean; + }; + video_url: string; + counts: EntryCounts; +} + +export interface QuizQuestion { + id?: string | number; + targets?: (string | number)[]; + type?: string; + prompt_en?: string; + payload?: Record; + answer?: Record; +} + +export interface QuizQuestionWithEntry extends QuizQuestion { + entryId: string; + entryTitle: string; + items: EntryItems; + video_url: string; +} diff --git a/client/tsconfig.app.json b/client/tsconfig.app.json new file mode 100644 index 0000000..a9b5a59 --- /dev/null +++ b/client/tsconfig.app.json @@ -0,0 +1,28 @@ +{ + "compilerOptions": { + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", + "target": "ES2022", + "useDefineForClassFields": true, + "lib": ["ES2022", "DOM", "DOM.Iterable"], + "module": "ESNext", + "types": ["vite/client"], + "skipLibCheck": true, + + /* Bundler mode */ + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "moduleDetection": "force", + "noEmit": true, + "jsx": "react-jsx", + + /* Linting */ + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "erasableSyntaxOnly": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true + }, + "include": ["src"] +} diff --git a/client/tsconfig.json b/client/tsconfig.json new file mode 100644 index 0000000..1ffef60 --- /dev/null +++ b/client/tsconfig.json @@ -0,0 +1,7 @@ +{ + "files": [], + "references": [ + { "path": "./tsconfig.app.json" }, + { "path": "./tsconfig.node.json" } + ] +} diff --git a/client/tsconfig.node.json b/client/tsconfig.node.json new file mode 100644 index 
0000000..8a67f62 --- /dev/null +++ b/client/tsconfig.node.json @@ -0,0 +1,26 @@ +{ + "compilerOptions": { + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", + "target": "ES2023", + "lib": ["ES2023"], + "module": "ESNext", + "types": ["node"], + "skipLibCheck": true, + + /* Bundler mode */ + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "moduleDetection": "force", + "noEmit": true, + + /* Linting */ + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "erasableSyntaxOnly": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true + }, + "include": ["vite.config.ts"] +} diff --git a/client/vite.config.ts b/client/vite.config.ts new file mode 100644 index 0000000..c03b60b --- /dev/null +++ b/client/vite.config.ts @@ -0,0 +1,13 @@ +import { defineConfig } from 'vite'; +import react from '@vitejs/plugin-react'; + +export default defineConfig({ + plugins: [react()], + server: { + port: 5173, + proxy: { + '/api': 'http://localhost:5174', + '/data': 'http://localhost:5174', + }, + }, +}); diff --git a/gemini_replicate_batch.py b/gemini_replicate_batch.py new file mode 100644 index 0000000..3eda2f2 --- /dev/null +++ b/gemini_replicate_batch.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +""" +gemini_replicate_batch.py + +Batch-generate .json files for Instagram reels using Replicate's +google/gemini-2.5-flash model with dynamic_thinking enabled. 
+ +Input: data/**/.mp4 (any subfolder under data) +Output: data/**/.json (parsed JSON, next to video) + data/**/.raw.txt (raw model output, next to video) + +Usage: + python3 gemini_replicate_batch.py --data data + python3 gemini_replicate_batch.py --data data --only-missing + python3 gemini_replicate_batch.py --data data --prompt-file prompt.txt + python3 gemini_replicate_batch.py --data data --max-output-tokens 12000 +""" + +from __future__ import annotations + +import argparse +import json +import os +import re +import sys +import time +from pathlib import Path +from typing import Any, Dict, Optional + +import replicate + + +DEFAULT_PROMPT = r""" +You analyze an Instagram-style Japanese language video. +The video is either Japanese-only or English+Japanese. + +Goal: Create a compact learning JSON for a custom quiz website. +The website will ask the user questions; if the user is wrong or taps "Don't know", +we will show the explanation from this JSON and the original IG media. + +TOKEN BUDGET: +Be concise. Do not duplicate explanations inside questions. + +STRICT RULES: +1) Do not invent. Only include words/phrases/grammar that clearly appear in the video. +2) For every Japanese string containing kanji, provide a full hiragana reading in a separate field "kana". +3) Keep it small: + - grammar: max 2 + - vocab: max 10 + - conversation lines: max 10 + - key_phrases: max 10 +4) NO timestamps. NO evidence_quote. NO source fields. NO confidence fields. +5) Questions MUST reference items by id (do not repeat long explanations in questions). +6) Provide 1–3 questions per item (depending on usefulness). Prefer: cloze, multiple choice, match, register-choice. + +OUTPUT: +Return ONLY valid JSON (no markdown). UTF-8 Japanese. 
+ +SCHEMA: + +{ + "meta": { + "mode": "ja_only|en+ja", + "type": "grammar|vocab|conversation|mixed|unknown", + "title_en": "short title (5-8 words max)" + }, + "items": { + "grammar": [ + { + "id": "g1", + "pattern": "string", + "meaning_en": "one line", + "use_note_en": "1-2 lines max", + "register": "polite|neutral|casual|slang|formal|unknown", + "example": { "jp": "string", "kana": "string", "en": "string" } + } + ], + "vocab": [ + { + "id": "v1", + "jp": "surface form", + "kana": "hiragana reading", + "meaning_en": "short", + "register": "polite|neutral|casual|slang|formal|unknown", + "note_en": "optional, 1 line max", + "example": { "jp": "optional", "kana": "optional", "en": "optional" } + } + ], + "conversation": [ + { + "id": "c1", + "jp": "exact line", + "kana": "hiragana reading", + "en": "translation", + "register": "polite|neutral|casual|slang|mixed|unknown" + } + ], + "key_phrases": [ + { + "id": "k1", + "jp": "phrase", + "kana": "reading", + "meaning_en": "short", + "when_to_use_en": "1-2 lines max", + "register": "polite|neutral|casual|slang|formal|unknown" + } + ] + }, + "quiz": [ + { + "id": "q1", + "targets": ["k1"], + "type": "mc_meaning|mc_register|cloze|match|choose_best_reply", + "prompt_en": "string", + "payload": { + "sentence_jp": "optional", + "sentence_kana": "optional", + "blanked": "optional", + "options": ["A","B","C","D"], + "pairs": [{"left":"","right":""}] + }, + "answer": { + "correct_index": 0, + "correct_text": "optional" + } + } + ], + "ui_hints": { + "recommended_order": ["g1","k1","v1"], + "show_first": "quiz", + "explain_on_fail": true + } +} + +QUESTION GUIDELINES: +- For each grammar item: at least 1 cloze question + 1 meaning/usage question. +- For vocab/key_phrases: at least 1 meaning MC and optionally 1 register/situation question. +- For conversation lines: optionally “what does this mean” or “best reply”. +- Keep prompts short. Do not restate long explanations (shown after fail). + +Return ONLY the JSON object. 
No markdown fences. +""".strip() + + +def load_env_files(paths: list[Path]) -> None: + """ + Load simple KEY=VALUE pairs from one or more .env files without + overriding existing environment variables. + """ + seen = set() + for path in paths: + try: + resolved = path.resolve() + except FileNotFoundError: + continue + if resolved in seen or not resolved.exists(): + continue + seen.add(resolved) + + for line in resolved.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + + key, value = line.split("=", 1) + key = key.strip().removeprefix("export ").strip() + value = value.strip() + if value and len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}: + value = value[1:-1] + if key: + os.environ.setdefault(key, value) + + +def strip_code_fences(text: str) -> str: + # Remove ```json ... ``` or ``` ... ``` + text = text.strip() + text = re.sub(r"^\s*```(?:json)?\s*", "", text, flags=re.IGNORECASE) + text = re.sub(r"\s*```\s*$", "", text) + return text.strip() + + +def extract_json_object(text: str) -> Dict[str, Any]: + """ + Try to recover JSON if the model wrapped it with text or fences. 
+ """ + cleaned = strip_code_fences(text) + + # If it's already pure JSON: + try: + return json.loads(cleaned) + except Exception: + pass + + # Otherwise, take substring from first { to last } + start = cleaned.find("{") + end = cleaned.rfind("}") + if start == -1 or end == -1 or end <= start: + raise ValueError("Could not locate JSON object in model output.") + + snippet = cleaned[start : end + 1].strip() + return json.loads(snippet) + + +def validate_minimal_schema(obj: Dict[str, Any]) -> None: + # Minimal checks only (Gemini can be slightly variable) + if not isinstance(obj, dict): + raise ValueError("Top-level JSON is not an object.") + for key in ("meta", "items", "quiz"): + if key not in obj: + raise ValueError(f"Missing required top-level key: {key}") + if "title_en" not in obj["meta"]: + raise ValueError("meta.title_en missing") + if not isinstance(obj["quiz"], list): + raise ValueError("quiz must be an array") + + +def run_gemini_on_video( + video_path: Path, + video_url: str, + prompt: str, + *, + top_p: float, + temperature: float, + dynamic_thinking: bool, + max_output_tokens: int, + client: replicate.Client, + prefer_wait_seconds: Optional[int] = None, +) -> str: + """ + Calls Replicate model and returns raw text output. 
+ """ + inp = { + "top_p": top_p, + "temperature": temperature, + "dynamic_thinking": dynamic_thinking, + "max_output_tokens": max_output_tokens, + "prompt": prompt, + "images": [], + "videos": [video_url], + } + + try: + out = client.run("google/gemini-2.5-flash", input=inp) + if isinstance(out, str): + return out + if isinstance(out, list): + return "".join(str(x) for x in out) + return str(out) + except Exception as e: + raise RuntimeError(f"Replicate call failed for {video_path.name}: {e}") from e + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--data", default="data", help="Data directory containing .mp4 files (default: data)") + ap.add_argument("--prompt-file", default=None, help="Optional prompt.txt to override the default prompt") + ap.add_argument("--only-missing", action="store_true", help="Only process videos without an existing .json") + ap.add_argument("--overwrite", action="store_true", help="Overwrite existing .json outputs") + ap.add_argument("--sleep", type=float, default=0.0, help="Sleep seconds between requests (default: 0)") + ap.add_argument("--top-p", type=float, default=0.95) + ap.add_argument("--temperature", type=float, default=0.7) + ap.add_argument("--dynamic-thinking", action="store_true", default=True, + help="Enable dynamic_thinking (default: ON)") + ap.add_argument("--max-output-tokens", type=int, default=12000, + help="Max output tokens (default: 12000; raise if you need bigger JSON)") + ap.add_argument("--remote-base-url", default=None, + help="Base URL where the --data tree is mirrored (e.g., https://example.com/data)") + args = ap.parse_args() + + script_dir = Path(__file__).resolve().parent + load_env_files([Path.cwd() / ".env", script_dir / ".env"]) + + token = os.environ.get("REPLICATE_API_TOKEN") or os.environ.get("REPLICATE_API_KEY") + if token and not os.environ.get("REPLICATE_API_TOKEN"): + os.environ["REPLICATE_API_TOKEN"] = token # replicate library expects this name + if not token: + 
print("ERROR: REPLICATE_API_TOKEN not set.", file=sys.stderr) + sys.exit(2) + + base_url = args.remote_base_url or os.environ.get("REMOTE_BASE_URL") + if not base_url: + print("ERROR: --remote-base-url or REMOTE_BASE_URL env var is required (public URL of mirrored data)", file=sys.stderr) + sys.exit(2) + base_url = base_url.rstrip("/") + + client = replicate.Client() + + data_dir = Path(args.data).expanduser().resolve() + if not data_dir.exists(): + print(f"ERROR: data dir not found: {data_dir}", file=sys.stderr) + sys.exit(2) + + prompt = DEFAULT_PROMPT + if args.prompt_file: + prompt_path = Path(args.prompt_file).expanduser().resolve() + prompt = prompt_path.read_text(encoding="utf-8").strip() + + mp4s = sorted(data_dir.rglob("*.mp4")) + if not mp4s: + print(f"No .mp4 files found under {data_dir}") + return + + print(f"Found {len(mp4s)} videos under {data_dir}") + + for video_path in mp4s: + stem = video_path.stem + out_json = video_path.with_suffix(".json") + out_raw = video_path.with_suffix(".raw.txt") + rel_video = video_path.relative_to(data_dir) + video_url = f"{base_url}/{rel_video.as_posix()}" + + if out_json.exists() and args.only_missing: + print(f"SKIP (exists): {rel_video}") + continue + if out_json.exists() and (not args.overwrite) and (not args.only_missing): + print(f"SKIP (use --overwrite to replace): {rel_video}") + continue + + # Quick size warning for local uploads + size_mb = video_path.stat().st_size / (1024 * 1024) + if size_mb > 150: + print(f"WARNING: {video_path.name} is {size_mb:.1f}MB (>150MB). 
" + f"Downloads from the remote server may be slow.") + + print(f"RUN: {rel_video}") + + try: + raw = run_gemini_on_video( + video_path, + video_url, + prompt, + top_p=args.top_p, + temperature=args.temperature, + dynamic_thinking=True, # you asked for this explicitly + max_output_tokens=args.max_output_tokens, + client=client, + ) + + out_raw.write_text(raw, encoding="utf-8") + + obj = extract_json_object(raw) + validate_minimal_schema(obj) + + out_json.write_text(json.dumps(obj, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + print(f"OK -> {out_json.relative_to(data_dir)}") + + except Exception as e: + print(f"FAIL: {video_path.name}: {e}", file=sys.stderr) + # keep raw if we got it + if out_raw.exists(): + print(f" Raw output saved: {out_raw.name}", file=sys.stderr) + + if args.sleep > 0: + time.sleep(args.sleep) + + +if __name__ == "__main__": + main() diff --git a/package.json b/package.json new file mode 100644 index 0000000..e21524e --- /dev/null +++ b/package.json @@ -0,0 +1,16 @@ +{ + "name": "ig-japanese-quizzer", + "version": "1.0.0", + "private": true, + "workspaces": [ + "server", + "client" + ], + "scripts": { + "dev": "concurrently \"npm run dev --workspace server\" \"npm run dev --workspace client\"", + "build": "npm run build --workspace server && npm run build --workspace client" + }, + "devDependencies": { + "concurrently": "^8.2.2" + } +} diff --git a/prompt.txt b/prompt.txt new file mode 100644 index 0000000..943c4fa --- /dev/null +++ b/prompt.txt @@ -0,0 +1,116 @@ +You analyze an Instagram-style Japanese language video. +The video is either Japanese-only or English+Japanese. + +Goal: Create a compact learning JSON for a custom quiz website. +The website will ask the user questions; if the user is wrong or taps "Don't know", +we will show the explanation from this JSON and the original IG media. + +TOKEN BUDGET: +Be concise. Do not duplicate explanations inside questions. + +STRICT RULES: +1) Do not invent. 
Only include words/phrases/grammar that clearly appear in the video. +2) For every Japanese string containing kanji, provide a full hiragana reading in a separate field "kana". +3) Keep it small: + - grammar: max 2 + - vocab: max 10 + - conversation lines: max 10 + - key_phrases: max 10 +4) NO timestamps. NO evidence_quote. NO source fields. NO confidence fields. +5) Questions MUST reference items by id (do not repeat long explanations in questions). +6) Provide 1–3 questions per item (depending on usefulness). Prefer: cloze, multiple choice, match, register-choice. + +OUTPUT: +Return ONLY valid JSON (no markdown). UTF-8 Japanese. + +SCHEMA: + +{ + "meta": { + "mode": "ja_only|en+ja", + "type": "grammar|vocab|conversation|mixed|unknown", + "title_en": "short title (5-8 words max)" + }, + "items": { + "grammar": [ + { + "id": "g1", + "pattern": "string", + "meaning_en": "one line", + "use_note_en": "1-2 lines max", + "register": "polite|neutral|casual|slang|formal|unknown", + "example": { + "jp": "string", + "kana": "string (hiragana reading; required if jp has kanji)", + "en": "string" + } + } + ], + "vocab": [ + { + "id": "v1", + "jp": "surface form", + "kana": "hiragana reading (required if jp has kanji; for kana-only words keep as-is)", + "meaning_en": "short", + "register": "polite|neutral|casual|slang|formal|unknown", + "note_en": "optional, 1 line max", + "example": { + "jp": "optional", + "kana": "optional", + "en": "optional" + } + } + ], + "conversation": [ + { + "id": "c1", + "jp": "exact line", + "kana": "hiragana reading (required if jp has kanji)", + "en": "translation", + "register": "polite|neutral|casual|slang|mixed|unknown" + } + ], + "key_phrases": [ + { + "id": "k1", + "jp": "phrase", + "kana": "reading (hiragana if needed)", + "meaning_en": "short", + "when_to_use_en": "1-2 lines max", + "register": "polite|neutral|casual|slang|formal|unknown" + } + ] + }, + "quiz": [ + { + "id": "q1", + "targets": ["k1"], + "type": 
"mc_meaning|mc_register|cloze|match|choose_best_reply", + "prompt_en": "string", + "payload": { + "sentence_jp": "optional", + "sentence_kana": "optional", + "blanked": "optional (use ____ for blank)", + "options": ["A", "B", "C", "D"], + "pairs": [{"left":"", "right":""}] + }, + "answer": { + "correct_index": 0, + "correct_text": "optional (for non-mc types)" + } + } + ], + "ui_hints": { + "recommended_order": ["g1","k1","k2","v1"], + "show_first": "quiz", + "explain_on_fail": true + } +} + +QUESTION GUIDELINES: +- For each grammar item: make at least 1 cloze question from the example, and 1 meaning or usage question. +- For vocab/key_phrases: make at least 1 meaning multiple-choice and optionally 1 register or “best situation” question. +- For conversation lines: optionally make “choose the best reply” or “what does this line mean” questions. +- Keep prompts short. Do not restate long explanations. The website will show explanations after. + +Now analyze the video and output ONLY the JSON. 
\ No newline at end of file diff --git a/server/package.json b/server/package.json new file mode 100644 index 0000000..aa0c1c0 --- /dev/null +++ b/server/package.json @@ -0,0 +1,23 @@ +{ + "name": "ig-japanese-quizzer-server", + "version": "1.0.0", + "private": true, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "scripts": { + "dev": "ts-node-dev --respawn --transpile-only src/index.ts", + "build": "tsc --project tsconfig.json", + "start": "node dist/index.js" + }, + "dependencies": { + "express": "^4.19.2", + "glob": "^10.3.12", + "zod": "^3.23.8" + }, + "devDependencies": { + "@types/express": "^4.17.21", + "@types/node": "^22.4.1", + "ts-node-dev": "^2.0.0", + "typescript": "^5.5.4" + } +} diff --git a/server/src/index.ts b/server/src/index.ts new file mode 100644 index 0000000..7b24fa1 --- /dev/null +++ b/server/src/index.ts @@ -0,0 +1,272 @@
import express from 'express';
import path from 'path';
import fs from 'fs/promises';
import { glob } from 'glob';
import { z } from 'zod';

// Data root: DATA_ROOT env var when set, otherwise <repo>/data relative to this file.
const DEFAULT_DATA_ROOT = path.resolve(__dirname, '..', '..', 'data');
const DATA_ROOT = process.env.DATA_ROOT ? path.resolve(process.env.DATA_ROOT) : DEFAULT_DATA_ROOT;

// Tolerant schemas for the per-video JSON sidecar; every key is optional.
const metaSchema = z
  .object({
    mode: z.string().optional(),
    type: z.string().optional(),
    title_en: z.string().optional(),
  })
  .partial()
  .default({});

const itemsSchema = z
  .object({
    grammar: z.array(z.record(z.any())).default([]),
    vocab: z.array(z.record(z.any())).default([]),
    conversation: z.array(z.record(z.any())).default([]),
    key_phrases: z.array(z.record(z.any())).default([]),
  })
  .partial()
  .default({
    grammar: [],
    vocab: [],
    conversation: [],
    key_phrases: [],
  });

const quizSchema = z
  .array(
    z
      .object({
        id: z.union([z.string(), z.number()]).optional(),
        targets: z.array(z.union([z.string(), z.number()])).default([]),
        type: z.string().default(''),
        prompt_en: z.string().optional(),
        payload: z.record(z.any()).default({}),
        answer: z.record(z.any()).default({}),
      })
      .partial()
  )
  .default([]);

const uiHintsSchema = z
  .object({
    recommended_order: z.array(z.union([z.string(), z.number()])).default([]),
    show_first: z.string().optional(),
    explain_on_fail: z.boolean().optional(),
  })
  .partial()
  .default({ recommended_order: [] });

const entrySchema = z
  .object({
    meta: metaSchema,
    items: itemsSchema,
    quiz: quizSchema,
    ui_hints: uiHintsSchema,
  })
  .partial()
  .passthrough()
  .default({
    meta: {},
    items: { grammar: [], vocab: [], conversation: [], key_phrases: [] },
    quiz: [],
    ui_hints: { recommended_order: [] },
  });

type EntryData = z.infer<typeof entrySchema>;

/** One indexed mp4 + json pair, keyed in `entryIndex` by `id`. */
interface EntryRecord {
  id: string;
  title: string;
  meta: EntryData['meta'];
  items: EntryData['items'];
  quiz: EntryData['quiz'];
  ui_hints: EntryData['ui_hints'];
  videoPath: string;
  jsonPath: string;
  video_url: string;
  counts: {
    grammar: number;
    vocab: number;
    key_phrases: number;
    conversation: number;
    quiz: number;
  };
}

// In-memory index, rebuilt by loadEntries().
const entryIndex = new Map<string, EntryRecord>();

/** True when `targetPath` resolves to DATA_ROOT itself or to a path below it. */
function ensureWithinDataRoot(targetPath: string) {
  const resolved = path.resolve(targetPath);
  return resolved === DATA_ROOT || resolved.startsWith(DATA_ROOT + path.sep);
}

/** True when `fs.access` succeeds for `targetPath`. */
async function fileExists(targetPath: string) {
  try {
    await fs.access(targetPath);
    return true;
  } catch {
    return false;
  }
}

/** Convert an OS-specific relative path into a '/'-separated id. */
function toPosixId(relativePath: string) {
  return relativePath.split(path.sep).join('/');
}

/** Build the `/data/...mp4` URL for an entry id, URL-encoding each path segment. */
function buildVideoUrl(id: string) {
  const encoded = id
    .split('/')
    .filter(Boolean)
    .map((segment) => encodeURIComponent(segment))
    .join('/');
  return `/data/${encoded}.mp4`;
}

/** Count items per category and quiz questions; missing arrays count as 0. */
function computeCounts(items: EntryData['items'], quiz: EntryData['quiz']) {
  return {
    grammar: items?.grammar?.length || 0,
    vocab: items?.vocab?.length || 0,
    key_phrases: items?.key_phrases?.length || 0,
    conversation: items?.conversation?.length || 0,
    quiz: quiz?.length || 0,
  };
}

/**
 * True when a request path (relative to the `/data` mount, still percent-encoded)
 * names the mp4 of an indexed entry.
 */
function isIndexedVideoPath(requestPath: string) {
  let decoded: string;
  try {
    decoded = decodeURIComponent(requestPath);
  } catch {
    return false;
  }
  if (!/\.mp4$/i.test(decoded)) {
    return false;
  }
  const id = decoded.replace(/^\/+/, '').replace(/\.mp4$/i, '');
  return entryIndex.has(id);
}

/**
 * Rebuild `entryIndex` from every `*.mp4` under DATA_ROOT that has a sibling
 * `.json` with the same base name. Unreadable or invalid JSON is replaced
 * by schema defaults, so the entry is still listed.
 */
async function loadEntries() {
  entryIndex.clear();

  const dataExists = await fileExists(DATA_ROOT);
  if (!dataExists) {
    console.warn(`Data root not found at ${DATA_ROOT}`);
    return;
  }

  const mp4Paths = await glob('**/*.mp4', { cwd: DATA_ROOT, absolute: true });
  for (const mp4Path of mp4Paths) {
    const resolvedMp4 = path.resolve(mp4Path);
    if (!ensureWithinDataRoot(resolvedMp4)) {
      continue;
    }

    const dir = path.dirname(resolvedMp4);
    const baseName = path.basename(resolvedMp4, '.mp4');
    const jsonPath = path.join(dir, `${baseName}.json`);

    if (!(await fileExists(jsonPath))) {
      continue;
    }

    const resolvedJson = path.resolve(jsonPath);
    if (!ensureWithinDataRoot(resolvedJson)) {
      continue;
    }

    let parsed: EntryData | null = null;
    try {
      const raw = await fs.readFile(resolvedJson, 'utf-8');
      const json = JSON.parse(raw);
      const safe = entrySchema.safeParse(json);
      parsed = safe.success ? safe.data : entrySchema.parse({});
      if (!safe.success) {
        console.warn(`Entry at ${resolvedJson} parsed with defaults due to validation issues.`);
      }
    } catch (err) {
      console.warn(`Failed to parse ${resolvedJson}:`, err);
      parsed = entrySchema.parse({});
    }

    // id = path from DATA_ROOT to the mp4, without the extension, '/'-separated.
    const relative = path.relative(DATA_ROOT, resolvedMp4);
    const id = toPosixId(relative.replace(/\.mp4$/i, ''));
    const title = parsed.meta?.title_en?.trim() || baseName;
    const video_url = buildVideoUrl(id);
    const counts = computeCounts(parsed.items || { grammar: [], vocab: [], conversation: [], key_phrases: [] }, parsed.quiz || []);

    entryIndex.set(id, {
      id,
      title,
      meta: parsed.meta || {},
      items: parsed.items || { grammar: [], vocab: [], conversation: [], key_phrases: [] },
      quiz: parsed.quiz || [],
      ui_hints: parsed.ui_hints || { recommended_order: [] },
      videoPath: resolvedMp4,
      jsonPath: resolvedJson,
      video_url,
      counts,
    });
  }

  console.log(`Loaded ${entryIndex.size} entries from data directory.`);
}

/** Public shape of an entry: everything except the on-disk paths. */
function sanitizeEntryResponse(entry: EntryRecord) {
  return {
    id: entry.id,
    title: entry.title,
    meta: entry.meta || {},
    items: entry.items || { grammar: [], vocab: [], conversation: [], key_phrases: [] },
    quiz: entry.quiz || [],
    ui_hints: entry.ui_hints || { recommended_order: [] },
    video_url: entry.video_url,
    counts: entry.counts,
  };
}

/** Index the data directory, then start the HTTP server. */
async function main() {
  await loadEntries();

  const app = express();
  const port = process.env.PORT || 5174;

  app.disable('x-powered-by');

  // Only indexed videos reach the static handler. Without this gate
  // express.static would also serve the JSON and .raw.txt sidecars.
  app.use('/data', (req, res, next) => {
    if (isIndexedVideoPath(req.path)) {
      next();
      return;
    }
    res.status(404).json({ error: 'Not found' });
  });
  app.use('/data', express.static(DATA_ROOT));

  app.get('/', (_req, res) => {
    res.type('text/plain').send('IG Japanese Quizzer backend is running. See /api/entries.');
  });

  app.get('/api/entries', (_req, res) => {
    const entries = Array.from(entryIndex.values())
      .map((entry) => ({
        id: entry.id,
        title: entry.title,
        mode: entry.meta?.mode,
        type: entry.meta?.type,
        counts: entry.counts,
        video_url: entry.video_url,
      }))
      .sort((a, b) => a.title.localeCompare(b.title, undefined, { sensitivity: 'base' }));

    res.json(entries);
  });

  app.get('/api/entry', (req, res) => {
    const idParam = req.query.id;
    if (!idParam || typeof idParam !== 'string') {
      res.status(400).json({ error: 'Missing id query param' });
      return;
    }

    const entry = entryIndex.get(idParam);
    if (!entry) {
      res.status(404).json({ error: 'Entry not found' });
      return;
    }

    res.json(sanitizeEntryResponse(entry));
  });

  app.get('/api/health', (_req, res) => {
    res.json({ ok: true, entries: entryIndex.size });
  });

  app.listen(port, () => {
    console.log(`Server listening on http://localhost:${port}`);
    console.log(`Data root: ${DATA_ROOT}`);
    console.log(`Entries loaded: ${entryIndex.size}`);
  });
}

main().catch((err) => {
  console.error('Failed to start server', err);
  process.exit(1);
});
diff --git a/server/tsconfig.json b/server/tsconfig.json new file mode 100644 index 0000000..673fe1e --- /dev/null +++ b/server/tsconfig.json @@ -0,0 +1,17 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "moduleResolution": "node", + "rootDir": "src", + "outDir": "dist", + "esModuleInterop": true, + "declaration": true, + "forceConsistentCasingInFileNames": true, + "skipLibCheck": true, + "strict": false, + "resolveJsonModule": true, + "types": ["node"] + }, + "include": ["src/**/*"] +}