commit cb26424173d77ef41865e2a10b554f6969f2de54 Author: Victor Giers Date: Tue Sep 9 17:29:49 2025 +0200 initial commit diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..6593630 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,13 @@ + + + + + + + 10AM + + +
+ + + diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..3f5fae0 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,30 @@ +{ + "name": "10am-frontend", + "private": true, + "version": "0.0.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "preview": "vite preview" + }, + "dependencies": { + "@react-three/drei": "^9.92.5", + "@react-three/fiber": "^8.15.12", + "framer-motion": "^10.16.0", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "three": "^0.158.0" + }, + "devDependencies": { + "@types/react": "^18.2.37", + "@types/react-dom": "^18.2.15", + "@types/three": "^0.158.0", + "@vitejs/plugin-react": "^4.1.0", + "autoprefixer": "^10.4.16", + "postcss": "^8.4.32", + "tailwindcss": "^3.3.6", + "typescript": "^5.2.2", + "vite": "^5.0.0" + } +} diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js new file mode 100644 index 0000000..2e7af2b --- /dev/null +++ b/frontend/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/frontend/public/earth-specular.jpg b/frontend/public/earth-specular.jpg new file mode 100644 index 0000000..22f91c1 --- /dev/null +++ b/frontend/public/earth-specular.jpg @@ -0,0 +1,7 @@ + + +301 Moved Permanently + +

Moved Permanently

+

The document has moved here.

+ diff --git a/frontend/public/world-map.png b/frontend/public/world-map.png new file mode 100644 index 0000000..81c4238 Binary files /dev/null and b/frontend/public/world-map.png differ diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx new file mode 100644 index 0000000..1078789 --- /dev/null +++ b/frontend/src/App.tsx @@ -0,0 +1,477 @@ +import React, { useState, useEffect, useRef, useCallback } from 'react'; +import { Canvas, useFrame, ThreeEvent, useLoader } from '@react-three/fiber'; +import * as THREE from 'three'; +import { motion, AnimatePresence } from 'framer-motion'; + +// ---------- Types ---------- +interface TenAmResult { + country: string; + capital: string; + tzid: string; + local_time: string; + utc_now: string; + /** English summary or null */ + summary_en: string | null; + /** German translation or null */ + summary_de: string | null; + /** Japanese translation or null */ + summary_jp: string | null; + /** Optional summary alias when backend is queried with ?lang */ + summary?: string | null; +} +interface TenAmResponse { + count: number; + exact: boolean; + at_utc: string; + results: TenAmResult[]; +} + +/** New grouped endpoint types */ +interface NowGroup { + label: string; // "HH:MM" + results: TenAmResult[]; +} +interface NowGroupsResponse { + at_utc: string; + groups: NowGroup[]; + next_cursor: string | null; +} + +// ---------- Cookie & language helpers ---------- +function getCookie(name: string): string | null { + const value = `; ${document.cookie}`; + const parts = value.split(`; ${name}=`); + if (parts.length === 2) { + const part = parts.pop(); + if (part) { + return part.split(';').shift() || null; + } + } + return null; +} + +function setCookie(name: string, value: string, maxAgeSeconds: number = 60 * 60 * 24 * 180) { + document.cookie = `${name}=${value}; Max-Age=${maxAgeSeconds}; path=/`; +} + +function detectDefaultLang(): string { + // 1. 
Cookie + const ck = getCookie('lang'); + if (ck && ['en', 'de', 'jp'].includes(ck)) return ck; + // 2. Browser preferences + if (typeof navigator !== 'undefined') { + const langs = (navigator.languages || [navigator.language || '']).map(l => l.toLowerCase()); + for (const l of langs) { + if (l.startsWith('de')) return 'de'; + if (l.startsWith('ja') || l.startsWith('jp')) return 'jp'; + } + } + // Default + return 'en'; +} + +// ---------- Group fetching helpers ---------- +function msUntilNextQuarter(): number { + const now = new Date(); + const mins = now.getUTCMinutes(); + const secs = now.getUTCSeconds(); + const nextQuarter = ((Math.floor(mins / 15) + 1) * 15) % 60; + const minDelta = (nextQuarter - mins + 60) % 60; + return minDelta * 60_000 - secs * 1000; +} + +// ---------- Pure helpers (NO hooks here) ---------- +const DEG2RAD = THREE.MathUtils.DEG2RAD; + +function rotationDegFor10am(nowUtc: Date): number { + // 15° per hour; local 10:00 line in relation to UTC + const utcHours = nowUtc.getUTCHours() + nowUtc.getUTCMinutes() / 60 + nowUtc.getUTCSeconds() / 3600; + let lon = 15 * (10 + utcHours); + lon -= 30; // small framing shift + return lon; +} + +// ---------- Globe ---------- +function Globe({ + onManualRotate, + onDragStart, +}: { + onManualRotate: (scrubOffset: number, isDragEnd: boolean) => void; + onDragStart?: (scrubOffset: number) => void; +}) { + const globeRef = useRef(null); + const texture = useLoader(THREE.TextureLoader, '/world-map.png'); // your file in /public + + const isDragging = useRef(false); + const previousMousePos = useRef({ x: 0, y: 0 }); + + const [scrubOffset, setScrubOffset] = useState(0); + const scrubOffsetRef = useRef(0); + useEffect(() => { scrubOffsetRef.current = scrubOffset; }, [scrubOffset]); + + const detachRef = useRef<(() => void) | null>(null); + + useFrame(() => { + if (!globeRef.current) return; + const now = new Date(); + const rotDeg = rotationDegFor10am(now); + globeRef.current.rotation.y = rotDeg * DEG2RAD + 
scrubOffset; + }); + + const handleWindowMove = useCallback((ev: PointerEvent) => { + if (!isDragging.current) return; + const deltaX = ev.clientX - previousMousePos.current.x; + previousMousePos.current = { x: ev.clientX, y: ev.clientY }; + setScrubOffset((prev) => { + const next = Math.min(0, prev + deltaX * 0.005); // allow “past” only + onManualRotate(next, false); + return next; + }); + }, [onManualRotate]); + + const handleWindowUp = useCallback((_ev: Event) => { + if (!isDragging.current) return; + isDragging.current = false; + onManualRotate(scrubOffsetRef.current, true); + // cleanup listeners + detachRef.current?.(); + detachRef.current = null; + }, [onManualRotate]); + + const attachWindowListeners = useCallback(() => { + const move = (ev: PointerEvent) => handleWindowMove(ev); + const up = (ev: Event) => handleWindowUp(ev); + + window.addEventListener('pointermove', move, { passive: true }); + window.addEventListener('pointerup', up, { passive: true }); + window.addEventListener('pointercancel', up, { passive: true }); + window.addEventListener('blur', up, { passive: true }); + + detachRef.current = () => { + window.removeEventListener('pointermove', move); + window.removeEventListener('pointerup', up); + window.removeEventListener('pointercancel', up); + window.removeEventListener('blur', up); + }; + }, [handleWindowMove, handleWindowUp]); + + // Ensure listeners are removed if component unmounts mid-drag + useEffect(() => { + return () => { + detachRef.current?.(); + detachRef.current = null; + isDragging.current = false; + }; + }, []); + + const onPointerDown = (event: ThreeEvent) => { + event.stopPropagation(); + isDragging.current = true; + previousMousePos.current = { x: event.clientX, y: event.clientY }; + (event.nativeEvent.target as HTMLElement).style.cursor = 'grabbing'; + onDragStart?.(scrubOffset); + attachWindowListeners(); + }; + + // If user releases inside the canvas, we still finalize here; + // window 'pointerup' will also run but we 
guard on isDragging. + const onPointerUp = (event: ThreeEvent) => { + event.stopPropagation(); + if (!isDragging.current) return; + isDragging.current = false; + (event.nativeEvent.target as HTMLElement).style.cursor = 'grab'; + onManualRotate(scrubOffsetRef.current, true); + detachRef.current?.(); + detachRef.current = null; + }; + + // While dragging, we let the WINDOW listener handle movement to catch outside-canvas drags. + const onPointerMove = (event: ThreeEvent) => { + if (!isDragging.current) return; + event.stopPropagation(); + // No-op here; window 'pointermove' handles the updates (avoids double-handling). + }; + + // Leaving the canvas shouldn't end the drag; window listeners keep handling it. + const onPointerOut = (event: ThreeEvent) => { + if (!isDragging.current) return; + event.stopPropagation(); + // Intentionally do nothing – drag continues until pointerup anywhere. + }; + + return ( + + + + + + + ); +} + +// ---------- Country card ---------- +const CountryCard = ({ country, lang }: { country: TenAmResult; lang: string }) => { + // Determine which summary to display based on current language. Fallback to English. + let display: string | null = null; + if (lang === 'de') { + display = country.summary_de ?? country.summary_en ?? null; + } else if (lang === 'jp') { + display = country.summary_jp ?? country.summary_en ?? null; + } else { + display = country.summary_en ?? null; + } + return ( + +

{country.country}

+

+ {country.capital} / {country.tzid} +

+ {display &&

{display}

} +
+ ); +}; + +// ---------- App ---------- +const App: React.FC = () => { + const [scrubOffset, setScrubOffset] = useState(0); + // Selected language for summaries (en/de/jp). Detect default on mount. + const [lang, setLang] = useState(() => detectDefaultLang()); + // Controls visibility of burger menu + const [menuOpen, setMenuOpen] = useState(false); + + // New grouped sections state + const [sections, setSections] = useState([]); + const [nextCursor, setNextCursor] = useState(null); + const [isLoading, setIsLoading] = useState(false); + + // Globe drag helpers + const startScrubRef = useRef(0); + const ROTATE_EPS = 0.0015; // radians (~0.086° ≈ ~20s) + + // (legacy ID-first flow removed; we now use grouped sections + lazy load + quarter-hour refresh) + + // update logic when user rotates the globe + const handleManualRotate = (newScrubOffset: number, isDragEnd: boolean) => { + setScrubOffset(newScrubOffset); + + if (isDragEnd) { + const movedEnough = Math.abs(newScrubOffset - startScrubRef.current) > ROTATE_EPS; + if (movedEnough) { + // Reset sections for the new 'at' and fetch the first page + setSections([]); + setNextCursor(null); + fetchGroups({ reset: true }); + } + } + }; + + const daysScrubbed = Math.floor(-scrubOffset / (2 * Math.PI)); + + // Build 'at' from scrub offset (same math as before) + function atFromScrubOffset(): Date { + const now = new Date(); + const deltaHours = (scrubOffset / DEG2RAD) / 15; + return new Date(now.getTime() + deltaHours * 3600 * 1000); + } + + async function fetchGroups({ reset, cursor }: { reset?: boolean; cursor?: string } = {}) { + if (isLoading) return; + setIsLoading(true); + try { + const params = new URLSearchParams(); + const at = atFromScrubOffset(); + params.set("at", at.toISOString()); + params.set("page_size", "6"); // groups per page; tweak as you like + if (cursor) params.set("cursor", cursor); + + const res = await fetch(`/now-groups?${params.toString()}`); + if (!res.ok) return; + const data: 
NowGroupsResponse = await res.json(); + + setSections(prev => { + const base = reset ? [] : prev; + // De-duplicate by label if re-fetch overlaps + const existing = new Set(base.map(g => g.label)); + const appended = data.groups.filter(g => !existing.has(g.label)); + return [...base, ...appended]; + }); + setNextCursor(data.next_cursor); + } catch (e) { + console.error("fetchGroups failed", e); + } finally { + setIsLoading(false); + } + } + + // Initial load + quarter-hour aligned refresh + useEffect(() => { + let timeoutId: number; + let intervalId: number; + + const prime = async () => { + // first page + await fetchGroups({ reset: true }); + // schedule next quarter tick + const ms = msUntilNextQuarter(); + timeoutId = window.setTimeout(() => { + // on tick: reset and refetch + fetchGroups({ reset: true }); + // then every 15 minutes + intervalId = window.setInterval(() => { + fetchGroups({ reset: true }); + }, 15 * 60 * 1000); + }, Math.max(500, ms)); // minimum small delay to avoid 0ms storm + }; + + prime(); + + return () => { + if (timeoutId) clearTimeout(timeoutId); + if (intervalId) clearInterval(intervalId); + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const loaderRef = useRef(null); + + useEffect(() => { + const el = loaderRef.current; + if (!el) return; + + const io = new IntersectionObserver((entries) => { + const first = entries[0]; + if (first.isIntersecting && nextCursor && !isLoading) { + fetchGroups({ cursor: nextCursor }); + } + }, { root: null, rootMargin: "600px", threshold: 0 }); + + io.observe(el); + return () => io.disconnect(); + }, [nextCursor, isLoading]); // eslint-disable-line react-hooks/exhaustive-deps + + return ( +
+
+
+

10AM

+ + {/* Burger pinned within the 1100px container */} + +
+ + {/* Subtitle (centered, within container) */} +
+

+ {daysScrubbed === 0 ? 'Today' : daysScrubbed === 1 ? 'Yesterday' : `${daysScrubbed} days ago`} +

+
+ + {/* Give Canvas a concrete height; it will fill this container */} +
+ + + + { startScrubRef.current = s; }} /> + +
+
+ {/* Language selection overlay (burger menu) */} + {menuOpen && ( +
+ {/* dim background */} +
setMenuOpen(false)} /> + + {/* Panel: top dropdown on small screens; right sidebar on md+ */} +
+

Language

+ + + + + + +
+
+ )} +
+ {sections.map((group) => ( +
+
+

{group.label}

+
+
+ + {group.results.map((country) => ( + + ))} + +
+
+ ))} + + {/* Sentinel for infinite scroll */} +
+ {isLoading ? "Loading…" : (nextCursor ? "Scroll for more…" : "End of list")} +
+
+
+ ); +}; + +export default App; \ No newline at end of file diff --git a/frontend/src/index.css b/frontend/src/index.css new file mode 100644 index 0000000..f290d9a --- /dev/null +++ b/frontend/src/index.css @@ -0,0 +1,38 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +@layer base { + * { + box-sizing: border-box; + } + + html, body { + margin: 0; + padding: 0; + font-family: 'Helvetica Neue', Helvetica, Arial, system-ui, -apple-system, sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + } + + body { + background-color: #000000; + color: #ffffff; + overflow-x: hidden; + } + + #root { + width: 100%; + height: 100vh; + } +} + +@layer utilities { + .text-shadow { + text-shadow: 0 2px 4px rgba(0,0,0,0.5); + } + + .backdrop-blur-subtle { + backdrop-filter: blur(2px); + } +} diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx new file mode 100644 index 0000000..3d7150d --- /dev/null +++ b/frontend/src/main.tsx @@ -0,0 +1,10 @@ +import React from 'react' +import ReactDOM from 'react-dom/client' +import App from './App.tsx' +import './index.css' + +ReactDOM.createRoot(document.getElementById('root')!).render( + + + , +) diff --git a/frontend/tailwind.config.js b/frontend/tailwind.config.js new file mode 100644 index 0000000..c7538a6 --- /dev/null +++ b/frontend/tailwind.config.js @@ -0,0 +1,18 @@ +/** @type {import('tailwindcss').Config} */ +export default { + content: [ + "./index.html", + "./src/**/*.{js,ts,jsx,tsx}", + ], + theme: { + extend: { + fontFamily: { + 'helvetica': ['Helvetica Neue', 'Helvetica', 'Arial', 'system-ui', '-apple-system', 'sans-serif'], + }, + colors: { + 'gray-850': '#1a1a1a', + } + }, + }, + plugins: [], +} diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json new file mode 100644 index 0000000..a7fc6fb --- /dev/null +++ b/frontend/tsconfig.json @@ -0,0 +1,25 @@ +{ + "compilerOptions": { + "target": "ES2020", + "useDefineForClassFields": true, + "lib": ["ES2020", "DOM", 
"DOM.Iterable"], + "module": "ESNext", + "skipLibCheck": true, + + /* Bundler mode */ + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "resolveJsonModule": true, + "isolatedModules": true, + "noEmit": true, + "jsx": "react-jsx", + + /* Linting */ + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true + }, + "include": ["src"], + "references": [{ "path": "./tsconfig.node.json" }] +} diff --git a/frontend/tsconfig.node.json b/frontend/tsconfig.node.json new file mode 100644 index 0000000..42872c5 --- /dev/null +++ b/frontend/tsconfig.node.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "composite": true, + "skipLibCheck": true, + "module": "ESNext", + "moduleResolution": "bundler", + "allowSyntheticDefaultImports": true + }, + "include": ["vite.config.ts"] +} diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts new file mode 100644 index 0000000..6b9d797 --- /dev/null +++ b/frontend/vite.config.ts @@ -0,0 +1,23 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +export default defineConfig({ + plugins: [react()], + // inside export default defineConfig({ ... 
}) + server: { + proxy: { + '/ten-am': 'http://127.0.0.1:8000', + '/ten-am-id': 'http://127.0.0.1:8000', + '/state': 'http://127.0.0.1:8000', + + // NEW: grouped endpoint(s) + '/now-groups': 'http://127.0.0.1:8000', + '/api/now-groups': 'http://127.0.0.1:8000', + } + }, + // Optional: keep this if you do "npm run build" to ship static files with Python + build: { + outDir: '../server/static', + emptyOutDir: true, + }, +}) \ No newline at end of file diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..bbc49d9 --- /dev/null +++ b/run.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")" && pwd)" + +# Helpful banner +echo "10AM – dev runner" +echo "Frontend will be on http://localhost:5173" +echo "Backend API on http://127.0.0.1:8000" +echo + +# Ensure deps +command -v python3 >/dev/null || { echo "python3 not found"; exit 1; } +command -v npm >/dev/null || { echo "npm not found"; exit 1; } + +# Start backend (serves /ten-am and /state). It will also start the collector (see patch below). +( + cd "$ROOT/server" + python -m venv .venv + source .venv/bin/activate + pip install -r requirements.txt + export TENAM_DATA="$ROOT/server/capitals_tz.json" + python3 tenam_full_server.py --host 127.0.0.1 --port 8000 +) & +BACKEND_PID=$! + +# Start frontend (Vite dev) with proxy to backend (already configured in vite.config.ts) +( + cd "$ROOT/frontend" + if [ ! -d node_modules ]; then + echo "Installing frontend deps…" + npm install + fi + npm run dev -- --host --port 5173 +) & +FRONTEND_PID=$! + +# Cleanup on exit +cleanup() { + echo + echo "Shutting down…" + kill "$BACKEND_PID" "$FRONTEND_PID" 2>/dev/null || true + wait 2>/dev/null || true +} +trap cleanup INT TERM + +# Info +echo "Open: http://localhost:5173" +echo "Press Ctrl+C to stop." 
+wait \ No newline at end of file diff --git a/server/capitals_tz.json b/server/capitals_tz.json new file mode 100644 index 0000000..ae3926c --- /dev/null +++ b/server/capitals_tz.json @@ -0,0 +1,1022 @@ +[ + { + "country": "Algeria", + "capital": "Algiers", + "tzid": "Africa/Algiers" + }, + { + "country": "Angola", + "capital": "Luanda", + "tzid": "Africa/Luanda" + }, + { + "country": "Benin", + "capital": "Porto-Novo", + "tzid": "Africa/Porto-Novo" + }, + { + "country": "Botswana", + "capital": "Gaborone", + "tzid": "Africa/Gaborone" + }, + { + "country": "Burkina Faso", + "capital": "Ouagadougou", + "tzid": "Africa/Ouagadougou" + }, + { + "country": "Burundi", + "capital": "Gitega", + "tzid": "Africa/Bujumbura" + }, + { + "country": "Cabo Verde", + "capital": "Praia", + "tzid": "Atlantic/Cape_Verde" + }, + { + "country": "Cameroon", + "capital": "Yaoundé", + "tzid": "Africa/Douala" + }, + { + "country": "Central African Republic", + "capital": "Bangui", + "tzid": "Africa/Bangui" + }, + { + "country": "Chad", + "capital": "N'Djamena", + "tzid": "Africa/Ndjamena" + }, + { + "country": "Comoros", + "capital": "Moroni", + "tzid": "Indian/Comoro" + }, + { + "country": "Congo (Republic of the)", + "capital": "Brazzaville", + "tzid": "Africa/Brazzaville" + }, + { + "country": "Congo (Democratic Republic of the)", + "capital": "Kinshasa", + "tzid": "Africa/Kinshasa" + }, + { + "country": "Côte d’Ivoire", + "capital": "Yamoussoukro", + "tzid": "Africa/Abidjan" + }, + { + "country": "Djibouti", + "capital": "Djibouti", + "tzid": "Africa/Djibouti" + }, + { + "country": "Egypt", + "capital": "Cairo", + "tzid": "Africa/Cairo" + }, + { + "country": "Equatorial Guinea", + "capital": "Malabo", + "tzid": "Africa/Malabo" + }, + { + "country": "Eritrea", + "capital": "Asmara", + "tzid": "Africa/Asmara" + }, + { + "country": "Eswatini", + "capital": "Mbabane", + "tzid": "Africa/Mbabane" + }, + { + "country": "Ethiopia", + "capital": "Addis Ababa", + "tzid": "Africa/Addis_Ababa" + 
}, + { + "country": "Gabon", + "capital": "Libreville", + "tzid": "Africa/Libreville" + }, + { + "country": "Gambia", + "capital": "Banjul", + "tzid": "Africa/Banjul" + }, + { + "country": "Ghana", + "capital": "Accra", + "tzid": "Africa/Accra" + }, + { + "country": "Guinea", + "capital": "Conakry", + "tzid": "Africa/Conakry" + }, + { + "country": "Guinea-Bissau", + "capital": "Bissau", + "tzid": "Africa/Bissau" + }, + { + "country": "Kenya", + "capital": "Nairobi", + "tzid": "Africa/Nairobi" + }, + { + "country": "Lesotho", + "capital": "Maseru", + "tzid": "Africa/Maseru" + }, + { + "country": "Liberia", + "capital": "Monrovia", + "tzid": "Africa/Monrovia" + }, + { + "country": "Libya", + "capital": "Tripoli", + "tzid": "Africa/Tripoli" + }, + { + "country": "Madagascar", + "capital": "Antananarivo", + "tzid": "Indian/Antananarivo" + }, + { + "country": "Malawi", + "capital": "Lilongwe", + "tzid": "Africa/Blantyre" + }, + { + "country": "Mali", + "capital": "Bamako", + "tzid": "Africa/Bamako" + }, + { + "country": "Mauritania", + "capital": "Nouakchott", + "tzid": "Africa/Nouakchott" + }, + { + "country": "Mauritius", + "capital": "Port Louis", + "tzid": "Indian/Mauritius" + }, + { + "country": "Morocco", + "capital": "Rabat", + "tzid": "Africa/Casablanca" + }, + { + "country": "Mozambique", + "capital": "Maputo", + "tzid": "Africa/Maputo" + }, + { + "country": "Namibia", + "capital": "Windhoek", + "tzid": "Africa/Windhoek" + }, + { + "country": "Niger", + "capital": "Niamey", + "tzid": "Africa/Niamey" + }, + { + "country": "Nigeria", + "capital": "Abuja", + "tzid": "Africa/Lagos" + }, + { + "country": "Rwanda", + "capital": "Kigali", + "tzid": "Africa/Kigali" + }, + { + "country": "São Tomé and Príncipe", + "capital": "São Tomé", + "tzid": "Africa/Sao_Tome" + }, + { + "country": "Senegal", + "capital": "Dakar", + "tzid": "Africa/Dakar" + }, + { + "country": "Seychelles", + "capital": "Victoria", + "tzid": "Indian/Mahe" + }, + { + "country": "Sierra Leone", + 
"capital": "Freetown", + "tzid": "Africa/Freetown" + }, + { + "country": "Somalia", + "capital": "Mogadishu", + "tzid": "Africa/Mogadishu" + }, + { + "country": "South Africa", + "capital": "Pretoria", + "tzid": "Africa/Johannesburg" + }, + { + "country": "South Sudan", + "capital": "Juba", + "tzid": "Africa/Juba" + }, + { + "country": "Sudan", + "capital": "Khartoum", + "tzid": "Africa/Khartoum" + }, + { + "country": "Tanzania", + "capital": "Dodoma", + "tzid": "Africa/Dar_es_Salaam" + }, + { + "country": "Togo", + "capital": "Lomé", + "tzid": "Africa/Lome" + }, + { + "country": "Tunisia", + "capital": "Tunis", + "tzid": "Africa/Tunis" + }, + { + "country": "Uganda", + "capital": "Kampala", + "tzid": "Africa/Kampala" + }, + { + "country": "Zambia", + "capital": "Lusaka", + "tzid": "Africa/Lusaka" + }, + { + "country": "Zimbabwe", + "capital": "Harare", + "tzid": "Africa/Harare" + }, + { + "country": "Afghanistan", + "capital": "Kabul", + "tzid": "Asia/Kabul" + }, + { + "country": "Armenia", + "capital": "Yerevan", + "tzid": "Asia/Yerevan" + }, + { + "country": "Azerbaijan", + "capital": "Baku", + "tzid": "Asia/Baku" + }, + { + "country": "Bahrain", + "capital": "Manama", + "tzid": "Asia/Bahrain" + }, + { + "country": "Bangladesh", + "capital": "Dhaka", + "tzid": "Asia/Dhaka" + }, + { + "country": "Bhutan", + "capital": "Thimphu", + "tzid": "Asia/Thimphu" + }, + { + "country": "Brunei", + "capital": "Bandar Seri Begawan", + "tzid": "Asia/Brunei" + }, + { + "country": "Cambodia", + "capital": "Phnom Penh", + "tzid": "Asia/Phnom_Penh" + }, + { + "country": "China", + "capital": "Beijing", + "tzid": "Asia/Shanghai" + }, + { + "country": "Georgia", + "capital": "Tbilisi", + "tzid": "Asia/Tbilisi" + }, + { + "country": "India", + "capital": "New Delhi", + "tzid": "Asia/Kolkata" + }, + { + "country": "Indonesia", + "capital": "Jakarta", + "tzid": "Asia/Jakarta" + }, + { + "country": "Iran", + "capital": "Tehran", + "tzid": "Asia/Tehran" + }, + { + "country": "Iraq", + 
"capital": "Baghdad", + "tzid": "Asia/Baghdad" + }, + { + "country": "Israel", + "capital": "Jerusalem", + "tzid": "Asia/Jerusalem" + }, + { + "country": "Japan", + "capital": "Tokyo", + "tzid": "Asia/Tokyo" + }, + { + "country": "Jordan", + "capital": "Amman", + "tzid": "Asia/Amman" + }, + { + "country": "Kazakhstan", + "capital": "Astana", + "tzid": "Asia/Almaty" + }, + { + "country": "Kuwait", + "capital": "Kuwait City", + "tzid": "Asia/Kuwait" + }, + { + "country": "Kyrgyzstan", + "capital": "Bishkek", + "tzid": "Asia/Bishkek" + }, + { + "country": "Laos", + "capital": "Vientiane", + "tzid": "Asia/Vientiane" + }, + { + "country": "Lebanon", + "capital": "Beirut", + "tzid": "Asia/Beirut" + }, + { + "country": "Malaysia", + "capital": "Kuala Lumpur", + "tzid": "Asia/Kuala_Lumpur" + }, + { + "country": "Maldives", + "capital": "Malé", + "tzid": "Indian/Maldives" + }, + { + "country": "Mongolia", + "capital": "Ulaanbaatar", + "tzid": "Asia/Ulaanbaatar" + }, + { + "country": "Myanmar", + "capital": "Naypyidaw", + "tzid": "Asia/Yangon" + }, + { + "country": "Nepal", + "capital": "Kathmandu", + "tzid": "Asia/Kathmandu" + }, + { + "country": "North Korea", + "capital": "Pyongyang", + "tzid": "Asia/Pyongyang" + }, + { + "country": "Oman", + "capital": "Muscat", + "tzid": "Asia/Muscat" + }, + { + "country": "Pakistan", + "capital": "Islamabad", + "tzid": "Asia/Karachi" + }, + { + "country": "Philippines", + "capital": "Manila", + "tzid": "Asia/Manila" + }, + { + "country": "Qatar", + "capital": "Doha", + "tzid": "Asia/Qatar" + }, + { + "country": "Saudi Arabia", + "capital": "Riyadh", + "tzid": "Asia/Riyadh" + }, + { + "country": "Singapore", + "capital": "Singapore", + "tzid": "Asia/Singapore" + }, + { + "country": "South Korea", + "capital": "Seoul", + "tzid": "Asia/Seoul" + }, + { + "country": "Sri Lanka", + "capital": "Sri Jayawardenepura Kotte", + "tzid": "Asia/Colombo" + }, + { + "country": "Syria", + "capital": "Damascus", + "tzid": "Asia/Damascus" + }, + { + 
"country": "Tajikistan", + "capital": "Dushanbe", + "tzid": "Asia/Dushanbe" + }, + { + "country": "Thailand", + "capital": "Bangkok", + "tzid": "Asia/Bangkok" + }, + { + "country": "Timor-Leste", + "capital": "Dili", + "tzid": "Asia/Dili" + }, + { + "country": "Turkey", + "capital": "Ankara", + "tzid": "Europe/Istanbul" + }, + { + "country": "Turkmenistan", + "capital": "Ashgabat", + "tzid": "Asia/Ashgabat" + }, + { + "country": "United Arab Emirates", + "capital": "Abu Dhabi", + "tzid": "Asia/Dubai" + }, + { + "country": "Uzbekistan", + "capital": "Tashkent", + "tzid": "Asia/Tashkent" + }, + { + "country": "Vietnam", + "capital": "Hanoi", + "tzid": "Asia/Ho_Chi_Minh" + }, + { + "country": "Yemen", + "capital": "Sana'a", + "tzid": "Asia/Aden" + }, + { + "country": "Albania", + "capital": "Tirana", + "tzid": "Europe/Tirane" + }, + { + "country": "Andorra", + "capital": "Andorra la Vella", + "tzid": "Europe/Andorra" + }, + { + "country": "Austria", + "capital": "Vienna", + "tzid": "Europe/Vienna" + }, + { + "country": "Belarus", + "capital": "Minsk", + "tzid": "Europe/Minsk" + }, + { + "country": "Belgium", + "capital": "Brussels", + "tzid": "Europe/Brussels" + }, + { + "country": "Bosnia and Herzegovina", + "capital": "Sarajevo", + "tzid": "Europe/Sarajevo" + }, + { + "country": "Bulgaria", + "capital": "Sofia", + "tzid": "Europe/Sofia" + }, + { + "country": "Croatia", + "capital": "Zagreb", + "tzid": "Europe/Zagreb" + }, + { + "country": "Cyprus", + "capital": "Nicosia", + "tzid": "Asia/Nicosia" + }, + { + "country": "Czechia", + "capital": "Prague", + "tzid": "Europe/Prague" + }, + { + "country": "Denmark", + "capital": "Copenhagen", + "tzid": "Europe/Copenhagen" + }, + { + "country": "Estonia", + "capital": "Tallinn", + "tzid": "Europe/Tallinn" + }, + { + "country": "Finland", + "capital": "Helsinki", + "tzid": "Europe/Helsinki" + }, + { + "country": "France", + "capital": "Paris", + "tzid": "Europe/Paris" + }, + { + "country": "Germany", + "capital": "Berlin", + 
"tzid": "Europe/Berlin" + }, + { + "country": "Greece", + "capital": "Athens", + "tzid": "Europe/Athens" + }, + { + "country": "Hungary", + "capital": "Budapest", + "tzid": "Europe/Budapest" + }, + { + "country": "Iceland", + "capital": "Reykjavík", + "tzid": "Atlantic/Reykjavik" + }, + { + "country": "Ireland", + "capital": "Dublin", + "tzid": "Europe/Dublin" + }, + { + "country": "Italy", + "capital": "Rome", + "tzid": "Europe/Rome" + }, + { + "country": "Latvia", + "capital": "Riga", + "tzid": "Europe/Riga" + }, + { + "country": "Liechtenstein", + "capital": "Vaduz", + "tzid": "Europe/Zurich" + }, + { + "country": "Lithuania", + "capital": "Vilnius", + "tzid": "Europe/Vilnius" + }, + { + "country": "Luxembourg", + "capital": "Luxembourg", + "tzid": "Europe/Luxembourg" + }, + { + "country": "Malta", + "capital": "Valletta", + "tzid": "Europe/Malta" + }, + { + "country": "Moldova", + "capital": "Chișinău", + "tzid": "Europe/Chisinau" + }, + { + "country": "Monaco", + "capital": "Monaco", + "tzid": "Europe/Monaco" + }, + { + "country": "Montenegro", + "capital": "Podgorica", + "tzid": "Europe/Belgrade" + }, + { + "country": "Netherlands", + "capital": "Amsterdam", + "tzid": "Europe/Amsterdam" + }, + { + "country": "North Macedonia", + "capital": "Skopje", + "tzid": "Europe/Skopje" + }, + { + "country": "Norway", + "capital": "Oslo", + "tzid": "Europe/Oslo" + }, + { + "country": "Poland", + "capital": "Warsaw", + "tzid": "Europe/Warsaw" + }, + { + "country": "Portugal", + "capital": "Lisbon", + "tzid": "Europe/Lisbon" + }, + { + "country": "Romania", + "capital": "Bucharest", + "tzid": "Europe/Bucharest" + }, + { + "country": "Russia", + "capital": "Moscow", + "tzid": "Europe/Moscow" + }, + { + "country": "San Marino", + "capital": "San Marino", + "tzid": "Europe/Rome" + }, + { + "country": "Serbia", + "capital": "Belgrade", + "tzid": "Europe/Belgrade" + }, + { + "country": "Slovakia", + "capital": "Bratislava", + "tzid": "Europe/Bratislava" + }, + { + "country": 
"Slovenia", + "capital": "Ljubljana", + "tzid": "Europe/Ljubljana" + }, + { + "country": "Spain", + "capital": "Madrid", + "tzid": "Europe/Madrid" + }, + { + "country": "Sweden", + "capital": "Stockholm", + "tzid": "Europe/Stockholm" + }, + { + "country": "Switzerland", + "capital": "Bern", + "tzid": "Europe/Zurich" + }, + { + "country": "Ukraine", + "capital": "Kyiv", + "tzid": "Europe/Kyiv" + }, + { + "country": "United Kingdom", + "capital": "London", + "tzid": "Europe/London" + }, + { + "country": "Antigua and Barbuda", + "capital": "St. John's", + "tzid": "America/Antigua" + }, + { + "country": "Argentina", + "capital": "Buenos Aires", + "tzid": "America/Argentina/Buenos_Aires" + }, + { + "country": "Bahamas", + "capital": "Nassau", + "tzid": "America/Nassau" + }, + { + "country": "Barbados", + "capital": "Bridgetown", + "tzid": "America/Barbados" + }, + { + "country": "Belize", + "capital": "Belmopan", + "tzid": "America/Belize" + }, + { + "country": "Bolivia", + "capital": "Sucre", + "tzid": "America/La_Paz" + }, + { + "country": "Brazil", + "capital": "Brasília", + "tzid": "America/Sao_Paulo" + }, + { + "country": "Canada", + "capital": "Ottawa", + "tzid": "America/Toronto" + }, + { + "country": "Chile", + "capital": "Santiago", + "tzid": "America/Santiago" + }, + { + "country": "Colombia", + "capital": "Bogotá", + "tzid": "America/Bogota" + }, + { + "country": "Costa Rica", + "capital": "San José", + "tzid": "America/Costa_Rica" + }, + { + "country": "Cuba", + "capital": "Havana", + "tzid": "America/Havana" + }, + { + "country": "Dominica", + "capital": "Roseau", + "tzid": "America/Dominica" + }, + { + "country": "Dominican Republic", + "capital": "Santo Domingo", + "tzid": "America/Santo_Domingo" + }, + { + "country": "Ecuador", + "capital": "Quito", + "tzid": "America/Guayaquil" + }, + { + "country": "El Salvador", + "capital": "San Salvador", + "tzid": "America/El_Salvador" + }, + { + "country": "Grenada", + "capital": "St. 
George's", + "tzid": "America/Grenada" + }, + { + "country": "Guatemala", + "capital": "Guatemala City", + "tzid": "America/Guatemala" + }, + { + "country": "Guyana", + "capital": "Georgetown", + "tzid": "America/Guyana" + }, + { + "country": "Haiti", + "capital": "Port-au-Prince", + "tzid": "America/Port-au-Prince" + }, + { + "country": "Honduras", + "capital": "Tegucigalpa", + "tzid": "America/Tegucigalpa" + }, + { + "country": "Jamaica", + "capital": "Kingston", + "tzid": "America/Jamaica" + }, + { + "country": "Mexico", + "capital": "Mexico City", + "tzid": "America/Mexico_City" + }, + { + "country": "Nicaragua", + "capital": "Managua", + "tzid": "America/Managua" + }, + { + "country": "Panama", + "capital": "Panama City", + "tzid": "America/Panama" + }, + { + "country": "Paraguay", + "capital": "Asunción", + "tzid": "America/Asuncion" + }, + { + "country": "Peru", + "capital": "Lima", + "tzid": "America/Lima" + }, + { + "country": "Saint Kitts and Nevis", + "capital": "Basseterre", + "tzid": "America/St_Kitts" + }, + { + "country": "Saint Lucia", + "capital": "Castries", + "tzid": "America/St_Lucia" + }, + { + "country": "Saint Vincent and the Grenadines", + "capital": "Kingstown", + "tzid": "America/St_Vincent" + }, + { + "country": "Suriname", + "capital": "Paramaribo", + "tzid": "America/Paramaribo" + }, + { + "country": "Trinidad and Tobago", + "capital": "Port of Spain", + "tzid": "America/Port_of_Spain" + }, + { + "country": "United States", + "capital": "Washington, D.C.", + "tzid": "America/New_York" + }, + { + "country": "Uruguay", + "capital": "Montevideo", + "tzid": "America/Montevideo" + }, + { + "country": "Venezuela", + "capital": "Caracas", + "tzid": "America/Caracas" + }, + { + "country": "Australia", + "capital": "Canberra", + "tzid": "Australia/Sydney" + }, + { + "country": "Fiji", + "capital": "Suva", + "tzid": "Pacific/Fiji" + }, + { + "country": "Kiribati", + "capital": "South Tarawa", + "tzid": "Pacific/Tarawa" + }, + { + "country": 
"Marshall Islands", + "capital": "Majuro", + "tzid": "Pacific/Majuro" + }, + { + "country": "Micronesia (Federated States of)", + "capital": "Palikir", + "tzid": "Pacific/Pohnpei" + }, + { + "country": "Nauru", + "capital": "Yaren (de facto)", + "tzid": "Pacific/Nauru" + }, + { + "country": "New Zealand", + "capital": "Wellington", + "tzid": "Pacific/Auckland" + }, + { + "country": "Palau", + "capital": "Ngerulmud", + "tzid": "Pacific/Palau" + }, + { + "country": "Papua New Guinea", + "capital": "Port Moresby", + "tzid": "Pacific/Port_Moresby" + }, + { + "country": "Samoa", + "capital": "Apia", + "tzid": "Pacific/Apia" + }, + { + "country": "Solomon Islands", + "capital": "Honiara", + "tzid": "Pacific/Guadalcanal" + }, + { + "country": "Tonga", + "capital": "Nuku'alofa", + "tzid": "Pacific/Tongatapu" + }, + { + "country": "Tuvalu", + "capital": "Funafuti", + "tzid": "Pacific/Funafuti" + }, + { + "country": "Vanuatu", + "capital": "Port Vila", + "tzid": "Pacific/Efate" + }, + { + "country": "Holy See", + "capital": "Vatican City", + "tzid": "Europe/Rome" + }, + { + "country": "Palestine", + "capital": "Ramallah", + "tzid": "Asia/Hebron" + }, + { + "country": "Kosovo", + "capital": "Pristina", + "tzid": "Europe/Belgrade" + }, + { + "country": "Taiwan", + "capital": "Taipei", + "tzid": "Asia/Taipei" + }, + { + "country": "Western Sahara (SADR)", + "capital": "El Aaiún", + "tzid": "Africa/El_Aaiun" + }, + { + "country": "Northern Cyprus", + "capital": "North Nicosia", + "tzid": "Asia/Nicosia" + }, + { + "country": "South Ossetia", + "capital": "Tskhinvali", + "tzid": "Europe/Moscow" + }, + { + "country": "Abkhazia", + "capital": "Sukhumi", + "tzid": "Europe/Moscow" + }, + { + "country": "Transnistria", + "capital": "Tiraspol", + "tzid": "Europe/Chisinau" + }, + { + "country": "Artsakh (Nagorno-Karabakh)", + "capital": "Stepanakert", + "tzid": "Asia/Yerevan" + }, + { + "country": "Somaliland", + "capital": "Hargeisa", + "tzid": "Africa/Mogadishu" + } +] \ No newline at 
def setup_database():
    """
    Create the `news` table in DB_FILE if it does not exist yet.

    The schema keeps one row per (country_name, news_date) and stores
    multilingual summaries in summary_en, summary_de and summary_jp. All
    summary columns are nullable to allow incremental backfill.

    Raises:
        sqlite3.Error: propagated unchanged if the statement fails; the
            connection is closed before the exception leaves this function.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        cursor = conn.cursor()
        # CREATE TABLE IF NOT EXISTS makes repeated start-ups a no-op.
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS news (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                country_name TEXT NOT NULL,
                news_date DATE NOT NULL,
                summary_en TEXT,
                summary_de TEXT,
                summary_jp TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(country_name, news_date)
            )
            """
        )
        conn.commit()
    finally:
        # Release the handle even when the DDL raises, so a failed start-up
        # does not leave an open connection behind.
        conn.close()
+ """ + conn = sqlite3.connect(DB_FILE) + cursor = conn.cursor() + try: + # Inspect existing columns + cursor.execute("PRAGMA table_info(news)") + cols = [row[1] for row in cursor.fetchall()] + # Rename summary -> summary_en if needed + if "summary_en" not in cols and "summary" in cols: + try: + cursor.execute("ALTER TABLE news RENAME COLUMN summary TO summary_en") + conn.commit() + # Refresh column list after rename + cursor.execute("PRAGMA table_info(news)") + cols = [row[1] for row in cursor.fetchall()] + except sqlite3.OperationalError: + # Fallback: recreate table with proper schema + cursor.execute( + """ + CREATE TABLE news_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + country_name TEXT NOT NULL, + news_date DATE NOT NULL, + summary_en TEXT, + summary_de TEXT, + summary_jp TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(country_name, news_date) + ) + """ + ) + # Copy old data: map summary -> summary_en + cursor.execute( + "INSERT INTO news_new (id, country_name, news_date, summary_en, created_at) " + "SELECT id, country_name, news_date, summary, created_at FROM news" + ) + cursor.execute("DROP TABLE news") + cursor.execute("ALTER TABLE news_new RENAME TO news") + conn.commit() + cursor.execute("PRAGMA table_info(news)") + cols = [row[1] for row in cursor.fetchall()] + # Add missing translation columns + if "summary_de" not in cols: + cursor.execute("ALTER TABLE news ADD COLUMN summary_de TEXT") + if "summary_jp" not in cols: + cursor.execute("ALTER TABLE news ADD COLUMN summary_jp TEXT") + conn.commit() + finally: + conn.close() + +# --- Core Logic --- +def get_countries_at_time(target_hour, target_minute, capitals_data): + """Finds countries where the local time matches the target hour and minute.""" + now_utc = datetime.now(timezone.utc) + hits = [] + for entry in capitals_data: + try: + tz = ZoneInfo(entry["tzid"]) + local_time = now_utc.astimezone(tz) + if local_time.hour == target_hour and local_time.minute == target_minute: + 
def summarize_with_ollama(content, country):
    """
    Summarize fetched news excerpts into English bullet points via Ollama.

    Args:
        content: Concatenated news excerpts (titles and snippets) to summarize.
        country: Name of the country the excerpts are about. It is named in
            the first sentence of the prompt to give the model context.

    Returns:
        The stripped model response. A fixed fallback sentence is returned
        instead when `content` is empty/whitespace-only or when the Ollama
        call raises, so callers always receive a string.
    """
    if not content or content.isspace():
        return "No content available to summarize."

    # The intro sentence names the country; the raw excerpts appear exactly
    # once, at the very end of the prompt.
    prompt = (
        f"Here are excerpts of news websites from {country}.\n"
        "Please provide a concise summary of these news in English.\n"
        "List each individual news in bulletin points. "
        "DO NOT list by news outlets - list by relevant news topics.\n"
        "Don't intro your response with things like \"..here are the news..\" "
        "or anything like that - reply with ONLY the news listed as bulletin points.\n"
        "Keep it concise.\n"
        f"The news:\n\n{content}"
    )
    try:
        response = ollama.generate(
            model=OLLAMA_MODEL,
            prompt=prompt
        )
        return response.get('response', 'Summary generation failed.').strip()
    except Exception as e:
        # The ollama library might raise various exceptions.
        print(f"Error communicating with Ollama using library: {e}")
        return "Summary failed due to a communication error."
+ """ + conn = sqlite3.connect(DB_FILE) + cursor = conn.cursor() + news_date = datetime.now(timezone.utc).date() + try: + if overwrite: + sql = ( + "INSERT INTO news (country_name, news_date, summary_en, summary_de, summary_jp) " + "VALUES (?, ?, ?, ?, ?) " + "ON CONFLICT(country_name, news_date) DO UPDATE SET " + "summary_en = excluded.summary_en, " + "summary_de = COALESCE(excluded.summary_de, news.summary_de), " + "summary_jp = COALESCE(excluded.summary_jp, news.summary_jp), " + "created_at = CURRENT_TIMESTAMP" + ) + params = (country_name, news_date, summary_en, summary_de, summary_jp) + verb = "upserted" + else: + sql = ( + "INSERT OR IGNORE INTO news (country_name, news_date, summary_en, summary_de, summary_jp) " + "VALUES (?, ?, ?, ?, ?)" + ) + params = (country_name, news_date, summary_en, summary_de, summary_jp) + verb = "inserted" + + cursor.execute(sql, params) + conn.commit() + + # Verify by reading back the row we *expect* to exist + cursor.execute( + "SELECT id FROM news WHERE country_name = ? AND news_date = ? LIMIT 1", + (country_name, news_date), + ) + row = cursor.fetchone() + if row: + print(f"Successfully {verb} news for {country_name}. (id={row[0]})") + else: + # This should not happen; make it explicit in logs + print(f"[warn] Post-commit verification failed for {country_name} ({news_date}).") + except Exception as e: + print(f"Error storing news for {country_name}: {e}") + finally: + conn.close() + +def update_translations(country_name: str, news_date, summary_de: str | None = None, summary_jp: str | None = None) -> None: + """ + Update translation fields for a given country and date. Only updates + provided languages; leaving a language as None will not change that + column. Updates `created_at` timestamp to reflect the change. 
+ """ + conn = sqlite3.connect(DB_FILE) + cursor = conn.cursor() + try: + updates = [] + params: list = [] + if summary_de is not None: + updates.append("summary_de = ?") + params.append(summary_de) + if summary_jp is not None: + updates.append("summary_jp = ?") + params.append(summary_jp) + if not updates: + return + # Append timestamp update + updates.append("created_at = CURRENT_TIMESTAMP") + params.append(country_name) + params.append(news_date) + sql = f"UPDATE news SET {', '.join(updates)} WHERE country_name = ? AND news_date = ?" + cursor.execute(sql, params) + conn.commit() + except Exception as e: + print(f"Error updating translations for {country_name}: {e}") + finally: + conn.close() + +def process_country_queue(queue, overwrite=False): + """ + Processes the queue of countries sequentially. Generates English summaries and + translations, then inserts or updates rows in the database accordingly. + When overwrite=False, existing rows are not replaced; missing translations + for existing rows are filled in if possible. + """ + print(f"Starting to process a queue of {len(queue)} countries. Overwrite: {overwrite}") + for country_entry in queue: + country_name = country_entry["country"] + print(f"\n--- Processing: {country_name} ---") + news_date = datetime.now(timezone.utc).date() + # Determine if a record exists for this country/date + conn = sqlite3.connect(DB_FILE) + cursor = conn.cursor() + cursor.execute( + "SELECT summary_en, summary_de, summary_jp FROM news WHERE country_name = ? 
AND news_date = ?", + (country_name, news_date), + ) + existing_row = cursor.fetchone() + conn.close() + + if existing_row and not overwrite: + # Existing row: fill missing translations if possible + summary_en_db, summary_de_db, summary_jp_db = existing_row + # Only attempt translation if English summary is present + if summary_en_db: + need_de = summary_de_db is None + need_jp = summary_jp_db is None + if need_de or need_jp: + print(f"Existing summary found for {country_name}; translating missing languages...") + summary_de_new = translate_summary(summary_en_db, 'de') if need_de else None + summary_jp_new = translate_summary(summary_en_db, 'jp') if need_jp else None + # Log translation success/failure + if need_de and summary_de_new: + print(f"Filled German translation for {country_name}.") + if need_jp and summary_jp_new: + print(f"Filled Japanese translation for {country_name}.") + # Update only provided translations + update_translations(country_name, news_date, summary_de_new, summary_jp_new) + else: + print(f"Existing row for {country_name} lacks English summary; cannot translate.") + # Skip summarization for existing rows when not overwriting + continue + # Fetch news content + print(f"Fetching news for {country_name}...") + news_content = fetch_searxng_results(country_name) + if not news_content: + print(f"No content fetched for {country_name}. 
def main():
    """
    Command-line entry point of the news collector.

    Flags:
        --dev        Process the next group of countries reaching 10:00 local
                     time once, then exit.
        --overwrite  Replace summaries that already exist for today.

    Without --dev the function never returns: it wakes once per minute and,
    on every UTC quarter hour (:00/:15/:30/:45), collects news for all
    countries whose capital is currently at 09:30 local time.
    """
    parser = argparse.ArgumentParser(description="10AM News Collector Service")
    parser.add_argument("--dev", action="store_true", help="Run in development mode: process next 10am countries once and exit.")
    parser.add_argument("--overwrite", action="store_true", help="Overwrite existing news summaries in the database.")
    args = parser.parse_args()

    print("Starting News Collector Service...")
    setup_database()
    # Perform schema migrations (safe, idempotent)
    try:
        run_db_migrations()
    except Exception as e:
        print(f"Migration error: {e}")

    try:
        with open(CAPITALS_FILE, "r", encoding="utf-8") as f:
            capitals_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: Capitals data file not found at {CAPITALS_FILE}")
        return
    except json.JSONDecodeError as e:
        # A truncated or hand-edited data file gets the same clean exit as a
        # missing one instead of an unhandled traceback.
        print(f"Error: Capitals data file at {CAPITALS_FILE} is not valid JSON: {e}")
        return

    if args.dev:
        print(f"Running in DEV mode... Overwrite: {args.overwrite}")
        country_queue = get_next_10am_countries(capitals_data)
        if country_queue:
            process_country_queue(country_queue, overwrite=args.overwrite)
        else:
            print("Could not determine the next set of countries for 10 AM.")
        print("DEV mode run complete.")
        return

    print("Running in PRODUCTION mode. Checking UTC minute windows (:00/:15/:30/:45) every minute...")
    while True:
        now_utc = datetime.now(timezone.utc)
        # Fire at every 15-minute UTC slot to cover quarter-hour timezones
        if (now_utc.minute % 15) == 0:
            print(f"UTC {now_utc.strftime('%H:%M')} → checking for countries at local 09:30.")
            country_queue = get_countries_at_time(9, 30, capitals_data)
            if country_queue:
                process_country_queue(country_queue, overwrite=args.overwrite)
            else:
                print("No countries are at 09:30 local time right now.")
        # Sleep until the next minute boundary (UTC). The clock is read again
        # here because processing a queue can take minutes, which makes the
        # timestamp taken at the top of the iteration stale.
        time.sleep(60 - datetime.now(timezone.utc).second)
+ """ + conn = sqlite3.connect(DB_FILE) + cursor = conn.cursor() + try: + cursor.execute("PRAGMA table_info(news)") + cols = [row[1] for row in cursor.fetchall()] + if "summary_en" not in cols and "summary" in cols: + try: + cursor.execute("ALTER TABLE news RENAME COLUMN summary TO summary_en") + conn.commit() + cursor.execute("PRAGMA table_info(news)") + cols = [row[1] for row in cursor.fetchall()] + except sqlite3.OperationalError: + # fallback: create new table and copy data + cursor.execute( + """ + CREATE TABLE news_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + country_name TEXT NOT NULL, + news_date DATE NOT NULL, + summary_en TEXT, + summary_de TEXT, + summary_jp TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(country_name, news_date) + ) + """ + ) + cursor.execute( + "INSERT INTO news_new (id, country_name, news_date, summary_en, created_at) " + "SELECT id, country_name, news_date, summary, created_at FROM news" + ) + cursor.execute("DROP TABLE news") + cursor.execute("ALTER TABLE news_new RENAME TO news") + conn.commit() + cursor.execute("PRAGMA table_info(news)") + cols = [row[1] for row in cursor.fetchall()] + # Add translation columns if missing + if "summary_de" not in cols: + cursor.execute("ALTER TABLE news ADD COLUMN summary_de TEXT") + if "summary_jp" not in cols: + cursor.execute("ALTER TABLE news ADD COLUMN summary_jp TEXT") + conn.commit() + finally: + conn.close() + +# --- News collector background daemon (reuses news_collector.py) --- +def start_collector_daemon(): + import threading, time + try: + # Import lazily so the backend can still boot even if requests/ollama aren't installed yet. 
+ import news_collector as nc + except Exception as e: + print(f"[collector] Not starting: failed to import news_collector: {e}") + return + + # Make sure DB exists and run migrations + try: + nc.setup_database() + # Attempt to run migrations via collector's helper (if present) + if hasattr(nc, "run_db_migrations"): + try: + nc.run_db_migrations() + except Exception as me: + print(f"[collector] migration via news_collector failed: {me}") + # Also run local migrations in case collector is outdated + try: + run_db_migrations() + except Exception as e2: + print(f"[collector] local migration failed: {e2}") + except Exception as e: + print(f"[collector] setup_database() failed: {e}") + + def _loop(): + print("[collector] daemon started (1s polling; triggers at 09:30 local time on UTC :00/:15/:30/:45).") + while True: + # Drive scheduling off UTC so quarter-hour timezones are handled + now_utc = datetime.now(timezone.utc) + try: + if now_utc.second < 5 and (now_utc.minute % 15 == 0): + # Reload capitals data (keeps it in sync with TENAM_DATA / DATA_FILE) + try: + with open(DATA_FILE, "r", encoding="utf-8") as f: + capitals_data = json.load(f) + except Exception: + capitals_data = CAPITALS + + # Prefetch at local 09:30 (covers :00/:15/:30/:45 UTC slots) + queue = nc.get_countries_at_time(9, 30, capitals_data) + if queue: + print(f"[collector] {len(queue)} countries at local 09:30 → fetching & summarizing…") + nc.process_country_queue(queue, overwrite=False) + else: + print("[collector] No countries at local 09:30 right now.") + # Avoid duplicate runs within the same minute window + time.sleep(60) + + except Exception as e: + print(f"[collector] loop error: {e}") + + time.sleep(1) + + threading.Thread(target=_loop, name="tenam-collector", daemon=True).start() + +def get_news_for_country(country_name: str, news_date) -> dict | None: + """ + Fetches the latest news summaries for a specific country and date. 
Returns + a dictionary with keys 'summary_en', 'summary_de', 'summary_jp' or None if + no entry exists. + """ + try: + conn = sqlite3.connect(DB_FILE) + cursor = conn.cursor() + cursor.execute( + "SELECT summary_en, summary_de, summary_jp FROM news WHERE country_name = ? AND news_date = ? ORDER BY created_at DESC LIMIT 1", + (country_name, news_date), + ) + row = cursor.fetchone() + conn.close() + if row: + return {"summary_en": row[0], "summary_de": row[1], "summary_jp": row[2]} + return None + except sqlite3.OperationalError: + # This can happen if the news_collector hasn't run yet and created the DB. + return None + + +def compute_countries_at_time(target_hour=10, exact_minute=None, at_utc_iso=None): + # Parse target UTC timestamp (if provided) + if at_utc_iso: + try: + iso = at_utc_iso[:-1] + "+00:00" if at_utc_iso.endswith("Z") else at_utc_iso + _now_utc = datetime.fromisoformat(iso).astimezone(timezone.utc) + except Exception: + _now_utc = datetime.now(timezone.utc) + else: + _now_utc = datetime.now(timezone.utc) + + hits = [] + for entry in CAPITALS: + try: + tzid = entry.get("tzid") + if not tzid: + continue + tz = ZoneInfo(tzid) # can throw if tzid invalid + local = _now_utc.astimezone(tz) + + if local.hour == target_hour and (exact_minute is None or local.minute == exact_minute): + summaries = get_news_for_country(entry["country"], local.date()) + # Build result entry with multilingual summaries (may be None) + hit = { + "country": entry["country"], + "capital": entry["capital"], + "tzid": tzid, + "local_time": local.strftime("%Y-%m-%d %H:%M:%S"), + "utc_now": _now_utc.strftime("%Y-%m-%d %H:%M:%S"), + "summary_en": None, + "summary_de": None, + "summary_jp": None, + } + if summaries: + hit["summary_en"] = summaries.get("summary_en") + hit["summary_de"] = summaries.get("summary_de") + hit["summary_jp"] = summaries.get("summary_jp") + hits.append(hit) + except Exception as e: + # Skip bad timezones/entries instead of crashing the request + print(f"[compute] 
skipped {entry.get('country','?')} ({entry.get('tzid','?')}): {e}") + continue + + hits.sort(key=lambda x: (x["country"].lower(), x["capital"].lower())) + return hits, _now_utc + +def compute_countries_meta_at_time(target_hour=10, exact_minute=None, at_utc_iso=None): + """Return only meta (no summaries) for the target time.""" + if at_utc_iso: + try: + iso = at_utc_iso[:-1] + "+00:00" if at_utc_iso.endswith("Z") else at_utc_iso + _now_utc = datetime.fromisoformat(iso).astimezone(timezone.utc) + except Exception: + _now_utc = datetime.now(timezone.utc) + else: + _now_utc = datetime.now(timezone.utc) + + hits = [] + for entry in CAPITALS: + try: + tzid = entry.get("tzid") + if not tzid: + continue + tz = ZoneInfo(tzid) + local = _now_utc.astimezone(tz) + if local.hour == target_hour and (exact_minute is None or local.minute == exact_minute): + hits.append({ + "country": entry["country"], + "capital": entry["capital"], + "tzid": tzid, + "local_time": local.strftime("%Y-%m-%d %H:%M:%S"), + "utc_now": _now_utc.strftime("%Y-%m-%d %H:%M:%S"), + }) + except Exception as e: + print(f"[compute-meta] skipped {entry.get('country','?')} ({entry.get('tzid','?')}): {e}") + continue + + hits.sort(key=lambda x: (x["country"].lower(), x["capital"].lower())) + return hits, _now_utc + +def block_id_for_meta(meta_list, at_dt_utc): + """Deterministic ID based on at_utc (minute) and set of results.""" + at_norm = at_dt_utc.replace(second=0, microsecond=0).strftime("%Y-%m-%dT%H:%MZ") + data = { + "at": at_norm, + "results": [(m["country"], m["capital"], m["tzid"]) for m in meta_list], + } + s = json.dumps(data, sort_keys=True, separators=(',',':')).encode("utf-8") + return hashlib.sha1(s).hexdigest() + +def _results_for_all_capitals(at_utc_iso: str | None): + """ + Build per-capital results at the given UTC instant (or now). + Returns list of tuples: (label 'HH:MM', result_dict) + where result_dict contains multilingual summaries (may be None). 
+ """ + # Parse target UTC timestamp (if provided) + if at_utc_iso: + try: + iso = at_utc_iso[:-1] + "+00:00" if at_utc_iso.endswith("Z") else at_utc_iso + now_utc = datetime.fromisoformat(iso).astimezone(timezone.utc) + except Exception: + now_utc = datetime.now(timezone.utc) + else: + now_utc = datetime.now(timezone.utc) + + rows = [] + for entry in CAPITALS: + try: + tzid = entry.get("tzid") + if not tzid: + continue + tz = ZoneInfo(tzid) + local = now_utc.astimezone(tz) + label = local.strftime("%H:%M") # minute-granularity buckets + + summaries = get_news_for_country(entry["country"], local.date()) + res = { + "country": entry["country"], + "capital": entry["capital"], + "tzid": tzid, + "local_time": local.strftime("%Y-%m-%d %H:%M:%S"), + "utc_now": now_utc.strftime("%Y-%m-%d %H:%M:%S"), + "summary_en": None, + "summary_de": None, + "summary_jp": None, + } + if summaries: + res["summary_en"] = summaries.get("summary_en") + res["summary_de"] = summaries.get("summary_de") + res["summary_jp"] = summaries.get("summary_jp") + + rows.append((label, res)) + except Exception as e: + print(f"[_results_for_all_capitals] skipped {entry.get('country','?')} ({entry.get('tzid','?')}): {e}") + continue + return rows, now_utc + + +def _order_group_labels(labels: list[str], start_hour: int = 10) -> list[str]: + """ + Order labels (HH:MM) to start from the first label whose hour == start_hour, + then continue through the day (by minute), then wrap. + If no label with that hour exists, start from the earliest label in the next available hour. 
+ """ + # Parse labels into (hour, minute, label) + parsed = [] + for lab in labels: + try: + h, m = lab.split(":") + parsed.append((int(h), int(m), lab)) + except Exception: + # Skip malformed labels + continue + + # Group by hour + by_hour = {} + for h, m, lab in parsed: + by_hour.setdefault(h, []).append((m, lab)) + + # Sort minutes within each hour + for h in by_hour: + by_hour[h].sort(key=lambda x: x[0]) # by minute + + # Build an ordered hours list starting at start_hour + hours_order = list(range(start_hour, 24)) + list(range(0, start_hour)) + ordered_labels = [] + + found_start = False + # Try to start at start_hour + if start_hour in by_hour and by_hour[start_hour]: + found_start = True + ordered_labels.extend([lab for (_m, lab) in by_hour[start_hour]]) + + # Continue with subsequent hours + for h in hours_order: + if h == start_hour: + continue + if h in by_hour and by_hour[h]: + if not found_start: + # first non-empty hour becomes the start + found_start = True + ordered_labels.extend([lab for (_m, lab) in by_hour[h]]) + + return ordered_labels + + +def _paginate_labels(ordered_labels: list[str], cursor: str | None, page_size: int) -> tuple[list[str], str | None]: + """ + Cursor is of form 'gidx:' meaning start index in ordered_labels. + If cursor is None, start at 0. Returns (page_labels, next_cursor_or_None). 
+ """ + start_idx = 0 + if cursor and cursor.startswith("gidx:"): + try: + start_idx = int(cursor.split(":")[1]) + except Exception: + start_idx = 0 + + end_idx = min(start_idx + page_size, len(ordered_labels)) + page = ordered_labels[start_idx:end_idx] + next_cursor = f"gidx:{end_idx}" if end_idx < len(ordered_labels) else None + return page, next_cursor + + +class Handler(BaseHTTPRequestHandler): + def _send_json(self, payload: dict, code=200): + body = json.dumps(payload, ensure_ascii=False, indent=2).encode("utf-8") + self.send_response(code) + self.send_header("Content-Type", "application/json; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(body) + + def _send_static(self, path): + full = os.path.join(STATIC_DIR, path.lstrip("/")) + if os.path.isdir(full): + full = os.path.join(full, "index.html") + if not os.path.exists(full): + self.send_response(404); self.end_headers(); return + if full.endswith(".html"): + ctype = "text/html; charset=utf-8" + elif full.endswith(".js"): + ctype = "text/javascript; charset=utf-8" + elif full.endswith(".css"): + ctype = "text/css; charset=utf-8" + else: + ctype = "application/octet-stream" + with open(full, "rb") as f: + data = f.read() + self.send_response(200) + self.send_header("Content-Type", ctype) + self.send_header("Content-Length", str(len(data))) + self.end_headers() + self.wfile.write(data) + + def do_GET(self): + parsed = urlparse(self.path) + + # Lightweight ID-only endpoint + if parsed.path in ("/ten-am-id", "/api/ten-am-id"): + try: + qs = parse_qs(parsed.query or "") + exact = qs.get("exact", ["false"])[0].lower() in ("1", "true", "yes") + at = qs.get("at", [None])[0] + minute = 0 if exact else None + + meta, _now_utc = compute_countries_meta_at_time(10, exact_minute=minute, at_utc_iso=at) + block_id = block_id_for_meta(meta, _now_utc) + payload = { + "id": block_id, + "count": len(meta), + "exact": 
exact, + "at_utc": (at or _now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")), + } + return self._send_json(payload) + except Exception as e: + print("[/ten-am-id] error:", e) + traceback.print_exc() + return self._send_json({"error": "internal_error", "detail": str(e)}, code=500) + # Grouped-now endpoint with cursor pagination + if parsed.path in ("/now-groups", "/api/now-groups"): + try: + qs = parse_qs(parsed.query or "") + at = qs.get("at", [None])[0] + cursor = qs.get("cursor", [None])[0] + page_size = int(qs.get("page_size", ["6"])[0]) # groups per page; tune as needed + start_hour = int(qs.get("start_hour", ["10"])[0]) + + rows, now_utc = _results_for_all_capitals(at) + # Build groups: label => results[] + groups = {} + for label, res in rows: + groups.setdefault(label, []).append(res) + + # Sort countries inside a label (stable) + for label in groups: + groups[label].sort(key=lambda r: (r["country"].lower(), r["capital"].lower())) + + all_labels = sorted(set(groups.keys())) # 00:00..23:59 existing labels + ordered_labels = _order_group_labels(all_labels, start_hour=start_hour) + + page_labels, next_cursor = _paginate_labels(ordered_labels, cursor, page_size) + + payload_groups = [{"label": lab, "results": groups.get(lab, [])} for lab in page_labels] + payload = { + "at_utc": (at or now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")), + "groups": payload_groups, + "next_cursor": next_cursor, + } + return self._send_json(payload) + except Exception as e: + print("[/now-groups] error:", e) + traceback.print_exc() + return self._send_json({"error": "internal_error", "detail": str(e)}, code=500) + if parsed.path in ("/ten-am", "/api/ten-am"): + try: + qs = parse_qs(parsed.query or "") + exact = qs.get("exact", ["false"])[0].lower() in ("1", "true", "yes") + at = qs.get("at", [None])[0] + lang = qs.get("lang", [None])[0] + minute = 0 if exact else None + + hits, _now_utc = compute_countries_at_time(10, exact_minute=minute, at_utc_iso=at) + # Optionally include a legacy `summary` alias 
when lang is provided + if lang: + lang = lang.lower() + for h in hits: + preferred = None + if lang.startswith("de"): + preferred = h.get("summary_de") + elif lang.startswith("jp") or lang.startswith("ja"): + preferred = h.get("summary_jp") + else: + preferred = h.get("summary_en") + # fallback to English if preferred is missing + if not preferred: + preferred = h.get("summary_en") + h["summary"] = preferred + + payload = { + "count": len(hits), + "exact": exact, + "at_utc": (at or _now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")), + "results": hits, + } + return self._send_json(payload) + except Exception as e: + print("[/ten-am] error:", e) + traceback.print_exc() + return self._send_json({"error": "internal_error", "detail": str(e)}, code=500) + +def main(): + import argparse + parser = argparse.ArgumentParser(description="10AM server with static frontend") + parser.add_argument("--host", default="127.0.0.1") + parser.add_argument("--port", type=int, default=8000) + parser.add_argument("--data", default=os.path.join(SCRIPT_DIR, "capitals_tz.json")) + parser.add_argument("--static", default=os.path.join(SCRIPT_DIR, "static")) + args = parser.parse_args() + + global DATA_FILE, STATIC_DIR, CAPITALS + DATA_FILE = args.data + STATIC_DIR = args.static + with open(DATA_FILE, "r", encoding="utf-8") as f: + CAPITALS = json.load(f) + + print(f"Serving on http://{args.host}:{args.port}") + print(f"Data: {DATA_FILE}") + print(f"Static: {STATIC_DIR}/ (open / in browser)") + + # Run database migrations before starting services + try: + run_db_migrations() + except Exception as mig_err: + print(f"[startup] migration failed: {mig_err}") + # Start the background news collector (uses news_collector.py logic) + start_collector_daemon() + + with HTTPServer((args.host, args.port), Handler) as httpd: + try: + httpd.serve_forever() + except KeyboardInterrupt: + print("\nShutting down.") + +if __name__ == "__main__": + main() diff --git a/server/tools/backfill_translations.py 
#!/usr/bin/env python3
"""
Backfill missing translations for the `news` table.

- Finds rows where summary_de and/or summary_jp are NULL/empty
  but summary_en is present.
- Uses the same Ollama-based translation routine as the collector.
- Safe to run multiple times.

Usage examples:
  python server/tools/backfill_translations.py
  python server/tools/backfill_translations.py --langs de
  python server/tools/backfill_translations.py --limit 25 --verbose
  python server/tools/backfill_translations.py --dry-run
"""

import argparse
import os
import sqlite3
import sys
from typing import Iterable, Optional, Tuple

# Make parent directory (server/) importable so we can import news_collector.py
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
SERVER_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, ".."))
if SERVER_ROOT not in sys.path:
    sys.path.insert(0, SERVER_ROOT)

# Reuse the exact same translation logic as the collector
from news_collector import translate_summary, setup_database, run_db_migrations

DB_FILE = os.path.join(SERVER_ROOT, "news.db")

# (language code, column in `news`) pairs this tool knows how to fill.
_TARGETS = (("de", "summary_de"), ("jp", "summary_jp"))


def is_blank(s: Optional[str]) -> bool:
    """Return True when ``s`` is None or contains only whitespace."""
    return s is None or str(s).strip() == ""


def select_rows(conn: sqlite3.Connection, limit: Optional[int]) -> Iterable[Tuple]:
    """
    Select rows that have EN present but are missing DE and/or JP.

    ``limit`` caps the number of rows; ``None``, 0 or a negative value means
    no cap.

    Returns tuples: (id, country_name, news_date, summary_en, summary_de, summary_jp)
    """
    sql = """
        SELECT id, country_name, news_date, summary_en, summary_de, summary_jp
        FROM news
        WHERE
            summary_en IS NOT NULL AND TRIM(summary_en) <> ''
            AND (
                summary_de IS NULL OR TRIM(summary_de) = ''
                OR summary_jp IS NULL OR TRIM(summary_jp) = ''
            )
        ORDER BY news_date DESC, id ASC
    """
    if limit and limit > 0:
        sql += " LIMIT ?"
        return conn.execute(sql, (limit,))
    return conn.execute(sql)


def backfill(conn: sqlite3.Connection, langs: Iterable[str], dry_run: bool = False,
             verbose: bool = False, limit: Optional[int] = None) -> int:
    """
    Translate missing summaries and write them back to the ``news`` table.

    Parameters:
        conn:    open connection to the news database.
        langs:   language codes to fill; only "de" and "jp" are acted on.
        dry_run: when True, translations are still requested (so the count is
                 meaningful) but nothing is written or committed.
        verbose: print one line per successful translation.
        limit:   maximum number of candidate rows (None/0 = no limit). This is
                 an explicit argument so the function does not depend on state
                 set up by ``main()``.

    Returns the number of rows for which at least one translation was produced.
    """
    # Materialise once: `langs` may be a one-shot iterable and is tested per row.
    wanted = set(langs)

    rows = list(select_rows(conn, limit=limit))
    if verbose:
        print(f"[backfill] candidates: {len(rows)}")

    cur = conn.cursor()
    updated_count = 0

    try:
        for row_id, country, news_date, en, de, jp in rows:
            current = {"summary_de": de, "summary_jp": jp}

            to_update = {}
            for lang, column in _TARGETS:
                if lang not in wanted or not is_blank(current[column]) or is_blank(en):
                    continue
                translated = translate_summary(en, lang)
                # A falsy result means no translation was produced; leave the column alone.
                if translated:
                    to_update[column] = translated
                    if verbose:
                        print(f" ✓ {country} [{news_date}] -> {lang.upper()} translated")

            if to_update:
                updated_count += 1
                if not dry_run:
                    # Column names come from _TARGETS, never from user input,
                    # so interpolating them is safe; values are bound.
                    sets = ", ".join(f"{col} = ?" for col in to_update)
                    params = list(to_update.values()) + [row_id]
                    cur.execute(
                        f"UPDATE news SET {sets}, created_at = CURRENT_TIMESTAMP WHERE id = ?",
                        params,
                    )
    finally:
        # Keep the rows already translated even if a later row fails or the
        # run is interrupted; the next run picks up the rest.
        if not dry_run:
            conn.commit()

    return updated_count


def main():
    """Command-line entry point; returns the process exit code."""
    parser = argparse.ArgumentParser(description="Backfill missing news translations (DE/JP) from summary_en.")
    parser.add_argument("--langs", nargs="+", default=["de", "jp"], choices=["de", "jp"],
                        help="Which languages to backfill (default: de jp)")
    parser.add_argument("--limit", type=int, default=0, help="Max rows to process (0 = no limit)")
    parser.add_argument("--dry-run", action="store_true", help="Do not write changes to the database")
    parser.add_argument("--verbose", action="store_true", help="Verbose output")
    args = parser.parse_args()

    # Ensure DB & schema are ready
    setup_database()
    try:
        run_db_migrations()
    except Exception as e:
        print(f"[warn] migration step failed: {e}")

    if not os.path.exists(DB_FILE):
        print(f"[error] DB file not found: {DB_FILE}")
        return 1

    conn = sqlite3.connect(DB_FILE)
    try:
        count = backfill(conn, args.langs, dry_run=args.dry_run, verbose=args.verbose, limit=args.limit)
        if args.dry_run:
            print(f"[dry-run] would update {count} row(s).")
        else:
            print(f"[done] updated {count} row(s).")
    finally:
        conn.close()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())