import json, os, sqlite3, traceback, hashlib
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse, parse_qs
from datetime import datetime, timezone
from zoneinfo import ZoneInfo

SCRIPT_DIR = os.path.dirname(__file__)
DATA_FILE = os.environ.get("TENAM_DATA", os.path.join(SCRIPT_DIR, "capitals_tz.json"))
DB_FILE = os.path.join(SCRIPT_DIR, "news.db")
STATIC_DIR = os.environ.get("TENAM_STATIC", os.path.join(SCRIPT_DIR, "static"))

with open(DATA_FILE, "r", encoding="utf-8") as f:
    CAPITALS = json.load(f)


# --- Database migration ---
def run_db_migrations() -> None:
    """
    Ensure the news table schema supports multilingual summaries.

    If the old column `summary` exists, it is renamed to `summary_en`;
    missing columns `summary_de` and `summary_jp` are added.
    Idempotent and safe across multiple runs.
    """
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()
    try:
        cursor.execute("PRAGMA table_info(news)")
        cols = [row[1] for row in cursor.fetchall()]
        if "summary_en" not in cols and "summary" in cols:
            try:
                cursor.execute("ALTER TABLE news RENAME COLUMN summary TO summary_en")
                conn.commit()
                cursor.execute("PRAGMA table_info(news)")
                cols = [row[1] for row in cursor.fetchall()]
            except sqlite3.OperationalError:
                # Fallback for SQLite builds without RENAME COLUMN support:
                # create a new table with the target schema and copy the data.
                cursor.execute(
                    """
                    CREATE TABLE news_new (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        country_name TEXT NOT NULL,
                        news_date DATE NOT NULL,
                        summary_en TEXT,
                        summary_de TEXT,
                        summary_jp TEXT,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        UNIQUE(country_name, news_date)
                    )
                    """
                )
                cursor.execute(
                    "INSERT INTO news_new (id, country_name, news_date, summary_en, created_at) "
                    "SELECT id, country_name, news_date, summary, created_at FROM news"
                )
                cursor.execute("DROP TABLE news")
                cursor.execute("ALTER TABLE news_new RENAME TO news")
                conn.commit()
                cursor.execute("PRAGMA table_info(news)")
                cols = [row[1] for row in cursor.fetchall()]
        # Add translation columns if missing.
        if "summary_de" not in cols:
            cursor.execute("ALTER TABLE news ADD COLUMN summary_de TEXT")
        if "summary_jp" not in cols:
            cursor.execute("ALTER TABLE news ADD COLUMN summary_jp TEXT")
        conn.commit()
    finally:
        conn.close()
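
# Quick schema check after a migration (a sketch, not called by the server;
# DB_FILE and the column names are the real ones used above, the snippet
# itself is illustrative and meant for an interactive shell):
#
#   import sqlite3
#   conn = sqlite3.connect(DB_FILE)
#   cols = {row[1] for row in conn.execute("PRAGMA table_info(news)")}
#   assert {"summary_en", "summary_de", "summary_jp"} <= cols
#   conn.close()
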
# --- News collector background daemon (reuses news_collector.py) ---
def start_collector_daemon():
    import threading, time

    try:
        # Import lazily so the backend can still boot even if
        # requests/ollama aren't installed yet.
        import news_collector as nc
    except Exception as e:
        print(f"[collector] Not starting: failed to import news_collector: {e}")
        return

    # Make sure the DB exists and run migrations.
    try:
        nc.setup_database()
        # Attempt to run migrations via the collector's helper (if present).
        if hasattr(nc, "run_db_migrations"):
            try:
                nc.run_db_migrations()
            except Exception as me:
                print(f"[collector] migration via news_collector failed: {me}")
        # Also run local migrations in case the collector is outdated.
        try:
            run_db_migrations()
        except Exception as e2:
            print(f"[collector] local migration failed: {e2}")
    except Exception as e:
        print(f"[collector] setup_database() failed: {e}")

    def _loop():
        print("[collector] daemon started (1s polling; triggers at 09:30 local time on UTC :00/:15/:30/:45).")
        while True:
            # Drive scheduling off UTC so quarter-hour timezones are handled:
            # IANA offsets are all multiples of 15 minutes, so local 09:30
            # always lands on a UTC :00/:15/:30/:45 boundary (e.g. Kathmandu,
            # UTC+05:45, reaches 09:30 local at 03:45 UTC).
            now_utc = datetime.now(timezone.utc)
            try:
                if now_utc.second < 5 and now_utc.minute % 15 == 0:
                    # Reload capitals data (keeps it in sync with TENAM_DATA / DATA_FILE).
                    try:
                        with open(DATA_FILE, "r", encoding="utf-8") as f:
                            capitals_data = json.load(f)
                    except Exception:
                        capitals_data = CAPITALS
                    # Prefetch at local 09:30 (covers the :00/:15/:30/:45 UTC slots).
                    queue = nc.get_countries_at_time(9, 30, capitals_data)
                    if queue:
                        print(f"[collector] {len(queue)} countries at local 09:30 → fetching & summarizing…")
                        nc.process_country_queue(queue, overwrite=False)
                    else:
                        print("[collector] No countries at local 09:30 right now.")
                    # Avoid duplicate runs within the same minute window.
                    time.sleep(60)
            except Exception as e:
                print(f"[collector] loop error: {e}")
            time.sleep(1)

    threading.Thread(target=_loop, name="tenam-collector", daemon=True).start()


def get_news_for_country(country_name: str, news_date) -> dict | None:
    """
    Fetch the latest news summaries for a specific country and date.

    Returns a dict with keys 'summary_en', 'summary_de', 'summary_jp',
    or None if no entry exists.
    """
    try:
        conn = sqlite3.connect(DB_FILE)
        cursor = conn.cursor()
        cursor.execute(
            "SELECT summary_en, summary_de, summary_jp FROM news "
            "WHERE country_name = ? AND news_date = ? "
            "ORDER BY created_at DESC LIMIT 1",
            (country_name, news_date),
        )
        row = cursor.fetchone()
        conn.close()
        if row:
            return {"summary_en": row[0], "summary_de": row[1], "summary_jp": row[2]}
        return None
    except sqlite3.OperationalError:
        # Can happen if news_collector hasn't run yet and created the DB.
        return None
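
# Illustrative usage of get_news_for_country (country name and date are
# hypothetical; whether a row exists depends on what the collector stored):
#
#   from datetime import date
#   entry = get_news_for_country("Japan", date(2024, 1, 1))
#   if entry:
#       print(entry["summary_en"] or "no English summary yet")
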
def _parse_at_utc(at_utc_iso: str | None) -> datetime:
    """Parse an optional ISO-8601 timestamp (trailing 'Z' allowed) into an
    aware UTC datetime; fall back to the current time on any parse error."""
    if at_utc_iso:
        try:
            iso = at_utc_iso[:-1] + "+00:00" if at_utc_iso.endswith("Z") else at_utc_iso
            return datetime.fromisoformat(iso).astimezone(timezone.utc)
        except Exception:
            pass
    return datetime.now(timezone.utc)


def compute_countries_at_time(target_hour=10, exact_minute=None, at_utc_iso=None):
    _now_utc = _parse_at_utc(at_utc_iso)
    hits = []
    for entry in CAPITALS:
        try:
            tzid = entry.get("tzid")
            if not tzid:
                continue
            tz = ZoneInfo(tzid)  # can throw if tzid is invalid
            local = _now_utc.astimezone(tz)
            if local.hour == target_hour and (exact_minute is None or local.minute == exact_minute):
                summaries = get_news_for_country(entry["country"], local.date())
                # Build the result entry with multilingual summaries (may be None).
                hit = {
                    "country": entry["country"],
                    "capital": entry["capital"],
                    "tzid": tzid,
                    "local_time": local.strftime("%Y-%m-%d %H:%M:%S"),
                    "utc_now": _now_utc.strftime("%Y-%m-%d %H:%M:%S"),
                    "summary_en": None,
                    "summary_de": None,
                    "summary_jp": None,
                }
                if summaries:
                    hit["summary_en"] = summaries.get("summary_en")
                    hit["summary_de"] = summaries.get("summary_de")
                    hit["summary_jp"] = summaries.get("summary_jp")
                hits.append(hit)
        except Exception as e:
            # Skip bad timezones/entries instead of crashing the request.
            print(f"[compute] skipped {entry.get('country','?')} ({entry.get('tzid','?')}): {e}")
            continue
    hits.sort(key=lambda x: (x["country"].lower(), x["capital"].lower()))
    return hits, _now_utc


def compute_countries_meta_at_time(target_hour=10, exact_minute=None, at_utc_iso=None):
    """Return only meta (no summaries) for the target time."""
    _now_utc = _parse_at_utc(at_utc_iso)
    hits = []
    for entry in CAPITALS:
        try:
            tzid = entry.get("tzid")
            if not tzid:
                continue
            tz = ZoneInfo(tzid)
            local = _now_utc.astimezone(tz)
            if local.hour == target_hour and (exact_minute is None or local.minute == exact_minute):
                hits.append({
                    "country": entry["country"],
                    "capital": entry["capital"],
                    "tzid": tzid,
                    "local_time": local.strftime("%Y-%m-%d %H:%M:%S"),
                    "utc_now": _now_utc.strftime("%Y-%m-%d %H:%M:%S"),
                })
        except Exception as e:
            print(f"[compute-meta] skipped {entry.get('country','?')} ({entry.get('tzid','?')}): {e}")
            continue
    hits.sort(key=lambda x: (x["country"].lower(), x["capital"].lower()))
    return hits, _now_utc


def block_id_for_meta(meta_list, at_dt_utc):
    """Deterministic ID based on at_utc (minute precision) and the set of results."""
    at_norm = at_dt_utc.replace(second=0, microsecond=0).strftime("%Y-%m-%dT%H:%MZ")
    data = {
        "at": at_norm,
        "results": [(m["country"], m["capital"], m["tzid"]) for m in meta_list],
    }
    s = json.dumps(data, sort_keys=True, separators=(",", ":")).encode("utf-8")
    return hashlib.sha1(s).hexdigest()
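
# Sketch of the ID's determinism (hypothetical invocation): within the same
# UTC minute, the same result set always hashes to the same block ID, which
# is what lets clients poll /ten-am-id cheaply and refetch only on change.
#
#   meta, now_utc = compute_countries_meta_at_time(10, exact_minute=0)
#   assert block_id_for_meta(meta, now_utc) == \
#          block_id_for_meta(meta, now_utc.replace(second=59))
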
""" # Parse target UTC timestamp (if provided) if at_utc_iso: try: iso = at_utc_iso[:-1] + "+00:00" if at_utc_iso.endswith("Z") else at_utc_iso now_utc = datetime.fromisoformat(iso).astimezone(timezone.utc) except Exception: now_utc = datetime.now(timezone.utc) else: now_utc = datetime.now(timezone.utc) rows = [] for entry in CAPITALS: try: tzid = entry.get("tzid") if not tzid: continue tz = ZoneInfo(tzid) local = now_utc.astimezone(tz) label = local.strftime("%H:%M") # minute-granularity buckets summaries = get_news_for_country(entry["country"], local.date()) res = { "country": entry["country"], "capital": entry["capital"], "tzid": tzid, "local_time": local.strftime("%Y-%m-%d %H:%M:%S"), "utc_now": now_utc.strftime("%Y-%m-%d %H:%M:%S"), "summary_en": None, "summary_de": None, "summary_jp": None, } if summaries: res["summary_en"] = summaries.get("summary_en") res["summary_de"] = summaries.get("summary_de") res["summary_jp"] = summaries.get("summary_jp") rows.append((label, res)) except Exception as e: print(f"[_results_for_all_capitals] skipped {entry.get('country','?')} ({entry.get('tzid','?')}): {e}") continue return rows, now_utc def _order_group_labels(labels: list[str], start_hour: int = 10) -> list[str]: """ Order labels (HH:MM) to start from the first label whose hour == start_hour, then continue through the day (by minute), then wrap. If no label with that hour exists, start from the earliest label in the next available hour. """ # Parse labels into (hour, minute, label) parsed = [] for lab in labels: try: h, m = lab.split(":") parsed.append((int(h), int(m), lab)) except Exception: # Skip malformed labels continue # Group by hour by_hour = {} for h, m, lab in parsed: by_hour.setdefault(h, []).append((m, lab)) # Sort minutes within each hour for h in by_hour: by_hour[h].sort(key=lambda x: x[0]) # by minute # Build an ordered hours list starting at start_hour hours_order = list(range(start_hour, 24)) + list(range(0, start_hour)) ordered_labels = [] found_start = False # Try to start at start_hour if start_hour in by_hour and by_hour[start_hour]: found_start = True ordered_labels.extend([lab for (_m, lab) in by_hour[start_hour]]) # Continue with subsequent hours for h in hours_order: if h == start_hour: continue if h in by_hour and by_hour[h]: if not found_start: # first non-empty hour becomes the start found_start = True ordered_labels.extend([lab for (_m, lab) in by_hour[h]]) return ordered_labels def _paginate_labels(ordered_labels: list[str], cursor: str | None, page_size: int) -> tuple[list[str], str | None]: """ Cursor is of form 'gidx:' meaning start index in ordered_labels. If cursor is None, start at 0. Returns (page_labels, next_cursor_or_None). 
""" start_idx = 0 if cursor and cursor.startswith("gidx:"): try: start_idx = int(cursor.split(":")[1]) except Exception: start_idx = 0 end_idx = min(start_idx + page_size, len(ordered_labels)) page = ordered_labels[start_idx:end_idx] next_cursor = f"gidx:{end_idx}" if end_idx < len(ordered_labels) else None return page, next_cursor class Handler(BaseHTTPRequestHandler): def _send_json(self, payload: dict, code=200): body = json.dumps(payload, ensure_ascii=False, indent=2).encode("utf-8") self.send_response(code) self.send_header("Content-Type", "application/json; charset=utf-8") self.send_header("Content-Length", str(len(body))) self.send_header("Cache-Control", "no-store") self.end_headers() self.wfile.write(body) def _send_static(self, path): full = os.path.join(STATIC_DIR, path.lstrip("/")) if os.path.isdir(full): full = os.path.join(full, "index.html") if not os.path.exists(full): self.send_response(404); self.end_headers(); return if full.endswith(".html"): ctype = "text/html; charset=utf-8" elif full.endswith(".js"): ctype = "text/javascript; charset=utf-8" elif full.endswith(".css"): ctype = "text/css; charset=utf-8" else: ctype = "application/octet-stream" with open(full, "rb") as f: data = f.read() self.send_response(200) self.send_header("Content-Type", ctype) self.send_header("Content-Length", str(len(data))) self.end_headers() self.wfile.write(data) def do_GET(self): parsed = urlparse(self.path) # Lightweight ID-only endpoint if parsed.path in ("/ten-am-id", "/api/ten-am-id"): try: qs = parse_qs(parsed.query or "") exact = qs.get("exact", ["false"])[0].lower() in ("1", "true", "yes") at = qs.get("at", [None])[0] minute = 0 if exact else None meta, _now_utc = compute_countries_meta_at_time(10, exact_minute=minute, at_utc_iso=at) block_id = block_id_for_meta(meta, _now_utc) payload = { "id": block_id, "count": len(meta), "exact": exact, "at_utc": (at or _now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")), } return self._send_json(payload) except Exception as e: print("[/ten-am-id] error:", e) traceback.print_exc() return self._send_json({"error": "internal_error", "detail": str(e)}, code=500) # Grouped-now endpoint with cursor pagination if parsed.path in ("/now-groups", "/api/now-groups"): try: qs = parse_qs(parsed.query or "") at = qs.get("at", [None])[0] cursor = qs.get("cursor", [None])[0] page_size = int(qs.get("page_size", ["6"])[0]) # groups per page; tune as needed start_hour = int(qs.get("start_hour", ["10"])[0]) rows, now_utc = _results_for_all_capitals(at) # Build groups: label => results[] groups = {} for label, res in rows: groups.setdefault(label, []).append(res) # Sort countries inside a label (stable) for label in groups: groups[label].sort(key=lambda r: (r["country"].lower(), r["capital"].lower())) all_labels = sorted(set(groups.keys())) # 00:00..23:59 existing labels ordered_labels = _order_group_labels(all_labels, start_hour=start_hour) page_labels, next_cursor = _paginate_labels(ordered_labels, cursor, page_size) payload_groups = [{"label": lab, "results": groups.get(lab, [])} for lab in page_labels] payload = { "at_utc": (at or now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")), "groups": payload_groups, "next_cursor": next_cursor, } return self._send_json(payload) except Exception as e: print("[/now-groups] error:", e) traceback.print_exc() return self._send_json({"error": "internal_error", "detail": str(e)}, code=500) if parsed.path in ("/ten-am", "/api/ten-am"): try: qs = parse_qs(parsed.query or "") exact = qs.get("exact", ["false"])[0].lower() in ("1", "true", "yes") at = 
qs.get("at", [None])[0] lang = qs.get("lang", [None])[0] minute = 0 if exact else None hits, _now_utc = compute_countries_at_time(10, exact_minute=minute, at_utc_iso=at) # Optionally include a legacy `summary` alias when lang is provided if lang: lang = lang.lower() for h in hits: preferred = None if lang.startswith("de"): preferred = h.get("summary_de") elif lang.startswith("jp") or lang.startswith("ja"): preferred = h.get("summary_jp") else: preferred = h.get("summary_en") # fallback to English if preferred is missing if not preferred: preferred = h.get("summary_en") h["summary"] = preferred payload = { "count": len(hits), "exact": exact, "at_utc": (at or _now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")), "results": hits, } return self._send_json(payload) except Exception as e: print("[/ten-am] error:", e) traceback.print_exc() return self._send_json({"error": "internal_error", "detail": str(e)}, code=500) def main(): import argparse parser = argparse.ArgumentParser(description="10AM server with static frontend") parser.add_argument("--host", default="127.0.0.1") parser.add_argument("--port", type=int, default=8000) parser.add_argument("--data", default=os.path.join(SCRIPT_DIR, "capitals_tz.json")) parser.add_argument("--static", default=os.path.join(SCRIPT_DIR, "static")) args = parser.parse_args() global DATA_FILE, STATIC_DIR, CAPITALS DATA_FILE = args.data STATIC_DIR = args.static with open(DATA_FILE, "r", encoding="utf-8") as f: CAPITALS = json.load(f) print(f"Serving on http://{args.host}:{args.port}") print(f"Data: {DATA_FILE}") print(f"Static: {STATIC_DIR}/ (open / in browser)") # Run database migrations before starting services try: run_db_migrations() except Exception as mig_err: print(f"[startup] migration failed: {mig_err}") # Start the background news collector (uses news_collector.py logic) start_collector_daemon() with HTTPServer((args.host, args.port), Handler) as httpd: try: httpd.serve_forever() except KeyboardInterrupt: print("\nShutting down.") if __name__ == "__main__": main()