import json, os, sqlite3, traceback, hashlib
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse, parse_qs
from datetime import datetime, timezone
from zoneinfo import ZoneInfo

SCRIPT_DIR = os.path.dirname(__file__)
DATA_FILE = os.environ.get("TENAM_DATA", os.path.join(SCRIPT_DIR, "capitals_tz.json"))
DB_FILE = os.path.join(SCRIPT_DIR, "news.db")
STATIC_DIR = os.environ.get("TENAM_STATIC", os.path.join(SCRIPT_DIR, "static"))

with open(DATA_FILE, "r", encoding="utf-8") as f:
    CAPITALS = json.load(f)


# --- Database migration ---
def run_db_migrations() -> None:
    """
    Ensures the news table schema supports multilingual summaries. If the old
    column `summary` exists, it will be renamed to `summary_en`. Missing
    columns `summary_de` and `summary_jp` will be added. Idempotent and safe
    across multiple runs.
    """
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()
    try:
        cursor.execute("PRAGMA table_info(news)")
        cols = [row[1] for row in cursor.fetchall()]
        if not cols:
            # No `news` table yet; the collector creates it on first run,
            # so there is nothing to migrate.
            return
        if "summary_en" not in cols and "summary" in cols:
            try:
                cursor.execute("ALTER TABLE news RENAME COLUMN summary TO summary_en")
                conn.commit()
                cursor.execute("PRAGMA table_info(news)")
                cols = [row[1] for row in cursor.fetchall()]
            except sqlite3.OperationalError:
                # Fallback for SQLite builds without RENAME COLUMN:
                # create a new table and copy the data over.
                cursor.execute(
                    """
                    CREATE TABLE news_new (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        country_name TEXT NOT NULL,
                        news_date DATE NOT NULL,
                        summary_en TEXT,
                        summary_de TEXT,
                        summary_jp TEXT,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        UNIQUE(country_name, news_date)
                    )
                    """
                )
                cursor.execute(
                    "INSERT INTO news_new (id, country_name, news_date, summary_en, created_at) "
                    "SELECT id, country_name, news_date, summary, created_at FROM news"
                )
                cursor.execute("DROP TABLE news")
                cursor.execute("ALTER TABLE news_new RENAME TO news")
                conn.commit()
                cursor.execute("PRAGMA table_info(news)")
                cols = [row[1] for row in cursor.fetchall()]
        # Add translation columns if missing
        if "summary_de" not in cols:
            cursor.execute("ALTER TABLE news ADD COLUMN summary_de TEXT")
        if "summary_jp" not in cols:
            cursor.execute("ALTER TABLE news ADD COLUMN summary_jp TEXT")
        conn.commit()
    finally:
        conn.close()
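
# A minimal post-migration sanity check, as a sketch. It assumes the collector
# has already created the `news` table; kept as a comment because this module
# runs code (the CAPITALS load) at import time.
#
#   conn = sqlite3.connect(DB_FILE)
#   cols = {row[1] for row in conn.execute("PRAGMA table_info(news)")}
#   assert {"summary_en", "summary_de", "summary_jp"} <= cols
#   conn.close()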


# --- News collector background daemon (reuses news_collector.py) ---
def start_collector_daemon():
    import threading, time
    try:
        # Import lazily so the backend can still boot even if requests/ollama aren't installed yet.
        import news_collector as nc
    except Exception as e:
        print(f"[collector] Not starting: failed to import news_collector: {e}")
        return

    # Make sure DB exists and run migrations
    try:
        nc.setup_database()
        # Attempt to run migrations via collector's helper (if present)
        if hasattr(nc, "run_db_migrations"):
            try:
                nc.run_db_migrations()
            except Exception as me:
                print(f"[collector] migration via news_collector failed: {me}")
        # Also run local migrations in case collector is outdated
        try:
            run_db_migrations()
        except Exception as e2:
            print(f"[collector] local migration failed: {e2}")
    except Exception as e:
        print(f"[collector] setup_database() failed: {e}")

    def _loop():
        print("[collector] daemon started (1s polling; triggers at 09:30 local time on UTC :00/:15/:30/:45).")
        while True:
            # Drive scheduling off UTC so quarter-hour timezones are handled
            now_utc = datetime.now(timezone.utc)
            try:
                if now_utc.second < 5 and (now_utc.minute % 15 == 0):
                    # Reload capitals data (keeps it in sync with TENAM_DATA / DATA_FILE)
                    try:
                        with open(DATA_FILE, "r", encoding="utf-8") as f:
                            capitals_data = json.load(f)
                    except Exception:
                        capitals_data = CAPITALS

                    # Prefetch at local 09:30 (covers :00/:15/:30/:45 UTC slots)
                    queue = nc.get_countries_at_time(9, 30, capitals_data)
                    if queue:
                        print(f"[collector] {len(queue)} countries at local 09:30 → fetching & summarizing…")
                        nc.process_country_queue(queue, overwrite=False)
                    else:
                        print("[collector] No countries at local 09:30 right now.")
                    # Avoid duplicate runs within the same minute window
                    time.sleep(60)

            except Exception as e:
                print(f"[collector] loop error: {e}")

            time.sleep(1)

    threading.Thread(target=_loop, name="tenam-collector", daemon=True).start()


def get_news_for_country(country_name: str, news_date) -> dict | None:
    """
    Fetches the latest news summaries for a specific country and date. Returns
    a dictionary with keys 'summary_en', 'summary_de', 'summary_jp', or None
    if no entry exists.
    """
    try:
        conn = sqlite3.connect(DB_FILE)
        try:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT summary_en, summary_de, summary_jp FROM news "
                "WHERE country_name = ? AND news_date = ? "
                "ORDER BY created_at DESC LIMIT 1",
                (country_name, news_date),
            )
            row = cursor.fetchone()
        finally:
            conn.close()
        if row:
            return {"summary_en": row[0], "summary_de": row[1], "summary_jp": row[2]}
        return None
    except sqlite3.OperationalError:
        # This can happen if the news_collector hasn't run yet and created the DB.
        return None
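
# Usage sketch (illustrative country and date; assumes the collector has
# already stored a row for that country/date):
#
#   from datetime import date
#   summaries = get_news_for_country("Japan", date(2024, 1, 15))
#   if summaries:
#       print(summaries["summary_en"])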


def compute_countries_at_time(target_hour=10, exact_minute=None, at_utc_iso=None):
    # Parse target UTC timestamp (if provided)
    if at_utc_iso:
        try:
            iso = at_utc_iso[:-1] + "+00:00" if at_utc_iso.endswith("Z") else at_utc_iso
            _now_utc = datetime.fromisoformat(iso).astimezone(timezone.utc)
        except Exception:
            _now_utc = datetime.now(timezone.utc)
    else:
        _now_utc = datetime.now(timezone.utc)

    hits = []
    for entry in CAPITALS:
        try:
            tzid = entry.get("tzid")
            if not tzid:
                continue
            tz = ZoneInfo(tzid)  # can throw if tzid invalid
            local = _now_utc.astimezone(tz)

            if local.hour == target_hour and (exact_minute is None or local.minute == exact_minute):
                summaries = get_news_for_country(entry["country"], local.date())
                # Build result entry with multilingual summaries (may be None)
                hit = {
                    "country": entry["country"],
                    "capital": entry["capital"],
                    "tzid": tzid,
                    "local_time": local.strftime("%Y-%m-%d %H:%M:%S"),
                    "utc_now": _now_utc.strftime("%Y-%m-%d %H:%M:%S"),
                    "summary_en": None,
                    "summary_de": None,
                    "summary_jp": None,
                }
                if summaries:
                    hit["summary_en"] = summaries.get("summary_en")
                    hit["summary_de"] = summaries.get("summary_de")
                    hit["summary_jp"] = summaries.get("summary_jp")
                hits.append(hit)
        except Exception as e:
            # Skip bad timezones/entries instead of crashing the request
            print(f"[compute] skipped {entry.get('country','?')} ({entry.get('tzid','?')}): {e}")
            continue

    hits.sort(key=lambda x: (x["country"].lower(), x["capital"].lower()))
    return hits, _now_utc
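
# Usage sketch: countries whose local wall clock is in the 10 o'clock hour at
# the given instant (the timestamp below is illustrative):
#
#   hits, now_utc = compute_countries_at_time(10)   # any minute of hour 10
#   hits, now_utc = compute_countries_at_time(10, exact_minute=0,
#                                             at_utc_iso="2024-01-15T01:00:00Z")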


def compute_countries_meta_at_time(target_hour=10, exact_minute=None, at_utc_iso=None):
    """Return only meta (no summaries) for the target time."""
    if at_utc_iso:
        try:
            iso = at_utc_iso[:-1] + "+00:00" if at_utc_iso.endswith("Z") else at_utc_iso
            _now_utc = datetime.fromisoformat(iso).astimezone(timezone.utc)
        except Exception:
            _now_utc = datetime.now(timezone.utc)
    else:
        _now_utc = datetime.now(timezone.utc)

    hits = []
    for entry in CAPITALS:
        try:
            tzid = entry.get("tzid")
            if not tzid:
                continue
            tz = ZoneInfo(tzid)
            local = _now_utc.astimezone(tz)
            if local.hour == target_hour and (exact_minute is None or local.minute == exact_minute):
                hits.append({
                    "country": entry["country"],
                    "capital": entry["capital"],
                    "tzid": tzid,
                    "local_time": local.strftime("%Y-%m-%d %H:%M:%S"),
                    "utc_now": _now_utc.strftime("%Y-%m-%d %H:%M:%S"),
                })
        except Exception as e:
            print(f"[compute-meta] skipped {entry.get('country','?')} ({entry.get('tzid','?')}): {e}")
            continue

    hits.sort(key=lambda x: (x["country"].lower(), x["capital"].lower()))
    return hits, _now_utc


def block_id_for_meta(meta_list, at_dt_utc):
    """Deterministic ID based on at_utc (minute precision) and the set of results."""
    at_norm = at_dt_utc.replace(second=0, microsecond=0).strftime("%Y-%m-%dT%H:%MZ")
    data = {
        "at": at_norm,
        "results": [(m["country"], m["capital"], m["tzid"]) for m in meta_list],
    }
    s = json.dumps(data, sort_keys=True, separators=(",", ":")).encode("utf-8")
    return hashlib.sha1(s).hexdigest()
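
# The ID is stable for a given minute and result set, so a client can poll the
# cheap /ten-am-id endpoint and re-fetch the full /ten-am payload only when the
# ID changes. Sketch:
#
#   meta, now_utc = compute_countries_meta_at_time(10, exact_minute=0)
#   block_id = block_id_for_meta(meta, now_utc)   # 40-char hex SHA-1 digest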


def _results_for_all_capitals(at_utc_iso: str | None):
    """
    Build per-capital results at the given UTC instant (or now).
    Returns a list of tuples (label 'HH:MM', result_dict), where result_dict
    contains multilingual summaries (each may be None).
    """
    # Parse target UTC timestamp (if provided)
    if at_utc_iso:
        try:
            iso = at_utc_iso[:-1] + "+00:00" if at_utc_iso.endswith("Z") else at_utc_iso
            now_utc = datetime.fromisoformat(iso).astimezone(timezone.utc)
        except Exception:
            now_utc = datetime.now(timezone.utc)
    else:
        now_utc = datetime.now(timezone.utc)

    rows = []
    for entry in CAPITALS:
        try:
            tzid = entry.get("tzid")
            if not tzid:
                continue
            tz = ZoneInfo(tzid)
            local = now_utc.astimezone(tz)
            label = local.strftime("%H:%M")  # minute-granularity buckets

            summaries = get_news_for_country(entry["country"], local.date())
            res = {
                "country": entry["country"],
                "capital": entry["capital"],
                "tzid": tzid,
                "local_time": local.strftime("%Y-%m-%d %H:%M:%S"),
                "utc_now": now_utc.strftime("%Y-%m-%d %H:%M:%S"),
                "summary_en": None,
                "summary_de": None,
                "summary_jp": None,
            }
            if summaries:
                res["summary_en"] = summaries.get("summary_en")
                res["summary_de"] = summaries.get("summary_de")
                res["summary_jp"] = summaries.get("summary_jp")

            rows.append((label, res))
        except Exception as e:
            print(f"[_results_for_all_capitals] skipped {entry.get('country','?')} ({entry.get('tzid','?')}): {e}")
            continue
    return rows, now_utc


def _order_group_labels(labels: list[str], start_hour: int = 10) -> list[str]:
    """
    Order labels (HH:MM) to start from the first label whose hour == start_hour,
    then continue through the day (by minute), then wrap past midnight.
    If no label with that hour exists, the earliest label in the next available
    hour becomes the start.
    """
    # Parse labels into (hour, minute, label); skip malformed labels
    parsed = []
    for lab in labels:
        try:
            h, m = lab.split(":")
            parsed.append((int(h), int(m), lab))
        except Exception:
            continue

    # Group by hour
    by_hour = {}
    for h, m, lab in parsed:
        by_hour.setdefault(h, []).append((m, lab))

    # Sort minutes within each hour
    for h in by_hour:
        by_hour[h].sort(key=lambda x: x[0])

    # Walk the hours starting at start_hour and wrapping around midnight.
    # Empty hours are simply skipped, so the first non-empty hour becomes the start.
    hours_order = list(range(start_hour, 24)) + list(range(0, start_hour))
    ordered_labels = []
    for h in hours_order:
        if h in by_hour:
            ordered_labels.extend(lab for _m, lab in by_hour[h])

    return ordered_labels
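
# Example (illustrative labels) with the default start_hour=10:
#
#   _order_group_labels(["09:30", "10:00", "10:15", "23:45"])
#   # -> ["10:00", "10:15", "23:45", "09:30"]
#
# The walk starts at hour 10 and wraps past midnight back to 09:30.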


def _paginate_labels(ordered_labels: list[str], cursor: str | None, page_size: int) -> tuple[list[str], str | None]:
    """
    Cursor has the form 'gidx:<int>', the start index into ordered_labels.
    If cursor is None, start at 0. Returns (page_labels, next_cursor_or_None).
    """
    start_idx = 0
    if cursor and cursor.startswith("gidx:"):
        try:
            start_idx = int(cursor.split(":")[1])
        except Exception:
            start_idx = 0

    end_idx = min(start_idx + page_size, len(ordered_labels))
    page = ordered_labels[start_idx:end_idx]
    next_cursor = f"gidx:{end_idx}" if end_idx < len(ordered_labels) else None
    return page, next_cursor
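
# Example with page_size=2 over three labels:
#
#   _paginate_labels(["10:00", "10:15", "10:30"], None, 2)      # (["10:00", "10:15"], "gidx:2")
#   _paginate_labels(["10:00", "10:15", "10:30"], "gidx:2", 2)  # (["10:30"], None)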


class Handler(BaseHTTPRequestHandler):
    def _send_json(self, payload: dict, code=200):
        body = json.dumps(payload, ensure_ascii=False, indent=2).encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.send_header("Cache-Control", "no-store")
        self.end_headers()
        self.wfile.write(body)

    def _send_static(self, path):
        root = os.path.abspath(STATIC_DIR)
        full = os.path.abspath(os.path.join(root, path.lstrip("/")))
        # Refuse paths that would escape the static directory (e.g. via "..")
        if os.path.commonpath([root, full]) != root:
            self.send_response(403); self.end_headers(); return
        if os.path.isdir(full):
            full = os.path.join(full, "index.html")
        if not os.path.exists(full):
            self.send_response(404); self.end_headers(); return
        if full.endswith(".html"):
            ctype = "text/html; charset=utf-8"
        elif full.endswith(".js"):
            ctype = "text/javascript; charset=utf-8"
        elif full.endswith(".css"):
            ctype = "text/css; charset=utf-8"
        else:
            ctype = "application/octet-stream"
        with open(full, "rb") as f:
            data = f.read()
        self.send_response(200)
        self.send_header("Content-Type", ctype)
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def do_GET(self):
        parsed = urlparse(self.path)

        # Lightweight ID-only endpoint
        if parsed.path in ("/ten-am-id", "/api/ten-am-id"):
            try:
                qs = parse_qs(parsed.query or "")
                exact = qs.get("exact", ["false"])[0].lower() in ("1", "true", "yes")
                at = qs.get("at", [None])[0]
                minute = 0 if exact else None

                meta, _now_utc = compute_countries_meta_at_time(10, exact_minute=minute, at_utc_iso=at)
                block_id = block_id_for_meta(meta, _now_utc)
                payload = {
                    "id": block_id,
                    "count": len(meta),
                    "exact": exact,
                    "at_utc": (at or _now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")),
                }
                return self._send_json(payload)
            except Exception as e:
                print("[/ten-am-id] error:", e)
                traceback.print_exc()
                return self._send_json({"error": "internal_error", "detail": str(e)}, code=500)

        # Grouped-now endpoint with cursor pagination
        if parsed.path in ("/now-groups", "/api/now-groups"):
            try:
                qs = parse_qs(parsed.query or "")
                at = qs.get("at", [None])[0]
                cursor = qs.get("cursor", [None])[0]
                page_size = int(qs.get("page_size", ["6"])[0])  # groups per page; tune as needed
                start_hour = int(qs.get("start_hour", ["10"])[0])

                rows, now_utc = _results_for_all_capitals(at)
                # Build groups: label => results[]
                groups = {}
                for label, res in rows:
                    groups.setdefault(label, []).append(res)

                # Sort countries inside a label (stable)
                for label in groups:
                    groups[label].sort(key=lambda r: (r["country"].lower(), r["capital"].lower()))

                all_labels = sorted(set(groups.keys()))  # existing labels, 00:00..23:59
                ordered_labels = _order_group_labels(all_labels, start_hour=start_hour)

                page_labels, next_cursor = _paginate_labels(ordered_labels, cursor, page_size)

                payload_groups = [{"label": lab, "results": groups.get(lab, [])} for lab in page_labels]
                payload = {
                    "at_utc": (at or now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")),
                    "groups": payload_groups,
                    "next_cursor": next_cursor,
                }
                return self._send_json(payload)
            except Exception as e:
                print("[/now-groups] error:", e)
                traceback.print_exc()
                return self._send_json({"error": "internal_error", "detail": str(e)}, code=500)

        if parsed.path in ("/ten-am", "/api/ten-am"):
            try:
                qs = parse_qs(parsed.query or "")
                exact = qs.get("exact", ["false"])[0].lower() in ("1", "true", "yes")
                at = qs.get("at", [None])[0]
                lang = qs.get("lang", [None])[0]
                minute = 0 if exact else None

                hits, _now_utc = compute_countries_at_time(10, exact_minute=minute, at_utc_iso=at)
                # Optionally include a legacy `summary` alias when lang is provided
                if lang:
                    lang = lang.lower()
                    for h in hits:
                        if lang.startswith("de"):
                            preferred = h.get("summary_de")
                        elif lang.startswith("jp") or lang.startswith("ja"):
                            preferred = h.get("summary_jp")
                        else:
                            preferred = h.get("summary_en")
                        # Fall back to English if the preferred language is missing
                        if not preferred:
                            preferred = h.get("summary_en")
                        h["summary"] = preferred

                payload = {
                    "count": len(hits),
                    "exact": exact,
                    "at_utc": (at or _now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")),
                    "results": hits,
                }
                return self._send_json(payload)
            except Exception as e:
                print("[/ten-am] error:", e)
                traceback.print_exc()
                return self._send_json({"error": "internal_error", "detail": str(e)}, code=500)

        # Anything else falls through to the static frontend ("/" serves index.html)
        return self._send_static(parsed.path)


def main():
    global DATA_FILE, STATIC_DIR, CAPITALS
    import argparse
    parser = argparse.ArgumentParser(description="10AM server with static frontend")
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", type=int, default=8000)
    # Defaults honor the TENAM_DATA / TENAM_STATIC environment variables
    parser.add_argument("--data", default=DATA_FILE)
    parser.add_argument("--static", default=STATIC_DIR)
    args = parser.parse_args()

    DATA_FILE = args.data
    STATIC_DIR = args.static
    with open(DATA_FILE, "r", encoding="utf-8") as f:
        CAPITALS = json.load(f)

    print(f"Serving on http://{args.host}:{args.port}")
    print(f"Data: {DATA_FILE}")
    print(f"Static: {STATIC_DIR}/ (open / in browser)")

    # Run database migrations before starting services
    try:
        run_db_migrations()
    except Exception as mig_err:
        print(f"[startup] migration failed: {mig_err}")
    # Start the background news collector (uses news_collector.py logic)
    start_collector_daemon()

    with HTTPServer((args.host, args.port), Handler) as httpd:
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nShutting down.")


if __name__ == "__main__":
    main()
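
# Invocation sketch (flags as defined above; the filename is whatever this
# module is saved as):
#
#   python ten_am_server.py --host 0.0.0.0 --port 8000 --data capitals_tz.json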