commit 94051dd0f8a6342e4ee0cc157b7247f4202f4770 Author: Victor Giers Date: Sun Mar 15 14:51:29 2026 +0100 initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..2888a03 --- /dev/null +++ b/README.md @@ -0,0 +1,113 @@ +# YouTube Summarizer + +This is a local-first desktop app for summarizing YouTube videos with Ollama. + +It uses: + +- Tauri for the desktop shell +- a bundled Python backend for transcript/audio processing in release builds +- Ollama on `localhost` for summarization and translation +- SQLite for local history + +## What It Does + +Given a YouTube URL, the app can: + +- fetch a transcript via the YouTube transcript API or via Whisper +- generate an English summary with a local Ollama model +- optionally translate that summary into German and Japanese +- store the results locally so they can be reopened later + +## Local-Only Behavior + +This repository is intentionally reset to a clean publishable state: + +- no Discord webhook integration +- no remote PHP/MySQL sync +- no bundled production data or pre-filled database +- runtime data is stored in the OS app data directory, not in the repo + +## End User Requirements + +If you ship a built installer, the user should only need: + +- Ollama installed locally +- the Ollama model they want to use pulled locally + +Notes: + +- The installer is designed to bundle the backend helper plus `ffmpeg` / `ffprobe`. +- Whisper model weights are not bundled; the selected Whisper model is downloaded on first use and then cached locally. + +## Developer Requirements + +For development in this repo you still need: + +- Python 3.8+ +- Rust/Cargo +- FFmpeg in `PATH` +- Ollama running locally on `http://localhost:11434` + +Python dependencies are listed in [requirements.txt](requirements.txt). 
+ +## Run In Development + +macOS/Linux: + +```bash +./run.sh +``` + +Windows: + +```bat +run.bat +``` + +Or directly: + +```bash +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt +cargo run --manifest-path src-tauri/Cargo.toml +``` + +The app prefers a bundled backend executable when one is present under [src-tauri/resources/backend](src-tauri/resources/backend), and otherwise falls back to the local Python environment for development. + +## Build A Shippable Bundle + +1. Make sure the build machine has Python, Rust/Cargo, and `ffmpeg` / `ffprobe` available on `PATH`. +2. Run: + +```bash +python3 tools/prepare_bundle.py +``` + +3. Then build the installer: + +```bash +cargo tauri build +``` + +What `tools/prepare_bundle.py` does: + +- installs PyInstaller into the current Python environment +- builds a single-file backend executable from [backend_cli.py](backend_cli.py) +- copies that executable into [src-tauri/resources/backend](src-tauri/resources/backend) +- copies `ffmpeg` and `ffprobe` from the build machine into [src-tauri/resources/ffmpeg](src-tauri/resources/ffmpeg) + +Build once on each target OS you want to ship. For Windows 10, build on Windows. + +## Build On GitHub Actions + +A Windows build workflow is included at [.github/workflows/windows-installer.yml](.github/workflows/windows-installer.yml). + +It runs on `windows-latest`, installs `ffmpeg` and NSIS, prepares the bundled Python backend with [tools/prepare_bundle.py](tools/prepare_bundle.py), builds an NSIS installer, and uploads the result as a workflow artifact named `windows-installer`. + +## Notes + +- If Python is not on your `PATH` for development, set `YTS_PYTHON` to the interpreter you want the Tauri backend to use. 
+- If you want to test a prebuilt backend executable during development, set `YTS_BACKEND_BIN` to its full path. +- If `ffmpeg` or `ffprobe` are not on `PATH` during bundle prep, set `YTS_FFMPEG` and `YTS_FFPROBE` to their full paths before running [tools/prepare_bundle.py](tools/prepare_bundle.py). +- Generated thumbnails and the SQLite database are created on first run in the app's local data directory. diff --git a/backend_cli.py b/backend_cli.py new file mode 100644 index 0000000..7522f2e --- /dev/null +++ b/backend_cli.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +""" +Single CLI entrypoint for the bundled summarizer backend. + +This wrapper lets the Tauri app launch one helper executable in production +while still supporting direct Python execution during development. +""" + +import argparse +import json +import sys +from pathlib import Path + +from translate_summary import translate_summary_text +from youtube_summarizer import process_video + + +DEFAULT_MODEL = "mistral:latest" + + +def configure_stdio() -> None: + """Keep progress output line-buffered for the desktop app.""" + if hasattr(sys.stdout, "reconfigure"): + sys.stdout.reconfigure(line_buffering=True) + if hasattr(sys.stderr, "reconfigure"): + sys.stderr.reconfigure(line_buffering=True) + + +def summarize(args: argparse.Namespace) -> int: + meta = process_video( + args.url, + use_whisper=args.use_whisper, + model=args.model, + output_json=args.output_json, + ) + if not args.output_json: + print(json.dumps(meta, ensure_ascii=False), flush=True) + return 0 + + +def translate(args: argparse.Namespace) -> int: + summary_path = Path(args.summary_file) + summary_text = summary_path.read_text(encoding="utf-8").strip() + if not summary_text: + raise SystemExit("Empty summary text!") + + translation = translate_summary_text(summary_text, args.lang, args.model) + + if args.output_file: + Path(args.output_file).write_text(translation, encoding="utf-8") + else: + 
print(translation, flush=True) + return 0 + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Bundled backend for YouTube Summarizer") + subparsers = parser.add_subparsers(dest="command", required=True) + + summarize_parser = subparsers.add_parser("summarize", help="Summarize a YouTube video") + summarize_parser.add_argument("--url", required=True, help="YouTube video URL") + summarize_parser.add_argument("--model", default=DEFAULT_MODEL, help="Ollama model to use") + summarize_parser.add_argument( + "--no-whisper", + dest="use_whisper", + action="store_false", + help="Use transcript/subtitle workflows instead of Whisper", + ) + summarize_parser.add_argument( + "--output-json", + help="Write the result metadata to a JSON file instead of stdout", + ) + summarize_parser.set_defaults(use_whisper=True, handler=summarize) + + translate_parser = subparsers.add_parser("translate", help="Translate an English summary") + translate_parser.add_argument("--summary-file", required=True, help="Path to the English summary text") + translate_parser.add_argument("--lang", required=True, choices=["de", "jp"], help="Target language") + translate_parser.add_argument("--model", default=DEFAULT_MODEL, help="Ollama model to use") + translate_parser.add_argument("--output-file", help="Optional path to write the translated text") + translate_parser.set_defaults(handler=translate) + + return parser + + +def main() -> int: + configure_stdio() + parser = build_parser() + args = parser.parse_args() + return args.handler(args) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/icon.png b/icon.png new file mode 100644 index 0000000..efaa522 Binary files /dev/null and b/icon.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e053eaf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +requests +yt-dlp +webvtt-py +youtube-transcript-api +openai-whisper \ No newline at end of file diff 
--git a/run.bat b/run.bat new file mode 100755 index 0000000..f6a348b --- /dev/null +++ b/run.bat @@ -0,0 +1,27 @@ +@echo off +setlocal + +REM 1. Create the venv if it does not exist yet +if not exist venv ( + echo Erstelle Python venv... + python -m venv venv +) + +REM 2. Activate the venv +echo Aktiviere venv... +call venv\Scripts\activate + +REM 3. Install the Python dependencies +echo Installiere Python requirements... +pip install --upgrade pip +pip install -r requirements.txt + +REM 4. Start the Tauri app +echo Starte die Tauri App... +cargo run --manifest-path src-tauri/Cargo.toml + +REM 5. Deactivate the venv (optional) +deactivate + +endlocal +pause diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..27d4289 --- /dev/null +++ b/run.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -e + +# 1. Set up the Python venv +GREEN="\033[0;32m" +CYAN="\033[0;36m" +NC="\033[0m" # No Color +echo -e "${CYAN}1. Python venv einrichten …${NC}" +if [ ! -d "venv" ]; then + python3 -m venv venv +fi + +# 2. Activate the venv +echo -e "${CYAN}2. Aktiviere venv …${NC}" +source venv/bin/activate + +# 3. Install the Python dependencies +echo -e "${CYAN}3. Python-Abhängigkeiten installieren …${NC}" +pip install --upgrade pip +pip install -r requirements.txt +pip install --upgrade yt-dlp + +# 4. Start the Tauri app +echo -e "${CYAN}4. 
Starte die Tauri App …${NC}" +cargo run --manifest-path src-tauri/Cargo.toml diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml new file mode 100644 index 0000000..bbadb4c --- /dev/null +++ b/src-tauri/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "youtube-summarizer" +version = "1.0.0" +description = "A local-first desktop tool for summarizing YouTube videos" +authors = ["Victor Giers "] +edition = "2021" + +[build-dependencies] +tauri-build = { version = "2.5.6", features = [] } + +[dependencies] +open = "5.3.3" +reqwest = { version = "0.12.24", default-features = false, features = ["blocking", "json", "rustls-tls"] } +rusqlite = { version = "0.37.0", features = ["bundled"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tauri = { version = "2.10.3", features = ["protocol-asset"] } +tauri-plugin-dialog = "2.6.0" diff --git a/src-tauri/build.rs b/src-tauri/build.rs new file mode 100644 index 0000000..c53d3ba --- /dev/null +++ b/src-tauri/build.rs @@ -0,0 +1,7 @@ +fn main() { + println!( + "cargo:rustc-env=TAURI_BUILD_TARGET={}", + std::env::var("TARGET").expect("TARGET not set by cargo") + ); + tauri_build::build(); +} diff --git a/src-tauri/capabilities/default.json b/src-tauri/capabilities/default.json new file mode 100644 index 0000000..b32a4d7 --- /dev/null +++ b/src-tauri/capabilities/default.json @@ -0,0 +1,6 @@ +{ + "identifier": "default", + "description": "Default capability set for the main window.", + "windows": ["main"], + "permissions": ["core:default", "dialog:allow-confirm"] +} diff --git a/src-tauri/icons/icon.png b/src-tauri/icons/icon.png new file mode 100644 index 0000000..a099373 Binary files /dev/null and b/src-tauri/icons/icon.png differ diff --git a/src-tauri/resources/backend/.gitkeep b/src-tauri/resources/backend/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src-tauri/resources/backend/.gitkeep @@ -0,0 +1 @@ + diff --git a/src-tauri/resources/ffmpeg/.gitkeep 
b/src-tauri/resources/ffmpeg/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src-tauri/resources/ffmpeg/.gitkeep @@ -0,0 +1 @@ + diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs new file mode 100644 index 0000000..ef948e2 --- /dev/null +++ b/src-tauri/src/main.rs @@ -0,0 +1,755 @@ +#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] + +use std::{ + env, fs, + io::{BufRead, BufReader, ErrorKind}, + path::{Path, PathBuf}, + process::{Command, Stdio}, + sync::{Arc, Mutex}, + thread, + time::{SystemTime, UNIX_EPOCH}, +}; + +use open::that; +use reqwest::blocking::Client; +use rusqlite::{params, Connection, OptionalExtension}; +use serde::{Deserialize, Serialize}; +use tauri::{AppHandle, Emitter, Manager, State, WebviewWindow}; + +const DEFAULT_MODEL: &str = "mistral:latest"; +const OLLAMA_TAGS_URL: &str = "http://localhost:11434/api/tags"; +const BACKEND_EXECUTABLE_NAME: &str = "yts-backend"; +const TARGET_TRIPLE: &str = env!("TAURI_BUILD_TARGET"); + +#[derive(Clone)] +enum BackendRuntime { + Bundled { + executable: PathBuf, + }, + Python { + python: PathBuf, + script_dir: PathBuf, + }, +} + +#[derive(Clone)] +struct AppState { + app_dir: PathBuf, + media_dir: PathBuf, + db_path: PathBuf, + backend: BackendRuntime, + ffmpeg_path: Option, + ffprobe_path: Option, + whisper_cache_dir: PathBuf, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct SummarizeVideoRequest { + url: String, + use_whisper: bool, + model: Option, +} + +#[derive(Debug, Deserialize)] +struct DeleteSummaryRequest { + id: i64, +} + +#[derive(Debug, Deserialize)] +struct TranslateSummaryRequest { + id: i64, + lang: String, + model: Option, +} + +#[derive(Debug, Deserialize)] +struct BackendSummaryMeta { + timestamp: String, + video_id: String, + url: String, + video_name: String, + channel: Option, + thumbnail: Option, + audio: Option, + transcript: Option, + summary: String, +} + +#[derive(Debug, Deserialize)] +struct 
OllamaTagsResponse { + models: Vec, +} + +#[derive(Debug, Deserialize)] +struct OllamaModel { + name: String, +} + +#[derive(Debug)] +struct StoredSummary { + id: i64, + timestamp: Option, + video_id: Option, + url: Option, + video_name: Option, + channel: Option, + thumbnail: Option, + audio: Option, + transcript: Option, + summary_en: Option, + summary_de: Option, + summary_jp: Option, +} + +#[derive(Debug, Serialize)] +struct SummaryEntry { + id: i64, + timestamp: Option, + video_id: Option, + url: Option, + video_name: Option, + channel: Option, + thumbnail: Option, + audio: Option, + transcript: Option, + summary_en: Option, + summary_de: Option, + summary_jp: Option, +} + +impl StoredSummary { + fn from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result { + Ok(Self { + id: row.get("id")?, + timestamp: row.get("timestamp")?, + video_id: row.get("video_id")?, + url: row.get("url")?, + video_name: row.get("video_name")?, + channel: row.get("channel")?, + thumbnail: row.get("thumbnail")?, + audio: row.get("audio")?, + transcript: row.get("transcript")?, + summary_en: row.get("summary_en")?, + summary_de: row.get("summary_de")?, + summary_jp: row.get("summary_jp")?, + }) + } + + fn into_entry(self, state: &AppState) -> SummaryEntry { + SummaryEntry { + id: self.id, + timestamp: self.timestamp, + video_id: self.video_id, + url: self.url, + video_name: self.video_name, + channel: self.channel, + thumbnail: absolute_media_path(state, self.thumbnail), + audio: absolute_media_path(state, self.audio), + transcript: absolute_media_path(state, self.transcript), + summary_en: self.summary_en, + summary_de: self.summary_de, + summary_jp: self.summary_jp, + } + } +} + +fn absolute_media_path(state: &AppState, file_name: Option) -> Option { + file_name.map(|name| state.media_dir.join(name).to_string_lossy().into_owned()) +} + +fn normalize_model(model: Option) -> String { + model + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) + .unwrap_or_else(|| 
DEFAULT_MODEL.to_string()) +} + +fn now_millis() -> u128 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() +} + +fn resolve_project_root() -> Result { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .canonicalize() + .map_err(|err| format!("Failed to resolve project root: {err}")) +} + +fn platform_executable_name(base_name: &str) -> String { + if cfg!(windows) { + format!("{base_name}.exe") + } else { + base_name.to_string() + } +} + +fn resolve_resource_file(app: &AppHandle, relative_path: &Path) -> Option { + let mut candidates = Vec::new(); + + if let Ok(resource_dir) = app.path().resource_dir() { + candidates.push(resource_dir.join(relative_path)); + } + + candidates.push( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("resources") + .join(relative_path), + ); + + candidates.into_iter().find(|path| path.exists()) +} + +fn resolve_backend_binary(app: &AppHandle) -> Option { + if let Ok(path) = env::var("YTS_BACKEND_BIN") { + let trimmed = path.trim(); + if !trimmed.is_empty() { + return Some(PathBuf::from(trimmed)); + } + } + + let relative_path = Path::new("backend") + .join(TARGET_TRIPLE) + .join(platform_executable_name(BACKEND_EXECUTABLE_NAME)); + resolve_resource_file(app, &relative_path) +} + +fn resolve_script_dir(app: &AppHandle) -> Result { + if let Ok(resource_dir) = app.path().resource_dir() { + if resource_dir.join("backend_cli.py").exists() { + return Ok(resource_dir); + } + } + + let project_dir = resolve_project_root()?; + if project_dir.join("backend_cli.py").exists() { + return Ok(project_dir); + } + + Err("Unable to locate bundled or development backend Python scripts.".to_string()) +} + +fn resolve_python_command(script_dir: &Path) -> Result { + if let Ok(path) = env::var("YTS_PYTHON") { + let trimmed = path.trim(); + if !trimmed.is_empty() { + return Ok(PathBuf::from(trimmed)); + } + } + + let mut candidates = Vec::new(); + 
candidates.push(script_dir.join("venv").join("bin").join("python3")); + candidates.push(script_dir.join("venv").join("bin").join("python")); + candidates.push(script_dir.join("venv").join("Scripts").join("python.exe")); + candidates.push(PathBuf::from("python3")); + candidates.push(PathBuf::from("python")); + + for candidate in candidates { + if Command::new(&candidate).arg("--version").output().is_ok() { + return Ok(candidate); + } + } + + Err("Unable to find a usable Python interpreter. Set YTS_PYTHON to override.".to_string()) +} + +fn resolve_backend_runtime(app: &AppHandle) -> Result { + if let Some(executable) = resolve_backend_binary(app) { + return Ok(BackendRuntime::Bundled { executable }); + } + + let script_dir = resolve_script_dir(app)?; + let python = resolve_python_command(&script_dir)?; + Ok(BackendRuntime::Python { python, script_dir }) +} + +fn resolve_optional_tool_path(app: &AppHandle, env_name: &str, tool_name: &str) -> Option { + if let Ok(path) = env::var(env_name) { + let trimmed = path.trim(); + if !trimmed.is_empty() { + return Some(PathBuf::from(trimmed)); + } + } + + let relative_path = Path::new("ffmpeg") + .join(TARGET_TRIPLE) + .join(platform_executable_name(tool_name)); + resolve_resource_file(app, &relative_path) +} + +fn resolve_whisper_cache_dir(app: &AppHandle) -> Result { + let cache_root = app + .path() + .app_cache_dir() + .or_else(|_| app.path().app_local_data_dir()) + .map_err(|err| format!("Failed to resolve application cache directory: {err}"))?; + let whisper_cache_dir = cache_root.join("whisper"); + fs::create_dir_all(&whisper_cache_dir) + .map_err(|err| format!("Failed to create Whisper cache directory: {err}"))?; + Ok(whisper_cache_dir) +} + +fn open_connection(state: &AppState) -> Result { + Connection::open(&state.db_path).map_err(|err| format!("Failed to open SQLite database: {err}")) +} + +fn init_db(state: &AppState) -> Result<(), String> { + let db = open_connection(state)?; + db.execute_batch( + r#" + CREATE 
TABLE IF NOT EXISTS summaries ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp TEXT, + video_id TEXT, + url TEXT, + video_name TEXT, + channel TEXT, + thumbnail TEXT, + audio TEXT, + transcript TEXT, + summary_en TEXT, + summary_de TEXT, + summary_jp TEXT + ); + "#, + ) + .map_err(|err| format!("Failed to initialize SQLite schema: {err}"))?; + Ok(()) +} + +fn remove_named_media_file(media_dir: &Path, file_name: &str) { + let path = media_dir.join(file_name); + if let Err(err) = fs::remove_file(&path) { + if err.kind() != ErrorKind::NotFound { + eprintln!("Failed to remove {}: {}", path.display(), err); + } + } +} + +fn cleanup_artifacts(state: &AppState, audio: Option<&str>, transcript: Option<&str>) { + if let Some(audio_file) = audio.filter(|value| !value.trim().is_empty()) { + remove_named_media_file(&state.media_dir, audio_file); + } + if let Some(transcript_file) = transcript.filter(|value| !value.trim().is_empty()) { + remove_named_media_file(&state.media_dir, transcript_file); + } +} + +fn purge_existing_artifacts(state: &AppState) -> Result<(), String> { + let db = open_connection(state)?; + let mut stmt = db + .prepare("SELECT id, audio, transcript FROM summaries WHERE audio IS NOT NULL OR transcript IS NOT NULL") + .map_err(|err| format!("Failed to prepare artifact cleanup query: {err}"))?; + + let rows = stmt + .query_map([], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, Option>(1)?, + row.get::<_, Option>(2)?, + )) + }) + .map_err(|err| format!("Failed to load stored artifacts: {err}"))?; + + let mut entries = Vec::new(); + for row in rows { + entries.push(row.map_err(|err| format!("Failed to decode stored artifact row: {err}"))?); + } + drop(stmt); + + for (id, audio, transcript) in entries { + cleanup_artifacts(state, audio.as_deref(), transcript.as_deref()); + db.execute( + "UPDATE summaries SET audio = NULL, transcript = NULL WHERE id = ?", + [id], + ) + .map_err(|err| format!("Failed to clear stored artifact references: {err}"))?; + } 
+ + Ok(()) +} + +fn ensure_app_state(app: &AppHandle) -> Result { + let app_dir = app + .path() + .app_local_data_dir() + .map_err(|err| format!("Failed to resolve application data directory: {err}"))?; + let media_dir = app_dir.join("data"); + fs::create_dir_all(&media_dir) + .map_err(|err| format!("Failed to create application data directory: {err}"))?; + + let state = AppState { + backend: resolve_backend_runtime(app)?, + ffmpeg_path: resolve_optional_tool_path(app, "YTS_FFMPEG", "ffmpeg"), + ffprobe_path: resolve_optional_tool_path(app, "YTS_FFPROBE", "ffprobe"), + whisper_cache_dir: resolve_whisper_cache_dir(app)?, + app_dir: app_dir.clone(), + media_dir, + db_path: app_dir.join("summaries.db"), + }; + + init_db(&state)?; + purge_existing_artifacts(&state)?; + Ok(state) +} + +fn emit_progress(app: &AppHandle, window_label: &str, line: &str) { + let trimmed = line.trim(); + if !trimmed.is_empty() { + let _ = app.emit_to(window_label, "summarize-progress", trimmed.to_string()); + } +} + +fn apply_backend_env(command: &mut Command, state: &AppState) { + command.env("PYTHONUNBUFFERED", "1"); + command.env("YTS_WHISPER_CACHE_DIR", &state.whisper_cache_dir); + + if let Some(ffmpeg_path) = &state.ffmpeg_path { + command.env("YTS_FFMPEG", ffmpeg_path); + } + if let Some(ffprobe_path) = &state.ffprobe_path { + command.env("YTS_FFPROBE", ffprobe_path); + } +} + +fn build_backend_command(state: &AppState, args: &[String]) -> Command { + let mut command = match &state.backend { + BackendRuntime::Bundled { executable } => Command::new(executable), + BackendRuntime::Python { python, script_dir } => { + let mut command = Command::new(python); + command.arg(script_dir.join("backend_cli.py")); + command + } + }; + + command.args(args).current_dir(&state.media_dir); + apply_backend_env(&mut command, state); + command +} + +fn run_backend_json_command( + state: &AppState, + app: &AppHandle, + window_label: &str, + args: &[String], +) -> Result { + let output_path = 
state.app_dir.join(format!("tmp_{}.json", now_millis())); + let mut command_args = args.to_vec(); + command_args.push("--output-json".to_string()); + command_args.push(output_path.to_string_lossy().into_owned()); + + let mut child = build_backend_command(state, &command_args) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|err| format!("Failed to start bundled backend: {err}"))?; + + let stdout = child + .stdout + .take() + .ok_or_else(|| "Backend stdout was not captured.".to_string())?; + let stderr = child + .stderr + .take() + .ok_or_else(|| "Backend stderr was not captured.".to_string())?; + let stderr_buffer = Arc::new(Mutex::new(String::new())); + + let stdout_app = app.clone(); + let stdout_label = window_label.to_string(); + let stdout_handle = thread::spawn(move || { + for line in BufReader::new(stdout).lines() { + match line { + Ok(line) => emit_progress(&stdout_app, &stdout_label, &line), + Err(_) => break, + } + } + }); + + let stderr_app = app.clone(); + let stderr_label = window_label.to_string(); + let stderr_buffer_clone = Arc::clone(&stderr_buffer); + let stderr_handle = thread::spawn(move || { + for line in BufReader::new(stderr).lines() { + match line { + Ok(line) => { + emit_progress(&stderr_app, &stderr_label, &line); + if let Ok(mut buffer) = stderr_buffer_clone.lock() { + buffer.push_str(&line); + buffer.push('\n'); + } + } + Err(_) => break, + } + } + }); + + let status = child + .wait() + .map_err(|err| format!("Failed to wait for bundled backend: {err}"))?; + + let _ = stdout_handle.join(); + let _ = stderr_handle.join(); + + if !status.success() { + let stderr_output = stderr_buffer + .lock() + .map(|buffer| buffer.trim().to_string()) + .unwrap_or_else(|_| String::new()); + let message = if stderr_output.is_empty() { + format!("Bundled backend exited with status {status}.") + } else { + stderr_output + }; + let _ = fs::remove_file(&output_path); + return Err(message); + } + + let raw_json = 
fs::read_to_string(&output_path) + .map_err(|err| format!("Failed to read backend output JSON: {err}"))?; + let _ = fs::remove_file(&output_path); + + serde_json::from_str(&raw_json).map_err(|err| format!("Invalid backend output JSON: {err}")) +} + +fn run_backend_text_command(state: &AppState, args: &[String]) -> Result { + let output = build_backend_command(state, args) + .output() + .map_err(|err| format!("Failed to start translation backend: {err}"))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); + return Err(if stderr.is_empty() { + format!("Translation backend exited with status {}.", output.status) + } else { + stderr + }); + } + + let translation = String::from_utf8(output.stdout) + .map_err(|err| format!("Translation backend returned invalid UTF-8: {err}"))? + .trim() + .to_string(); + if translation.is_empty() { + return Err("Translation backend returned an empty result.".to_string()); + } + + Ok(translation) +} + +fn get_entry_by_id(state: &AppState, id: i64) -> Result { + let db = open_connection(state)?; + let stored = db + .query_row( + "SELECT * FROM summaries WHERE id = ?", + [id], + StoredSummary::from_row, + ) + .optional() + .map_err(|err| format!("Failed to query summary entry: {err}"))? 
+ .ok_or_else(|| "Entry not found.".to_string())?; + Ok(stored.into_entry(state)) +} + +fn summarize_video_inner( + state: &AppState, + app: &AppHandle, + window_label: &str, + request: SummarizeVideoRequest, +) -> Result { + let model = normalize_model(request.model); + let mut args = vec![ + "summarize".to_string(), + "--url".to_string(), + request.url, + "--model".to_string(), + model, + ]; + if !request.use_whisper { + args.push("--no-whisper".to_string()); + } + + let info = run_backend_json_command(state, app, window_label, &args)?; + cleanup_artifacts(state, info.audio.as_deref(), info.transcript.as_deref()); + + let db = open_connection(state)?; + db.execute( + "INSERT INTO summaries (timestamp, video_id, url, video_name, channel, thumbnail, audio, transcript, summary_en, summary_de, summary_jp) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + params![ + info.timestamp, + info.video_id, + info.url, + info.video_name, + info.channel, + info.thumbnail, + Option::::None, + Option::::None, + info.summary, + Option::::None, + Option::::None, + ], + ) + .map_err(|err| format!("Failed to save summary entry: {err}"))?; + + get_entry_by_id(state, db.last_insert_rowid()) +} + +fn translate_summary_inner( + state: &AppState, + request: TranslateSummaryRequest, +) -> Result { + let db = open_connection(state)?; + let summary_text = db + .query_row( + "SELECT summary_en FROM summaries WHERE id = ?", + [request.id], + |row| row.get::<_, Option>(0), + ) + .optional() + .map_err(|err| format!("Failed to load English summary for translation: {err}"))? 
+ .flatten() + .ok_or_else(|| "No English summary found for translation.".to_string())?; + + let tmp_summary_path = + state + .app_dir + .join(format!("tmp_summary_{}_{}.txt", request.id, now_millis())); + fs::write(&tmp_summary_path, summary_text) + .map_err(|err| format!("Failed to write temporary summary file: {err}"))?; + + let model = normalize_model(request.model); + let args = vec![ + "translate".to_string(), + "--summary-file".to_string(), + tmp_summary_path.to_string_lossy().into_owned(), + "--lang".to_string(), + request.lang.clone(), + "--model".to_string(), + model, + ]; + let result = run_backend_text_command(state, &args); + + let _ = fs::remove_file(&tmp_summary_path); + let translation = result?; + + let column = match request.lang.as_str() { + "de" => "summary_de", + "jp" => "summary_jp", + _ => return Err("Unsupported language code.".to_string()), + }; + + db.execute( + &format!("UPDATE summaries SET {column} = ? WHERE id = ?"), + params![translation, request.id], + ) + .map_err(|err| format!("Failed to save translated summary: {err}"))?; + + get_entry_by_id(state, request.id) +} + +#[tauri::command] +fn get_models() -> Result, String> { + let payload = Client::new() + .get(OLLAMA_TAGS_URL) + .send() + .and_then(|response| response.error_for_status()) + .map_err(|err| format!("Failed to query Ollama models: {err}"))? 
+ .json::() + .map_err(|err| format!("Failed to parse Ollama model list: {err}"))?; + + Ok(payload.models.into_iter().map(|model| model.name).collect()) +} + +#[tauri::command] +fn get_summaries(state: State<'_, AppState>) -> Result, String> { + let db = open_connection(&state)?; + let mut stmt = db + .prepare("SELECT * FROM summaries ORDER BY id DESC") + .map_err(|err| format!("Failed to prepare summary query: {err}"))?; + let rows = stmt + .query_map([], StoredSummary::from_row) + .map_err(|err| format!("Failed to read summaries: {err}"))?; + + let mut items = Vec::new(); + for row in rows { + let entry = row + .map_err(|err| format!("Failed to decode summary row: {err}"))? + .into_entry(&state); + items.push(entry); + } + + Ok(items) +} + +#[tauri::command] +async fn summarize_video( + state: State<'_, AppState>, + window: WebviewWindow, + request: SummarizeVideoRequest, +) -> Result { + let state = state.inner().clone(); + let app = window.app_handle().clone(); + let window_label = window.label().to_string(); + tauri::async_runtime::spawn_blocking(move || { + summarize_video_inner(&state, &app, &window_label, request) + }) + .await + .map_err(|err| format!("Summarize task failed: {err}"))? +} + +#[tauri::command] +fn delete_summary(state: State<'_, AppState>, request: DeleteSummaryRequest) -> Result<(), String> { + let db = open_connection(&state)?; + db.execute("DELETE FROM summaries WHERE id = ?", [request.id]) + .map_err(|err| format!("Failed to delete summary entry: {err}"))?; + Ok(()) +} + +#[tauri::command] +async fn translate_summary( + state: State<'_, AppState>, + request: TranslateSummaryRequest, +) -> Result { + let state = state.inner().clone(); + tauri::async_runtime::spawn_blocking(move || translate_summary_inner(&state, request)) + .await + .map_err(|err| format!("Translate task failed: {err}"))? 
+} + +#[tauri::command] +fn open_external(url: String) -> Result<(), String> { + that(url).map_err(|err| format!("Failed to open URL: {err}")) +} + +#[tauri::command] +fn open_file(file_path: String) -> Result<(), String> { + let path = Path::new(&file_path); + if !path.exists() { + return Err("Requested file does not exist.".to_string()); + } + that(path).map_err(|err| format!("Failed to open file: {err}")) +} + +fn main() { + tauri::Builder::default() + .plugin(tauri_plugin_dialog::init()) + .setup(|app| { + let state = ensure_app_state(app.handle())?; + app.manage(state); + Ok(()) + }) + .invoke_handler(tauri::generate_handler![ + get_models, + get_summaries, + summarize_video, + delete_summary, + translate_summary, + open_external, + open_file + ]) + .run(tauri::generate_context!()) + .expect("error while running tauri application"); +} diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json new file mode 100644 index 0000000..9e8f15f --- /dev/null +++ b/src-tauri/tauri.conf.json @@ -0,0 +1,39 @@ +{ + "$schema": "https://schema.tauri.app/config/2", + "productName": "YouTube Summarizer", + "version": "1.0.0", + "identifier": "com.victorgiers.youtube-summarizer", + "build": { + "frontendDist": "../ui" + }, + "app": { + "withGlobalTauri": true, + "security": { + "assetProtocol": { + "enable": true, + "scope": ["$APPLOCALDATA/data/**"] + }, + "csp": null + }, + "windows": [ + { + "label": "main", + "title": "YouTube Summarizer", + "width": 1104, + "height": 800, + "resizable": true + } + ] + }, + "bundle": { + "active": true, + "resources": [ + "../backend_cli.py", + "../youtube_summarizer.py", + "../translate_summary.py", + "../requirements.txt", + "resources/backend", + "resources/ffmpeg" + ] + } +} diff --git a/tools/autofill_translations.py b/tools/autofill_translations.py new file mode 100644 index 0000000..9de497b --- /dev/null +++ b/tools/autofill_translations.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +import os +import sqlite3 +import 
# NOTE(review): the database is still looked up next to this script
# (tools/summaries.db) -- confirm that this is where the data to fill lives.
DB_FILE = os.path.join(os.path.dirname(__file__), 'summaries.db')
# translate_summary.py sits in the repository root, one directory above tools/.
TRANSLATE_SCRIPT = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'translate_summary.py',
)
MODEL = "mistral-small3.1:24b"


def get_entries_needing_translation(conn):
    """Return the summaries that still lack at least one translation.

    Args:
        conn: An open ``sqlite3`` connection to a database that has a
            ``summaries`` table.

    Returns:
        A list of ``(id, summary_en, summary_de, summary_jp)`` tuples for every
        row whose English summary is non-empty and whose German or Japanese
        summary is missing (``NULL`` or empty).
    """
    cursor = conn.cursor()
    cursor.execute(
        "SELECT id, summary_en, summary_de, summary_jp FROM summaries"
    )
    return [
        (row[0], row[1], row[2], row[3])
        for row in cursor.fetchall()
        # English summary present, at least one translation missing.
        if row[1] and (not row[2] or not row[3])
    ]


def translate(summary_text, lang):
    """Translate ``summary_text`` by running the translate_summary.py script.

    The text is written to a temporary file, the script is started with the
    current interpreter and ``MODEL``, and whatever it prints on stdout is
    returned with surrounding whitespace stripped.

    Args:
        summary_text: The English summary to translate.
        lang: Target language code passed to ``--lang``.

    Returns:
        The stripped stdout of the translation script.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status. Its stderr is echoed first so the reason is visible.
    """
    import tempfile  # local import: the script header does not import tempfile

    # Write the summary to a temporary file the child process can read.
    with tempfile.NamedTemporaryFile('w+', delete=False, suffix='.txt', encoding='utf-8') as f:
        f.write(summary_text)
        tmp_summary_path = f.name
    try:
        # Run the translation script with the interpreter running this script.
        cmd = [
            sys.executable,
            TRANSLATE_SCRIPT,
            "--summary-file", tmp_summary_path,
            "--lang", lang,
            "--model", MODEL,
        ]
        print(f"[{lang}] Translating with: {' '.join(cmd)}")
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as err:
            # The exception text only carries the exit status; surface the
            # child's own diagnostics before re-raising.
            if err.stderr:
                print(err.stderr.strip(), file=sys.stderr)
            raise
        return result.stdout.strip()
    finally:
        os.remove(tmp_summary_path)
def detect_target_triple(rustc: str = "rustc") -> str:
    """Return the host target triple reported by the Rust compiler.

    ``rustc --print host-tuple`` is tried first. When that invocation exits
    with an error, the ``host:`` line of ``rustc -Vv`` is used instead.

    Args:
        rustc: Name or path of the compiler executable to query.

    Returns:
        The host triple as printed by the compiler, stripped of whitespace.

    Raises:
        SystemExit: If the compiler cannot be started or neither probe
            yields a triple.
    """
    try:
        # stderr is discarded because a compiler that does not know the
        # `host-tuple` print request complains loudly before we fall back.
        output = subprocess.check_output(
            [rustc, "--print", "host-tuple"],
            text=True,
            stderr=subprocess.DEVNULL,
        )
    except OSError as err:
        # Typically FileNotFoundError: the compiler is not installed / not on PATH.
        raise SystemExit(
            f"Unable to run {rustc}: {err}. Install Rust and make sure rustc is on PATH."
        ) from None
    except subprocess.CalledProcessError:
        output = ""

    triple = output.strip()
    if triple:
        return triple

    try:
        verbose = subprocess.check_output([rustc, "-Vv"], text=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise SystemExit(
            f"Unable to determine the Rust host target triple: {err}"
        ) from None

    for line in verbose.splitlines():
        if line.startswith("host: "):
            return line.split(": ", 1)[1].strip()
    raise SystemExit("Unable to determine the Rust host target triple.")
def resolve_tool_source(env_name: str, tool_name: str) -> Path:
    """Locate the executable that should be copied into the bundle.

    The environment variable ``env_name`` takes precedence; when it is unset
    or blank, ``tool_name`` is looked up on ``PATH``.

    Args:
        env_name: Name of the environment variable holding an explicit path.
        tool_name: Executable name to search for on ``PATH``.

    Returns:
        The resolved, absolute path of the tool.

    Raises:
        SystemExit: If the override does not point to an existing file, or
            the tool is neither overridden nor found on ``PATH``.
    """
    override = os.environ.get(env_name, "").strip()
    if override:
        candidate = Path(override).expanduser().resolve()
        # Fail here with a clear message instead of later inside shutil.copy2.
        if not candidate.is_file():
            raise SystemExit(
                f"{env_name} does not point to an existing file: {candidate}"
            )
        return candidate

    source = shutil.which(tool_name)
    if not source:
        raise SystemExit(
            f"Required build dependency not found: {tool_name}. "
            f"Put it on PATH or set {env_name}."
        )
    return Path(source).resolve()
LANG_MAP = {
    "de": "German",
    "jp": "Japanese"
}


def translate_summary_text(
    summary_text,
    target_language,
    model="mistral:latest",
    api_url="http://localhost:11434/api/chat",
    timeout=(10, None),
):
    """Translate ``summary_text`` with an Ollama chat model.

    Args:
        summary_text: The summary to translate.
        target_language: A key of ``LANG_MAP`` (``"de"`` or ``"jp"``).
        model: Name of the Ollama model to use.
        api_url: Chat endpoint of the Ollama server.
        timeout: Passed to ``requests.post``. The default bounds only the
            connection phase; the read phase stays unbounded because the
            whole answer is generated before the server responds.

    Returns:
        The model's answer with surrounding whitespace stripped, or an empty
        string when the response carries no message content.

    Raises:
        ValueError: If ``target_language`` is not supported.
        requests.RequestException: If the HTTP request fails or the server
            answers with an error status.
    """
    if target_language not in LANG_MAP:
        raise ValueError("Supported languages: de (German), jp (Japanese)")
    prompt = (
        f"Translate the following summary into {LANG_MAP[target_language]}. Only output the translated summary, "
        "no explanation or intro. If it's already in the target language, do nothing but repeat it.\n\n"
        f"Summary:\n{summary_text}\n\nTranslation:"
    )
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": f"You are an expert translator proficient in {LANG_MAP[target_language]} and English."},
            {"role": "user", "content": prompt}
        ],
        "stream": False
    }
    resp = requests.post(api_url, json=payload, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()
    # Tolerate a missing or null "message" object instead of crashing on it.
    message = data.get("message") or {}
    return (message.get("content") or "").strip()


def translate_summary_file(summary_file, target_language, model="mistral:latest"):
    """Read a UTF-8 summary file and translate its content.

    Args:
        summary_file: Path of the file holding the summary text.
        target_language: A key of ``LANG_MAP``.
        model: Name of the Ollama model to use.

    Returns:
        The translated text as returned by ``translate_summary_text``.

    Raises:
        ValueError: If the file contains only whitespace, or the language is
            not supported.
        OSError: If the file cannot be read.
    """
    with open(summary_file, "r", encoding="utf-8") as f:
        summary_text = f.read().strip()
    if not summary_text:
        raise ValueError("Empty summary text!")
    return translate_summary_text(summary_text, target_language, model)


def main():
    """Command-line entry point: translate a summary file and emit the result.

    Exits with status 2 when the translation fails. The translation is
    written to ``--output-file`` when given, otherwise printed to stdout.
    """
    parser = argparse.ArgumentParser(description="Translate summary using Ollama")
    parser.add_argument("--summary-file", required=True, help="Path to file with English summary text")
    # Derived from LANG_MAP so the CLI and the translator cannot drift apart.
    parser.add_argument("--lang", required=True, choices=sorted(LANG_MAP), help="Target language: 'de' or 'jp'")
    parser.add_argument("--model", default="mistral:latest", help="Ollama model to use")
    parser.add_argument("--output-file", help="Output file for translated summary")
    args = parser.parse_args()

    # Read and translate the summary; any failure becomes exit status 2.
    try:
        translation = translate_summary_file(args.summary_file, args.lang, args.model)
    except Exception as e:
        print(f"Translation failed: {e}", file=sys.stderr)
        sys.exit(2)

    # Output result
    if args.output_file:
        with open(args.output_file, "w", encoding="utf-8") as f:
            f.write(translation)
    else:
        print(translation)


if __name__ == "__main__":
    main()
+
+ + +
+ + +
+ +
+ +
+ +
/**
 * Map a local file path to a URL the webview can load.
 *
 * Falsy paths are handed back untouched, and so is any path when the
 * `convertFileSrc` helper is not a function; otherwise the helper's result
 * is returned.
 */
function toWebviewFileUrl(filePath) {
  const canConvert = Boolean(filePath) && typeof convertFileSrc === 'function';
  return canConvert ? convertFileSrc(filePath) : filePath;
}
form.querySelector('button[type="submit"]'); + const autoTranslateCheckbox = document.getElementById('autotranslate-checkbox'); + + let fullSummaries = []; + let currentPage = 1; + const PAGE_SIZE = 20; + let isLoading = false; + let entryUiState = {}; + + function setLoadingMessage(message) { + if (!isLoading) { + return; + } + loadingIndicator.style.display = 'inline'; + loadingIndicator.textContent = message; + } + + whisperCheckbox.checked = localStorage.getItem('useWhisper') === '0' ? false : true; + autoTranslateCheckbox.checked = localStorage.getItem('autoTranslate') === '1' ? true : false; + + whisperCheckbox.addEventListener('change', () => { + localStorage.setItem('useWhisper', whisperCheckbox.checked ? '1' : '0'); + }); + autoTranslateCheckbox.addEventListener('change', () => { + localStorage.setItem('autoTranslate', autoTranslateCheckbox.checked ? '1' : '0'); + }); + + function renderSummaries(list) { + summariesContainer.innerHTML = ''; + const renderedIds = new Set(); + + list.forEach(item => { + renderedIds.add(item.id); + if (!entryUiState[item.id]) { + entryUiState[item.id] = { expanded: false, lang: 'en' }; + } + let { expanded, lang } = entryUiState[item.id]; + + const entry = document.createElement('div'); + entry.classList.add('entry'); + entry.style.overflow = 'hidden'; + + const deleteButton = document.createElement('button'); + deleteButton.type = 'button'; + deleteButton.innerHTML = '×'; + deleteButton.classList.add('delete-entry-button'); + deleteButton.style.width = '24px'; + deleteButton.style.height = '24px'; + deleteButton.style.display = 'flex'; + deleteButton.style.alignItems = 'center'; + deleteButton.style.justifyContent = 'center'; + deleteButton.style.border = 'none'; + deleteButton.style.background = 'transparent'; + deleteButton.style.color = '#9f1239'; + deleteButton.style.fontSize = '22px'; + deleteButton.style.fontWeight = 'normal'; + deleteButton.style.cursor = 'pointer'; + deleteButton.style.padding = '0'; + 
// Ask for confirmation, delete the entry, then reload the list.
deleteButton.addEventListener('click', (e) => {
  e.preventDefault();
  e.stopPropagation();
  if (isLoading) {
    return;
  }
  if (typeof confirmDialog !== 'function') {
    alert('Delete confirmation is unavailable.');
    return;
  }

  // A failed Tauri command rejects with the command's error value, which is
  // a plain string for these commands, so `err.message` alone would print
  // "undefined". Real Error objects are still supported.
  const describeError = (err) => (
    typeof err === 'string' ? err : ((err && err.message) || String(err))
  );

  confirmDialog('Are you sure you want to delete this entry?', {
    title: 'Delete entry',
    kind: 'warning'
  })
    .then((confirmed) => {
      if (!confirmed) {
        return undefined;
      }
      // Returned so that a failure reaches the single catch below.
      return window.api.deleteSummary(item.id).then(() => {
        delete entryUiState[item.id];
        return window.api.getSummaries().then(setSummaries);
      });
    })
    .catch((err) => {
      alert('Error deleting summary: ' + describeError(err));
    });
});
'#fff' : '#9f1239'; + btn.disabled = isLoading; + btn.addEventListener('click', () => { + lang = thisLang; + entryUiState[item.id].lang = lang; + renderSummaryContent(); + Array.from(langSwitcher.children).forEach((button, index) => { + const language = ['en', 'de', 'jp'][index]; + button.style.background = (language === lang) ? '#9f1239' : '#fff1f2'; + button.style.color = (language === lang) ? '#fff' : '#9f1239'; + }); + }); + langSwitcher.appendChild(btn); + }); + left.appendChild(langSwitcher); + + const middle = document.createElement('div'); + middle.classList.add('middle'); + const headline = document.createElement('div'); + headline.style.display = 'flex'; + headline.style.alignItems = 'center'; + headline.style.justifyContent = 'space-between'; + headline.style.gap = '12px'; + const headlineMain = document.createElement('div'); + headlineMain.style.display = 'flex'; + headlineMain.style.alignItems = 'center'; + headlineMain.style.minWidth = '0'; + const titleEl = document.createElement('strong'); + titleEl.style.display = 'block'; + titleEl.style.fontSize = '16px'; + titleEl.style.cursor = 'default'; + titleEl.style.marginLeft = '0'; + titleEl.textContent = item.video_name; + + const arrow = document.createElement('span'); + arrow.textContent = expanded ? 
'▼' : '▶'; + arrow.style.marginRight = '8px'; + arrow.style.marginLeft = '0'; + arrow.style.fontSize = '18px'; + arrow.style.userSelect = 'none'; + arrow.style.transition = 'transform 0.15s'; + + headlineMain.appendChild(arrow); + headlineMain.appendChild(titleEl); + headline.appendChild(headlineMain); + headline.appendChild(deleteButton); + + const channelEl = document.createElement('span'); + channelEl.style.fontSize = '14px'; + channelEl.style.opacity = '0.8'; + channelEl.style.marginBottom = '12px'; + channelEl.textContent = item.channel || ''; + channelEl.style.display = 'block'; + channelEl.style.marginTop = '2px'; + + middle.appendChild(headline); + middle.appendChild(channelEl); + + const summaryHTML = document.createElement('div'); + summaryHTML.classList.add('summary'); + summaryHTML.style.display = '-webkit-box'; + summaryHTML.style.webkitBoxOrient = 'vertical'; + summaryHTML.style.overflow = 'hidden'; + summaryHTML.style.transition = 'max-height 0.2s'; + + function renderSummaryContent() { + const text = summaryFields[lang]; + summaryHTML.innerHTML = ''; + if (text && text.trim()) { + summaryHTML.innerHTML = markdownToHTML(text); + } else { + const missingMsg = document.createElement('span'); + missingMsg.textContent = ( + lang === 'de' ? 'German not available. ' : + lang === 'jp' ? 'Japanese not available. ' : + 'Not available. 
' + ); + summaryHTML.appendChild(missingMsg); + } + if (!expanded) { + summaryHTML.style.webkitLineClamp = '2'; + summaryHTML.style.maxHeight = '2.8em'; + } else { + summaryHTML.style.webkitLineClamp = ''; + summaryHTML.style.maxHeight = ''; + } + } + + middle.appendChild(summaryHTML); + + entry.appendChild(left); + entry.appendChild(middle); + + summariesContainer.appendChild(entry); + + function applyCollapsedStyle() { + if (!expanded) { + entry.classList.add('collapsed'); + arrow.textContent = '▶'; + } else { + entry.classList.remove('collapsed'); + arrow.textContent = '▼'; + } + renderSummaryContent(); + } + applyCollapsedStyle(); + + middle.addEventListener('click', () => { + if (!expanded) { + expanded = true; + entryUiState[item.id].expanded = true; + applyCollapsedStyle(); + } + }); + + headline.addEventListener('click', (e) => { + if (expanded) { + expanded = false; + entryUiState[item.id].expanded = false; + applyCollapsedStyle(); + e.stopPropagation(); + } + }); + }); + + Object.keys(entryUiState).forEach(id => { + if (!renderedIds.has(Number(id))) { + delete entryUiState[id]; + } + }); + + setActionLinksDisabled(isLoading); + } + + function markdownToHTML(text) { + text = text.replace(/<\/think(?:ing)?>[^\S\n]*\n+[^\S\n]*/gi, ''); + text = text.replace( + /(^|\n)\s*[\s\S]*?<\/think(?:ing)?>\s*(\n\s*\n)?/gi, + (_, lead) => (lead ? '\n' : '') + ); + + let tmp = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + tmp = tmp.replace( + /(^|\n)\s*[\s\S]*?<\/think(?:ing)?>\s*(?=\n|$)/gi, + (_, lead) => (lead ? '\n' : '') + ); + + const codeblocks = []; + const placeholder = idx => `@@CODEBLOCK${idx}@@`; + tmp = tmp.replace(/```([\s\S]*?)```/g, (_, code) => { + codeblocks.push(code); + return placeholder(codeblocks.length - 1); + }); + + let escaped = tmp + .replace(/&/g, '&') + .replace(//g, '>'); + + escaped = escaped + .replace(/^#### (.+)$/gm, '

$1

') + .replace(/^### (.+)$/gm, '

$1

') + .replace(/^## (.+)$/gm, '

$1

') + .replace(/^# (.+)$/gm, '

$1

'); + + escaped = escaped.replace( + /(^|\n)([ \t]*\* .+(?:\n[ \t]*\* .+)*)/g, + (_, lead, listBlock) => { + const items = listBlock + .split(/\n/) + .map(line => line.replace(/^[ \t]*\*\s+/, '').trim()) + .map(item => `
  • ${item}
  • `) + .join(''); + return `${lead}
      ${items}
    `; + } + ); + + let html = escaped + .replace(/\*\*(.+?)\*\*/g, '$1') + .replace(/(?$1') + .replace(/`(.+?)`/g, '$1'); + + html = html.replace(/@@CODEBLOCK(\d+)@@/g, (_, idx) => { + const code = codeblocks[Number(idx)]; + return `
    ${code}
    `; + }); + + html = html.replace(/\n*(.*?<\/h[1-3]>)\n*/g, '$1\n'); + html = html.replace(/\n/g, '
    '); + html = html + .replace(/
    \s*()/g, '$1') + .replace(/(<\/h[1-3]>)\s*
    /g, '$1'); + + return html; + } + + function setActionLinksDisabled(disabled) { + document.querySelectorAll('.delete-entry-button').forEach(button => { + if (disabled) { + button.disabled = true; + button.style.opacity = '0.5'; + } else { + button.disabled = false; + button.style.opacity = ''; + } + }); + document.querySelectorAll('.left button').forEach(btn => { + btn.disabled = disabled; + btn.style.opacity = disabled ? '0.5' : ''; + }); + } + + function updatePaginationControls() { + if (!fullSummaries || fullSummaries.length <= PAGE_SIZE) { + paginationTop.style.display = 'none'; + paginationBottom.style.display = 'none'; + return; + } + paginationTop.style.display = 'flex'; + paginationBottom.style.display = 'flex'; + const totalPages = Math.ceil(fullSummaries.length / PAGE_SIZE); + + const buildNav = (container) => { + container.innerHTML = ''; + + const prevBtn = document.createElement('button'); + prevBtn.textContent = '«'; + prevBtn.disabled = currentPage === 1; + prevBtn.addEventListener('click', () => { + if (currentPage > 1) { + showPage(currentPage - 1); + updatePaginationControls(); + } + }); + container.appendChild(prevBtn); + + for (let i = 1; i <= totalPages; i += 1) { + const btn = document.createElement('button'); + btn.textContent = i; + if (i === currentPage) { + btn.classList.add('active'); + } + btn.addEventListener('click', () => { + showPage(i); + updatePaginationControls(); + }); + container.appendChild(btn); + } + + const nextBtn = document.createElement('button'); + nextBtn.textContent = '»'; + nextBtn.disabled = currentPage === totalPages; + nextBtn.addEventListener('click', () => { + if (currentPage < totalPages) { + showPage(currentPage + 1); + updatePaginationControls(); + } + }); + container.appendChild(nextBtn); + }; + + buildNav(paginationTop); + buildNav(paginationBottom); + } + + function showPage(page) { + const totalPages = Math.ceil(fullSummaries.length / PAGE_SIZE); + currentPage = Math.max(1, Math.min(page, totalPages 
// Summarize the entered URL, optionally translate the result, then refresh
// the list. The UI is locked while the request is running.
form.addEventListener('submit', (e) => {
  e.preventDefault();
  const url = urlInput.value.trim();
  const useWhisper = whisperCheckbox.checked;
  const autoTranslate = autoTranslateCheckbox.checked;
  if (!url || isLoading) {
    return;
  }

  // A failed Tauri command rejects with the command's error value, which is
  // a plain string for these commands, so `err.message` alone would print
  // "undefined". Real Error objects (e.g. thrown while rendering) still work.
  const describeError = (err) => (
    typeof err === 'string' ? err : ((err && err.message) || String(err))
  );

  isLoading = true;
  summarizeButton.disabled = true;
  setLoadingMessage('Summarizing…');
  setActionLinksDisabled(true);

  const selectedModel = modelSelect.value;
  window.api.summarizeVideo(url, useWhisper, selectedModel)
    .then((newEntry) => {
      if (!newEntry || !newEntry.id) {
        return window.api.getSummaries().then(setSummaries);
      }

      entryUiState[newEntry.id] = { expanded: true, lang: 'en' };

      if (!autoTranslate) {
        return window.api.getSummaries().then(setSummaries);
      }

      let translationsOk = true;
      setLoadingMessage('Translating to German (DE)…');
      return window.api.translateSummary(newEntry.id, 'de', selectedModel)
        .then(() => {
          setLoadingMessage('Translating to Japanese (JP)…');
          return window.api.translateSummary(newEntry.id, 'jp', selectedModel);
        })
        .catch(err => {
          // A translation failure is reported but does not discard the summary.
          translationsOk = false;
          alert('Error translating summary: ' + describeError(err));
        })
        .then(() => {
          entryUiState[newEntry.id] = {
            expanded: true,
            lang: translationsOk ? 'jp' : 'en'
          };
          return window.api.getSummaries().then(setSummaries);
        });
    })
    .catch(err => {
      alert('Error summarizing video: ' + describeError(err));
    })
    .finally(() => {
      loadingIndicator.style.display = 'none';
      loadingIndicator.textContent = 'Loading…';
      summarizeButton.disabled = false;
      isLoading = false;
      setActionLinksDisabled(false);
      urlInput.value = '';
    });
});
# -----------------------
# Configuration & flags
# -----------------------
DEBUG = False

# Whisper settings
NUM_SLICES = 8
OVERLAP_SEC = 1
MAX_OVERLAP_WORDS = 7
WHISPER_MODEL = "small"  # e.g. "small", "medium", "large-v3" …


def debug_print(*args, **kwargs):
    """Print debug messages to stderr when DEBUG is enabled."""
    if DEBUG:
        print("[DEBUG]", *args, **kwargs, file=sys.stderr)


def get_ffmpeg_binary() -> str:
    """Return the ffmpeg executable to use.

    The ``YTS_FFMPEG`` environment variable wins when it is set to a
    non-blank value; otherwise the bare name ``"ffmpeg"`` is returned.
    """
    value = os.environ.get("YTS_FFMPEG", "").strip()
    return value or "ffmpeg"


def get_ffprobe_binary() -> str:
    """Return the ffprobe executable to use.

    The ``YTS_FFPROBE`` environment variable wins when it is set to a
    non-blank value; otherwise the bare name ``"ffprobe"`` is returned.
    """
    value = os.environ.get("YTS_FFPROBE", "").strip()
    return value or "ffprobe"


def get_whisper_download_root() -> Optional[str]:
    """Return the Whisper cache directory named by ``YTS_WHISPER_CACHE_DIR``.

    The directory is created when it does not exist yet. ``None`` is
    returned when the variable is unset or blank.
    """
    value = os.environ.get("YTS_WHISPER_CACHE_DIR", "").strip()
    if not value:
        return None
    os.makedirs(value, exist_ok=True)
    return value


# -----------------------
# 1) Utilities
# -----------------------

def extract_video_id(url: str) -> Optional[str]:
    """Extract the eleven character YouTube video ID from a URL.

    Recognised forms are ``...v=<id>``, ``youtu.be/<id>`` and the path
    styles ``/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>``.

    Args:
        url: The URL (or URL fragment) to inspect.

    Returns:
        The first video ID found, or ``None`` when the URL contains none.
    """
    debug_print(f"Extracting video ID from URL: {url}")
    m = re.search(
        r'(?:v=|youtu\.be/|/(?:shorts|embed|live)/)([0-9A-Za-z_-]{11})',
        url,
    )
    vid = m.group(1) if m else None
    debug_print(f"Video ID: {vid}")
    return vid
+ ) + + +def vtt_to_lines(path: str) -> List[str]: + """Convert a VTT file into deduplicated lines of text.""" + cues, last = [], None + for caption in webvtt.read(path): + cur = caption.text.replace("\n", " ").strip() + if not cur or cur == last: + continue + if last and cur.startswith(last): + cur = cur[len(last):].strip(" -") + cues.append(cur) + last = caption.text.replace("\n", " ").strip() + return cues + + +def remove_consecutive_line_duplicates(lines: List[str]) -> List[str]: + """Remove consecutive duplicate lines.""" + deduped, last = [], None + for l in lines: + if l != last: + deduped.append(l) + last = l + return deduped + + +def remove_phrase_duplicates_from_lines(lines: List[str]) -> List[str]: + """Remove duplicate phrases within lines (used for subtitle deduplication).""" + out, last = [], None + for l in lines: + if last and l.startswith(last): + trimmed = l[len(last):].strip() + if trimmed: + out.append(trimmed) + else: + out.append(l) + last = l + return out + + +def remove_empty_lines(lines: List[str]) -> List[str]: + """Remove empty lines.""" + return [l for l in lines if l.strip()] + + +def get_subtitles_via_yt_dlp(url: str) -> Optional[str]: + """Try to fetch subtitles via yt_dlp when API transcripts fail.""" + debug_print(f"Fetching metadata via yt‑dlp for URL: {url}") + opts = {'skip_download': True, 'quiet': True, 'ignoreerrors': True} + with yt_dlp.YoutubeDL(opts) as ydl: + info = ydl.extract_info(url, download=False) + available = list(info.get('subtitles', {})) + list(info.get('automatic_captions', {})) + debug_print(f"Available subtitle languages: {available}") + if not available: + return None + + priority = ['en', 'es', 'fr', 'de', 'zh', 'ja'] + langs = [l for l in priority if l in available] + [l for l in available if l not in priority] + + for lang in langs: + debug_print(f"Trying subtitle language {lang}") + dl_opts = { + 'skip_download': True, + 'writesubtitles': True, + 'writeautomaticsub': True, + 'subtitlesformat': 'vtt', + 
'subtitlelangs': [lang], + 'outtmpl': "transcript.%(language)s.%(ext)s", + 'quiet': True, + } + with yt_dlp.YoutubeDL(dl_opts) as ydl: + ydl.download([url]) + + files = [f for f in os.listdir('.') if f.startswith('transcript') and f.endswith('.vtt')] + if not files: + continue + path = files[0] + try: + lines = vtt_to_lines(path) + lines = remove_consecutive_line_duplicates(lines) + lines = remove_phrase_duplicates_from_lines(lines) + lines = remove_empty_lines(lines) + text = "\n".join(lines) + debug_print(f"Subtitle text length: {len(text)}") + return text + except Exception as e: + debug_print(f"Subtitle parsing failed: {e}") + return None + + +# -------------------------- +# 2) Whisper‑based workflow +# -------------------------- + +def _cleanup_audio_artifacts(vid: str) -> None: + """Remove partial audio download artifacts for the given video id.""" + for path in glob.glob(f"audio_{vid}.*"): + # Keep any existing mp3; it may belong to a previous summary. + if path.endswith(".mp3"): + continue + try: + os.remove(path) + except OSError: + pass + + +def _download_audio_with_yt_dlp(url: str, vid: str, extractor_args: Optional[dict] = None) -> str: + """Download audio via yt_dlp and extract to wav.""" + audio_fn = f"audio_{vid}.wav" + opts = { + "format": "bestaudio/best", + "outtmpl": f"audio_{vid}.%(ext)s", + "quiet": True, + "noprogress": True, + "nopart": True, + "continuedl": False, + "overwrites": True, + "noplaylist": True, + "retries": 3, + "fragment_retries": 3, + "postprocessors": [{ + "key": "FFmpegExtractAudio", + "preferredcodec": "wav", + }], + } + if extractor_args: + opts["extractor_args"] = extractor_args + with yt_dlp.YoutubeDL(opts) as ydl: + ydl.download([url]) + if not os.path.exists(audio_fn): + raise RuntimeError("yt_dlp completed but wav file was not created") + return audio_fn + + +def download_video_audio(url: str, vid: str) -> str: + """Download the best available audio for a YouTube video.""" + print(f"📥 Downloading audio from {url} …") 
+ + # Clean up any stale partials that can trigger HTTP 416 resume errors. + _cleanup_audio_artifacts(vid) + + attempts = [ + ("android player client", {"youtube": {"player_client": ["android"]}}), + ("default player client", None), + ] + + last_err = None + for label, extractor_args in attempts: + try: + debug_print(f"yt_dlp audio attempt: {label}") + audio_fn = _download_audio_with_yt_dlp(url, vid, extractor_args) + debug_print(f"Audio saved as {audio_fn}") + return audio_fn + except Exception as e: + last_err = e + debug_print(f"yt_dlp attempt failed ({label}): {e}") + _cleanup_audio_artifacts(vid) + + raise RuntimeError("Audio download failed after multiple attempts") from last_err + + +def get_audio_duration(path: str) -> float: + """Return the duration of an audio file using ffprobe.""" + res = subprocess.run([ + get_ffprobe_binary(), "-v", "error", "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", path + ], capture_output=True, text=True) + return float(res.stdout.strip()) + + +def slice_audio(audio_path: str, vid: str) -> List[Tuple[str, float, float]]: + """Slice a long audio file into overlapping chunks for Whisper.""" + print("Slicing audio …") + duration = get_audio_duration(audio_path) + length = duration / NUM_SLICES + slices = [] + for i in range(NUM_SLICES): + start = max(0, i * length - (OVERLAP_SEC if i > 0 else 0)) + end = min(duration, (i + 1) * length + (OVERLAP_SEC if i < NUM_SLICES - 1 else 0)) + fn = f"audio_{vid}_slice_{i:02d}.wav" + subprocess.run([ + get_ffmpeg_binary(), "-y", "-hide_banner", "-loglevel", "error", + "-ss", str(start), "-to", str(end), + "-i", audio_path, "-acodec", "copy", fn + ], check=True) + debug_print(f" slice {i}: {start:.1f}s→{end:.1f}s ({fn})") + slices.append((fn, start, end)) + return slices + + +def transcribe_slice(args: Tuple[str, int, str, str]) -> str: + """Transcribe a single audio slice using Whisper and save to a text file.""" + slice_path, idx, model_name, vid = args + 
if whisper is None: + raise RuntimeError("Whisper package is required but not installed") + m = whisper.load_model(model_name, download_root=get_whisper_download_root()) + res = m.transcribe(slice_path, task="transcribe") + out = f"transcript_{vid}_slice_{idx:02d}.txt" + with open(out, "w", encoding="utf-8") as f: + f.write(res["text"]) + debug_print(f"Transcribed slice {idx} → {out}") + return out + + +def merge_transcripts(files: List[str]) -> str: + """Merge transcribed slices by eliminating overlapping words.""" + merged, prev = [], [] + for i, fn in enumerate(files): + words = open(fn, encoding="utf-8").read().split() + if i > 0: + p_tail = prev[-MAX_OVERLAP_WORDS:] + c_head = words[:MAX_OVERLAP_WORDS] + L = min(len(p_tail), len(c_head)) + best = 0 + for n in range(L, 4, -1): + if p_tail[-n:] == c_head[:n]: + best = n + break + if best: + debug_print(f" overlap {best} words between slices {i-1}↔{i}") + words = words[best:] + merged += words + prev = words + text = " ".join(merged) + debug_print(f"Merged transcript: {len(text)} chars, {len(merged)} words") + return text + + +def clean_temp(pattern: str) -> None: + """Remove temporary files matching the given glob pattern.""" + for f in glob.glob(pattern): + try: + os.remove(f) + except Exception: + pass + + +def whisper_transcript(url: str, vid: str) -> str: + """Run the Whisper pipeline and return the final transcript text.""" + audio = download_video_audio(url, vid) + slices = slice_audio(audio, vid) + print("✍️ Transcribing using Whisper...", flush=True) + args = [(p, i, WHISPER_MODEL, vid) for i, (p, _, _) in enumerate(slices)] + with multiprocessing.Pool(len(slices)) as pool: + t_files = pool.map(transcribe_slice, args) + text = merge_transcripts(t_files) + clean_temp(f"audio_{vid}_slice_*.wav") + clean_temp(f"transcript_{vid}_slice_*.txt") + # Leave the original audio file so it can be referenced by the GUI + return text + + +# ----------------------- +# Ollama‑Summarizer +# ----------------------- + +def 
summarize_with_ollama(title: str, transcript: str, model: str = "mistral:latest") -> str: + """ + Send video title and transcript text to Ollama and return the summary string. + """ + debug_print(f"Preparing summary with model {model}, transcript length={len(transcript)}") + prompt = ( + "You are an expert summarizer. Summarize the following video concisely:\n\n" + f"Title: {title}\n\n" + f"Transcript:\n{transcript}\n\n" + "Summary:" + ) + debug_print(prompt) + payload = { + "model": model, + "messages": [ + {"role": "system", "content": "You are an intelligent summarizer."}, + {"role": "user", "content": prompt} + ], + "stream": True + } + debug_print("Sending request to Ollama …") + resp = requests.post("http://localhost:11434/api/chat", json=payload, stream=True) + debug_print(f"Ollama status: {resp.status_code}") + summary = "" + for line in resp.iter_lines(decode_unicode=True): + if not line: + continue + try: + msg = json.loads(line).get("message", {}).get("content", "") + summary += msg + except Exception: + continue + debug_print(f"Summary generated, length={len(summary)}") + return summary + + +# ----------------------- +# Video metadata and thumbnail download +# ----------------------- + +def fetch_video_metadata(url: str) -> Tuple[str, str, str]: + """ + Fetch the title, thumbnail URL and video ID for a YouTube URL using yt_dlp. + Returns a tuple: (video_id, title, thumbnail_url) + """ + with yt_dlp.YoutubeDL({'quiet': True}) as ydl: + info = ydl.extract_info(url, download=False) + vid = info.get('id') + title = info.get('title', f"Video {vid}") + thumbnail_url = info.get('thumbnail') + return vid, title, thumbnail_url + + +def fetch_channel_name(url: str) -> Optional[str]: + """ + Retrieve the channel or uploader name for a YouTube video using yt_dlp. + Returns None if it cannot be determined. 
+ """ + try: + with yt_dlp.YoutubeDL({'quiet': True}) as ydl: + info = ydl.extract_info(url, download=False) + # Try channel, uploader, then return None + return info.get('channel') or info.get('uploader') + except Exception as e: + debug_print(f"Failed to fetch channel name: {e}") + return None + + +def download_thumbnail(vid: str, thumbnail_url: str) -> Optional[str]: + """ + Download the thumbnail image given its URL and save it as thumb_.. + Returns the local filename or None if download fails. + """ + if not thumbnail_url: + return None + try: + response = requests.get(thumbnail_url, timeout=10) + response.raise_for_status() + # Determine extension from content type or URL + ext = None + if 'content-type' in response.headers: + ctype = response.headers['content-type'] + if 'jpeg' in ctype: + ext = 'jpg' + elif 'png' in ctype: + ext = 'png' + if ext is None: + ext = thumbnail_url.split('.')[-1].split('?')[0] + filename = f"thumb_{vid}.{ext}" + with open(filename, 'wb') as f: + f.write(response.content) + debug_print(f"Thumbnail downloaded as {filename}") + return filename + except Exception as e: + debug_print(f"Thumbnail download failed: {e}") + return None + + +# ----------------------- +# Main +# ----------------------- + +def process_video(url: str, use_whisper: bool, model: str = "mistral:latest", output_json: Optional[str] = None) -> dict: + """ + Core processing routine. Retrieves metadata, obtains transcript via the + selected workflow, generates a summary using Ollama and writes the + transcript, thumbnail and audio (converted to mp3) to disk. Returns a + dictionary containing metadata which may also be dumped to a JSON file if + output_json is provided. + + Parameters + ---------- + url : str + The YouTube video URL. + use_whisper : bool + If True, use the Whisper transcription workflow; if False, use the + classic API/subtitle workflow. + model : str, optional + The Ollama model name to use for summarization. Defaults to + "mistral:latest". 
+ output_json : str or None, optional + If provided, path to a file where JSON metadata should be written. + + Returns + ------- + dict + A dictionary containing metadata about the processed video. + """ + vid, title, thumb_url = fetch_video_metadata(url) + if not vid: + raise SystemExit("Invalid YouTube URL.") + + # Fetch the channel/uploader name + channel_name = fetch_channel_name(url) + + # Fetch transcript + if use_whisper: + print("🤖 Using Whisper parallel transcription…") + transcript_text = whisper_transcript(url, vid) + if not transcript_text.strip(): + raise SystemExit("Whisper transcription failed or empty.") + else: + print("▶️ Using classic API/subtitle workflow…") + # Try API first + try: + transcript_text = get_transcript_api(vid) + except Exception: + print("API failed, falling back to subtitles…") + transcript_text = get_subtitles_via_yt_dlp(url) + if not transcript_text: + raise SystemExit("No transcript/subtitles available.") + + # Save transcript to file + transcript_filename = f"transcript_{vid}.txt" + with open(transcript_filename, 'w', encoding='utf-8') as f: + f.write(transcript_text) + debug_print(f"Transcript saved to {transcript_filename}") + + # Download thumbnail + thumbnail_filename = download_thumbnail(vid, thumb_url) + + # Determine audio filename if generated and convert to mp3 + audio_filename = None + if use_whisper: + wav_name = f"audio_{vid}.wav" + mp3_name = f"audio_{vid}.mp3" + # Convert to mp3 using ffmpeg if wav exists + if os.path.exists(wav_name): + try: + subprocess.run([ + get_ffmpeg_binary(), '-y', '-i', wav_name, + '-codec:a', 'libmp3lame', '-qscale:a', '2', + mp3_name + ], check=True) + os.remove(wav_name) + debug_print(f"Converted {wav_name} to {mp3_name} and removed wav") + audio_filename = mp3_name + except Exception as e: + debug_print(f"Failed to convert audio to mp3: {e}") + # fallback: keep wav + audio_filename = wav_name + else: + # If wav file doesn't exist yet (perhaps removed elsewhere), do not set audio + 
audio_filename = None + + # Generate summary + print("✍️ Generating summary with Ollama…", flush=True) + summary_text = summarize_with_ollama(title, transcript_text, model) + + # Create metadata dictionary + meta = { + 'timestamp': datetime.utcnow().isoformat() + 'Z', + 'video_id': vid, + 'url': url, + 'video_name': title, + 'channel': channel_name, + 'thumbnail': thumbnail_filename, + 'audio': audio_filename, + 'transcript': transcript_filename, + 'summary': summary_text + } + + # Write JSON output if requested + if output_json: + with open(output_json, 'w', encoding='utf-8') as f: + json.dump(meta, f, ensure_ascii=False, indent=2) + debug_print(f"Metadata written to {output_json}") + return meta + + +def rewrite_summary(title: str, transcript_file: str, model: str = "mistral:latest", output_json: Optional[str] = None) -> dict: + """ + Regenerate a summary from an existing transcript file using the specified model. + + Parameters + ---------- + transcript_file : str + Path to a text file containing the transcript. + model : str, optional + Name of the Ollama model to use for summarization. + output_json : str or None, optional + If provided, write the resulting summary dictionary to this file. + + Returns + ------- + dict + A dictionary containing just the summary. 
+ """ + if not os.path.exists(transcript_file): + raise SystemExit(f"Transcript file not found: {transcript_file}") + with open(transcript_file, 'r', encoding='utf-8') as f: + transcript_text = f.read() + debug_print(f"Rewriting summary using model {model} for {transcript_file}") + summary_text = summarize_with_ollama(title, transcript_text, model) + meta = {'summary': summary_text} + if output_json: + with open(output_json, 'w', encoding='utf-8') as f: + json.dump(meta, f, ensure_ascii=False, indent=2) + debug_print(f"Summary written to {output_json}") + return meta + + +def main(): + import argparse + parser = argparse.ArgumentParser(description="YouTube → Transcript → Ollama Summary") + parser.add_argument('url', help="YouTube‑Video‑URL") + parser.add_argument('--no-ai', action='store_true', + help="Use classic API/subtitle workflow instead of Whisper") + parser.add_argument('--output-json', type=str, default=None, + help="Write metadata JSON to the specified file instead of STDOUT") + parser.add_argument('--model', type=str, default='mistral:latest', + help="Ollama model to use for summarization (default: mistral:latest)") + parser.add_argument('--transcript-file', type=str, default=None, + help="Path to an existing transcript file; when provided the script will skip transcription and only generate a summary.") + args = parser.parse_args() + + use_whisper = not args.no_ai + + try: + # If a transcript file is provided, skip the normal processing and only rewrite summary + if args.transcript_file: + vid, title, _ = fetch_video_metadata(args.url) + meta = rewrite_summary(title, args.transcript_file, args.model, args.output_json) + else: + meta = process_video(args.url, use_whisper, args.model, args.output_json) + # If no JSON output specified, print metadata as JSON to stdout + if not args.output_json: + print(json.dumps(meta, ensure_ascii=False, indent=2)) + except SystemExit as e: + # Provide a friendly exit message without a stacktrace + print(str(e)) + 
sys.exit(1) + + +if __name__ == '__main__': + main()