Add kokoro_ja.py, requirements.txt, and run.sh; update .gitignore
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -25,3 +25,4 @@ tmp*
|
||||
*.tmp
|
||||
*.swp
|
||||
*.wav
|
||||
.DS_Store
|
||||
|
||||
42
kokoro_ja.py
Normal file
42
kokoro_ja.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import argparse, os
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
from kokoro import KPipeline
|
||||
|
||||
def _to_float32_array(audio):
    """Convert one synthesized chunk to a float32 NumPy array.

    The chunk can be a torch.Tensor or already a NumPy array depending on
    device/backend; the attribute checks avoid importing torch here.
    """
    if hasattr(audio, "detach"):  # torch.Tensor: drop autograd tracking
        audio = audio.detach()
    if hasattr(audio, "cpu"):  # move to CPU if needed
        audio = audio.cpu()
    if hasattr(audio, "numpy"):  # torch -> numpy
        audio = audio.numpy()
    return np.asarray(audio, dtype=np.float32)


def main():
    """Synthesize Japanese speech from the command line and write a WAV file.

    Parses the positional ``text`` plus ``--voice``, ``--speed`` and ``--out``,
    runs the Kokoro Japanese pipeline over the text, concatenates the audio of
    every chunk and writes the result to ``--out``.

    Raises:
        SystemExit: if no audio was generated, or (through argparse) if the
            command-line arguments are invalid.
    """
    # Sample rate used for the output file (the "24kHz" in the final message).
    sample_rate = 24000

    ap = argparse.ArgumentParser()
    ap.add_argument("text", help="Japanese text")
    ap.add_argument("--voice", default="jf_alpha", help="e.g. jf_alpha, jf_tebukuro, jm_kumo ...")
    ap.add_argument("--speed", type=float, default=1.0, help="1.0 = normal, >1 faster, <1 slower")
    ap.add_argument("--out", default="out.wav")
    args = ap.parse_args()

    # A zero or negative speed multiplier has no meaning; fail early with a
    # normal argparse usage error instead of passing it to the model.
    if args.speed <= 0:
        ap.error("--speed must be greater than 0")

    # Japanese pipeline. Reference:
    # https://huggingface.co/hexgrad/Kokoro-82M/blob/938257c07e326d534677886ca13829b39347fff7/README.md
    pipeline = KPipeline(lang_code="j")

    # Split at Japanese punctuation to avoid "rushing" long passages.
    # (Kokoro voices often behave best around moderate chunk sizes; see
    # https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md)
    # Both ASCII and full-width "!" / "?" are listed because Japanese text
    # normally uses the full-width forms.
    split_pattern = r"(?<=[。!?！？\n])\s*"

    audio_parts = []
    generator = pipeline(args.text, voice=args.voice, speed=args.speed, split_pattern=split_pattern)
    for _, _, audio in generator:
        # NOTE(review): a chunk without audio would become a 0-d array that
        # np.concatenate rejects, so skip it -- confirm whether the pipeline
        # can actually yield None here.
        if audio is None:
            continue
        audio_parts.append(_to_float32_array(audio))

    if not audio_parts:
        raise SystemExit("No audio generated (empty input?)")

    audio_all = np.concatenate(audio_parts, axis=0)
    sf.write(args.out, audio_all, sample_rate)
    print(f"Wrote: {args.out} (24kHz)")


if __name__ == "__main__":
    main()
|
||||
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
kokoro>=0.9.4
|
||||
soundfile
|
||||
misaki[ja]
|
||||
unidic
|
||||
fugashi
|
||||
24
run.sh
Executable file
24
run.sh
Executable file
@@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env bash
# Bootstrap a local virtualenv, install the Python dependencies, make sure the
# UniDic dictionary is downloaded, then run kokoro_ja.py with the arguments
# given to this script.
set -euo pipefail

# Resolve project files relative to this script so it also works when invoked
# from another directory. The working directory itself is left unchanged, so a
# relative --out path still resolves against the caller's directory.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
VENV_DIR="$SCRIPT_DIR/.venv"

# Create the virtualenv only when it does not exist yet.
if [[ ! -f "$VENV_DIR/bin/activate" ]]; then
  python3.11 -m venv "$VENV_DIR"
fi
# shellcheck disable=SC1091
source "$VENV_DIR/bin/activate"

python -m pip install -U pip setuptools wheel
python -m pip install -r "$SCRIPT_DIR/requirements.txt"

# Download UniDic only if mecabrc is missing (fast/no-op if already present)
python - <<'PY'
import os, sys, subprocess
import unidic

mecabrc = os.path.join(unidic.DICDIR, "mecabrc")
if not os.path.exists(mecabrc):
    print("UniDic not downloaded yet -> downloading (this can be large)...")
    subprocess.check_call([sys.executable, "-m", "unidic", "download"])
else:
    print("UniDic already present:", mecabrc)
PY

# Run the TTS script. PYTORCH_ENABLE_MPS_FALLBACK=1 lets PyTorch fall back to
# the CPU for operations the Apple MPS backend does not implement.
PYTORCH_ENABLE_MPS_FALLBACK=1 python "$SCRIPT_DIR/kokoro_ja.py" "$@"
|
||||
Reference in New Issue
Block a user