Remplace la voicebank générée par Kokoro (timbre anglais sur français phonémisé -> accent que Qwen3 clonait) par 41 vraies voix FR issues de CML-TTS (livres audio studio) : 1 narrateur dédié, 18F/14M nommées, 4F/4M anonymes réservées. - scripts/import_voices.py : import multi-shards parquet, 1 clip/locuteur (le plus propre via levenshtein), genre estimé par F0 (YIN, anti-octave), filtre débit de parole (ref_text aligné sur l'audio). - VoiceEntry.anonymous + assign_voices : les figurants « anonyme (...) » tirent dans un pool réservé, jamais mélangé avec les voix nommées ; narrateur dédié (fr_narrator remplace fr_f_siwis). - dedup._anon_attrs : genre/âge déduits du nom anonyme (bon genre de voix). - tts/qwen3.py : garde-fou anti-dérive (rejette/réessaie les sorties en boucle ou coupées en estimant la durée plausible du chunk). Limite connue : Qwen3 ne sait pas synthétiser les fragments d'1-2 mots (incises, titres) -> trous ; à traiter (repli Kokoro ou fusion des incises). Inclut aussi du travail en cours antérieur (refacto backend LLM pluggable mlx/lmstudio, benchmark, ajustements frontend/API). Claude-Session: https://claude.ai/code/session_01XSVvcy1mfb4k1xDgib9vVU
343 lines
11 KiB
Python
343 lines
11 KiB
Python
"""Application FastAPI : pilote le pipeline et sert l'UI.
|
|
|
|
Toutes les routes lourdes (analyse, casting, rendu) sont *enfilees* dans
|
|
l'orchestrateur et rendent la main immediatement ; l'avancement arrive par
|
|
WebSocket. Les operations rapides (preview de voix) tournent dans un threadpool.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import io
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import soundfile as sf
|
|
from fastapi import FastAPI, HTTPException, UploadFile, WebSocket, WebSocketDisconnect
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.responses import FileResponse, Response
|
|
from fastapi.staticfiles import StaticFiles
|
|
from pydantic import BaseModel
|
|
|
|
from ..config import DATA_DIR, book_data_dir, book_output_dir, ensure_dirs
|
|
from ..epub.parser import load_book, load_chapter_text, parse_epub
|
|
from ..models import Cast, ChapterAnalysis, Character, Pronunciation
|
|
from ..pipeline.orchestrator import load_state, orchestrator
|
|
from ..settings import Settings, get_settings, save_settings
|
|
from ..store import artifacts
|
|
from ..util import slugify
|
|
from .ws import manager
|
|
|
|
# Single application instance; every route below registers itself on it.
app = FastAPI(title="InkFlow API")
# CORS is left fully open: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
@app.on_event("startup")
async def _startup() -> None:
    """Prepare the data directories and wire the orchestrator to the WebSocket manager.

    The running event loop is handed to the manager, and the manager's
    thread-safe broadcast function is installed as the orchestrator's
    broadcaster.
    """
    ensure_dirs()
    loop = asyncio.get_running_loop()
    manager.bind_loop(loop)
    orchestrator.set_broadcaster(manager.broadcast_threadsafe)
|
|
|
|
|
|
# --- Helpers -----------------------------------------------------------------
|
|
|
|
def _list_book_slugs() -> list[str]:
    """Return the sorted names of the ``DATA_DIR`` sub-directories holding a ``book.json``.

    Returns an empty list when ``DATA_DIR`` does not exist.
    """
    if DATA_DIR.exists():
        manifests = DATA_DIR.glob("*/book.json")
        return sorted(manifest.parent.name for manifest in manifests)
    return []
|
|
|
|
|
|
def _book_summary(slug: str) -> dict:
    """Build the library-card dictionary for one book.

    The result holds the slug, title, author, number of render chapters,
    number of render entries that have an mp3, and the cover URL (``None``
    when the book has no cover file).
    """
    book = load_book(slug)
    state = load_state(slug)
    # Count the render entries whose mp3 field is set.
    with_audio = [entry for entry in state.render.values() if entry.mp3]
    summary = {
        "slug": slug,
        "title": book.title,
        "author": book.author,
        "chapters": len(book.render_chapters),
        "rendered": len(with_audio),
        "cover": None,
    }
    if book.cover_file:
        summary["cover"] = f"/api/books/{slug}/cover"
    return summary
|
|
|
|
|
|
# --- Bibliotheque / upload ---------------------------------------------------
|
|
|
|
@app.get("/api/books")
def list_books() -> list[dict]:
    """Return one summary dictionary per book found in the data directory."""
    return list(map(_book_summary, _list_book_slugs()))
|
|
|
|
|
|
@app.post("/api/books")
async def upload_book(file: UploadFile) -> dict:
    """Store an uploaded EPUB, parse it and initialise its pipeline state.

    The client-supplied filename is untrusted input: only its final path
    component is kept, so a name such as ``../../x`` or an absolute path
    cannot make the upload land outside ``DATA_DIR/_uploads``. When no usable
    name remains, ``livre.epub`` is used.

    Returns:
        A dictionary with the parsed book's ``slug`` and ``title``.
    """
    ensure_dirs()
    uploads = DATA_DIR / "_uploads"
    uploads.mkdir(parents=True, exist_ok=True)

    # Treat backslashes as separators too, so Windows-style paths are also
    # reduced to their last component on POSIX hosts.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = Path(raw_name).name
    if safe_name in {"", ".", ".."}:
        safe_name = "livre.epub"

    dest = uploads / safe_name
    dest.write_bytes(await file.read())
    # Parsing runs in a worker thread so the event loop stays responsive.
    book = await asyncio.to_thread(parse_epub, dest)
    # Initialise the state.
    load_state(book.slug)
    return {"slug": book.slug, "title": book.title}
|
|
|
|
|
|
@app.get("/api/books/{slug}")
def get_book(slug: str) -> dict:
    """Return the JSON dump of a book together with its pipeline state.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    payload: dict = {"book": load_book(slug).model_dump(mode="json")}
    payload["state"] = load_state(slug).model_dump(mode="json")
    return payload
|
|
|
|
|
|
@app.get("/api/books/{slug}/cover")
def get_cover(slug: str):
    """Serve the cover image file of a book.

    Raises:
        HTTPException: 404 when the book is unknown or has no cover file.
    """
    # Same guard as the sibling routes: reject an unknown slug with a 404
    # before trying to load the book.
    _require(slug)
    book = load_book(slug)
    if not book.cover_file:
        raise HTTPException(404, "pas de couverture")
    return FileResponse(str(book_data_dir(slug) / book.cover_file))
|
|
|
|
|
|
@app.get("/api/books/{slug}/chapters/{index}")
def get_chapter(slug: str, index: int) -> dict:
    """Return a chapter with its saved analysis, or its text when none is saved.

    The response always holds ``chapter``. It additionally holds ``analysis``
    when an analysis file exists for that index, otherwise ``text`` when the
    chapter has a text file.

    Raises:
        HTTPException: 404 when the book or the chapter index is unknown.
    """
    _require(slug)
    chapter = None
    for candidate in load_book(slug).chapters:
        if candidate.index == index:
            chapter = candidate
            break
    if chapter is None:
        raise HTTPException(404, "chapitre inconnu")

    payload: dict = {"chapter": chapter.model_dump(mode="json")}
    if artifacts.analysis_path(slug, index).exists():
        analysis = artifacts.load_analysis(slug, index)
        payload["analysis"] = analysis.model_dump(mode="json")
    elif chapter.text_file:
        text = load_chapter_text(slug, chapter)
        payload["text"] = text.model_dump(mode="json")
    return payload
|
|
|
|
|
|
@app.put("/api/books/{slug}/chapters/{index}/analysis")
def put_analysis(slug: str, index: int, analysis: ChapterAnalysis) -> dict:
    """Save a chapter analysis sent by the client.

    Raises:
        HTTPException: 404 when the book is unknown, 400 when the index in
            the URL differs from the index carried by the analysis.
    """
    _require(slug)
    if index != analysis.index:
        raise HTTPException(400, "index incoherent")
    artifacts.save_analysis(slug, analysis)
    return dict(saved=True)
|
|
|
|
|
|
# --- Etapes (enfilees) -------------------------------------------------------
|
|
|
|
# Request body shared by the analyze and cast/analyze routes.
class ChaptersBody(BaseModel):
    # Chapter indexes forwarded to the orchestrator; defaults to None when the
    # client omits the field.
    chapters: Optional[list[int]] = None
|
|
|
|
|
|
@app.post("/api/books/{slug}/analyze")
def analyze(slug: str, body: ChaptersBody) -> dict:
    """Ask the orchestrator to analyse the requested chapters of a book.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    selection = body.chapters
    orchestrator.run_analyze(slug, selection)
    return dict(queued=True)
|
|
|
|
|
|
@app.post("/api/books/{slug}/pronounce")
def pronounce(slug: str) -> dict:
    """Ask the orchestrator to run the pronunciation step for a book.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    orchestrator.run_pronounce(slug)
    return dict(queued=True)
|
|
|
|
|
|
@app.post("/api/books/{slug}/cast/auto")
def cast_auto(slug: str) -> dict:
    """Ask the orchestrator to run the casting step for a book.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    orchestrator.run_cast(slug)
    return dict(queued=True)
|
|
|
|
|
|
@app.post("/api/books/{slug}/cast/analyze")
def cast_analyze(slug: str, body: ChaptersBody) -> dict:
    """(Re)analyse the casting of one or more chapters, with reconciliation.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    selection = body.chapters
    orchestrator.run_cast_analyze(slug, selection)
    return dict(queued=True)
|
|
|
|
|
|
@app.post("/api/books/{slug}/cast/dedup")
def cast_dedup(slug: str) -> dict:
    """Deduplicate the existing cast (name variants -> aliases).

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    orchestrator.run_dedup_cast(slug)
    return dict(queued=True)
|
|
|
|
|
|
# Request body of the render route; each field is forwarded to
# orchestrator.run_render.
class RenderBody(BaseModel):
    # Indexes of the chapters to render (required).
    chapters: list[int]
    # Passed as the `backend` keyword; None when the client omits it.
    backend: Optional[str] = None
    # Passed as the `mono` keyword; False by default.
    mono: bool = False
|
|
|
|
|
|
@app.post("/api/books/{slug}/render")
def render(slug: str, body: RenderBody) -> dict:
    """Ask the orchestrator to render the requested chapters to audio.

    The body's ``backend`` and ``mono`` values are forwarded unchanged.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    options = {"backend": body.backend, "mono": body.mono}
    orchestrator.run_render(slug, body.chapters, **options)
    return dict(queued=True)
|
|
|
|
|
|
# --- Casting / prononciation (lecture-ecriture directe) ----------------------
|
|
|
|
@app.get("/api/books/{slug}/cast")
def get_cast(slug: str) -> dict:
    """Return the saved cast of a book together with the voicebank.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    from ..casting.voicebank import load_voicebank

    _require(slug)
    payload: dict = {"cast": artifacts.load_cast(slug).model_dump(mode="json")}
    payload["voicebank"] = load_voicebank().model_dump(mode="json")
    return payload
|
|
|
|
|
|
@app.put("/api/books/{slug}/cast")
def put_cast(slug: str, cast: Cast) -> dict:
    """Save the cast sent by the client for a book.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    artifacts.save_cast(slug, cast)
    return dict(saved=True)
|
|
|
|
|
|
@app.get("/api/books/{slug}/cast/unresolved")
def get_unresolved_speakers(slug: str) -> dict:
    """List speakers present in the analyses but attached to no character.

    Surfaces the speaker names that deterministic canonicalisation refused to
    decide, so the user can alias or merge them by hand. A speaker counts as
    resolved when it matches a character name or alias exactly
    (case-insensitively) or when ``heuristic_match`` returns a ``Character``;
    this is independent of voice assignment.

    Returns:
        ``{"unresolved": [...]}`` where each row holds the ``speaker``, its
        segment ``count`` and the ``chapters`` indexes it appears in, sorted
        by decreasing count.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    from ..casting.dedup import heuristic_match

    _require(slug)
    cast = artifacts.load_cast(slug)

    # Lower-cased names and aliases are collected once instead of rescanning
    # the whole cast for every segment.
    known: set[str] = set()
    for character in cast.characters:
        known.add(character.name.lower())
        known.update(alias.lower() for alias in character.aliases)

    # The verdict only depends on the speaker string, so it is memoised: the
    # heuristic runs at most once per distinct speaker.
    verdicts: dict[str, bool] = {}

    def resolves(spk: str) -> bool:
        verdict = verdicts.get(spk)
        if verdict is None:
            verdict = spk.lower() in known or isinstance(
                heuristic_match(spk, cast.characters), Character
            )
            verdicts[spk] = verdict
        return verdict

    ignored = {"narrateur", "inconnu", "?"}
    agg: dict[str, dict] = {}
    for ch in load_book(slug).chapters:
        # Chapters without a saved analysis contribute nothing.
        if not artifacts.analysis_path(slug, ch.index).exists():
            continue
        for seg in artifacts.load_analysis(slug, ch.index).segments:
            spk = (seg.speaker or "").strip()
            if not spk or spk.lower() in ignored:
                continue
            if resolves(spk):
                continue
            row = agg.setdefault(spk, {"speaker": spk, "count": 0, "chapters": []})
            row["count"] += 1
            if ch.index not in row["chapters"]:
                row["chapters"].append(ch.index)
    return {"unresolved": sorted(agg.values(), key=lambda r: -r["count"])}
|
|
|
|
|
|
@app.get("/api/books/{slug}/pronunciation")
def get_pron(slug: str) -> dict:
    """Return the JSON dump of the saved pronunciation data of a book.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    pronunciation = artifacts.load_pronunciation(slug)
    return pronunciation.model_dump(mode="json")
|
|
|
|
|
|
@app.put("/api/books/{slug}/pronunciation")
def put_pron(slug: str, pron: Pronunciation) -> dict:
    """Save the pronunciation data sent by the client for a book.

    Raises:
        HTTPException: 404 when the book is unknown.
    """
    _require(slug)
    artifacts.save_pronunciation(slug, pron)
    return dict(saved=True)
|
|
|
|
|
|
# --- Reglages techniques globaux ---------------------------------------------
|
|
|
|
@app.get("/api/settings")
def read_settings() -> dict:
    """Return the JSON dump of the current global settings."""
    current = get_settings()
    return current.model_dump(mode="json")
|
|
|
|
|
|
@app.put("/api/settings")
def write_settings(settings: Settings) -> dict:
    """Save the global settings sent by the client."""
    save_settings(settings)
    return dict(saved=True)
|
|
|
|
|
|
@app.get("/api/lmstudio/models")
def list_lmstudio_models() -> dict:
    """Return the models downloaded in LM Studio (to populate the UI selector).

    Raises:
        HTTPException: 503 when listing the models fails for any reason; the
            original exception is chained as the cause.
    """
    from ..analysis.llm.lmstudio_backend import list_models

    try:
        return {"models": list_models(get_settings().lmstudio_base_url)}
    except Exception as exc:  # noqa: BLE001 - server down / unreachable
        # Chain the cause so the underlying failure stays visible in
        # tracebacks and logs.
        raise HTTPException(503, f"LM Studio injoignable: {exc}") from exc
|
|
|
|
|
|
# --- Voicebank + preview -----------------------------------------------------
|
|
|
|
@app.get("/api/voicebank")
def get_voicebank() -> dict:
    """Return the JSON dump of the voicebank."""
    from ..casting.voicebank import load_voicebank

    bank = load_voicebank()
    return bank.model_dump(mode="json")
|
|
|
|
|
|
# Request body of the voice preview route.
class PreviewBody(BaseModel):
    # Identifier looked up in the voicebank (required).
    voice_id: str
    # Sentence to synthesise; a default French sample is used when omitted.
    text: str = "Bonjour, voici un aperçu de cette voix pour votre livre audio."
|
|
|
|
|
|
@app.post("/api/voicebank/preview")
async def preview_voice(body: PreviewBody):
    """Synthesise a short sample for a voicebank entry and return it as WAV.

    The synthesis uses the ``kokoro`` backend with the entry's
    ``kokoro_voice`` preset and runs in a worker thread.

    Raises:
        HTTPException: 404 when the voice id is not in the voicebank.
    """
    from ..casting.voicebank import load_voicebank
    from ..tts.base import VoiceSpec

    voice = load_voicebank().by_id(body.voice_id)
    if voice is None:
        raise HTTPException(404, "voix inconnue")

    def _render_wav() -> bytes:
        from ..tts.factory import get_backend

        engine = get_backend("kokoro")
        spec = VoiceSpec(preset=voice.kokoro_voice)
        samples, rate = engine.synthesize(body.text, spec)
        # Encode in memory; nothing is written to disk.
        sink = io.BytesIO()
        sf.write(sink, samples, rate, format="WAV")
        return sink.getvalue()

    wav_bytes = await asyncio.to_thread(_render_wav)
    return Response(content=wav_bytes, media_type="audio/wav")
|
|
|
|
|
|
@app.get("/api/books/{slug}/audio/{index}")
def get_audio(slug: str, index: int):
    """Serve the rendered mp3 of a chapter.

    Raises:
        HTTPException: 404 when the book is unknown, when no mp3 is recorded
            for this chapter index, or when the recorded file is missing.
    """
    # Same guard as the sibling routes: reject an unknown slug before
    # touching its state or loading the book.
    _require(slug)
    state = load_state(slug)
    rs = state.render.get(index)
    if not rs or not rs.mp3:
        raise HTTPException(404, "audio non genere")
    path = book_output_dir(load_book(slug).title) / rs.mp3
    if not path.exists():
        raise HTTPException(404, "fichier introuvable")
    return FileResponse(str(path), media_type="audio/mpeg", filename=rs.mp3)
|
|
|
|
|
|
# --- WebSocket ---------------------------------------------------------------
|
|
|
|
@app.websocket("/ws/{slug}")
async def ws_endpoint(ws: WebSocket, slug: str) -> None:
    """Keep a WebSocket open for a book and push its current state on connect.

    The socket is registered with the manager, receives the current state
    once, then is held open by reading (and discarding) incoming messages. It
    is unregistered on every exit path, including task cancellation.
    """
    await manager.connect(slug, ws)
    try:
        # Send the current state as soon as the client is connected.
        await ws.send_json({"type": "state", "state": load_state(slug).model_dump(mode="json")})
        while True:
            await ws.receive_text()  # keeps the connection open
    except WebSocketDisconnect:
        pass  # the client closed the socket: normal end of the session
    except Exception:  # noqa: BLE001 - best-effort: any failure ends the session
        pass
    finally:
        # A single clean-up point, also reached on cancellation, which the
        # `except Exception` clause does not catch.
        manager.disconnect(slug, ws)
|
|
|
|
|
|
def _require(slug: str) -> None:
    """Raise a 404 unless the book's data directory contains a ``book.json``.

    Raises:
        HTTPException: 404 when the file does not exist.
    """
    manifest = book_data_dir(slug) / "book.json"
    if manifest.exists():
        return
    raise HTTPException(404, "livre inconnu")
|
|
|
|
|
|
# --- Service du frontend build (si present) ----------------------------------
|
|
# Built frontend, located relative to this file; it is only mounted when the
# directory exists.
_FRONTEND_DIST = Path(__file__).resolve().parents[2].parent.joinpath("frontend", "dist")
if _FRONTEND_DIST.exists():
    app.mount(
        "/",
        StaticFiles(directory=str(_FRONTEND_DIST), html=True),
        name="ui",
    )
|