"""Orchestrateur : execute les etapes du pipeline en tache de fond, piste l'etat et diffuse l'etat complet a l'UI a chaque changement. - Un seul worker thread execute les jobs en serie (un Mac = une charge MLX a la fois). Les jobs sont enfiles et rendent la main immediatement a l'API. - L'etat (ProjectState) est persiste dans data//state.json -> reprenable. - La diffusion passe par un `broadcaster` injecte par la couche API (pour rester independant de FastAPI). Il recoit (slug, dict_etat). """ from __future__ import annotations import queue import threading import traceback from pathlib import Path from typing import Callable, Optional from ..config import book_data_dir, book_output_dir from ..epub.parser import load_book, load_chapter_text from ..models import ChapterRenderState, ProjectState, StageStatus from ..store import artifacts Broadcaster = Callable[[str, dict], None] def state_path(slug: str) -> Path: return book_data_dir(slug) / "state.json" def load_state(slug: str) -> ProjectState: path = state_path(slug) if path.exists(): state = ProjectState.model_validate_json(path.read_text(encoding="utf-8")) else: book = load_book(slug) state = ProjectState(slug=slug, title=book.title, stages={"parse": StageStatus.DONE}) return _reconcile(slug, state) def _reconcile(slug: str, state: ProjectState) -> ProjectState: """Aligne l'etat sur les artefacts presents sur disque (reprise robuste). Permet a l'UI de refleter ce qui a deja ete fait, meme via la CLI ou apres un redemarrage, sans rejouer les etapes. """ book = load_book(slug) state.stages.setdefault("parse", StageStatus.DONE) # Analyse : chapitres possedant un artefact d'analyse. analyzed = [c.index for c in book.render_chapters if artifacts.analysis_path(slug, c.index).exists()] if analyzed: for idx in analyzed: if idx not in state.analyzed_chapters: state.analyzed_chapters.append(idx) if state.stage("analyze") == StageStatus.PENDING: state.stages["analyze"] = ( StageStatus.DONE if len(analyzed) == len(book.render_chapters) else StageStatus.RUNNING) # Casting : au moins une voix attribuee. cast = artifacts.load_cast(slug) if cast.narrator_voice_id or any(c.voice_id for c in cast.characters): state.stages.setdefault("cast", StageStatus.DONE) # Prononciation : au moins une entree. if artifacts.load_pronunciation(slug).entries: state.stages.setdefault("pronounce", StageStatus.DONE) # Rendu : mp3 presents en sortie. out_dir = book_output_dir(book.title) for ch in book.render_chapters: existing = state.render.get(ch.index) if existing and existing.mp3: continue if ch.output_name and (out_dir / ch.output_name).exists(): state.render[ch.index] = ChapterRenderState( index=ch.index, status=StageStatus.DONE, progress=1.0, mp3=ch.output_name) return state class Orchestrator: def __init__(self) -> None: self._q: "queue.Queue[tuple[str, Callable[[], None]]]" = queue.Queue() self._worker: Optional[threading.Thread] = None self._broadcaster: Optional[Broadcaster] = None self._lock = threading.Lock() self.busy_slug: Optional[str] = None # --- infra --------------------------------------------------------------- def set_broadcaster(self, fn: Broadcaster) -> None: self._broadcaster = fn def _ensure_worker(self) -> None: if self._worker is None or not self._worker.is_alive(): self._worker = threading.Thread(target=self._loop, daemon=True) self._worker.start() def _loop(self) -> None: while True: slug, job = self._q.get() self.busy_slug = slug try: job() except Exception: # noqa: BLE001 traceback.print_exc() finally: self.busy_slug = None self._q.task_done() def _save_and_emit(self, state: ProjectState) -> None: path = state_path(state.slug) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(state.model_dump_json(indent=2), encoding="utf-8") if self._broadcaster: self._broadcaster(state.slug, state.model_dump(mode="json")) def enqueue(self, slug: str, job: Callable[[], None]) -> None: self._ensure_worker() self._q.put((slug, job)) # --- etapes -------------------------------------------------------------- def run_analyze(self, slug: str, chapter_indexes: Optional[list[int]] = None) -> None: def job() -> None: from ..analysis.gemma import Gemma from ..analysis.segmenter import analyze_chapter from ..models import Cast from ..settings import get_settings state = load_state(slug) book = load_book(slug) targets = [c for c in book.render_chapters if chapter_indexes is None or c.index in chapter_indexes] state.stages["analyze"] = StageStatus.RUNNING state.active_stage = "analyze" self._save_and_emit(state) gemma = Gemma() dedup_gemma = gemma if get_settings().dedup_use_gemma else None cast = artifacts.load_cast(slug) chars = list(cast.characters) total = len(targets) for i, ch in enumerate(targets): state.active_detail = f"Analyse {ch.title}" state.active_progress = i / max(total, 1) self._save_and_emit(state) ct = load_chapter_text(slug, ch) try: # La dedup est faite dans analyze_chapter : `chars` recoit le # cast cumule reconcilie. analysis, chars = analyze_chapter( ch, ct, gemma, book_chars=chars, dedup_gemma=dedup_gemma) except Exception: # noqa: BLE001 — chapitre ignore, on continue traceback.print_exc() continue artifacts.save_analysis(slug, analysis) if ch.index not in state.analyzed_chapters: state.analyzed_chapters.append(ch.index) self._save_and_emit(state) artifacts.save_cast(slug, Cast( narrator_voice_id=cast.narrator_voice_id, characters=chars)) state.stages["analyze"] = StageStatus.DONE self._finish(state) self.enqueue(slug, job) def run_cast(self, slug: str) -> None: def job() -> None: from ..casting.assign import assign_voices from ..casting.voicebank import build_voicebank, load_voicebank state = load_state(slug) state.stages["cast"] = StageStatus.RUNNING state.active_stage = "cast" state.active_detail = "Preparation de la voicebank" self._save_and_emit(state) vb = load_voicebank() if not vb.entries or not any(e.ref_audio for e in vb.entries): vb = build_voicebank() cast = artifacts.load_cast(slug) cast = assign_voices(cast.characters, vb, narrator_voice_id=cast.narrator_voice_id) artifacts.save_cast(slug, cast) state.stages["cast"] = StageStatus.DONE self._finish(state) self.enqueue(slug, job) def run_cast_analyze(self, slug: str, chapter_indexes: Optional[list[int]] = None) -> None: """(Re)extrait les personnages d'un/des chapitre(s) et les reconcilie. Plus leger que `run_analyze` : ne re-segmente pas (les artefacts d'analyse existants restent intacts). Sert le casting "a l'echelle d'un chapitre" tout en maintenant la coherence du livre (deduplication). """ def job() -> None: from ..analysis.gemma import Gemma from ..analysis.segmenter import extract_characters from ..casting.dedup import reconcile_characters from ..models import Cast from ..settings import get_settings state = load_state(slug) book = load_book(slug) targets = [c for c in book.render_chapters if chapter_indexes is None or c.index in chapter_indexes] state.active_stage = "cast" self._save_and_emit(state) gemma = Gemma() dedup_gemma = gemma if get_settings().dedup_use_gemma else None cast = artifacts.load_cast(slug) chars = list(cast.characters) total = len(targets) for i, ch in enumerate(targets): state.active_detail = f"Casting — {ch.title}" state.active_progress = i / max(total, 1) self._save_and_emit(state) ct = load_chapter_text(slug, ch) try: found = extract_characters("\n".join(ct.paragraphs), gemma) speakers: list[str] = [] if artifacts.analysis_path(slug, ch.index).exists(): analysis = artifacts.load_analysis(slug, ch.index) speakers = [s.speaker for s in analysis.segments] chars, _ = reconcile_characters( chars, found, dedup_gemma, speaker_names=speakers) except Exception: # noqa: BLE001 — chapitre ignore, on continue traceback.print_exc() continue artifacts.save_cast(slug, Cast( narrator_voice_id=cast.narrator_voice_id, characters=chars)) self._save_and_emit(state) self._finish(state) self.enqueue(slug, job) def run_dedup_cast(self, slug: str) -> None: """Replie les doublons d'un casting deja constitue (Holden/James Holden...).""" def job() -> None: from ..analysis.gemma import Gemma from ..casting.dedup import dedup_cast from ..models import Cast from ..settings import get_settings state = load_state(slug) state.active_stage = "cast" state.active_detail = "Deduplication du casting" self._save_and_emit(state) cast = artifacts.load_cast(slug) gemma = Gemma() if get_settings().dedup_use_gemma else None chars = dedup_cast(cast.characters, gemma) artifacts.save_cast(slug, Cast( narrator_voice_id=cast.narrator_voice_id, characters=chars)) self._finish(state) self.enqueue(slug, job) def run_pronounce(self, slug: str) -> None: def job() -> None: from ..analysis.gemma import Gemma from ..analysis.pronunciation import ( merge_pronunciations, propose_pronunciations, ) state = load_state(slug) book = load_book(slug) state.stages["pronounce"] = StageStatus.RUNNING state.active_stage = "pronounce" self._save_and_emit(state) gemma = Gemma() pron = artifacts.load_pronunciation(slug) targets = book.render_chapters[:3] # echantillon de chapitres for i, ch in enumerate(targets): state.active_detail = f"Mots a risque — {ch.title}" state.active_progress = i / max(len(targets), 1) self._save_and_emit(state) ct = load_chapter_text(slug, ch) pron = merge_pronunciations( pron, propose_pronunciations("\n".join(ct.paragraphs), gemma)) artifacts.save_pronunciation(slug, pron) state.stages["pronounce"] = StageStatus.DONE self._finish(state) self.enqueue(slug, job) def run_render(self, slug: str, chapter_indexes: list[int], backend: Optional[str] = None, mono: bool = False) -> None: from ..settings import get_settings backend = backend or get_settings().default_backend def job() -> None: from ..casting.voicebank import load_voicebank, voice_spec_for from ..pipeline.render import ( build_units_mono, build_units_multi, make_voice_resolver, render_chapter_to_mp3, ) from ..tts.factory import get_backend state = load_state(slug) book = load_book(slug) state.stages["render"] = StageStatus.RUNNING state.active_stage = "render" self._save_and_emit(state) tts = get_backend(backend) pron = artifacts.load_pronunciation(slug) cast = artifacts.load_cast(slug) vb = load_voicebank() render_list = [c for c in book.render_chapters if c.index in chapter_indexes] for ch in render_list: rs = state.render.get(ch.index) or ChapterRenderState(index=ch.index) rs.status = StageStatus.RUNNING rs.progress = 0.0 rs.backend = backend state.render[ch.index] = rs state.active_detail = f"Synthese — {ch.title}" self._save_and_emit(state) try: ct = load_chapter_text(slug, ch) if mono or ch.index not in state.analyzed_chapters: units = build_units_mono(ct, tts.default_voice()) else: analysis = artifacts.load_analysis(slug, ch.index) narr = vb.by_id(cast.narrator_voice_id) if cast.narrator_voice_id else None default_voice = (voice_spec_for(narr, backend) if narr else tts.default_voice()) resolver = make_voice_resolver(cast, vb, backend) units = build_units_multi(analysis, resolver, default_voice) def _p(done: int, total: int, _rs=rs, _state=state) -> None: _rs.progress = done / max(total, 1) _state.active_progress = _rs.progress self._save_and_emit(_state) track = book.render_chapters.index(ch) + 1 mp3 = render_chapter_to_mp3(book, ch, units, tts, pron=pron, track=track, progress=_p) rs.status = StageStatus.DONE rs.progress = 1.0 rs.mp3 = mp3.name except Exception as exc: # noqa: BLE001 rs.status = StageStatus.ERROR rs.error = str(exc) self._save_and_emit(state) state.stages["render"] = StageStatus.DONE self._finish(state) self.enqueue(slug, job) def _finish(self, state: ProjectState) -> None: state.active_stage = None state.active_detail = None state.active_progress = 0.0 self._save_and_emit(state) # Singleton partage par l'API. orchestrator = Orchestrator()