"""Schemas de donnees partages dans tout le pipeline (pydantic v2). Ces modeles sont serialises en JSON sur disque (book.json, analysis/chNN.json, cast.json, pronunciation.json) et constituent le contrat entre les etapes du pipeline. Chaque etape lit l'artefact de la precedente et ecrit le sien. """ from __future__ import annotations from enum import Enum from typing import Optional from pydantic import BaseModel, Field class ChapterKind(str, Enum): FRONT = "front" # couverture, page de titre, mentions editeur (non lu) CHAPTER = "chapter" # prologue, chapitres numerotes, epilogue (lu) BACK = "back" # remerciements, glossaire... (lu si texte significatif) class Chapter(BaseModel): index: int # ordre dans le spine (0-based) item_id: str # idref du manifest opf src: str # chemin interne xhtml title: str # titre toc brut, ex "1 - ELVI" kind: ChapterKind render: bool # doit-on synthetiser l'audio ? number: Optional[str] = None # "1", "PROLOGUE", "EPILOGUE"... pov: Optional[str] = None # personnage point de vue, ex "ELVI" word_count: int = 0 text_file: Optional[str] = None # chemin relatif du json de texte (chapters/chNN.json) output_name: Optional[str] = None # nom du mp3 final, ex "02-Chapitre 1.mp3" class Book(BaseModel): slug: str # identifiant interne (dossier data) title: str author: Optional[str] = None language: str = "fr" description: Optional[str] = None cover_file: Optional[str] = None # chemin du cover extrait dans data// chapters: list[Chapter] = Field(default_factory=list) @property def render_chapters(self) -> list[Chapter]: return [c for c in self.chapters if c.render] class ChapterText(BaseModel): """Texte brut normalise d'un chapitre (sortie du parser).""" index: int title: str paragraphs: list[str] = Field(default_factory=list) @property def word_count(self) -> int: return sum(len(p.split()) for p in self.paragraphs) # --- Analyse (etape Gemma) --------------------------------------------------- class SegmentType(str, Enum): NARRATION = "narration" DIALOGUE = "dialogue" class Incise(BaseModel): """Borne d'une incise de narration inseree dans une replique de dialogue. Offsets (caracteres) dans `Segment.text` : la sous-chaine `text[start:end]` est de la narration (ex: "dit-il", "lanca Drummer") a porter par la voix du narrateur au rendu, sans fragmenter la replique persistee. """ start: int # offset inclus end: int # offset exclu class Segment(BaseModel): """Unite de synthese : un bout de texte attribue a un locuteur.""" type: SegmentType text: str speaker: str = "narrateur" # "narrateur" ou nom de personnage glued_to_prev: bool = False # sous-segment issu du meme paragraphe (incise) # -> gap audio reduit avec le segment precedent incises: list[Incise] = Field(default_factory=list) # spans narrateur DANS text class ChapterAnalysis(BaseModel): index: int title: str segments: list[Segment] = Field(default_factory=list) class Character(BaseModel): name: str # nom canonique aliases: list[str] = Field(default_factory=list) gender: Optional[str] = None # "male" | "female" | "unknown" age: Optional[str] = None # "child" | "young" | "adult" | "old" description: Optional[str] = None voice_id: Optional[str] = None # id dans la voicebank (assigne au casting) class Cast(BaseModel): narrator_voice_id: Optional[str] = None characters: list[Character] = Field(default_factory=list) class VoiceEntry(BaseModel): """Une voix de la banque, agnostique du moteur. `kokoro_voice` est l'identite (rendu Kokoro direct + clip de reference) ; `ref_audio`/`ref_text` servent au clonage Qwen3 (rendu final). """ id: str # ex "fr_f_siwis" kokoro_voice: str # ex "ff_siwis" gender: str = "unknown" # male | female | unknown age: str = "adult" # child | young | adult | old lang: str = "fr" label: Optional[str] = None # libelle lisible ref_audio: Optional[str] = None # chemin du clip (relatif a voicebank/) ref_text: Optional[str] = None # transcription du clip anonymous: bool = False # voix reservee aux figurants "anonyme (...)" class Voicebank(BaseModel): entries: list[VoiceEntry] = Field(default_factory=list) def by_id(self, voice_id: str) -> Optional[VoiceEntry]: return next((e for e in self.entries if e.id == voice_id), None) def by_gender(self, gender: str, *, anonymous: Optional[bool] = None) -> list[VoiceEntry]: """Voix d'un genre. `anonymous=False`/`True` filtre le pool reserve aux figurants ; None ne filtre pas.""" return [e for e in self.entries if e.gender == gender and (anonymous is None or e.anonymous == anonymous)] class PronunciationEntry(BaseModel): term: str # graphie d'origine, ex "Tiamat" replacement: str # graphie phonetique guidee, ex "Tia-mat" note: Optional[str] = None enabled: bool = True class Pronunciation(BaseModel): entries: list[PronunciationEntry] = Field(default_factory=list) # --- Etat du projet (orchestration / UI) ------------------------------------ class StageStatus(str, Enum): PENDING = "pending" RUNNING = "running" DONE = "done" ERROR = "error" class ChapterRenderState(BaseModel): index: int status: StageStatus = StageStatus.PENDING progress: float = 0.0 # 0..1 mp3: Optional[str] = None # nom du fichier de sortie backend: Optional[str] = None error: Optional[str] = None class ProjectState(BaseModel): """Etat persistant d'un livre, pilote par l'orchestrateur et lu par l'UI.""" slug: str title: str stages: dict[str, StageStatus] = Field(default_factory=dict) # parse/analyze/cast/pronounce analyzed_chapters: list[int] = Field(default_factory=list) render: dict[int, ChapterRenderState] = Field(default_factory=dict) # Job courant (pour l'affichage temps reel). active_stage: Optional[str] = None active_detail: Optional[str] = None active_progress: float = 0.0 def stage(self, name: str) -> StageStatus: return self.stages.get(name, StageStatus.PENDING)