"""Audio rendering of a chapter: (segments + voices) -> WAV -> MP3.

A `RenderUnit` is a piece of text plus the voice used to read it. We build the
list of units (single narrator or multi-voice, depending on the casting),
synthesize each one, concatenate them with silences, normalize the loudness
and finally encode the result as MP3.
"""
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Optional

from ..analysis.pronunciation import apply_pronunciation
from ..audio.postprocess import (
    concat_segments,
    encode_mp3,
    normalize_loudness,
    write_wav,
)
from ..config import book_data_dir, book_output_dir
from ..models import (
    Book,
    Chapter,
    ChapterAnalysis,
    ChapterText,
    Pronunciation,
    SegmentType,
)
from ..tts.base import TTSBackend, VoiceSpec

# Resolves a speaker name to a concrete voice. The resolver built by
# `make_voice_resolver` returns None when no voicebank entry can be found, so
# callers must be ready to fall back to a default voice.
VoiceResolver = Callable[[str], Optional[VoiceSpec]]


@dataclass
class RenderUnit:
    """One piece of text to synthesize, together with the voice reading it."""

    text: str
    voice: VoiceSpec
    speaker: str = "narrateur"
    # Incise marker: forwarded to `concat_segments` so the gap with the
    # previous unit can be reduced.
    glued_to_prev: bool = False


def build_units_mono(ct: ChapterText, narrator: VoiceSpec) -> list[RenderUnit]:
    """Single-narrator mode: every non-blank paragraph is read by `narrator`."""
    return [RenderUnit(text=p, voice=narrator) for p in ct.paragraphs if p.strip()]


def make_voice_resolver(cast, voicebank, engine: str) -> VoiceResolver:
    """Build a speaker -> VoiceSpec resolver from the casting and the voicebank.

    Falls back to the narrator's voice id when the speaker has no voice
    assigned. The returned callable yields None when no voicebank entry is
    found for the selected id (or when there is no id at all).
    """
    # Imported locally, as in the original code (kept out of module import time).
    from ..casting.assign import resolve_speaker_voice
    from ..casting.voicebank import voice_spec_for

    def resolve(speaker: str) -> Optional[VoiceSpec]:
        vid = resolve_speaker_voice(speaker, cast, voicebank)
        if vid is None:
            vid = cast.narrator_voice_id
        entry = voicebank.by_id(vid) if vid else None
        if entry is None:
            return None  # the backend is expected to use its default voice
        return voice_spec_for(entry, engine)

    return resolve


def build_units_multi(
    analysis: ChapterAnalysis,
    resolve: VoiceResolver,
    default_voice: VoiceSpec,
) -> list[RenderUnit]:
    """Multi-voice mode: narration -> narrator, dialogue -> character voice.

    Incises annotated on a line of dialogue (spans inside the text) are split
    off here, at the last moment: the incise substring is read by the
    narrator's voice, the rest by the character's voice. Every piece after the
    first one emitted for a segment is marked `glued_to_prev` to shorten the
    silence; the first emitted piece inherits the segment's own flag. Lines
    without incises are rendered whole.

    Blank segments and blank pieces are skipped so that no empty text is
    turned into a unit. `default_voice` is used whenever `resolve` returns a
    falsy value.
    """
    from ..analysis.segmenter import iter_incise_pieces

    narrator = resolve("narrateur") or default_voice
    units: list[RenderUnit] = []
    for seg in analysis.segments:
        if not seg.text.strip():
            continue

        if seg.type is SegmentType.NARRATION:
            units.append(
                RenderUnit(
                    text=seg.text,
                    voice=narrator,
                    speaker="narrateur",
                    glued_to_prev=seg.glued_to_prev,
                )
            )
            continue

        char_voice = resolve(seg.speaker) or default_voice
        if not seg.incises:
            units.append(
                RenderUnit(
                    text=seg.text,
                    voice=char_voice,
                    speaker=seg.speaker,
                    glued_to_prev=seg.glued_to_prev,
                )
            )
            continue

        first_emitted = True
        for is_incise, piece in iter_incise_pieces(seg.text, seg.incises):
            # Same rule as for whole segments: never emit blank text.
            if not piece.strip():
                continue
            # Only the first piece actually emitted carries the segment's own
            # flag; the following ones belong to the same line of dialogue.
            glued = seg.glued_to_prev if first_emitted else True
            first_emitted = False
            if is_incise:
                voice, speaker = narrator, "narrateur"
            else:
                voice, speaker = char_voice, seg.speaker
            units.append(
                RenderUnit(
                    text=piece,
                    voice=voice,
                    speaker=speaker,
                    glued_to_prev=glued,
                )
            )
    return units


def render_units(
    units: list[RenderUnit],
    backend: TTSBackend,
    *,
    pron: Optional[Pronunciation] = None,
    progress: Optional[Callable[[int, int], None]] = None,
) -> tuple[list, int]:
    """Synthesize every unit and return (list of (audio, sr) pairs, n_units).

    When `pron` is given, `apply_pronunciation` is applied to each unit's text
    before synthesis. `progress`, when given, is called after each unit with
    (number of units done, total number of units).
    """
    parts = []
    total = len(units)
    for i, unit in enumerate(units):
        text = apply_pronunciation(unit.text, pron) if pron else unit.text
        audio, sr = backend.synthesize(text, unit.voice)
        parts.append((audio, sr))
        if progress:
            progress(i + 1, total)
    return parts, total


def render_chapter_to_mp3(
    book: Book,
    chapter: Chapter,
    units: list[RenderUnit],
    backend: TTSBackend,
    *,
    pron: Optional[Pronunciation] = None,
    track: Optional[int] = None,
    progress: Optional[Callable[[int, int], None]] = None,
) -> Path:
    """Full pipeline for one chapter; returns the path of the encoded MP3.

    Steps: synthesize the units, concatenate them, normalize the loudness,
    write an intermediate WAV under the book's data directory, then encode it
    as MP3 in the book's output directory. The MP3 file name is
    `chapter.output_name` when set, otherwise `chNN.mp3`.
    """
    parts, _ = render_units(units, backend, pron=pron, progress=progress)
    # `parts` is aligned 1:1 with `units`, so the incise markers can be
    # forwarded as-is.
    audio, sr = concat_segments(parts, glued=[u.glued_to_prev for u in units])
    audio = normalize_loudness(audio)

    # Intermediate WAV in the data directory, final MP3 in the output directory.
    wav_path = book_data_dir(book.slug) / "audio" / f"ch{chapter.index:02d}.wav"
    write_wav(wav_path, audio, sr)

    out_dir = book_output_dir(book.title)
    mp3_path = out_dir / (chapter.output_name or f"ch{chapter.index:02d}.mp3")

    # Only pass a cover along when the referenced file actually exists.
    cover = None
    if book.cover_file:
        candidate = book_data_dir(book.slug) / book.cover_file
        cover = candidate if candidate.exists() else None

    encode_mp3(
        wav_path,
        mp3_path,
        title=chapter.title,
        album=book.title,
        artist=book.author,
        track=track,
        cover_path=cover,
    )
    return mp3_path