Initial commit: InkFlow — EPUB vers livre audio local (MLX/Kokoro)
This commit is contained in:
158
backend/inkflow/pipeline/render.py
Normal file
158
backend/inkflow/pipeline/render.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""Rendu audio d'un chapitre : (segments + voix) -> WAV -> MP3.
|
||||
|
||||
Une `RenderUnit` = un bout de texte + la voix a employer. On construit la liste
|
||||
d'unites (mono-narrateur ou multi-voix selon le casting), on synthetise chacune,
|
||||
on concatene avec des silences, on normalise puis on encode en MP3.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
|
||||
from ..analysis.pronunciation import apply_pronunciation
|
||||
from ..audio.postprocess import concat_segments, encode_mp3, normalize_loudness, write_wav
|
||||
from ..config import book_data_dir, book_output_dir
|
||||
from ..models import (
|
||||
Book,
|
||||
Chapter,
|
||||
ChapterAnalysis,
|
||||
ChapterText,
|
||||
Pronunciation,
|
||||
SegmentType,
|
||||
)
|
||||
from ..tts.base import TTSBackend, VoiceSpec
|
||||
|
||||
# Maps a speaker name to a concrete voice. A resolver may return None when no
# voice entry can be found for the speaker; callers are expected to substitute
# their own default voice in that case (e.g. `resolve(name) or default_voice`).
VoiceResolver = Callable[[str], Optional[VoiceSpec]]
|
||||
|
||||
|
||||
@dataclass
class RenderUnit:
    """One piece of text to synthesize, paired with the voice that reads it."""

    text: str  # text to be spoken for this unit
    voice: VoiceSpec  # voice used to synthesize `text`
    speaker: str = "narrateur"  # speaker label; defaults to the narrator
    glued_to_prev: bool = False  # incise -> reduced gap with the previous unit
|
||||
|
||||
|
||||
def build_units_mono(ct: ChapterText, narrator: VoiceSpec) -> list[RenderUnit]:
    """Build single-narrator units: one unit per non-blank paragraph.

    Every paragraph of ``ct`` that contains something other than whitespace
    becomes a ``RenderUnit`` read by ``narrator``; blank paragraphs are dropped.
    """
    units: list[RenderUnit] = []
    for paragraph in ct.paragraphs:
        if not paragraph.strip():
            # Nothing to read aloud in a whitespace-only paragraph.
            continue
        units.append(RenderUnit(text=paragraph, voice=narrator))
    return units
|
||||
|
||||
|
||||
def make_voice_resolver(cast, voicebank, engine: str) -> VoiceResolver:
    """Build a speaker -> VoiceSpec resolver from the casting and the voicebank.

    The returned callable looks up the voice id assigned to a speaker and falls
    back to the narrator's voice id when the speaker has none. It returns
    ``None`` when no voicebank entry can be found for the resulting id, so the
    caller can substitute its own default voice.

    Args:
        cast: casting object; must expose ``narrator_voice_id``.
        voicebank: voice catalogue; must expose ``by_id(voice_id)``.
        engine: TTS engine name forwarded to ``voice_spec_for``.

    Returns:
        A function mapping a speaker name to a ``VoiceSpec`` or ``None``.
    """
    # Imported lazily, as in the original module layout.
    from ..casting.assign import resolve_speaker_voice
    from ..casting.voicebank import voice_spec_for

    def resolve(speaker: str) -> Optional[VoiceSpec]:
        # Any falsy id (None or an empty string) means "no voice assigned".
        # Using the same truthiness rule for the narrator fallback and for the
        # voicebank lookup keeps an empty id from silently skipping the
        # fallback and resolving to nothing.
        vid = resolve_speaker_voice(speaker, cast, voicebank)
        if not vid:
            vid = cast.narrator_voice_id
        entry = voicebank.by_id(vid) if vid else None
        if entry is None:
            return None  # no usable entry: a default voice is used downstream
        return voice_spec_for(entry, engine)

    return resolve
|
||||
|
||||
|
||||
def build_units_multi(
    analysis: ChapterAnalysis,
    resolve: VoiceResolver,
    default_voice: "VoiceSpec",
) -> list[RenderUnit]:
    """Build multi-voice units: narration -> narrator, dialogue -> character.

    Incises annotated on a line of dialogue (as bounds inside its text) are
    split off here, at the last moment: the incise substring is read by the
    narrator's voice and glued to the previous unit to shorten the silence,
    while the rest of the line is read by the character's voice. Lines without
    incises are rendered whole.

    Segments and incise pieces that contain only whitespace are skipped, so no
    empty text is ever handed to the TTS backend.

    Args:
        analysis: chapter analysis whose ``segments`` are rendered in order.
        resolve: maps a speaker name to a voice; a falsy result selects
            ``default_voice``.
        default_voice: voice used whenever ``resolve`` yields nothing.

    Returns:
        The render units, in reading order.
    """
    from ..analysis.segmenter import iter_incise_pieces

    narrator = resolve("narrateur") or default_voice
    units: list[RenderUnit] = []
    for seg in analysis.segments:
        if not seg.text.strip():
            continue
        if seg.type is SegmentType.NARRATION:
            units.append(RenderUnit(text=seg.text, voice=narrator,
                                    speaker="narrateur",
                                    glued_to_prev=seg.glued_to_prev))
            continue

        char_voice = resolve(seg.speaker) or default_voice
        if not seg.incises:
            units.append(RenderUnit(text=seg.text, voice=char_voice,
                                    speaker=seg.speaker,
                                    glued_to_prev=seg.glued_to_prev))
            continue

        # The first piece actually emitted inherits the segment's own glue
        # flag; every following piece is glued to the one before it.
        first_emitted = True
        for is_incise, piece in iter_incise_pieces(seg.text, seg.incises):
            if not piece.strip():
                # Same rule as for whole segments: nothing to synthesize.
                continue
            glued = seg.glued_to_prev if first_emitted else True
            first_emitted = False
            if is_incise:
                units.append(RenderUnit(text=piece, voice=narrator,
                                        speaker="narrateur", glued_to_prev=glued))
            else:
                units.append(RenderUnit(text=piece, voice=char_voice,
                                        speaker=seg.speaker, glued_to_prev=glued))
    return units
|
||||
|
||||
|
||||
def render_units(
    units: list[RenderUnit],
    backend: TTSBackend,
    *,
    pron: Optional[Pronunciation] = None,
    progress: Optional[Callable[[int, int], None]] = None,
) -> tuple["list", int]:
    """Synthesize every unit and return ``(list of (audio, sr), unit count)``.

    When ``pron`` is truthy, each unit's text goes through
    ``apply_pronunciation`` before synthesis. When ``progress`` is given, it is
    called after each unit with ``(units done so far, total units)``.
    """
    count = len(units)
    rendered = []
    for done, item in enumerate(units, start=1):
        spoken = item.text
        if pron:
            spoken = apply_pronunciation(spoken, pron)
        waveform, rate = backend.synthesize(spoken, item.voice)
        rendered.append((waveform, rate))
        if progress:
            progress(done, count)
    return rendered, count
|
||||
|
||||
|
||||
def render_chapter_to_mp3(
    book: Book,
    chapter: Chapter,
    units: list[RenderUnit],
    backend: TTSBackend,
    *,
    pron: Optional[Pronunciation] = None,
    track: Optional[int] = None,
    progress: Optional[Callable[[int, int], None]] = None,
) -> Path:
    """Run the full pipeline for one chapter and return the MP3 path.

    Steps: synthesize the units, concatenate them (passing each unit's
    ``glued_to_prev`` flag), normalize loudness, write an intermediate WAV
    under the book's data directory, then encode the final MP3 under the
    book's output directory with title/album/artist/track tags and, when the
    cover file exists on disk, the cover image.
    """
    rendered, _ = render_units(units, backend, pron=pron, progress=progress)

    # `rendered` lines up 1:1 with `units`, so the incise markers can be
    # forwarded positionally.
    glue_flags = [unit.glued_to_prev for unit in units]
    samples, sample_rate = concat_segments(rendered, glued=glue_flags)
    samples = normalize_loudness(samples)

    # Intermediate WAV lives in data/, final MP3 in output/.
    wav_name = f"ch{chapter.index:02d}.wav"
    wav_file = book_data_dir(book.slug) / "audio" / wav_name
    write_wav(wav_file, samples, sample_rate)

    mp3_name = chapter.output_name or f"ch{chapter.index:02d}.mp3"
    mp3_file = book_output_dir(book.title) / mp3_name

    # Only embed a cover that is both declared on the book and present on disk.
    artwork = None
    if book.cover_file:
        cover_candidate = book_data_dir(book.slug) / book.cover_file
        if cover_candidate.exists():
            artwork = cover_candidate

    encode_mp3(
        wav_file,
        mp3_file,
        title=chapter.title,
        album=book.title,
        artist=book.author,
        track=track,
        cover_path=artwork,
    )
    return mp3_file
|
||||
Reference in New Issue
Block a user