"""Banque de voix : un jeu de voix francaises variees (genre, pool anonyme). La banque de reference est peuplee par `scripts/import_voices.py` a partir de **vrais clips de locuteurs francais** (CML-TTS, livres audio) : chaque voix a son `ref_audio` + `ref_text`, qui servent de reference de timbre au clonage Qwen3 (rendu final). C'est la source de verite (metadata.json versionne). `build_voicebank()` ci-dessous est un fallback **legacy** : il regenere des clips *avec Kokoro* (presets a timbre anglais lisant du francais -> accent). Il ne se declenche que si metadata.json est absent ou sans `ref_audio`. Re-peupler la banque = relancer le script d'import, pas ce fallback. Resolution moteur : - Kokoro -> VoiceSpec(preset=kokoro_voice) (rapide, preview / draft) - Qwen3 -> VoiceSpec(ref_audio=clip, ref_text=…) (qualite, clonage) """ from __future__ import annotations from pathlib import Path import soundfile as sf from ..config import VOICEBANK_DIR from ..models import VoiceEntry, Voicebank from ..tts.base import VoiceSpec # Passage de reference lu par chaque voix pour creer son clip de clonage. REFERENCE_TEXT = ( "L'univers est toujours plus étrange qu'on ne le croit. " "Chaque nouvelle merveille pose les bases d'une découverte plus éblouissante encore." ) # Jeu de voix par defaut (varie en genre). ff_siwis est la seule voix FR native ; # les autres empruntent un timbre anglais mais lisent un texte phonemise en FR. 
SEED: list[VoiceEntry] = [
    VoiceEntry(id="fr_f_siwis", kokoro_voice="ff_siwis", gender="female", age="adult", label="Siwis (FR)"),
    VoiceEntry(id="f_bella", kokoro_voice="af_bella", gender="female", age="adult", label="Bella"),
    VoiceEntry(id="f_heart", kokoro_voice="af_heart", gender="female", age="young", label="Heart"),
    VoiceEntry(id="f_emma", kokoro_voice="bf_emma", gender="female", age="adult", label="Emma"),
    VoiceEntry(id="f_nicole", kokoro_voice="af_nicole", gender="female", age="adult", label="Nicole"),
    VoiceEntry(id="m_fenrir", kokoro_voice="am_fenrir", gender="male", age="adult", label="Fenrir"),
    VoiceEntry(id="m_michael", kokoro_voice="am_michael", gender="male", age="adult", label="Michael"),
    VoiceEntry(id="m_george", kokoro_voice="bm_george", gender="male", age="adult", label="George"),
    VoiceEntry(id="m_lewis", kokoro_voice="bm_lewis", gender="male", age="adult", label="Lewis"),
    VoiceEntry(id="m_eric", kokoro_voice="am_eric", gender="male", age="young", label="Eric"),
    VoiceEntry(id="m_santa", kokoro_voice="am_santa", gender="male", age="old", label="Santa"),
]


def metadata_path() -> Path:
    """Return the path of the voice bank's ``metadata.json`` under VOICEBANK_DIR."""
    return VOICEBANK_DIR / "metadata.json"


def clips_dir() -> Path:
    """Return the ``clips`` directory under VOICEBANK_DIR."""
    return VOICEBANK_DIR / "clips"


def load_voicebank() -> Voicebank:
    """Load the voice bank from metadata.json, falling back to the SEED set.

    Returns:
        The Voicebank parsed from metadata.json when that file exists;
        otherwise a Voicebank holding copies of the SEED entries.
    """
    path = metadata_path()
    if path.exists():
        return Voicebank.model_validate_json(path.read_text(encoding="utf-8"))
    # Hand out copies: sharing the SEED instances would let a caller that
    # mutates a returned entry silently alter the module-level defaults.
    return Voicebank(entries=[seed.model_copy() for seed in SEED])


def save_voicebank(vb: Voicebank) -> Path:
    """Write *vb* as indented JSON to metadata.json.

    VOICEBANK_DIR is created first if it does not exist.

    Returns:
        The path of the metadata file that was written.
    """
    VOICEBANK_DIR.mkdir(parents=True, exist_ok=True)
    path = metadata_path()
    path.write_text(vb.model_dump_json(indent=2), encoding="utf-8")
    return path


def build_voicebank(*, regenerate: bool = False) -> Voicebank:
    """Generate the missing reference clips and write metadata.json.

    For every SEED voice, a clip ``clips/<id>.wav`` is synthesized with the
    Kokoro backend reading REFERENCE_TEXT, unless the file already exists.

    Args:
        regenerate: When True, re-synthesize every clip even if its file is
            already present.

    Returns:
        The saved Voicebank; each entry carries ``ref_audio`` (path relative
        to VOICEBANK_DIR) and ``ref_text`` (REFERENCE_TEXT).
    """
    # Function-scope import: the Kokoro backend is only needed by this builder.
    from ..tts.kokoro import KokoroBackend

    clips_dir().mkdir(parents=True, exist_ok=True)
    backend = KokoroBackend()
    entries: list[VoiceEntry] = []
    for seed in SEED:
        clip_rel = f"clips/{seed.id}.wav"
        clip_abs = VOICEBANK_DIR / clip_rel
        if regenerate or not clip_abs.exists():
            audio, sr = backend.synthesize(REFERENCE_TEXT, VoiceSpec(preset=seed.kokoro_voice))
            sf.write(str(clip_abs), audio, sr)
        # Every seed gets an entry, whether its clip was just generated or
        # was already on disk; the seed itself is left untouched.
        entry = seed.model_copy(update={"ref_audio": clip_rel, "ref_text": REFERENCE_TEXT})
        entries.append(entry)
    vb = Voicebank(entries=entries)
    save_voicebank(vb)
    return vb


def voice_spec_for(entry: VoiceEntry, engine: str, *, speed: float = 1.0) -> VoiceSpec:
    """Build the VoiceSpec suited to the target engine.

    Args:
        entry: The voice to resolve.
        engine: Engine name; only the exact value ``"qwen3"`` selects the
            cloning spec.
        speed: Speed value forwarded unchanged to the VoiceSpec.

    Returns:
        A cloning spec (``ref_audio`` resolved against VOICEBANK_DIR, plus
        ``ref_text``) when the engine is ``"qwen3"`` and the entry has a
        ``ref_audio``; otherwise a preset spec using ``entry.kokoro_voice``.
    """
    if engine == "qwen3" and entry.ref_audio:
        ref_abs = str(VOICEBANK_DIR / entry.ref_audio)
        return VoiceSpec(ref_audio=ref_abs, ref_text=entry.ref_text, speed=speed)
    return VoiceSpec(preset=entry.kokoro_voice, speed=speed)