# InkFlow/backend/inkflow/casting/assign.py

"""Auto-casting : attribue une voix distincte a chaque personnage.

Strategie deterministe :
- Narrateur : voix dediee de la voicebank (PREFERRED_NARRATOR), sinon 1re voix.
- Personnages nommes : voix du meme genre dans le pool *nomme* (anonymous=False),
  distinctes tant qu'il en reste ; au-dela recyclage equitable.
- Figurants anonymes ("anonyme (...)") : voix du meme genre dans le pool *reserve*
  (anonymous=True), pour ne pas consommer les voix des personnages nommes.
Genre inconnu -> pool mixte. L'ordre (tri par nom) garantit la reproductibilite.
L'utilisateur pourra surcharger ces choix dans l'UI.
"""
from __future__ import annotations

from collections import Counter
from typing import Optional

from ..models import Cast, Character, Voicebank

# Preferred narrator voice id (the dedicated narrator voice of the CML voicebank).
PREFERRED_NARRATOR = "fr_narrator"


def _is_anonymous(name: str) -> bool:
    """Tell whether `name` denotes an anonymous extra.

    Anonymous extras are named like "anonyme (homme)" or
    "anonyme (femme, vieux)"; the check ignores surrounding whitespace and
    letter case.
    """
    normalized = name.strip().lower()
    return normalized.startswith("anonyme")


def _pick_pool(vb: Voicebank, gender: Optional[str], narrator_id: str,
               *, anonymous: bool) -> list[str]:
    """Return the candidate voice ids for one character.

    Candidates are looked up in three tiers, and the first non-empty tier wins:

    1. voices of the requested gender(s) whose `anonymous` tag matches,
    2. voices of the requested gender(s), queried without the tag argument,
    3. every voice of the voicebank.

    A `gender` other than "male"/"female" (including None) means both genders
    are queried, male first. The narrator voice is then removed from the
    winning tier, unless that would leave no candidate at all.
    """
    if gender in ("male", "female"):
        wanted_genders = (gender,)
    else:
        wanted_genders = ("male", "female")

    # Tier 1: gender and anonymous tag both match.
    tagged: list[str] = []
    for g in wanted_genders:
        tagged.extend(entry.id for entry in vb.by_gender(g, anonymous=anonymous))

    # Tier 2: gender only.
    gender_only: list[str] = []
    for g in wanted_genders:
        gender_only.extend(entry.id for entry in vb.by_gender(g))

    if tagged:
        candidates = tagged
    elif gender_only:
        candidates = gender_only
    else:
        # Tier 3: last resort, any voice of the bank.
        candidates = [entry.id for entry in vb.entries]

    without_narrator = [voice_id for voice_id in candidates if voice_id != narrator_id]
    # Keep the narrator only when it is the sole candidate of the chosen tier.
    return without_narrator if without_narrator else candidates


def assign_voices(
    characters: list[Character],
    vb: Voicebank,
    *,
    narrator_voice_id: Optional[str] = None,
    respect_existing: bool = False,
) -> Cast:
    """Build a Cast with a narrator voice and one voice per character.

    Characters are processed in case-insensitive name order and each one gets
    the least-used voice of its candidate pool, so unused voices are handed out
    first and reuse is spread evenly afterwards. The `voice_id` attribute of
    the given characters is mutated in place.

    Args:
        characters: Characters to cast; their `voice_id` is updated in place.
        vb: Voicebank providing the available voices.
        narrator_voice_id: Explicit narrator voice. When falsy, the voice
            `PREFERRED_NARRATOR` is used if the voicebank has it, otherwise
            the first entry of the voicebank.
        respect_existing: When True, a character whose current `voice_id`
            exists in the voicebank keeps it (user override). When False,
            every character is re-cast from scratch.

    Returns:
        A Cast holding the narrator voice id and the same `characters` list.
        With an empty voicebank nothing is assigned and `narrator_voice_id`
        is passed through unchanged.
    """
    if not vb.entries:
        return Cast(narrator_voice_id=narrator_voice_id, characters=characters)

    narrator_id = narrator_voice_id or (
        PREFERRED_NARRATOR if vb.by_id(PREFERRED_NARRATOR) else vb.entries[0].id)

    # The narrator already occupies its voice.
    usage: Counter[str] = Counter({narrator_id: 1})

    # First pass: register every kept override *before* assigning anything, so
    # a character that sorts earlier cannot be handed a voice that a later
    # character has pinned while free voices are still available.
    to_cast = []
    for ch in sorted(characters, key=lambda c: c.name.lower()):
        if respect_existing and ch.voice_id and vb.by_id(ch.voice_id):
            usage[ch.voice_id] += 1
        else:
            to_cast.append(ch)

    # Second pass: auto-cast the remaining characters in the same sorted order.
    for ch in to_cast:
        pool = _pick_pool(vb, ch.gender, narrator_id, anonymous=_is_anonymous(ch.name))
        # Least-used voice wins; `min` returns the first minimal item, so ties
        # are resolved by pool order without an explicit index tie-breaker.
        best = min(pool, key=lambda vid: usage[vid])
        ch.voice_id = best
        usage[best] += 1

    return Cast(narrator_voice_id=narrator_id, characters=characters)


def resolve_speaker_voice(
    speaker: str, cast: Cast, vb: Voicebank
) -> Optional[str]:
    """Map a segment's speaker name to a voice id.

    Resolution order:

    1. the literal speaker "narrateur" maps to the cast's narrator voice;
    2. a case-insensitive exact match on a character name or one of its
       aliases;
    3. as a last resort, `heuristic_match` from the sibling `dedup` module
       (e.g. a "Jim" not yet absorbed as an alias of "James Holden").

    Returns None when nothing matches; the caller is then expected to fall
    back on the narrator voice. `vb` is accepted but not used here.
    """
    if speaker == "narrateur":
        return cast.narrator_voice_id

    wanted = speaker.lower()
    for candidate in cast.characters:
        if candidate.name.lower() == wanted:
            return candidate.voice_id
        if any(alias.lower() == wanted for alias in candidate.aliases):
            return candidate.voice_id

    # Imported lazily, only when the exact lookup failed.
    from .dedup import heuristic_match
    guess = heuristic_match(speaker, cast.characters)
    return guess.voice_id if isinstance(guess, Character) else None