"""Tests purs : canonicalisation des noms variants + anonymes par genre/age. `_canonicalize_speakers`, `_apply_anonymous_speakers` et `_anon_identity` sont deterministes et testables sans Gemma ni disque (cf. test_incises.py). """ from __future__ import annotations from inkflow.analysis.segmenter import ( _anon_identity, _apply_anonymous_speakers, _canonicalize_speakers, _inversion_gender, _resolve_anonymous_figurants, ) from inkflow.models import Character, Incise, Segment, SegmentType def _C(name, gender=None, age=None, aliases=None): return Character(name=name, gender=gender, age=age, aliases=aliases or []) def _D(text, speaker, incises=None): return Segment(type=SegmentType.DIALOGUE, text=text, speaker=speaker, incises=incises or []) def _N(text="narration"): return Segment(type=SegmentType.NARRATION, text=text, speaker="narrateur") # --- Canonicalisation des variantes de noms ---------------------------------- def test_canon_variante_vers_canonique(): chars = [_C("Sagale"), _C("Elvi"), _C("Holden")] segs = [_D("a", "Amiral Mehmet Sagale"), _D("b", "Elvi Okoye"), _D("c", "Holden")] _canonicalize_speakers(segs, chars) assert [s.speaker for s in segs] == ["Sagale", "Elvi", "Holden"] def test_canon_reciproque_forme_courte_vers_complete(): # Le cast porte le nom complet ; une surface courte distinctive s'y recolle. chars = [_C("Elvi Okoye")] segs = [_D("a", "Okoye")] _canonicalize_speakers(segs, chars) assert segs[0].speaker == "Elvi Okoye" def test_canon_marine_unique_distinctif(): chars = [_C("Marine"), _C("Holden")] segs = [_D("a", "Marine de gauche")] _canonicalize_speakers(segs, chars) assert segs[0].speaker == "Marine" def test_canon_ambiguite_sabstient(): # Deux personnages partagent le token "marine" -> non distinctif -> abstention. chars = [_C("Marine Lopez"), _C("Marine Cho")] segs = [_D("a", "Marine de gauche")] _canonicalize_speakers(segs, chars) assert segs[0].speaker == "Marine de gauche" # inchange def test_canon_inconnu_total_inchange(): chars = [_C("Holden"), _C("Kajri")] segs = [_D("a", "Bob")] _canonicalize_speakers(segs, chars) assert segs[0].speaker == "Bob" def test_canon_narrateur_et_inconnu_jamais_touches(): chars = [_C("Sagale")] segs = [_N(), _D("a", "inconnu"), _D("b", "?")] _canonicalize_speakers(segs, chars) assert [s.speaker for s in segs] == ["narrateur", "inconnu", "?"] def test_canon_idempotent(): chars = [_C("Sagale")] segs = [_D("a", "Amiral Mehmet Sagale")] _canonicalize_speakers(segs, chars) once = segs[0].speaker _canonicalize_speakers(segs, chars) assert segs[0].speaker == once == "Sagale" # --- Identite anonyme par (genre, age) --------------------------------------- def test_anon_identity_format(): assert _anon_identity("male", "adult") == "anonyme (homme, adulte)" assert _anon_identity("male", None) == "anonyme (homme)" assert _anon_identity("female", None) == "anonyme (femme)" assert _anon_identity(None, None) == "anonyme" assert _anon_identity(None, "child") == "anonyme (enfant)" def test_apply_anonymous_role_par_genre(): # "informa le soldat" -> anonyme (homme) ; renvoie le bucket avec genre/age. t = "La réception commence, madame, informa le soldat." inc = Incise(start=t.index("informa"), end=len(t)) segs = [_D(t, "inconnu", [inc])] used = _apply_anonymous_speakers(segs, names={"Kajri"}) assert segs[0].speaker == "anonyme (homme)" assert used == {"anonyme (homme)": ("male", None)} def test_apply_anonymous_role_inconnu_genre(): # "une voix" : role sans genre fiable -> bucket generique "anonyme". t = "Par ici, indiqua une voix." inc = Incise(start=t.index("indiqua"), end=len(t)) segs = [_D(t, "inconnu", [inc])] used = _apply_anonymous_speakers(segs, names=set()) assert segs[0].speaker == "anonyme" assert used == {"anonyme": (None, None)} def test_apply_anonymous_ignore_nom_propre(): # Incise a nom propre -> pas un anonyme, speaker inchange. t = "Bonjour, lança Drummer." inc = Incise(start=t.index("lança"), end=len(t)) segs = [_D(t, "Drummer", [inc])] used = _apply_anonymous_speakers(segs, names={"Drummer"}) assert segs[0].speaker == "Drummer" assert used == {} # --- Rang/titre devant un nom propre ----------------------------------------- def test_rang_titre_capte_le_nom_propre(): # "dit l'amiral Sagale" : le rang n'est pas un anonyme, on capte "Sagale". from inkflow.analysis.segmenter import detect_incises, incise_role, incise_speaker t = "Dr Okoye, dit l'amiral Sagale." inc = detect_incises(t, names={"Sagale"})[0] assert incise_speaker(t, inc, {"Sagale"}) == "Sagale" assert incise_role(t, inc, {"Sagale"}) is None # --- Stabilite du nom canonique etabli --------------------------------------- def test_reconcile_garde_nom_etabli_stable(): # Un nom deja dans le cast ("Sagale") n'est pas renomme par une forme plus # longue trouvee dans un chapitre ("Amiral Mehmet Sagale") -> alias. from inkflow.casting.dedup import reconcile_characters book = [_C("Sagale", gender="male")] found = [_C("Amiral Mehmet Sagale", gender="male")] chars, _ = reconcile_characters(book, found, None) sagale = next(c for c in chars if c.name == "Sagale") assert "Amiral Mehmet Sagale" in sagale.aliases def test_reconcile_nouveau_perso_garde_forme_complete(): # Sans nom etabli, le comportement reste "la forme la plus complete gagne". from inkflow.casting.dedup import reconcile_characters chars, _ = reconcile_characters([], [_C("Jim"), _C("Jim Holden")], None) assert any(c.name == "Jim Holden" and "Jim" in c.aliases for c in chars) # --- Figurants anonymes resolus via la narration adjacente ------------------- def test_inversion_gender(): assert _inversion_gender("Souhaitez-vous une escorte ? demanda-t-elle.") == "female" assert _inversion_gender("Stop, dit-il.") == "male" assert _inversion_gender("Je pars maintenant.") is None def test_figurant_femme_via_narration_avant(): # Replique indeterminee + narration decrivant "La jeune marine" -> anonyme femme. segs = [ _N("La jeune marine toucha quelque chose au poignet de son armure."), _D("Prévenez-nous quand vous serez prête à ressortir.", "inconnu"), ] used = _resolve_anonymous_figurants(segs) assert segs[1].speaker == "anonyme (femme)" assert "anonyme (femme)" in used def test_figurant_genre_par_pronom_inversion_prioritaire(): # "demanda-t-elle" (féminin) prime, narration "Le soldat" -> on garde femme. segs = [ _N("Le soldat s'avança vers eux."), _D("Souhaitez-vous une escorte ? demanda-t-elle.", "?"), ] _resolve_anonymous_figurants(segs) assert segs[0].speaker == "narrateur" assert segs[1].speaker == "anonyme (femme)" def test_figurant_ne_touche_pas_les_resolus(): # Une replique deja attribuee n'est jamais ecrasee, meme avec narration de role. segs = [ _N("Le soldat montait la garde."), _D("J'arrive.", "Holden"), ] _resolve_anonymous_figurants(segs) assert segs[1].speaker == "Holden" def test_figurant_sans_narration_de_role_inchange(): segs = [_N("La pièce était sombre."), _D("Qui est là ?", "inconnu")] _resolve_anonymous_figurants(segs) assert segs[1].speaker == "inconnu"