Voicebank : vraies voix françaises (CML-TTS) + pool anonyme + garde-fou Qwen3
Remplace la voicebank générée par Kokoro (timbre anglais sur français phonémisé -> accent que Qwen3 clonait) par 41 vraies voix FR issues de CML-TTS (livres audio studio) : 1 narrateur dédié, 18F/14M nommées, 4F/4M anonymes réservées.

- scripts/import_voices.py : import multi-shards parquet, 1 clip/locuteur (le plus propre via levenshtein), genre estimé par F0 (YIN, anti-octave), filtre débit de parole (ref_text aligné sur l'audio).
- VoiceEntry.anonymous + assign_voices : les figurants « anonyme (...) » tirent dans un pool réservé, jamais mélangé avec les voix nommées ; narrateur dédié (fr_narrator remplace fr_f_siwis).
- dedup._anon_attrs : genre/âge déduits du nom anonyme (bon genre de voix).
- tts/qwen3.py : garde-fou anti-dérive (rejette/réessaie les sorties en boucle ou coupées en estimant la durée plausible du chunk).

Limite connue : Qwen3 ne sait pas synthétiser les fragments d'1-2 mots (incises, titres) -> trous ; à traiter (repli Kokoro ou fusion des incises).

Inclut aussi du travail en cours antérieur (refacto backend LLM pluggable mlx/lmstudio, benchmark, ajustements frontend/API).

Claude-Session: https://claude.ai/code/session_01XSVvcy1mfb4k1xDgib9vVU
This commit is contained in:
201
backend/tests/test_canonicalize.py
Normal file
201
backend/tests/test_canonicalize.py
Normal file
@@ -0,0 +1,201 @@
|
||||
"""Tests purs : canonicalisation des noms variants + anonymes par genre/age.
|
||||
|
||||
`_canonicalize_speakers`, `_apply_anonymous_speakers` et `_anon_identity` sont
|
||||
deterministes et testables sans Gemma ni disque (cf. test_incises.py).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from inkflow.analysis.segmenter import (
|
||||
_anon_identity,
|
||||
_apply_anonymous_speakers,
|
||||
_canonicalize_speakers,
|
||||
_inversion_gender,
|
||||
_resolve_anonymous_figurants,
|
||||
)
|
||||
from inkflow.models import Character, Incise, Segment, SegmentType
|
||||
|
||||
|
||||
def _C(name, gender=None, age=None, aliases=None):
    """Build a ``Character`` fixture.

    A falsy ``aliases`` (``None`` or empty) is replaced by a new empty list so
    fixtures never share one list object.
    """
    return Character(name=name, gender=gender, age=age, aliases=aliases or [])
|
||||
|
||||
|
||||
def _D(text, speaker, incises=None):
    """Build a DIALOGUE ``Segment`` fixture attributed to ``speaker``.

    A falsy ``incises`` is replaced by a new empty list.
    """
    return Segment(
        type=SegmentType.DIALOGUE,
        text=text,
        speaker=speaker,
        incises=incises or [],
    )
|
||||
|
||||
|
||||
def _N(text="narration"):
    """Build a NARRATION ``Segment`` fixture whose speaker is ``"narrateur"``."""
    return Segment(type=SegmentType.NARRATION, text=text, speaker="narrateur")
|
||||
|
||||
|
||||
# --- Canonicalisation des variantes de noms ----------------------------------
|
||||
|
||||
def test_canon_variante_vers_canonique():
    """Longer surface forms are rewritten to the matching cast name."""
    chars = [_C("Sagale"), _C("Elvi"), _C("Holden")]
    segs = [
        _D("a", "Amiral Mehmet Sagale"),
        _D("b", "Elvi Okoye"),
        _D("c", "Holden"),
    ]
    _canonicalize_speakers(segs, chars)
    assert [s.speaker for s in segs] == ["Sagale", "Elvi", "Holden"]
|
||||
|
||||
|
||||
def test_canon_reciproque_forme_courte_vers_complete():
    """A distinctive short form is mapped back to the cast's full name."""
    # The cast carries the full name; the short surface form re-attaches to it.
    chars = [_C("Elvi Okoye")]
    segs = [_D("a", "Okoye")]
    _canonicalize_speakers(segs, chars)
    assert segs[0].speaker == "Elvi Okoye"
|
||||
|
||||
|
||||
def test_canon_marine_unique_distinctif():
    """With a single "Marine" in the cast, "Marine de gauche" maps to it."""
    chars = [_C("Marine"), _C("Holden")]
    segs = [_D("a", "Marine de gauche")]
    _canonicalize_speakers(segs, chars)
    assert segs[0].speaker == "Marine"
|
||||
|
||||
|
||||
def test_canon_ambiguite_sabstient():
    """An ambiguous surface form is left untouched rather than guessed."""
    # Two characters share the token "marine" -> not distinctive -> abstain.
    chars = [_C("Marine Lopez"), _C("Marine Cho")]
    segs = [_D("a", "Marine de gauche")]
    _canonicalize_speakers(segs, chars)
    assert segs[0].speaker == "Marine de gauche"  # unchanged
|
||||
|
||||
|
||||
def test_canon_inconnu_total_inchange():
    """A speaker with no link to any cast member keeps its name."""
    chars = [_C("Holden"), _C("Kajri")]
    segs = [_D("a", "Bob")]
    _canonicalize_speakers(segs, chars)
    assert segs[0].speaker == "Bob"
|
||||
|
||||
|
||||
def test_canon_narrateur_et_inconnu_jamais_touches():
    """The "narrateur", "inconnu" and "?" speakers are never rewritten."""
    chars = [_C("Sagale")]
    segs = [_N(), _D("a", "inconnu"), _D("b", "?")]
    _canonicalize_speakers(segs, chars)
    assert [s.speaker for s in segs] == ["narrateur", "inconnu", "?"]
|
||||
|
||||
|
||||
def test_canon_idempotent():
    """Running the canonicalisation twice gives the same result as once."""
    chars = [_C("Sagale")]
    segs = [_D("a", "Amiral Mehmet Sagale")]
    _canonicalize_speakers(segs, chars)
    once = segs[0].speaker
    _canonicalize_speakers(segs, chars)
    assert segs[0].speaker == once == "Sagale"
|
||||
|
||||
|
||||
# --- Identite anonyme par (genre, age) ---------------------------------------
|
||||
|
||||
def test_anon_identity_format():
    """``_anon_identity`` renders each (gender, age) pair as the expected label."""
    assert _anon_identity("male", "adult") == "anonyme (homme, adulte)"
    assert _anon_identity("male", None) == "anonyme (homme)"
    assert _anon_identity("female", None) == "anonyme (femme)"
    assert _anon_identity(None, None) == "anonyme"
    assert _anon_identity(None, "child") == "anonyme (enfant)"
|
||||
|
||||
|
||||
def test_apply_anonymous_role_par_genre():
    """A gendered role in the incise yields a gendered anonymous speaker."""
    # "informa le soldat" -> anonyme (homme); the returned mapping carries the
    # bucket together with its (gender, age).
    t = "La réception commence, madame, informa le soldat."
    inc = Incise(start=t.index("informa"), end=len(t))
    segs = [_D(t, "inconnu", [inc])]
    used = _apply_anonymous_speakers(segs, names={"Kajri"})
    assert segs[0].speaker == "anonyme (homme)"
    assert used == {"anonyme (homme)": ("male", None)}
|
||||
|
||||
|
||||
def test_apply_anonymous_role_inconnu_genre():
    """A role without a reliable gender falls into the generic bucket."""
    # "une voix": no trustworthy gender -> generic "anonyme" bucket.
    t = "Par ici, indiqua une voix."
    inc = Incise(start=t.index("indiqua"), end=len(t))
    segs = [_D(t, "inconnu", [inc])]
    used = _apply_anonymous_speakers(segs, names=set())
    assert segs[0].speaker == "anonyme"
    assert used == {"anonyme": (None, None)}
|
||||
|
||||
|
||||
def test_apply_anonymous_ignore_nom_propre():
    """An incise naming a proper noun is not treated as anonymous."""
    # Proper-noun incise -> not anonymous, the speaker stays as it was.
    t = "Bonjour, lança Drummer."
    inc = Incise(start=t.index("lança"), end=len(t))
    segs = [_D(t, "Drummer", [inc])]
    used = _apply_anonymous_speakers(segs, names={"Drummer"})
    assert segs[0].speaker == "Drummer"
    assert used == {}
|
||||
|
||||
|
||||
# --- Rang/titre devant un nom propre -----------------------------------------
|
||||
|
||||
def test_rang_titre_capte_le_nom_propre():
    """A rank in front of a proper noun resolves to the name, not to a role."""
    # "dit l'amiral Sagale": the rank is not an anonymous role; "Sagale" is
    # what must be captured.
    from inkflow.analysis.segmenter import detect_incises, incise_role, incise_speaker

    t = "Dr Okoye, dit l'amiral Sagale."
    inc = detect_incises(t, names={"Sagale"})[0]
    assert incise_speaker(t, inc, {"Sagale"}) == "Sagale"
    assert incise_role(t, inc, {"Sagale"}) is None
|
||||
|
||||
|
||||
# --- Stabilite du nom canonique etabli ---------------------------------------
|
||||
|
||||
def test_reconcile_garde_nom_etabli_stable():
    """An established cast name survives; the longer form becomes an alias."""
    # A name already in the cast ("Sagale") is not renamed by a longer form
    # found in a chapter ("Amiral Mehmet Sagale"); that form is kept as alias.
    from inkflow.casting.dedup import reconcile_characters

    book = [_C("Sagale", gender="male")]
    found = [_C("Amiral Mehmet Sagale", gender="male")]
    chars, _ = reconcile_characters(book, found, None)
    # ``next`` raises StopIteration if "Sagale" was renamed, failing the test.
    sagale = next(c for c in chars if c.name == "Sagale")
    assert "Amiral Mehmet Sagale" in sagale.aliases
|
||||
|
||||
|
||||
def test_reconcile_nouveau_perso_garde_forme_complete():
    """Without an established name, the most complete form wins."""
    from inkflow.casting.dedup import reconcile_characters

    chars, _ = reconcile_characters([], [_C("Jim"), _C("Jim Holden")], None)
    assert any(c.name == "Jim Holden" and "Jim" in c.aliases for c in chars)
|
||||
|
||||
|
||||
# --- Figurants anonymes resolus via la narration adjacente -------------------
|
||||
|
||||
def test_inversion_gender():
    """Inverted pronouns ("-t-elle", "-il") give the gender; otherwise None."""
    assert _inversion_gender("Souhaitez-vous une escorte ? demanda-t-elle.") == "female"
    assert _inversion_gender("Stop, dit-il.") == "male"
    assert _inversion_gender("Je pars maintenant.") is None
|
||||
|
||||
|
||||
def test_figurant_femme_via_narration_avant():
    """The preceding narration's role sets the anonymous speaker's gender."""
    # Unresolved line + narration describing "La jeune marine" -> anonymous
    # female speaker.
    segs = [
        _N("La jeune marine toucha quelque chose au poignet de son armure."),
        _D("Prévenez-nous quand vous serez prête à ressortir.", "inconnu"),
    ]
    used = _resolve_anonymous_figurants(segs)
    assert segs[1].speaker == "anonyme (femme)"
    assert "anonyme (femme)" in used
|
||||
|
||||
|
||||
def test_figurant_genre_par_pronom_inversion_prioritaire():
    """The inverted pronoun in the line outranks the adjacent narration."""
    # "demanda-t-elle" (feminine) wins over the narration's "Le soldat".
    segs = [
        _N("Le soldat s'avança vers eux."),
        _D("Souhaitez-vous une escorte ? demanda-t-elle.", "?"),
    ]
    _resolve_anonymous_figurants(segs)
    assert segs[0].speaker == "narrateur"
    assert segs[1].speaker == "anonyme (femme)"
|
||||
|
||||
|
||||
def test_figurant_ne_touche_pas_les_resolus():
    """An already attributed line is never overwritten."""
    # Even with a role-bearing narration next to it, "Holden" must stay.
    segs = [
        _N("Le soldat montait la garde."),
        _D("J'arrive.", "Holden"),
    ]
    _resolve_anonymous_figurants(segs)
    assert segs[1].speaker == "Holden"
|
||||
|
||||
|
||||
def test_figurant_sans_narration_de_role_inchange():
    """Without a role in the adjacent narration the speaker stays "inconnu"."""
    segs = [_N("La pièce était sombre."), _D("Qui est là ?", "inconnu")]
    _resolve_anonymous_figurants(segs)
    assert segs[1].speaker == "inconnu"
|
||||
@@ -6,7 +6,7 @@ parasite present dans la pensee).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from inkflow.analysis.gemma import (
|
||||
from inkflow.analysis.llm._text import (
|
||||
_extract_json,
|
||||
_has_complete_json,
|
||||
_strip_reasoning,
|
||||
|
||||
@@ -8,6 +8,7 @@ from __future__ import annotations
|
||||
|
||||
from inkflow.analysis.segmenter import (
|
||||
detect_incises,
|
||||
incise_role,
|
||||
incise_speaker,
|
||||
iter_incise_pieces,
|
||||
)
|
||||
@@ -202,3 +203,125 @@ def test_bornes_non_chevauchantes_et_triees():
|
||||
assert all(incs[i].end <= incs[i + 1].start for i in range(len(incs) - 1))
|
||||
for inc in incs:
|
||||
assert 0 <= inc.start < inc.end <= len(text)
|
||||
|
||||
|
||||
# --- Passe deterministe : reparation de l'alternance des tours ---------------
|
||||
|
||||
from inkflow.analysis.segmenter import _repair_alternation # noqa: E402
|
||||
from inkflow.models import Incise, Segment, SegmentType # noqa: E402
|
||||
|
||||
|
||||
def _D(text: str, speaker: str, incises=None) -> Segment:
    """Build a DIALOGUE ``Segment`` fixture attributed to ``speaker``.

    A falsy ``incises`` is replaced by a new empty list.
    """
    return Segment(
        type=SegmentType.DIALOGUE,
        text=text,
        speaker=speaker,
        incises=incises or [],
    )
|
||||
|
||||
|
||||
def _N(text: str = "narration") -> Segment:
    """Build a NARRATION ``Segment`` fixture whose speaker is ``"narrateur"``."""
    return Segment(type=SegmentType.NARRATION, text=text, speaker="narrateur")
|
||||
|
||||
|
||||
def _speakers(segments, sl):
    """Return the ``speaker`` of ``segments[i]`` for each index ``i`` in ``sl``.

    The result follows the order of ``sl``; repeated indices are repeated.
    """
    return [segments[i].speaker for i in sl]
|
||||
|
||||
|
||||
def test_alternance_corrige_doublons_de_tour():
    """A doubled turn in a two-person exchange is repaired to strict alternation."""
    # Two-way exchange where the model doubled a turn (D,H,H) -> must become
    # D,H,D again.
    segs = [
        _N(),
        _D("Je suis ravie.", "Drummer"),
        _D("C'est moche.", "Holden"),
        _D("Je ne devrais pas la ramener.", "Holden"),  # wrong attribution
        _N(),
    ]
    _repair_alternation(segs, names={"Drummer", "Holden"})
    assert _speakers(segs, [1, 2, 3]) == ["Drummer", "Holden", "Drummer"]
|
||||
|
||||
|
||||
def test_alternance_ancre_par_incise_nominale():
    """A leading nominal incise anchors the parity of the alternation."""
    # Nominal seed at the head ("compatit Holden") fixes the pattern's parity.
    t0 = "Toutes mes condoléances, compatit Holden."
    seed = [Incise(start=t0.index("compatit"), end=len(t0))]
    segs = [
        _N(),
        _D(t0, "Holden", seed),
        _D("Merci.", "Kajri"),
        _D("Nous n'avons pas été présentés.", "Kajri"),  # wrong attribution
        _D("James Holden.", "Holden"),  # wrong attribution
        _D("Ah, croustillant.", "Kajri"),  # wrong attribution
        _N(),
    ]
    _repair_alternation(segs, names={"Holden", "Kajri"})
    assert _speakers(segs, [1, 2, 3, 4, 5]) == [
        "Holden", "Kajri", "Holden", "Kajri", "Holden"]
|
||||
|
||||
|
||||
def test_alternance_trois_locuteurs_ancres_sabstient():
    """Three incise-anchored speakers in one run leave the run untouched."""
    # A third speaker (even if only known through an incise) in the run
    # -> no binary alternation is forced.
    ta = "Ça satisfait, disait Bobbie."
    tb = "Oui, convint Naomi."
    tc = "Avec des jeunes, précisa Alex."
    segs = [
        _N(),
        _D(ta, "Bobbie", [Incise(start=ta.index("disait"), end=len(ta))]),
        _D(tb, "Naomi", [Incise(start=tb.index("convint"), end=len(tb))]),
        _D(tc, "Alex", [Incise(start=tc.index("précisa"), end=len(tc))]),
        _N(),
    ]
    _repair_alternation(segs, names={"Bobbie", "Naomi", "Alex"})
    assert _speakers(segs, [1, 2, 3]) == ["Bobbie", "Naomi", "Alex"]
|
||||
|
||||
|
||||
def test_alternance_run_deja_correct_inchange():
    """A run that already alternates correctly is returned unchanged."""
    segs = [
        _N(),
        _D("a", "Holden"),
        _D("b", "Kajri"),
        _D("c", "Holden"),
        _D("d", "Kajri"),
        _N(),
    ]
    before = _speakers(segs, [1, 2, 3, 4])
    _repair_alternation(segs, names={"Holden", "Kajri"})
    assert _speakers(segs, [1, 2, 3, 4]) == before
|
||||
|
||||
|
||||
def test_alternance_trois_locuteurs_sabstient():
    """Three distinct speakers in a run: no binary alternation is applied."""
    segs = [
        _N(),
        _D("a", "Holden"),
        _D("b", "Kajri"),
        _D("c", "Drummer"),
        _N(),
    ]
    _repair_alternation(segs, names={"Holden", "Kajri", "Drummer"})
    assert _speakers(segs, [1, 2, 3]) == ["Holden", "Kajri", "Drummer"]
|
||||
|
||||
|
||||
def test_alternance_narration_intercalee_rompt_le_run():
    """A narration between two lines splits the run; nothing is forced across it."""
    # STRICT (GAP=0): any narration between two lines cuts the run, because it
    # may carry a continuation by the same speaker (cf. ch06). Alternation is
    # therefore NOT forced across a narration.
    segs = [
        _N(),
        _D("a", "Drummer"),
        _N("il marqua une pause"),
        _D("b", "Holden"),
        _D("c", "Holden"),
        _N(),
    ]
    _repair_alternation(segs, names={"Holden", "Drummer"})
    # The effective run is [b, c] (consecutive): a single resolved speaker
    # -> abstain.
    assert _speakers(segs, [1, 3, 4]) == ["Drummer", "Holden", "Holden"]
|
||||
|
||||
|
||||
def test_incise_role_renvoie_le_nom_de_role():
    """``incise_role`` returns the role noun, and None for a proper noun."""
    # "informa le soldat": not a NAMED speaker, but an identifiable role.
    # ``NAMES`` is the module-level name set defined elsewhere in this file.
    text = "La réception commence, madame, informa le soldat."
    inc = detect_incises(text, names=NAMES)[0]
    assert incise_speaker(text, inc, NAMES) is None  # no proper noun
    assert incise_role(text, inc, NAMES) == "soldat"  # role detected

    # A proper noun is not a role.
    text2 = "Bonjour, lança Drummer."
    inc2 = detect_incises(text2, names=set())[0]
    assert incise_role(text2, inc2, set()) is None
|
||||
|
||||
|
||||
def test_alternance_seed_contradictoire_sabstient():
    """Two nominal anchors compatible with H,K,H keep the run as H,K,H.

    NOTE: despite the test name, the anchors used here do not contradict each
    other. Holden at positions 0 and 2 requires Kajri at position 1, which is
    exactly the H,K,H alternation, so that pattern is what the test expects.
    """
    ta = "Bonjour, dit Holden."
    tb = "Salut, répondit Holden."
    segs = [
        _N(),
        _D(ta, "Holden", [Incise(start=ta.index("dit"), end=len(ta))]),
        _D("Entre les deux.", "Kajri"),
        _D(tb, "Holden", [Incise(start=tb.index("répondit"), end=len(tb))]),
        _N(),
    ]
    _repair_alternation(segs, names={"Holden", "Kajri"})
    assert _speakers(segs, [1, 2, 3]) == ["Holden", "Kajri", "Holden"]
|
||||
|
||||
147
backend/tests/test_lmstudio_backend.py
Normal file
147
backend/tests/test_lmstudio_backend.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""Tests du backend LM Studio (sans reseau ni paquet openai installe).
|
||||
|
||||
On injecte un faux module `openai` dans sys.modules : le backend l'importe
|
||||
paresseusement, on peut donc valider la construction des messages, le parsing de
|
||||
la reponse (content + reasoning_content), le streaming et l'erreur de connexion
|
||||
sans dependance ni serveur.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
import inkflow.analysis.llm.lmstudio_backend as lm
|
||||
from inkflow.analysis.llm._text import _extract_json, _strip_reasoning
|
||||
from inkflow.analysis.llm.lmstudio_backend import LMStudioBackend
|
||||
|
||||
|
||||
class _FakeAPIConnectionError(Exception):
    """Stand-in for ``openai.APIConnectionError`` used by the fake module."""
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def fake_openai(monkeypatch):
    """Install a stub ``openai`` module in ``sys.modules`` for every test.

    The stub exposes only ``APIConnectionError`` and ``OpenAI``. Registration
    goes through ``monkeypatch``, so it is undone after each test.

    Returns:
        The stub module object.
    """
    mod = types.ModuleType("openai")
    mod.APIConnectionError = _FakeAPIConnectionError
    # Never actually used: the tests inject ``_client`` by hand.
    mod.OpenAI = lambda **kw: None
    monkeypatch.setitem(sys.modules, "openai", mod)
    return mod
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def settings(monkeypatch):
    """Replace ``lm.get_settings`` with controlled, in-memory settings.

    The default state delegates the configuration to LM Studio
    (``lmstudio_defer_config=True``). Tests may mutate the returned object to
    change what the patched ``get_settings`` returns.

    Returns:
        The mutable settings namespace.
    """
    state = SimpleNamespace(
        lmstudio_defer_config=True,
        lmstudio_base_url="http://127.0.0.1:1234/v1",
    )
    monkeypatch.setattr(lm, "get_settings", lambda: state)
    return state
|
||||
|
||||
|
||||
def _message(content, reasoning=None):
    """Build a fake non-streaming chat-completion response.

    The result has one entry in ``choices``; its ``message`` carries
    ``content`` and ``reasoning_content`` (set from ``reasoning``).
    """
    msg = SimpleNamespace(content=content, reasoning_content=reasoning)
    return SimpleNamespace(choices=[SimpleNamespace(message=msg)])
|
||||
|
||||
|
||||
class _FakeCompletions:
    """Record the kwargs of ``create`` and return (or raise) a canned result."""

    def __init__(self, *, response=None, stream=None, raises=None):
        """Store the canned outcomes; ``kwargs`` stays None until ``create`` runs."""
        self.response, self.stream, self.raises = response, stream, raises
        self.kwargs = None

    def create(self, **kwargs):
        """Capture ``kwargs``, then raise ``raises`` or return a canned value.

        Returns ``stream`` when the call passes a truthy ``stream`` keyword,
        otherwise ``response``. The kwargs are recorded before any raise.
        """
        self.kwargs = kwargs
        if self.raises is not None:
            raise self.raises
        return self.stream if kwargs.get("stream") else self.response
|
||||
|
||||
|
||||
def _client(completions):
    """Wrap ``completions`` so it is reachable as ``client.chat.completions``."""
    return SimpleNamespace(chat=SimpleNamespace(completions=completions))
|
||||
|
||||
|
||||
def _backend(completions, *, model="m"):
    """Build an ``LMStudioBackend`` whose client is the fake one."""
    b = LMStudioBackend(model)
    # Bypasses _ensure_client, so no real ``openai`` client is involved.
    b._client = _client(completions)
    return b
|
||||
|
||||
|
||||
def test_non_stream_content_delegue_la_config(settings):
    """With delegation on, neither temperature nor max_tokens is sent."""
    # By default the configuration is DELEGATED to LM Studio: imposing
    # temperature / max_tokens used to truncate the answer or override the
    # model's own configuration.
    comp = _FakeCompletions(response=_message('{"speaker": "Marie"}'))
    b = _backend(comp)
    out = b.complete(
        [{"role": "system", "content": "sys"}, {"role": "user", "content": "u"}],
        max_tokens=128, temperature=0.1, reasoning=False)
    assert _extract_json(out) == {"speaker": "Marie"}
    assert comp.kwargs["model"] == "m"
    assert comp.kwargs["messages"][0]["role"] == "system"
    assert "temperature" not in comp.kwargs  # delegated to LM Studio
    assert "max_tokens" not in comp.kwargs
|
||||
|
||||
|
||||
def test_non_stream_params_imposes_si_delegation_off(settings):
    """With delegation off, the caller's temperature and max_tokens are sent."""
    # lmstudio_defer_config=False -> the InkFlow settings are imposed again.
    settings.lmstudio_defer_config = False
    comp = _FakeCompletions(response=_message('{"speaker": "Marie"}'))
    b = _backend(comp)
    b.complete(
        [{"role": "user", "content": "u"}],
        max_tokens=128, temperature=0.1, reasoning=False)
    assert comp.kwargs["temperature"] == 0.1
    assert comp.kwargs["max_tokens"] == 128
|
||||
|
||||
|
||||
def test_reasoning_content_exclu_du_retour():
    """Only ``content`` is returned; ``reasoning_content`` never leaks into it."""
    # LM Studio separates the thinking (reasoning_content) from the clean
    # answer (content). An example JSON present in the thinking must not be
    # picked up instead of the real answer.
    comp = _FakeCompletions(
        response=_message(
            '{"capitale": "Paris"}',
            reasoning='exemple parasite: {"capitale": "Londres"}'))
    b = _backend(comp)
    out = b.complete(
        [{"role": "user", "content": "u"}],
        max_tokens=128, temperature=0.0, reasoning=False)
    assert _extract_json(out) == {"capitale": "Paris"}
    assert "parasite" not in out
|
||||
|
||||
|
||||
def test_streaming_token_sink():
    """Streaming: reasoning reaches the sink but not the returned text."""

    def _delta(content=None, reasoning=None):
        # One fake streamed chunk with a single choice carrying a delta.
        return SimpleNamespace(choices=[SimpleNamespace(
            delta=SimpleNamespace(content=content, reasoning_content=reasoning))])

    chunks = [
        _delta(reasoning="je pense "),
        _delta(content='{"a"'),
        _delta(content=": 1}"),
    ]
    comp = _FakeCompletions(stream=iter(chunks))
    b = _backend(comp)
    seen = []
    out = b.complete(
        [{"role": "user", "content": "u"}], max_tokens=64,
        temperature=0.1, reasoning=False, token_sink=seen.append)
    assert comp.kwargs["stream"] is True
    assert _extract_json(out) == {"a": 1}
    assert "je pense" not in out  # the thinking is excluded from the return
    assert "je pense" in "".join(seen)  # but forwarded to the sink (display)
|
||||
|
||||
|
||||
def test_erreur_connexion_message_clair():
    """A connection error surfaces as RuntimeError with a clear message."""
    comp = _FakeCompletions(raises=_FakeAPIConnectionError("refused"))
    b = _backend(comp)
    with pytest.raises(RuntimeError) as exc:
        b.complete(
            [{"role": "user", "content": "u"}], max_tokens=64,
            temperature=0.1, reasoning=False)
    assert "LM Studio injoignable" in str(exc.value)
|
||||
|
||||
|
||||
def test_resolve_modele_actif_si_ref_vide():
    """An empty model reference resolves to the first model the client lists."""
    comp = _FakeCompletions(response=_message("{}"))
    client = _client(comp)
    client.models = SimpleNamespace(
        list=lambda: SimpleNamespace(data=[SimpleNamespace(id="gemma-4")]))
    b = LMStudioBackend("")  # empty ref -> must pick the first loaded model
    b._client = client
    b.complete(
        [{"role": "user", "content": "u"}], max_tokens=64,
        temperature=0.1, reasoning=False)
    assert comp.kwargs["model"] == "gemma-4"
|
||||
Reference in New Issue
Block a user