Remplace la voicebank générée par Kokoro (timbre anglais sur français phonémisé -> accent que Qwen3 clonait) par 41 vraies voix FR issues de CML-TTS (livres audio studio) : 1 narrateur dédié, 18F/14M nommées, 4F/4M anonymes réservées. - scripts/import_voices.py : import multi-shards parquet, 1 clip/locuteur (le plus propre via levenshtein), genre estimé par F0 (YIN, anti-octave), filtre débit de parole (ref_text aligné sur l'audio). - VoiceEntry.anonymous + assign_voices : les figurants « anonyme (...) » tirent dans un pool réservé, jamais mélangé avec les voix nommées ; narrateur dédié (fr_narrator remplace fr_f_siwis). - dedup._anon_attrs : genre/âge déduits du nom anonyme (bon genre de voix). - tts/qwen3.py : garde-fou anti-dérive (rejette/réessaie les sorties en boucle ou coupées en estimant la durée plausible du chunk). Limite connue : Qwen3 ne sait pas synthétiser les fragments d'1-2 mots (incises, titres) -> trous ; à traiter (repli Kokoro ou fusion des incises). Inclut aussi du travail en cours antérieur (refacto backend LLM pluggable mlx/lmstudio, benchmark, ajustements frontend/API). Claude-Session: https://claude.ai/code/session_01XSVvcy1mfb4k1xDgib9vVU
148 lines
5.8 KiB
Python
148 lines
5.8 KiB
Python
"""Tests du backend LM Studio (sans reseau ni paquet openai installe).
|
|
|
|
On injecte un faux module `openai` dans sys.modules : le backend l'importe
|
|
paresseusement, on peut donc valider la construction des messages, le parsing de
|
|
la reponse (content + reasoning_content), le streaming et l'erreur de connexion
|
|
sans dependance ni serveur.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
import types
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
import inkflow.analysis.llm.lmstudio_backend as lm
|
|
from inkflow.analysis.llm._text import _extract_json, _strip_reasoning
|
|
from inkflow.analysis.llm.lmstudio_backend import LMStudioBackend
|
|
|
|
|
|
class _FakeAPIConnectionError(Exception):
|
|
pass
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def fake_openai(monkeypatch):
    """Inject a fake ``openai`` module into ``sys.modules`` for every test.

    The fake module only exposes the two names set below:
    ``APIConnectionError`` (mapped to ``_FakeAPIConnectionError``) and
    ``OpenAI`` (a factory that accepts any keyword arguments and returns
    ``None``).

    Returns:
        The fake module object, so a test can inspect or extend it.
    """
    mod = types.ModuleType("openai")
    mod.APIConnectionError = _FakeAPIConnectionError
    # Never actually used: the tests inject ``_client`` by hand.
    mod.OpenAI = lambda **kw: None
    # monkeypatch restores the previous sys.modules entry at teardown.
    monkeypatch.setitem(sys.modules, "openai", mod)
    return mod
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def settings(monkeypatch):
    """Provide controlled in-memory settings (default: defer config to LM Studio).

    ``lm.get_settings`` is replaced by a callable that always returns the same
    namespace, so a test can flip a flag on the returned object and have the
    change seen through ``lm.get_settings()``.

    Returns:
        The mutable ``SimpleNamespace`` served by the patched ``get_settings``.
    """
    state = SimpleNamespace(
        lmstudio_defer_config=True,
        lmstudio_base_url="http://127.0.0.1:1234/v1",
    )
    monkeypatch.setattr(lm, "get_settings", lambda: state)
    return state
|
|
|
|
|
|
def _message(content, reasoning=None):
|
|
msg = SimpleNamespace(content=content, reasoning_content=reasoning)
|
|
return SimpleNamespace(choices=[SimpleNamespace(message=msg)])
|
|
|
|
|
|
class _FakeCompletions:
|
|
"""Capture les kwargs et renvoie une reponse (ou leve) preprogrammee."""
|
|
|
|
def __init__(self, *, response=None, stream=None, raises=None):
|
|
self.response, self.stream, self.raises = response, stream, raises
|
|
self.kwargs = None
|
|
|
|
def create(self, **kwargs):
|
|
self.kwargs = kwargs
|
|
if self.raises is not None:
|
|
raise self.raises
|
|
return self.stream if kwargs.get("stream") else self.response
|
|
|
|
|
|
def _client(completions):
|
|
return SimpleNamespace(chat=SimpleNamespace(completions=completions))
|
|
|
|
|
|
def _backend(completions, *, model="m"):
    """Build an ``LMStudioBackend`` whose client is the fake built from ``completions``.

    Args:
        completions: Fake completions object exposed as
            ``client.chat.completions``.
        model: Model reference passed to the ``LMStudioBackend`` constructor.

    Returns:
        The backend with its ``_client`` attribute pre-populated.
    """
    b = LMStudioBackend(model)
    # Short-circuits _ensure_client, so no real openai client is needed.
    b._client = _client(completions)
    return b
|
|
|
|
|
|
def test_non_stream_content_delegue_la_config(settings):
    """With config deferral on (the default), sampling params are not forwarded."""
    # By default we DEFER to LM Studio: neither temperature nor max_tokens is
    # imposed (otherwise the response got truncated / the model's own config
    # was overridden).
    comp = _FakeCompletions(response=_message('{"speaker": "Marie"}'))
    b = _backend(comp)
    out = b.complete(
        [{"role": "system", "content": "sys"}, {"role": "user", "content": "u"}],
        max_tokens=128, temperature=0.1, reasoning=False)
    assert _extract_json(out) == {"speaker": "Marie"}
    assert comp.kwargs["model"] == "m"
    assert comp.kwargs["messages"][0]["role"] == "system"
    assert "temperature" not in comp.kwargs  # deferred to LM Studio
    assert "max_tokens" not in comp.kwargs
|
|
|
|
|
|
def test_non_stream_params_imposes_si_delegation_off(settings):
    """With ``lmstudio_defer_config=False`` the caller's sampling params are sent."""
    # lmstudio_defer_config=False -> the InkFlow settings are imposed again.
    settings.lmstudio_defer_config = False
    comp = _FakeCompletions(response=_message('{"speaker": "Marie"}'))
    b = _backend(comp)
    b.complete([{"role": "user", "content": "u"}],
               max_tokens=128, temperature=0.1, reasoning=False)
    assert comp.kwargs["temperature"] == 0.1
    assert comp.kwargs["max_tokens"] == 128
|
|
|
|
|
|
def test_reasoning_content_exclu_du_retour():
    """The returned text contains only ``content``, never ``reasoning_content``."""
    # LM Studio separates the thinking (reasoning_content) from the clean
    # answer (content). The return value must contain ONLY content: an example
    # JSON present in the thinking must not be picked up instead of the real
    # answer.
    comp = _FakeCompletions(
        response=_message('{"capitale": "Paris"}',
                          reasoning='exemple parasite: {"capitale": "Londres"}'))
    b = _backend(comp)
    out = b.complete([{"role": "user", "content": "u"}],
                     max_tokens=128, temperature=0.0, reasoning=False)
    assert _extract_json(out) == {"capitale": "Paris"}
    assert "parasite" not in out
|
|
|
|
|
|
def test_streaming_token_sink():
    """Passing ``token_sink`` streams: reasoning reaches the sink, not the return value."""

    def _delta(content=None, reasoning=None):
        # One fake streaming chunk: a single choice carrying a ``delta``.
        return SimpleNamespace(choices=[SimpleNamespace(
            delta=SimpleNamespace(content=content, reasoning_content=reasoning))])

    chunks = [_delta(reasoning="je pense "), _delta(content='{"a"'), _delta(content=": 1}")]
    comp = _FakeCompletions(stream=iter(chunks))
    b = _backend(comp)
    seen = []
    out = b.complete([{"role": "user", "content": "u"}], max_tokens=64,
                     temperature=0.1, reasoning=False, token_sink=seen.append)
    assert comp.kwargs["stream"] is True
    assert _extract_json(out) == {"a": 1}
    assert "je pense" not in out  # the thinking is excluded from the return value
    assert "je pense" in "".join(seen)  # but forwarded to the sink (display)
|
|
|
|
|
|
def test_erreur_connexion_message_clair():
    """A connection error from the client surfaces as a clear ``RuntimeError``."""
    comp = _FakeCompletions(raises=_FakeAPIConnectionError("refused"))
    b = _backend(comp)
    with pytest.raises(RuntimeError) as exc:
        b.complete([{"role": "user", "content": "u"}], max_tokens=64,
                   temperature=0.1, reasoning=False)
    # Checked after the ``with`` block: inside it, this line would be skipped
    # as soon as ``complete`` raises.
    assert "LM Studio injoignable" in str(exc.value)
|
|
|
|
|
|
def test_resolve_modele_actif_si_ref_vide():
    """An empty model reference resolves to the first model listed by the client."""
    comp = _FakeCompletions(response=_message("{}"))
    client = _client(comp)
    # Fake ``client.models.list()`` reporting a single loaded model.
    client.models = SimpleNamespace(
        list=lambda: SimpleNamespace(data=[SimpleNamespace(id="gemma-4")]))
    b = LMStudioBackend("")  # empty ref -> must pick the first loaded model
    b._client = client
    b.complete([{"role": "user", "content": "u"}], max_tokens=64,
               temperature=0.1, reasoning=False)
    assert comp.kwargs["model"] == "gemma-4"
|