Voicebank : vraies voix françaises (CML-TTS) + pool anonyme + garde-fou Qwen3
Remplace la voicebank générée par Kokoro (timbre anglais sur français phonémisé -> accent que Qwen3 clonait) par 41 vraies voix FR issues de CML-TTS (livres audio studio) : 1 narrateur dédié, 18F/14M nommées, 4F/4M anonymes réservées.

- scripts/import_voices.py : import multi-shards parquet, 1 clip/locuteur (le plus propre via Levenshtein), genre estimé par F0 (YIN, anti-octave), filtre débit de parole (ref_text aligné sur l'audio).
- VoiceEntry.anonymous + assign_voices : les figurants « anonyme (...) » tirent dans un pool réservé, jamais mélangé avec les voix nommées ; narrateur dédié (fr_narrator remplace fr_f_siwis).
- dedup._anon_attrs : genre/âge déduits du nom anonyme (bon genre de voix).
- tts/qwen3.py : garde-fou anti-dérive (rejette/réessaie les sorties en boucle ou coupées en estimant la durée plausible du chunk).

Limite connue : Qwen3 ne sait pas synthétiser les fragments d'1-2 mots (incises, titres) -> trous ; à traiter (repli Kokoro ou fusion des incises).

Inclut aussi du travail en cours antérieur (refactorisation du backend LLM pluggable mlx/lmstudio, benchmark, ajustements frontend/API).

Claude-Session: https://claude.ai/code/session_01XSVvcy1mfb4k1xDgib9vVU
This commit is contained in:
147
backend/tests/test_lmstudio_backend.py
Normal file
147
backend/tests/test_lmstudio_backend.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""Tests du backend LM Studio (sans reseau ni paquet openai installe).
|
||||
|
||||
On injecte un faux module `openai` dans sys.modules : le backend l'importe
|
||||
paresseusement, on peut donc valider la construction des messages, le parsing de
|
||||
la reponse (content + reasoning_content), le streaming et l'erreur de connexion
|
||||
sans dependance ni serveur.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
import inkflow.analysis.llm.lmstudio_backend as lm
|
||||
from inkflow.analysis.llm._text import _extract_json, _strip_reasoning
|
||||
from inkflow.analysis.llm.lmstudio_backend import LMStudioBackend
|
||||
|
||||
|
||||
class _FakeAPIConnectionError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def fake_openai(monkeypatch):
    """Inject a fake ``openai`` module into ``sys.modules`` for every test.

    The fake module exposes ``APIConnectionError`` (bound to
    ``_FakeAPIConnectionError``) and an ``OpenAI`` callable that accepts any
    keyword arguments and returns ``None``.

    Returns:
        The fake module object, so a test can inspect or extend it.
    """
    mod = types.ModuleType("openai")
    mod.APIConnectionError = _FakeAPIConnectionError
    # Never actually used: the tests assign ``_client`` on the backend by hand.
    mod.OpenAI = lambda **kw: None
    # monkeypatch restores the previous sys.modules entry at teardown.
    monkeypatch.setitem(sys.modules, "openai", mod)
    return mod
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def settings(monkeypatch):
    """Provide controlled settings without reading anything from disk.

    Replaces ``get_settings`` in the ``lmstudio_backend`` module with a
    callable returning a single shared namespace. The default configuration
    defers generation parameters to LM Studio (``lmstudio_defer_config=True``).

    Returns:
        The mutable settings namespace; a test may flip attributes on it.
    """
    state = SimpleNamespace(
        lmstudio_defer_config=True,
        lmstudio_base_url="http://127.0.0.1:1234/v1",
    )
    monkeypatch.setattr(lm, "get_settings", lambda: state)
    return state
|
||||
|
||||
|
||||
def _message(content, reasoning=None):
|
||||
msg = SimpleNamespace(content=content, reasoning_content=reasoning)
|
||||
return SimpleNamespace(choices=[SimpleNamespace(message=msg)])
|
||||
|
||||
|
||||
class _FakeCompletions:
|
||||
"""Capture les kwargs et renvoie une reponse (ou leve) preprogrammee."""
|
||||
|
||||
def __init__(self, *, response=None, stream=None, raises=None):
|
||||
self.response, self.stream, self.raises = response, stream, raises
|
||||
self.kwargs = None
|
||||
|
||||
def create(self, **kwargs):
|
||||
self.kwargs = kwargs
|
||||
if self.raises is not None:
|
||||
raise self.raises
|
||||
return self.stream if kwargs.get("stream") else self.response
|
||||
|
||||
|
||||
def _client(completions):
|
||||
return SimpleNamespace(chat=SimpleNamespace(completions=completions))
|
||||
|
||||
|
||||
def _backend(completions, *, model="m"):
    """Build an ``LMStudioBackend`` wired to a fake client.

    Args:
        completions: Fake completions endpoint exposed as
            ``client.chat.completions``.
        model: Model reference passed to the backend constructor.

    Returns:
        The backend with its ``_client`` attribute pre-populated.
    """
    b = LMStudioBackend(model)
    # Bypass _ensure_client: no real openai client is ever created.
    b._client = _client(completions)
    return b
|
||||
|
||||
|
||||
def test_non_stream_content_delegue_la_config(settings):
    """With the default settings, generation parameters are left to LM Studio."""
    # By default we DEFER to LM Studio: neither temperature nor max_tokens is
    # imposed (otherwise the answer was truncated / the model's own config
    # was overridden).
    comp = _FakeCompletions(response=_message('{"speaker": "Marie"}'))
    b = _backend(comp)
    out = b.complete(
        [{"role": "system", "content": "sys"}, {"role": "user", "content": "u"}],
        max_tokens=128,
        temperature=0.1,
        reasoning=False,
    )
    assert _extract_json(out) == {"speaker": "Marie"}
    assert comp.kwargs["model"] == "m"
    assert comp.kwargs["messages"][0]["role"] == "system"
    assert "temperature" not in comp.kwargs  # deferred to LM Studio
    assert "max_tokens" not in comp.kwargs
|
||||
|
||||
|
||||
def test_non_stream_params_imposes_si_delegation_off(settings):
    """With deferral disabled, the caller's generation parameters are forwarded."""
    # lmstudio_defer_config=False -> the InkFlow settings are imposed again.
    settings.lmstudio_defer_config = False
    comp = _FakeCompletions(response=_message('{"speaker": "Marie"}'))
    b = _backend(comp)
    b.complete(
        [{"role": "user", "content": "u"}],
        max_tokens=128,
        temperature=0.1,
        reasoning=False,
    )
    assert comp.kwargs["temperature"] == 0.1
    assert comp.kwargs["max_tokens"] == 128
|
||||
|
||||
|
||||
def test_reasoning_content_exclu_du_retour():
    """Only ``content`` is returned; ``reasoning_content`` must not leak into it."""
    # LM Studio separates the thinking (reasoning_content) from the answer
    # (content, clean). The return value must contain ONLY content: an example
    # JSON present in the thinking must not be picked up instead of the real
    # answer.
    comp = _FakeCompletions(
        response=_message(
            '{"capitale": "Paris"}',
            reasoning='exemple parasite: {"capitale": "Londres"}',
        )
    )
    b = _backend(comp)
    out = b.complete(
        [{"role": "user", "content": "u"}],
        max_tokens=128,
        temperature=0.0,
        reasoning=False,
    )
    assert _extract_json(out) == {"capitale": "Paris"}
    assert "parasite" not in out
|
||||
|
||||
|
||||
def test_streaming_token_sink():
    """Streaming: reasoning reaches the sink but is excluded from the result."""

    def _delta(content=None, reasoning=None):
        # One fake streaming chunk carrying a single choice delta.
        return SimpleNamespace(
            choices=[
                SimpleNamespace(
                    delta=SimpleNamespace(
                        content=content, reasoning_content=reasoning
                    )
                )
            ]
        )

    chunks = [
        _delta(reasoning="je pense "),
        _delta(content='{"a"'),
        _delta(content=": 1}"),
    ]
    comp = _FakeCompletions(stream=iter(chunks))
    b = _backend(comp)
    seen = []
    out = b.complete(
        [{"role": "user", "content": "u"}],
        max_tokens=64,
        temperature=0.1,
        reasoning=False,
        token_sink=seen.append,
    )
    assert comp.kwargs["stream"] is True
    assert _extract_json(out) == {"a": 1}
    assert "je pense" not in out  # the thinking is excluded from the result
    assert "je pense" in "".join(seen)  # but forwarded to the sink (display)
|
||||
|
||||
|
||||
def test_erreur_connexion_message_clair():
    """A connection error surfaces as a ``RuntimeError`` with a clear message."""
    comp = _FakeCompletions(raises=_FakeAPIConnectionError("refused"))
    b = _backend(comp)
    with pytest.raises(RuntimeError) as exc:
        b.complete(
            [{"role": "user", "content": "u"}],
            max_tokens=64,
            temperature=0.1,
            reasoning=False,
        )
    assert "LM Studio injoignable" in str(exc.value)
|
||||
|
||||
|
||||
def test_resolve_modele_actif_si_ref_vide():
    """An empty model reference resolves to the first model the client lists."""
    comp = _FakeCompletions(response=_message("{}"))
    client = _client(comp)
    # Fake ``client.models.list()`` reporting a single loaded model.
    client.models = SimpleNamespace(
        list=lambda: SimpleNamespace(data=[SimpleNamespace(id="gemma-4")])
    )
    b = LMStudioBackend("")  # empty ref -> must pick the first loaded model
    b._client = client
    b.complete(
        [{"role": "user", "content": "u"}],
        max_tokens=64,
        temperature=0.1,
        reasoning=False,
    )
    assert comp.kwargs["model"] == "gemma-4"
|
||||
Reference in New Issue
Block a user