"""Tests du backend LM Studio (sans reseau ni paquet openai installe). On injecte un faux module `openai` dans sys.modules : le backend l'importe paresseusement, on peut donc valider la construction des messages, le parsing de la reponse (content + reasoning_content), le streaming et l'erreur de connexion sans dependance ni serveur. """ from __future__ import annotations import sys import types from types import SimpleNamespace import pytest import inkflow.analysis.llm.lmstudio_backend as lm from inkflow.analysis.llm._text import _extract_json, _strip_reasoning from inkflow.analysis.llm.lmstudio_backend import LMStudioBackend class _FakeAPIConnectionError(Exception): pass @pytest.fixture(autouse=True) def fake_openai(monkeypatch): """Faux module openai (APIConnectionError + OpenAI) injecte dans sys.modules.""" mod = types.ModuleType("openai") mod.APIConnectionError = _FakeAPIConnectionError mod.OpenAI = lambda **kw: None # jamais utilise (on injecte _client a la main) monkeypatch.setitem(sys.modules, "openai", mod) return mod @pytest.fixture(autouse=True) def settings(monkeypatch): """Reglages controles (defaut : delegation a LM Studio) sans lire le disque.""" state = SimpleNamespace(lmstudio_defer_config=True, lmstudio_base_url="http://127.0.0.1:1234/v1") monkeypatch.setattr(lm, "get_settings", lambda: state) return state def _message(content, reasoning=None): msg = SimpleNamespace(content=content, reasoning_content=reasoning) return SimpleNamespace(choices=[SimpleNamespace(message=msg)]) class _FakeCompletions: """Capture les kwargs et renvoie une reponse (ou leve) preprogrammee.""" def __init__(self, *, response=None, stream=None, raises=None): self.response, self.stream, self.raises = response, stream, raises self.kwargs = None def create(self, **kwargs): self.kwargs = kwargs if self.raises is not None: raise self.raises return self.stream if kwargs.get("stream") else self.response def _client(completions): return SimpleNamespace(chat=SimpleNamespace(completions=completions)) def _backend(completions, *, model="m"): b = LMStudioBackend(model) b._client = _client(completions) # court-circuite _ensure_client (pas d'openai reel) return b def test_non_stream_content_delegue_la_config(settings): # Par defaut on DELEGUE a LM Studio : ni temperature ni max_tokens imposes # (sinon on tronquait la reponse / on ecrasait la config du modele). comp = _FakeCompletions(response=_message('{"speaker": "Marie"}')) b = _backend(comp) out = b.complete( [{"role": "system", "content": "sys"}, {"role": "user", "content": "u"}], max_tokens=128, temperature=0.1, reasoning=False) assert _extract_json(out) == {"speaker": "Marie"} assert comp.kwargs["model"] == "m" assert comp.kwargs["messages"][0]["role"] == "system" assert "temperature" not in comp.kwargs # delegue a LM Studio assert "max_tokens" not in comp.kwargs def test_non_stream_params_imposes_si_delegation_off(settings): # lmstudio_defer_config=False -> on reimpose les reglages InkFlow. settings.lmstudio_defer_config = False comp = _FakeCompletions(response=_message('{"speaker": "Marie"}')) b = _backend(comp) b.complete([{"role": "user", "content": "u"}], max_tokens=128, temperature=0.1, reasoning=False) assert comp.kwargs["temperature"] == 0.1 assert comp.kwargs["max_tokens"] == 128 def test_reasoning_content_exclu_du_retour(): # LM Studio separe la pensee (reasoning_content) de la reponse (content, # propre). Le retour ne doit contenir QUE content : un JSON d'exemple present # dans la pensee ne doit pas etre capte a la place de la vraie reponse. comp = _FakeCompletions( response=_message('{"capitale": "Paris"}', reasoning='exemple parasite: {"capitale": "Londres"}')) b = _backend(comp) out = b.complete([{"role": "user", "content": "u"}], max_tokens=128, temperature=0.0, reasoning=False) assert _extract_json(out) == {"capitale": "Paris"} assert "parasite" not in out def test_streaming_token_sink(): def _delta(content=None, reasoning=None): return SimpleNamespace(choices=[SimpleNamespace( delta=SimpleNamespace(content=content, reasoning_content=reasoning))]) chunks = [_delta(reasoning="je pense "), _delta(content='{"a"'), _delta(content=": 1}")] comp = _FakeCompletions(stream=iter(chunks)) b = _backend(comp) seen = [] out = b.complete([{"role": "user", "content": "u"}], max_tokens=64, temperature=0.1, reasoning=False, token_sink=seen.append) assert comp.kwargs["stream"] is True assert _extract_json(out) == {"a": 1} assert "je pense" not in out # la pensee est exclue du retour assert "je pense" in "".join(seen) # mais diffusee au sink (affichage) def test_erreur_connexion_message_clair(): comp = _FakeCompletions(raises=_FakeAPIConnectionError("refused")) b = _backend(comp) with pytest.raises(RuntimeError) as exc: b.complete([{"role": "user", "content": "u"}], max_tokens=64, temperature=0.1, reasoning=False) assert "LM Studio injoignable" in str(exc.value) def test_resolve_modele_actif_si_ref_vide(): comp = _FakeCompletions(response=_message("{}")) client = _client(comp) client.models = SimpleNamespace( list=lambda: SimpleNamespace(data=[SimpleNamespace(id="gemma-4")])) b = LMStudioBackend("") # ref vide -> doit prendre le 1er modele charge b._client = client b.complete([{"role": "user", "content": "u"}], max_tokens=64, temperature=0.1, reasoning=False) assert comp.kwargs["model"] == "gemma-4"