Initial commit: InkFlow — EPUB vers livre audio local (MLX/Kokoro)
This commit is contained in:
59
backend/inkflow/analysis/pronunciation.py
Normal file
59
backend/inkflow/analysis/pronunciation.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""Dictionnaire de prononciation : application + proposition de candidats.
|
||||
|
||||
L'application est une simple reecriture de surface du texte (graphie guidee)
|
||||
avant synthese. Les candidats (noms propres, termes SF) peuvent etre proposes
|
||||
par Gemma puis valides par l'utilisateur dans l'UI.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Iterable
|
||||
|
||||
from ..models import Pronunciation, PronunciationEntry
|
||||
from ..settings import get_settings
|
||||
from .gemma import Gemma
|
||||
|
||||
|
||||
def apply_pronunciation(text: str, pron: Pronunciation) -> str:
    """Rewrite *text*, replacing each enabled term with its phonetic spelling.

    Entries are applied one after another, in the order of ``pron.entries``,
    so a later entry can also match text produced by an earlier replacement.
    Matching is case-sensitive and whole-word only: a term is left untouched
    when it is directly preceded or followed by a word character.

    Args:
        text: Text to rewrite before synthesis.
        pron: Pronunciation dictionary; each item of ``pron.entries`` is read
            for its ``enabled``, ``term`` and ``replacement`` attributes.

    Returns:
        The rewritten text, or ``text`` unchanged when no enabled entry
        matches.
    """
    for entry in pron.entries:
        # Disabled entries and empty terms are skipped (an empty term would
        # otherwise match at every word boundary).
        if not entry.enabled or not entry.term:
            continue
        # Lookarounds instead of ``\b``: ``\b`` only matches next to a word
        # character, so a term that starts or ends with punctuation ("C++")
        # could not be matched as a whole word. For terms that begin and end
        # with word characters both forms behave the same.
        pattern = re.compile(rf"(?<!\w){re.escape(entry.term)}(?!\w)")
        # A callable replacement is inserted literally; a plain string would
        # be parsed as a regex template, so a backslash or "\1" in the
        # replacement would be misread or raise ``re.error``.
        replacement = entry.replacement
        text = pattern.sub(lambda _match, _rep=replacement: _rep, text)
    return text
|
||||
|
||||
|
||||
# Le prompt systeme est editable dans les reglages (settings.prompt_pronunciation).
|
||||
|
||||
|
||||
def propose_pronunciations(text: str, gemma: Gemma, *, max_chars: int = 16000) -> list[PronunciationEntry]:
    """Ask the model for pronunciation candidates to be validated afterwards.

    Only the first ``max_chars`` characters of ``text`` are embedded in the
    prompt. The system prompt is read from
    ``get_settings().prompt_pronunciation``.

    Args:
        text: Source text to scan for hard-to-pronounce words.
        gemma: Client whose ``generate_json(prompt, system=...)`` result is
            parsed here.
        max_chars: Maximum number of characters of ``text`` sent to the model.

    Returns:
        One ``PronunciationEntry`` per item of the answer that is a dict with
        a non-blank ``term`` and a non-blank ``replacement``; every other
        item is ignored. An empty list when the answer is not a list.
    """
    sample = text[:max_chars]
    prompt = (
        "Repere dans cet extrait les mots a risque de mauvaise prononciation par "
        "une voix de synthese francaise. Pour chacun, propose une graphie "
        "phonetique francaise (replacement) qui guide la prononciation.\n\n"
        f"EXTRAIT:\n{sample}\n\n"
        'Reponds par un tableau JSON: '
        '[{"term":"Tiamat","replacement":"Tia-matt","note":"nom propre"}]'
    )
    result = gemma.generate_json(prompt, system=get_settings().prompt_pronunciation)

    # The prompt asks for a JSON array; any other answer shape (e.g. None or
    # a bare object) yields no candidates instead of failing in the loop.
    if not isinstance(result, (list, tuple)):
        return []

    entries: list[PronunciationEntry] = []
    for item in result:
        if not isinstance(item, dict):
            continue
        # Strip BEFORE testing for emptiness so that a whitespace-only value
        # cannot produce an entry whose term or replacement is "".
        term = str(item.get("term") or "").strip()
        replacement = str(item.get("replacement") or "").strip()
        if not term or not replacement:
            continue
        entries.append(PronunciationEntry(
            term=term,
            replacement=replacement,
            note=item.get("note"),
        ))
    return entries
|
||||
|
||||
|
||||
def merge_pronunciations(
    existing: Pronunciation, new: Iterable[PronunciationEntry]
) -> Pronunciation:
    """Combine an existing dictionary with new candidates, existing ones first.

    Entries are keyed by their lower-cased ``term``. A candidate from ``new``
    is added only when no entry with the same key is already present, so it
    never overrides an existing entry. When ``existing`` itself holds several
    entries with the same key, the last of them is kept, at the position of
    the first.

    Args:
        existing: Current dictionary; its ``entries`` are read, not modified.
        new: Candidate entries to add.

    Returns:
        A new ``Pronunciation`` holding the merged entries in insertion order.
    """
    merged = {}
    for current in existing.entries:
        merged[current.term.lower()] = current
    for candidate in new:
        key = candidate.term.lower()
        if key not in merged:
            merged[key] = candidate
    return Pronunciation(entries=list(merged.values()))
|
||||
Reference in New Issue
Block a user