"""postprocess.rules / postprocess.llm (urllib monkeypatch).""" from __future__ import annotations import json import pytest from luke_scribe.postprocess import llm, rules def test_rules_normalize(): assert rules.normalize("구글 Embedding Gemma 소개") == "구글 EmbeddingGemma 소개" assert rules.normalize("그대로") == "그대로" def test_llm_not_configured(): with pytest.raises(llm.LLMNotConfigured): llm.correct("x", base_url=None, api_key=None) class _FakeResp: def __init__(self, payload: dict) -> None: self._p = payload def read(self) -> bytes: return json.dumps(self._p).encode() def __enter__(self): return self def __exit__(self, *_a): return False def test_llm_correct_monkeypatched(monkeypatch): def fake_urlopen(_req, timeout=90): # noqa: ARG001 return _FakeResp({"choices": [{"message": {"content": "EmbeddingGemma 복원됨"}}]}) monkeypatch.setattr(llm.urllib.request, "urlopen", fake_urlopen) out = llm.correct("인베딩 점마", base_url="http://x/v1", api_key="k", model="m") assert out == "EmbeddingGemma 복원됨" def test_llm_chunking_and_glossary(monkeypatch): """긴 입력 → 청크 분할 + 러닝 글로서리(작은 컨텍스트 창 대응).""" calls: list[list[dict]] = [] def fake_request(messages, **_kw): calls.append(messages) return messages[1]["content"] # 청크 그대로 echo monkeypatch.setattr(llm, "_request", fake_request) long_text = ". ".join(f"문장{i} EmbeddingGemma 설명" for i in range(400)) out = llm.correct(long_text, base_url="http://x/v1", api_key="k", max_chars=200) assert len(calls) > 1 # 분할됨 assert "EmbeddingGemma" in out # 재조립됨 # 2번째 청크부터 이전에 확정된 영문 표기가 system에 주입됨 assert any("확정된 영문 표기" in m[0]["content"] for m in calls[1:])