refactor(ai): OCR sidecar canonical naming cleanup — typhoon→np-dms, remove hardcoded keys, asyncio.to_thread, ADR-040/041
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
# File: tests/integration/ocr-sidecar/test_active_prompt.py
|
||||
# Change Log:
|
||||
# - 2026-06-20: Initial creation for US3 active prompt integration tests.
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
UNIT_DIR = Path(__file__).resolve().parents[2] / "unit" / "ocr-sidecar"
|
||||
if str(UNIT_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(UNIT_DIR))
|
||||
|
||||
from test_path_traversal import FakeDocument, load_app
|
||||
|
||||
|
||||
class FakeAsyncResponse:
    """Canned stand-in for the HTTP response object returned by the fake client."""

    def raise_for_status(self) -> None:
        """Do nothing; this fake response never signals an HTTP error."""
        return None

    def json(self) -> dict:
        """Return a chat-completion shaped body whose content is a JSON string."""
        message = {"content": '{"natural_text": "prompt result"}'}
        return {"choices": [{"message": message}]}
|
||||
|
||||
|
||||
class FakeAsyncClient:
    """Async HTTP client double that remembers the most recent JSON body posted."""

    # Stored on the class so tests can read it without holding the instance.
    last_payload = None

    def __init__(self, *args, **kwargs) -> None:
        """Accept and ignore any constructor arguments."""

    async def post(self, url: str, json: dict, headers: dict) -> FakeAsyncResponse:
        """Record ``json`` on the class and hand back a canned response."""
        FakeAsyncClient.last_payload = json
        return FakeAsyncResponse()

    async def aclose(self) -> None:
        """No-op close; nothing is held open by this double."""
|
||||
|
||||
|
||||
def test_ocr_injects_system_prompt_and_dms_tags(tmp_path: Path) -> None:
    """Check that /ocr forwards the system prompt and DMS tag hints to the model.

    The request carries ``system_prompt`` and ``dms_tags``. The payload captured
    by ``FakeAsyncClient`` is then inspected: both are expected to show up as
    text items inside the ``content`` list of the first message.
    """
    upload_base = tmp_path / "uploads"
    upload_base.mkdir()
    pdf_path = upload_base / "document.pdf"
    pdf_path.write_bytes(b"%PDF-1.4\n")

    app_module = load_app(upload_base)
    client = TestClient(app_module.app)

    decision = SimpleNamespace(keep_alive_seconds=120, reason="headroom-sufficient", vram_headroom_mb=9000.0)
    fake_client = FakeAsyncClient()
    FakeAsyncClient.last_payload = None

    # Dummy message structure returned in place of the real prompt builder.
    initial_messages = [{"role": "user", "content": [{"type": "text", "text": "OCR Page content"}]}]

    request_body = {
        "pdfPath": str(pdf_path),
        "engine": "np-dms-ocr",
        "system_prompt": "Custom system instruction",
        "dms_tags": {
            "document_number": "true",
            "document_date": "true",
        },
        "runtime_params": {
            "temperature": 0.1,
            "top_p": 0.5,
            "repeat_penalty": 1.0,
            "max_tokens": 4096,
        },
    }

    with patch.object(app_module, "calculate_ocr_residency", return_value=decision), \
            patch.object(app_module, "prepare_ocr_messages", return_value=initial_messages), \
            patch.object(app_module.fitz, "open", return_value=FakeDocument()), \
            patch.object(app_module, "ollama_client", fake_client):
        response = client.post("/ocr", json=request_body, headers={"X-API-Key": "test-key"})

    assert response.status_code == 200

    # Fail with a readable message (not a TypeError on None) if nothing was posted.
    payload = FakeAsyncClient.last_payload
    assert payload is not None, "No payload was sent to the model client"

    # The system prompt is expected to be appended to messages[0]["content"].
    content_list = payload["messages"][0]["content"]
    # Items without a "text" key are treated as empty text instead of raising.
    text_items = [c.get("text") or "" for c in content_list if c.get("type") == "text"]

    assert "Custom system instruction" in text_items, "System prompt was not injected into message content"

    dms_tags_instruction = any(
        "<document_number>" in text and "<document_date>" in text for text in text_items
    )
    assert dms_tags_instruction, "DMS tags instructions were not injected correctly"
|
||||
@@ -0,0 +1,129 @@
|
||||
# File: tests/integration/ocr-sidecar/test_async_performance.py
|
||||
# Change Log:
|
||||
# - 2026-06-20: Added ADR-040 US4 async I/O performance tests for process_ocr and lifespan.
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
UNIT_DIR = Path(__file__).resolve().parents[2] / "unit" / "ocr-sidecar"
|
||||
if str(UNIT_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(UNIT_DIR))
|
||||
|
||||
from test_path_traversal import load_app
|
||||
|
||||
|
||||
class FakeAsyncResponse:
    """Simulates an httpx.AsyncClient response."""

    def raise_for_status(self) -> None:
        """Do nothing; this fake response never signals an HTTP error."""
        return None

    def json(self) -> dict:
        """Return a chat-completion shaped body whose content is a JSON string."""
        message = {"content": '{"natural_text": "ok"}'}
        return {"choices": [{"message": message}]}
|
||||
|
||||
|
||||
class FakeAsyncClient:
    """Simulates httpx.AsyncClient for the async process_ocr.

    Every posted JSON body is stored both on the instance (``payload``) and on
    the class (``last_payload``) so tests can inspect it either way.
    """

    # Declared on the class so the attribute exists before any instance is
    # created, matching the sibling test doubles in the other test modules.
    last_payload = None

    def __init__(self, *args, **kwargs) -> None:
        """Ignore constructor arguments and reset both payload slots."""
        self.payload = None
        FakeAsyncClient.last_payload = None

    async def post(self, url: str, json: dict, headers: dict) -> FakeAsyncResponse:
        """Record ``json`` on the instance and the class, then return a canned response."""
        self.payload = json
        FakeAsyncClient.last_payload = json
        return FakeAsyncResponse()

    async def aclose(self) -> None:
        """No-op close; nothing is held open by this double."""
|
||||
|
||||
|
||||
FakeAsyncClient.last_payload = None
|
||||
|
||||
|
||||
def test_process_ocr_is_coroutine_function(tmp_path: Path) -> None:
    """T042: process_ocr must be an ``async def`` (coroutine function)."""
    sidecar = load_app(tmp_path)
    is_async = inspect.iscoroutinefunction(sidecar.process_ocr)
    assert is_async, "process_ocr must be async def per ADR-040 US4"
|
||||
|
||||
|
||||
def test_process_pdf_doc_is_coroutine_function(tmp_path: Path) -> None:
    """T042: _process_pdf_doc must be an ``async def`` because it calls process_ocr."""
    sidecar = load_app(tmp_path)
    is_async = inspect.iscoroutinefunction(sidecar._process_pdf_doc)
    assert is_async, "_process_pdf_doc must be async def per ADR-040 US4"
|
||||
|
||||
|
||||
def test_app_uses_lifespan_not_startup_event(tmp_path: Path) -> None:
    """T045: the app must use a lifespan context manager, not @app.on_event('startup')."""
    router = load_app(tmp_path).app.router

    # FastAPI keeps the lifespan handler on app.router.lifespan_context.
    # NOTE(review): this attribute may exist on every router regardless of how
    # the app was configured, so this check could be vacuous — confirm.
    assert hasattr(router, "lifespan_context"), "App must use lifespan parameter, not @app.on_event('startup')"

    # No legacy startup event handlers may be registered.
    legacy_handlers = router.on_startup
    assert len(legacy_handlers) == 0, "App must not register @app.on_event('startup') handlers"
|
||||
|
||||
|
||||
def test_app_has_async_client_global(tmp_path: Path) -> None:
    """T043: the app module must expose an ``ollama_client`` global for the AsyncClient."""
    sidecar = load_app(tmp_path)
    has_client = hasattr(sidecar, "ollama_client")
    assert has_client, "app module must have ollama_client global for shared AsyncClient"
|
||||
|
||||
|
||||
def test_normalize_endpoint_removed(tmp_path: Path) -> None:
    """T054: the /normalize endpoint must no longer be registered."""
    sidecar = load_app(tmp_path)
    registered_paths = []
    for route in sidecar.app.routes:
        registered_paths.append(route.path)
    assert "/normalize" not in registered_paths, "/normalize endpoint must be removed per ADR-040 D2"
|
||||
|
||||
|
||||
def test_concurrent_ocr_requests_dont_block(tmp_path: Path) -> None:
    """T041: concurrent OCR requests must not block each other (async I/O).

    Three ``process_ocr`` coroutines are gathered on one event loop against a
    fake client. The test only checks that all three finish with "ok" and that
    the last captured payload carries the patched keep-alive; no timing is
    measured.
    """
    app_module = load_app(tmp_path)

    residency = SimpleNamespace(
        keep_alive_seconds=60,
        reason="headroom-sufficient",
        vram_headroom_mb=9000.0,
    )
    stub_client = FakeAsyncClient()

    async def run_concurrent() -> list[str]:
        """Run process_ocr three times concurrently and collect the results."""
        with (
            patch.object(app_module, "calculate_ocr_residency", return_value=residency),
            patch.object(app_module, "prepare_ocr_messages", return_value=[{"content": []}]),
            patch.object(app_module, "ollama_client", stub_client),
        ):
            pending = []
            for page in range(1, 4):
                pending.append(app_module.process_ocr("/tmp/test.pdf", page_num=page))
            return await asyncio.gather(*pending)

    results = asyncio.run(run_concurrent())
    assert len(results) == 3
    for text in results:
        assert text == "ok"

    # Every request must have been able to send its payload.
    captured = FakeAsyncClient.last_payload
    assert captured is not None
    assert captured["keep_alive"] == 60
|
||||
@@ -0,0 +1,49 @@
|
||||
# File: tests/integration/ocr-sidecar/test_cpu_fallback.py
|
||||
# Change Log:
|
||||
# - 2026-06-20: Added ADR-040 CPU fallback integration coverage for retrieval endpoints.
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import sys
|
||||
|
||||
UNIT_DIR = Path(__file__).resolve().parents[2] / "unit" / "ocr-sidecar"
|
||||
if str(UNIT_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(UNIT_DIR))
|
||||
|
||||
from test_path_traversal import load_app
|
||||
|
||||
|
||||
def test_embed_uses_cpu_when_vram_headroom_is_low(tmp_path: Path) -> None:
    """POST /embed with a mocked low VRAM headroom and expect the CPU device.

    ``get_vram_headroom`` is patched to report 1000 MB available out of
    16384 MB. The response must name "cpu" and the mocked embedding model must
    have been moved with ``.to("cpu")``.
    """
    app_module = load_app(tmp_path)
    http = TestClient(app_module.app)

    embedder = MagicMock()
    embedder.encode.return_value = {
        "dense_vecs": [[0.1, 0.2]],
        "lexical_weights": [{"101": 0.5}],
    }
    low_headroom = MagicMock(total_mb=16384.0, used_mb=15000.0, available_mb=1000.0, query_success=True)

    model_patch = patch.object(app_module, "bge_model", embedder)
    vram_patch = patch.object(app_module, "get_vram_headroom", return_value=low_headroom)
    with model_patch, vram_patch:
        response = http.post("/embed", json={"text": "hello"}, headers={"X-API-Key": "test-key"})

    assert response.status_code == 200
    assert response.json()["device"] == "cpu"
    embedder.model.to.assert_called_with("cpu")
|
||||
|
||||
|
||||
def test_rerank_uses_cpu_when_vram_headroom_is_low(tmp_path: Path) -> None:
    """POST /rerank with a mocked low VRAM headroom and expect the CPU device.

    ``get_vram_headroom`` is patched to report 1000 MB available out of
    16384 MB. The response must name "cpu" and the mocked reranker model must
    have been moved with ``.to("cpu")``.
    """
    app_module = load_app(tmp_path)
    http = TestClient(app_module.app)

    scorer = MagicMock()
    scorer.compute_score.return_value = [0.9]
    low_headroom = MagicMock(total_mb=16384.0, used_mb=15000.0, available_mb=1000.0, query_success=True)

    reranker_patch = patch.object(app_module, "reranker", scorer)
    vram_patch = patch.object(app_module, "get_vram_headroom", return_value=low_headroom)
    with reranker_patch, vram_patch:
        response = http.post(
            "/rerank",
            json={"query": "q", "chunks": ["chunk"]},
            headers={"X-API-Key": "test-key"},
        )

    assert response.status_code == 200
    assert response.json()["device"] == "cpu"
    scorer.model.to.assert_called_with("cpu")
|
||||
@@ -0,0 +1,81 @@
|
||||
# File: tests/integration/ocr-sidecar/test_parameter_governance.py
|
||||
# Change Log:
|
||||
# - 2026-06-20: Initial creation for US3 parameter governance integration tests.
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
UNIT_DIR = Path(__file__).resolve().parents[2] / "unit" / "ocr-sidecar"
|
||||
if str(UNIT_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(UNIT_DIR))
|
||||
|
||||
from test_path_traversal import FakeDocument, load_app
|
||||
|
||||
|
||||
class FakeAsyncResponse:
    """Canned stand-in for the HTTP response object returned by the fake client."""

    def raise_for_status(self) -> None:
        """Do nothing; this fake response never signals an HTTP error."""
        return None

    def json(self) -> dict:
        """Return a chat-completion shaped body whose content is a JSON string."""
        message = {"content": '{"natural_text": "governed result"}'}
        return {"choices": [{"message": message}]}
|
||||
|
||||
|
||||
class FakeAsyncClient:
    """Async HTTP client double that remembers the most recent JSON body posted."""

    # Stored on the class so tests can read it without holding the instance.
    last_payload = None

    def __init__(self, *args, **kwargs) -> None:
        """Accept and ignore any constructor arguments."""

    async def post(self, url: str, json: dict, headers: dict) -> FakeAsyncResponse:
        """Record ``json`` on the class and hand back a canned response."""
        FakeAsyncClient.last_payload = json
        return FakeAsyncResponse()

    async def aclose(self) -> None:
        """No-op close; nothing is held open by this double."""
|
||||
|
||||
|
||||
def test_ocr_uses_governed_runtime_parameters(tmp_path: Path) -> None:
    """Check that ``runtime_params`` sent to /ocr reach the model payload.

    The request supplies ``repeat_penalty``; the captured payload is expected
    to carry the same value under ``repetition_penalty``, alongside the
    unchanged ``temperature``, ``top_p`` and ``max_tokens``.
    """
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir()
    sample_pdf = uploads_dir / "document.pdf"
    sample_pdf.write_bytes(b"%PDF-1.4\n")

    app_module = load_app(uploads_dir)
    http = TestClient(app_module.app)

    residency = SimpleNamespace(keep_alive_seconds=120, reason="headroom-sufficient", vram_headroom_mb=9000.0)
    stub_client = FakeAsyncClient()
    FakeAsyncClient.last_payload = None

    request_body = {
        "pdfPath": str(sample_pdf),
        "engine": "np-dms-ocr",
        "runtime_params": {
            "temperature": 0.7,
            "top_p": 0.9,
            "repeat_penalty": 1.1,
            "max_tokens": 4096,
        },
    }

    with patch.object(app_module, "calculate_ocr_residency", return_value=residency), \
            patch.object(app_module, "prepare_ocr_messages", return_value=[{"content": []}]), \
            patch.object(app_module.fitz, "open", return_value=FakeDocument()), \
            patch.object(app_module, "ollama_client", stub_client):
        response = http.post("/ocr", json=request_body, headers={"X-API-Key": "test-key"})

    assert response.status_code == 200
    assert response.json()["text"] == "governed result"

    # The parameters must have been passed through to the model payload.
    assert FakeAsyncClient.last_payload["temperature"] == 0.7
    assert FakeAsyncClient.last_payload["top_p"] == 0.9
    assert FakeAsyncClient.last_payload["repetition_penalty"] == 1.1
    assert FakeAsyncClient.last_payload["max_tokens"] == 4096
|
||||
@@ -0,0 +1,42 @@
|
||||
# File: tests/unit/ocr-sidecar/test_api_key_validation.py
|
||||
# Change Log:
|
||||
# - 2026-06-20: Added ADR-040 API key startup and request validation tests.
|
||||
|
||||
import importlib
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from test_path_traversal import SIDECAR_DIR, install_import_stubs, load_app
|
||||
|
||||
|
||||
def test_sidecar_fails_fast_when_api_key_missing(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Importing ``app`` without OCR_SIDECAR_API_KEY must raise RuntimeError."""
    install_import_stubs()
    monkeypatch.delenv("OCR_SIDECAR_API_KEY", raising=False)
    monkeypatch.setenv("OCR_SIDECAR_UPLOAD_BASE", str(tmp_path))

    sidecar_path = str(SIDECAR_DIR)
    if sidecar_path not in sys.path:
        sys.path.insert(0, sidecar_path)

    # Drop any cached module so the import below re-runs the module body.
    sys.modules.pop("app", None)

    with pytest.raises(RuntimeError, match="OCR_SIDECAR_API_KEY is required"):
        importlib.import_module("app")
|
||||
|
||||
|
||||
def test_sidecar_rejects_invalid_api_key(tmp_path: Path) -> None:
    """A request to /embed with a wrong X-API-Key must be answered with 401."""
    http = TestClient(load_app(tmp_path).app)
    bad_headers = {"X-API-Key": "wrong-key"}
    response = http.post("/embed", json={"text": "hello"}, headers=bad_headers)
    assert response.status_code == 401
|
||||
|
||||
|
||||
def test_sidecar_rejects_missing_api_key(tmp_path: Path) -> None:
    """A request to /embed without any X-API-Key header must be answered with 401."""
    http = TestClient(load_app(tmp_path).app)
    body = {"text": "hello"}
    response = http.post("/embed", json=body)
    assert response.status_code == 401
|
||||
@@ -0,0 +1,114 @@
|
||||
# File: tests/unit/ocr-sidecar/test_path_traversal.py
|
||||
# Change Log:
|
||||
# - 2026-06-20: Added ADR-040 path traversal tests for OCR sidecar.
|
||||
|
||||
import importlib
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
SIDECAR_DIR = Path(__file__).resolve().parents[3] / "specs" / "04-Infrastructure-OPS" / "04-00-docker-compose" / "Desk-5439" / "ocr-sidecar"
|
||||
|
||||
|
||||
def install_import_stubs() -> None:
    """Install stubs for heavy dependencies so unit tests can import ``app`` quickly.

    Overwrites the ``sys.modules`` entries for ``fitz``, ``typhoon_ocr``,
    ``FlagEmbedding``, ``PIL`` / ``PIL.Image`` and ``pythainlp`` /
    ``pythainlp.tokenize`` / ``pythainlp.util`` with bare module objects that
    expose only the attributes assigned below.
    """
    fitz_module = types.ModuleType("fitz")
    fitz_module.Document = object
    fitz_module.open = lambda *args, **kwargs: None
    sys.modules["fitz"] = fitz_module

    typhoon_module = types.ModuleType("typhoon_ocr")
    typhoon_module.prepare_ocr_messages = lambda *args, **kwargs: [{"content": []}]
    sys.modules["typhoon_ocr"] = typhoon_module

    flag_module = types.ModuleType("FlagEmbedding")
    flag_module.BGEM3FlagModel = lambda *args, **kwargs: None
    flag_module.FlagReranker = lambda *args, **kwargs: None
    sys.modules["FlagEmbedding"] = flag_module

    pil_module = types.ModuleType("PIL")
    pil_image_module = types.ModuleType("PIL.Image")
    pil_module.Image = pil_image_module
    sys.modules["PIL"] = pil_module
    sys.modules["PIL.Image"] = pil_image_module

    pythainlp_module = types.ModuleType("pythainlp")
    tokenize_module = types.ModuleType("pythainlp.tokenize")
    tokenize_module.word_tokenize = lambda text, **kwargs: text.split()
    util_module = types.ModuleType("pythainlp.util")
    util_module.normalize = lambda text: text
    # Bind the submodules on the parent, as done for PIL.Image above, so that
    # attribute access such as ``pythainlp.util.normalize`` resolves as well
    # as ``from pythainlp.util import normalize``.
    pythainlp_module.tokenize = tokenize_module
    pythainlp_module.util = util_module
    sys.modules["pythainlp"] = pythainlp_module
    sys.modules["pythainlp.tokenize"] = tokenize_module
    sys.modules["pythainlp.util"] = util_module
|
||||
|
||||
|
||||
def load_app(upload_base: Path):
    """Import a fresh copy of the sidecar ``app`` module configured for tests.

    Installs the dependency stubs, sets OCR_SIDECAR_API_KEY to "test-key" and
    OCR_SIDECAR_UPLOAD_BASE to ``upload_base`` in ``os.environ`` (the values
    are left in place afterwards), makes sure SIDECAR_DIR is importable, and
    drops any cached ``app`` module so the import re-runs the module body.

    Returns:
        The newly imported ``app`` module.
    """
    install_import_stubs()
    os.environ.update(
        {
            "OCR_SIDECAR_API_KEY": "test-key",
            "OCR_SIDECAR_UPLOAD_BASE": str(upload_base),
        }
    )
    sidecar_path = str(SIDECAR_DIR)
    if sidecar_path not in sys.path:
        sys.path.insert(0, sidecar_path)
    sys.modules.pop("app", None)
    return importlib.import_module("app")
|
||||
|
||||
|
||||
class FakePage:
    """Page double whose extracted text is a fixed run of 120 characters."""

    def get_text(self) -> str:
        """Return the letter "A" repeated 120 times."""
        # presumably long enough for the sidecar to treat the page as having
        # a usable text layer — verify against the app's threshold
        return 120 * "A"
|
||||
|
||||
|
||||
class FakeDocument:
    """Single-page document double used in place of the result of ``fitz.open``."""

    # File name reported by the fake document.
    name = "fake.pdf"

    def __len__(self) -> int:
        """Report exactly one page."""
        return 1

    def __getitem__(self, index: int) -> FakePage:
        """Return a new ``FakePage`` for any index; the index is not checked."""
        return FakePage()
|
||||
|
||||
|
||||
def test_ocr_rejects_parent_traversal_outside_upload_base(tmp_path: Path) -> None:
    """A pdfPath that climbs out of the upload base via ``..`` must yield 403."""
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir()
    http = TestClient(load_app(uploads_dir).app)

    escaping_path = uploads_dir.joinpath("..", "outside.pdf")
    response = http.post(
        "/ocr",
        json={"pdfPath": str(escaping_path)},
        headers={"X-API-Key": "test-key"},
    )
    assert response.status_code == 403
|
||||
|
||||
|
||||
def test_ocr_rejects_prefix_sibling_path(tmp_path: Path) -> None:
    """A path in a sibling directory that shares the base's name prefix must yield 403.

    ``uploads_evil`` starts with the same characters as ``uploads``, so a
    plain string-prefix check on the path would let it through.
    """
    uploads_dir = tmp_path / "uploads"
    lookalike_dir = tmp_path / "uploads_evil"
    for directory in (uploads_dir, lookalike_dir):
        directory.mkdir()

    http = TestClient(load_app(uploads_dir).app)
    target = lookalike_dir / "document.pdf"
    response = http.post(
        "/ocr",
        json={"pdfPath": str(target)},
        headers={"X-API-Key": "test-key"},
    )
    assert response.status_code == 403
|
||||
|
||||
|
||||
def test_ocr_accepts_canonical_path_inside_upload_base(tmp_path: Path) -> None:
    """A pdfPath inside the upload base must be accepted and served by the fast path.

    ``fitz.open`` is patched to return a ``FakeDocument``; the response is
    expected to be 200 with ``engineUsed`` equal to "fast-path".
    """
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir()
    sample_pdf = uploads_dir / "document.pdf"
    sample_pdf.write_bytes(b"%PDF-1.4\n")

    app_module = load_app(uploads_dir)
    http = TestClient(app_module.app)

    open_patch = patch.object(app_module.fitz, "open", return_value=FakeDocument())
    with open_patch:
        response = http.post(
            "/ocr",
            json={"pdfPath": str(sample_pdf)},
            headers={"X-API-Key": "test-key"},
        )

    assert response.status_code == 200
    assert response.json()["engineUsed"] == "fast-path"
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
# File: tests/unit/ocr-sidecar/test_residency_wiring.py
|
||||
# Change Log:
|
||||
# - 2026-06-20: Added ADR-040 residency wiring tests for process_ocr.
|
||||
# - 2026-06-20: Updated for async process_ocr (Phase 6 — async I/O refactor).
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from test_path_traversal import load_app
|
||||
|
||||
|
||||
class FakeAsyncResponse:
    """Simulates an httpx.AsyncClient response for the async process_ocr."""

    def raise_for_status(self) -> None:
        """Do nothing; this fake response never signals an HTTP error."""
        return None

    def json(self) -> dict:
        """Return a chat-completion shaped body whose content is a JSON string."""
        message = {"content": '{"natural_text": "ok"}'}
        return {"choices": [{"message": message}]}
|
||||
|
||||
|
||||
class FakeAsyncClient:
    """Simulates httpx.AsyncClient for the async process_ocr.

    Every posted JSON body is stored both on the instance (``payload``) and on
    the class (``last_payload``) so tests can inspect it either way.
    """

    # Declared on the class so the attribute exists before any instance is
    # created, matching the sibling test doubles in the other test modules.
    last_payload = None

    def __init__(self, *args, **kwargs) -> None:
        """Ignore constructor arguments and reset both payload slots."""
        self.payload = None
        FakeAsyncClient.last_payload = None

    async def post(self, url: str, json: dict, headers: dict) -> FakeAsyncResponse:
        """Record ``json`` on the instance and the class, then return a canned response."""
        self.payload = json
        FakeAsyncClient.last_payload = json
        return FakeAsyncResponse()

    async def aclose(self) -> None:
        """No-op close; nothing is held open by this double."""
|
||||
|
||||
|
||||
FakeAsyncClient.last_payload = None
|
||||
|
||||
|
||||
def test_process_ocr_uses_calculated_residency_keep_alive(tmp_path: Path) -> None:
    """T019: process_ocr must call calculate_ocr_residency and use the keep_alive it returns."""
    app_module = load_app(tmp_path)
    residency = SimpleNamespace(keep_alive_seconds=120, reason="headroom-sufficient", vram_headroom_mb=9000.0)
    stub_client = FakeAsyncClient()

    with patch.object(app_module, "calculate_ocr_residency", return_value=residency) as residency_mock, \
            patch.object(app_module, "prepare_ocr_messages", return_value=[{"content": []}]), \
            patch.object(app_module, "ollama_client", stub_client):
        pending = app_module.process_ocr("/tmp/test.pdf", page_num=1)
        result = asyncio.run(pending)

    assert result == "ok"
    residency_mock.assert_called_once_with(app_module.OCR_ACTIVE_PROFILE)
    assert FakeAsyncClient.last_payload["keep_alive"] == 120
|
||||
|
||||
|
||||
def test_process_ocr_rejects_backend_keep_alive_override(tmp_path: Path) -> None:
    """T021: process_ocr must reject a keep_alive supplied by the backend."""
    app_module = load_app(tmp_path)
    forbidden_options = {"keep_alive": 0}

    async def call_with_override():
        """Await process_ocr with the override and expect a ValueError."""
        with pytest.raises(ValueError, match="keep_alive must be calculated"):
            await app_module.process_ocr("/tmp/test.pdf", options_override=forbidden_options)

    asyncio.run(call_with_override())
|
||||
|
||||
|
||||
def test_ocr_endpoint_rejects_keep_alive_override(tmp_path: Path) -> None:
    """T021: the /ocr endpoint must reject ``keep_alive`` in the request body with 400."""
    app_module = load_app(tmp_path)
    from fastapi.testclient import TestClient

    http = TestClient(app_module.app)
    request_body = {"pdfPath": str(tmp_path / "document.pdf"), "keep_alive": 0}
    response = http.post("/ocr", json=request_body, headers={"X-API-Key": "test-key"})
    assert response.status_code == 400
|
||||
Reference in New Issue
Block a user