chore: update OCR tests and documentation (refs #121)
Some checks failed
Deploy to Staging / Build Images (pull_request) Failing after 7m4s
Deploy to Staging / Deploy to Staging (pull_request) Has been skipped
Deploy to Staging / Verify Staging (pull_request) Has been skipped
Deploy to Staging / Notify Staging Ready (pull_request) Has been skipped
Deploy to Staging / Notify Staging Failure (pull_request) Successful in 7s
Some checks failed
Deploy to Staging / Build Images (pull_request) Failing after 7m4s
Deploy to Staging / Deploy to Staging (pull_request) Has been skipped
Deploy to Staging / Verify Staging (pull_request) Has been skipped
Deploy to Staging / Notify Staging Ready (pull_request) Has been skipped
Deploy to Staging / Notify Staging Failure (pull_request) Successful in 7s
Add engine abstraction tests and update the docs to reflect the PaddleOCR-primary architecture with an optional Google Vision cloud fallback.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
675
ocr/tests/test_engine_abstraction.py
Normal file
675
ocr/tests/test_engine_abstraction.py
Normal file
@@ -0,0 +1,675 @@
|
||||
"""Tests for OCR engine abstraction layer.
|
||||
|
||||
Covers: base types, exception hierarchy, PaddleOcrEngine,
|
||||
TesseractEngine, CloudEngine, HybridEngine, and engine_factory.
|
||||
"""
|
||||
|
||||
import io
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from PIL import Image
|
||||
|
||||
from app.engines.base_engine import (
|
||||
EngineError,
|
||||
EngineProcessingError,
|
||||
EngineUnavailableError,
|
||||
OcrConfig,
|
||||
OcrEngine,
|
||||
OcrEngineResult,
|
||||
WordBox,
|
||||
)
|
||||
|
||||
|
||||
# --- Helpers ---
|
||||
|
||||
|
||||
def _create_test_image_bytes() -> bytes:
    """Return PNG-encoded bytes of a blank white 100x50 RGB image.

    The payload is only meant to be a decodable image for engine tests.
    """
    blank = Image.new("RGB", (100, 50), (255, 255, 255))
    with io.BytesIO() as stream:
        blank.save(stream, format="PNG")
        return stream.getvalue()
|
||||
|
||||
|
||||
def _make_result(
    text: str, confidence: float, engine_name: str
) -> OcrEngineResult:
    """Build an ``OcrEngineResult`` carrying the given fields and no word boxes."""
    fields = {
        "text": text,
        "confidence": confidence,
        "word_boxes": [],
        "engine_name": engine_name,
    }
    return OcrEngineResult(**fields)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Exception hierarchy
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestExceptionHierarchy:
    """Engine errors form a proper hierarchy under EngineError."""

    def test_unavailable_is_engine_error(self) -> None:
        """EngineUnavailableError derives from EngineError."""
        assert issubclass(EngineUnavailableError, EngineError)

    def test_processing_is_engine_error(self) -> None:
        """EngineProcessingError derives from EngineError."""
        assert issubclass(EngineProcessingError, EngineError)

    def test_engine_error_is_exception(self) -> None:
        """The base engine error is an ordinary Exception subclass."""
        assert issubclass(EngineError, Exception)

    def test_catch_base_catches_subtypes(self) -> None:
        """Expecting the base class is satisfied by either concrete subtype."""
        raised = (
            EngineUnavailableError("not installed"),
            EngineProcessingError("OCR failed"),
        )
        for error in raised:
            with pytest.raises(EngineError):
                raise error
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestWordBox:
    """WordBox construction: default geometry and explicit field round-trip."""

    def test_default_positions(self) -> None:
        """Geometry fields are 0 when only text and confidence are supplied."""
        wb = WordBox(text="VIN", confidence=0.95)
        assert wb.x == 0
        assert wb.y == 0
        assert wb.width == 0
        assert wb.height == 0

    def test_all_fields(self) -> None:
        """Every explicitly supplied field is readable back unchanged."""
        wb = WordBox(text="ABC", confidence=0.88, x=10, y=20, width=100, height=30)
        assert wb.text == "ABC"
        assert wb.confidence == 0.88
        assert wb.x == 10
        # y and height are checked too so a dropped or swapped geometry
        # field cannot slip through unnoticed.
        assert wb.y == 20
        assert wb.width == 100
        assert wb.height == 30
|
||||
|
||||
|
||||
class TestOcrConfig:
    """OcrConfig defaults, whitelist storage, and per-instance hint dicts."""

    def test_defaults(self) -> None:
        """A bare OcrConfig exposes the expected default values."""
        cfg = OcrConfig()
        assert cfg.char_whitelist is None
        assert cfg.single_line is False
        assert cfg.single_word is False
        assert cfg.use_angle_cls is True
        assert cfg.hints == {}

    def test_vin_whitelist_excludes_ioq(self) -> None:
        """The VIN whitelist used here carries no I, O or Q."""
        vin_chars = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
        cfg = OcrConfig(char_whitelist=vin_chars)
        for banned in ("I", "O", "Q"):
            assert banned not in cfg.char_whitelist

    def test_hints_are_independent_across_instances(self) -> None:
        """Mutating one instance's hints must not leak into another instance."""
        first = OcrConfig()
        second = OcrConfig()
        first.hints["psm"] = 7
        assert "psm" not in second.hints
|
||||
|
||||
|
||||
class TestOcrEngineResult:
    """OcrEngineResult keeps the values it was constructed with."""

    def test_construction(self) -> None:
        """A populated result exposes text, confidence, boxes and engine name."""
        box = WordBox(text="1HGBH41JXMN109186", confidence=0.94)
        result = OcrEngineResult(
            text="1HGBH41JXMN109186",
            confidence=0.94,
            word_boxes=[box],
            engine_name="paddleocr",
        )
        assert result.text == "1HGBH41JXMN109186"
        assert result.confidence == 0.94
        assert len(result.word_boxes) == 1
        assert result.engine_name == "paddleocr"

    def test_empty_result(self) -> None:
        """An empty result keeps empty text and an empty box list."""
        empty = OcrEngineResult(
            text="",
            confidence=0.0,
            word_boxes=[],
            engine_name="tesseract",
        )
        assert empty.text == ""
        assert empty.word_boxes == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OcrEngine ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestOcrEngineABC:
    """OcrEngine is abstract; a subclass providing name/recognize is usable."""

    def test_cannot_instantiate_directly(self) -> None:
        """Instantiating the abstract base raises TypeError."""
        with pytest.raises(TypeError):
            OcrEngine()  # type: ignore[abstract]

    def test_concrete_subclass_works(self) -> None:
        """A subclass implementing ``name`` and ``recognize`` can be used."""

        class StubEngine(OcrEngine):
            """Minimal concrete engine returning a fixed result."""

            @property
            def name(self) -> str:
                return "stub"

            def recognize(
                self, image_bytes: bytes, config: OcrConfig
            ) -> OcrEngineResult:
                canned = OcrEngineResult(
                    text="ok", confidence=1.0, word_boxes=[], engine_name="stub"
                )
                return canned

        stub = StubEngine()
        assert stub.name == "stub"
        outcome = stub.recognize(b"", OcrConfig())
        assert outcome.text == "ok"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PaddleOcrEngine
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPaddleOcrEngine:
    """PaddleOcrEngine behaviour with the OCR backend replaced by a mock.

    The engine module is imported inside each test (as in the rest of this
    file) so that merely collecting the tests does not import it.
    """

    @staticmethod
    def _engine_returning(raw_output):
        """Return a PaddleOcrEngine whose ``_ocr.ocr()`` yields ``raw_output``."""
        from app.engines.paddle_engine import PaddleOcrEngine

        engine = PaddleOcrEngine()
        backend = MagicMock()
        backend.ocr.return_value = raw_output
        engine._ocr = backend
        return engine

    def test_name(self) -> None:
        from app.engines.paddle_engine import PaddleOcrEngine

        engine = PaddleOcrEngine()
        assert engine.name == "paddleocr"

    def test_lazy_init_not_loaded_at_construction(self) -> None:
        """Constructing the engine must not create the backend yet."""
        from app.engines.paddle_engine import PaddleOcrEngine

        engine = PaddleOcrEngine()
        assert engine._ocr is None

    def test_recognize_empty_results(self) -> None:
        """A ``[None]`` backend response maps to an empty, zero-confidence result."""
        engine = self._engine_returning([None])

        result = engine.recognize(_create_test_image_bytes(), OcrConfig())
        assert result.text == ""
        assert result.confidence == 0.0
        assert result.word_boxes == []
        assert result.engine_name == "paddleocr"

    def test_recognize_with_results(self) -> None:
        """Two detections are joined with a space and their confidences averaged."""
        # Mocked backend shape: [[ [quad_points, (text, confidence)], ... ]]
        engine = self._engine_returning(
            [
                [
                    [[[10, 20], [110, 20], [110, 50], [10, 50]], ("HELLO", 0.95)],
                    [[[10, 60], [110, 60], [110, 90], [10, 90]], ("WORLD", 0.88)],
                ]
            ]
        )

        result = engine.recognize(_create_test_image_bytes(), OcrConfig())
        assert result.text == "HELLO WORLD"
        assert abs(result.confidence - 0.915) < 0.01  # mean of 0.95 and 0.88
        assert len(result.word_boxes) == 2
        assert result.word_boxes[0].text == "HELLO"
        assert result.word_boxes[0].confidence == 0.95
        assert result.word_boxes[1].text == "WORLD"
        assert result.engine_name == "paddleocr"

    def test_recognize_whitelist_filters_characters(self) -> None:
        """Characters outside ``char_whitelist`` are stripped from the output."""
        engine = self._engine_returning(
            [
                [
                    [[[0, 0], [100, 0], [100, 30], [0, 30]], ("1HG-BH4!", 0.9)],
                ]
            ]
        )

        config = OcrConfig(char_whitelist="ABCDEFGHJKLMNPRSTUVWXYZ0123456789")
        result = engine.recognize(_create_test_image_bytes(), config)
        assert "-" not in result.text
        assert "!" not in result.text
        assert result.word_boxes[0].text == "1HGBH4"

    def test_recognize_quadrilateral_to_bounding_box(self) -> None:
        """A skewed quad is reduced to its axis-aligned bounding box."""
        # Slightly rotated quad: min x=8, min y=20, max x=110, max y=55
        engine = self._engine_returning(
            [
                [
                    [[[10, 20], [110, 25], [108, 55], [8, 50]], ("TEXT", 0.9)],
                ]
            ]
        )

        result = engine.recognize(_create_test_image_bytes(), OcrConfig())
        wb = result.word_boxes[0]
        assert wb.x == 8
        assert wb.y == 20
        assert wb.width == 102  # 110 - 8
        assert wb.height == 35  # 55 - 20

    def test_recognize_skips_empty_after_whitelist(self) -> None:
        """Text consisting only of non-whitelisted characters is skipped."""
        engine = self._engine_returning(
            [
                [
                    [[[0, 0], [50, 0], [50, 20], [0, 20]], ("---", 0.9)],
                ]
            ]
        )

        config = OcrConfig(char_whitelist="ABC")
        result = engine.recognize(_create_test_image_bytes(), config)
        assert result.text == ""
        assert result.word_boxes == []
        assert result.confidence == 0.0

    def test_import_error_raises_unavailable(self) -> None:
        """A missing ``paddleocr`` package surfaces as EngineUnavailableError."""
        from app.engines.paddle_engine import PaddleOcrEngine

        engine = PaddleOcrEngine()
        engine._ocr = None
        # A ``None`` entry in sys.modules makes both ``import paddleocr`` and
        # ``importlib.import_module("paddleocr")`` raise ImportError. That is
        # sufficient on its own, so importlib and builtins.__import__ are left
        # unpatched: replacing those process-wide hooks can break unrelated
        # imports (including mock's own target resolution) inside the block.
        with patch.dict("sys.modules", {"paddleocr": None}):
            with pytest.raises(EngineUnavailableError, match="paddleocr"):
                engine._get_ocr()

    def test_processing_error_on_exception(self) -> None:
        """A backend crash is wrapped in EngineProcessingError."""
        from app.engines.paddle_engine import PaddleOcrEngine

        engine = PaddleOcrEngine()
        backend = MagicMock()
        backend.ocr.side_effect = RuntimeError("OCR crashed")
        engine._ocr = backend

        with pytest.raises(EngineProcessingError, match="PaddleOCR recognition failed"):
            engine.recognize(_create_test_image_bytes(), OcrConfig())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TesseractEngine
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTesseractEngine:
    """Tests for TesseractEngine using mocked pytesseract."""

    @pytest.fixture()
    def engine(self) -> "TesseractEngine":  # type: ignore[name-defined]
        """Create a TesseractEngine with mocked pytesseract dependency.

        The engine is constructed while ``pytesseract`` in ``sys.modules`` and
        the module-level ``settings`` object are both replaced; the patches are
        released when the fixture returns.
        """
        mock_pytesseract = MagicMock()
        mock_pytesseract.Output.DICT = "dict"

        with patch.dict("sys.modules", {"pytesseract": mock_pytesseract}):
            with patch("app.engines.tesseract_engine.settings") as mock_settings:
                mock_settings.tesseract_cmd = "/usr/bin/tesseract"
                from app.engines.tesseract_engine import TesseractEngine

                eng = TesseractEngine()
                eng._mock_pytesseract = mock_pytesseract  # type: ignore[attr-defined]
                return eng

    def test_name(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        assert engine.name == "tesseract"

    def test_build_config_default_psm(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """With a default OcrConfig the config string selects PSM 6."""
        config_str = engine._build_config(OcrConfig())
        assert "--psm 6" in config_str

    def test_build_config_single_line(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """``single_line=True`` selects PSM 7."""
        config_str = engine._build_config(OcrConfig(single_line=True))
        assert "--psm 7" in config_str

    def test_build_config_single_word(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """``single_word=True`` selects PSM 8."""
        config_str = engine._build_config(OcrConfig(single_word=True))
        assert "--psm 8" in config_str

    def test_build_config_whitelist(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """A character whitelist is forwarded as ``tessedit_char_whitelist``."""
        config_str = engine._build_config(OcrConfig(char_whitelist="ABC123"))
        assert "-c tessedit_char_whitelist=ABC123" in config_str

    def test_build_config_psm_hint(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """An explicit ``psm`` hint is used for the PSM value."""
        config_str = engine._build_config(OcrConfig(hints={"psm": 11}))
        assert "--psm 11" in config_str

    def test_recognize_normalizes_confidence(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """Tesseract returns 0-100 confidence; engine normalizes to 0.0-1.0."""
        # All columns have one entry per row so that indexing any column by
        # row number is valid. The second row (empty text, conf -1) is
        # expected not to contribute to the result.
        engine._pytesseract.image_to_data.return_value = {
            "text": ["HELLO", ""],
            "conf": [92, -1],
            "left": [10, 0],
            "top": [20, 0],
            "width": [100, 0],
            "height": [30, 0],
        }

        result = engine.recognize(_create_test_image_bytes(), OcrConfig())
        assert result.text == "HELLO"
        assert abs(result.confidence - 0.92) < 0.01
        assert result.engine_name == "tesseract"

    def test_import_error_raises_unavailable(self) -> None:
        """A missing ``pytesseract`` package surfaces as EngineUnavailableError."""
        # The ``None`` sentinel in sys.modules already makes any import of
        # pytesseract raise ImportError. builtins.__import__ is deliberately
        # NOT patched: a replacement that delegates via the ``__import__`` name
        # would call itself (the name resolves to the patched builtin) and the
        # project import below would end in RecursionError.
        with patch.dict("sys.modules", {"pytesseract": None}):
            with patch("app.engines.tesseract_engine.settings") as mock_settings:
                mock_settings.tesseract_cmd = "/usr/bin/tesseract"
                from app.engines.tesseract_engine import TesseractEngine

                with pytest.raises(EngineUnavailableError, match="pytesseract"):
                    TesseractEngine()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CloudEngine
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCloudEngine:
    """CloudEngine behaviour with the Google Vision client replaced by mocks."""

    @staticmethod
    def _fake_google_modules() -> dict:
        """Return sys.modules entries standing in for the google.cloud packages."""
        return {
            "google.cloud.vision": MagicMock(),
            "google.cloud": MagicMock(),
            "google": MagicMock(),
        }

    @staticmethod
    def _engine_with_response(response: MagicMock):
        """Return a CloudEngine whose client's ``text_detection`` yields ``response``."""
        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/fake/key.json")
        client = MagicMock()
        client.text_detection.return_value = response
        engine._client = client
        return engine

    def test_name(self) -> None:
        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/fake/path.json")
        assert engine.name == "google_vision"

    def test_lazy_init_not_loaded_at_construction(self) -> None:
        """Constructing the engine must not create the API client yet."""
        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/fake/path.json")
        assert engine._client is None

    def test_missing_key_file_raises_unavailable(self) -> None:
        """A key path that does not exist is reported as EngineUnavailableError."""
        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/nonexistent/key.json")
        with pytest.raises(EngineUnavailableError, match="key not found"):
            engine._get_client()

    @patch("os.path.isfile", return_value=True)
    def test_missing_library_raises_unavailable(self, _mock_isfile: MagicMock) -> None:
        """With the key file 'present', a missing library is reported as unavailable."""
        import builtins

        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/fake/key.json")

        # Bind the genuine importer BEFORE patching. Inside the patch the name
        # ``__import__`` resolves to the mock itself, so delegating through it
        # would recurse forever on any import other than google.cloud.
        real_import = builtins.__import__

        def mock_import(name, *args, **kwargs):
            if "google.cloud" in name:
                raise ImportError("No module named 'google.cloud'")
            return real_import(name, *args, **kwargs)

        with patch("builtins.__import__", side_effect=mock_import):
            with pytest.raises(EngineUnavailableError, match="google-cloud-vision"):
                engine._get_client()

    def test_recognize_empty_annotations(self) -> None:
        """No text annotations and no API error yields an empty result."""
        response = MagicMock()
        response.error.message = ""
        response.text_annotations = []
        engine = self._engine_with_response(response)

        # Mock the google.cloud.vision import inside recognize()
        with patch.dict("sys.modules", self._fake_google_modules()):
            result = engine.recognize(b"fake_image", OcrConfig())
            assert result.text == ""
            assert result.confidence == 0.0
            assert result.engine_name == "google_vision"

    def test_recognize_api_error_raises_processing_error(self) -> None:
        """A non-empty ``response.error.message`` becomes EngineProcessingError."""
        response = MagicMock()
        response.error.message = "API quota exceeded"
        engine = self._engine_with_response(response)

        with patch.dict("sys.modules", self._fake_google_modules()):
            with pytest.raises(EngineProcessingError, match="API quota exceeded"):
                engine.recognize(b"fake_image", OcrConfig())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HybridEngine
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHybridEngine:
    """HybridEngine naming and primary/fallback selection, using mock engines."""

    @staticmethod
    def _engine_mock(name: str, result: "OcrEngineResult | None" = None) -> MagicMock:
        """Return an OcrEngine-spec'd mock with ``name`` set.

        ``recognize`` is given a canned return value only when ``result`` is
        supplied; otherwise it keeps MagicMock's default behaviour.
        """
        mock_engine = MagicMock(spec=OcrEngine)
        mock_engine.name = name
        if result is not None:
            mock_engine.recognize.return_value = result
        return mock_engine

    def test_name_with_fallback(self) -> None:
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr")
        backup = self._engine_mock("google_vision")
        hybrid = HybridEngine(primary=main, fallback=backup)
        assert hybrid.name == "hybrid(paddleocr+google_vision)"

    def test_name_without_fallback(self) -> None:
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr")
        hybrid = HybridEngine(primary=main)
        assert hybrid.name == "hybrid(paddleocr+none)"

    def test_high_confidence_skips_fallback(self) -> None:
        """Primary confidence above the threshold: fallback is never called."""
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr", _make_result("VIN123", 0.95, "paddleocr"))
        backup = self._engine_mock("cloud")

        hybrid = HybridEngine(primary=main, fallback=backup, threshold=0.6)
        outcome = hybrid.recognize(b"img", OcrConfig())
        assert outcome.text == "VIN123"
        assert outcome.engine_name == "paddleocr"
        backup.recognize.assert_not_called()

    def test_low_confidence_triggers_fallback(self) -> None:
        """Primary below threshold and a more confident fallback: fallback wins."""
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr", _make_result("VIN123", 0.3, "paddleocr"))
        backup = self._engine_mock(
            "google_vision", _make_result("VIN456", 0.92, "google_vision")
        )

        hybrid = HybridEngine(primary=main, fallback=backup, threshold=0.6)
        outcome = hybrid.recognize(b"img", OcrConfig())
        assert outcome.text == "VIN456"
        assert outcome.engine_name == "google_vision"
        backup.recognize.assert_called_once()

    def test_low_confidence_no_fallback_returns_primary(self) -> None:
        """Without a fallback engine the low-confidence primary result is returned."""
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr", _make_result("VIN123", 0.3, "paddleocr"))

        hybrid = HybridEngine(primary=main, fallback=None, threshold=0.6)
        outcome = hybrid.recognize(b"img", OcrConfig())
        assert outcome.text == "VIN123"

    def test_fallback_lower_confidence_returns_primary(self) -> None:
        """A fallback result less confident than the primary is discarded."""
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr", _make_result("VIN123", 0.4, "paddleocr"))
        backup = self._engine_mock(
            "google_vision", _make_result("VIN456", 0.3, "google_vision")
        )

        hybrid = HybridEngine(primary=main, fallback=backup, threshold=0.6)
        outcome = hybrid.recognize(b"img", OcrConfig())
        assert outcome.text == "VIN123"

    def test_fallback_engine_error_returns_primary(self) -> None:
        """An EngineError raised by the fallback leaves the primary result in place."""
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr", _make_result("VIN123", 0.3, "paddleocr"))
        backup = self._engine_mock("google_vision")
        backup.recognize.side_effect = EngineUnavailableError("key missing")

        hybrid = HybridEngine(primary=main, fallback=backup, threshold=0.6)
        outcome = hybrid.recognize(b"img", OcrConfig())
        assert outcome.text == "VIN123"

    def test_fallback_unexpected_error_returns_primary(self) -> None:
        """A non-engine exception from the fallback also yields the primary result."""
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr", _make_result("VIN123", 0.3, "paddleocr"))
        backup = self._engine_mock("google_vision")
        backup.recognize.side_effect = RuntimeError("network error")

        hybrid = HybridEngine(primary=main, fallback=backup, threshold=0.6)
        outcome = hybrid.recognize(b"img", OcrConfig())
        assert outcome.text == "VIN123"

    @patch("app.engines.hybrid_engine.time")
    def test_fallback_timeout_returns_primary(self, mock_time: MagicMock) -> None:
        """A fallback call measured as too slow is ignored in favour of primary."""
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr", _make_result("VIN123", 0.3, "paddleocr"))
        backup = self._engine_mock(
            "google_vision", _make_result("VIN456", 0.92, "google_vision")
        )
        # Simulate 6-second delay (exceeds 5s limit)
        mock_time.monotonic.side_effect = [0.0, 6.0]

        hybrid = HybridEngine(primary=main, fallback=backup, threshold=0.6)
        outcome = hybrid.recognize(b"img", OcrConfig())
        assert outcome.text == "VIN123"  # timeout -> use primary

    def test_exact_threshold_skips_fallback(self) -> None:
        """When confidence == threshold, no fallback needed (>= check)."""
        from app.engines.hybrid_engine import HybridEngine

        main = self._engine_mock("paddleocr", _make_result("VIN", 0.6, "paddleocr"))
        backup = self._engine_mock("cloud")

        hybrid = HybridEngine(primary=main, fallback=backup, threshold=0.6)
        outcome = hybrid.recognize(b"img", OcrConfig())
        assert outcome.engine_name == "paddleocr"
        backup.recognize.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Engine factory
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEngineFactory:
    """engine_factory: name validation, settings defaults and hybrid wiring."""

    def test_unknown_engine_raises(self) -> None:
        """An unrecognised engine name is rejected with EngineUnavailableError."""
        from app.engines.engine_factory import _create_single_engine

        with pytest.raises(EngineUnavailableError, match="Unknown engine"):
            _create_single_engine("nonexistent")

    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_defaults_to_settings_primary(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        """With no argument the primary engine name is taken from settings."""
        mock_settings.configure_mock(
            ocr_primary_engine="paddleocr",
            ocr_fallback_engine="none",
        )
        built = MagicMock(spec=OcrEngine)
        mock_create.return_value = built

        from app.engines.engine_factory import create_engine

        produced = create_engine()
        mock_create.assert_called_once_with("paddleocr")
        assert produced == built

    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_explicit_name_overrides_settings(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        """An explicit engine name is what gets passed to the single-engine factory."""
        mock_settings.configure_mock(ocr_fallback_engine="none")
        built = MagicMock(spec=OcrEngine)
        mock_create.return_value = built

        from app.engines.engine_factory import create_engine

        create_engine("tesseract")
        mock_create.assert_called_once_with("tesseract")

    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_creates_hybrid_when_fallback_configured(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        """A configured fallback engine results in a HybridEngine."""
        mock_settings.configure_mock(
            ocr_primary_engine="paddleocr",
            ocr_fallback_engine="google_vision",
            ocr_fallback_threshold=0.7,
        )
        primary_stub = MagicMock(spec=OcrEngine)
        fallback_stub = MagicMock(spec=OcrEngine)
        # First factory call returns the primary, second the fallback.
        mock_create.side_effect = [primary_stub, fallback_stub]

        from app.engines.engine_factory import create_engine
        from app.engines.hybrid_engine import HybridEngine

        produced = create_engine()
        assert isinstance(produced, HybridEngine)

    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_fallback_failure_returns_primary_only(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        """If building the fallback fails, the bare primary engine is returned."""
        mock_settings.configure_mock(
            ocr_primary_engine="paddleocr",
            ocr_fallback_engine="google_vision",
            ocr_fallback_threshold=0.6,
        )
        primary_stub = MagicMock(spec=OcrEngine)
        # Primary is built successfully; the second (fallback) call raises.
        mock_create.side_effect = [primary_stub, EngineUnavailableError("no key")]

        from app.engines.engine_factory import create_engine

        produced = create_engine()
        assert produced == primary_stub
|
||||
Reference in New Issue
Block a user