Files
motovaultpro/ocr/tests/test_engine_abstraction.py
Eric Gullickson e6dd7492a1
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 8m46s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 22s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
test: add monthly limit, counter, and cloud-primary engine tests (refs #127)
- Update existing hybrid engine tests for new Redis counter behavior
- Add cloud-primary path tests (under/at limit, fallback, errors)
- Add Redis counter increment and TTL verification tests
- Add Redis failure graceful handling test
- Update cloud engine error message assertion for WIF config

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 20:56:51 -06:00

848 lines
31 KiB
Python

"""Tests for OCR engine abstraction layer.
Covers: base types, exception hierarchy, PaddleOcrEngine,
CloudEngine, HybridEngine, and engine_factory.
"""
import io
from unittest.mock import MagicMock, patch
import pytest
from PIL import Image
from app.engines.base_engine import (
EngineError,
EngineProcessingError,
EngineUnavailableError,
OcrConfig,
OcrEngine,
OcrEngineResult,
WordBox,
)
# --- Helpers ---
def _create_test_image_bytes() -> bytes:
    """Return the PNG encoding of a blank white 100x50 RGB image."""
    with io.BytesIO() as stream:
        Image.new("RGB", (100, 50), (255, 255, 255)).save(stream, format="PNG")
        return stream.getvalue()
def _make_result(
    text: str, confidence: float, engine_name: str
) -> OcrEngineResult:
    """Build an OcrEngineResult carrying the given fields and no word boxes."""
    fields = {
        "text": text,
        "confidence": confidence,
        "word_boxes": [],
        "engine_name": engine_name,
    }
    return OcrEngineResult(**fields)
def _mock_paddle_result(
dt_polys: list, rec_texts: list[str], rec_scores: list[float]
) -> MagicMock:
"""Create a mock PaddleOCR v3.x predict() result object.
Wraps data under ``"res"`` key to match save_to_json format.
"""
result = MagicMock()
result.json = {
"res": {
"dt_polys": dt_polys,
"rec_texts": rec_texts,
"rec_scores": rec_scores,
}
}
return result
# ---------------------------------------------------------------------------
# Exception hierarchy
# ---------------------------------------------------------------------------
class TestExceptionHierarchy:
    """Engine errors form a proper hierarchy under EngineError."""

    def test_unavailable_is_engine_error(self) -> None:
        """EngineUnavailableError derives from EngineError."""
        assert issubclass(EngineUnavailableError, EngineError)

    def test_processing_is_engine_error(self) -> None:
        """EngineProcessingError derives from EngineError."""
        assert issubclass(EngineProcessingError, EngineError)

    def test_engine_error_is_exception(self) -> None:
        """EngineError itself derives from the built-in Exception."""
        assert issubclass(EngineError, Exception)

    def test_catch_base_catches_subtypes(self) -> None:
        """Raising either subtype satisfies an EngineError expectation."""
        cases = (
            (EngineUnavailableError, "not installed"),
            (EngineProcessingError, "OCR failed"),
        )
        for error_type, message in cases:
            with pytest.raises(EngineError):
                raise error_type(message)
# ---------------------------------------------------------------------------
# Data types
# ---------------------------------------------------------------------------
class TestWordBox:
    """WordBox construction: default geometry and explicitly supplied fields."""

    def test_default_positions(self) -> None:
        """Geometry fields are zero when only text and confidence are given."""
        box = WordBox(text="VIN", confidence=0.95)
        for field_name in ("x", "y", "width", "height"):
            assert getattr(box, field_name) == 0

    def test_all_fields(self) -> None:
        """Explicitly supplied values can be read back from the instance."""
        box = WordBox(
            text="ABC",
            confidence=0.88,
            x=10,
            y=20,
            width=100,
            height=30,
        )
        assert box.text == "ABC"
        assert box.confidence == 0.88
        assert box.x == 10
        assert box.width == 100
class TestOcrConfig:
    """OcrConfig defaults, whitelist storage and per-instance hints."""

    def test_defaults(self) -> None:
        """A bare OcrConfig exposes the expected default for every field."""
        cfg = OcrConfig()
        assert cfg.char_whitelist is None
        assert cfg.hints == {}
        assert cfg.use_angle_cls is True
        assert cfg.single_line is False
        assert cfg.single_word is False

    def test_vin_whitelist_excludes_ioq(self) -> None:
        """A VIN alphabet passed as whitelist contains none of I, O, Q."""
        vin_alphabet = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
        cfg = OcrConfig(char_whitelist=vin_alphabet)
        for excluded in ("I", "O", "Q"):
            assert excluded not in cfg.char_whitelist

    def test_hints_are_independent_across_instances(self) -> None:
        """Mutating one instance's hints must not leak into another's."""
        first, second = OcrConfig(), OcrConfig()
        first.hints["psm"] = 7
        assert "psm" not in second.hints
class TestOcrEngineResult:
    """OcrEngineResult stores the values it is constructed with."""

    def test_construction(self) -> None:
        """All four fields round-trip through the constructor."""
        vin = "1HGBH41JXMN109186"
        boxes = [WordBox(text=vin, confidence=0.94)]
        outcome = OcrEngineResult(
            text=vin,
            confidence=0.94,
            word_boxes=boxes,
            engine_name="paddleocr",
        )
        assert outcome.text == "1HGBH41JXMN109186"
        assert outcome.confidence == 0.94
        assert len(outcome.word_boxes) == 1
        assert outcome.engine_name == "paddleocr"

    def test_empty_result(self) -> None:
        """An empty text / empty box list result can be constructed."""
        outcome = OcrEngineResult(
            text="",
            confidence=0.0,
            word_boxes=[],
            engine_name="paddleocr",
        )
        assert outcome.text == ""
        assert outcome.word_boxes == []
# ---------------------------------------------------------------------------
# OcrEngine ABC
# ---------------------------------------------------------------------------
class TestOcrEngineABC:
    """OcrEngine cannot be built directly; a full subclass can."""

    def test_cannot_instantiate_directly(self) -> None:
        """Instantiating OcrEngine itself is expected to raise TypeError."""
        with pytest.raises(TypeError):
            OcrEngine()  # type: ignore[abstract]

    def test_concrete_subclass_works(self) -> None:
        """A subclass providing ``name`` and ``recognize`` is usable."""

        class StubEngine(OcrEngine):
            """Smallest possible engine: fixed name, fixed result."""

            @property
            def name(self) -> str:
                return "stub"

            def recognize(
                self, image_bytes: bytes, config: OcrConfig
            ) -> OcrEngineResult:
                canned = OcrEngineResult(
                    text="ok", confidence=1.0, word_boxes=[], engine_name="stub"
                )
                return canned

        stub = StubEngine()
        assert stub.name == "stub"
        outcome = stub.recognize(b"", OcrConfig())
        assert outcome.text == "ok"
# ---------------------------------------------------------------------------
# PaddleOcrEngine
# ---------------------------------------------------------------------------
class TestPaddleOcrEngine:
    """PaddleOcrEngine behaviour with the underlying OCR object mocked out."""

    @staticmethod
    def _engine_with_predictions(*predictions: MagicMock):
        """Return a PaddleOcrEngine whose ``_ocr.predict`` yields *predictions*.

        The engine module is imported lazily here, as in every test of this
        class, and the mocked backend is assigned directly to ``engine._ocr``.
        """
        from app.engines.paddle_engine import PaddleOcrEngine

        engine = PaddleOcrEngine()
        backend = MagicMock()
        backend.predict.return_value = iter(list(predictions))
        engine._ocr = backend
        return engine

    def test_name(self) -> None:
        """The engine reports itself as ``paddleocr``."""
        from app.engines.paddle_engine import PaddleOcrEngine

        assert PaddleOcrEngine().name == "paddleocr"

    def test_lazy_init_not_loaded_at_construction(self) -> None:
        """A freshly constructed engine has not created its OCR backend."""
        from app.engines.paddle_engine import PaddleOcrEngine

        assert PaddleOcrEngine()._ocr is None

    def test_recognize_empty_results(self) -> None:
        """An empty prediction yields empty text, zero confidence, no boxes."""
        engine = self._engine_with_predictions(
            _mock_paddle_result(dt_polys=[], rec_texts=[], rec_scores=[])
        )

        result = engine.recognize(_create_test_image_bytes(), OcrConfig())

        assert result.text == ""
        assert result.confidence == 0.0
        assert result.word_boxes == []
        assert result.engine_name == "paddleocr"

    def test_recognize_with_results(self) -> None:
        """Two recognized words are space-joined and their scores averaged."""
        engine = self._engine_with_predictions(
            _mock_paddle_result(
                dt_polys=[
                    [[10, 20], [110, 20], [110, 50], [10, 50]],
                    [[10, 60], [110, 60], [110, 90], [10, 90]],
                ],
                rec_texts=["HELLO", "WORLD"],
                rec_scores=[0.95, 0.88],
            )
        )

        result = engine.recognize(_create_test_image_bytes(), OcrConfig())

        assert result.text == "HELLO WORLD"
        # Mean of 0.95 and 0.88, compared with a tolerance.
        assert abs(result.confidence - 0.915) < 0.01
        assert len(result.word_boxes) == 2
        first_box, second_box = result.word_boxes
        assert first_box.text == "HELLO"
        assert first_box.confidence == 0.95
        assert second_box.text == "WORLD"
        assert result.engine_name == "paddleocr"

    def test_recognize_whitelist_filters_characters(self) -> None:
        """Characters outside the whitelist are removed from the output."""
        engine = self._engine_with_predictions(
            _mock_paddle_result(
                dt_polys=[[[0, 0], [100, 0], [100, 30], [0, 30]]],
                rec_texts=["1HG-BH4!"],
                rec_scores=[0.9],
            )
        )
        config = OcrConfig(char_whitelist="ABCDEFGHJKLMNPRSTUVWXYZ0123456789")

        result = engine.recognize(_create_test_image_bytes(), config)

        for rejected in ("-", "!"):
            assert rejected not in result.text
        assert result.word_boxes[0].text == "1HGBH4"

    def test_recognize_quadrilateral_to_bounding_box(self) -> None:
        """A four-point polygon is reduced to its axis-aligned bounding box."""
        # Slightly rotated quad: min x=8, min y=20, max x=110, max y=55
        engine = self._engine_with_predictions(
            _mock_paddle_result(
                dt_polys=[[[10, 20], [110, 25], [108, 55], [8, 50]]],
                rec_texts=["TEXT"],
                rec_scores=[0.9],
            )
        )

        result = engine.recognize(_create_test_image_bytes(), OcrConfig())

        box = result.word_boxes[0]
        assert box.x == 8
        assert box.y == 20
        assert box.width == 102  # 110 - 8
        assert box.height == 35  # 55 - 20

    def test_recognize_skips_empty_after_whitelist(self) -> None:
        """Text consisting only of non-whitelisted characters is skipped."""
        engine = self._engine_with_predictions(
            _mock_paddle_result(
                dt_polys=[[[0, 0], [50, 0], [50, 20], [0, 20]]],
                rec_texts=["---"],
                rec_scores=[0.9],
            )
        )

        result = engine.recognize(
            _create_test_image_bytes(), OcrConfig(char_whitelist="ABC")
        )

        assert result.text == ""
        assert result.word_boxes == []
        assert result.confidence == 0.0

    def test_import_error_raises_unavailable(self) -> None:
        """A failing ``paddleocr`` import surfaces as EngineUnavailableError."""
        # ``builtins`` is the documented home of the import hook; the
        # ``__builtins__`` global is a CPython detail that may be either a
        # module or a dict depending on how this file was loaded.
        import builtins

        from app.engines.paddle_engine import PaddleOcrEngine

        engine = PaddleOcrEngine()
        engine._ocr = None

        with patch.dict("sys.modules", {"paddleocr": None}):
            with patch(
                "app.engines.paddle_engine.importlib.import_module",
                side_effect=ImportError("No module"),
            ):
                # Capture the genuine hook before it is patched so every
                # import other than paddleocr is still served normally.
                real_import = builtins.__import__

                def mock_import(name, *args, **kwargs):
                    if name == "paddleocr":
                        raise ImportError("No module named 'paddleocr'")
                    return real_import(name, *args, **kwargs)

                with patch("builtins.__import__", side_effect=mock_import):
                    with pytest.raises(EngineUnavailableError, match="paddleocr"):
                        engine._get_ocr()

    def test_processing_error_on_exception(self) -> None:
        """A backend crash during predict() becomes EngineProcessingError."""
        from app.engines.paddle_engine import PaddleOcrEngine

        engine = PaddleOcrEngine()
        backend = MagicMock()
        backend.predict.side_effect = RuntimeError("OCR crashed")
        engine._ocr = backend

        with pytest.raises(
            EngineProcessingError, match="PaddleOCR recognition failed"
        ):
            engine.recognize(_create_test_image_bytes(), OcrConfig())
# ---------------------------------------------------------------------------
# CloudEngine
# ---------------------------------------------------------------------------
class TestCloudEngine:
    """CloudEngine behaviour with the Vision client and imports mocked out."""

    @staticmethod
    def _google_module_stubs() -> dict:
        """Return ``sys.modules`` entries standing in for the google packages."""
        return {
            "google.cloud.vision": MagicMock(),
            "google.cloud": MagicMock(),
            "google": MagicMock(),
        }

    def test_name(self) -> None:
        """The engine reports itself as ``google_vision``."""
        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/fake/path.json")
        assert engine.name == "google_vision"

    def test_lazy_init_not_loaded_at_construction(self) -> None:
        """A freshly constructed engine has not created its client."""
        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/fake/path.json")
        assert engine._client is None

    def test_missing_key_file_raises_unavailable(self) -> None:
        """A non-existent key path makes ``_get_client`` raise unavailable."""
        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/nonexistent/key.json")
        with pytest.raises(
            EngineUnavailableError, match="credential config not found"
        ):
            engine._get_client()

    @patch("os.path.isfile", return_value=True)
    def test_missing_library_raises_unavailable(self, _mock_isfile: MagicMock) -> None:
        """A failing ``google.cloud`` import surfaces as EngineUnavailableError."""
        import builtins

        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/fake/key.json")

        # Capture the genuine hook *before* patching. Looking up the bare
        # name ``__import__`` inside the replacement would resolve to the
        # patched mock itself and recurse without bound on every import that
        # is not a google.cloud one.
        real_import = builtins.__import__

        def mock_import(name, *args, **kwargs):
            if "google.cloud" in name:
                raise ImportError("No module named 'google.cloud'")
            return real_import(name, *args, **kwargs)

        with patch("builtins.__import__", side_effect=mock_import):
            with pytest.raises(EngineUnavailableError, match="google-cloud-vision"):
                engine._get_client()

    def test_recognize_empty_annotations(self) -> None:
        """No text annotations yields empty text and zero confidence."""
        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/fake/key.json")
        response = MagicMock()
        response.error.message = ""
        response.text_annotations = []
        client = MagicMock()
        client.text_detection.return_value = response
        engine._client = client

        # Mock the google.cloud.vision import inside recognize()
        with patch.dict("sys.modules", self._google_module_stubs()):
            result = engine.recognize(b"fake_image", OcrConfig())

        assert result.text == ""
        assert result.confidence == 0.0
        assert result.engine_name == "google_vision"

    def test_recognize_api_error_raises_processing_error(self) -> None:
        """A non-empty response error message becomes EngineProcessingError."""
        from app.engines.cloud_engine import CloudEngine

        engine = CloudEngine(key_path="/fake/key.json")
        response = MagicMock()
        response.error.message = "API quota exceeded"
        client = MagicMock()
        client.text_detection.return_value = response
        engine._client = client

        with patch.dict("sys.modules", self._google_module_stubs()):
            with pytest.raises(EngineProcessingError, match="API quota exceeded"):
                engine.recognize(b"fake_image", OcrConfig())
# ---------------------------------------------------------------------------
# HybridEngine
# ---------------------------------------------------------------------------
class TestHybridEngine:
    """Tests for HybridEngine with monthly Vision API cap."""

    def _mock_redis(self, current_count: int = 0) -> MagicMock:
        """Create a mock Redis instance with a configurable counter value.

        ``get`` returns the count as a string, or ``None`` when the count is
        zero; ``pipeline`` returns a dedicated mock the tests can inspect.
        """
        stored = None
        if current_count:
            stored = str(current_count)
        redis_stub = MagicMock()
        redis_stub.get.return_value = stored
        redis_stub.pipeline.return_value = MagicMock()
        return redis_stub

    @staticmethod
    def _engine_pair(
        primary_name: str, fallback_name: str
    ) -> tuple[MagicMock, MagicMock]:
        """Return ``(primary, fallback)`` OcrEngine-spec mocks with names set."""
        primary = MagicMock(spec=OcrEngine)
        fallback = MagicMock(spec=OcrEngine)
        primary.name = primary_name
        fallback.name = fallback_name
        return primary, fallback

    def test_name_with_fallback(self) -> None:
        """The hybrid name combines the primary and fallback engine names."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "google_vision")
        hybrid = HybridEngine(primary=primary, fallback=fallback)
        assert hybrid.name == "hybrid(paddleocr+google_vision)"

    def test_name_without_fallback(self) -> None:
        """Without a fallback the name's second half reads ``none``."""
        from app.engines.hybrid_engine import HybridEngine

        primary = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        hybrid = HybridEngine(primary=primary)
        assert hybrid.name == "hybrid(paddleocr+none)"

    # --- Local-primary path (original confidence-based fallback) ---

    def test_high_confidence_skips_fallback(self) -> None:
        """A primary result above the threshold is returned; no fallback call."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "tesseract")
        primary.recognize.return_value = _make_result("VIN123", 0.95, "paddleocr")
        hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN123"
        assert outcome.engine_name == "paddleocr"
        fallback.recognize.assert_not_called()

    def test_low_confidence_no_fallback_returns_primary(self) -> None:
        """With no fallback, a low-confidence primary result is still returned."""
        from app.engines.hybrid_engine import HybridEngine

        primary = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        hybrid = HybridEngine(primary=primary, fallback=None, threshold=0.6)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN123"

    def test_exact_threshold_skips_fallback(self) -> None:
        """When confidence == threshold, no fallback needed (>= check)."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "tesseract")
        primary.recognize.return_value = _make_result("VIN", 0.6, "paddleocr")
        hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.engine_name == "paddleocr"
        fallback.recognize.assert_not_called()

    # --- Local-primary with cloud fallback (subject to monthly cap) ---

    def test_low_confidence_triggers_cloud_fallback(self) -> None:
        """Low primary confidence with a zero counter returns the cloud result."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "google_vision")
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.return_value = _make_result(
            "VIN456", 0.92, "google_vision"
        )
        hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        hybrid._redis = self._mock_redis(current_count=0)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN456"
        assert outcome.engine_name == "google_vision"

    def test_cloud_fallback_skipped_when_limit_reached(self) -> None:
        """A counter equal to the monthly limit suppresses the cloud fallback."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "google_vision")
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        hybrid = HybridEngine(
            primary=primary,
            fallback=fallback,
            threshold=0.6,
            monthly_limit=100,
        )
        hybrid._redis = self._mock_redis(current_count=100)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN123"
        assert outcome.engine_name == "paddleocr"
        fallback.recognize.assert_not_called()

    def test_fallback_lower_confidence_returns_primary(self) -> None:
        """A fallback result less confident than the primary is discarded."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "google_vision")
        primary.recognize.return_value = _make_result("VIN123", 0.4, "paddleocr")
        fallback.recognize.return_value = _make_result(
            "VIN456", 0.3, "google_vision"
        )
        hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        hybrid._redis = self._mock_redis(current_count=0)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN123"

    def test_cloud_fallback_error_returns_primary(self) -> None:
        """A cloud fallback that raises leaves the primary result in place."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "google_vision")
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.side_effect = EngineUnavailableError("key missing")
        hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        hybrid._redis = self._mock_redis(current_count=0)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN123"

    @patch("app.engines.hybrid_engine.time")
    def test_cloud_fallback_timeout_returns_primary(
        self, mock_time: MagicMock
    ) -> None:
        """With the clock reading 0.0 then 6.0, the primary result is kept."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "google_vision")
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.return_value = _make_result(
            "VIN456", 0.92, "google_vision"
        )
        # Successive monotonic() readings handed out by the patched module.
        mock_time.monotonic.side_effect = [0.0, 6.0]
        hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        hybrid._redis = self._mock_redis(current_count=0)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN123"

    # --- Cloud-primary path ---

    def test_cloud_primary_returns_result_when_under_limit(self) -> None:
        """Under the monthly limit the cloud primary answers on its own."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("google_vision", "paddleocr")
        primary.recognize.return_value = _make_result(
            "VIN789", 0.95, "google_vision"
        )
        hybrid = HybridEngine(
            primary=primary,
            fallback=fallback,
            threshold=0.6,
            monthly_limit=1000,
        )
        hybrid._redis = self._mock_redis(current_count=500)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN789"
        assert outcome.engine_name == "google_vision"
        fallback.recognize.assert_not_called()

    def test_cloud_primary_falls_back_when_limit_reached(self) -> None:
        """At the monthly limit the cloud primary is skipped for the fallback."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("google_vision", "paddleocr")
        fallback.recognize.return_value = _make_result(
            "VIN_LOCAL", 0.75, "paddleocr"
        )
        hybrid = HybridEngine(
            primary=primary,
            fallback=fallback,
            threshold=0.6,
            monthly_limit=1000,
        )
        hybrid._redis = self._mock_redis(current_count=1000)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN_LOCAL"
        assert outcome.engine_name == "paddleocr"
        primary.recognize.assert_not_called()

    def test_cloud_primary_no_fallback_raises_when_limit_reached(self) -> None:
        """At the limit with no fallback, recognize raises a processing error."""
        from app.engines.hybrid_engine import HybridEngine

        primary = MagicMock(spec=OcrEngine)
        primary.name = "google_vision"
        hybrid = HybridEngine(
            primary=primary,
            fallback=None,
            threshold=0.6,
            monthly_limit=1000,
        )
        hybrid._redis = self._mock_redis(current_count=1000)

        with pytest.raises(EngineProcessingError, match="no fallback"):
            hybrid.recognize(b"img", OcrConfig())

    def test_cloud_primary_error_falls_back(self) -> None:
        """A cloud primary that raises hands over to the local fallback."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("google_vision", "paddleocr")
        primary.recognize.side_effect = EngineUnavailableError("API down")
        fallback.recognize.return_value = _make_result(
            "VIN_LOCAL", 0.75, "paddleocr"
        )
        hybrid = HybridEngine(
            primary=primary,
            fallback=fallback,
            threshold=0.6,
            monthly_limit=1000,
        )
        hybrid._redis = self._mock_redis(current_count=500)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN_LOCAL"
        assert outcome.engine_name == "paddleocr"

    # --- Redis counter behavior ---

    def test_counter_increments_on_successful_cloud_call(self) -> None:
        """A successful cloud call runs one incr/expire/execute pipeline."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("google_vision", "paddleocr")
        primary.recognize.return_value = _make_result(
            "VIN789", 0.95, "google_vision"
        )
        redis_stub = self._mock_redis(current_count=10)
        hybrid = HybridEngine(
            primary=primary,
            fallback=fallback,
            threshold=0.6,
            monthly_limit=1000,
        )
        hybrid._redis = redis_stub

        hybrid.recognize(b"img", OcrConfig())

        redis_stub.pipeline.assert_called_once()
        pipeline = redis_stub.pipeline.return_value
        pipeline.incr.assert_called_once()
        pipeline.expire.assert_called_once()
        pipeline.execute.assert_called_once()

    def test_counter_not_incremented_when_limit_reached(self) -> None:
        """At the limit no pipeline is opened, so the counter is untouched."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("google_vision", "paddleocr")
        fallback.recognize.return_value = _make_result(
            "VIN_LOCAL", 0.75, "paddleocr"
        )
        redis_stub = self._mock_redis(current_count=1000)
        hybrid = HybridEngine(
            primary=primary,
            fallback=fallback,
            threshold=0.6,
            monthly_limit=1000,
        )
        hybrid._redis = redis_stub

        hybrid.recognize(b"img", OcrConfig())

        redis_stub.pipeline.assert_not_called()

    def test_redis_failure_assumes_limit_not_reached(self) -> None:
        """A failing Redis ``get`` does not stop the cloud primary from answering."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("google_vision", "paddleocr")
        primary.recognize.return_value = _make_result(
            "VIN789", 0.95, "google_vision"
        )
        broken_redis = MagicMock()
        broken_redis.get.side_effect = Exception("Redis connection refused")
        broken_redis.pipeline.return_value = MagicMock()
        hybrid = HybridEngine(
            primary=primary,
            fallback=fallback,
            threshold=0.6,
            monthly_limit=1000,
        )
        hybrid._redis = broken_redis

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN789"

    # --- Non-cloud fallback path (no cap needed) ---

    def test_non_cloud_fallback_not_subject_to_cap(self) -> None:
        """A tesseract fallback is used without any Redis mock installed."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "tesseract")
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.return_value = _make_result(
            "VIN456", 0.92, "tesseract"
        )
        hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN456"
        assert outcome.engine_name == "tesseract"

    @patch("app.engines.hybrid_engine.time")
    def test_non_cloud_fallback_timeout_returns_primary(
        self, mock_time: MagicMock
    ) -> None:
        """With the clock reading 0.0 then 6.0, the primary result is kept."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "tesseract")
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.return_value = _make_result(
            "VIN456", 0.92, "tesseract"
        )
        # Successive monotonic() readings handed out by the patched module.
        mock_time.monotonic.side_effect = [0.0, 6.0]
        hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN123"

    def test_non_cloud_fallback_error_returns_primary(self) -> None:
        """A crashing non-cloud fallback leaves the primary result in place."""
        from app.engines.hybrid_engine import HybridEngine

        primary, fallback = self._engine_pair("paddleocr", "tesseract")
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.side_effect = RuntimeError("crash")
        hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)

        outcome = hybrid.recognize(b"img", OcrConfig())

        assert outcome.text == "VIN123"
# ---------------------------------------------------------------------------
# Engine factory
# ---------------------------------------------------------------------------
class TestEngineFactory:
    """engine_factory: engine selection from settings and hybrid wiring."""

    def test_unknown_engine_raises(self) -> None:
        """An unrecognized engine name raises EngineUnavailableError."""
        from app.engines.engine_factory import _create_single_engine

        with pytest.raises(EngineUnavailableError, match="Unknown engine"):
            _create_single_engine("nonexistent")

    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_defaults_to_settings_primary(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        """With no argument, the engine named in settings is created."""
        mock_settings.configure_mock(
            ocr_primary_engine="paddleocr",
            ocr_fallback_engine="none",
        )
        built_engine = MagicMock(spec=OcrEngine)
        mock_create.return_value = built_engine
        from app.engines.engine_factory import create_engine

        produced = create_engine()

        mock_create.assert_called_once_with("paddleocr")
        assert produced == built_engine

    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_explicit_name_overrides_settings(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        """An explicitly passed engine name is the one handed to the creator."""
        mock_settings.configure_mock(ocr_fallback_engine="none")
        mock_create.return_value = MagicMock(spec=OcrEngine)
        from app.engines.engine_factory import create_engine

        create_engine("google_vision")

        mock_create.assert_called_once_with("google_vision")

    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_creates_hybrid_when_fallback_configured(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        """A configured fallback engine produces a HybridEngine instance."""
        mock_settings.configure_mock(
            ocr_primary_engine="paddleocr",
            ocr_fallback_engine="google_vision",
            ocr_fallback_threshold=0.7,
            vision_monthly_limit=1000,
        )
        # First creator call yields the primary, the second the fallback.
        mock_create.side_effect = [
            MagicMock(spec=OcrEngine),
            MagicMock(spec=OcrEngine),
        ]
        from app.engines.engine_factory import create_engine
        from app.engines.hybrid_engine import HybridEngine

        produced = create_engine()

        assert isinstance(produced, HybridEngine)

    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_fallback_failure_returns_primary_only(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        """If creating the fallback raises, the bare primary is returned."""
        mock_settings.configure_mock(
            ocr_primary_engine="paddleocr",
            ocr_fallback_engine="google_vision",
            ocr_fallback_threshold=0.6,
        )
        primary_engine = MagicMock(spec=OcrEngine)
        # Primary creation succeeds; the fallback creation raises.
        mock_create.side_effect = [
            primary_engine,
            EngineUnavailableError("no key"),
        ]
        from app.engines.engine_factory import create_engine

        produced = create_engine()

        assert produced == primary_engine