chore: update OCR tests and documentation (refs #121)
Some checks failed
Deploy to Staging / Build Images (pull_request) Failing after 7m4s
Deploy to Staging / Deploy to Staging (pull_request) Has been skipped
Deploy to Staging / Verify Staging (pull_request) Has been skipped
Deploy to Staging / Notify Staging Ready (pull_request) Has been skipped
Deploy to Staging / Notify Staging Failure (pull_request) Successful in 7s

Add engine abstraction tests and update docs to reflect PaddleOCR primary
architecture with optional Google Vision cloud fallback.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-07 11:42:51 -06:00
parent 1e96baca6f
commit 47c5676498
7 changed files with 870 additions and 68 deletions

View File

@@ -1,11 +1,12 @@
"""Integration tests for VIN extraction endpoint."""
"""Integration tests for VIN extraction endpoint and engine integration."""
import io
from unittest.mock import patch, MagicMock
import pytest
from fastapi.testclient import TestClient
from PIL import Image, ImageDraw, ImageFont
from PIL import Image, ImageDraw
from app.engines.base_engine import OcrConfig, OcrEngineResult, WordBox
from app.main import app
@@ -240,3 +241,106 @@ class TestVinExtractionContentTypes:
)
assert response.status_code == 200
# ---------------------------------------------------------------------------
# VIN extractor engine integration tests
# ---------------------------------------------------------------------------
class TestVinExtractorEngineIntegration:
    """Exercise VinExtractor against a mocked OCR engine.

    The ``_perform_ocr`` tests patch ``create_engine`` in the extractor
    module and inspect the ``OcrConfig`` handed to ``engine.recognize``.
    The ``_calculate_base_confidence`` tests call the helper directly on
    an instance created without running ``__init__``.
    """

    @patch("app.extractors.vin_extractor.create_engine")
    def test_perform_ocr_calls_engine_with_vin_config(
        self, mock_create_engine: MagicMock
    ) -> None:
        """A default ``_perform_ocr`` call sends a VIN-specific config.

        Verifies the whitelist, angle classification flag and both
        single-line/single-word flags on the config, plus the text and
        per-word confidences returned to the caller.
        """
        from app.extractors.vin_extractor import VinExtractor

        vin_text = "1HGBH41JXMN109186"
        score = 0.94
        ocr_result = OcrEngineResult(
            text=vin_text,
            confidence=score,
            word_boxes=[WordBox(text=vin_text, confidence=score)],
            engine_name="paddleocr",
        )
        engine = MagicMock()
        engine.recognize.return_value = ocr_result
        mock_create_engine.return_value = engine

        ocr_text, word_confidences = VinExtractor()._perform_ocr(b"fake_image")

        engine.recognize.assert_called_once()
        # The config is expected as the second positional argument.
        positional_args, _ = engine.recognize.call_args
        config = positional_args[1]
        assert isinstance(config, OcrConfig)
        assert config.char_whitelist == VinExtractor.VIN_WHITELIST
        assert config.use_angle_cls is True
        assert config.single_line is False
        assert config.single_word is False
        assert ocr_text == vin_text
        assert word_confidences == [score]

    @patch("app.extractors.vin_extractor.create_engine")
    def test_perform_ocr_single_line_mode(
        self, mock_create_engine: MagicMock
    ) -> None:
        """``single_line=True`` reaches the engine config; ``single_word`` stays off."""
        from app.extractors.vin_extractor import VinExtractor

        ocr_result = OcrEngineResult(
            text="VIN123",
            confidence=0.9,
            word_boxes=[],
            engine_name="paddleocr",
        )
        engine = MagicMock()
        engine.recognize.return_value = ocr_result
        mock_create_engine.return_value = engine

        VinExtractor()._perform_ocr(b"img", single_line=True)

        positional_args, _ = engine.recognize.call_args
        config = positional_args[1]
        assert config.single_line is True
        assert config.single_word is False

    @patch("app.extractors.vin_extractor.create_engine")
    def test_perform_ocr_single_word_mode(
        self, mock_create_engine: MagicMock
    ) -> None:
        """``single_word=True`` reaches the engine config; ``single_line`` stays off."""
        from app.extractors.vin_extractor import VinExtractor

        ocr_result = OcrEngineResult(
            text="VIN123",
            confidence=0.9,
            word_boxes=[],
            engine_name="paddleocr",
        )
        engine = MagicMock()
        engine.recognize.return_value = ocr_result
        mock_create_engine.return_value = engine

        VinExtractor()._perform_ocr(b"img", single_word=True)

        positional_args, _ = engine.recognize.call_args
        config = positional_args[1]
        assert config.single_word is True
        assert config.single_line is False

    def test_calculate_base_confidence_empty_returns_default(self) -> None:
        """With no word confidences the helper returns 0.5."""
        from app.extractors.vin_extractor import VinExtractor

        # Bypass __init__: only the confidence helper is under test.
        bare_extractor = VinExtractor.__new__(VinExtractor)

        assert bare_extractor._calculate_base_confidence([]) == 0.5

    def test_calculate_base_confidence_weighted_blend(self) -> None:
        """Two values blend as 70% of the average plus 30% of the minimum."""
        from app.extractors.vin_extractor import VinExtractor

        # Bypass __init__: only the confidence helper is under test.
        bare_extractor = VinExtractor.__new__(VinExtractor)

        # average = (0.9 + 0.8) / 2 = 0.85 and minimum = 0.8, so
        # 0.7 * 0.85 + 0.3 * 0.8 = 0.595 + 0.24 = 0.835
        expected = 0.835
        tolerance = 0.001
        blended = bare_extractor._calculate_base_confidence([0.9, 0.8])

        assert abs(blended - expected) < tolerance

    def test_calculate_base_confidence_single_value(self) -> None:
        """One value is its own average and minimum, so it comes back unchanged."""
        from app.extractors.vin_extractor import VinExtractor

        # Bypass __init__: only the confidence helper is under test.
        bare_extractor = VinExtractor.__new__(VinExtractor)

        expected = 0.92
        tolerance = 0.001
        blended = bare_extractor._calculate_base_confidence([0.92])

        assert abs(blended - expected) < tolerance