Files
motovaultpro/ocr/tests/test_vin_extraction.py
Eric Gullickson 47c5676498
Some checks failed
Deploy to Staging / Build Images (pull_request) Failing after 7m4s
Deploy to Staging / Deploy to Staging (pull_request) Has been skipped
Deploy to Staging / Verify Staging (pull_request) Has been skipped
Deploy to Staging / Notify Staging Ready (pull_request) Has been skipped
Deploy to Staging / Notify Staging Failure (pull_request) Successful in 7s
chore: update OCR tests and documentation (refs #121)
Add engine abstraction tests and update docs to reflect PaddleOCR primary
architecture with optional Google Vision cloud fallback.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-07 11:42:51 -06:00

347 lines
12 KiB
Python

"""Integration tests for VIN extraction endpoint and engine integration."""
import io
from unittest.mock import patch, MagicMock
import pytest
from fastapi.testclient import TestClient
from PIL import Image, ImageDraw
from app.engines.base_engine import OcrConfig, OcrEngineResult, WordBox
from app.main import app
@pytest.fixture
def client() -> TestClient:
    """Provide a ``TestClient`` bound to the application imported from ``app.main``."""
    test_client = TestClient(app)
    return test_client
def create_vin_image(vin: str = "1HGBH41JXMN109186") -> bytes:
    """Render ``vin`` as black text on a white 400x100 RGB canvas.

    Returns the picture encoded as PNG bytes.
    """
    white = (255, 255, 255)
    black = (0, 0, 0)

    canvas = Image.new("RGB", (400, 100), white)
    # No font argument is passed, so the drawing library's default font is used.
    ImageDraw.Draw(canvas).text((50, 40), vin, fill=black)

    png_stream = io.BytesIO()
    canvas.save(png_stream, format="PNG")
    return png_stream.getvalue()
def create_empty_image() -> bytes:
    """Return PNG bytes of a plain white 400x100 RGB image with nothing drawn on it."""
    blank_canvas = Image.new("RGB", (400, 100), (255, 255, 255))
    png_stream = io.BytesIO()
    blank_canvas.save(png_stream, format="PNG")
    return png_stream.getvalue()
class TestVinExtractionEndpoint:
    """Exercise the POST /extract/vin route through the test client."""

    def test_endpoint_exists(self, client: TestClient) -> None:
        """The root listing advertises at least one endpoint mentioning "vin"."""
        root_response = client.get("/")
        assert root_response.status_code == 200

        listed = root_response.json().get("endpoints", [])
        assert any("vin" in route.lower() for route in listed)

    def test_extract_vin_no_file(self, client: TestClient) -> None:
        """A request without a file part is rejected with a validation error."""
        resp = client.post("/extract/vin")
        # 422 is the validation-error status.
        assert resp.status_code == 422

    def test_extract_vin_empty_file(self, client: TestClient) -> None:
        """A zero-byte upload yields 400 with "empty" in the error detail."""
        upload = {"file": ("empty.png", b"", "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 400
        detail = resp.json()["detail"]
        assert "empty" in detail.lower()

    def test_extract_vin_large_file(self, client: TestClient) -> None:
        """An oversized upload is rejected with 413."""
        # 11 MiB payload, i.e. larger than 10MB.
        oversized = b"x" * (11 * 1024 * 1024)
        upload = {"file": ("large.png", oversized, "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 413

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_success(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A successful extraction is reported with VIN, confidence and timing."""
        from app.extractors.vin_extractor import VinExtractionResult

        mocked_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.94,
            bounding_box=None,
            alternatives=[],
            processing_time_ms=500,
        )
        mock_extract.return_value = mocked_result

        upload = {"file": ("vin.png", create_vin_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["success"] is True
        assert payload["vin"] == "1HGBH41JXMN109186"
        assert payload["confidence"] == 0.94
        assert "processingTimeMs" in payload

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_not_found(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A failed extraction still answers 200, with success=false and the error."""
        from app.extractors.vin_extractor import VinExtractionResult

        mocked_result = VinExtractionResult(
            success=False,
            vin=None,
            confidence=0.0,
            error="No VIN pattern found in image",
            processing_time_ms=300,
        )
        mock_extract.return_value = mocked_result

        upload = {"file": ("empty.png", create_empty_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["success"] is False
        assert payload["vin"] is None
        assert payload["error"] == "No VIN pattern found in image"

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_with_alternatives(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """Alternative candidates from the extractor appear in the response."""
        from app.extractors.vin_extractor import VinExtractionResult, VinAlternative

        candidate = VinAlternative(vin="1HGBH41JXMN109186", confidence=0.72)
        mocked_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.94,
            bounding_box=None,
            alternatives=[candidate],
            processing_time_ms=600,
        )
        mock_extract.return_value = mocked_result

        upload = {"file": ("vin.png", create_vin_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["success"] is True
        returned_alternatives = payload["alternatives"]
        assert len(returned_alternatives) == 1
        assert returned_alternatives[0]["confidence"] == 0.72

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_with_bounding_box(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """The extractor's bounding box is exposed under the "boundingBox" key."""
        from app.extractors.vin_extractor import VinExtractionResult
        from app.preprocessors.vin_preprocessor import BoundingBox

        region = BoundingBox(x=50, y=40, width=300, height=20)
        mocked_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.94,
            bounding_box=region,
            alternatives=[],
            processing_time_ms=500,
        )
        mock_extract.return_value = mocked_result

        upload = {"file": ("vin.png", create_vin_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["boundingBox"] is not None
        assert payload["boundingBox"]["x"] == 50
        assert payload["boundingBox"]["y"] == 40
        assert payload["boundingBox"]["width"] == 300
        assert payload["boundingBox"]["height"] == 20
class TestVinExtractionContentTypes:
    """Check that the VIN endpoint accepts the supported image content types."""

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_accepts_jpeg(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """An upload declared as image/jpeg is answered with 200."""
        from app.extractors.vin_extractor import VinExtractionResult

        mocked_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.9,
            processing_time_ms=400,
        )
        mock_extract.return_value = mocked_result

        # Encode a plain white canvas as JPEG for the upload body.
        jpeg_stream = io.BytesIO()
        Image.new("RGB", (400, 100), (255, 255, 255)).save(jpeg_stream, format="JPEG")

        upload = {"file": ("vin.jpg", jpeg_stream.getvalue(), "image/jpeg")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_accepts_png(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """An upload declared as image/png is answered with 200."""
        from app.extractors.vin_extractor import VinExtractionResult

        mocked_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.9,
            processing_time_ms=400,
        )
        mock_extract.return_value = mocked_result

        upload = {"file": ("vin.png", create_vin_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200
# ---------------------------------------------------------------------------
# VIN extractor engine integration tests
# ---------------------------------------------------------------------------
class TestVinExtractorEngineIntegration:
    """Check how VinExtractor drives the OCR engine abstraction."""

    @patch("app.extractors.vin_extractor.create_engine")
    def test_perform_ocr_calls_engine_with_vin_config(
        self, mock_create_engine: MagicMock
    ) -> None:
        """_perform_ocr hands the engine an OcrConfig with the VIN whitelist and angle_cls."""
        from app.extractors.vin_extractor import VinExtractor

        recognized = OcrEngineResult(
            text="1HGBH41JXMN109186",
            confidence=0.94,
            word_boxes=[WordBox(text="1HGBH41JXMN109186", confidence=0.94)],
            engine_name="paddleocr",
        )
        fake_engine = MagicMock()
        fake_engine.recognize.return_value = recognized
        mock_create_engine.return_value = fake_engine

        vin_extractor_under_test = VinExtractor()
        ocr_text, word_confidences = vin_extractor_under_test._perform_ocr(
            b"fake_image"
        )

        fake_engine.recognize.assert_called_once()
        # call_args[0] is the positional-argument tuple; the config is its second item.
        positional_args = fake_engine.recognize.call_args[0]
        passed_config = positional_args[1]
        assert isinstance(passed_config, OcrConfig)
        assert passed_config.char_whitelist == VinExtractor.VIN_WHITELIST
        assert passed_config.use_angle_cls is True
        assert passed_config.single_line is False
        assert passed_config.single_word is False
        assert ocr_text == "1HGBH41JXMN109186"
        assert word_confidences == [0.94]

    @patch("app.extractors.vin_extractor.create_engine")
    def test_perform_ocr_single_line_mode(
        self, mock_create_engine: MagicMock
    ) -> None:
        """single_line=True reaches the engine config while single_word stays False."""
        from app.extractors.vin_extractor import VinExtractor

        fake_engine = MagicMock()
        fake_engine.recognize.return_value = OcrEngineResult(
            text="VIN123",
            confidence=0.9,
            word_boxes=[],
            engine_name="paddleocr",
        )
        mock_create_engine.return_value = fake_engine

        VinExtractor()._perform_ocr(b"img", single_line=True)

        positional_args = fake_engine.recognize.call_args[0]
        passed_config = positional_args[1]
        assert passed_config.single_line is True
        assert passed_config.single_word is False

    @patch("app.extractors.vin_extractor.create_engine")
    def test_perform_ocr_single_word_mode(
        self, mock_create_engine: MagicMock
    ) -> None:
        """single_word=True reaches the engine config while single_line stays False."""
        from app.extractors.vin_extractor import VinExtractor

        fake_engine = MagicMock()
        fake_engine.recognize.return_value = OcrEngineResult(
            text="VIN123",
            confidence=0.9,
            word_boxes=[],
            engine_name="paddleocr",
        )
        mock_create_engine.return_value = fake_engine

        VinExtractor()._perform_ocr(b"img", single_word=True)

        positional_args = fake_engine.recognize.call_args[0]
        passed_config = positional_args[1]
        assert passed_config.single_word is True
        assert passed_config.single_line is False

    def test_calculate_base_confidence_empty_returns_default(self) -> None:
        """With no word confidences the method is expected to return 0.5."""
        from app.extractors.vin_extractor import VinExtractor

        # __new__ builds the instance without running __init__.
        bare_extractor = VinExtractor.__new__(VinExtractor)
        base_confidence = bare_extractor._calculate_base_confidence([])
        assert base_confidence == 0.5

    def test_calculate_base_confidence_weighted_blend(self) -> None:
        """Expected blend: 70% of the average plus 30% of the minimum."""
        from app.extractors.vin_extractor import VinExtractor

        bare_extractor = VinExtractor.__new__(VinExtractor)
        # average = (0.9 + 0.8) / 2 = 0.85 and minimum = 0.8, so
        # expected = 0.7 * 0.85 + 0.3 * 0.8 = 0.595 + 0.24 = 0.835
        blended = bare_extractor._calculate_base_confidence([0.9, 0.8])
        assert -0.001 < blended - 0.835 < 0.001

    def test_calculate_base_confidence_single_value(self) -> None:
        """One value means average == minimum, so the result equals that value."""
        from app.extractors.vin_extractor import VinExtractor

        bare_extractor = VinExtractor.__new__(VinExtractor)
        lone = bare_extractor._calculate_base_confidence([0.92])
        assert -0.001 < lone - 0.92 < 0.001