Files
motovaultpro/ocr/tests/test_vin_extraction.py
Eric Gullickson 54cbd49171
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 31s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
feat: add VIN photo OCR pipeline (refs #67)
Implement VIN-specific OCR extraction with optimized preprocessing:

- Add POST /extract/vin endpoint for VIN extraction
- VIN preprocessor: CLAHE, deskew, denoise, adaptive threshold
- VIN validator: check digit validation, OCR error correction (I->1, O->0)
- VIN extractor: PSM modes 6/7/8, character whitelist, alternatives
- Response includes confidence, bounding box, and alternatives
- Unit tests for validator and preprocessor
- Integration tests for VIN extraction endpoint

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 19:31:36 -06:00

243 lines
7.9 KiB
Python

"""Integration tests for VIN extraction endpoint."""
import io
from unittest.mock import patch, MagicMock
import pytest
from fastapi.testclient import TestClient
from PIL import Image, ImageDraw, ImageFont
from app.main import app
@pytest.fixture
def client() -> TestClient:
    """Provide a test client wrapping the application imported from ``app.main``."""
    test_client = TestClient(app)
    return test_client
def create_vin_image(vin: str = "1HGBH41JXMN109186") -> bytes:
    """Render ``vin`` as black text on a white 400x100 canvas and return PNG bytes."""
    canvas = Image.new("RGB", (400, 100), (255, 255, 255))

    # No font argument is passed, so the text is drawn with the default font.
    ImageDraw.Draw(canvas).text((50, 40), vin, fill=(0, 0, 0))

    # Encode in memory; the bytes are read out before the stream is closed.
    with io.BytesIO() as png_stream:
        canvas.save(png_stream, format="PNG")
        return png_stream.getvalue()
def create_empty_image() -> bytes:
    """Return PNG bytes for a blank white 400x100 image with nothing drawn on it."""
    blank_canvas = Image.new("RGB", (400, 100), (255, 255, 255))

    # Encode in memory; the bytes are read out before the stream is closed.
    with io.BytesIO() as png_stream:
        blank_canvas.save(png_stream, format="PNG")
        return png_stream.getvalue()
class TestVinExtractionEndpoint:
    """Exercise POST /extract/vin through the test client."""

    @staticmethod
    def _upload(client: TestClient, filename: str, payload: bytes, mime: str):
        """POST ``payload`` to /extract/vin as the multipart ``file`` field.

        Returns the response object produced by ``client.post``. The leading
        underscore keeps pytest from collecting this helper as a test.
        """
        return client.post(
            "/extract/vin",
            files={"file": (filename, payload, mime)},
        )

    def test_endpoint_exists(self, client: TestClient) -> None:
        """The root endpoint lists at least one endpoint containing "vin"."""
        root_response = client.get("/")
        assert root_response.status_code == 200

        advertised = root_response.json().get("endpoints", [])
        assert any("vin" in path.lower() for path in advertised)

    def test_extract_vin_no_file(self, client: TestClient) -> None:
        """Posting without a file part yields a 422 validation error."""
        response = client.post("/extract/vin")
        assert response.status_code == 422  # Validation error

    def test_extract_vin_empty_file(self, client: TestClient) -> None:
        """A zero-byte upload is rejected with 400 and an "empty" detail message."""
        response = self._upload(client, "empty.png", b"", "image/png")

        assert response.status_code == 400
        detail = response.json()["detail"]
        assert "empty" in detail.lower()

    def test_extract_vin_large_file(self, client: TestClient) -> None:
        """An upload larger than 10MB is rejected with 413."""
        # 11 MiB of filler bytes, i.e. just over the 10MB limit being tested.
        oversized_payload = b"x" * (11 * 1024 * 1024)

        response = self._upload(client, "large.png", oversized_payload, "image/png")

        assert response.status_code == 413

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_success(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A successful (mocked) extraction is returned with VIN, confidence and timing."""
        from app.extractors.vin_extractor import VinExtractionResult

        stubbed_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.94,
            bounding_box=None,
            alternatives=[],
            processing_time_ms=500,
        )
        mock_extract.return_value = stubbed_result

        response = self._upload(client, "vin.png", create_vin_image(), "image/png")

        assert response.status_code == 200
        body = response.json()
        assert body["success"] is True
        assert body["vin"] == "1HGBH41JXMN109186"
        assert body["confidence"] == 0.94
        assert "processingTimeMs" in body

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_not_found(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A failed (mocked) extraction still answers 200, with success=false and an error."""
        from app.extractors.vin_extractor import VinExtractionResult

        stubbed_result = VinExtractionResult(
            success=False,
            vin=None,
            confidence=0.0,
            error="No VIN pattern found in image",
            processing_time_ms=300,
        )
        mock_extract.return_value = stubbed_result

        response = self._upload(client, "empty.png", create_empty_image(), "image/png")

        assert response.status_code == 200
        body = response.json()
        assert body["success"] is False
        assert body["vin"] is None
        assert body["error"] == "No VIN pattern found in image"

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_with_alternatives(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """Alternatives supplied by the (mocked) extractor appear in the response."""
        from app.extractors.vin_extractor import VinExtractionResult, VinAlternative

        # NOTE(review): the alternative repeats the primary VIN, so this test only
        # distinguishes the two by confidence - confirm whether that is intended.
        lone_alternative = VinAlternative(vin="1HGBH41JXMN109186", confidence=0.72)
        stubbed_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.94,
            bounding_box=None,
            alternatives=[lone_alternative],
            processing_time_ms=600,
        )
        mock_extract.return_value = stubbed_result

        response = self._upload(client, "vin.png", create_vin_image(), "image/png")

        assert response.status_code == 200
        body = response.json()
        assert body["success"] is True
        assert len(body["alternatives"]) == 1
        assert body["alternatives"][0]["confidence"] == 0.72

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_with_bounding_box(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A bounding box from the (mocked) extractor is exposed as ``boundingBox``."""
        from app.extractors.vin_extractor import VinExtractionResult
        from app.preprocessors.vin_preprocessor import BoundingBox

        stubbed_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.94,
            bounding_box=BoundingBox(x=50, y=40, width=300, height=20),
            alternatives=[],
            processing_time_ms=500,
        )
        mock_extract.return_value = stubbed_result

        response = self._upload(client, "vin.png", create_vin_image(), "image/png")

        assert response.status_code == 200
        body = response.json()
        assert body["boundingBox"] is not None
        box = body["boundingBox"]
        assert box["x"] == 50
        assert box["y"] == 40
        assert box["width"] == 300
        assert box["height"] == 20
class TestVinExtractionContentTypes:
    """Check that POST /extract/vin accepts JPEG and PNG uploads."""

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_accepts_jpeg(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A JPEG upload is answered with HTTP 200 (extractor mocked)."""
        from app.extractors.vin_extractor import VinExtractionResult

        stubbed_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.9,
            processing_time_ms=400,
        )
        mock_extract.return_value = stubbed_result

        # Encode a blank white canvas as JPEG in memory.
        jpeg_stream = io.BytesIO()
        Image.new("RGB", (400, 100), (255, 255, 255)).save(jpeg_stream, format="JPEG")
        upload = ("vin.jpg", jpeg_stream.getvalue(), "image/jpeg")

        response = client.post("/extract/vin", files={"file": upload})

        assert response.status_code == 200

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_accepts_png(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A PNG upload is answered with HTTP 200 (extractor mocked)."""
        from app.extractors.vin_extractor import VinExtractionResult

        stubbed_result = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.9,
            processing_time_ms=400,
        )
        mock_extract.return_value = stubbed_result

        upload = ("vin.png", create_vin_image(), "image/png")

        response = client.post("/extract/vin", files={"file": upload})

        assert response.status_code == 200