feat: add VIN photo OCR pipeline (refs #67)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 31s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped

Implement VIN-specific OCR extraction with optimized preprocessing:

- Add POST /extract/vin endpoint for VIN extraction
- VIN preprocessor: CLAHE, deskew, denoise, adaptive threshold
- VIN validator: check digit validation, OCR error correction (I->1, O->0)
- VIN extractor: PSM modes 6/7/8, character whitelist, alternatives
- Response includes confidence, bounding box, and alternatives
- Unit tests for validator and preprocessor
- Integration tests for VIN extraction endpoint

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-01 19:31:36 -06:00
parent 004940b013
commit 54cbd49171
14 changed files with 1694 additions and 1 deletions

View File

@@ -0,0 +1,242 @@
"""Integration tests for VIN extraction endpoint."""
import io
from unittest.mock import patch, MagicMock
import pytest
from fastapi.testclient import TestClient
from PIL import Image, ImageDraw, ImageFont
from app.main import app
@pytest.fixture
def client() -> TestClient:
    """Provide a FastAPI ``TestClient`` wrapping the application under test."""
    test_client = TestClient(app)
    return test_client
def create_vin_image(vin: str = "1HGBH41JXMN109186") -> bytes:
    """Render ``vin`` as black text on a white canvas and return it as PNG bytes.

    The canvas is a 400x100 RGB image and the text is drawn at (50, 40).

    Args:
        vin: Text to draw onto the image.

    Returns:
        The PNG-encoded image.
    """
    canvas = Image.new("RGB", (400, 100), (255, 255, 255))

    # No font argument is supplied, so Pillow's default font is used.
    ImageDraw.Draw(canvas).text((50, 40), vin, fill=(0, 0, 0))

    with io.BytesIO() as png_buffer:
        canvas.save(png_buffer, format="PNG")
        return png_buffer.getvalue()
def create_empty_image() -> bytes:
    """Return PNG bytes for a plain white 400x100 RGB image with no text."""
    blank_canvas = Image.new("RGB", (400, 100), (255, 255, 255))

    with io.BytesIO() as png_buffer:
        blank_canvas.save(png_buffer, format="PNG")
        return png_buffer.getvalue()
class TestVinExtractionEndpoint:
    """Integration tests exercising ``POST /extract/vin``.

    Tests that need an extraction result patch
    ``app.extractors.vin_extractor.vin_extractor.extract`` and feed it a
    canned ``VinExtractionResult``, so they check how the endpoint maps that
    result into the JSON response rather than the OCR itself.
    """

    def test_endpoint_exists(self, client: TestClient) -> None:
        """The root route lists at least one endpoint mentioning "vin"."""
        root = client.get("/")
        assert root.status_code == 200

        listed = root.json().get("endpoints", [])
        assert any("vin" in name.lower() for name in listed)

    def test_extract_vin_no_file(self, client: TestClient) -> None:
        """Posting without a file part is rejected with 422."""
        resp = client.post("/extract/vin")
        # 422 is the request-validation error status.
        assert resp.status_code == 422

    def test_extract_vin_empty_file(self, client: TestClient) -> None:
        """A zero-byte upload yields 400 with "empty" in the detail text."""
        upload = {"file": ("empty.png", b"", "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 400
        detail = resp.json()["detail"]
        assert "empty" in detail.lower()

    def test_extract_vin_large_file(self, client: TestClient) -> None:
        """An oversized upload is rejected with 413."""
        # 11 MiB of filler bytes, meant to exceed the 10MB upload limit.
        oversized = b"x" * (11 * 1024 * 1024)
        upload = {"file": ("large.png", oversized, "image/png")}

        resp = client.post("/extract/vin", files=upload)
        assert resp.status_code == 413

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_success(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A successful result surfaces VIN, confidence and processing time."""
        from app.extractors.vin_extractor import VinExtractionResult

        canned = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.94,
            bounding_box=None,
            alternatives=[],
            processing_time_ms=500,
        )
        mock_extract.return_value = canned

        upload = {"file": ("vin.png", create_vin_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["success"] is True
        assert payload["vin"] == "1HGBH41JXMN109186"
        assert payload["confidence"] == 0.94
        # The timing field is exposed under a camelCase key.
        assert "processingTimeMs" in payload

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_not_found(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A failed extraction still returns 200, with success=false and the error."""
        from app.extractors.vin_extractor import VinExtractionResult

        canned = VinExtractionResult(
            success=False,
            vin=None,
            confidence=0.0,
            error="No VIN pattern found in image",
            processing_time_ms=300,
        )
        mock_extract.return_value = canned

        upload = {"file": ("empty.png", create_empty_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["success"] is False
        assert payload["vin"] is None
        assert payload["error"] == "No VIN pattern found in image"

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_with_alternatives(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """Alternatives from the extractor are passed through in the response."""
        from app.extractors.vin_extractor import VinAlternative, VinExtractionResult

        # NOTE(review): the mocked alternative carries the same VIN as the
        # primary result; a distinct value would make this case stronger.
        # Confirm whether that is intentional.
        runner_up = VinAlternative(vin="1HGBH41JXMN109186", confidence=0.72)
        mock_extract.return_value = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.94,
            bounding_box=None,
            alternatives=[runner_up],
            processing_time_ms=600,
        )

        upload = {"file": ("vin.png", create_vin_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["success"] is True

        alternatives = payload["alternatives"]
        assert len(alternatives) == 1
        assert alternatives[0]["confidence"] == 0.72

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_extract_vin_with_bounding_box(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """The bounding box is exposed as ``boundingBox`` with x/y/width/height."""
        from app.extractors.vin_extractor import VinExtractionResult
        from app.preprocessors.vin_preprocessor import BoundingBox

        region = BoundingBox(x=50, y=40, width=300, height=20)
        mock_extract.return_value = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.94,
            bounding_box=region,
            alternatives=[],
            processing_time_ms=500,
        )

        upload = {"file": ("vin.png", create_vin_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["boundingBox"] is not None

        # Check each coordinate in the same order as the mocked box's fields.
        expected_fields = (("x", 50), ("y", 40), ("width", 300), ("height", 20))
        for field_name, expected_value in expected_fields:
            assert payload["boundingBox"][field_name] == expected_value
class TestVinExtractionContentTypes:
    """Check that ``POST /extract/vin`` accepts JPEG and PNG uploads.

    The extractor is patched in both tests, so only request handling for
    each content type is exercised.
    """

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_accepts_jpeg(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A JPEG upload with content type ``image/jpeg`` returns 200."""
        from app.extractors.vin_extractor import VinExtractionResult

        canned = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.9,
            processing_time_ms=400,
        )
        mock_extract.return_value = canned

        # Encode a blank white canvas as JPEG in memory.
        blank_canvas = Image.new("RGB", (400, 100), (255, 255, 255))
        jpeg_buffer = io.BytesIO()
        blank_canvas.save(jpeg_buffer, format="JPEG")

        upload = {"file": ("vin.jpg", jpeg_buffer.getvalue(), "image/jpeg")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200

    @patch("app.extractors.vin_extractor.vin_extractor.extract")
    def test_accepts_png(
        self, mock_extract: MagicMock, client: TestClient
    ) -> None:
        """A PNG upload with content type ``image/png`` returns 200."""
        from app.extractors.vin_extractor import VinExtractionResult

        canned = VinExtractionResult(
            success=True,
            vin="1HGBH41JXMN109186",
            confidence=0.9,
            processing_time_ms=400,
        )
        mock_extract.return_value = canned

        upload = {"file": ("vin.png", create_vin_image(), "image/png")}
        resp = client.post("/extract/vin", files=upload)

        assert resp.status_code == 200