feat: add VIN photo OCR pipeline (refs #67)

Implement VIN-specific OCR extraction with optimized preprocessing:
- Add POST /extract/vin endpoint for VIN extraction
- VIN preprocessor: CLAHE, deskew, denoise, adaptive threshold
- VIN validator: check digit validation, OCR error correction (I->1, O->0)
- VIN extractor: PSM modes 6/7/8, character whitelist, alternatives
- Response includes confidence, bounding box, and alternatives
- Unit tests for validator and preprocessor
- Integration tests for VIN extraction endpoint

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 19:31:36 -06:00
parent 004940b013
commit 54cbd49171
14 changed files with 1694 additions and 1 deletions
--- a/ocr/tests/test_vin_extraction.py
+++ b/ocr/tests/test_vin_extraction.py
@@ -0,0 +1,242 @@
+"""Integration tests for VIN extraction endpoint."""
+import io
+from unittest.mock import patch, MagicMock
+
+import pytest
+from fastapi.testclient import TestClient
+from PIL import Image, ImageDraw, ImageFont
+
+from app.main import app
+
+
+@pytest.fixture
+def client() -> TestClient:
+    """Provide a TestClient wrapping the application imported from app.main."""
+    return TestClient(app=app)
+
+
+def create_vin_image(vin: str = "1HGBH41JXMN109186") -> bytes:
+    """Return PNG bytes of a 400x100 white canvas with ``vin`` drawn in black."""
+    canvas_size = (400, 100)
+    white, black = (255, 255, 255), (0, 0, 0)
+
+    canvas = Image.new("RGB", canvas_size, white)
+    # No font argument is passed, so Pillow falls back to its default font.
+    ImageDraw.Draw(canvas).text((50, 40), vin, fill=black)
+
+    with io.BytesIO() as png_stream:
+        canvas.save(png_stream, format="PNG")
+        return png_stream.getvalue()
+
+
+def create_empty_image() -> bytes:
+    """Return PNG bytes of a blank 400x100 white canvas containing no text."""
+    blank = Image.new("RGB", (400, 100), (255, 255, 255))
+    with io.BytesIO() as png_stream:
+        blank.save(png_stream, format="PNG")
+        return png_stream.getvalue()
+
+
+class TestVinExtractionEndpoint:
+    """Tests for POST /extract/vin endpoint."""
+
+    def test_endpoint_exists(self, client: TestClient) -> None:
+        """Test the root endpoint listing advertises a VIN endpoint."""
+        response = client.get("/")
+        assert response.status_code == 200
+        data = response.json()
+        assert any("vin" in endpoint.lower() for endpoint in data.get("endpoints", []))
+
+    def test_extract_vin_no_file(self, client: TestClient) -> None:
+        """Test a request without a file part is rejected with 422."""
+        response = client.post("/extract/vin")
+        assert response.status_code == 422  # Validation error
+
+    def test_extract_vin_empty_file(self, client: TestClient) -> None:
+        """Test a zero-byte upload is rejected with 400 and an 'empty' detail."""
+        response = client.post(
+            "/extract/vin",
+            files={"file": ("empty.png", b"", "image/png")},
+        )
+        assert response.status_code == 400
+        assert "empty" in response.json()["detail"].lower()
+
+    def test_extract_vin_large_file(self, client: TestClient) -> None:
+        """Test an 11 MiB upload is rejected with 413."""
+        # Create file larger than 10MB
+        large_content = b"x" * (11 * 1024 * 1024)
+
+        response = client.post(
+            "/extract/vin",
+            files={"file": ("large.png", large_content, "image/png")},
+        )
+        assert response.status_code == 413
+
+    @patch("app.extractors.vin_extractor.vin_extractor.extract")
+    def test_extract_vin_success(
+        self, mock_extract: MagicMock, client: TestClient
+    ) -> None:
+        """Test a successful extraction result is serialized into the response."""
+        from app.extractors.vin_extractor import VinExtractionResult
+
+        mock_extract.return_value = VinExtractionResult(
+            success=True,
+            vin="1HGBH41JXMN109186",
+            confidence=0.94,
+            bounding_box=None,
+            alternatives=[],
+            processing_time_ms=500,
+        )
+
+        image_bytes = create_vin_image()
+        response = client.post(
+            "/extract/vin",
+            files={"file": ("vin.png", image_bytes, "image/png")},
+        )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["success"] is True
+        assert data["vin"] == "1HGBH41JXMN109186"
+        assert data["confidence"] == 0.94
+        assert "processingTimeMs" in data
+
+    @patch("app.extractors.vin_extractor.vin_extractor.extract")
+    def test_extract_vin_not_found(
+        self, mock_extract: MagicMock, client: TestClient
+    ) -> None:
+        """Test a failed extraction still returns 200 with success=false and the error."""
+        from app.extractors.vin_extractor import VinExtractionResult
+
+        mock_extract.return_value = VinExtractionResult(
+            success=False,
+            vin=None,
+            confidence=0.0,
+            error="No VIN pattern found in image",
+            processing_time_ms=300,
+        )
+
+        image_bytes = create_empty_image()
+        response = client.post(
+            "/extract/vin",
+            files={"file": ("empty.png", image_bytes, "image/png")},
+        )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["success"] is False
+        assert data["vin"] is None
+        assert data["error"] == "No VIN pattern found in image"
+
+    @patch("app.extractors.vin_extractor.vin_extractor.extract")
+    def test_extract_vin_with_alternatives(
+        self, mock_extract: MagicMock, client: TestClient
+    ) -> None:
+        """Test alternative VIN candidates are returned alongside the primary VIN."""
+        from app.extractors.vin_extractor import VinExtractionResult, VinAlternative
+
+        # The alternative differs from the primary VIN (B misread as 8) so the fields are distinguishable.
+        mock_extract.return_value = VinExtractionResult(
+            success=True,
+            vin="1HGBH41JXMN109186",
+            confidence=0.94,
+            bounding_box=None,
+            alternatives=[VinAlternative(vin="1HG8H41JXMN109186", confidence=0.72)],
+            processing_time_ms=600,
+        )
+
+        image_bytes = create_vin_image()
+        response = client.post(
+            "/extract/vin",
+            files={"file": ("vin.png", image_bytes, "image/png")},
+        )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["success"] is True
+        assert len(data["alternatives"]) == 1
+        assert data["alternatives"][0]["vin"] == "1HG8H41JXMN109186"
+        assert data["alternatives"][0]["confidence"] == 0.72
+
+    @patch("app.extractors.vin_extractor.vin_extractor.extract")
+    def test_extract_vin_with_bounding_box(
+        self, mock_extract: MagicMock, client: TestClient
+    ) -> None:
+        """Test the bounding box is serialized under the camelCase boundingBox key."""
+        from app.extractors.vin_extractor import VinExtractionResult
+        from app.preprocessors.vin_preprocessor import BoundingBox
+
+        mock_extract.return_value = VinExtractionResult(
+            success=True,
+            vin="1HGBH41JXMN109186",
+            confidence=0.94,
+            bounding_box=BoundingBox(x=50, y=40, width=300, height=20),
+            alternatives=[],
+            processing_time_ms=500,
+        )
+
+        image_bytes = create_vin_image()
+        response = client.post(
+            "/extract/vin",
+            files={"file": ("vin.png", image_bytes, "image/png")},
+        )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["boundingBox"] is not None
+        assert data["boundingBox"]["x"] == 50
+        assert data["boundingBox"]["y"] == 40
+        assert data["boundingBox"]["width"] == 300
+        assert data["boundingBox"]["height"] == 20
+
+
+class TestVinExtractionContentTypes:
+    """Checks that the VIN endpoint accepts the supported image content types."""
+
+    @patch("app.extractors.vin_extractor.vin_extractor.extract")
+    def test_accepts_jpeg(
+        self, mock_extract: MagicMock, client: TestClient
+    ) -> None:
+        """Test a JPEG upload is answered with HTTP 200."""
+        from app.extractors.vin_extractor import VinExtractionResult
+
+        # vin_extractor.extract is patched; it yields the stubbed result below.
+        stubbed = VinExtractionResult(
+            success=True,
+            vin="1HGBH41JXMN109186",
+            confidence=0.9,
+            processing_time_ms=400,
+        )
+        mock_extract.return_value = stubbed
+
+        # Encode a blank white 400x100 canvas as JPEG for the upload.
+        canvas = Image.new("RGB", (400, 100), (255, 255, 255))
+        jpeg_stream = io.BytesIO()
+        canvas.save(jpeg_stream, format="JPEG")
+        upload = {"file": ("vin.jpg", jpeg_stream.getvalue(), "image/jpeg")}
+
+        resp = client.post("/extract/vin", files=upload)
+
+        assert resp.status_code == 200
+
+    @patch("app.extractors.vin_extractor.vin_extractor.extract")
+    def test_accepts_png(
+        self, mock_extract: MagicMock, client: TestClient
+    ) -> None:
+        """Test a PNG upload is answered with HTTP 200."""
+        from app.extractors.vin_extractor import VinExtractionResult
+
+        # vin_extractor.extract is patched; it yields the stubbed result below.
+        stubbed = VinExtractionResult(
+            success=True,
+            vin="1HGBH41JXMN109186",
+            confidence=0.9,
+            processing_time_ms=400,
+        )
+        mock_extract.return_value = stubbed
+
+        png_bytes = create_vin_image()
+        upload = {"file": ("vin.png", png_bytes, "image/png")}
+        resp = client.post("/extract/vin", files=upload)
+
+        assert resp.status_code == 200