feat: add VIN decode endpoint to OCR Python service (refs #224)

Add POST /decode/vin endpoint using Gemini 2.5 Flash for VIN string
decoding. Returns structured vehicle data (year, make, model, trim,
body/drive/fuel type, engine, transmission) with confidence score.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-18 21:40:10 -06:00
parent 00aa2a5411
commit a75f7b5583
7 changed files with 403 additions and 5 deletions

View File

@@ -1,4 +1,4 @@
"""Gemini 2.5 Flash engine for maintenance schedule extraction from PDFs. """Gemini 2.5 Flash engine for document understanding and VIN decode.
Standalone module (does NOT extend OcrEngine) because Gemini performs Standalone module (does NOT extend OcrEngine) because Gemini performs
semantic document understanding, not traditional OCR word-box extraction. semantic document understanding, not traditional OCR word-box extraction.
@@ -37,6 +37,31 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati
Return the results as a JSON object with a single "maintenanceSchedule" array.\ Return the results as a JSON object with a single "maintenanceSchedule" array.\
""" """
# Prompt template sent to Gemini for VIN decoding. The ``{vin}`` placeholder is
# filled in with ``str.format`` by ``GeminiEngine.decode_vin``. The backslashes
# right after the opening quotes and before the closing quotes keep the literal
# free of a leading and a trailing newline.
_VIN_DECODE_PROMPT = """\
Given the VIN (Vehicle Identification Number) below, decode it and return the vehicle specifications.
VIN: {vin}
Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
"""
# Structured-output schema passed to Gemini as ``response_schema`` for VIN
# decoding. Every vehicle attribute is nullable; only ``confidence`` is required.
_VIN_DECODE_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "year": {"type": "integer", "nullable": True},
        # All descriptive attributes share the same nullable-string shape, so
        # they are generated from the list of property names (order preserved).
        **{
            field_name: {"type": "string", "nullable": True}
            for field_name in (
                "make",
                "model",
                "trimLevel",
                "bodyType",
                "driveType",
                "fuelType",
                "engine",
                "transmission",
            )
        },
        "confidence": {"type": "number"},
    },
    "required": ["confidence"],
}
_RESPONSE_SCHEMA: dict[str, Any] = { _RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object", "type": "object",
"properties": { "properties": {
@@ -70,6 +95,22 @@ class GeminiProcessingError(GeminiEngineError):
"""Raised when Gemini fails to process a document.""" """Raised when Gemini fails to process a document."""
@dataclass
class VinDecodeResult:
    """Result from Gemini VIN decode.

    Every vehicle attribute defaults to ``None`` and ``confidence`` defaults
    to ``0.0``, so an instance can be built from a partial decode.
    """

    # Vehicle identity
    year: int | None = None
    make: str | None = None
    model: str | None = None
    trim_level: str | None = None

    # Body and drivetrain description
    body_type: str | None = None
    drive_type: str | None = None
    fuel_type: str | None = None
    engine: str | None = None
    transmission: str | None = None

    # Decode reliability reported by the model; the prompt asks for 0.0-1.0.
    confidence: float = 0.0
@dataclass @dataclass
class MaintenanceItem: class MaintenanceItem:
"""A single extracted maintenance schedule item.""" """A single extracted maintenance schedule item."""
@@ -89,13 +130,13 @@ class MaintenanceExtractionResult:
class GeminiEngine: class GeminiEngine:
"""Gemini 2.5 Flash wrapper for maintenance schedule extraction. """Gemini 2.5 Flash wrapper for maintenance schedule extraction and VIN decode.
Standalone class (not an OcrEngine subclass) because Gemini performs Standalone class (not an OcrEngine subclass) because Gemini performs
semantic document understanding rather than traditional OCR. semantic document understanding rather than traditional OCR.
Uses lazy initialization: the Vertex AI client is not created until Uses lazy initialization: the Vertex AI client is not created until
the first ``extract_maintenance()`` call. the first call to ``extract_maintenance()`` or ``decode_vin()``.
""" """
def __init__(self) -> None: def __init__(self) -> None:
@@ -228,3 +269,60 @@ class GeminiEngine:
raise GeminiProcessingError( raise GeminiProcessingError(
f"Gemini maintenance extraction failed: {exc}" f"Gemini maintenance extraction failed: {exc}"
) from exc ) from exc
def decode_vin(self, vin: str) -> VinDecodeResult:
    """Decode a VIN string into structured vehicle data via Gemini.

    Sends the VIN to the model together with a JSON response schema, parses
    the reply and maps it onto a ``VinDecodeResult``. The model-reported
    confidence is coerced to ``float`` and clamped to ``[0.0, 1.0]``: the
    response schema does not bound it, so a null, non-numeric or
    out-of-range value would otherwise be handed to callers unchanged.

    Args:
        vin: A 17-character Vehicle Identification Number.

    Returns:
        Structured vehicle specification result.

    Raises:
        GeminiProcessingError: If Gemini fails to decode the VIN, returns
            invalid JSON, or returns JSON that is not an object.
        GeminiUnavailableError: If the engine cannot be initialized.
    """
    model = self._get_model()

    try:
        from vertexai.generative_models import GenerationConfig  # type: ignore[import-untyped]

        vin_config = GenerationConfig(
            response_mime_type="application/json",
            response_schema=_VIN_DECODE_SCHEMA,
        )
        prompt = _VIN_DECODE_PROMPT.format(vin=vin)
        response = model.generate_content(
            [prompt],
            generation_config=vin_config,
        )

        raw = json.loads(response.text)
        if not isinstance(raw, dict):
            # Fail with an explicit message instead of an incidental
            # AttributeError from ``raw.get`` further down.
            raise GeminiProcessingError(
                "Gemini returned a non-object JSON payload for VIN decode: "
                f"{type(raw).__name__}"
            )

        # ``or 0.0`` covers both a missing key and an explicit null.
        try:
            confidence = float(raw.get("confidence") or 0.0)
        except (TypeError, ValueError):
            confidence = 0.0
        # max() runs first with 0.0 as its first argument so that NaN (which
        # compares false against everything) collapses to 0.0.
        confidence = min(1.0, max(0.0, confidence))

        logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, confidence)

        return VinDecodeResult(
            year=raw.get("year"),
            make=raw.get("make"),
            model=raw.get("model"),
            trim_level=raw.get("trimLevel"),
            body_type=raw.get("bodyType"),
            drive_type=raw.get("driveType"),
            fuel_type=raw.get("fuelType"),
            engine=raw.get("engine"),
            transmission=raw.get("transmission"),
            confidence=confidence,
        )
    except (GeminiEngineError,):
        # Already a domain error (including the non-object case above).
        raise
    except json.JSONDecodeError as exc:
        raise GeminiProcessingError(
            f"Gemini returned invalid JSON for VIN decode: {exc}"
        ) from exc
    except Exception as exc:
        raise GeminiProcessingError(
            f"Gemini VIN decode failed: {exc}"
        ) from exc

View File

@@ -6,7 +6,7 @@ from typing import AsyncIterator
from fastapi import FastAPI from fastapi import FastAPI
from app.config import settings from app.config import settings
from app.routers import extract_router, jobs_router from app.routers import decode_router, extract_router, jobs_router
from app.services import job_queue from app.services import job_queue
# Configure logging # Configure logging
@@ -36,6 +36,7 @@ app = FastAPI(
) )
# Include routers # Include routers
app.include_router(decode_router)
app.include_router(extract_router) app.include_router(extract_router)
app.include_router(jobs_router) app.include_router(jobs_router)
@@ -54,6 +55,7 @@ async def root() -> dict:
"version": "1.0.0", "version": "1.0.0",
"log_level": settings.log_level, "log_level": settings.log_level,
"endpoints": [ "endpoints": [
"POST /decode/vin - VIN string decode via Gemini",
"POST /extract - Synchronous OCR extraction", "POST /extract - Synchronous OCR extraction",
"POST /extract/vin - VIN-specific extraction with validation", "POST /extract/vin - VIN-specific extraction with validation",
"POST /extract/receipt - Receipt extraction (fuel, general)", "POST /extract/receipt - Receipt extraction (fuel, general)",

View File

@@ -14,6 +14,8 @@ from .schemas import (
ReceiptExtractedField, ReceiptExtractedField,
ReceiptExtractionResponse, ReceiptExtractionResponse,
VinAlternative, VinAlternative,
VinDecodeRequest,
VinDecodeResponse,
VinExtractionResponse, VinExtractionResponse,
) )
@@ -32,5 +34,7 @@ __all__ = [
"ReceiptExtractedField", "ReceiptExtractedField",
"ReceiptExtractionResponse", "ReceiptExtractionResponse",
"VinAlternative", "VinAlternative",
"VinDecodeRequest",
"VinDecodeResponse",
"VinExtractionResponse", "VinExtractionResponse",
] ]

View File

@@ -169,3 +169,30 @@ class ManualJobResponse(BaseModel):
error: Optional[str] = None error: Optional[str] = None
model_config = {"populate_by_name": True} model_config = {"populate_by_name": True}
class VinDecodeRequest(BaseModel):
    """Request body for VIN decode endpoint."""

    # VIN exactly as submitted by the client. The model declares no format
    # constraint; any string is accepted at this layer.
    vin: str
class VinDecodeResponse(BaseModel):
    """Response from VIN decode endpoint.

    Multi-word fields carry camelCase aliases (``trimLevel``, ``bodyType``,
    ``driveType``, ``fuelType``, ``processingTimeMs``). Vehicle attributes
    are optional and default to ``None``.
    """

    success: bool
    vin: str

    # Decoded vehicle attributes; None when not provided.
    year: Optional[int] = None
    make: Optional[str] = None
    model: Optional[str] = None
    trim_level: Optional[str] = Field(default=None, alias="trimLevel")
    body_type: Optional[str] = Field(default=None, alias="bodyType")
    drive_type: Optional[str] = Field(default=None, alias="driveType")
    fuel_type: Optional[str] = Field(default=None, alias="fuelType")
    engine: Optional[str] = None
    transmission: Optional[str] = None

    # Validation rejects values outside the inclusive range 0.0-1.0.
    confidence: float = Field(ge=0.0, le=1.0)
    # Required; no default is declared.
    processing_time_ms: int = Field(alias="processingTimeMs")
    error: Optional[str] = None

    # Allow construction by field name as well as by alias.
    model_config = {"populate_by_name": True}

View File

@@ -1,5 +1,6 @@
"""OCR API routers.""" """OCR API routers."""
from .decode import router as decode_router
from .extract import router as extract_router from .extract import router as extract_router
from .jobs import router as jobs_router from .jobs import router as jobs_router
__all__ = ["extract_router", "jobs_router"] __all__ = ["decode_router", "extract_router", "jobs_router"]

67
ocr/app/routers/decode.py Normal file
View File

@@ -0,0 +1,67 @@
"""VIN decode router - Gemini-powered VIN string decoding."""
import logging
import re
import time
from fastapi import APIRouter, HTTPException
from app.engines.gemini_engine import (
GeminiEngine,
GeminiProcessingError,
GeminiUnavailableError,
)
from app.models import VinDecodeRequest, VinDecodeResponse
# Module-scoped logger named after this module.
logger = logging.getLogger(__name__)

# All routes declared on this router are served under the /decode prefix.
router = APIRouter(prefix="/decode", tags=["decode"])

# Exactly 17 characters drawn from digits and the letters A-H, J-N, P, R-Z
# (i.e. no I, O or Q). Upper-case only.
_VIN_REGEX = re.compile(r"^[A-HJ-NPR-Z0-9]{17}$")

# Shared engine instance (lazy init on first request)
_gemini_engine = GeminiEngine()
@router.post("/vin", response_model=VinDecodeResponse)
async def decode_vin(request: VinDecodeRequest) -> VinDecodeResponse:
    """Decode a VIN string into structured vehicle data using Gemini.

    Accepts a 17-character VIN and returns year, make, model, trim, etc.
    The VIN is upper-cased and stripped of surrounding whitespace before it
    is validated and decoded.

    Args:
        request: Request body carrying the VIN to decode.

    Returns:
        The decoded vehicle attributes, the normalised VIN, the confidence
        reported by the engine and the elapsed processing time in
        milliseconds.

    Raises:
        HTTPException: 400 if the VIN is not 17 valid characters, 503 if
            Gemini is unavailable, 422 if Gemini fails to process the VIN.
    """
    # Function-scope import keeps this handler self-contained.
    import asyncio

    vin = request.vin.upper().strip()

    if not _VIN_REGEX.match(vin):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid VIN format: must be 17 alphanumeric characters (excluding I, O, Q). Got: {vin}",
        )

    start_ms = time.monotonic_ns() // 1_000_000

    try:
        # The engine call is synchronous and waits on a remote model. Running
        # it directly inside this coroutine would block the event loop (and
        # every other request) for the whole round-trip, so it is offloaded
        # to a worker thread.
        # NOTE(review): confirm GeminiEngine's lazy initialisation tolerates
        # concurrent first calls from different threads.
        result = await asyncio.to_thread(_gemini_engine.decode_vin, vin)
    except GeminiUnavailableError as exc:
        logger.error("Gemini unavailable for VIN decode: %s", exc)
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    except GeminiProcessingError as exc:
        logger.error("Gemini processing error for VIN %s: %s", vin, exc)
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    elapsed_ms = (time.monotonic_ns() // 1_000_000) - start_ms

    return VinDecodeResponse(
        success=True,
        vin=vin,
        year=result.year,
        make=result.make,
        model=result.model,
        trimLevel=result.trim_level,
        bodyType=result.body_type,
        driveType=result.drive_type,
        fuelType=result.fuel_type,
        engine=result.engine,
        transmission=result.transmission,
        confidence=result.confidence,
        processingTimeMs=elapsed_ms,
        error=None,
    )

View File

@@ -0,0 +1,199 @@
"""Tests for the VIN decode endpoint (POST /decode/vin).
Covers: valid VIN returns 200 with correct response shape,
invalid VIN format returns 400, Gemini unavailable returns 503,
and Gemini processing error returns 422.
All GeminiEngine calls are mocked.
"""
from unittest.mock import MagicMock, patch
import pytest
from fastapi.testclient import TestClient
from app.engines.gemini_engine import (
GeminiProcessingError,
GeminiUnavailableError,
VinDecodeResult,
)
from app.main import app
# Test client wrapping the FastAPI app imported from app.main.
client = TestClient(app)

# A valid 17-character VIN (no I, O, Q)
_VALID_VIN = "1HGBH41JXMN109186"

# Fully populated engine result used as the mocked return value of
# GeminiEngine.decode_vin in the success tests.
_FULL_RESULT = VinDecodeResult(
    year=2021,
    make="Honda",
    model="Civic",
    trim_level="EX",
    body_type="Sedan",
    drive_type="FWD",
    fuel_type="Gasoline",
    engine="2.0L I4",
    transmission="CVT",
    confidence=0.95,
)
# --- Valid VIN ---
class TestDecodeVinSuccess:
    """Happy path: a well-formed VIN yields HTTP 200 and the expected payload."""

    @patch("app.routers.decode._gemini_engine")
    def test_valid_vin_returns_200(self, mock_engine):
        """A fully decoded VIN comes back with every vehicle field populated."""
        mock_engine.decode_vin.return_value = _FULL_RESULT

        resp = client.post("/decode/vin", json={"vin": _VALID_VIN})
        assert resp.status_code == 200

        body = resp.json()
        assert body["success"] is True

        # Field-by-field comparison against the mocked engine result.
        expected_fields = {
            "vin": _VALID_VIN,
            "year": 2021,
            "make": "Honda",
            "model": "Civic",
            "trimLevel": "EX",
            "bodyType": "Sedan",
            "driveType": "FWD",
            "fuelType": "Gasoline",
            "engine": "2.0L I4",
            "transmission": "CVT",
            "confidence": 0.95,
        }
        for key, expected in expected_fields.items():
            assert body[key] == expected

        assert "processingTimeMs" in body
        assert body["error"] is None

    @patch("app.routers.decode._gemini_engine")
    def test_vin_uppercased_before_decode(self, mock_engine):
        """A lower-case VIN is upper-cased before it reaches the engine."""
        mock_engine.decode_vin.return_value = _FULL_RESULT
        lowercase_vin = _VALID_VIN.lower()

        resp = client.post("/decode/vin", json={"vin": lowercase_vin})

        assert resp.status_code == 200
        assert resp.json()["vin"] == _VALID_VIN
        mock_engine.decode_vin.assert_called_once_with(_VALID_VIN)

    @patch("app.routers.decode._gemini_engine")
    def test_nullable_fields_allowed(self, mock_engine):
        """Edge: a result carrying only a confidence value is still a valid response."""
        sparse_result = VinDecodeResult(confidence=0.3)
        mock_engine.decode_vin.return_value = sparse_result

        resp = client.post("/decode/vin", json={"vin": _VALID_VIN})
        assert resp.status_code == 200

        body = resp.json()
        assert body["success"] is True
        assert body["year"] is None
        assert body["make"] is None
        assert body["confidence"] == 0.3
# --- Invalid VIN format ---
class TestDecodeVinValidation:
    """Malformed VINs must be rejected with HTTP 400."""

    @staticmethod
    def _post_vin(vin):
        """Submit *vin* to the decode endpoint and return the raw response."""
        return client.post("/decode/vin", json={"vin": vin})

    def test_too_short_vin_returns_400(self):
        """A VIN with fewer than 17 characters is refused."""
        resp = self._post_vin("1HGBH41JXM")
        assert resp.status_code == 400
        assert "Invalid VIN format" in resp.json()["detail"]

    def test_too_long_vin_returns_400(self):
        """A VIN with more than 17 characters is refused."""
        resp = self._post_vin("1HGBH41JXMN109186X")
        assert resp.status_code == 400

    def test_vin_with_letter_i_returns_400(self):
        """The letter I is not a legal VIN character."""
        # First character swapped for I to make the VIN invalid.
        resp = self._post_vin("IHGBH41JXMN109186")
        assert resp.status_code == 400
        assert "Invalid VIN format" in resp.json()["detail"]

    def test_vin_with_letter_o_returns_400(self):
        """The letter O is not a legal VIN character."""
        resp = self._post_vin("OHGBH41JXMN109186")
        assert resp.status_code == 400

    def test_vin_with_letter_q_returns_400(self):
        """The letter Q is not a legal VIN character."""
        resp = self._post_vin("QHGBH41JXMN109186")
        assert resp.status_code == 400

    def test_empty_vin_returns_400(self):
        """An empty string is refused."""
        resp = self._post_vin("")
        assert resp.status_code == 400

    def test_vin_with_special_chars_returns_400(self):
        """Punctuation inside the VIN is refused."""
        resp = self._post_vin("1HGBH41J-MN109186")
        assert resp.status_code == 400
# --- Gemini unavailable ---
class TestDecodeVinGeminiUnavailable:
    """An unavailable Gemini backend must surface as HTTP 503."""

    @patch("app.routers.decode._gemini_engine")
    def test_gemini_unavailable_returns_503(self, mock_engine):
        """An initialisation failure in the engine maps to 503 with its message as detail."""
        unavailable = GeminiUnavailableError("Google credential config not found")
        mock_engine.decode_vin.side_effect = unavailable

        resp = client.post("/decode/vin", json={"vin": _VALID_VIN})

        assert resp.status_code == 503
        detail = resp.json()["detail"]
        assert "Google credential config not found" in detail
# --- Gemini processing error ---
class TestDecodeVinGeminiProcessingError:
    """Processing failures raised by the engine must surface as HTTP 422."""

    @patch("app.routers.decode._gemini_engine")
    def test_gemini_processing_error_returns_422(self, mock_engine):
        """Unparseable model output maps to 422 with the engine message as detail."""
        bad_output = GeminiProcessingError(
            "Gemini returned invalid JSON for VIN decode: ..."
        )
        mock_engine.decode_vin.side_effect = bad_output

        resp = client.post("/decode/vin", json={"vin": _VALID_VIN})

        assert resp.status_code == 422
        detail = resp.json()["detail"]
        assert "Gemini returned invalid JSON" in detail

    @patch("app.routers.decode._gemini_engine")
    def test_gemini_api_failure_returns_422(self, mock_engine):
        """A runtime failure of the Gemini API call maps to 422 as well."""
        api_failure = GeminiProcessingError(
            "Gemini VIN decode failed: API quota exceeded"
        )
        mock_engine.decode_vin.side_effect = api_failure

        resp = client.post("/decode/vin", json={"vin": _VALID_VIN})

        assert resp.status_code == 422
        detail = resp.json()["detail"]
        assert "Gemini VIN decode failed" in detail