feat: add VIN decode endpoint to OCR Python service (refs #224)

Add POST /decode/vin endpoint using Gemini 2.5 Flash for VIN string
decoding. Returns structured vehicle data (year, make, model, trim,
body/drive/fuel type, engine, transmission) with confidence score.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-18 21:40:10 -06:00
parent 00aa2a5411
commit a75f7b5583
7 changed files with 403 additions and 5 deletions

View File

@@ -1,4 +1,4 @@
"""Gemini 2.5 Flash engine for maintenance schedule extraction from PDFs.
"""Gemini 2.5 Flash engine for document understanding and VIN decode.
Standalone module (does NOT extend OcrEngine) because Gemini performs
semantic document understanding, not traditional OCR word-box extraction.
@@ -37,6 +37,31 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati
Return the results as a JSON object with a single "maintenanceSchedule" array.\
"""
# Prompt template for VIN decoding. The ``{vin}`` placeholder is filled in with
# ``str.format`` by ``GeminiEngine.decode_vin``. The prompt asks the model to
# return null for any field it cannot determine and to include an overall
# confidence score between 0.0 and 1.0.
_VIN_DECODE_PROMPT = """\
Given the VIN (Vehicle Identification Number) below, decode it and return the vehicle specifications.
VIN: {vin}
Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
"""
# Structured-output schema handed to Gemini for VIN decoding. Every vehicle
# attribute is nullable; ``confidence`` is the only required property.
_VIN_DECODE_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "year": {"type": "integer", "nullable": True},
        # The remaining vehicle attributes all share the same nullable-string
        # shape; each one gets its own dict so no entries are aliased.
        **{
            field_name: {"type": "string", "nullable": True}
            for field_name in (
                "make",
                "model",
                "trimLevel",
                "bodyType",
                "driveType",
                "fuelType",
                "engine",
                "transmission",
            )
        },
        "confidence": {"type": "number"},
    },
    "required": ["confidence"],
}
_RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object",
"properties": {
@@ -70,6 +95,22 @@ class GeminiProcessingError(GeminiEngineError):
"""Raised when Gemini fails to process a document."""
@dataclass
class VinDecodeResult:
    """Structured vehicle data produced by a Gemini VIN decode.

    Every vehicle attribute defaults to ``None`` (meaning "not determined")
    and ``confidence`` defaults to ``0.0``. Field order is part of the
    positional constructor signature and must not change.
    """

    # Vehicle identity
    year: int | None = None
    make: str | None = None
    model: str | None = None
    trim_level: str | None = None

    # Vehicle configuration
    body_type: str | None = None
    drive_type: str | None = None
    fuel_type: str | None = None
    engine: str | None = None
    transmission: str | None = None

    # Overall decode reliability score reported by the model
    confidence: float = 0.0
@dataclass
class MaintenanceItem:
"""A single extracted maintenance schedule item."""
@@ -89,13 +130,13 @@ class MaintenanceExtractionResult:
class GeminiEngine:
"""Gemini 2.5 Flash wrapper for maintenance schedule extraction.
"""Gemini 2.5 Flash wrapper for maintenance schedule extraction and VIN decode.
Standalone class (not an OcrEngine subclass) because Gemini performs
semantic document understanding rather than traditional OCR.
Uses lazy initialization: the Vertex AI client is not created until
the first ``extract_maintenance()`` call.
the first call to ``extract_maintenance()`` or ``decode_vin()``.
"""
def __init__(self) -> None:
@@ -228,3 +269,60 @@ class GeminiEngine:
raise GeminiProcessingError(
f"Gemini maintenance extraction failed: {exc}"
) from exc
def decode_vin(self, vin: str) -> VinDecodeResult:
    """Decode a VIN string into structured vehicle data via Gemini.

    The model is asked for a JSON object matching ``_VIN_DECODE_SCHEMA``.
    Its self-reported ``confidence`` is treated as untrusted: it is coerced
    to ``float`` and clamped to ``[0.0, 1.0]`` so downstream consumers that
    enforce that range never see an out-of-range or null value.

    Args:
        vin: A 17-character Vehicle Identification Number.

    Returns:
        Structured vehicle specification result.

    Raises:
        GeminiProcessingError: If Gemini fails to decode the VIN, returns
            invalid JSON, or returns JSON that is not an object.
        GeminiUnavailableError: If the engine cannot be initialized.
    """
    # Deliberately outside the try block so initialization failures are not
    # re-wrapped as processing errors.
    model = self._get_model()

    try:
        from vertexai.generative_models import GenerationConfig  # type: ignore[import-untyped]

        vin_config = GenerationConfig(
            response_mime_type="application/json",
            response_schema=_VIN_DECODE_SCHEMA,
        )
        prompt = _VIN_DECODE_PROMPT.format(vin=vin)
        response = model.generate_content(
            [prompt],
            generation_config=vin_config,
        )

        raw = json.loads(response.text)
        if not isinstance(raw, dict):
            # Valid JSON but not an object (e.g. a list or bare string):
            # fail with a clear message instead of an AttributeError on .get().
            raise GeminiProcessingError(
                "Gemini returned a non-object JSON payload for VIN decode: "
                f"{type(raw).__name__}"
            )

        # A missing, null, or non-numeric confidence falls back to 0.0.
        try:
            confidence = float(raw.get("confidence"))
        except (TypeError, ValueError):
            confidence = 0.0
        # Clamp into [0.0, 1.0]. Keeping 0.0 as the first argument to max()
        # also maps NaN to 0.0, because comparisons against NaN are False.
        confidence = min(1.0, max(0.0, confidence))

        logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, confidence)

        return VinDecodeResult(
            year=raw.get("year"),
            make=raw.get("make"),
            model=raw.get("model"),
            trim_level=raw.get("trimLevel"),
            body_type=raw.get("bodyType"),
            drive_type=raw.get("driveType"),
            fuel_type=raw.get("fuelType"),
            engine=raw.get("engine"),
            transmission=raw.get("transmission"),
            confidence=confidence,
        )

    except (GeminiEngineError,):
        # Already a domain error (including the non-object case above).
        raise
    except json.JSONDecodeError as exc:
        raise GeminiProcessingError(
            f"Gemini returned invalid JSON for VIN decode: {exc}"
        ) from exc
    except Exception as exc:
        # Boundary catch: any SDK or transport failure becomes a domain error.
        raise GeminiProcessingError(
            f"Gemini VIN decode failed: {exc}"
        ) from exc

View File

@@ -6,7 +6,7 @@ from typing import AsyncIterator
from fastapi import FastAPI
from app.config import settings
from app.routers import extract_router, jobs_router
from app.routers import decode_router, extract_router, jobs_router
from app.services import job_queue
# Configure logging
@@ -36,6 +36,7 @@ app = FastAPI(
)
# Include routers
app.include_router(decode_router)
app.include_router(extract_router)
app.include_router(jobs_router)
@@ -54,6 +55,7 @@ async def root() -> dict:
"version": "1.0.0",
"log_level": settings.log_level,
"endpoints": [
"POST /decode/vin - VIN string decode via Gemini",
"POST /extract - Synchronous OCR extraction",
"POST /extract/vin - VIN-specific extraction with validation",
"POST /extract/receipt - Receipt extraction (fuel, general)",

View File

@@ -14,6 +14,8 @@ from .schemas import (
ReceiptExtractedField,
ReceiptExtractionResponse,
VinAlternative,
VinDecodeRequest,
VinDecodeResponse,
VinExtractionResponse,
)
@@ -32,5 +34,7 @@ __all__ = [
"ReceiptExtractedField",
"ReceiptExtractionResponse",
"VinAlternative",
"VinDecodeRequest",
"VinDecodeResponse",
"VinExtractionResponse",
]

View File

@@ -169,3 +169,30 @@ class ManualJobResponse(BaseModel):
error: Optional[str] = None
model_config = {"populate_by_name": True}
class VinDecodeRequest(BaseModel):
    """Request payload accepted by the VIN decode endpoint."""

    # Raw VIN string as supplied by the client; this model applies no
    # format constraint of its own.
    vin: str
class VinDecodeResponse(BaseModel):
    """Response payload returned by the VIN decode endpoint.

    Snake_case fields that declare a camelCase alias can be populated by
    either name, because ``populate_by_name`` is enabled.
    """

    # Outcome and the VIN being reported on
    success: bool
    vin: str

    # Decoded vehicle attributes; each is optional and defaults to None
    year: Optional[int] = None
    make: Optional[str] = None
    model: Optional[str] = None
    trim_level: Optional[str] = Field(default=None, alias="trimLevel")
    body_type: Optional[str] = Field(default=None, alias="bodyType")
    drive_type: Optional[str] = Field(default=None, alias="driveType")
    fuel_type: Optional[str] = Field(default=None, alias="fuelType")
    engine: Optional[str] = None
    transmission: Optional[str] = None

    # Required score, validated to lie within [0.0, 1.0]
    confidence: float = Field(ge=0.0, le=1.0)

    # Required processing duration in milliseconds
    processing_time_ms: int = Field(alias="processingTimeMs")

    # Optional error message
    error: Optional[str] = None

    model_config = {"populate_by_name": True}

View File

@@ -1,5 +1,6 @@
"""OCR API routers."""
from .decode import router as decode_router
from .extract import router as extract_router
from .jobs import router as jobs_router
__all__ = ["extract_router", "jobs_router"]
__all__ = ["decode_router", "extract_router", "jobs_router"]

67
ocr/app/routers/decode.py Normal file
View File

@@ -0,0 +1,67 @@
"""VIN decode router - Gemini-powered VIN string decoding."""
import logging
import re
import time
from fastapi import APIRouter, HTTPException
from app.engines.gemini_engine import (
GeminiEngine,
GeminiProcessingError,
GeminiUnavailableError,
)
from app.models import VinDecodeRequest, VinDecodeResponse
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Every route declared on this router is served under the /decode prefix.
router = APIRouter(prefix="/decode", tags=["decode"])
# Exactly 17 characters drawn from digits and uppercase letters, excluding
# I, O and Q. Callers must uppercase the input before matching.
_VIN_REGEX = re.compile(r"^[A-HJ-NPR-Z0-9]{17}$")
# Shared engine instance (lazy init on first request)
_gemini_engine = GeminiEngine()
@router.post("/vin", response_model=VinDecodeResponse)
async def decode_vin(request: VinDecodeRequest) -> VinDecodeResponse:
    """Decode a VIN string into structured vehicle data using Gemini.

    Accepts a 17-character VIN and returns year, make, model, trim, etc.
    The VIN is uppercased and stripped of surrounding whitespace before
    validation.

    Args:
        request: Request body carrying the raw VIN string.

    Returns:
        The decoded vehicle data, the normalized VIN, the confidence score,
        and the time spent in the decode call in milliseconds.

    Raises:
        HTTPException: 400 if the VIN fails format validation, 503 if the
            Gemini engine is unavailable, 422 if Gemini fails to process
            the VIN.
    """
    # Function-scope import keeps this change self-contained within the handler.
    import asyncio

    vin = request.vin.upper().strip()
    if not _VIN_REGEX.match(vin):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid VIN format: must be 17 alphanumeric characters (excluding I, O, Q). Got: {vin}",
        )

    start_ms = time.monotonic_ns() // 1_000_000

    try:
        # The engine call is synchronous; running it on a worker thread keeps
        # the event loop free to serve other requests while it is in flight.
        # NOTE(review): assumes GeminiEngine's lazy initialization tolerates
        # concurrent first calls -- confirm against _get_model().
        result = await asyncio.to_thread(_gemini_engine.decode_vin, vin)
    except GeminiUnavailableError as exc:
        logger.error("Gemini unavailable for VIN decode: %s", exc)
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    except GeminiProcessingError as exc:
        logger.error("Gemini processing error for VIN %s: %s", vin, exc)
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    elapsed_ms = (time.monotonic_ns() // 1_000_000) - start_ms

    return VinDecodeResponse(
        success=True,
        vin=vin,
        year=result.year,
        make=result.make,
        model=result.model,
        trimLevel=result.trim_level,
        bodyType=result.body_type,
        driveType=result.drive_type,
        fuelType=result.fuel_type,
        engine=result.engine,
        transmission=result.transmission,
        confidence=result.confidence,
        processingTimeMs=elapsed_ms,
        error=None,
    )