feat: add VIN decode endpoint to OCR Python service (refs #224)
Add POST /decode/vin endpoint using Gemini 2.5 Flash for VIN string decoding. Returns structured vehicle data (year, make, model, trim, body/drive/fuel type, engine, transmission) with confidence score. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
"""Gemini 2.5 Flash engine for maintenance schedule extraction from PDFs.
|
||||
"""Gemini 2.5 Flash engine for document understanding and VIN decode.
|
||||
|
||||
Standalone module (does NOT extend OcrEngine) because Gemini performs
|
||||
semantic document understanding, not traditional OCR word-box extraction.
|
||||
@@ -37,6 +37,31 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati
|
||||
Return the results as a JSON object with a single "maintenanceSchedule" array.\
|
||||
"""
|
||||
|
||||
# Prompt template for VIN decoding. The single ``{vin}`` placeholder is filled
# in with ``str.format``; the text must not contain any other braces.
_VIN_DECODE_PROMPT = (
    "Given the VIN (Vehicle Identification Number) below, "
    "decode it and return the vehicle specifications.\n"
    "\n"
    "VIN: {vin}\n"
    "\n"
    "Return the vehicle's year, make, model, trim level, body type, "
    "drive type, fuel type, engine description, and transmission type. "
    "If a field cannot be determined from the VIN, return null for that field. "
    "Return a confidence score (0.0-1.0) indicating overall decode reliability."
)
|
||||
|
||||
# Response schema for VIN decoding: a flat object in which every vehicle
# attribute is nullable and only "confidence" is required.
_VIN_DECODE_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "year": {"type": "integer", "nullable": True},
        # All remaining vehicle attributes are nullable free-form strings.
        **{
            field_name: {"type": "string", "nullable": True}
            for field_name in (
                "make",
                "model",
                "trimLevel",
                "bodyType",
                "driveType",
                "fuelType",
                "engine",
                "transmission",
            )
        },
        "confidence": {"type": "number"},
    },
    "required": ["confidence"],
}
|
||||
|
||||
_RESPONSE_SCHEMA: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -70,6 +95,22 @@ class GeminiProcessingError(GeminiEngineError):
|
||||
"""Raised when Gemini fails to process a document."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class VinDecodeResult:
|
||||
"""Result from Gemini VIN decode."""
|
||||
|
||||
year: int | None = None
|
||||
make: str | None = None
|
||||
model: str | None = None
|
||||
trim_level: str | None = None
|
||||
body_type: str | None = None
|
||||
drive_type: str | None = None
|
||||
fuel_type: str | None = None
|
||||
engine: str | None = None
|
||||
transmission: str | None = None
|
||||
confidence: float = 0.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class MaintenanceItem:
|
||||
"""A single extracted maintenance schedule item."""
|
||||
@@ -89,13 +130,13 @@ class MaintenanceExtractionResult:
|
||||
|
||||
|
||||
class GeminiEngine:
|
||||
"""Gemini 2.5 Flash wrapper for maintenance schedule extraction.
|
||||
"""Gemini 2.5 Flash wrapper for maintenance schedule extraction and VIN decode.
|
||||
|
||||
Standalone class (not an OcrEngine subclass) because Gemini performs
|
||||
semantic document understanding rather than traditional OCR.
|
||||
|
||||
Uses lazy initialization: the Vertex AI client is not created until
|
||||
the first ``extract_maintenance()`` call.
|
||||
the first call to ``extract_maintenance()`` or ``decode_vin()``.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
@@ -228,3 +269,60 @@ class GeminiEngine:
|
||||
raise GeminiProcessingError(
|
||||
f"Gemini maintenance extraction failed: {exc}"
|
||||
) from exc
|
||||
|
||||
def decode_vin(self, vin: str) -> VinDecodeResult:
    """Decode a VIN string into structured vehicle data via Gemini.

    The VIN is substituted into ``_VIN_DECODE_PROMPT`` and sent to the
    model with a JSON response schema (``_VIN_DECODE_SCHEMA``). The JSON
    reply is then mapped onto a ``VinDecodeResult``.

    The reported confidence is coerced to ``float`` and clamped to
    ``[0.0, 1.0]``; a missing, null, or non-numeric value becomes ``0.0``.
    This keeps the log call and downstream range validation from failing
    on an otherwise usable decode.

    Args:
        vin: A 17-character Vehicle Identification Number.

    Returns:
        Structured vehicle specification result.

    Raises:
        GeminiProcessingError: If Gemini fails to decode the VIN, returns
            invalid JSON, or returns JSON that is not an object.
        GeminiUnavailableError: If the engine cannot be initialized.
    """
    # Obtained outside the try block so its errors propagate unchanged.
    model = self._get_model()

    try:
        from vertexai.generative_models import GenerationConfig  # type: ignore[import-untyped]

        vin_config = GenerationConfig(
            response_mime_type="application/json",
            response_schema=_VIN_DECODE_SCHEMA,
        )

        prompt = _VIN_DECODE_PROMPT.format(vin=vin)
        response = model.generate_content(
            [prompt],
            generation_config=vin_config,
        )

        raw = json.loads(response.text)

        # Valid JSON is not necessarily an object; fail with a clear message
        # instead of an AttributeError from ``.get`` below.
        if not isinstance(raw, dict):
            raise GeminiProcessingError(
                "Gemini returned a non-object JSON payload for VIN decode: "
                f"{type(raw).__name__}"
            )

        # ``raw.get("confidence", 0.0)`` would still yield None for an
        # explicit null, so normalise the value explicitly.
        try:
            confidence = float(raw.get("confidence") or 0.0)
        except (TypeError, ValueError):
            confidence = 0.0
        # max() first so that a NaN collapses to 0.0 rather than propagating.
        confidence = min(1.0, max(0.0, confidence))

        logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, confidence)

        return VinDecodeResult(
            year=raw.get("year"),
            make=raw.get("make"),
            model=raw.get("model"),
            trim_level=raw.get("trimLevel"),
            body_type=raw.get("bodyType"),
            drive_type=raw.get("driveType"),
            fuel_type=raw.get("fuelType"),
            engine=raw.get("engine"),
            transmission=raw.get("transmission"),
            confidence=confidence,
        )

    except GeminiEngineError:
        # Already a domain error (including the one raised above); do not re-wrap.
        raise
    except json.JSONDecodeError as exc:
        raise GeminiProcessingError(
            f"Gemini returned invalid JSON for VIN decode: {exc}"
        ) from exc
    except Exception as exc:
        raise GeminiProcessingError(
            f"Gemini VIN decode failed: {exc}"
        ) from exc
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import AsyncIterator
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app.config import settings
|
||||
from app.routers import extract_router, jobs_router
|
||||
from app.routers import decode_router, extract_router, jobs_router
|
||||
from app.services import job_queue
|
||||
|
||||
# Configure logging
|
||||
@@ -36,6 +36,7 @@ app = FastAPI(
|
||||
)
|
||||
|
||||
# Include routers
|
||||
app.include_router(decode_router)
|
||||
app.include_router(extract_router)
|
||||
app.include_router(jobs_router)
|
||||
|
||||
@@ -54,6 +55,7 @@ async def root() -> dict:
|
||||
"version": "1.0.0",
|
||||
"log_level": settings.log_level,
|
||||
"endpoints": [
|
||||
"POST /decode/vin - VIN string decode via Gemini",
|
||||
"POST /extract - Synchronous OCR extraction",
|
||||
"POST /extract/vin - VIN-specific extraction with validation",
|
||||
"POST /extract/receipt - Receipt extraction (fuel, general)",
|
||||
|
||||
@@ -14,6 +14,8 @@ from .schemas import (
|
||||
ReceiptExtractedField,
|
||||
ReceiptExtractionResponse,
|
||||
VinAlternative,
|
||||
VinDecodeRequest,
|
||||
VinDecodeResponse,
|
||||
VinExtractionResponse,
|
||||
)
|
||||
|
||||
@@ -32,5 +34,7 @@ __all__ = [
|
||||
"ReceiptExtractedField",
|
||||
"ReceiptExtractionResponse",
|
||||
"VinAlternative",
|
||||
"VinDecodeRequest",
|
||||
"VinDecodeResponse",
|
||||
"VinExtractionResponse",
|
||||
]
|
||||
|
||||
@@ -169,3 +169,30 @@ class ManualJobResponse(BaseModel):
|
||||
error: Optional[str] = None
|
||||
|
||||
model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class VinDecodeRequest(BaseModel):
    """Payload accepted by the VIN decode endpoint.

    Carries only the VIN string; no format constraint is declared on the
    model itself.
    """

    vin: str
|
||||
|
||||
|
||||
class VinDecodeResponse(BaseModel):
    """Payload returned by the VIN decode endpoint.

    Snake-case attributes are exposed under camelCase aliases; with
    ``populate_by_name`` enabled the model can be constructed using either
    spelling.
    """

    model_config = {"populate_by_name": True}

    # Outcome and the VIN the result refers to.
    success: bool
    vin: str

    # Decoded vehicle attributes; each one is optional.
    year: Optional[int] = None
    make: Optional[str] = None
    model: Optional[str] = None
    trim_level: Optional[str] = Field(None, alias="trimLevel")
    body_type: Optional[str] = Field(None, alias="bodyType")
    drive_type: Optional[str] = Field(None, alias="driveType")
    fuel_type: Optional[str] = Field(None, alias="fuelType")
    engine: Optional[str] = None
    transmission: Optional[str] = None

    # Required metadata: confidence must lie within [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
    processing_time_ms: int = Field(alias="processingTimeMs")
    error: Optional[str] = None
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""OCR API routers."""
|
||||
from .decode import router as decode_router
|
||||
from .extract import router as extract_router
|
||||
from .jobs import router as jobs_router
|
||||
|
||||
__all__ = ["extract_router", "jobs_router"]
|
||||
__all__ = ["decode_router", "extract_router", "jobs_router"]
|
||||
|
||||
67
ocr/app/routers/decode.py
Normal file
67
ocr/app/routers/decode.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""VIN decode router - Gemini-powered VIN string decoding."""
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.engines.gemini_engine import (
|
||||
GeminiEngine,
|
||||
GeminiProcessingError,
|
||||
GeminiUnavailableError,
|
||||
)
|
||||
from app.models import VinDecodeRequest, VinDecodeResponse
|
||||
|
||||
logger = logging.getLogger(__name__)

# Every route declared in this module is mounted under /decode.
router = APIRouter(prefix="/decode", tags=["decode"])

# Exactly 17 characters drawn from digits and upper-case letters,
# with I, O and Q excluded.
_VIN_REGEX = re.compile(r"^[A-HJ-NPR-Z0-9]{17}$")

# One engine shared by all requests (lazy init on first request).
_gemini_engine = GeminiEngine()
|
||||
|
||||
|
||||
@router.post("/vin", response_model=VinDecodeResponse)
async def decode_vin(request: VinDecodeRequest) -> VinDecodeResponse:
    """Decode a VIN string into structured vehicle data using Gemini.

    Accepts a 17-character VIN and returns year, make, model, trim, etc.
    The VIN is upper-cased and stripped of surrounding whitespace before it
    is validated against ``_VIN_REGEX``.

    The engine call is synchronous, so it is run on a worker thread to keep
    the event loop free to serve other requests while Gemini responds.

    Raises:
        HTTPException: 400 if the VIN fails format validation, 503 if the
            Gemini engine is unavailable, 422 if Gemini cannot process
            the VIN.
    """
    # Function-scope import: nothing else in this module needs asyncio.
    import asyncio

    vin = request.vin.upper().strip()

    if not _VIN_REGEX.match(vin):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid VIN format: must be 17 alphanumeric characters (excluding I, O, Q). Got: {vin}",
        )

    start_ms = time.monotonic_ns() // 1_000_000

    try:
        # NOTE(review): the engine now runs on worker threads, so concurrent
        # first requests may overlap -- confirm GeminiEngine's lazy
        # initialization tolerates that.
        result = await asyncio.to_thread(_gemini_engine.decode_vin, vin)
    except GeminiUnavailableError as exc:
        logger.error("Gemini unavailable for VIN decode: %s", exc)
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    except GeminiProcessingError as exc:
        logger.error("Gemini processing error for VIN %s: %s", vin, exc)
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    elapsed_ms = (time.monotonic_ns() // 1_000_000) - start_ms

    return VinDecodeResponse(
        success=True,
        vin=vin,
        year=result.year,
        make=result.make,
        model=result.model,
        trimLevel=result.trim_level,
        bodyType=result.body_type,
        driveType=result.drive_type,
        fuelType=result.fuel_type,
        engine=result.engine,
        transmission=result.transmission,
        confidence=result.confidence,
        processingTimeMs=elapsed_ms,
        error=None,
    )
|
||||
Reference in New Issue
Block a user