diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py
index b26a3c0..6fa347a 100644
--- a/ocr/app/engines/gemini_engine.py
+++ b/ocr/app/engines/gemini_engine.py
@@ -1,4 +1,4 @@
-"""Gemini 2.5 Flash engine for maintenance schedule extraction from PDFs.
+"""Gemini 2.5 Flash engine for document understanding and VIN decode.
 
 Standalone module (does NOT extend OcrEngine) because Gemini performs
 semantic document understanding, not traditional OCR word-box extraction.
@@ -37,6 +37,31 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati
 Return the results as a JSON object with a single "maintenanceSchedule" array.\
 """
 
+_VIN_DECODE_PROMPT = """\
+Given the VIN (Vehicle Identification Number) below, decode it and return the vehicle specifications.
+
+VIN: {vin}
+
+Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
+"""
+
+_VIN_DECODE_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "properties": {
+        "year": {"type": "integer", "nullable": True},
+        "make": {"type": "string", "nullable": True},
+        "model": {"type": "string", "nullable": True},
+        "trimLevel": {"type": "string", "nullable": True},
+        "bodyType": {"type": "string", "nullable": True},
+        "driveType": {"type": "string", "nullable": True},
+        "fuelType": {"type": "string", "nullable": True},
+        "engine": {"type": "string", "nullable": True},
+        "transmission": {"type": "string", "nullable": True},
+        "confidence": {"type": "number"},
+    },
+    "required": ["confidence"],
+}
+
 _RESPONSE_SCHEMA: dict[str, Any] = {
     "type": "object",
     "properties": {
@@ -70,6 +95,22 @@ class GeminiProcessingError(GeminiEngineError):
     """Raised when Gemini fails to process a document."""
 
 
+@dataclass
+class VinDecodeResult:
+    """Result from Gemini VIN decode."""
+
+    year: int | None = None
+    make: str | None = None
+    model: str | None = None
+    trim_level: str | None = None
+    body_type: str | None = None
+    drive_type: str | None = None
+    fuel_type: str | None = None
+    engine: str | None = None
+    transmission: str | None = None
+    confidence: float = 0.0
+
+
 @dataclass
 class MaintenanceItem:
     """A single extracted maintenance schedule item."""
@@ -89,13 +130,13 @@ class MaintenanceExtractionResult:
 
 
 class GeminiEngine:
-    """Gemini 2.5 Flash wrapper for maintenance schedule extraction.
+    """Gemini 2.5 Flash wrapper for maintenance schedule extraction and VIN decode.
 
     Standalone class (not an OcrEngine subclass) because Gemini performs
     semantic document understanding rather than traditional OCR.
 
     Uses lazy initialization: the Vertex AI client is not created until
-    the first ``extract_maintenance()`` call.
+    the first call to ``extract_maintenance()`` or ``decode_vin()``.
     """
 
     def __init__(self) -> None:
@@ -228,3 +269,77 @@ class GeminiEngine:
             raise GeminiProcessingError(
                 f"Gemini maintenance extraction failed: {exc}"
             ) from exc
+
+    def decode_vin(self, vin: str) -> VinDecodeResult:
+        """Decode a VIN string into structured vehicle data via Gemini.
+
+        Args:
+            vin: A 17-character Vehicle Identification Number.
+
+        Returns:
+            Structured vehicle specification result. ``confidence`` is
+            always a float clamped to the range 0.0-1.0.
+
+        Raises:
+            GeminiProcessingError: If Gemini fails to decode the VIN.
+            GeminiUnavailableError: If the engine cannot be initialized.
+        """
+        model = self._get_model()
+
+        try:
+            from vertexai.generative_models import GenerationConfig  # type: ignore[import-untyped]
+
+            vin_config = GenerationConfig(
+                response_mime_type="application/json",
+                response_schema=_VIN_DECODE_SCHEMA,
+            )
+
+            prompt = _VIN_DECODE_PROMPT.format(vin=vin)
+            response = model.generate_content(
+                [prompt],
+                generation_config=vin_config,
+            )
+
+            raw = json.loads(response.text)
+            if not isinstance(raw, dict):
+                raise GeminiProcessingError(
+                    "Gemini returned a non-object JSON payload for VIN decode"
+                )
+
+            # The model can still emit null, a string, or an out-of-range
+            # number for confidence; normalise it so the API response
+            # model's 0.0-1.0 bound cannot reject a successful decode.
+            try:
+                confidence = float(raw.get("confidence") or 0.0)
+            except (TypeError, ValueError, OverflowError):
+                confidence = 0.0
+            if not confidence >= 0.0:  # negative or NaN
+                confidence = 0.0
+            elif confidence > 1.0:
+                confidence = 1.0
+
+            logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, confidence)
+
+            return VinDecodeResult(
+                year=raw.get("year"),
+                make=raw.get("make"),
+                model=raw.get("model"),
+                trim_level=raw.get("trimLevel"),
+                body_type=raw.get("bodyType"),
+                drive_type=raw.get("driveType"),
+                fuel_type=raw.get("fuelType"),
+                engine=raw.get("engine"),
+                transmission=raw.get("transmission"),
+                confidence=confidence,
+            )
+
+        except GeminiEngineError:
+            raise
+        except json.JSONDecodeError as exc:
+            raise GeminiProcessingError(
+                f"Gemini returned invalid JSON for VIN decode: {exc}"
+            ) from exc
+        except Exception as exc:
+            raise GeminiProcessingError(
+                f"Gemini VIN decode failed: {exc}"
+            ) from exc
diff --git a/ocr/app/main.py b/ocr/app/main.py
index d5c87ab..df0a9f2 100644
--- a/ocr/app/main.py
+++ b/ocr/app/main.py
@@ -6,7 +6,7 @@ from typing import AsyncIterator
 from fastapi import FastAPI
 
 from app.config import settings
-from app.routers import extract_router, jobs_router
+from app.routers import decode_router, extract_router, jobs_router
 from app.services import job_queue
 
 # Configure logging
@@ -36,6 +36,7 @@ app = FastAPI(
 )
 
 # Include routers
+app.include_router(decode_router)
 app.include_router(extract_router)
 app.include_router(jobs_router)
 
@@ -54,6 +55,7 @@ async def root() -> dict:
         "version": "1.0.0",
         "log_level": settings.log_level,
         "endpoints": [
+            "POST /decode/vin - VIN string decode via Gemini",
             "POST /extract - Synchronous OCR extraction",
             "POST /extract/vin - VIN-specific extraction with validation",
             "POST /extract/receipt - Receipt extraction (fuel, general)",
diff --git a/ocr/app/models/__init__.py b/ocr/app/models/__init__.py
index 6c5a8aa..c72cdc0 100644
--- a/ocr/app/models/__init__.py
+++ b/ocr/app/models/__init__.py
@@ -14,6 +14,8 @@ from .schemas import (
     ReceiptExtractedField,
     ReceiptExtractionResponse,
     VinAlternative,
+    VinDecodeRequest,
+    VinDecodeResponse,
     VinExtractionResponse,
 )
 
@@ -32,5 +34,7 @@ __all__ = [
     "ReceiptExtractedField",
     "ReceiptExtractionResponse",
     "VinAlternative",
+    "VinDecodeRequest",
+    "VinDecodeResponse",
     "VinExtractionResponse",
 ]
diff --git a/ocr/app/models/schemas.py b/ocr/app/models/schemas.py
index d6a8737..826a27c 100644
--- a/ocr/app/models/schemas.py
+++ b/ocr/app/models/schemas.py
@@ -169,3 +169,30 @@ class ManualJobResponse(BaseModel):
     error: Optional[str] = None
 
     model_config = {"populate_by_name": True}
+
+
+class VinDecodeRequest(BaseModel):
+    """Request body for VIN decode endpoint."""
+
+    vin: str
+
+
+class VinDecodeResponse(BaseModel):
+    """Response from VIN decode endpoint."""
+
+    success: bool
+    vin: str
+    year: Optional[int] = None
+    make: Optional[str] = None
+    model: Optional[str] = None
+    trim_level: Optional[str] = Field(default=None, alias="trimLevel")
+    body_type: Optional[str] = Field(default=None, alias="bodyType")
+    drive_type: Optional[str] = Field(default=None, alias="driveType")
+    fuel_type: Optional[str] = Field(default=None, alias="fuelType")
+    engine: Optional[str] = None
+    transmission: Optional[str] = None
+    confidence: float = Field(ge=0.0, le=1.0)
+    processing_time_ms: int = Field(alias="processingTimeMs")
+    error: Optional[str] = None
+
+    model_config = {"populate_by_name": True}
diff --git a/ocr/app/routers/__init__.py b/ocr/app/routers/__init__.py
index ded0afd..df35f23 100644
--- a/ocr/app/routers/__init__.py
+++ b/ocr/app/routers/__init__.py
@@ -1,5 +1,6 @@
 """OCR API routers."""
+from .decode import router as decode_router
 from .extract import router as extract_router
 from .jobs import router as jobs_router
 
-__all__ = ["extract_router", "jobs_router"]
+__all__ = ["decode_router", "extract_router", "jobs_router"]
diff --git a/ocr/app/routers/decode.py b/ocr/app/routers/decode.py
new file mode 100644
index 0000000..7d737a7
--- /dev/null
+++ b/ocr/app/routers/decode.py
@@ -0,0 +1,70 @@
+"""VIN decode router - Gemini-powered VIN string decoding."""
+import logging
+import re
+import time
+
+from fastapi import APIRouter, HTTPException
+
+from app.engines.gemini_engine import (
+    GeminiEngine,
+    GeminiProcessingError,
+    GeminiUnavailableError,
+)
+from app.models import VinDecodeRequest, VinDecodeResponse
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/decode", tags=["decode"])
+
+_VIN_REGEX = re.compile(r"^[A-HJ-NPR-Z0-9]{17}$")
+
+# Shared engine instance (lazy init on first request)
+_gemini_engine = GeminiEngine()
+
+
+@router.post("/vin", response_model=VinDecodeResponse)
+def decode_vin(request: VinDecodeRequest) -> VinDecodeResponse:
+    """Decode a VIN string into structured vehicle data using Gemini.
+
+    Accepts a 17-character VIN and returns year, make, model, trim, etc.
+
+    Declared as a plain ``def`` so FastAPI runs it in its threadpool: the
+    Gemini call is synchronous and would otherwise block the event loop.
+    """
+    vin = request.vin.upper().strip()
+
+    if not _VIN_REGEX.match(vin):
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid VIN format: must be 17 alphanumeric characters (excluding I, O, Q). Got: {vin}",
+        )
+
+    start_ns = time.monotonic_ns()
+
+    try:
+        result = _gemini_engine.decode_vin(vin)
+    except GeminiUnavailableError as exc:
+        logger.error("Gemini unavailable for VIN decode: %s", exc)
+        raise HTTPException(status_code=503, detail=str(exc)) from exc
+    except GeminiProcessingError as exc:
+        logger.error("Gemini processing error for VIN %s: %s", vin, exc)
+        raise HTTPException(status_code=422, detail=str(exc)) from exc
+
+    elapsed_ms = (time.monotonic_ns() - start_ns) // 1_000_000
+
+    return VinDecodeResponse(
+        success=True,
+        vin=vin,
+        year=result.year,
+        make=result.make,
+        model=result.model,
+        trimLevel=result.trim_level,
+        bodyType=result.body_type,
+        driveType=result.drive_type,
+        fuelType=result.fuel_type,
+        engine=result.engine,
+        transmission=result.transmission,
+        confidence=result.confidence,
+        processingTimeMs=elapsed_ms,
+        error=None,
+    )
diff --git a/ocr/tests/test_vin_decode.py b/ocr/tests/test_vin_decode.py
new file mode 100644
index 0000000..2a1fa56
--- /dev/null
+++ b/ocr/tests/test_vin_decode.py
@@ -0,0 +1,199 @@
+"""Tests for the VIN decode endpoint (POST /decode/vin).
+
+Covers: valid VIN returns 200 with correct response shape,
+invalid VIN format returns 400, Gemini unavailable returns 503,
+and Gemini processing error returns 422.
+All GeminiEngine calls are mocked.
+"""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+from app.engines.gemini_engine import (
+    GeminiProcessingError,
+    GeminiUnavailableError,
+    VinDecodeResult,
+)
+from app.main import app
+
+client = TestClient(app)
+
+# A valid 17-character VIN (no I, O, Q)
+_VALID_VIN = "1HGBH41JXMN109186"
+
+_FULL_RESULT = VinDecodeResult(
+    year=2021,
+    make="Honda",
+    model="Civic",
+    trim_level="EX",
+    body_type="Sedan",
+    drive_type="FWD",
+    fuel_type="Gasoline",
+    engine="2.0L I4",
+    transmission="CVT",
+    confidence=0.95,
+)
+
+
+# --- Valid VIN ---
+
+
+class TestDecodeVinSuccess:
+    """Verify successful VIN decode returns 200 with correct response shape."""
+
+    @patch("app.routers.decode._gemini_engine")
+    def test_valid_vin_returns_200(self, mock_engine):
+        """Normal: Valid VIN returns 200 with all vehicle fields populated."""
+        mock_engine.decode_vin.return_value = _FULL_RESULT
+
+        response = client.post("/decode/vin", json={"vin": _VALID_VIN})
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["success"] is True
+        assert data["vin"] == _VALID_VIN
+        assert data["year"] == 2021
+        assert data["make"] == "Honda"
+        assert data["model"] == "Civic"
+        assert data["trimLevel"] == "EX"
+        assert data["bodyType"] == "Sedan"
+        assert data["driveType"] == "FWD"
+        assert data["fuelType"] == "Gasoline"
+        assert data["engine"] == "2.0L I4"
+        assert data["transmission"] == "CVT"
+        assert data["confidence"] == 0.95
+        assert "processingTimeMs" in data
+        assert data["error"] is None
+
+    @patch("app.routers.decode._gemini_engine")
+    def test_vin_uppercased_before_decode(self, mock_engine):
+        """VIN submitted in lowercase is normalised to uppercase before decoding."""
+        mock_engine.decode_vin.return_value = _FULL_RESULT
+
+        response = client.post("/decode/vin", json={"vin": _VALID_VIN.lower()})
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["vin"] == _VALID_VIN
+        mock_engine.decode_vin.assert_called_once_with(_VALID_VIN)
+
+    @patch("app.routers.decode._gemini_engine")
+    def test_nullable_fields_allowed(self, mock_engine):
+        """Edge: VIN decode with only confidence set returns valid response."""
+        mock_engine.decode_vin.return_value = VinDecodeResult(confidence=0.3)
+
+        response = client.post("/decode/vin", json={"vin": _VALID_VIN})
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["success"] is True
+        assert data["year"] is None
+        assert data["make"] is None
+        assert data["confidence"] == 0.3
+
+
+# --- Invalid VIN format ---
+
+
+class TestDecodeVinValidation:
+    """Verify invalid VIN formats return 400."""
+
+    def test_too_short_vin_returns_400(self):
+        """VIN shorter than 17 characters is rejected."""
+        response = client.post("/decode/vin", json={"vin": "1HGBH41JXM"})
+
+        assert response.status_code == 400
+        assert "Invalid VIN format" in response.json()["detail"]
+
+    def test_too_long_vin_returns_400(self):
+        """VIN longer than 17 characters is rejected."""
+        response = client.post("/decode/vin", json={"vin": "1HGBH41JXMN109186X"})
+
+        assert response.status_code == 400
+
+    def test_vin_with_letter_i_returns_400(self):
+        """VIN containing the letter I (invalid character) is rejected."""
+        # Replace position 0 with I to create invalid VIN
+        invalid_vin = "IHGBH41JXMN109186"
+        response = client.post("/decode/vin", json={"vin": invalid_vin})
+
+        assert response.status_code == 400
+        assert "Invalid VIN format" in response.json()["detail"]
+
+    def test_vin_with_letter_o_returns_400(self):
+        """VIN containing the letter O (invalid character) is rejected."""
+        invalid_vin = "OHGBH41JXMN109186"
+        response = client.post("/decode/vin", json={"vin": invalid_vin})
+
+        assert response.status_code == 400
+
+    def test_vin_with_letter_q_returns_400(self):
+        """VIN containing the letter Q (invalid character) is rejected."""
+        invalid_vin = "QHGBH41JXMN109186"
+        response = client.post("/decode/vin", json={"vin": invalid_vin})
+
+        assert response.status_code == 400
+
+    def test_empty_vin_returns_400(self):
+        """Empty VIN string is rejected."""
+        response = client.post("/decode/vin", json={"vin": ""})
+
+        assert response.status_code == 400
+
+    def test_vin_with_special_chars_returns_400(self):
+        """VIN containing special characters is rejected."""
+        response = client.post("/decode/vin", json={"vin": "1HGBH41J-MN109186"})
+
+        assert response.status_code == 400
+
+
+# --- Gemini unavailable ---
+
+
+class TestDecodeVinGeminiUnavailable:
+    """Verify Gemini service unavailability returns 503."""
+
+    @patch("app.routers.decode._gemini_engine")
+    def test_gemini_unavailable_returns_503(self, mock_engine):
+        """When Gemini cannot be initialized, endpoint returns 503."""
+        mock_engine.decode_vin.side_effect = GeminiUnavailableError(
+            "Google credential config not found"
+        )
+
+        response = client.post("/decode/vin", json={"vin": _VALID_VIN})
+
+        assert response.status_code == 503
+        assert "Google credential config not found" in response.json()["detail"]
+
+
+# --- Gemini processing error ---
+
+
+class TestDecodeVinGeminiProcessingError:
+    """Verify Gemini processing failures return 422."""
+
+    @patch("app.routers.decode._gemini_engine")
+    def test_gemini_processing_error_returns_422(self, mock_engine):
+        """When Gemini returns invalid output, endpoint returns 422."""
+        mock_engine.decode_vin.side_effect = GeminiProcessingError(
+            "Gemini returned invalid JSON for VIN decode: ..."
+        )
+
+        response = client.post("/decode/vin", json={"vin": _VALID_VIN})
+
+        assert response.status_code == 422
+        assert "Gemini returned invalid JSON" in response.json()["detail"]
+
+    @patch("app.routers.decode._gemini_engine")
+    def test_gemini_api_failure_returns_422(self, mock_engine):
+        """When Gemini API call fails at runtime, endpoint returns 422."""
+        mock_engine.decode_vin.side_effect = GeminiProcessingError(
+            "Gemini VIN decode failed: API quota exceeded"
+        )
+
+        response = client.post("/decode/vin", json={"vin": _VALID_VIN})
+
+        assert response.status_code == 422
+        assert "Gemini VIN decode failed" in response.json()["detail"]