feat: add VIN decode endpoint to OCR Python service (refs #224)
Add a POST /decode/vin endpoint that uses Gemini 2.5 Flash to decode VIN strings. It returns structured vehicle data (year, make, model, trim, body/drive/fuel type, engine, transmission) together with a confidence score.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
"""Gemini 2.5 Flash engine for maintenance schedule extraction from PDFs.
|
||||
"""Gemini 2.5 Flash engine for document understanding and VIN decode.
|
||||
|
||||
Standalone module (does NOT extend OcrEngine) because Gemini performs
|
||||
semantic document understanding, not traditional OCR word-box extraction.
|
||||
@@ -37,6 +37,31 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati
|
||||
Return the results as a JSON object with a single "maintenanceSchedule" array.\
|
||||
"""
|
||||
|
||||
_VIN_DECODE_PROMPT = """\
|
||||
Given the VIN (Vehicle Identification Number) below, decode it and return the vehicle specifications.
|
||||
|
||||
VIN: {vin}
|
||||
|
||||
Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
|
||||
"""
|
||||
|
||||
_VIN_DECODE_SCHEMA: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"year": {"type": "integer", "nullable": True},
|
||||
"make": {"type": "string", "nullable": True},
|
||||
"model": {"type": "string", "nullable": True},
|
||||
"trimLevel": {"type": "string", "nullable": True},
|
||||
"bodyType": {"type": "string", "nullable": True},
|
||||
"driveType": {"type": "string", "nullable": True},
|
||||
"fuelType": {"type": "string", "nullable": True},
|
||||
"engine": {"type": "string", "nullable": True},
|
||||
"transmission": {"type": "string", "nullable": True},
|
||||
"confidence": {"type": "number"},
|
||||
},
|
||||
"required": ["confidence"],
|
||||
}
|
||||
|
||||
_RESPONSE_SCHEMA: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -70,6 +95,22 @@ class GeminiProcessingError(GeminiEngineError):
|
||||
"""Raised when Gemini fails to process a document."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class VinDecodeResult:
|
||||
"""Result from Gemini VIN decode."""
|
||||
|
||||
year: int | None = None
|
||||
make: str | None = None
|
||||
model: str | None = None
|
||||
trim_level: str | None = None
|
||||
body_type: str | None = None
|
||||
drive_type: str | None = None
|
||||
fuel_type: str | None = None
|
||||
engine: str | None = None
|
||||
transmission: str | None = None
|
||||
confidence: float = 0.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class MaintenanceItem:
|
||||
"""A single extracted maintenance schedule item."""
|
||||
@@ -89,13 +130,13 @@ class MaintenanceExtractionResult:
|
||||
|
||||
|
||||
class GeminiEngine:
|
||||
"""Gemini 2.5 Flash wrapper for maintenance schedule extraction.
|
||||
"""Gemini 2.5 Flash wrapper for maintenance schedule extraction and VIN decode.
|
||||
|
||||
Standalone class (not an OcrEngine subclass) because Gemini performs
|
||||
semantic document understanding rather than traditional OCR.
|
||||
|
||||
Uses lazy initialization: the Vertex AI client is not created until
|
||||
the first ``extract_maintenance()`` call.
|
||||
the first call to ``extract_maintenance()`` or ``decode_vin()``.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
@@ -228,3 +269,60 @@ class GeminiEngine:
|
||||
raise GeminiProcessingError(
|
||||
f"Gemini maintenance extraction failed: {exc}"
|
||||
) from exc
|
||||
|
||||
def decode_vin(self, vin: str) -> VinDecodeResult:
    """Decode a VIN string into structured vehicle data via Gemini.

    Formats the VIN into ``_VIN_DECODE_PROMPT``, requests a JSON reply
    constrained by ``_VIN_DECODE_SCHEMA``, and maps the camelCase keys of
    that reply onto a ``VinDecodeResult``.

    Args:
        vin: A 17-character Vehicle Identification Number. It is inserted
            into the prompt as given; no validation happens here.

    Returns:
        Structured vehicle specification result. Fields missing from the
        model's reply are ``None``; a missing, null or non-numeric
        confidence is reported as ``0.0``.

    Raises:
        GeminiProcessingError: If Gemini fails to decode the VIN, returns
            invalid JSON, or returns JSON that is not an object.
        GeminiUnavailableError: If the engine cannot be initialized.
    """
    # Resolved outside the try block so that errors raised while obtaining
    # the model propagate unchanged instead of being wrapped below.
    model = self._get_model()

    try:
        from vertexai.generative_models import GenerationConfig  # type: ignore[import-untyped]

        vin_config = GenerationConfig(
            response_mime_type="application/json",
            response_schema=_VIN_DECODE_SCHEMA,
        )

        prompt = _VIN_DECODE_PROMPT.format(vin=vin)
        response = model.generate_content(
            [prompt],
            generation_config=vin_config,
        )

        raw = json.loads(response.text)

        # Model output is untrusted: fail with a clear message rather than
        # an incidental AttributeError when the payload is not an object.
        if not isinstance(raw, dict):
            raise GeminiProcessingError(
                "Gemini returned a non-object JSON payload for VIN decode: "
                f"{type(raw).__name__}"
            )

        # A null or non-numeric confidence would break the %.2f log format
        # and violate the float-typed result field; fall back to 0.0.
        confidence = raw.get("confidence")
        if isinstance(confidence, bool) or not isinstance(confidence, (int, float)):
            confidence = 0.0
        confidence = float(confidence)

        logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, confidence)

        return VinDecodeResult(
            year=raw.get("year"),
            make=raw.get("make"),
            model=raw.get("model"),
            trim_level=raw.get("trimLevel"),
            body_type=raw.get("bodyType"),
            drive_type=raw.get("driveType"),
            fuel_type=raw.get("fuelType"),
            engine=raw.get("engine"),
            transmission=raw.get("transmission"),
            confidence=confidence,
        )

    except GeminiEngineError:
        # Already a domain error (e.g. the payload-shape check above);
        # re-raise as-is so it is not wrapped a second time.
        raise
    except json.JSONDecodeError as exc:
        raise GeminiProcessingError(
            f"Gemini returned invalid JSON for VIN decode: {exc}"
        ) from exc
    except Exception as exc:
        raise GeminiProcessingError(
            f"Gemini VIN decode failed: {exc}"
        ) from exc
|
||||
|
||||
Reference in New Issue
Block a user