All checks were successful
Deploy to Staging / Build Images (push) Successful in 37s
Deploy to Staging / Deploy to Staging (push) Successful in 51s
Deploy to Staging / Verify Staging (push) Successful in 8s
Deploy to Staging / Notify Staging Ready (push) Successful in 7s
Deploy to Staging / Notify Staging Failure (push) Has been skipped
gemini-3-flash-preview was hallucinating year (e.g., returning 1993 instead of 2023 for position-10 code P). Prompt now includes the full 1980-2039 year code table and position-7 disambiguation rule. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
344 lines
12 KiB
Python
344 lines
12 KiB
Python
"""Gemini 2.5 Flash engine for document understanding and VIN decode.
|
|
|
|
Standalone module (does NOT extend OcrEngine) because Gemini performs
|
|
semantic document understanding, not traditional OCR word-box extraction.
|
|
Uses Vertex AI SDK with structured JSON output enforcement.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
from app.config import settings
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
# Upper bound (20 MB) on a PDF that may be sent inline as base64 data.
_MAX_PDF_BYTES = 20 * 1024**2
|
|
|
|
_EXTRACTION_PROMPT = """\
|
|
Extract all routine scheduled maintenance items from this vehicle owners manual.
|
|
|
|
For each maintenance item, extract:
|
|
- serviceName: The maintenance task name (e.g., "Engine Oil Change", "Tire Rotation", \
|
|
"Cabin Air Filter Replacement")
|
|
- intervalMiles: The mileage interval as a number, or null if not specified \
|
|
(e.g., 5000, 30000)
|
|
- intervalMonths: The time interval in months as a number, or null if not specified \
|
|
(e.g., 6, 12, 24)
|
|
- details: Any additional details such as fluid specifications, part numbers, \
|
|
or special instructions (e.g., "Use 0W-20 full synthetic oil")
|
|
|
|
Only include routine scheduled maintenance items with clear intervals. \
|
|
Do not include one-time procedures, troubleshooting steps, or warranty information.
|
|
|
|
Return the results as a JSON object with a single "maintenanceSchedule" array.\
|
|
"""
|
|
|
|
# Prompt template for ``GeminiEngine.decode_vin``; ``{vin}`` is substituted
# with ``str.format``.
#
# The position-10 table lists both candidate years of the 30-year cycle. The
# tie-break follows the NHTSA VIN rules (49 CFR 565) for passenger cars and
# light trucks/MPVs: a NUMERIC position 7 selects the 1980-2009 year and an
# ALPHABETIC position 7 selects the 2010-2039 year. An earlier revision of this
# prompt stated the rule the other way round, which pushed the model toward
# e.g. 1993 instead of 2023 for year code "P".
_VIN_DECODE_PROMPT = """\
Decode the following VIN (Vehicle Identification Number) using standard VIN structure rules.

VIN: {vin}

VIN position reference:
- Positions 1-3 (WMI): World Manufacturer Identifier (country + manufacturer)
- Positions 4-8 (VDS): Vehicle attributes (model, body, engine, etc.)
- Position 9: Check digit
- Position 10: Model year code. Codes repeat on a 30-year cycle:
A=1980/2010 B=1981/2011 C=1982/2012 D=1983/2013 E=1984/2014
F=1985/2015 G=1986/2016 H=1987/2017 J=1988/2018 K=1989/2019
L=1990/2020 M=1991/2021 N=1992/2022 P=1993/2023 R=1994/2024
S=1995/2025 T=1996/2026 V=1997/2027 W=1998/2028 X=1999/2029
Y=2000/2030 1=2001/2031 2=2002/2032 3=2003/2033 4=2004/2034
5=2005/2035 6=2006/2036 7=2007/2037 8=2008/2038 9=2009/2039
To disambiguate (passenger cars and light trucks up to 10,000 lb GVWR): if position 7 is alphabetic, use the 2010-2039 year; if position 7 is numeric, use the 1980-2009 year.
- Position 11: Assembly plant
- Positions 12-17: Sequential production number

Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. The year MUST be derived from position 10 using the table above. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
"""
|
|
|
|
# Structured-output schema passed as ``response_schema`` for VIN decoding.
# Every descriptive attribute is a nullable string; ``year`` is a nullable
# integer and ``confidence`` is the only required property.
_VIN_DECODE_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "year": {"type": "integer", "nullable": True},
        **{
            text_field: {"type": "string", "nullable": True}
            for text_field in (
                "make",
                "model",
                "trimLevel",
                "bodyType",
                "driveType",
                "fuelType",
                "engine",
                "transmission",
            )
        },
        "confidence": {"type": "number"},
    },
    "required": ["confidence"],
}
|
|
|
|
# Structured-output schema passed as ``response_schema`` for maintenance
# extraction: one object holding a "maintenanceSchedule" array whose entries
# must carry a ``serviceName``; the interval and details fields are nullable.
_RESPONSE_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "maintenanceSchedule": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "serviceName": dict(type="string"),
                    "intervalMiles": dict(type="number", nullable=True),
                    "intervalMonths": dict(type="number", nullable=True),
                    "details": dict(type="string", nullable=True),
                },
                "required": ["serviceName"],
            },
        },
    },
    "required": ["maintenanceSchedule"],
}
|
|
|
|
|
|
class GeminiEngineError(Exception):
    """Root of the exception hierarchy raised by the Gemini engine."""
|
|
|
|
|
|
class GeminiUnavailableError(GeminiEngineError):
    """Signals that the Gemini engine could not be initialized."""
|
|
|
|
|
|
class GeminiProcessingError(GeminiEngineError):
    """Signals that Gemini failed while processing a document."""
|
|
|
|
|
|
@dataclass
|
|
class VinDecodeResult:
|
|
"""Result from Gemini VIN decode."""
|
|
|
|
year: int | None = None
|
|
make: str | None = None
|
|
model: str | None = None
|
|
trim_level: str | None = None
|
|
body_type: str | None = None
|
|
drive_type: str | None = None
|
|
fuel_type: str | None = None
|
|
engine: str | None = None
|
|
transmission: str | None = None
|
|
confidence: float = 0.0
|
|
|
|
|
|
@dataclass
|
|
class MaintenanceItem:
|
|
"""A single extracted maintenance schedule item."""
|
|
|
|
service_name: str
|
|
interval_miles: int | None = None
|
|
interval_months: int | None = None
|
|
details: str | None = None
|
|
|
|
|
|
@dataclass
class MaintenanceExtractionResult:
    """Outcome of a Gemini maintenance schedule extraction."""

    # Extracted schedule entries, in the order they were parsed.
    items: list[MaintenanceItem]

    # Model identifier recorded alongside the extraction.
    model: str
|
|
|
|
|
|
class GeminiEngine:
    """Gemini wrapper for maintenance schedule extraction and VIN decode.

    Standalone class (not an OcrEngine subclass) because Gemini performs
    semantic document understanding rather than traditional OCR.

    The model is selected by ``settings.gemini_model``. Initialization is
    lazy: the Vertex AI client is not created until the first call to
    ``extract_maintenance()`` or ``decode_vin()``.
    """

    # Characters permitted in a 17-character VIN (I, O and Q are never used).
    _VIN_ALPHABET = frozenset("ABCDEFGHJKLMNPRSTUVWXYZ0123456789")
    _VIN_LENGTH = 17

    def __init__(self) -> None:
        self._model: Any | None = None
        # Populated together with ``_model`` by ``_get_model()``.
        self._generation_config: Any | None = None

    def _get_model(self) -> Any:
        """Create the GenerativeModel on first use and cache it.

        Authentication uses the same WIF credential path as Google Vision.

        Returns:
            The cached ``GenerativeModel`` instance.

        Raises:
            GeminiUnavailableError: If the credential file is missing, the
                Vertex AI SDK cannot be imported, or initialization fails.
        """
        if self._model is not None:
            return self._model

        key_path = settings.google_vision_key_path
        # Guard against an unset path so os.path.isfile() never sees None.
        if not key_path or not os.path.isfile(key_path):
            raise GeminiUnavailableError(
                f"Google credential config not found at {key_path}. "
                "Set GOOGLE_VISION_KEY_PATH or mount the secret."
            )

        try:
            from google.cloud import aiplatform  # type: ignore[import-untyped]
            from vertexai.generative_models import (  # type: ignore[import-untyped]
                GenerationConfig,
                GenerativeModel,
            )

            # Point ADC at the WIF credential config
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
            os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"

            aiplatform.init(
                project=settings.vertex_ai_project,
                location=settings.vertex_ai_location,
            )

            model_name = settings.gemini_model
            # Build both objects before touching instance state so a failure
            # cannot leave a cached model without its generation config.
            generation_config = GenerationConfig(
                response_mime_type="application/json",
                response_schema=_RESPONSE_SCHEMA,
            )
            model = GenerativeModel(model_name)
            self._generation_config = generation_config
            self._model = model

            logger.info(
                "Gemini engine initialized (model=%s, project=%s, location=%s)",
                model_name,
                settings.vertex_ai_project,
                settings.vertex_ai_location,
            )
            return self._model

        except ImportError as exc:
            logger.exception("Vertex AI SDK import failed")
            raise GeminiUnavailableError(
                "google-cloud-aiplatform is not installed. "
                "Install with: pip install google-cloud-aiplatform"
            ) from exc
        except Exception as exc:
            logger.exception("Vertex AI authentication failed")
            raise GeminiUnavailableError(
                f"Vertex AI authentication failed: {exc}"
            ) from exc

    @staticmethod
    def _coerce_int(value: Any) -> int | None:
        """Convert a JSON number to ``int``; return ``None`` if not possible."""
        if value is None or isinstance(value, bool):
            return None
        if isinstance(value, int):
            return value
        try:
            return round(float(value))
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, NaN or infinite values carry no usable number.
            return None

    @staticmethod
    def _coerce_confidence(value: Any) -> float:
        """Convert a confidence value to a float clamped to [0.0, 1.0]."""
        try:
            score = float(value)
        except (TypeError, ValueError):
            return 0.0
        if score != score:  # NaN never compares equal to itself
            return 0.0
        return min(1.0, max(0.0, score))

    @classmethod
    def _normalize_vin(cls, vin: str) -> str:
        """Strip and upper-case *vin*, rejecting anything that is not a VIN.

        The value is interpolated into an LLM prompt, so arbitrary caller
        text must not be passed through.
        """
        if not isinstance(vin, str):
            raise GeminiProcessingError("Invalid VIN: expected a string")
        candidate = vin.strip().upper()
        if len(candidate) != cls._VIN_LENGTH or not cls._VIN_ALPHABET.issuperset(candidate):
            raise GeminiProcessingError(
                "Invalid VIN: expected 17 characters from A-Z and 0-9 "
                "(excluding I, O and Q)"
            )
        return candidate

    def extract_maintenance(
        self, pdf_bytes: bytes
    ) -> MaintenanceExtractionResult:
        """Extract maintenance schedules from a PDF owners manual.

        Args:
            pdf_bytes: Raw PDF file bytes (<= 20 MB).

        Returns:
            Structured maintenance extraction result. Interval values are
            converted to integers because the response schema allows any
            JSON number.

        Raises:
            GeminiProcessingError: If the PDF is too large or extraction fails.
            GeminiUnavailableError: If the engine cannot be initialized.
        """
        if len(pdf_bytes) > _MAX_PDF_BYTES:
            size_mb = len(pdf_bytes) / (1024 * 1024)
            raise GeminiProcessingError(
                f"PDF size ({size_mb:.1f} MB) exceeds the 20 MB limit for "
                "inline processing. Upload to GCS and use a gs:// URI instead."
            )

        model = self._get_model()

        try:
            from vertexai.generative_models import Part  # type: ignore[import-untyped]

            pdf_part = Part.from_data(
                data=pdf_bytes,
                mime_type="application/pdf",
            )

            response = model.generate_content(
                [pdf_part, _EXTRACTION_PROMPT],
                generation_config=self._generation_config,
            )

            raw = json.loads(response.text)
            if not isinstance(raw, dict):
                raise GeminiProcessingError(
                    "Gemini returned a non-object JSON payload for "
                    "maintenance extraction"
                )

            # ``or []`` also covers an explicit null for the array.
            items = [
                MaintenanceItem(
                    service_name=item["serviceName"],
                    interval_miles=self._coerce_int(item.get("intervalMiles")),
                    interval_months=self._coerce_int(item.get("intervalMonths")),
                    details=item.get("details"),
                )
                for item in raw.get("maintenanceSchedule") or []
            ]

            logger.info(
                "Gemini extracted %d maintenance items from PDF (%d bytes)",
                len(items),
                len(pdf_bytes),
            )

            return MaintenanceExtractionResult(
                items=items,
                model=settings.gemini_model,
            )

        except GeminiEngineError:
            # Already a domain error (e.g. the payload check above).
            raise
        except json.JSONDecodeError as exc:
            raise GeminiProcessingError(
                f"Gemini returned invalid JSON: {exc}"
            ) from exc
        except Exception as exc:
            raise GeminiProcessingError(
                f"Gemini maintenance extraction failed: {exc}"
            ) from exc

    def decode_vin(self, vin: str) -> VinDecodeResult:
        """Decode a VIN string into structured vehicle data via Gemini.

        Args:
            vin: A 17-character Vehicle Identification Number. Surrounding
                whitespace is ignored and letters are upper-cased.

        Returns:
            Structured vehicle specification result. ``confidence`` is always
            a float in [0.0, 1.0]; a missing or null score becomes 0.0.

        Raises:
            GeminiProcessingError: If the VIN is malformed or Gemini fails to
                decode it.
            GeminiUnavailableError: If the engine cannot be initialized.
        """
        normalized_vin = self._normalize_vin(vin)
        model = self._get_model()

        try:
            from vertexai.generative_models import GenerationConfig  # type: ignore[import-untyped]

            vin_config = GenerationConfig(
                response_mime_type="application/json",
                response_schema=_VIN_DECODE_SCHEMA,
            )

            prompt = _VIN_DECODE_PROMPT.format(vin=normalized_vin)
            response = model.generate_content(
                [prompt],
                generation_config=vin_config,
            )

            raw = json.loads(response.text)
            if not isinstance(raw, dict):
                raise GeminiProcessingError(
                    "Gemini returned a non-object JSON payload for VIN decode"
                )

            confidence = self._coerce_confidence(raw.get("confidence"))
            logger.info(
                "Gemini decoded VIN %s (confidence=%.2f)",
                normalized_vin,
                confidence,
            )

            return VinDecodeResult(
                year=self._coerce_int(raw.get("year")),
                make=raw.get("make"),
                model=raw.get("model"),
                trim_level=raw.get("trimLevel"),
                body_type=raw.get("bodyType"),
                drive_type=raw.get("driveType"),
                fuel_type=raw.get("fuelType"),
                engine=raw.get("engine"),
                transmission=raw.get("transmission"),
                confidence=confidence,
            )

        except GeminiEngineError:
            # Already a domain error (e.g. the payload check above).
            raise
        except json.JSONDecodeError as exc:
            raise GeminiProcessingError(
                f"Gemini returned invalid JSON for VIN decode: {exc}"
            ) from exc
        except Exception as exc:
            raise GeminiProcessingError(
                f"Gemini VIN decode failed: {exc}"
            ) from exc
|