All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 3m33s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 52s
Deploy to Staging / Verify Staging (pull_request) Successful in 9s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
429 lines
15 KiB
Python
429 lines
15 KiB
Python
"""Gemini 2.5 Flash engine for document understanding and VIN decode.
|
|
|
|
Standalone module (does NOT extend OcrEngine) because Gemini performs
|
|
semantic document understanding, not traditional OCR word-box extraction.
|
|
Uses google-genai SDK with structured JSON output enforcement.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
from typing import Any
|
|
|
|
from app.config import settings
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Hard ceiling (20 MB) on PDFs delivered inline to Gemini; larger
# documents are rejected before any request is made.
_MAX_PDF_BYTES = 20 * 1024**2
|
|
|
|
_EXTRACTION_PROMPT = """\
|
|
Extract all routine scheduled maintenance items from this vehicle owners manual.
|
|
|
|
For each maintenance item, extract:
|
|
- serviceName: The maintenance task name (e.g., "Engine Oil Change", "Tire Rotation", \
|
|
"Cabin Air Filter Replacement")
|
|
- intervalMiles: The mileage interval as a number, or null if not specified \
|
|
(e.g., 5000, 30000)
|
|
- intervalMonths: The time interval in months as a number, or null if not specified \
|
|
(e.g., 6, 12, 24)
|
|
- details: Any additional details such as fluid specifications, part numbers, \
|
|
or special instructions (e.g., "Use 0W-20 full synthetic oil")
|
|
|
|
Only include routine scheduled maintenance items with clear intervals. \
|
|
Do not include one-time procedures, troubleshooting steps, or warranty information.
|
|
|
|
Return the results as a JSON object with a single "maintenanceSchedule" array.\
|
|
"""
|
|
|
|
# VIN year code lookup: position 10 character -> base year (first cycle, 1980-2009).
|
|
# The 30-year cycle repeats: +30 for 2010-2039, +60 for 2040-2069.
|
|
# Disambiguation uses position 7: alphabetic -> 2010+ cycle, numeric -> 1980s cycle.
|
|
# Per NHTSA FMVSS No. 115: MY2010+ vehicles must use alphabetic position 7.
|
|
# For the 2040+ cycle (when position 7 is numeric again), we pick the most
|
|
# recent plausible year (not more than 2 years in the future).
|
|
_VIN_YEAR_CODES: dict[str, int] = {
|
|
"A": 1980, "B": 1981, "C": 1982, "D": 1983, "E": 1984,
|
|
"F": 1985, "G": 1986, "H": 1987, "J": 1988, "K": 1989,
|
|
"L": 1990, "M": 1991, "N": 1992, "P": 1993, "R": 1994,
|
|
"S": 1995, "T": 1996, "V": 1997, "W": 1998, "X": 1999,
|
|
"Y": 2000,
|
|
"1": 2001, "2": 2002, "3": 2003, "4": 2004, "5": 2005,
|
|
"6": 2006, "7": 2007, "8": 2008, "9": 2009,
|
|
}
|
|
|
|
|
|
def resolve_vin_year(vin: str) -> int | None:
|
|
"""Deterministically resolve model year from VIN positions 7 and 10.
|
|
|
|
VIN year codes repeat on a 30-year cycle. Position 7 disambiguates:
|
|
- Alphabetic position 7 -> 2010-2039 cycle (NHTSA MY2010+ requirement)
|
|
- Numeric position 7 -> 1980-2009 or 2040-2069 cycle
|
|
|
|
For the numeric case with two possible cycles, picks the most recent
|
|
year that is not more than 2 years in the future.
|
|
|
|
Returns None if the VIN is too short or position 10 is not a valid year code.
|
|
"""
|
|
if len(vin) < 17:
|
|
return None
|
|
|
|
code = vin[9].upper() # position 10 (0-indexed)
|
|
pos7 = vin[6].upper() # position 7 (0-indexed)
|
|
|
|
base_year = _VIN_YEAR_CODES.get(code)
|
|
if base_year is None:
|
|
return None
|
|
|
|
if pos7.isalpha():
|
|
# Alphabetic position 7 -> second cycle (2010-2039)
|
|
return base_year + 30
|
|
|
|
# Numeric position 7 -> first cycle (1980-2009) or third cycle (2040-2069)
|
|
# Pick the most recent plausible year
|
|
max_plausible = datetime.now().year + 2
|
|
|
|
third_cycle = base_year + 60 # 2040-2069
|
|
if third_cycle <= max_plausible:
|
|
return third_cycle
|
|
|
|
return base_year
|
|
|
|
|
|
_VIN_DECODE_PROMPT = """\
|
|
Decode the following VIN (Vehicle Identification Number) using standard VIN structure rules.
|
|
|
|
VIN: {vin}
|
|
Model year: {year} (determined from position 10 code '{year_code}')
|
|
|
|
The model year has already been resolved deterministically. Use {year} as the year.
|
|
|
|
VIN position reference:
|
|
- Positions 1-3 (WMI): World Manufacturer Identifier (country + manufacturer)
|
|
- Positions 4-8 (VDS): Vehicle attributes (model, body, engine, etc.)
|
|
- Position 9: Check digit
|
|
- Position 10: Model year code (30-year cycle, extended through 2050):
|
|
A=1980/2010/2040 B=1981/2011/2041 C=1982/2012/2042 D=1983/2013/2043 E=1984/2014/2044
|
|
F=1985/2015/2045 G=1986/2016/2046 H=1987/2017/2047 J=1988/2018/2048 K=1989/2019/2049
|
|
L=1990/2020/2050 M=1991/2021 N=1992/2022 P=1993/2023 R=1994/2024
|
|
S=1995/2025 T=1996/2026 V=1997/2027 W=1998/2028 X=1999/2029
|
|
Y=2000/2030 1=2001/2031 2=2002/2032 3=2003/2033 4=2004/2034
|
|
5=2005/2035 6=2006/2036 7=2007/2037 8=2008/2038 9=2009/2039
|
|
- Position 11: Assembly plant
|
|
- Positions 12-17: Sequential production number
|
|
|
|
Return the vehicle's make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
|
|
"""
|
|
|
|
# Structured-output schema for the VIN decode response. Only "confidence"
# is required; every vehicle attribute may come back as null.
_VIN_DECODE_SCHEMA: dict[str, Any] = {
    "type": "OBJECT",
    "properties": {
        "year": {"type": "INTEGER", "nullable": True},
        # The descriptive attributes are all optional free-text strings.
        **{
            field_name: {"type": "STRING", "nullable": True}
            for field_name in (
                "make",
                "model",
                "trimLevel",
                "bodyType",
                "driveType",
                "fuelType",
                "engine",
                "transmission",
            )
        },
        "confidence": {"type": "NUMBER"},
    },
    "required": ["confidence"],
}
|
|
|
|
# Shape of one entry in the extracted maintenance schedule. Only the
# service name is mandatory; intervals and details may be null.
_MAINTENANCE_ITEM_SCHEMA: dict[str, Any] = {
    "type": "OBJECT",
    "properties": {
        "serviceName": {"type": "STRING"},
        "intervalMiles": {"type": "NUMBER", "nullable": True},
        "intervalMonths": {"type": "NUMBER", "nullable": True},
        "details": {"type": "STRING", "nullable": True},
    },
    "required": ["serviceName"],
}

# Structured-output schema for maintenance extraction: a single required
# "maintenanceSchedule" array of the items described above.
_RESPONSE_SCHEMA: dict[str, Any] = {
    "type": "OBJECT",
    "properties": {
        "maintenanceSchedule": {
            "type": "ARRAY",
            "items": _MAINTENANCE_ITEM_SCHEMA,
        },
    },
    "required": ["maintenanceSchedule"],
}
|
|
|
|
|
|
class GeminiEngineError(Exception):
    """Root of the Gemini engine exception hierarchy.

    Catch this type to handle any failure raised by the engine.
    """


class GeminiUnavailableError(GeminiEngineError):
    """Signals that the Gemini client could not be set up.

    Raised for missing credentials, a missing SDK, or a failed client
    construction.
    """


class GeminiProcessingError(GeminiEngineError):
    """Signals that a Gemini request was rejected or could not be completed."""
|
|
|
|
|
|
@dataclass
|
|
class VinDecodeResult:
|
|
"""Result from Gemini VIN decode."""
|
|
|
|
year: int | None = None
|
|
make: str | None = None
|
|
model: str | None = None
|
|
trim_level: str | None = None
|
|
body_type: str | None = None
|
|
drive_type: str | None = None
|
|
fuel_type: str | None = None
|
|
engine: str | None = None
|
|
transmission: str | None = None
|
|
confidence: float = 0.0
|
|
|
|
|
|
@dataclass
|
|
class MaintenanceItem:
|
|
"""A single extracted maintenance schedule item."""
|
|
|
|
service_name: str
|
|
interval_miles: int | None = None
|
|
interval_months: int | None = None
|
|
details: str | None = None
|
|
|
|
|
|
@dataclass
|
|
class MaintenanceExtractionResult:
|
|
"""Result from Gemini maintenance schedule extraction."""
|
|
|
|
items: list[MaintenanceItem]
|
|
model: str
|
|
|
|
|
|
class GeminiEngine:
    """Gemini 2.5 Flash wrapper for maintenance schedule extraction and VIN decode.

    Standalone class (not an OcrEngine subclass) because Gemini performs
    semantic document understanding rather than traditional OCR.

    Uses lazy initialization: the Gemini client is not created until
    the first call to ``extract_maintenance()`` or ``decode_vin()``.
    """

    def __init__(self) -> None:
        # Both attributes are populated by _get_client() on first use.
        self._client: Any | None = None
        self._model_name: str = ""

    def _get_client(self) -> Any:
        """Create the genai.Client on first use and cache it.

        Authentication uses the same WIF credential path as Google Vision.

        Returns:
            The cached ``genai.Client`` instance.

        Raises:
            GeminiUnavailableError: If the credential config file is missing,
                the google-genai SDK is not installed, or client construction
                fails.
        """
        if self._client is not None:
            return self._client

        key_path = settings.google_vision_key_path
        if not os.path.isfile(key_path):
            raise GeminiUnavailableError(
                f"Google credential config not found at {key_path}. "
                "Set GOOGLE_VISION_KEY_PATH or mount the secret."
            )

        try:
            from google import genai  # type: ignore[import-untyped]

            # Point ADC at the WIF credential config (must be set BEFORE Client construction)
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
            os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"

            self._client = genai.Client(
                vertexai=True,
                project=settings.vertex_ai_project,
                location=settings.vertex_ai_location,
            )
            self._model_name = settings.gemini_model

            logger.info(
                "Gemini engine initialized (model=%s, project=%s, location=%s)",
                self._model_name,
                settings.vertex_ai_project,
                settings.vertex_ai_location,
            )
            return self._client

        except ImportError as exc:
            logger.exception("google-genai SDK import failed")
            raise GeminiUnavailableError(
                "google-genai is not installed. "
                "Install with: pip install google-genai"
            ) from exc
        except Exception as exc:
            logger.exception("Gemini authentication failed: %s", type(exc).__name__)
            raise GeminiUnavailableError(
                f"Gemini authentication failed: {exc}"
            ) from exc

    @staticmethod
    def _parse_response_json(response: Any, context: str) -> Any:
        """Decode the JSON payload of a Gemini response.

        Args:
            response: Object returned by ``client.models.generate_content``.
            context: Short description of the request, used in error text.

        Returns:
            The decoded JSON value.

        Raises:
            GeminiProcessingError: If the response carries no text.
            json.JSONDecodeError: If the text is not valid JSON (callers
                translate this into ``GeminiProcessingError``).
        """
        text = response.text
        if not text:
            # Without this guard json.loads(None) raises an opaque TypeError.
            raise GeminiProcessingError(
                f"Gemini returned an empty response for {context}"
            )
        return json.loads(text)

    @staticmethod
    def _whole_number(value: Any) -> Any:
        """Convert a JSON float to ``int``; pass every other value through.

        The response schema declares intervals as NUMBER, so a value such as
        ``5000.0`` can arrive even though ``MaintenanceItem`` declares ints.
        """
        return int(value) if isinstance(value, float) else value

    def extract_maintenance(
        self, pdf_bytes: bytes
    ) -> MaintenanceExtractionResult:
        """Extract maintenance schedules from a PDF owners manual.

        Args:
            pdf_bytes: Raw PDF file bytes (<= 20 MB).

        Returns:
            Structured maintenance extraction result.

        Raises:
            GeminiProcessingError: If the PDF is too large or extraction fails.
            GeminiUnavailableError: If the engine cannot be initialized.
        """
        # Reject oversized documents before touching credentials or network.
        if len(pdf_bytes) > _MAX_PDF_BYTES:
            size_mb = len(pdf_bytes) / (1024 * 1024)
            raise GeminiProcessingError(
                f"PDF size ({size_mb:.1f} MB) exceeds the 20 MB limit for "
                "inline processing. Upload to GCS and use a gs:// URI instead."
            )

        client = self._get_client()

        try:
            from google.genai import types  # type: ignore[import-untyped]

            pdf_part = types.Part.from_bytes(
                data=pdf_bytes,
                mime_type="application/pdf",
            )

            response = client.models.generate_content(
                model=self._model_name,
                contents=[pdf_part, _EXTRACTION_PROMPT],
                config=types.GenerateContentConfig(
                    response_mime_type="application/json",
                    response_schema=_RESPONSE_SCHEMA,
                ),
            )

            raw = self._parse_response_json(response, "maintenance extraction")
            items = [
                MaintenanceItem(
                    service_name=item["serviceName"],
                    interval_miles=self._whole_number(item.get("intervalMiles")),
                    interval_months=self._whole_number(item.get("intervalMonths")),
                    details=item.get("details"),
                )
                for item in raw.get("maintenanceSchedule", [])
            ]

            logger.info(
                "Gemini extracted %d maintenance items from PDF (%d bytes)",
                len(items),
                len(pdf_bytes),
            )

            return MaintenanceExtractionResult(
                items=items,
                model=settings.gemini_model,
            )

        except (GeminiEngineError,):
            # Already a domain error (e.g. empty response); do not re-wrap.
            raise
        except json.JSONDecodeError as exc:
            raise GeminiProcessingError(
                f"Gemini returned invalid JSON: {exc}"
            ) from exc
        except Exception as exc:
            raise GeminiProcessingError(
                f"Gemini maintenance extraction failed: {exc}"
            ) from exc

    def decode_vin(self, vin: str) -> VinDecodeResult:
        """Decode a VIN string into structured vehicle data via Gemini.

        The model year is resolved deterministically from VIN positions 7
        and 10 -- never delegated to the LLM. Gemini handles make, model,
        trim, and other fields that require manufacturer knowledge. Only
        when the year cannot be resolved locally is Gemini's year used.

        Args:
            vin: A 17-character Vehicle Identification Number.

        Returns:
            Structured vehicle specification result.

        Raises:
            GeminiProcessingError: If Gemini fails to decode the VIN.
            GeminiUnavailableError: If the engine cannot be initialized.
        """
        client = self._get_client()

        # Resolve year deterministically from VIN structure
        resolved_year = resolve_vin_year(vin)
        year_code = vin[9].upper() if len(vin) >= 10 else "?"
        logger.info(
            "VIN year resolved: code=%s pos7=%s -> year=%s",
            year_code,
            vin[6] if len(vin) >= 7 else "?",
            resolved_year,
        )

        try:
            from google.genai import types  # type: ignore[import-untyped]

            prompt = _VIN_DECODE_PROMPT.format(
                vin=vin,
                year=resolved_year or "unknown",
                year_code=year_code,
            )
            # NOTE(review): confirm the configured model accepts a JSON
            # response_schema together with the Google Search tool; some
            # Gemini versions reject that combination.
            response = client.models.generate_content(
                model=self._model_name,
                contents=[prompt],
                config=types.GenerateContentConfig(
                    response_mime_type="application/json",
                    response_schema=_VIN_DECODE_SCHEMA,
                    tools=[types.Tool(google_search=types.GoogleSearch())],
                    # The SDK field is ``maximum_remote_calls``; an unknown
                    # keyword is rejected by the SDK's strict models.
                    automatic_function_calling=types.AutomaticFunctionCallingConfig(
                        maximum_remote_calls=3,
                    ),
                ),
            )

            raw = self._parse_response_json(response, "VIN decode")

            # Override year with deterministic value -- never trust the LLM
            # for a mechanical lookup
            gemini_year = raw.get("year")
            if resolved_year and gemini_year != resolved_year:
                logger.warning(
                    "Gemini returned year %s but resolved year is %s for VIN %s -- overriding",
                    gemini_year,
                    resolved_year,
                    vin,
                )

            logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0))

            return VinDecodeResult(
                year=resolved_year if resolved_year else gemini_year,
                make=raw.get("make"),
                model=raw.get("model"),
                trim_level=raw.get("trimLevel"),
                body_type=raw.get("bodyType"),
                drive_type=raw.get("driveType"),
                fuel_type=raw.get("fuelType"),
                engine=raw.get("engine"),
                transmission=raw.get("transmission"),
                confidence=raw.get("confidence", 0.0),
            )

        except (GeminiEngineError,):
            # Already a domain error (e.g. empty response); do not re-wrap.
            raise
        except json.JSONDecodeError as exc:
            raise GeminiProcessingError(
                f"Gemini returned invalid JSON for VIN decode: {exc}"
            ) from exc
        except Exception as exc:
            raise GeminiProcessingError(
                f"Gemini VIN decode failed: {exc}"
            ) from exc
|