feat: migrate GeminiEngine to google-genai SDK with Google Search grounding (refs #233)

Replace vertexai.generative_models with google.genai client pattern.
Add Google Search grounding tool to VIN decode for improved accuracy.
Convert response schema types to uppercase per Vertex AI Schema spec.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-28 11:16:18 -06:00
parent 398d67304f
commit b7f472b3e8
2 changed files with 59 additions and 63 deletions

View File

@@ -29,7 +29,7 @@ class Settings:
os.getenv("VISION_MONTHLY_LIMIT", "1000") os.getenv("VISION_MONTHLY_LIMIT", "1000")
) )
# Vertex AI / Gemini configuration # Google GenAI / Gemini configuration
self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "") self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
self.vertex_ai_location: str = os.getenv( self.vertex_ai_location: str = os.getenv(
"VERTEX_AI_LOCATION", "global" "VERTEX_AI_LOCATION", "global"

View File

@@ -2,7 +2,7 @@
Standalone module (does NOT extend OcrEngine) because Gemini performs Standalone module (does NOT extend OcrEngine) because Gemini performs
semantic document understanding, not traditional OCR word-box extraction. semantic document understanding, not traditional OCR word-box extraction.
Uses Vertex AI SDK with structured JSON output enforcement. Uses google-genai SDK with structured JSON output enforcement.
""" """
import json import json
@@ -117,34 +117,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type,
""" """
_VIN_DECODE_SCHEMA: dict[str, Any] = { _VIN_DECODE_SCHEMA: dict[str, Any] = {
"type": "object", "type": "OBJECT",
"properties": { "properties": {
"year": {"type": "integer", "nullable": True}, "year": {"type": "INTEGER", "nullable": True},
"make": {"type": "string", "nullable": True}, "make": {"type": "STRING", "nullable": True},
"model": {"type": "string", "nullable": True}, "model": {"type": "STRING", "nullable": True},
"trimLevel": {"type": "string", "nullable": True}, "trimLevel": {"type": "STRING", "nullable": True},
"bodyType": {"type": "string", "nullable": True}, "bodyType": {"type": "STRING", "nullable": True},
"driveType": {"type": "string", "nullable": True}, "driveType": {"type": "STRING", "nullable": True},
"fuelType": {"type": "string", "nullable": True}, "fuelType": {"type": "STRING", "nullable": True},
"engine": {"type": "string", "nullable": True}, "engine": {"type": "STRING", "nullable": True},
"transmission": {"type": "string", "nullable": True}, "transmission": {"type": "STRING", "nullable": True},
"confidence": {"type": "number"}, "confidence": {"type": "NUMBER"},
}, },
"required": ["confidence"], "required": ["confidence"],
} }
_RESPONSE_SCHEMA: dict[str, Any] = { _RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object", "type": "OBJECT",
"properties": { "properties": {
"maintenanceSchedule": { "maintenanceSchedule": {
"type": "array", "type": "ARRAY",
"items": { "items": {
"type": "object", "type": "OBJECT",
"properties": { "properties": {
"serviceName": {"type": "string"}, "serviceName": {"type": "STRING"},
"intervalMiles": {"type": "number", "nullable": True}, "intervalMiles": {"type": "NUMBER", "nullable": True},
"intervalMonths": {"type": "number", "nullable": True}, "intervalMonths": {"type": "NUMBER", "nullable": True},
"details": {"type": "string", "nullable": True}, "details": {"type": "STRING", "nullable": True},
}, },
"required": ["serviceName"], "required": ["serviceName"],
}, },
@@ -206,20 +206,21 @@ class GeminiEngine:
Standalone class (not an OcrEngine subclass) because Gemini performs Standalone class (not an OcrEngine subclass) because Gemini performs
semantic document understanding rather than traditional OCR. semantic document understanding rather than traditional OCR.
Uses lazy initialization: the Vertex AI client is not created until Uses lazy initialization: the Gemini client is not created until
the first call to ``extract_maintenance()`` or ``decode_vin()``. the first call to ``extract_maintenance()`` or ``decode_vin()``.
""" """
def __init__(self) -> None: def __init__(self) -> None:
self._model: Any | None = None self._client: Any | None = None
self._model_name: str = ""
def _get_model(self) -> Any: def _get_client(self) -> Any:
"""Create the GenerativeModel on first use. """Create the genai.Client on first use.
Authentication uses the same WIF credential path as Google Vision. Authentication uses the same WIF credential path as Google Vision.
""" """
if self._model is not None: if self._client is not None:
return self._model return self._client
key_path = settings.google_vision_key_path key_path = settings.google_vision_key_path
if not os.path.isfile(key_path): if not os.path.isfile(key_path):
@@ -229,46 +230,37 @@ class GeminiEngine:
) )
try: try:
from google.cloud import aiplatform # type: ignore[import-untyped] from google import genai # type: ignore[import-untyped]
from vertexai.generative_models import ( # type: ignore[import-untyped]
GenerationConfig,
GenerativeModel,
)
# Point ADC at the WIF credential config # Point ADC at the WIF credential config (must be set BEFORE Client construction)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1" os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
aiplatform.init( self._client = genai.Client(
vertexai=True,
project=settings.vertex_ai_project, project=settings.vertex_ai_project,
location=settings.vertex_ai_location, location=settings.vertex_ai_location,
) )
self._model_name = settings.gemini_model
model_name = settings.gemini_model
self._model = GenerativeModel(model_name)
self._generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
)
logger.info( logger.info(
"Gemini engine initialized (model=%s, project=%s, location=%s)", "Gemini engine initialized (model=%s, project=%s, location=%s)",
model_name, self._model_name,
settings.vertex_ai_project, settings.vertex_ai_project,
settings.vertex_ai_location, settings.vertex_ai_location,
) )
return self._model return self._client
except ImportError as exc: except ImportError as exc:
logger.exception("Vertex AI SDK import failed") logger.exception("google-genai SDK import failed")
raise GeminiUnavailableError( raise GeminiUnavailableError(
"google-cloud-aiplatform is not installed. " "google-genai is not installed. "
"Install with: pip install google-cloud-aiplatform" "Install with: pip install google-genai"
) from exc ) from exc
except Exception as exc: except Exception as exc:
logger.exception("Vertex AI authentication failed") logger.exception("Gemini authentication failed: %s", type(exc).__name__)
raise GeminiUnavailableError( raise GeminiUnavailableError(
f"Vertex AI authentication failed: {exc}" f"Gemini authentication failed: {exc}"
) from exc ) from exc
def extract_maintenance( def extract_maintenance(
@@ -293,19 +285,23 @@ class GeminiEngine:
"inline processing. Upload to GCS and use a gs:// URI instead." "inline processing. Upload to GCS and use a gs:// URI instead."
) )
model = self._get_model() client = self._get_client()
try: try:
from vertexai.generative_models import Part # type: ignore[import-untyped] from google.genai import types # type: ignore[import-untyped]
pdf_part = Part.from_data( pdf_part = types.Part.from_bytes(
data=pdf_bytes, data=pdf_bytes,
mime_type="application/pdf", mime_type="application/pdf",
) )
response = model.generate_content( response = client.models.generate_content(
[pdf_part, _EXTRACTION_PROMPT], model=self._model_name,
generation_config=self._generation_config, contents=[pdf_part, _EXTRACTION_PROMPT],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
),
) )
raw = json.loads(response.text) raw = json.loads(response.text)
@@ -358,7 +354,7 @@ class GeminiEngine:
GeminiProcessingError: If Gemini fails to decode the VIN. GeminiProcessingError: If Gemini fails to decode the VIN.
GeminiUnavailableError: If the engine cannot be initialized. GeminiUnavailableError: If the engine cannot be initialized.
""" """
model = self._get_model() client = self._get_client()
# Resolve year deterministically from VIN structure # Resolve year deterministically from VIN structure
resolved_year = resolve_vin_year(vin) resolved_year = resolve_vin_year(vin)
@@ -371,21 +367,21 @@ class GeminiEngine:
) )
try: try:
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped] from google.genai import types # type: ignore[import-untyped]
vin_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
)
prompt = _VIN_DECODE_PROMPT.format( prompt = _VIN_DECODE_PROMPT.format(
vin=vin, vin=vin,
year=resolved_year or "unknown", year=resolved_year or "unknown",
year_code=year_code, year_code=year_code,
) )
response = model.generate_content( response = client.models.generate_content(
[prompt], model=self._model_name,
generation_config=vin_config, contents=[prompt],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
tools=[types.Tool(google_search=types.GoogleSearch())],
),
) )
raw = json.loads(response.text) raw = json.loads(response.text)