feat: migrate GeminiEngine to google-genai SDK with Google Search grounding (refs #233)

Replace vertexai.generative_models with the google.genai client pattern. Add the Google Search grounding tool to VIN decode for improved accuracy. Convert response schema types to uppercase per the Vertex AI Schema spec.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:16:18 -06:00
parent 398d67304f
commit b7f472b3e8
2 changed files with 59 additions and 63 deletions
--- a/ocr/app/config.py
+++ b/ocr/app/config.py
@@ -29,7 +29,7 @@ class Settings:
            os.getenv("VISION_MONTHLY_LIMIT", "1000")
        )
-        # Vertex AI / Gemini configuration
+        # Google GenAI / Gemini configuration
        self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
        self.vertex_ai_location: str = os.getenv(
            "VERTEX_AI_LOCATION", "global"
--- a/ocr/app/engines/gemini_engine.py
+++ b/ocr/app/engines/gemini_engine.py
@@ -2,7 +2,7 @@
 Standalone module (does NOT extend OcrEngine) because Gemini performs
 semantic document understanding, not traditional OCR word-box extraction.
-Uses Vertex AI SDK with structured JSON output enforcement.
+Uses google-genai SDK with structured JSON output enforcement.
 """
 import json
@@ -117,34 +117,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type,
 """
 _VIN_DECODE_SCHEMA: dict[str, Any] = {
-    "type": "object",
+    "type": "OBJECT",
    "properties": {
-        "year": {"type": "integer", "nullable": True},
+        "year": {"type": "INTEGER", "nullable": True},
-        "make": {"type": "string", "nullable": True},
+        "make": {"type": "STRING", "nullable": True},
-        "model": {"type": "string", "nullable": True},
+        "model": {"type": "STRING", "nullable": True},
-        "trimLevel": {"type": "string", "nullable": True},
+        "trimLevel": {"type": "STRING", "nullable": True},
-        "bodyType": {"type": "string", "nullable": True},
+        "bodyType": {"type": "STRING", "nullable": True},
-        "driveType": {"type": "string", "nullable": True},
+        "driveType": {"type": "STRING", "nullable": True},
-        "fuelType": {"type": "string", "nullable": True},
+        "fuelType": {"type": "STRING", "nullable": True},
-        "engine": {"type": "string", "nullable": True},
+        "engine": {"type": "STRING", "nullable": True},
-        "transmission": {"type": "string", "nullable": True},
+        "transmission": {"type": "STRING", "nullable": True},
-        "confidence": {"type": "number"},
+        "confidence": {"type": "NUMBER"},
    },
    "required": ["confidence"],
 }
 _RESPONSE_SCHEMA: dict[str, Any] = {
-    "type": "object",
+    "type": "OBJECT",
    "properties": {
        "maintenanceSchedule": {
-            "type": "array",
+            "type": "ARRAY",
            "items": {
-                "type": "object",
+                "type": "OBJECT",
                "properties": {
-                    "serviceName": {"type": "string"},
+                    "serviceName": {"type": "STRING"},
-                    "intervalMiles": {"type": "number", "nullable": True},
+                    "intervalMiles": {"type": "NUMBER", "nullable": True},
-                    "intervalMonths": {"type": "number", "nullable": True},
+                    "intervalMonths": {"type": "NUMBER", "nullable": True},
-                    "details": {"type": "string", "nullable": True},
+                    "details": {"type": "STRING", "nullable": True},
                },
                "required": ["serviceName"],
            },
@@ -206,20 +206,21 @@ class GeminiEngine:
    Standalone class (not an OcrEngine subclass) because Gemini performs
    semantic document understanding rather than traditional OCR.
-    Uses lazy initialization: the Vertex AI client is not created until
+    Uses lazy initialization: the Gemini client is not created until
    the first call to ``extract_maintenance()`` or ``decode_vin()``.
    """
    def __init__(self) -> None:
-        self._model: Any | None = None
+        self._client: Any | None = None
        self._model_name: str = ""
-    def _get_model(self) -> Any:
+    def _get_client(self) -> Any:
-        """Create the GenerativeModel on first use.
+        """Create the genai.Client on first use.
        Authentication uses the same WIF credential path as Google Vision.
        """
-        if self._model is not None:
+        if self._client is not None:
-            return self._model
+            return self._client
        key_path = settings.google_vision_key_path
        if not os.path.isfile(key_path):
@@ -229,46 +230,37 @@ class GeminiEngine:
            )
        try:
-            from google.cloud import aiplatform  # type: ignore[import-untyped]
+            from google import genai  # type: ignore[import-untyped]
-            from vertexai.generative_models import (  # type: ignore[import-untyped]
-                GenerationConfig,
-                GenerativeModel,
-            )
-            # Point ADC at the WIF credential config
+            # Point ADC at the WIF credential config (must be set BEFORE Client construction)
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
            os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
-            aiplatform.init(
+            self._client = genai.Client(
                vertexai=True,
                project=settings.vertex_ai_project,
                location=settings.vertex_ai_location,
            )
-
+            self._model_name = settings.gemini_model
-            model_name = settings.gemini_model
-            self._model = GenerativeModel(model_name)
-            self._generation_config = GenerationConfig(
-                response_mime_type="application/json",
-                response_schema=_RESPONSE_SCHEMA,
-            )
            logger.info(
                "Gemini engine initialized (model=%s, project=%s, location=%s)",
-                model_name,
+                self._model_name,
                settings.vertex_ai_project,
                settings.vertex_ai_location,
            )
-            return self._model
+            return self._client
        except ImportError as exc:
-            logger.exception("Vertex AI SDK import failed")
+            logger.exception("google-genai SDK import failed")
            raise GeminiUnavailableError(
-                "google-cloud-aiplatform is not installed. "
+                "google-genai is not installed. "
-                "Install with: pip install google-cloud-aiplatform"
+                "Install with: pip install google-genai"
            ) from exc
        except Exception as exc:
-            logger.exception("Vertex AI authentication failed")
+            logger.exception("Gemini authentication failed: %s", type(exc).__name__)
            raise GeminiUnavailableError(
-                f"Vertex AI authentication failed: {exc}"
+                f"Gemini authentication failed: {exc}"
            ) from exc
    def extract_maintenance(
@@ -293,19 +285,23 @@ class GeminiEngine:
                "inline processing. Upload to GCS and use a gs:// URI instead."
            )
-        model = self._get_model()
+        client = self._get_client()
        try:
-            from vertexai.generative_models import Part  # type: ignore[import-untyped]
+            from google.genai import types  # type: ignore[import-untyped]
-            pdf_part = Part.from_data(
+            pdf_part = types.Part.from_bytes(
                data=pdf_bytes,
                mime_type="application/pdf",
            )
-            response = model.generate_content(
+            response = client.models.generate_content(
-                [pdf_part, _EXTRACTION_PROMPT],
+                model=self._model_name,
-                generation_config=self._generation_config,
+                contents=[pdf_part, _EXTRACTION_PROMPT],
+                config=types.GenerateContentConfig(
+                    response_mime_type="application/json",
+                    response_schema=_RESPONSE_SCHEMA,
+                ),
            )
            raw = json.loads(response.text)
@@ -358,7 +354,7 @@ class GeminiEngine:
            GeminiProcessingError: If Gemini fails to decode the VIN.
            GeminiUnavailableError: If the engine cannot be initialized.
        """
-        model = self._get_model()
+        client = self._get_client()
        # Resolve year deterministically from VIN structure
        resolved_year = resolve_vin_year(vin)
@@ -371,21 +367,21 @@ class GeminiEngine:
        )
        try:
-            from vertexai.generative_models import GenerationConfig  # type: ignore[import-untyped]
+            from google.genai import types  # type: ignore[import-untyped]
-            vin_config = GenerationConfig(
-                response_mime_type="application/json",
-                response_schema=_VIN_DECODE_SCHEMA,
-            )
            prompt = _VIN_DECODE_PROMPT.format(
                vin=vin,
                year=resolved_year or "unknown",
                year_code=year_code,
            )
-            response = model.generate_content(
+            response = client.models.generate_content(
-                [prompt],
+                model=self._model_name,
-                generation_config=vin_config,
+                contents=[prompt],
+                config=types.GenerateContentConfig(
+                    response_mime_type="application/json",
+                    response_schema=_VIN_DECODE_SCHEMA,
+                    tools=[types.Tool(google_search=types.GoogleSearch())],
+                ),
            )
            raw = json.loads(response.text)