From 398d67304f5e39e181a97f314e0d4b271b12ac28 Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Sat, 28 Feb 2026 11:13:54 -0600
Subject: [PATCH 1/8] feat: replace google-cloud-aiplatform with google-genai
 dependency (refs #232)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ocr/requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ocr/requirements.txt b/ocr/requirements.txt
index 69864df..138f893 100644
--- a/ocr/requirements.txt
+++ b/ocr/requirements.txt
@@ -21,8 +21,8 @@ google-cloud-vision>=3.7.0
 # PDF Processing
 PyMuPDF>=1.23.0
 
-# Vertex AI / Gemini (maintenance schedule extraction)
-google-cloud-aiplatform>=1.40.0
+# Google GenAI / Gemini (maintenance schedule extraction, VIN decode)
+google-genai>=1.0.0
 
 # Redis for job queue
 redis>=5.0.0
-- 
2.49.1


From b7f472b3e85cc2dca8eae701c7cc9a55fd0645ce Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Sat, 28 Feb 2026 11:16:18 -0600
Subject: [PATCH 2/8] feat: migrate GeminiEngine to google-genai SDK with
 Google Search grounding (refs #233)

Replace vertexai.generative_models with google.genai client pattern.
Add Google Search grounding tool to VIN decode for improved accuracy.
Convert response schema types to uppercase per Vertex AI Schema spec.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ocr/app/config.py                |   2 +-
 ocr/app/engines/gemini_engine.py | 120 +++++++++++++++----------------
 2 files changed, 59 insertions(+), 63 deletions(-)

diff --git a/ocr/app/config.py b/ocr/app/config.py
index c1f1041..5784753 100644
--- a/ocr/app/config.py
+++ b/ocr/app/config.py
@@ -29,7 +29,7 @@ class Settings:
             os.getenv("VISION_MONTHLY_LIMIT", "1000")
         )
 
-        # Vertex AI / Gemini configuration
+        # Google GenAI / Gemini configuration
         self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
         self.vertex_ai_location: str = os.getenv(
             "VERTEX_AI_LOCATION", "global"
diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py
index 6f2c556..e6f4cd5 100644
--- a/ocr/app/engines/gemini_engine.py
+++ b/ocr/app/engines/gemini_engine.py
@@ -2,7 +2,7 @@
 
 Standalone module (does NOT extend OcrEngine) because Gemini performs
 semantic document understanding, not traditional OCR word-box extraction.
-Uses Vertex AI SDK with structured JSON output enforcement.
+Uses google-genai SDK with structured JSON output enforcement.
 """
 
 import json
@@ -117,34 +117,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type,
 """
 
 _VIN_DECODE_SCHEMA: dict[str, Any] = {
-    "type": "object",
+    "type": "OBJECT",
     "properties": {
-        "year": {"type": "integer", "nullable": True},
-        "make": {"type": "string", "nullable": True},
-        "model": {"type": "string", "nullable": True},
-        "trimLevel": {"type": "string", "nullable": True},
-        "bodyType": {"type": "string", "nullable": True},
-        "driveType": {"type": "string", "nullable": True},
-        "fuelType": {"type": "string", "nullable": True},
-        "engine": {"type": "string", "nullable": True},
-        "transmission": {"type": "string", "nullable": True},
-        "confidence": {"type": "number"},
+        "year": {"type": "INTEGER", "nullable": True},
+        "make": {"type": "STRING", "nullable": True},
+        "model": {"type": "STRING", "nullable": True},
+        "trimLevel": {"type": "STRING", "nullable": True},
+        "bodyType": {"type": "STRING", "nullable": True},
+        "driveType": {"type": "STRING", "nullable": True},
+        "fuelType": {"type": "STRING", "nullable": True},
+        "engine": {"type": "STRING", "nullable": True},
+        "transmission": {"type": "STRING", "nullable": True},
+        "confidence": {"type": "NUMBER"},
     },
     "required": ["confidence"],
 }
 
 _RESPONSE_SCHEMA: dict[str, Any] = {
-    "type": "object",
+    "type": "OBJECT",
     "properties": {
         "maintenanceSchedule": {
-            "type": "array",
+            "type": "ARRAY",
             "items": {
-                "type": "object",
+                "type": "OBJECT",
                 "properties": {
-                    "serviceName": {"type": "string"},
-                    "intervalMiles": {"type": "number", "nullable": True},
-                    "intervalMonths": {"type": "number", "nullable": True},
-                    "details": {"type": "string", "nullable": True},
+                    "serviceName": {"type": "STRING"},
+                    "intervalMiles": {"type": "NUMBER", "nullable": True},
+                    "intervalMonths": {"type": "NUMBER", "nullable": True},
+                    "details": {"type": "STRING", "nullable": True},
                 },
                 "required": ["serviceName"],
             },
@@ -206,20 +206,21 @@ class GeminiEngine:
     Standalone class (not an OcrEngine subclass) because Gemini performs
     semantic document understanding rather than traditional OCR.
 
-    Uses lazy initialization: the Vertex AI client is not created until
+    Uses lazy initialization: the Gemini client is not created until
     the first call to ``extract_maintenance()`` or ``decode_vin()``.
     """
 
     def __init__(self) -> None:
-        self._model: Any | None = None
+        self._client: Any | None = None
+        self._model_name: str = ""
 
-    def _get_model(self) -> Any:
-        """Create the GenerativeModel on first use.
+    def _get_client(self) -> Any:
+        """Create the genai.Client on first use.
 
         Authentication uses the same WIF credential path as Google Vision.
         """
-        if self._model is not None:
-            return self._model
+        if self._client is not None:
+            return self._client
 
         key_path = settings.google_vision_key_path
         if not os.path.isfile(key_path):
@@ -229,46 +230,37 @@ class GeminiEngine:
             )
 
         try:
-            from google.cloud import aiplatform  # type: ignore[import-untyped]
-            from vertexai.generative_models import (  # type: ignore[import-untyped]
-                GenerationConfig,
-                GenerativeModel,
-            )
+            from google import genai  # type: ignore[import-untyped]
 
-            # Point ADC at the WIF credential config
+            # Point ADC at the WIF credential config (must be set BEFORE Client construction)
             os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
             os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
 
-            aiplatform.init(
+            self._client = genai.Client(
+                vertexai=True,
                 project=settings.vertex_ai_project,
                 location=settings.vertex_ai_location,
             )
-
-            model_name = settings.gemini_model
-            self._model = GenerativeModel(model_name)
-            self._generation_config = GenerationConfig(
-                response_mime_type="application/json",
-                response_schema=_RESPONSE_SCHEMA,
-            )
+            self._model_name = settings.gemini_model
 
             logger.info(
                 "Gemini engine initialized (model=%s, project=%s, location=%s)",
-                model_name,
+                self._model_name,
                 settings.vertex_ai_project,
                 settings.vertex_ai_location,
             )
-            return self._model
+            return self._client
 
         except ImportError as exc:
-            logger.exception("Vertex AI SDK import failed")
+            logger.exception("google-genai SDK import failed")
             raise GeminiUnavailableError(
-                "google-cloud-aiplatform is not installed. "
-                "Install with: pip install google-cloud-aiplatform"
+                "google-genai is not installed. "
+                "Install with: pip install google-genai"
             ) from exc
         except Exception as exc:
-            logger.exception("Vertex AI authentication failed")
+            logger.exception("Gemini authentication failed: %s", type(exc).__name__)
             raise GeminiUnavailableError(
-                f"Vertex AI authentication failed: {exc}"
+                f"Gemini authentication failed: {exc}"
             ) from exc
 
     def extract_maintenance(
@@ -293,19 +285,23 @@ class GeminiEngine:
                 "inline processing. Upload to GCS and use a gs:// URI instead."
             )
 
-        model = self._get_model()
+        client = self._get_client()
 
         try:
-            from vertexai.generative_models import Part  # type: ignore[import-untyped]
+            from google.genai import types  # type: ignore[import-untyped]
 
-            pdf_part = Part.from_data(
+            pdf_part = types.Part.from_bytes(
                 data=pdf_bytes,
                 mime_type="application/pdf",
             )
 
-            response = model.generate_content(
-                [pdf_part, _EXTRACTION_PROMPT],
-                generation_config=self._generation_config,
+            response = client.models.generate_content(
+                model=self._model_name,
+                contents=[pdf_part, _EXTRACTION_PROMPT],
+                config=types.GenerateContentConfig(
+                    response_mime_type="application/json",
+                    response_schema=_RESPONSE_SCHEMA,
+                ),
             )
 
             raw = json.loads(response.text)
@@ -358,7 +354,7 @@ class GeminiEngine:
             GeminiProcessingError: If Gemini fails to decode the VIN.
             GeminiUnavailableError: If the engine cannot be initialized.
         """
-        model = self._get_model()
+        client = self._get_client()
 
         # Resolve year deterministically from VIN structure
         resolved_year = resolve_vin_year(vin)
@@ -371,21 +367,21 @@ class GeminiEngine:
         )
 
         try:
-            from vertexai.generative_models import GenerationConfig  # type: ignore[import-untyped]
-
-            vin_config = GenerationConfig(
-                response_mime_type="application/json",
-                response_schema=_VIN_DECODE_SCHEMA,
-            )
+            from google.genai import types  # type: ignore[import-untyped]
 
             prompt = _VIN_DECODE_PROMPT.format(
                 vin=vin,
                 year=resolved_year or "unknown",
                 year_code=year_code,
             )
-            response = model.generate_content(
-                [prompt],
-                generation_config=vin_config,
+            response = client.models.generate_content(
+                model=self._model_name,
+                contents=[prompt],
+                config=types.GenerateContentConfig(
+                    response_mime_type="application/json",
+                    response_schema=_VIN_DECODE_SCHEMA,
+                    tools=[types.Tool(google_search=types.GoogleSearch())],
+                ),
             )
 
             raw = json.loads(response.text)
-- 
2.49.1


From 9f51e62b94190e161869b39f59f60fc1af9a6a95 Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Sat, 28 Feb 2026 11:17:14 -0600
Subject: [PATCH 3/8] feat: migrate MaintenanceReceiptExtractor to google-genai
 SDK (refs #234)

Replace vertexai.generative_models with google.genai client pattern.
Fix pre-existing bug: raise GeminiUnavailableError instead of bare
RuntimeError for missing credentials. Add proper try/except blocks
matching GeminiEngine error handling pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../maintenance_receipt_extractor.py          | 96 +++++++++++--------
 1 file changed, 54 insertions(+), 42 deletions(-)

diff --git a/ocr/app/extractors/maintenance_receipt_extractor.py b/ocr/app/extractors/maintenance_receipt_extractor.py
index d5b4d13..194c484 100644
--- a/ocr/app/extractors/maintenance_receipt_extractor.py
+++ b/ocr/app/extractors/maintenance_receipt_extractor.py
@@ -14,6 +14,7 @@ import time
 from typing import Any, Optional
 
 from app.config import settings
+from app.engines.gemini_engine import GeminiUnavailableError
 from app.extractors.receipt_extractor import (
     ExtractedField,
     ReceiptExtractionResult,
@@ -54,16 +55,16 @@ OCR Text:
 """
 
 _RECEIPT_RESPONSE_SCHEMA: dict[str, Any] = {
-    "type": "object",
+    "type": "OBJECT",
     "properties": {
-        "serviceName": {"type": "string", "nullable": True},
-        "serviceDate": {"type": "string", "nullable": True},
-        "totalCost": {"type": "number", "nullable": True},
-        "shopName": {"type": "string", "nullable": True},
-        "laborCost": {"type": "number", "nullable": True},
-        "partsCost": {"type": "number", "nullable": True},
-        "odometerReading": {"type": "number", "nullable": True},
-        "vehicleInfo": {"type": "string", "nullable": True},
+        "serviceName": {"type": "STRING", "nullable": True},
+        "serviceDate": {"type": "STRING", "nullable": True},
+        "totalCost": {"type": "NUMBER", "nullable": True},
+        "shopName": {"type": "STRING", "nullable": True},
+        "laborCost": {"type": "NUMBER", "nullable": True},
+        "partsCost": {"type": "NUMBER", "nullable": True},
+        "odometerReading": {"type": "NUMBER", "nullable": True},
+        "vehicleInfo": {"type": "STRING", "nullable": True},
     },
     "required": [
         "serviceName",
@@ -87,8 +88,8 @@ class MaintenanceReceiptExtractor:
     """
 
     def __init__(self) -> None:
-        self._model: Any | None = None
-        self._generation_config: Any | None = None
+        self._client: Any | None = None
+        self._model_name: str = ""
 
     def extract(
         self,
@@ -169,47 +170,52 @@ class MaintenanceReceiptExtractor:
             processing_time_ms=processing_time_ms,
         )
 
-    def _get_model(self) -> Any:
-        """Lazy-initialize Vertex AI Gemini model.
+    def _get_client(self) -> Any:
+        """Lazy-initialize google-genai Gemini client.
 
         Uses the same authentication pattern as GeminiEngine.
         """
-        if self._model is not None:
-            return self._model
+        if self._client is not None:
+            return self._client
 
         key_path = settings.google_vision_key_path
         if not os.path.isfile(key_path):
-            raise RuntimeError(
+            raise GeminiUnavailableError(
                 f"Google credential config not found at {key_path}. "
                 "Set GOOGLE_VISION_KEY_PATH or mount the secret."
             )
 
-        from google.cloud import aiplatform  # type: ignore[import-untyped]
-        from vertexai.generative_models import (  # type: ignore[import-untyped]
-            GenerationConfig,
-            GenerativeModel,
-        )
+        try:
+            from google import genai  # type: ignore[import-untyped]
 
-        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
-        os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
+            # Point ADC at the WIF credential config (must be set BEFORE Client construction)
+            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
+            os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
 
-        aiplatform.init(
-            project=settings.vertex_ai_project,
-            location=settings.vertex_ai_location,
-        )
+            self._client = genai.Client(
+                vertexai=True,
+                project=settings.vertex_ai_project,
+                location=settings.vertex_ai_location,
+            )
+            self._model_name = settings.gemini_model
 
-        model_name = settings.gemini_model
-        self._model = GenerativeModel(model_name)
-        self._generation_config = GenerationConfig(
-            response_mime_type="application/json",
-            response_schema=_RECEIPT_RESPONSE_SCHEMA,
-        )
+            logger.info(
+                "Maintenance receipt Gemini client initialized (model=%s)",
+                self._model_name,
+            )
+            return self._client
 
-        logger.info(
-            "Maintenance receipt Gemini model initialized (model=%s)",
-            model_name,
-        )
-        return self._model
+        except ImportError as exc:
+            logger.exception("google-genai SDK import failed")
+            raise GeminiUnavailableError(
+                "google-genai is not installed. "
+                "Install with: pip install google-genai"
+            ) from exc
+        except Exception as exc:
+            logger.exception("Gemini authentication failed: %s", type(exc).__name__)
+            raise GeminiUnavailableError(
+                f"Gemini authentication failed: {exc}"
+            ) from exc
 
     def _extract_with_gemini(self, ocr_text: str) -> dict:
         """Send OCR text to Gemini for semantic field extraction.
@@ -220,13 +226,19 @@ class MaintenanceReceiptExtractor:
         Returns:
             Dictionary of field_name -> extracted_value from Gemini.
         """
-        model = self._get_model()
+        client = self._get_client()
+
+        from google.genai import types  # type: ignore[import-untyped]
 
         prompt = _RECEIPT_EXTRACTION_PROMPT.format(ocr_text=ocr_text)
 
-        response = model.generate_content(
-            [prompt],
-            generation_config=self._generation_config,
+        response = client.models.generate_content(
+            model=self._model_name,
+            contents=[prompt],
+            config=types.GenerateContentConfig(
+                response_mime_type="application/json",
+                response_schema=_RECEIPT_RESPONSE_SCHEMA,
+            ),
         )
 
         raw = json.loads(response.text)
-- 
2.49.1


From 1464a0e1af2d4481c75f6c284c03d1bf747d01ea Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Sat, 28 Feb 2026 11:21:10 -0600
Subject: [PATCH 4/8] feat: update test mocks for google-genai SDK (refs #235)

Replace engine._model/engine._generation_config mocks with
engine._client/engine._model_name. Update sys.modules patches
from vertexai to google.genai. Remove dead if-False branch.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ocr/tests/test_gemini_engine.py | 85 ++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 45 deletions(-)

diff --git a/ocr/tests/test_gemini_engine.py b/ocr/tests/test_gemini_engine.py
index bf709e4..3674b4a 100644
--- a/ocr/tests/test_gemini_engine.py
+++ b/ocr/tests/test_gemini_engine.py
@@ -2,11 +2,11 @@
 
 Covers: GeminiEngine initialization, PDF size validation,
 successful extraction, empty results, and error handling.
-All Vertex AI SDK calls are mocked.
+All google-genai SDK calls are mocked.
 """
 
 import json
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -156,22 +156,16 @@ class TestExtractMaintenance:
             },
         ]
 
-        mock_model = MagicMock()
-        mock_model.generate_content.return_value = _make_gemini_response(schedule)
+        mock_client = MagicMock()
+        mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
 
-        with (
-            patch(
-                "app.engines.gemini_engine.importlib_vertex_ai"
-            ) if False else patch.dict("sys.modules", {
-                "google.cloud": MagicMock(),
-                "google.cloud.aiplatform": MagicMock(),
-                "vertexai": MagicMock(),
-                "vertexai.generative_models": MagicMock(),
-            }),
-        ):
+        with patch.dict("sys.modules", {
+            "google.genai": MagicMock(),
+            "google.genai.types": MagicMock(),
+        }):
             engine = GeminiEngine()
-            engine._model = mock_model
-            engine._generation_config = MagicMock()
+            engine._client = mock_client
+            engine._model_name = "gemini-2.5-flash"
 
             result = engine.extract_maintenance(_make_pdf_bytes())
 
@@ -200,12 +194,12 @@ class TestExtractMaintenance:
         mock_settings.vertex_ai_location = "us-central1"
         mock_settings.gemini_model = "gemini-2.5-flash"
 
-        mock_model = MagicMock()
-        mock_model.generate_content.return_value = _make_gemini_response([])
+        mock_client = MagicMock()
+        mock_client.models.generate_content.return_value = _make_gemini_response([])
 
         engine = GeminiEngine()
-        engine._model = mock_model
-        engine._generation_config = MagicMock()
+        engine._client = mock_client
+        engine._model_name = "gemini-2.5-flash"
 
         result = engine.extract_maintenance(_make_pdf_bytes())
 
@@ -223,12 +217,12 @@ class TestExtractMaintenance:
 
         schedule = [{"serviceName": "Brake Fluid Replacement"}]
 
-        mock_model = MagicMock()
-        mock_model.generate_content.return_value = _make_gemini_response(schedule)
+        mock_client = MagicMock()
+        mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
 
         engine = GeminiEngine()
-        engine._model = mock_model
-        engine._generation_config = MagicMock()
+        engine._client = mock_client
+        engine._model_name = "gemini-2.5-flash"
 
         result = engine.extract_maintenance(_make_pdf_bytes())
 
@@ -264,7 +258,8 @@ class TestErrorHandling:
         with (
             patch("app.engines.gemini_engine.settings") as mock_settings,
             patch.dict("sys.modules", {
-                "google.cloud.aiplatform": None,
+                "google": None,
+                "google.genai": None,
             }),
         ):
             mock_settings.google_vision_key_path = "/fake/creds.json"
@@ -283,12 +278,12 @@ class TestErrorHandling:
         mock_settings.vertex_ai_location = "us-central1"
         mock_settings.gemini_model = "gemini-2.5-flash"
 
-        mock_model = MagicMock()
-        mock_model.generate_content.side_effect = RuntimeError("API quota exceeded")
+        mock_client = MagicMock()
+        mock_client.models.generate_content.side_effect = RuntimeError("API quota exceeded")
 
         engine = GeminiEngine()
-        engine._model = mock_model
-        engine._generation_config = MagicMock()
+        engine._client = mock_client
+        engine._model_name = "gemini-2.5-flash"
 
         with pytest.raises(GeminiProcessingError, match="maintenance extraction failed"):
             engine.extract_maintenance(_make_pdf_bytes())
@@ -307,12 +302,12 @@ class TestErrorHandling:
         mock_response = MagicMock()
         mock_response.text = "not valid json {{"
 
-        mock_model = MagicMock()
-        mock_model.generate_content.return_value = mock_response
+        mock_client = MagicMock()
+        mock_client.models.generate_content.return_value = mock_response
 
         engine = GeminiEngine()
-        engine._model = mock_model
-        engine._generation_config = MagicMock()
+        engine._client = mock_client
+        engine._model_name = "gemini-2.5-flash"
 
         with pytest.raises(GeminiProcessingError, match="invalid JSON"):
             engine.extract_maintenance(_make_pdf_bytes())
@@ -322,32 +317,32 @@ class TestErrorHandling:
 
 
 class TestLazyInitialization:
-    """Verify the model is not created until first use."""
+    """Verify the client is not created until first use."""
 
-    def test_model_is_none_after_construction(self):
-        """GeminiEngine should not initialize the model in __init__."""
+    def test_client_is_none_after_construction(self):
+        """GeminiEngine should not initialize the client in __init__."""
         engine = GeminiEngine()
-        assert engine._model is None
+        assert engine._client is None
 
     @patch("app.engines.gemini_engine.settings")
     @patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
-    def test_model_reused_on_second_call(self, mock_isfile, mock_settings):
-        """Once initialized, the same model instance is reused."""
+    def test_client_reused_on_second_call(self, mock_isfile, mock_settings):
+        """Once initialized, the same client instance is reused."""
         mock_settings.google_vision_key_path = "/fake/creds.json"
         mock_settings.vertex_ai_project = "test-project"
         mock_settings.vertex_ai_location = "us-central1"
         mock_settings.gemini_model = "gemini-2.5-flash"
 
         schedule = [{"serviceName": "Oil Change", "intervalMiles": 5000}]
-        mock_model = MagicMock()
-        mock_model.generate_content.return_value = _make_gemini_response(schedule)
+        mock_client = MagicMock()
+        mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
 
         engine = GeminiEngine()
-        engine._model = mock_model
-        engine._generation_config = MagicMock()
+        engine._client = mock_client
+        engine._model_name = "gemini-2.5-flash"
 
         engine.extract_maintenance(_make_pdf_bytes())
         engine.extract_maintenance(_make_pdf_bytes())
 
-        # Model's generate_content should have been called twice
-        assert mock_model.generate_content.call_count == 2
+        # Client's generate_content should have been called twice
+        assert mock_client.models.generate_content.call_count == 2
-- 
2.49.1


From 96e1dde7b2e705082cea49f94e5b6ee83e0074d0 Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Sat, 28 Feb 2026 11:21:58 -0600
Subject: [PATCH 5/8] docs: update CLAUDE.md references from Vertex AI to
 google-genai (refs #231)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ocr/app/CLAUDE.md         | 2 +-
 ocr/app/engines/CLAUDE.md | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ocr/app/CLAUDE.md b/ocr/app/CLAUDE.md
index bc0dfcc..f29864c 100644
--- a/ocr/app/CLAUDE.md
+++ b/ocr/app/CLAUDE.md
@@ -7,7 +7,7 @@ Python OCR microservice (FastAPI). Primary engine: PaddleOCR PP-OCRv4 with optio
 | File | What | When to read |
 | ---- | ---- | ------------ |
 | `main.py` | FastAPI application entry point | Route registration, app setup |
-| `config.py` | Configuration settings (OCR engines, Vertex AI, Redis, Vision API limits) | Environment variables, settings |
+| `config.py` | Configuration settings (OCR engines, Google GenAI, Redis, Vision API limits) | Environment variables, settings |
 | `__init__.py` | Package init | Package structure |
 
 ## Subdirectories
diff --git a/ocr/app/engines/CLAUDE.md b/ocr/app/engines/CLAUDE.md
index 68a4e82..8941522 100644
--- a/ocr/app/engines/CLAUDE.md
+++ b/ocr/app/engines/CLAUDE.md
@@ -3,7 +3,7 @@
 OCR engine abstraction layer. Two categories of engines:
 
 1. **OcrEngine subclasses** (image-to-text): PaddleOCR, Google Vision, Hybrid. Accept image bytes, return text + confidence + word boxes.
-2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via Vertex AI. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
+2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via the google-genai SDK. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
 
 ## Files
 
@@ -15,7 +15,7 @@ OCR engine abstraction layer. Two categories of engines:
 | `cloud_engine.py` | Google Vision TEXT_DETECTION fallback engine (WIF authentication) | Cloud OCR configuration, API quota |
 | `hybrid_engine.py` | Combines primary + fallback engine with confidence threshold switching | Engine selection logic, fallback behavior |
 | `engine_factory.py` | Factory function and engine registry for instantiation | Adding new engine types |
-| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, VIN decode, Gemini configuration |
+| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (google-genai SDK, 20MB PDF limit, structured JSON output, Google Search grounding for VIN decode) | Manual extraction debugging, VIN decode, Gemini configuration |
 
 ## Engine Selection
 
-- 
2.49.1


From 936753fac22161c95e51ea8f67b0118b07ec259e Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Sat, 28 Feb 2026 12:02:26 -0600
Subject: [PATCH 6/8] fix: VIN decode timeouts and inverted position-7 year-cycle logic

---
 .../src/features/vehicles/api/vehicles.api.ts |  2 +-
 ocr/app/engines/gemini_engine.py              | 20 ++--
 ocr/tests/test_resolve_vin_year.py            | 94 +++++++++----------
 3 files changed, 60 insertions(+), 56 deletions(-)

diff --git a/frontend/src/features/vehicles/api/vehicles.api.ts b/frontend/src/features/vehicles/api/vehicles.api.ts
index 49ab6dd..3004455 100644
--- a/frontend/src/features/vehicles/api/vehicles.api.ts
+++ b/frontend/src/features/vehicles/api/vehicles.api.ts
@@ -87,7 +87,7 @@ export const vehiclesApi = {
    */
   decodeVin: async (vin: string): Promise<DecodedVehicleData> => {
     const response = await apiClient.post('/vehicles/decode-vin', { vin }, {
-      timeout: 60000 // 60 seconds for Gemini cold start
+      timeout: 120000 // 120 seconds for Gemini + Google Search grounding
     });
     return response.data;
   }
diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py
index e6f4cd5..8e9d36b 100644
--- a/ocr/app/engines/gemini_engine.py
+++ b/ocr/app/engines/gemini_engine.py
@@ -40,8 +40,9 @@ Return the results as a JSON object with a single "maintenanceSchedule" array.\
 
 # VIN year code lookup: position 10 character -> base year (first cycle, 1980-2009).
 # The 30-year cycle repeats: +30 for 2010-2039, +60 for 2040-2069.
-# Disambiguation uses position 7: numeric -> 2010+ cycle, alphabetic -> 1980s cycle.
-# For the 2040+ cycle (when position 7 is alphabetic again), we pick the most
+# Disambiguation uses position 7: alphabetic -> 2010+ cycle, numeric -> 1980s cycle.
+# Per NHTSA VIN requirements (49 CFR Part 565): MY2010+ vehicles must use alphabetic position 7.
+# For the 2040+ cycle (when position 7 is numeric again), we pick the most
 # recent plausible year (not more than 2 years in the future).
 _VIN_YEAR_CODES: dict[str, int] = {
     "A": 1980, "B": 1981, "C": 1982, "D": 1983, "E": 1984,
@@ -58,10 +59,10 @@ def resolve_vin_year(vin: str) -> int | None:
     """Deterministically resolve model year from VIN positions 7 and 10.
 
     VIN year codes repeat on a 30-year cycle. Position 7 disambiguates:
-      - Numeric position 7 -> 2010-2039 cycle
-      - Alphabetic position 7 -> 1980-2009 or 2040-2050+ cycle
+      - Alphabetic position 7 -> 2010-2039 cycle (NHTSA MY2010+ requirement)
+      - Numeric position 7 -> 1980-2009 or 2040-2069 cycle
 
-    For the alphabetic case with three possible cycles, picks the most recent
+    For the numeric case with two possible cycles, picks the most recent
     year that is not more than 2 years in the future.
 
     Returns None if the VIN is too short or position 10 is not a valid year code.
@@ -76,11 +77,11 @@ def resolve_vin_year(vin: str) -> int | None:
     if base_year is None:
         return None
 
-    if pos7.isdigit():
-        # Numeric position 7 -> second cycle (2010-2039)
+    if pos7.isalpha():
+        # Alphabetic position 7 -> second cycle (2010-2039)
         return base_year + 30
 
-    # Alphabetic position 7 -> first cycle (1980-2009) or third cycle (2040-2069)
+    # Numeric position 7 -> first cycle (1980-2009) or third cycle (2040-2069)
     # Pick the most recent plausible year
     max_plausible = datetime.now().year + 2
 
@@ -381,6 +382,9 @@ class GeminiEngine:
                     response_mime_type="application/json",
                     response_schema=_VIN_DECODE_SCHEMA,
                     tools=[types.Tool(google_search=types.GoogleSearch())],
+                    automatic_function_calling=types.AutomaticFunctionCallingConfig(
+                        max_remote_calls=3,
+                    ),
                 ),
             )
 
diff --git a/ocr/tests/test_resolve_vin_year.py b/ocr/tests/test_resolve_vin_year.py
index 54ea5f9..fcb63a6 100644
--- a/ocr/tests/test_resolve_vin_year.py
+++ b/ocr/tests/test_resolve_vin_year.py
@@ -12,86 +12,86 @@ from app.engines.gemini_engine import resolve_vin_year
 
 
 class TestSecondCycle:
-    """Position 7 numeric -> 2010-2039 cycle."""
+    """Position 7 alphabetic -> 2010-2039 cycle (NHTSA MY2010+ requirement)."""
 
-    def test_p_with_numeric_pos7_returns_2023(self):
-        """P=2023 when position 7 is numeric (the bug that triggered this fix)."""
-        # VIN: 1G1YE2D32P5602473 -- pos7='2' (numeric), pos10='P'
+    def test_p_with_alpha_pos7_returns_2023(self):
+        """P=2023 when position 7 is alphabetic (the bug that triggered this fix)."""
+        # VIN: 1G1YE2D32P5602473 -- pos7='D' (alphabetic), pos10='P'
         assert resolve_vin_year("1G1YE2D32P5602473") == 2023
 
-    def test_a_with_numeric_pos7_returns_2010(self):
-        """A=2010 when position 7 is numeric."""
-        assert resolve_vin_year("1G1YE2112A5602473") == 2010
+    def test_a_with_alpha_pos7_returns_2010(self):
+        """A=2010 when position 7 is alphabetic."""
+        assert resolve_vin_year("1G1YE2D12A5602473") == 2010
 
-    def test_l_with_numeric_pos7_returns_2020(self):
-        """L=2020 when position 7 is numeric."""
-        assert resolve_vin_year("1G1YE2112L5602473") == 2020
+    def test_l_with_alpha_pos7_returns_2020(self):
+        """L=2020 when position 7 is alphabetic."""
+        assert resolve_vin_year("1G1YE2D12L5602473") == 2020
 
-    def test_9_with_numeric_pos7_returns_2039(self):
-        """9=2039 when position 7 is numeric."""
-        assert resolve_vin_year("1G1YE211295602473") == 2039
+    def test_9_with_alpha_pos7_returns_2039(self):
+        """9=2039 when position 7 is alphabetic."""
+        assert resolve_vin_year("1G1YE2D1295602473") == 2039
 
-    def test_digit_1_with_numeric_pos7_returns_2031(self):
-        """1=2031 when position 7 is numeric."""
-        assert resolve_vin_year("1G1YE211215602473") == 2031
+    def test_digit_1_with_alpha_pos7_returns_2031(self):
+        """1=2031 when position 7 is alphabetic."""
+        assert resolve_vin_year("1G1YE2D1215602473") == 2031
 
-    def test_s_with_numeric_pos7_returns_2025(self):
-        """S=2025 when position 7 is numeric."""
-        assert resolve_vin_year("1G1YE2112S5602473") == 2025
+    def test_s_with_alpha_pos7_returns_2025(self):
+        """S=2025 when position 7 is alphabetic."""
+        assert resolve_vin_year("1G1YE2D12S5602473") == 2025
 
-    def test_t_with_numeric_pos7_returns_2026(self):
-        """T=2026 when position 7 is numeric."""
-        assert resolve_vin_year("1G1YE2112T5602473") == 2026
+    def test_t_with_alpha_pos7_returns_2026(self):
+        """T=2026 when position 7 is alphabetic."""
+        assert resolve_vin_year("1G1YE2D12T5602473") == 2026
 
 
 class TestFirstCycle:
-    """Position 7 alphabetic -> 1980-2009 cycle (when 2040+ is not yet plausible)."""
+    """Position 7 numeric -> 1980-2009 cycle."""
 
-    def test_m_with_alpha_pos7_returns_1991(self):
-        """M=1991 when position 7 is alphabetic (third cycle 2051 is not plausible)."""
-        assert resolve_vin_year("1G1YE2J32M5602473") == 1991
+    def test_m_with_numeric_pos7_returns_1991(self):
+        """M=1991 when position 7 is numeric."""
+        assert resolve_vin_year("1G1YE2132M5602473") == 1991
 
-    def test_n_with_alpha_pos7_returns_1992(self):
-        """N=1992 when position 7 is alphabetic."""
-        assert resolve_vin_year("1G1YE2J32N5602473") == 1992
+    def test_n_with_numeric_pos7_returns_1992(self):
+        """N=1992 when position 7 is numeric."""
+        assert resolve_vin_year("1G1YE2132N5602473") == 1992
 
-    def test_p_with_alpha_pos7_returns_1993(self):
-        """P=1993 when position 7 is alphabetic (third cycle 2053 not plausible)."""
-        assert resolve_vin_year("1G1YE2J32P5602473") == 1993
+    def test_p_with_numeric_pos7_returns_1993(self):
+        """P=1993 when position 7 is numeric."""
+        assert resolve_vin_year("1G1YE2132P5602473") == 1993
 
-    def test_y_with_alpha_pos7_returns_2000(self):
-        """Y=2000 when position 7 is alphabetic."""
-        assert resolve_vin_year("1G1YE2J32Y5602473") == 2000
+    def test_y_with_numeric_pos7_returns_2000(self):
+        """Y=2000 when position 7 is numeric."""
+        assert resolve_vin_year("1G1YE2132Y5602473") == 2000
 
 
 class TestThirdCycle:
-    """Position 7 alphabetic + third cycle year (2040-2050) is plausible."""
+    """Position 7 numeric + third cycle year (2040-2050) is plausible."""
 
     @patch("app.engines.gemini_engine.datetime")
-    def test_a_with_alpha_pos7_returns_2040_when_plausible(self, mock_dt):
-        """A=2040 when position 7 is alphabetic and year 2040 is plausible."""
+    def test_a_with_numeric_pos7_returns_2040_when_plausible(self, mock_dt):
+        """A=2040 when position 7 is numeric and year 2040 is plausible."""
         mock_dt.now.return_value = datetime(2039, 1, 1)
         # 2039 + 2 = 2041 >= 2040, so third cycle is plausible
-        assert resolve_vin_year("1G1YE2J32A5602473") == 2040
+        assert resolve_vin_year("1G1YE2132A5602473") == 2040
 
     @patch("app.engines.gemini_engine.datetime")
-    def test_l_with_alpha_pos7_returns_2050_when_plausible(self, mock_dt):
-        """L=2050 when position 7 is alphabetic and year 2050 is plausible."""
+    def test_l_with_numeric_pos7_returns_2050_when_plausible(self, mock_dt):
+        """L=2050 when position 7 is numeric and year 2050 is plausible."""
         mock_dt.now.return_value = datetime(2049, 6, 1)
-        assert resolve_vin_year("1G1YE2J32L5602473") == 2050
+        assert resolve_vin_year("1G1YE2132L5602473") == 2050
 
     @patch("app.engines.gemini_engine.datetime")
-    def test_a_with_alpha_pos7_returns_1980_when_2040_not_plausible(self, mock_dt):
+    def test_a_with_numeric_pos7_returns_1980_when_2040_not_plausible(self, mock_dt):
         """A=1980 when third cycle year (2040) exceeds max plausible."""
         mock_dt.now.return_value = datetime(2026, 2, 20)
         # 2026 + 2 = 2028 < 2040, so third cycle not plausible -> first cycle
-        assert resolve_vin_year("1G1YE2J32A5602473") == 1980
+        assert resolve_vin_year("1G1YE2132A5602473") == 1980
 
     @patch("app.engines.gemini_engine.datetime")
-    def test_k_with_alpha_pos7_returns_2049_when_plausible(self, mock_dt):
-        """K=2049 when position 7 is alphabetic and year is plausible."""
+    def test_k_with_numeric_pos7_returns_2049_when_plausible(self, mock_dt):
+        """K=2049 when position 7 is numeric and year is plausible."""
         mock_dt.now.return_value = datetime(2048, 1, 1)
-        assert resolve_vin_year("1G1YE2J32K5602473") == 2049
+        assert resolve_vin_year("1G1YE2132K5602473") == 2049
 
 
 class TestEdgeCases:
-- 
2.49.1


From 1add6c8240685069ccc4c06bf50aa2321cccafc1 Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Sat, 28 Feb 2026 12:59:04 -0600
Subject: [PATCH 7/8] fix: remove unsupported AutomaticFunctionCallingConfig
 parameter (refs #231)

The installed google-genai version does not support max_remote_calls on
AutomaticFunctionCallingConfig, causing a pydantic validation error that
broke VIN decode on staging.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ocr/app/engines/gemini_engine.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py
index 8e9d36b..1087736 100644
--- a/ocr/app/engines/gemini_engine.py
+++ b/ocr/app/engines/gemini_engine.py
@@ -382,9 +382,6 @@ class GeminiEngine:
                     response_mime_type="application/json",
                     response_schema=_VIN_DECODE_SCHEMA,
                     tools=[types.Tool(google_search=types.GoogleSearch())],
-                    automatic_function_calling=types.AutomaticFunctionCallingConfig(
-                        max_remote_calls=3,
-                    ),
                 ),
             )
 
-- 
2.49.1


From 56df5d48f3b82abc9930d58ab98b69112dc90561 Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Sat, 28 Feb 2026 21:16:56 -0600
Subject: [PATCH 8/8] fix: revert unsupported AFC config and add diagnostic
 logging for VIN decode (refs #231)

- AutomaticFunctionCallingConfig(max_remote_calls=3), which caused a pydantic
  validation error on the installed google-genai version, was already removed
  in PATCH 7/8; this commit makes no further AFC change
- Log full Gemini raw JSON response in OCR engine for debugging
- Add engine/transmission to backend raw values log
- Add hasTrim/hasEngine/hasTransmission to decode success log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/src/features/vehicles/api/vehicles.controller.ts | 5 ++++-
 backend/src/features/vehicles/domain/vehicles.service.ts | 3 ++-
 ocr/app/engines/gemini_engine.py                         | 7 ++++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/backend/src/features/vehicles/api/vehicles.controller.ts b/backend/src/features/vehicles/api/vehicles.controller.ts
index a9e7f33..103fc72 100644
--- a/backend/src/features/vehicles/api/vehicles.controller.ts
+++ b/backend/src/features/vehicles/api/vehicles.controller.ts
@@ -416,7 +416,10 @@ export class VehiclesController {
         userId,
         hasYear: !!decodedData.year.value,
         hasMake: !!decodedData.make.value,
-        hasModel: !!decodedData.model.value
+        hasModel: !!decodedData.model.value,
+        hasTrim: !!decodedData.trimLevel.value,
+        hasEngine: !!decodedData.engine.value,
+        hasTransmission: !!decodedData.transmission.value,
       });
 
       return reply.code(200).send(decodedData);
diff --git a/backend/src/features/vehicles/domain/vehicles.service.ts b/backend/src/features/vehicles/domain/vehicles.service.ts
index f9f78c7..56a3e21 100644
--- a/backend/src/features/vehicles/domain/vehicles.service.ts
+++ b/backend/src/features/vehicles/domain/vehicles.service.ts
@@ -679,7 +679,8 @@ export class VehiclesService {
     logger.debug('VIN decode raw values', {
       vin: response.vin,
       year: sourceYear, make: sourceMake, model: sourceModel,
-      trim: sourceTrim, confidence: response.confidence
+      trim: sourceTrim, engine: sourceEngine, transmission: sourceTransmission,
+      confidence: response.confidence
     });
 
     // Year is always high confidence if present (exact numeric match)
diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py
index 1087736..4bc92cf 100644
--- a/ocr/app/engines/gemini_engine.py
+++ b/ocr/app/engines/gemini_engine.py
@@ -398,7 +398,12 @@ class GeminiEngine:
                     vin,
                 )
 
-            logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0))
+            logger.info(
+                "Gemini decoded VIN %s (confidence=%.2f) raw=%s",
+                vin,
+                raw.get("confidence", 0),
+                json.dumps(raw, default=str),
+            )
 
             return VinDecodeResult(
                 year=resolved_year if resolved_year else raw.get("year"),
-- 
2.49.1