From 398d67304f5e39e181a97f314e0d4b271b12ac28 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 28 Feb 2026 11:13:54 -0600 Subject: [PATCH 1/8] feat: replace google-cloud-aiplatform with google-genai dependency (refs #232) Co-Authored-By: Claude Opus 4.6 --- ocr/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocr/requirements.txt b/ocr/requirements.txt index 69864df..138f893 100644 --- a/ocr/requirements.txt +++ b/ocr/requirements.txt @@ -21,8 +21,8 @@ google-cloud-vision>=3.7.0 # PDF Processing PyMuPDF>=1.23.0 -# Vertex AI / Gemini (maintenance schedule extraction) -google-cloud-aiplatform>=1.40.0 +# Google GenAI / Gemini (maintenance schedule extraction, VIN decode) +google-genai>=1.0.0 # Redis for job queue redis>=5.0.0 -- 2.49.1 From b7f472b3e85cc2dca8eae701c7cc9a55fd0645ce Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 28 Feb 2026 11:16:18 -0600 Subject: [PATCH 2/8] feat: migrate GeminiEngine to google-genai SDK with Google Search grounding (refs #233) Replace vertexai.generative_models with google.genai client pattern. Add Google Search grounding tool to VIN decode for improved accuracy. Convert response schema types to uppercase per Vertex AI Schema spec. Co-Authored-By: Claude Opus 4.6 --- ocr/app/config.py | 2 +- ocr/app/engines/gemini_engine.py | 120 +++++++++++++++---------------- 2 files changed, 59 insertions(+), 63 deletions(-) diff --git a/ocr/app/config.py b/ocr/app/config.py index c1f1041..5784753 100644 --- a/ocr/app/config.py +++ b/ocr/app/config.py @@ -29,7 +29,7 @@ class Settings: os.getenv("VISION_MONTHLY_LIMIT", "1000") ) - # Vertex AI / Gemini configuration + # Google GenAI / Gemini configuration self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "") self.vertex_ai_location: str = os.getenv( "VERTEX_AI_LOCATION", "global" diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py index 6f2c556..e6f4cd5 100644 --- a/ocr/app/engines/gemini_engine.py +++ b/ocr/app/engines/gemini_engine.py @@ -2,7 +2,7 @@ Standalone module (does NOT extend OcrEngine) because Gemini performs semantic document understanding, not traditional OCR word-box extraction. -Uses Vertex AI SDK with structured JSON output enforcement. +Uses google-genai SDK with structured JSON output enforcement. """ import json @@ -117,34 +117,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type, """ _VIN_DECODE_SCHEMA: dict[str, Any] = { - "type": "object", + "type": "OBJECT", "properties": { - "year": {"type": "integer", "nullable": True}, - "make": {"type": "string", "nullable": True}, - "model": {"type": "string", "nullable": True}, - "trimLevel": {"type": "string", "nullable": True}, - "bodyType": {"type": "string", "nullable": True}, - "driveType": {"type": "string", "nullable": True}, - "fuelType": {"type": "string", "nullable": True}, - "engine": {"type": "string", "nullable": True}, - "transmission": {"type": "string", "nullable": True}, - "confidence": {"type": "number"}, + "year": {"type": "INTEGER", "nullable": True}, + "make": {"type": "STRING", "nullable": True}, + "model": {"type": "STRING", "nullable": True}, + "trimLevel": {"type": "STRING", "nullable": True}, + "bodyType": {"type": "STRING", "nullable": True}, + "driveType": {"type": "STRING", "nullable": True}, + "fuelType": {"type": "STRING", "nullable": True}, + "engine": {"type": "STRING", "nullable": True}, + "transmission": {"type": "STRING", "nullable": True}, + "confidence": {"type": "NUMBER"}, }, "required": ["confidence"], } _RESPONSE_SCHEMA: dict[str, Any] = { - "type": "object", + "type": "OBJECT", "properties": { "maintenanceSchedule": { - "type": "array", + "type": "ARRAY", "items": { - "type": "object", + "type": "OBJECT", "properties": { - "serviceName": {"type": "string"}, - "intervalMiles": {"type": "number", "nullable": True}, - "intervalMonths": {"type": "number", "nullable": True}, - "details": {"type": "string", "nullable": True}, + "serviceName": {"type": "STRING"}, + "intervalMiles": {"type": "NUMBER", "nullable": True}, + "intervalMonths": {"type": "NUMBER", "nullable": True}, + "details": {"type": "STRING", "nullable": True}, }, "required": ["serviceName"], }, @@ -206,20 +206,21 @@ class GeminiEngine: Standalone class (not an OcrEngine subclass) because Gemini performs semantic document understanding rather than traditional OCR. - Uses lazy initialization: the Vertex AI client is not created until + Uses lazy initialization: the Gemini client is not created until the first call to ``extract_maintenance()`` or ``decode_vin()``. """ def __init__(self) -> None: - self._model: Any | None = None + self._client: Any | None = None + self._model_name: str = "" - def _get_model(self) -> Any: - """Create the GenerativeModel on first use. + def _get_client(self) -> Any: + """Create the genai.Client on first use. Authentication uses the same WIF credential path as Google Vision. """ - if self._model is not None: - return self._model + if self._client is not None: + return self._client key_path = settings.google_vision_key_path if not os.path.isfile(key_path): @@ -229,46 +230,37 @@ class GeminiEngine: ) try: - from google.cloud import aiplatform # type: ignore[import-untyped] - from vertexai.generative_models import ( # type: ignore[import-untyped] - GenerationConfig, - GenerativeModel, - ) + from google import genai # type: ignore[import-untyped] - # Point ADC at the WIF credential config + # Point ADC at the WIF credential config (must be set BEFORE Client construction) os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1" - aiplatform.init( + self._client = genai.Client( + vertexai=True, project=settings.vertex_ai_project, location=settings.vertex_ai_location, ) - - model_name = settings.gemini_model - self._model = GenerativeModel(model_name) - self._generation_config = GenerationConfig( - response_mime_type="application/json", - response_schema=_RESPONSE_SCHEMA, - ) + self._model_name = settings.gemini_model logger.info( "Gemini engine initialized (model=%s, project=%s, location=%s)", - model_name, + self._model_name, settings.vertex_ai_project, settings.vertex_ai_location, ) - return self._model + return self._client except ImportError as exc: - logger.exception("Vertex AI SDK import failed") + logger.exception("google-genai SDK import failed") raise GeminiUnavailableError( - "google-cloud-aiplatform is not installed. " - "Install with: pip install google-cloud-aiplatform" + "google-genai is not installed. " + "Install with: pip install google-genai" ) from exc except Exception as exc: - logger.exception("Vertex AI authentication failed") + logger.exception("Gemini authentication failed: %s", type(exc).__name__) raise GeminiUnavailableError( - f"Vertex AI authentication failed: {exc}" + f"Gemini authentication failed: {exc}" ) from exc def extract_maintenance( @@ -293,19 +285,23 @@ class GeminiEngine: "inline processing. Upload to GCS and use a gs:// URI instead." ) - model = self._get_model() + client = self._get_client() try: - from vertexai.generative_models import Part # type: ignore[import-untyped] + from google.genai import types # type: ignore[import-untyped] - pdf_part = Part.from_data( + pdf_part = types.Part.from_bytes( data=pdf_bytes, mime_type="application/pdf", ) - response = model.generate_content( - [pdf_part, _EXTRACTION_PROMPT], - generation_config=self._generation_config, + response = client.models.generate_content( + model=self._model_name, + contents=[pdf_part, _EXTRACTION_PROMPT], + config=types.GenerateContentConfig( + response_mime_type="application/json", + response_schema=_RESPONSE_SCHEMA, + ), ) raw = json.loads(response.text) @@ -358,7 +354,7 @@ class GeminiEngine: GeminiProcessingError: If Gemini fails to decode the VIN. GeminiUnavailableError: If the engine cannot be initialized. """ - model = self._get_model() + client = self._get_client() # Resolve year deterministically from VIN structure resolved_year = resolve_vin_year(vin) @@ -371,21 +367,21 @@ class GeminiEngine: ) try: - from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped] - - vin_config = GenerationConfig( - response_mime_type="application/json", - response_schema=_VIN_DECODE_SCHEMA, - ) + from google.genai import types # type: ignore[import-untyped] prompt = _VIN_DECODE_PROMPT.format( vin=vin, year=resolved_year or "unknown", year_code=year_code, ) - response = model.generate_content( - [prompt], - generation_config=vin_config, + response = client.models.generate_content( + model=self._model_name, + contents=[prompt], + config=types.GenerateContentConfig( + response_mime_type="application/json", + response_schema=_VIN_DECODE_SCHEMA, + tools=[types.Tool(google_search=types.GoogleSearch())], + ), ) raw = json.loads(response.text) -- 2.49.1 From 9f51e62b94190e161869b39f59f60fc1af9a6a95 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 28 Feb 2026 11:17:14 -0600 Subject: [PATCH 3/8] feat: migrate MaintenanceReceiptExtractor to google-genai SDK (refs #234) Replace vertexai.generative_models with google.genai client pattern. Fix pre-existing bug: raise GeminiUnavailableError instead of bare RuntimeError for missing credentials. Add proper try/except blocks matching GeminiEngine error handling pattern. Co-Authored-By: Claude Opus 4.6 --- .../maintenance_receipt_extractor.py | 96 +++++++++++-------- 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/ocr/app/extractors/maintenance_receipt_extractor.py b/ocr/app/extractors/maintenance_receipt_extractor.py index d5b4d13..194c484 100644 --- a/ocr/app/extractors/maintenance_receipt_extractor.py +++ b/ocr/app/extractors/maintenance_receipt_extractor.py @@ -14,6 +14,7 @@ import time from typing import Any, Optional from app.config import settings +from app.engines.gemini_engine import GeminiUnavailableError from app.extractors.receipt_extractor import ( ExtractedField, ReceiptExtractionResult, @@ -54,16 +55,16 @@ OCR Text: """ _RECEIPT_RESPONSE_SCHEMA: dict[str, Any] = { - "type": "object", + "type": "OBJECT", "properties": { - "serviceName": {"type": "string", "nullable": True}, - "serviceDate": {"type": "string", "nullable": True}, - "totalCost": {"type": "number", "nullable": True}, - "shopName": {"type": "string", "nullable": True}, - "laborCost": {"type": "number", "nullable": True}, - "partsCost": {"type": "number", "nullable": True}, - "odometerReading": {"type": "number", "nullable": True}, - "vehicleInfo": {"type": "string", "nullable": True}, + "serviceName": {"type": "STRING", "nullable": True}, + "serviceDate": {"type": "STRING", "nullable": True}, + "totalCost": {"type": "NUMBER", "nullable": True}, + "shopName": {"type": "STRING", "nullable": True}, + "laborCost": {"type": "NUMBER", "nullable": True}, + "partsCost": {"type": "NUMBER", "nullable": True}, + "odometerReading": {"type": "NUMBER", "nullable": True}, + "vehicleInfo": {"type": "STRING", "nullable": True}, }, "required": [ "serviceName", @@ -87,8 +88,8 @@ class MaintenanceReceiptExtractor: """ def __init__(self) -> None: - self._model: Any | None = None - self._generation_config: Any | None = None + self._client: Any | None = None + self._model_name: str = "" def extract( self, @@ -169,47 +170,52 @@ class MaintenanceReceiptExtractor: processing_time_ms=processing_time_ms, ) - def _get_model(self) -> Any: - """Lazy-initialize Vertex AI Gemini model. + def _get_client(self) -> Any: + """Lazy-initialize google-genai Gemini client. Uses the same authentication pattern as GeminiEngine. """ - if self._model is not None: - return self._model + if self._client is not None: + return self._client key_path = settings.google_vision_key_path if not os.path.isfile(key_path): - raise RuntimeError( + raise GeminiUnavailableError( f"Google credential config not found at {key_path}. " "Set GOOGLE_VISION_KEY_PATH or mount the secret." ) - from google.cloud import aiplatform # type: ignore[import-untyped] - from vertexai.generative_models import ( # type: ignore[import-untyped] - GenerationConfig, - GenerativeModel, - ) + try: + from google import genai # type: ignore[import-untyped] - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path - os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1" + # Point ADC at the WIF credential config (must be set BEFORE Client construction) + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path + os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1" - aiplatform.init( - project=settings.vertex_ai_project, - location=settings.vertex_ai_location, - ) + self._client = genai.Client( + vertexai=True, + project=settings.vertex_ai_project, + location=settings.vertex_ai_location, + ) + self._model_name = settings.gemini_model - model_name = settings.gemini_model - self._model = GenerativeModel(model_name) - self._generation_config = GenerationConfig( - response_mime_type="application/json", - response_schema=_RECEIPT_RESPONSE_SCHEMA, - ) + logger.info( + "Maintenance receipt Gemini client initialized (model=%s)", + self._model_name, + ) + return self._client - logger.info( - "Maintenance receipt Gemini model initialized (model=%s)", - model_name, - ) - return self._model + except ImportError as exc: + logger.exception("google-genai SDK import failed") + raise GeminiUnavailableError( + "google-genai is not installed. " + "Install with: pip install google-genai" + ) from exc + except Exception as exc: + logger.exception("Gemini authentication failed: %s", type(exc).__name__) + raise GeminiUnavailableError( + f"Gemini authentication failed: {exc}" + ) from exc def _extract_with_gemini(self, ocr_text: str) -> dict: """Send OCR text to Gemini for semantic field extraction. @@ -220,13 +226,19 @@ class MaintenanceReceiptExtractor: Returns: Dictionary of field_name -> extracted_value from Gemini. """ - model = self._get_model() + client = self._get_client() + + from google.genai import types # type: ignore[import-untyped] prompt = _RECEIPT_EXTRACTION_PROMPT.format(ocr_text=ocr_text) - response = model.generate_content( - [prompt], - generation_config=self._generation_config, + response = client.models.generate_content( + model=self._model_name, + contents=[prompt], + config=types.GenerateContentConfig( + response_mime_type="application/json", + response_schema=_RECEIPT_RESPONSE_SCHEMA, + ), ) raw = json.loads(response.text) -- 2.49.1 From 1464a0e1af2d4481c75f6c284c03d1bf747d01ea Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 28 Feb 2026 11:21:10 -0600 Subject: [PATCH 4/8] feat: update test mocks for google-genai SDK (refs #235) Replace engine._model/engine._generation_config mocks with engine._client/engine._model_name. Update sys.modules patches from vertexai to google.genai. Remove dead if-False branch. Co-Authored-By: Claude Opus 4.6 --- ocr/tests/test_gemini_engine.py | 85 ++++++++++++++++----------------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/ocr/tests/test_gemini_engine.py b/ocr/tests/test_gemini_engine.py index bf709e4..3674b4a 100644 --- a/ocr/tests/test_gemini_engine.py +++ b/ocr/tests/test_gemini_engine.py @@ -2,11 +2,11 @@ Covers: GeminiEngine initialization, PDF size validation, successful extraction, empty results, and error handling. -All Vertex AI SDK calls are mocked. +All google-genai SDK calls are mocked. """ import json -from unittest.mock import MagicMock, patch, PropertyMock +from unittest.mock import MagicMock, patch import pytest @@ -156,22 +156,16 @@ class TestExtractMaintenance: }, ] - mock_model = MagicMock() - mock_model.generate_content.return_value = _make_gemini_response(schedule) + mock_client = MagicMock() + mock_client.models.generate_content.return_value = _make_gemini_response(schedule) - with ( - patch( - "app.engines.gemini_engine.importlib_vertex_ai" - ) if False else patch.dict("sys.modules", { - "google.cloud": MagicMock(), - "google.cloud.aiplatform": MagicMock(), - "vertexai": MagicMock(), - "vertexai.generative_models": MagicMock(), - }), - ): + with patch.dict("sys.modules", { + "google.genai": MagicMock(), + "google.genai.types": MagicMock(), + }): engine = GeminiEngine() - engine._model = mock_model - engine._generation_config = MagicMock() + engine._client = mock_client + engine._model_name = "gemini-2.5-flash" result = engine.extract_maintenance(_make_pdf_bytes()) @@ -200,12 +194,12 @@ class TestExtractMaintenance: mock_settings.vertex_ai_location = "us-central1" mock_settings.gemini_model = "gemini-2.5-flash" - mock_model = MagicMock() - mock_model.generate_content.return_value = _make_gemini_response([]) + mock_client = MagicMock() + mock_client.models.generate_content.return_value = _make_gemini_response([]) engine = GeminiEngine() - engine._model = mock_model - engine._generation_config = MagicMock() + engine._client = mock_client + engine._model_name = "gemini-2.5-flash" result = engine.extract_maintenance(_make_pdf_bytes()) @@ -223,12 +217,12 @@ class TestExtractMaintenance: schedule = [{"serviceName": "Brake Fluid Replacement"}] - mock_model = MagicMock() - mock_model.generate_content.return_value = _make_gemini_response(schedule) + mock_client = MagicMock() + mock_client.models.generate_content.return_value = _make_gemini_response(schedule) engine = GeminiEngine() - engine._model = mock_model - engine._generation_config = MagicMock() + engine._client = mock_client + engine._model_name = "gemini-2.5-flash" result = engine.extract_maintenance(_make_pdf_bytes()) @@ -264,7 +258,8 @@ class TestErrorHandling: with ( patch("app.engines.gemini_engine.settings") as mock_settings, patch.dict("sys.modules", { - "google.cloud.aiplatform": None, + "google": None, + "google.genai": None, }), ): mock_settings.google_vision_key_path = "/fake/creds.json" @@ -283,12 +278,12 @@ class TestErrorHandling: mock_settings.vertex_ai_location = "us-central1" mock_settings.gemini_model = "gemini-2.5-flash" - mock_model = MagicMock() - mock_model.generate_content.side_effect = RuntimeError("API quota exceeded") + mock_client = MagicMock() + mock_client.models.generate_content.side_effect = RuntimeError("API quota exceeded") engine = GeminiEngine() - engine._model = mock_model - engine._generation_config = MagicMock() + engine._client = mock_client + engine._model_name = "gemini-2.5-flash" with pytest.raises(GeminiProcessingError, match="maintenance extraction failed"): engine.extract_maintenance(_make_pdf_bytes()) @@ -307,12 +302,12 @@ class TestErrorHandling: mock_response = MagicMock() mock_response.text = "not valid json {{" - mock_model = MagicMock() - mock_model.generate_content.return_value = mock_response + mock_client = MagicMock() + mock_client.models.generate_content.return_value = mock_response engine = GeminiEngine() - engine._model = mock_model - engine._generation_config = MagicMock() + engine._client = mock_client + engine._model_name = "gemini-2.5-flash" with pytest.raises(GeminiProcessingError, match="invalid JSON"): engine.extract_maintenance(_make_pdf_bytes()) @@ -322,32 +317,32 @@ class TestErrorHandling: class TestLazyInitialization: - """Verify the model is not created until first use.""" + """Verify the client is not created until first use.""" - def test_model_is_none_after_construction(self): - """GeminiEngine should not initialize the model in __init__.""" + def test_client_is_none_after_construction(self): + """GeminiEngine should not initialize the client in __init__.""" engine = GeminiEngine() - assert engine._model is None + assert engine._client is None @patch("app.engines.gemini_engine.settings") @patch("app.engines.gemini_engine.os.path.isfile", return_value=True) - def test_model_reused_on_second_call(self, mock_isfile, mock_settings): - """Once initialized, the same model instance is reused.""" + def test_client_reused_on_second_call(self, mock_isfile, mock_settings): + """Once initialized, the same client instance is reused.""" mock_settings.google_vision_key_path = "/fake/creds.json" mock_settings.vertex_ai_project = "test-project" mock_settings.vertex_ai_location = "us-central1" mock_settings.gemini_model = "gemini-2.5-flash" schedule = [{"serviceName": "Oil Change", "intervalMiles": 5000}] - mock_model = MagicMock() - mock_model.generate_content.return_value = _make_gemini_response(schedule) + mock_client = MagicMock() + mock_client.models.generate_content.return_value = _make_gemini_response(schedule) engine = GeminiEngine() - engine._model = mock_model - engine._generation_config = MagicMock() + engine._client = mock_client + engine._model_name = "gemini-2.5-flash" engine.extract_maintenance(_make_pdf_bytes()) engine.extract_maintenance(_make_pdf_bytes()) - # Model's generate_content should have been called twice - assert mock_model.generate_content.call_count == 2 + # Client's generate_content should have been called twice + assert mock_client.models.generate_content.call_count == 2 -- 2.49.1 From 96e1dde7b2e705082cea49f94e5b6ee83e0074d0 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 28 Feb 2026 11:21:58 -0600 Subject: [PATCH 5/8] docs: update CLAUDE.md references from Vertex AI to google-genai (refs #231) Co-Authored-By: Claude Opus 4.6 --- ocr/app/CLAUDE.md | 2 +- ocr/app/engines/CLAUDE.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ocr/app/CLAUDE.md b/ocr/app/CLAUDE.md index bc0dfcc..f29864c 100644 --- a/ocr/app/CLAUDE.md +++ b/ocr/app/CLAUDE.md @@ -7,7 +7,7 @@ Python OCR microservice (FastAPI). Primary engine: PaddleOCR PP-OCRv4 with optio | File | What | When to read | | ---- | ---- | ------------ | | `main.py` | FastAPI application entry point | Route registration, app setup | -| `config.py` | Configuration settings (OCR engines, Vertex AI, Redis, Vision API limits) | Environment variables, settings | +| `config.py` | Configuration settings (OCR engines, Google GenAI, Redis, Vision API limits) | Environment variables, settings | | `__init__.py` | Package init | Package structure | ## Subdirectories diff --git a/ocr/app/engines/CLAUDE.md b/ocr/app/engines/CLAUDE.md index 68a4e82..8941522 100644 --- a/ocr/app/engines/CLAUDE.md +++ b/ocr/app/engines/CLAUDE.md @@ -3,7 +3,7 @@ OCR engine abstraction layer. Two categories of engines: 1. **OcrEngine subclasses** (image-to-text): PaddleOCR, Google Vision, Hybrid. Accept image bytes, return text + confidence + word boxes. -2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via Vertex AI. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ. +2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via google-genai SDK. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ. ## Files @@ -15,7 +15,7 @@ OCR engine abstraction layer. Two categories of engines: | `cloud_engine.py` | Google Vision TEXT_DETECTION fallback engine (WIF authentication) | Cloud OCR configuration, API quota | | `hybrid_engine.py` | Combines primary + fallback engine with confidence threshold switching | Engine selection logic, fallback behavior | | `engine_factory.py` | Factory function and engine registry for instantiation | Adding new engine types | -| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, VIN decode, Gemini configuration | +| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (google-genai SDK, 20MB PDF limit, structured JSON output, Google Search grounding for VIN decode) | Manual extraction debugging, VIN decode, Gemini configuration | ## Engine Selection -- 2.49.1 From 936753fac22161c95e51ea8f67b0118b07ec259e Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 28 Feb 2026 12:02:26 -0600 Subject: [PATCH 6/8] fix: VIN Decoding timeouts and logic errors --- .../src/features/vehicles/api/vehicles.api.ts | 2 +- ocr/app/engines/gemini_engine.py | 20 ++-- ocr/tests/test_resolve_vin_year.py | 94 +++++++++---------- 3 files changed, 60 insertions(+), 56 deletions(-) diff --git a/frontend/src/features/vehicles/api/vehicles.api.ts b/frontend/src/features/vehicles/api/vehicles.api.ts index 49ab6dd..3004455 100644 --- a/frontend/src/features/vehicles/api/vehicles.api.ts +++ b/frontend/src/features/vehicles/api/vehicles.api.ts @@ -87,7 +87,7 @@ export const vehiclesApi = { */ decodeVin: async (vin: string): Promise => { const response = await apiClient.post('/vehicles/decode-vin', { vin }, { - timeout: 60000 // 60 seconds for Gemini cold start + timeout: 120000 // 120 seconds for Gemini + Google Search grounding }); return response.data; } diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py index e6f4cd5..8e9d36b 100644 --- a/ocr/app/engines/gemini_engine.py +++ b/ocr/app/engines/gemini_engine.py @@ -40,8 +40,9 @@ Return the results as a JSON object with a single "maintenanceSchedule" array.\ # VIN year code lookup: position 10 character -> base year (first cycle, 1980-2009). # The 30-year cycle repeats: +30 for 2010-2039, +60 for 2040-2069. -# Disambiguation uses position 7: numeric -> 2010+ cycle, alphabetic -> 1980s cycle. -# For the 2040+ cycle (when position 7 is alphabetic again), we pick the most +# Disambiguation uses position 7: alphabetic -> 2010+ cycle, numeric -> 1980s cycle. +# Per NHTSA FMVSS No. 115: MY2010+ vehicles must use alphabetic position 7. +# For the 2040+ cycle (when position 7 is numeric again), we pick the most # recent plausible year (not more than 2 years in the future). _VIN_YEAR_CODES: dict[str, int] = { "A": 1980, "B": 1981, "C": 1982, "D": 1983, "E": 1984, @@ -58,10 +59,10 @@ def resolve_vin_year(vin: str) -> int | None: """Deterministically resolve model year from VIN positions 7 and 10. VIN year codes repeat on a 30-year cycle. Position 7 disambiguates: - - Numeric position 7 -> 2010-2039 cycle - - Alphabetic position 7 -> 1980-2009 or 2040-2050+ cycle + - Alphabetic position 7 -> 2010-2039 cycle (NHTSA MY2010+ requirement) + - Numeric position 7 -> 1980-2009 or 2040-2069 cycle - For the alphabetic case with three possible cycles, picks the most recent + For the numeric case with two possible cycles, picks the most recent year that is not more than 2 years in the future. Returns None if the VIN is too short or position 10 is not a valid year code. @@ -76,11 +77,11 @@ def resolve_vin_year(vin: str) -> int | None: if base_year is None: return None - if pos7.isdigit(): - # Numeric position 7 -> second cycle (2010-2039) + if pos7.isalpha(): + # Alphabetic position 7 -> second cycle (2010-2039) return base_year + 30 - # Alphabetic position 7 -> first cycle (1980-2009) or third cycle (2040-2069) + # Numeric position 7 -> first cycle (1980-2009) or third cycle (2040-2069) # Pick the most recent plausible year max_plausible = datetime.now().year + 2 @@ -381,6 +382,9 @@ class GeminiEngine: response_mime_type="application/json", response_schema=_VIN_DECODE_SCHEMA, tools=[types.Tool(google_search=types.GoogleSearch())], + automatic_function_calling=types.AutomaticFunctionCallingConfig( + max_remote_calls=3, + ), ), ) diff --git a/ocr/tests/test_resolve_vin_year.py b/ocr/tests/test_resolve_vin_year.py index 54ea5f9..fcb63a6 100644 --- a/ocr/tests/test_resolve_vin_year.py +++ b/ocr/tests/test_resolve_vin_year.py @@ -12,86 +12,86 @@ from app.engines.gemini_engine import resolve_vin_year class TestSecondCycle: - """Position 7 numeric -> 2010-2039 cycle.""" + """Position 7 alphabetic -> 2010-2039 cycle (NHTSA MY2010+ requirement).""" - def test_p_with_numeric_pos7_returns_2023(self): - """P=2023 when position 7 is numeric (the bug that triggered this fix).""" - # VIN: 1G1YE2D32P5602473 -- pos7='2' (numeric), pos10='P' + def test_p_with_alpha_pos7_returns_2023(self): + """P=2023 when position 7 is alphabetic (the bug that triggered this fix).""" + # VIN: 1G1YE2D32P5602473 -- pos7='D' (alphabetic), pos10='P' assert resolve_vin_year("1G1YE2D32P5602473") == 2023 - def test_a_with_numeric_pos7_returns_2010(self): - """A=2010 when position 7 is numeric.""" - assert resolve_vin_year("1G1YE2112A5602473") == 2010 + def test_a_with_alpha_pos7_returns_2010(self): + """A=2010 when position 7 is alphabetic.""" + assert resolve_vin_year("1G1YE2D12A5602473") == 2010 - def test_l_with_numeric_pos7_returns_2020(self): - """L=2020 when position 7 is numeric.""" - assert resolve_vin_year("1G1YE2112L5602473") == 2020 + def test_l_with_alpha_pos7_returns_2020(self): + """L=2020 when position 7 is alphabetic.""" + assert resolve_vin_year("1G1YE2D12L5602473") == 2020 - def test_9_with_numeric_pos7_returns_2039(self): - """9=2039 when position 7 is numeric.""" - assert resolve_vin_year("1G1YE211295602473") == 2039 + def test_9_with_alpha_pos7_returns_2039(self): + """9=2039 when position 7 is alphabetic.""" + assert resolve_vin_year("1G1YE2D1295602473") == 2039 - def test_digit_1_with_numeric_pos7_returns_2031(self): - """1=2031 when position 7 is numeric.""" - assert resolve_vin_year("1G1YE211215602473") == 2031 + def test_digit_1_with_alpha_pos7_returns_2031(self): + """1=2031 when position 7 is alphabetic.""" + assert resolve_vin_year("1G1YE2D1215602473") == 2031 - def test_s_with_numeric_pos7_returns_2025(self): - """S=2025 when position 7 is numeric.""" - assert resolve_vin_year("1G1YE2112S5602473") == 2025 + def test_s_with_alpha_pos7_returns_2025(self): + """S=2025 when position 7 is alphabetic.""" + assert resolve_vin_year("1G1YE2D12S5602473") == 2025 - def test_t_with_numeric_pos7_returns_2026(self): - """T=2026 when position 7 is numeric.""" - assert resolve_vin_year("1G1YE2112T5602473") == 2026 + def test_t_with_alpha_pos7_returns_2026(self): + """T=2026 when position 7 is alphabetic.""" + assert resolve_vin_year("1G1YE2D12T5602473") == 2026 class TestFirstCycle: - """Position 7 alphabetic -> 1980-2009 cycle (when 2040+ is not yet plausible).""" + """Position 7 numeric -> 1980-2009 cycle.""" - def test_m_with_alpha_pos7_returns_1991(self): - """M=1991 when position 7 is alphabetic (third cycle 2051 is not plausible).""" - assert resolve_vin_year("1G1YE2J32M5602473") == 1991 + def test_m_with_numeric_pos7_returns_1991(self): + """M=1991 when position 7 is numeric.""" + assert resolve_vin_year("1G1YE2132M5602473") == 1991 - def test_n_with_alpha_pos7_returns_1992(self): - """N=1992 when position 7 is alphabetic.""" - assert resolve_vin_year("1G1YE2J32N5602473") == 1992 + def test_n_with_numeric_pos7_returns_1992(self): + """N=1992 when position 7 is numeric.""" + assert resolve_vin_year("1G1YE2132N5602473") == 1992 - def test_p_with_alpha_pos7_returns_1993(self): - """P=1993 when position 7 is alphabetic (third cycle 2053 not plausible).""" - assert resolve_vin_year("1G1YE2J32P5602473") == 1993 + def test_p_with_numeric_pos7_returns_1993(self): + """P=1993 when position 7 is numeric.""" + assert resolve_vin_year("1G1YE2132P5602473") == 1993 - def test_y_with_alpha_pos7_returns_2000(self): - """Y=2000 when position 7 is alphabetic.""" - assert resolve_vin_year("1G1YE2J32Y5602473") == 2000 + def test_y_with_numeric_pos7_returns_2000(self): + """Y=2000 when position 7 is numeric.""" + assert resolve_vin_year("1G1YE2132Y5602473") == 2000 class TestThirdCycle: - """Position 7 alphabetic + third cycle year (2040-2050) is plausible.""" + """Position 7 numeric + third cycle year (2040-2050) is plausible.""" @patch("app.engines.gemini_engine.datetime") - def test_a_with_alpha_pos7_returns_2040_when_plausible(self, mock_dt): - """A=2040 when position 7 is alphabetic and year 2040 is plausible.""" + def test_a_with_numeric_pos7_returns_2040_when_plausible(self, mock_dt): + """A=2040 when position 7 is numeric and year 2040 is plausible.""" mock_dt.now.return_value = datetime(2039, 1, 1) # 2039 + 2 = 2041 >= 2040, so third cycle is plausible - assert resolve_vin_year("1G1YE2J32A5602473") == 2040 + assert resolve_vin_year("1G1YE2132A5602473") == 2040 @patch("app.engines.gemini_engine.datetime") - def test_l_with_alpha_pos7_returns_2050_when_plausible(self, mock_dt): - """L=2050 when position 7 is alphabetic and year 2050 is plausible.""" + def test_l_with_numeric_pos7_returns_2050_when_plausible(self, mock_dt): + """L=2050 when position 7 is numeric and year 2050 is plausible.""" mock_dt.now.return_value = datetime(2049, 6, 1) - assert resolve_vin_year("1G1YE2J32L5602473") == 2050 + assert resolve_vin_year("1G1YE2132L5602473") == 2050 @patch("app.engines.gemini_engine.datetime") - def test_a_with_alpha_pos7_returns_1980_when_2040_not_plausible(self, mock_dt): + def test_a_with_numeric_pos7_returns_1980_when_2040_not_plausible(self, mock_dt): """A=1980 when third cycle year (2040) exceeds max plausible.""" mock_dt.now.return_value = datetime(2026, 2, 20) # 2026 + 2 = 2028 < 2040, so third cycle not plausible -> first cycle - assert resolve_vin_year("1G1YE2J32A5602473") == 1980 + assert resolve_vin_year("1G1YE2132A5602473") == 1980 @patch("app.engines.gemini_engine.datetime") - def test_k_with_alpha_pos7_returns_2049_when_plausible(self, mock_dt): - """K=2049 when position 7 is alphabetic and year is plausible.""" + def test_k_with_numeric_pos7_returns_2049_when_plausible(self, mock_dt): + """K=2049 when position 7 is numeric and year is plausible.""" mock_dt.now.return_value = datetime(2048, 1, 1) - assert resolve_vin_year("1G1YE2J32K5602473") == 2049 + assert resolve_vin_year("1G1YE2132K5602473") == 2049 class TestEdgeCases: -- 2.49.1 From 1add6c8240685069ccc4c06bf50aa2321cccafc1 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 28 Feb 2026 12:59:04 -0600 Subject: [PATCH 7/8] fix: remove unsupported AutomaticFunctionCallingConfig parameter (refs #231) The installed google-genai version does not support max_remote_calls on AutomaticFunctionCallingConfig, causing a pydantic validation error that broke VIN decode on staging. Co-Authored-By: Claude Opus 4.6 --- ocr/app/engines/gemini_engine.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py index 8e9d36b..1087736 100644 --- a/ocr/app/engines/gemini_engine.py +++ b/ocr/app/engines/gemini_engine.py @@ -382,9 +382,6 @@ class GeminiEngine: response_mime_type="application/json", response_schema=_VIN_DECODE_SCHEMA, tools=[types.Tool(google_search=types.GoogleSearch())], - automatic_function_calling=types.AutomaticFunctionCallingConfig( - max_remote_calls=3, - ), ), ) -- 2.49.1 From 56df5d48f3b82abc9930d58ab98b69112dc90561 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 28 Feb 2026 21:16:56 -0600 Subject: [PATCH 8/8] fix: revert unsupported AFC config and add diagnostic logging for VIN decode (refs #231) - Remove AutomaticFunctionCallingConfig(max_remote_calls=3) which caused pydantic validation error on the installed google-genai version - Log full Gemini raw JSON response in OCR engine for debugging - Add engine/transmission to backend raw values log - Add hasTrim/hasEngine/hasTransmission to decode success log Co-Authored-By: Claude Opus 4.6 --- backend/src/features/vehicles/api/vehicles.controller.ts | 5 ++++- backend/src/features/vehicles/domain/vehicles.service.ts | 3 ++- ocr/app/engines/gemini_engine.py | 7 ++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/backend/src/features/vehicles/api/vehicles.controller.ts b/backend/src/features/vehicles/api/vehicles.controller.ts index a9e7f33..103fc72 100644 --- a/backend/src/features/vehicles/api/vehicles.controller.ts +++ b/backend/src/features/vehicles/api/vehicles.controller.ts @@ -416,7 +416,10 @@ export class VehiclesController { userId, hasYear: !!decodedData.year.value, hasMake: !!decodedData.make.value, - hasModel: !!decodedData.model.value + hasModel: !!decodedData.model.value, + hasTrim: !!decodedData.trimLevel.value, + hasEngine: !!decodedData.engine.value, + hasTransmission: !!decodedData.transmission.value, }); return reply.code(200).send(decodedData); diff --git a/backend/src/features/vehicles/domain/vehicles.service.ts b/backend/src/features/vehicles/domain/vehicles.service.ts index f9f78c7..56a3e21 100644 --- a/backend/src/features/vehicles/domain/vehicles.service.ts +++ b/backend/src/features/vehicles/domain/vehicles.service.ts @@ -679,7 +679,8 @@ export class VehiclesService { logger.debug('VIN decode raw values', { vin: response.vin, year: sourceYear, make: sourceMake, model: sourceModel, - trim: sourceTrim, confidence: response.confidence + trim: sourceTrim, engine: sourceEngine, transmission: sourceTransmission, + confidence: response.confidence }); // Year is always high confidence if present (exact numeric match) diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py index 1087736..4bc92cf 100644 --- a/ocr/app/engines/gemini_engine.py +++ b/ocr/app/engines/gemini_engine.py @@ -398,7 +398,12 @@ class GeminiEngine: vin, ) - logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0)) + logger.info( + "Gemini decoded VIN %s (confidence=%.2f) raw=%s", + vin, + raw.get("confidence", 0), + json.dumps(raw, default=str), + ) return VinDecodeResult( year=resolved_year if resolved_year else raw.get("year"), -- 2.49.1