Compare commits

...

4 Commits

Author SHA1 Message Date
Eric Gullickson
96e1dde7b2 docs: update CLAUDE.md references from Vertex AI to google-genai (refs #231)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 8m4s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 24s
Deploy to Staging / Verify Staging (pull_request) Successful in 9s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 9s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:21:58 -06:00
Eric Gullickson
1464a0e1af feat: update test mocks for google-genai SDK (refs #235)
Replace engine._model/engine._generation_config mocks with
engine._client/engine._model_name. Update sys.modules patches
from vertexai to google.genai. Remove dead if-False branch.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:21:10 -06:00
Eric Gullickson
9f51e62b94 feat: migrate MaintenanceReceiptExtractor to google-genai SDK (refs #234)
Replace vertexai.generative_models with google.genai client pattern.
Fix pre-existing bug: raise GeminiUnavailableError instead of bare
RuntimeError for missing credentials. Add proper try/except blocks
matching GeminiEngine error handling pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:17:14 -06:00
Eric Gullickson
b7f472b3e8 feat: migrate GeminiEngine to google-genai SDK with Google Search grounding (refs #233)
Replace vertexai.generative_models with google.genai client pattern.
Add Google Search grounding tool to VIN decode for improved accuracy.
Convert response schema types to uppercase per Vertex AI Schema spec.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:16:18 -06:00
6 changed files with 156 additions and 153 deletions

View File

@@ -7,7 +7,7 @@ Python OCR microservice (FastAPI). Primary engine: PaddleOCR PP-OCRv4 with optio
| File | What | When to read |
| ---- | ---- | ------------ |
| `main.py` | FastAPI application entry point | Route registration, app setup |
| `config.py` | Configuration settings (OCR engines, Vertex AI, Redis, Vision API limits) | Environment variables, settings |
| `config.py` | Configuration settings (OCR engines, Google GenAI, Redis, Vision API limits) | Environment variables, settings |
| `__init__.py` | Package init | Package structure |
## Subdirectories

View File

@@ -29,7 +29,7 @@ class Settings:
os.getenv("VISION_MONTHLY_LIMIT", "1000")
)
# Vertex AI / Gemini configuration
# Google GenAI / Gemini configuration (the client still targets the Vertex AI backend, hence the VERTEX_AI_* names)
self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
self.vertex_ai_location: str = os.getenv(
"VERTEX_AI_LOCATION", "global"

View File

@@ -3,7 +3,7 @@
OCR engine abstraction layer. Two categories of engines:
1. **OcrEngine subclasses** (image-to-text): PaddleOCR, Google Vision, Hybrid. Accept image bytes, return text + confidence + word boxes.
2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via Vertex AI. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via the google-genai SDK. Accepts PDF bytes or VIN strings and returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
## Files
@@ -15,7 +15,7 @@ OCR engine abstraction layer. Two categories of engines:
| `cloud_engine.py` | Google Vision TEXT_DETECTION fallback engine (WIF authentication) | Cloud OCR configuration, API quota |
| `hybrid_engine.py` | Combines primary + fallback engine with confidence threshold switching | Engine selection logic, fallback behavior |
| `engine_factory.py` | Factory function and engine registry for instantiation | Adding new engine types |
| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, VIN decode, Gemini configuration |
| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (google-genai SDK, 20MB PDF limit, structured JSON output, Google Search grounding for VIN decode) | Manual extraction debugging, VIN decode, Gemini configuration |
## Engine Selection

View File

@@ -2,7 +2,7 @@
Standalone module (does NOT extend OcrEngine) because Gemini performs
semantic document understanding, not traditional OCR word-box extraction.
Uses Vertex AI SDK with structured JSON output enforcement.
Uses the google-genai SDK with structured JSON output enforcement.
"""
import json
@@ -117,34 +117,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type,
"""
_VIN_DECODE_SCHEMA: dict[str, Any] = {
"type": "object",
"type": "OBJECT",
"properties": {
"year": {"type": "integer", "nullable": True},
"make": {"type": "string", "nullable": True},
"model": {"type": "string", "nullable": True},
"trimLevel": {"type": "string", "nullable": True},
"bodyType": {"type": "string", "nullable": True},
"driveType": {"type": "string", "nullable": True},
"fuelType": {"type": "string", "nullable": True},
"engine": {"type": "string", "nullable": True},
"transmission": {"type": "string", "nullable": True},
"confidence": {"type": "number"},
"year": {"type": "INTEGER", "nullable": True},
"make": {"type": "STRING", "nullable": True},
"model": {"type": "STRING", "nullable": True},
"trimLevel": {"type": "STRING", "nullable": True},
"bodyType": {"type": "STRING", "nullable": True},
"driveType": {"type": "STRING", "nullable": True},
"fuelType": {"type": "STRING", "nullable": True},
"engine": {"type": "STRING", "nullable": True},
"transmission": {"type": "STRING", "nullable": True},
"confidence": {"type": "NUMBER"},
},
"required": ["confidence"],
}
_RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object",
"type": "OBJECT",
"properties": {
"maintenanceSchedule": {
"type": "array",
"type": "ARRAY",
"items": {
"type": "object",
"type": "OBJECT",
"properties": {
"serviceName": {"type": "string"},
"intervalMiles": {"type": "number", "nullable": True},
"intervalMonths": {"type": "number", "nullable": True},
"details": {"type": "string", "nullable": True},
"serviceName": {"type": "STRING"},
"intervalMiles": {"type": "NUMBER", "nullable": True},
"intervalMonths": {"type": "NUMBER", "nullable": True},
"details": {"type": "STRING", "nullable": True},
},
"required": ["serviceName"],
},
@@ -206,20 +206,21 @@ class GeminiEngine:
Standalone class (not an OcrEngine subclass) because Gemini performs
semantic document understanding rather than traditional OCR.
Uses lazy initialization: the Vertex AI client is not created until
Uses lazy initialization: the Gemini client is not created until
the first call to ``extract_maintenance()`` or ``decode_vin()``.
"""
def __init__(self) -> None:
self._model: Any | None = None
self._client: Any | None = None
self._model_name: str = ""
def _get_model(self) -> Any:
"""Create the GenerativeModel on first use.
def _get_client(self) -> Any:
"""Create the genai.Client on first use.
Authentication uses the same WIF credential path as Google Vision.
"""
if self._model is not None:
return self._model
if self._client is not None:
return self._client
key_path = settings.google_vision_key_path
if not os.path.isfile(key_path):
@@ -229,46 +230,37 @@ class GeminiEngine:
)
try:
from google.cloud import aiplatform # type: ignore[import-untyped]
from vertexai.generative_models import ( # type: ignore[import-untyped]
GenerationConfig,
GenerativeModel,
)
from google import genai # type: ignore[import-untyped]
# Point ADC at the WIF credential config
# Point ADC at the WIF credential config (must be set BEFORE Client construction)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
aiplatform.init(
self._client = genai.Client(
vertexai=True,
project=settings.vertex_ai_project,
location=settings.vertex_ai_location,
)
model_name = settings.gemini_model
self._model = GenerativeModel(model_name)
self._generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
)
self._model_name = settings.gemini_model
logger.info(
"Gemini engine initialized (model=%s, project=%s, location=%s)",
model_name,
self._model_name,
settings.vertex_ai_project,
settings.vertex_ai_location,
)
return self._model
return self._client
except ImportError as exc:
logger.exception("Vertex AI SDK import failed")
logger.exception("google-genai SDK import failed")
raise GeminiUnavailableError(
"google-cloud-aiplatform is not installed. "
"Install with: pip install google-cloud-aiplatform"
"google-genai is not installed. "
"Install with: pip install google-genai"
) from exc
except Exception as exc:
logger.exception("Vertex AI authentication failed")
logger.exception("Gemini authentication failed: %s", type(exc).__name__)
raise GeminiUnavailableError(
f"Vertex AI authentication failed: {exc}"
f"Gemini authentication failed: {exc}"
) from exc
def extract_maintenance(
@@ -293,19 +285,23 @@ class GeminiEngine:
"inline processing. Upload to GCS and use a gs:// URI instead."
)
model = self._get_model()
client = self._get_client()
try:
from vertexai.generative_models import Part # type: ignore[import-untyped]
from google.genai import types # type: ignore[import-untyped]
pdf_part = Part.from_data(
pdf_part = types.Part.from_bytes(
data=pdf_bytes,
mime_type="application/pdf",
)
response = model.generate_content(
[pdf_part, _EXTRACTION_PROMPT],
generation_config=self._generation_config,
response = client.models.generate_content(
model=self._model_name,
contents=[pdf_part, _EXTRACTION_PROMPT],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
),
)
raw = json.loads(response.text)
@@ -358,7 +354,7 @@ class GeminiEngine:
GeminiProcessingError: If Gemini fails to decode the VIN.
GeminiUnavailableError: If the engine cannot be initialized.
"""
model = self._get_model()
client = self._get_client()
# Resolve year deterministically from VIN structure
resolved_year = resolve_vin_year(vin)
@@ -371,21 +367,21 @@ class GeminiEngine:
)
try:
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
vin_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
)
from google.genai import types # type: ignore[import-untyped]
prompt = _VIN_DECODE_PROMPT.format(
vin=vin,
year=resolved_year or "unknown",
year_code=year_code,
)
response = model.generate_content(
[prompt],
generation_config=vin_config,
response = client.models.generate_content(
model=self._model_name,
contents=[prompt],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
tools=[types.Tool(google_search=types.GoogleSearch())],
),
)
raw = json.loads(response.text)

View File

@@ -14,6 +14,7 @@ import time
from typing import Any, Optional
from app.config import settings
from app.engines.gemini_engine import GeminiUnavailableError
from app.extractors.receipt_extractor import (
ExtractedField,
ReceiptExtractionResult,
@@ -54,16 +55,16 @@ OCR Text:
"""
_RECEIPT_RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object",
"type": "OBJECT",
"properties": {
"serviceName": {"type": "string", "nullable": True},
"serviceDate": {"type": "string", "nullable": True},
"totalCost": {"type": "number", "nullable": True},
"shopName": {"type": "string", "nullable": True},
"laborCost": {"type": "number", "nullable": True},
"partsCost": {"type": "number", "nullable": True},
"odometerReading": {"type": "number", "nullable": True},
"vehicleInfo": {"type": "string", "nullable": True},
"serviceName": {"type": "STRING", "nullable": True},
"serviceDate": {"type": "STRING", "nullable": True},
"totalCost": {"type": "NUMBER", "nullable": True},
"shopName": {"type": "STRING", "nullable": True},
"laborCost": {"type": "NUMBER", "nullable": True},
"partsCost": {"type": "NUMBER", "nullable": True},
"odometerReading": {"type": "NUMBER", "nullable": True},
"vehicleInfo": {"type": "STRING", "nullable": True},
},
"required": [
"serviceName",
@@ -87,8 +88,8 @@ class MaintenanceReceiptExtractor:
"""
def __init__(self) -> None:
self._model: Any | None = None
self._generation_config: Any | None = None
self._client: Any | None = None
self._model_name: str = ""
def extract(
self,
@@ -169,47 +170,52 @@ class MaintenanceReceiptExtractor:
processing_time_ms=processing_time_ms,
)
def _get_model(self) -> Any:
"""Lazy-initialize Vertex AI Gemini model.
def _get_client(self) -> Any:
"""Lazy-initialize google-genai Gemini client.
Uses the same authentication pattern as GeminiEngine.
"""
if self._model is not None:
return self._model
if self._client is not None:
return self._client
key_path = settings.google_vision_key_path
if not os.path.isfile(key_path):
raise RuntimeError(
raise GeminiUnavailableError(
f"Google credential config not found at {key_path}. "
"Set GOOGLE_VISION_KEY_PATH or mount the secret."
)
from google.cloud import aiplatform # type: ignore[import-untyped]
from vertexai.generative_models import ( # type: ignore[import-untyped]
GenerationConfig,
GenerativeModel,
)
try:
from google import genai # type: ignore[import-untyped]
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
# Point ADC at the WIF credential config (must be set BEFORE Client construction)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
aiplatform.init(
project=settings.vertex_ai_project,
location=settings.vertex_ai_location,
)
self._client = genai.Client(
vertexai=True,
project=settings.vertex_ai_project,
location=settings.vertex_ai_location,
)
self._model_name = settings.gemini_model
model_name = settings.gemini_model
self._model = GenerativeModel(model_name)
self._generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_RECEIPT_RESPONSE_SCHEMA,
)
logger.info(
"Maintenance receipt Gemini client initialized (model=%s)",
self._model_name,
)
return self._client
logger.info(
"Maintenance receipt Gemini model initialized (model=%s)",
model_name,
)
return self._model
except ImportError as exc:
logger.exception("google-genai SDK import failed")
raise GeminiUnavailableError(
"google-genai is not installed. "
"Install with: pip install google-genai"
) from exc
except Exception as exc:
logger.exception("Gemini authentication failed: %s", type(exc).__name__)
raise GeminiUnavailableError(
f"Gemini authentication failed: {exc}"
) from exc
def _extract_with_gemini(self, ocr_text: str) -> dict:
"""Send OCR text to Gemini for semantic field extraction.
@@ -220,13 +226,19 @@ class MaintenanceReceiptExtractor:
Returns:
Dictionary of field_name -> extracted_value from Gemini.
"""
model = self._get_model()
client = self._get_client()
from google.genai import types # type: ignore[import-untyped]
prompt = _RECEIPT_EXTRACTION_PROMPT.format(ocr_text=ocr_text)
response = model.generate_content(
[prompt],
generation_config=self._generation_config,
response = client.models.generate_content(
model=self._model_name,
contents=[prompt],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_RECEIPT_RESPONSE_SCHEMA,
),
)
raw = json.loads(response.text)

View File

@@ -2,11 +2,11 @@
Covers: GeminiEngine initialization, PDF size validation,
successful extraction, empty results, and error handling.
All Vertex AI SDK calls are mocked.
All google-genai SDK calls are mocked.
"""
import json
from unittest.mock import MagicMock, patch, PropertyMock
from unittest.mock import MagicMock, patch
import pytest
@@ -156,22 +156,16 @@ class TestExtractMaintenance:
},
]
mock_model = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response(schedule)
mock_client = MagicMock()
mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
with (
patch(
"app.engines.gemini_engine.importlib_vertex_ai"
) if False else patch.dict("sys.modules", {
"google.cloud": MagicMock(),
"google.cloud.aiplatform": MagicMock(),
"vertexai": MagicMock(),
"vertexai.generative_models": MagicMock(),
}),
):
with patch.dict("sys.modules", {
"google.genai": MagicMock(),
"google.genai.types": MagicMock(),
}):
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
result = engine.extract_maintenance(_make_pdf_bytes())
@@ -200,12 +194,12 @@ class TestExtractMaintenance:
mock_settings.vertex_ai_location = "us-central1"
mock_settings.gemini_model = "gemini-2.5-flash"
mock_model = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response([])
mock_client = MagicMock()
mock_client.models.generate_content.return_value = _make_gemini_response([])
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
result = engine.extract_maintenance(_make_pdf_bytes())
@@ -223,12 +217,12 @@ class TestExtractMaintenance:
schedule = [{"serviceName": "Brake Fluid Replacement"}]
mock_model = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response(schedule)
mock_client = MagicMock()
mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
result = engine.extract_maintenance(_make_pdf_bytes())
@@ -264,7 +258,8 @@ class TestErrorHandling:
with (
patch("app.engines.gemini_engine.settings") as mock_settings,
patch.dict("sys.modules", {
"google.cloud.aiplatform": None,
"google": None,
"google.genai": None,
}),
):
mock_settings.google_vision_key_path = "/fake/creds.json"
@@ -283,12 +278,12 @@ class TestErrorHandling:
mock_settings.vertex_ai_location = "us-central1"
mock_settings.gemini_model = "gemini-2.5-flash"
mock_model = MagicMock()
mock_model.generate_content.side_effect = RuntimeError("API quota exceeded")
mock_client = MagicMock()
mock_client.models.generate_content.side_effect = RuntimeError("API quota exceeded")
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
with pytest.raises(GeminiProcessingError, match="maintenance extraction failed"):
engine.extract_maintenance(_make_pdf_bytes())
@@ -307,12 +302,12 @@ class TestErrorHandling:
mock_response = MagicMock()
mock_response.text = "not valid json {{"
mock_model = MagicMock()
mock_model.generate_content.return_value = mock_response
mock_client = MagicMock()
mock_client.models.generate_content.return_value = mock_response
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
with pytest.raises(GeminiProcessingError, match="invalid JSON"):
engine.extract_maintenance(_make_pdf_bytes())
@@ -322,32 +317,32 @@ class TestErrorHandling:
class TestLazyInitialization:
"""Verify the model is not created until first use."""
"""Verify the client is not created until first use."""
def test_model_is_none_after_construction(self):
"""GeminiEngine should not initialize the model in __init__."""
def test_client_is_none_after_construction(self):
"""GeminiEngine should not initialize the client in __init__."""
engine = GeminiEngine()
assert engine._model is None
assert engine._client is None
@patch("app.engines.gemini_engine.settings")
@patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
def test_model_reused_on_second_call(self, mock_isfile, mock_settings):
"""Once initialized, the same model instance is reused."""
def test_client_reused_on_second_call(self, mock_isfile, mock_settings):
"""Once initialized, the same client instance is reused."""
mock_settings.google_vision_key_path = "/fake/creds.json"
mock_settings.vertex_ai_project = "test-project"
mock_settings.vertex_ai_location = "us-central1"
mock_settings.gemini_model = "gemini-2.5-flash"
schedule = [{"serviceName": "Oil Change", "intervalMiles": 5000}]
mock_model = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response(schedule)
mock_client = MagicMock()
mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
engine.extract_maintenance(_make_pdf_bytes())
engine.extract_maintenance(_make_pdf_bytes())
# Model's generate_content should have been called twice
assert mock_model.generate_content.call_count == 2
# Client's generate_content should have been called twice
assert mock_client.models.generate_content.call_count == 2