feat: migrate MaintenanceReceiptExtractor to google-genai SDK (refs #234)

Replace vertexai.generative_models with google.genai client pattern.
Fix pre-existing bug: raise GeminiUnavailableError instead of bare
RuntimeError for missing credentials. Add proper try/except blocks
matching GeminiEngine error handling pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-28 11:17:14 -06:00
parent b7f472b3e8
commit 9f51e62b94

View File

@@ -14,6 +14,7 @@ import time
from typing import Any, Optional
from app.config import settings
from app.engines.gemini_engine import GeminiUnavailableError
from app.extractors.receipt_extractor import (
ExtractedField,
ReceiptExtractionResult,
@@ -54,16 +55,16 @@ OCR Text:
"""
_RECEIPT_RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object",
"type": "OBJECT",
"properties": {
"serviceName": {"type": "string", "nullable": True},
"serviceDate": {"type": "string", "nullable": True},
"totalCost": {"type": "number", "nullable": True},
"shopName": {"type": "string", "nullable": True},
"laborCost": {"type": "number", "nullable": True},
"partsCost": {"type": "number", "nullable": True},
"odometerReading": {"type": "number", "nullable": True},
"vehicleInfo": {"type": "string", "nullable": True},
"serviceName": {"type": "STRING", "nullable": True},
"serviceDate": {"type": "STRING", "nullable": True},
"totalCost": {"type": "NUMBER", "nullable": True},
"shopName": {"type": "STRING", "nullable": True},
"laborCost": {"type": "NUMBER", "nullable": True},
"partsCost": {"type": "NUMBER", "nullable": True},
"odometerReading": {"type": "NUMBER", "nullable": True},
"vehicleInfo": {"type": "STRING", "nullable": True},
},
"required": [
"serviceName",
@@ -87,8 +88,8 @@ class MaintenanceReceiptExtractor:
"""
def __init__(self) -> None:
self._model: Any | None = None
self._generation_config: Any | None = None
self._client: Any | None = None
self._model_name: str = ""
def extract(
self,
@@ -169,47 +170,52 @@ class MaintenanceReceiptExtractor:
processing_time_ms=processing_time_ms,
)
def _get_model(self) -> Any:
"""Lazy-initialize Vertex AI Gemini model.
def _get_client(self) -> Any:
"""Lazy-initialize google-genai Gemini client.
Uses the same authentication pattern as GeminiEngine.
"""
if self._model is not None:
return self._model
if self._client is not None:
return self._client
key_path = settings.google_vision_key_path
if not os.path.isfile(key_path):
raise RuntimeError(
raise GeminiUnavailableError(
f"Google credential config not found at {key_path}. "
"Set GOOGLE_VISION_KEY_PATH or mount the secret."
)
from google.cloud import aiplatform # type: ignore[import-untyped]
from vertexai.generative_models import ( # type: ignore[import-untyped]
GenerationConfig,
GenerativeModel,
)
try:
from google import genai # type: ignore[import-untyped]
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
# Point ADC at the WIF credential config (must be set BEFORE Client construction)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
aiplatform.init(
project=settings.vertex_ai_project,
location=settings.vertex_ai_location,
)
self._client = genai.Client(
vertexai=True,
project=settings.vertex_ai_project,
location=settings.vertex_ai_location,
)
self._model_name = settings.gemini_model
model_name = settings.gemini_model
self._model = GenerativeModel(model_name)
self._generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_RECEIPT_RESPONSE_SCHEMA,
)
logger.info(
"Maintenance receipt Gemini client initialized (model=%s)",
self._model_name,
)
return self._client
logger.info(
"Maintenance receipt Gemini model initialized (model=%s)",
model_name,
)
return self._model
except ImportError as exc:
logger.exception("google-genai SDK import failed")
raise GeminiUnavailableError(
"google-genai is not installed. "
"Install with: pip install google-genai"
) from exc
except Exception as exc:
logger.exception("Gemini authentication failed: %s", type(exc).__name__)
raise GeminiUnavailableError(
f"Gemini authentication failed: {exc}"
) from exc
def _extract_with_gemini(self, ocr_text: str) -> dict:
"""Send OCR text to Gemini for semantic field extraction.
@@ -220,13 +226,19 @@ class MaintenanceReceiptExtractor:
Returns:
Dictionary of field_name -> extracted_value from Gemini.
"""
model = self._get_model()
client = self._get_client()
from google.genai import types # type: ignore[import-untyped]
prompt = _RECEIPT_EXTRACTION_PROMPT.format(ocr_text=ocr_text)
response = model.generate_content(
[prompt],
generation_config=self._generation_config,
response = client.models.generate_content(
model=self._model_name,
contents=[prompt],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_RECEIPT_RESPONSE_SCHEMA,
),
)
raw = json.loads(response.text)