diff --git a/ocr/app/extractors/maintenance_receipt_extractor.py b/ocr/app/extractors/maintenance_receipt_extractor.py
index d5b4d13..194c484 100644
--- a/ocr/app/extractors/maintenance_receipt_extractor.py
+++ b/ocr/app/extractors/maintenance_receipt_extractor.py
@@ -14,6 +14,7 @@ import time
 from typing import Any, Optional
 
 from app.config import settings
+from app.engines.gemini_engine import GeminiUnavailableError
 from app.extractors.receipt_extractor import (
     ExtractedField,
     ReceiptExtractionResult,
@@ -54,16 +55,16 @@ OCR Text:
 """
 
 _RECEIPT_RESPONSE_SCHEMA: dict[str, Any] = {
-    "type": "object",
+    "type": "OBJECT",
     "properties": {
-        "serviceName": {"type": "string", "nullable": True},
-        "serviceDate": {"type": "string", "nullable": True},
-        "totalCost": {"type": "number", "nullable": True},
-        "shopName": {"type": "string", "nullable": True},
-        "laborCost": {"type": "number", "nullable": True},
-        "partsCost": {"type": "number", "nullable": True},
-        "odometerReading": {"type": "number", "nullable": True},
-        "vehicleInfo": {"type": "string", "nullable": True},
+        "serviceName": {"type": "STRING", "nullable": True},
+        "serviceDate": {"type": "STRING", "nullable": True},
+        "totalCost": {"type": "NUMBER", "nullable": True},
+        "shopName": {"type": "STRING", "nullable": True},
+        "laborCost": {"type": "NUMBER", "nullable": True},
+        "partsCost": {"type": "NUMBER", "nullable": True},
+        "odometerReading": {"type": "NUMBER", "nullable": True},
+        "vehicleInfo": {"type": "STRING", "nullable": True},
     },
     "required": [
         "serviceName",
@@ -87,8 +88,8 @@ class MaintenanceReceiptExtractor:
     """
 
     def __init__(self) -> None:
-        self._model: Any | None = None
-        self._generation_config: Any | None = None
+        self._client: Any | None = None
+        self._model_name: str = ""
 
     def extract(
         self,
@@ -169,47 +170,65 @@ class MaintenanceReceiptExtractor:
             processing_time_ms=processing_time_ms,
         )
 
-    def _get_model(self) -> Any:
-        """Lazy-initialize Vertex AI Gemini model.
+    def _get_client(self) -> Any:
+        """Lazy-initialize google-genai Gemini client.
 
         Uses the same authentication pattern as GeminiEngine.
+
+        Returns:
+            The cached google-genai client, constructed on first use.
+
+        Raises:
+            GeminiUnavailableError: If the credential config file is
+                missing, google-genai cannot be imported, or client
+                construction fails.
         """
-        if self._model is not None:
-            return self._model
+        if self._client is not None:
+            return self._client
 
         key_path = settings.google_vision_key_path
         if not os.path.isfile(key_path):
-            raise RuntimeError(
+            raise GeminiUnavailableError(
                 f"Google credential config not found at {key_path}. "
                 "Set GOOGLE_VISION_KEY_PATH or mount the secret."
             )
 
-        from google.cloud import aiplatform  # type: ignore[import-untyped]
-        from vertexai.generative_models import (  # type: ignore[import-untyped]
-            GenerationConfig,
-            GenerativeModel,
-        )
+        try:
+            from google import genai  # type: ignore[import-untyped]
 
-        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
-        os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
+            # Point ADC at the WIF credential config (must be set BEFORE Client construction)
+            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
+            os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
 
-        aiplatform.init(
-            project=settings.vertex_ai_project,
-            location=settings.vertex_ai_location,
-        )
+            # Build into locals: nothing is cached on self until every step
+            # has succeeded, so a failure cannot leave a half-initialized state.
+            model_name = settings.gemini_model
+            client = genai.Client(
+                vertexai=True,
+                project=settings.vertex_ai_project,
+                location=settings.vertex_ai_location,
+            )
+        except ImportError as exc:
+            logger.exception("google-genai SDK import failed")
+            raise GeminiUnavailableError(
+                "google-genai is not installed. "
+                "Install with: pip install google-genai"
+            ) from exc
+        except Exception as exc:
+            logger.exception("Gemini authentication failed: %s", type(exc).__name__)
+            raise GeminiUnavailableError(
+                f"Gemini authentication failed: {exc}"
+            ) from exc
 
-        model_name = settings.gemini_model
-        self._model = GenerativeModel(model_name)
-        self._generation_config = GenerationConfig(
-            response_mime_type="application/json",
-            response_schema=_RECEIPT_RESPONSE_SCHEMA,
-        )
+        # Both except clauses raise, so reaching here means full success.
+        self._client = client
+        self._model_name = model_name
 
         logger.info(
-            "Maintenance receipt Gemini model initialized (model=%s)",
-            model_name,
+            "Maintenance receipt Gemini client initialized (model=%s)",
+            self._model_name,
         )
-        return self._model
+        return self._client
 
     def _extract_with_gemini(self, ocr_text: str) -> dict:
         """Send OCR text to Gemini for semantic field extraction.
@@ -220,13 +239,19 @@ class MaintenanceReceiptExtractor:
         Returns:
             Dictionary of field_name -> extracted_value from Gemini.
         """
-        model = self._get_model()
+        client = self._get_client()
+
+        from google.genai import types  # type: ignore[import-untyped]
 
         prompt = _RECEIPT_EXTRACTION_PROMPT.format(ocr_text=ocr_text)
 
-        response = model.generate_content(
-            [prompt],
-            generation_config=self._generation_config,
+        response = client.models.generate_content(
+            model=self._model_name,
+            contents=[prompt],
+            config=types.GenerateContentConfig(
+                response_mime_type="application/json",
+                response_schema=_RECEIPT_RESPONSE_SCHEMA,
+            ),
         )
 
         raw = json.loads(response.text)