Compare commits

...

9 Commits

Author SHA1 Message Date
7e2bb9ef36 Merge pull request 'feat: Migrate Gemini SDK to google-genai (#231)' (#236) from issue-231-migrate-gemini-sdk-google-genai into main
All checks were successful
Deploy to Staging / Build Images (push) Successful in 37s
Deploy to Staging / Deploy to Staging (push) Successful in 51s
Deploy to Staging / Verify Staging (push) Successful in 9s
Deploy to Staging / Notify Staging Ready (push) Successful in 8s
Deploy to Staging / Notify Staging Failure (push) Has been skipped
Reviewed-on: #236
2026-03-01 04:08:09 +00:00
Eric Gullickson
56df5d48f3 fix: revert unsupported AFC config and add diagnostic logging for VIN decode (refs #231)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 12m33s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 52s
Deploy to Staging / Verify Staging (pull_request) Successful in 9s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
- Remove AutomaticFunctionCallingConfig(max_remote_calls=3) which caused
  pydantic validation error on the installed google-genai version
- Log full Gemini raw JSON response in OCR engine for debugging
- Add engine/transmission to backend raw values log
- Add hasTrim/hasEngine/hasTransmission to decode success log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 21:16:56 -06:00
Eric Gullickson
1add6c8240 fix: remove unsupported AutomaticFunctionCallingConfig parameter (refs #231)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 39s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 53s
Deploy to Staging / Verify Staging (pull_request) Successful in 9s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
The installed google-genai version does not support max_remote_calls on
AutomaticFunctionCallingConfig, causing a pydantic validation error that
broke VIN decode on staging.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 12:59:04 -06:00
Eric Gullickson
936753fac2 fix: VIN Decoding timeouts and logic errors
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 3m33s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 52s
Deploy to Staging / Verify Staging (pull_request) Successful in 9s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
2026-02-28 12:02:26 -06:00
Eric Gullickson
96e1dde7b2 docs: update CLAUDE.md references from Vertex AI to google-genai (refs #231)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 8m4s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 24s
Deploy to Staging / Verify Staging (pull_request) Successful in 9s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 9s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:21:58 -06:00
Eric Gullickson
1464a0e1af feat: update test mocks for google-genai SDK (refs #235)
Replace engine._model/engine._generation_config mocks with
engine._client/engine._model_name. Update sys.modules patches
from vertexai to google.genai. Remove dead if-False branch.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:21:10 -06:00
Eric Gullickson
9f51e62b94 feat: migrate MaintenanceReceiptExtractor to google-genai SDK (refs #234)
Replace vertexai.generative_models with google.genai client pattern.
Fix pre-existing bug: raise GeminiUnavailableError instead of bare
RuntimeError for missing credentials. Add proper try/except blocks
matching GeminiEngine error handling pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:17:14 -06:00
Eric Gullickson
b7f472b3e8 feat: migrate GeminiEngine to google-genai SDK with Google Search grounding (refs #233)
Replace vertexai.generative_models with google.genai client pattern.
Add Google Search grounding tool to VIN decode for improved accuracy.
Convert response schema types to uppercase per Vertex AI Schema spec.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:16:18 -06:00
Eric Gullickson
398d67304f feat: replace google-cloud-aiplatform with google-genai dependency (refs #232)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 11:13:54 -06:00
11 changed files with 227 additions and 214 deletions

View File

@@ -416,7 +416,10 @@ export class VehiclesController {
userId, userId,
hasYear: !!decodedData.year.value, hasYear: !!decodedData.year.value,
hasMake: !!decodedData.make.value, hasMake: !!decodedData.make.value,
hasModel: !!decodedData.model.value hasModel: !!decodedData.model.value,
hasTrim: !!decodedData.trimLevel.value,
hasEngine: !!decodedData.engine.value,
hasTransmission: !!decodedData.transmission.value,
}); });
return reply.code(200).send(decodedData); return reply.code(200).send(decodedData);

View File

@@ -679,7 +679,8 @@ export class VehiclesService {
logger.debug('VIN decode raw values', { logger.debug('VIN decode raw values', {
vin: response.vin, vin: response.vin,
year: sourceYear, make: sourceMake, model: sourceModel, year: sourceYear, make: sourceMake, model: sourceModel,
trim: sourceTrim, confidence: response.confidence trim: sourceTrim, engine: sourceEngine, transmission: sourceTransmission,
confidence: response.confidence
}); });
// Year is always high confidence if present (exact numeric match) // Year is always high confidence if present (exact numeric match)

View File

@@ -87,7 +87,7 @@ export const vehiclesApi = {
*/ */
decodeVin: async (vin: string): Promise<DecodedVehicleData> => { decodeVin: async (vin: string): Promise<DecodedVehicleData> => {
const response = await apiClient.post('/vehicles/decode-vin', { vin }, { const response = await apiClient.post('/vehicles/decode-vin', { vin }, {
timeout: 60000 // 60 seconds for Gemini cold start timeout: 120000 // 120 seconds for Gemini + Google Search grounding
}); });
return response.data; return response.data;
} }

View File

@@ -7,7 +7,7 @@ Python OCR microservice (FastAPI). Primary engine: PaddleOCR PP-OCRv4 with optio
| File | What | When to read | | File | What | When to read |
| ---- | ---- | ------------ | | ---- | ---- | ------------ |
| `main.py` | FastAPI application entry point | Route registration, app setup | | `main.py` | FastAPI application entry point | Route registration, app setup |
| `config.py` | Configuration settings (OCR engines, Vertex AI, Redis, Vision API limits) | Environment variables, settings | | `config.py` | Configuration settings (OCR engines, Google GenAI, Redis, Vision API limits) | Environment variables, settings |
| `__init__.py` | Package init | Package structure | | `__init__.py` | Package init | Package structure |
## Subdirectories ## Subdirectories

View File

@@ -29,7 +29,7 @@ class Settings:
os.getenv("VISION_MONTHLY_LIMIT", "1000") os.getenv("VISION_MONTHLY_LIMIT", "1000")
) )
# Vertex AI / Gemini configuration # Google GenAI / Gemini configuration
self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "") self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
self.vertex_ai_location: str = os.getenv( self.vertex_ai_location: str = os.getenv(
"VERTEX_AI_LOCATION", "global" "VERTEX_AI_LOCATION", "global"

View File

@@ -3,7 +3,7 @@
OCR engine abstraction layer. Two categories of engines: OCR engine abstraction layer. Two categories of engines:
1. **OcrEngine subclasses** (image-to-text): PaddleOCR, Google Vision, Hybrid. Accept image bytes, return text + confidence + word boxes. 1. **OcrEngine subclasses** (image-to-text): PaddleOCR, Google Vision, Hybrid. Accept image bytes, return text + confidence + word boxes.
2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via Vertex AI. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ. 2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via google-genai SDK. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
## Files ## Files
@@ -15,7 +15,7 @@ OCR engine abstraction layer. Two categories of engines:
| `cloud_engine.py` | Google Vision TEXT_DETECTION fallback engine (WIF authentication) | Cloud OCR configuration, API quota | | `cloud_engine.py` | Google Vision TEXT_DETECTION fallback engine (WIF authentication) | Cloud OCR configuration, API quota |
| `hybrid_engine.py` | Combines primary + fallback engine with confidence threshold switching | Engine selection logic, fallback behavior | | `hybrid_engine.py` | Combines primary + fallback engine with confidence threshold switching | Engine selection logic, fallback behavior |
| `engine_factory.py` | Factory function and engine registry for instantiation | Adding new engine types | | `engine_factory.py` | Factory function and engine registry for instantiation | Adding new engine types |
| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, VIN decode, Gemini configuration | | `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (google-genai SDK, 20MB PDF limit, structured JSON output, Google Search grounding for VIN decode) | Manual extraction debugging, VIN decode, Gemini configuration |
## Engine Selection ## Engine Selection

View File

@@ -2,7 +2,7 @@
Standalone module (does NOT extend OcrEngine) because Gemini performs Standalone module (does NOT extend OcrEngine) because Gemini performs
semantic document understanding, not traditional OCR word-box extraction. semantic document understanding, not traditional OCR word-box extraction.
Uses Vertex AI SDK with structured JSON output enforcement. Uses google-genai SDK with structured JSON output enforcement.
""" """
import json import json
@@ -40,8 +40,9 @@ Return the results as a JSON object with a single "maintenanceSchedule" array.\
# VIN year code lookup: position 10 character -> base year (first cycle, 1980-2009). # VIN year code lookup: position 10 character -> base year (first cycle, 1980-2009).
# The 30-year cycle repeats: +30 for 2010-2039, +60 for 2040-2069. # The 30-year cycle repeats: +30 for 2010-2039, +60 for 2040-2069.
# Disambiguation uses position 7: numeric -> 2010+ cycle, alphabetic -> 1980s cycle. # Disambiguation uses position 7: alphabetic -> 2010+ cycle, numeric -> 1980s cycle.
# For the 2040+ cycle (when position 7 is alphabetic again), we pick the most # Per NHTSA FMVSS No. 115: MY2010+ vehicles must use alphabetic position 7.
# For the 2040+ cycle (when position 7 is numeric again), we pick the most
# recent plausible year (not more than 2 years in the future). # recent plausible year (not more than 2 years in the future).
_VIN_YEAR_CODES: dict[str, int] = { _VIN_YEAR_CODES: dict[str, int] = {
"A": 1980, "B": 1981, "C": 1982, "D": 1983, "E": 1984, "A": 1980, "B": 1981, "C": 1982, "D": 1983, "E": 1984,
@@ -58,10 +59,10 @@ def resolve_vin_year(vin: str) -> int | None:
"""Deterministically resolve model year from VIN positions 7 and 10. """Deterministically resolve model year from VIN positions 7 and 10.
VIN year codes repeat on a 30-year cycle. Position 7 disambiguates: VIN year codes repeat on a 30-year cycle. Position 7 disambiguates:
- Numeric position 7 -> 2010-2039 cycle - Alphabetic position 7 -> 2010-2039 cycle (NHTSA MY2010+ requirement)
- Alphabetic position 7 -> 1980-2009 or 2040-2050+ cycle - Numeric position 7 -> 1980-2009 or 2040-2069 cycle
For the alphabetic case with three possible cycles, picks the most recent For the numeric case with two possible cycles, picks the most recent
year that is not more than 2 years in the future. year that is not more than 2 years in the future.
Returns None if the VIN is too short or position 10 is not a valid year code. Returns None if the VIN is too short or position 10 is not a valid year code.
@@ -76,11 +77,11 @@ def resolve_vin_year(vin: str) -> int | None:
if base_year is None: if base_year is None:
return None return None
if pos7.isdigit(): if pos7.isalpha():
# Numeric position 7 -> second cycle (2010-2039) # Alphabetic position 7 -> second cycle (2010-2039)
return base_year + 30 return base_year + 30
# Alphabetic position 7 -> first cycle (1980-2009) or third cycle (2040-2069) # Numeric position 7 -> first cycle (1980-2009) or third cycle (2040-2069)
# Pick the most recent plausible year # Pick the most recent plausible year
max_plausible = datetime.now().year + 2 max_plausible = datetime.now().year + 2
@@ -117,34 +118,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type,
""" """
_VIN_DECODE_SCHEMA: dict[str, Any] = { _VIN_DECODE_SCHEMA: dict[str, Any] = {
"type": "object", "type": "OBJECT",
"properties": { "properties": {
"year": {"type": "integer", "nullable": True}, "year": {"type": "INTEGER", "nullable": True},
"make": {"type": "string", "nullable": True}, "make": {"type": "STRING", "nullable": True},
"model": {"type": "string", "nullable": True}, "model": {"type": "STRING", "nullable": True},
"trimLevel": {"type": "string", "nullable": True}, "trimLevel": {"type": "STRING", "nullable": True},
"bodyType": {"type": "string", "nullable": True}, "bodyType": {"type": "STRING", "nullable": True},
"driveType": {"type": "string", "nullable": True}, "driveType": {"type": "STRING", "nullable": True},
"fuelType": {"type": "string", "nullable": True}, "fuelType": {"type": "STRING", "nullable": True},
"engine": {"type": "string", "nullable": True}, "engine": {"type": "STRING", "nullable": True},
"transmission": {"type": "string", "nullable": True}, "transmission": {"type": "STRING", "nullable": True},
"confidence": {"type": "number"}, "confidence": {"type": "NUMBER"},
}, },
"required": ["confidence"], "required": ["confidence"],
} }
_RESPONSE_SCHEMA: dict[str, Any] = { _RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object", "type": "OBJECT",
"properties": { "properties": {
"maintenanceSchedule": { "maintenanceSchedule": {
"type": "array", "type": "ARRAY",
"items": { "items": {
"type": "object", "type": "OBJECT",
"properties": { "properties": {
"serviceName": {"type": "string"}, "serviceName": {"type": "STRING"},
"intervalMiles": {"type": "number", "nullable": True}, "intervalMiles": {"type": "NUMBER", "nullable": True},
"intervalMonths": {"type": "number", "nullable": True}, "intervalMonths": {"type": "NUMBER", "nullable": True},
"details": {"type": "string", "nullable": True}, "details": {"type": "STRING", "nullable": True},
}, },
"required": ["serviceName"], "required": ["serviceName"],
}, },
@@ -206,20 +207,21 @@ class GeminiEngine:
Standalone class (not an OcrEngine subclass) because Gemini performs Standalone class (not an OcrEngine subclass) because Gemini performs
semantic document understanding rather than traditional OCR. semantic document understanding rather than traditional OCR.
Uses lazy initialization: the Vertex AI client is not created until Uses lazy initialization: the Gemini client is not created until
the first call to ``extract_maintenance()`` or ``decode_vin()``. the first call to ``extract_maintenance()`` or ``decode_vin()``.
""" """
def __init__(self) -> None: def __init__(self) -> None:
self._model: Any | None = None self._client: Any | None = None
self._model_name: str = ""
def _get_model(self) -> Any: def _get_client(self) -> Any:
"""Create the GenerativeModel on first use. """Create the genai.Client on first use.
Authentication uses the same WIF credential path as Google Vision. Authentication uses the same WIF credential path as Google Vision.
""" """
if self._model is not None: if self._client is not None:
return self._model return self._client
key_path = settings.google_vision_key_path key_path = settings.google_vision_key_path
if not os.path.isfile(key_path): if not os.path.isfile(key_path):
@@ -229,46 +231,37 @@ class GeminiEngine:
) )
try: try:
from google.cloud import aiplatform # type: ignore[import-untyped] from google import genai # type: ignore[import-untyped]
from vertexai.generative_models import ( # type: ignore[import-untyped]
GenerationConfig,
GenerativeModel,
)
# Point ADC at the WIF credential config # Point ADC at the WIF credential config (must be set BEFORE Client construction)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1" os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
aiplatform.init( self._client = genai.Client(
vertexai=True,
project=settings.vertex_ai_project, project=settings.vertex_ai_project,
location=settings.vertex_ai_location, location=settings.vertex_ai_location,
) )
self._model_name = settings.gemini_model
model_name = settings.gemini_model
self._model = GenerativeModel(model_name)
self._generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
)
logger.info( logger.info(
"Gemini engine initialized (model=%s, project=%s, location=%s)", "Gemini engine initialized (model=%s, project=%s, location=%s)",
model_name, self._model_name,
settings.vertex_ai_project, settings.vertex_ai_project,
settings.vertex_ai_location, settings.vertex_ai_location,
) )
return self._model return self._client
except ImportError as exc: except ImportError as exc:
logger.exception("Vertex AI SDK import failed") logger.exception("google-genai SDK import failed")
raise GeminiUnavailableError( raise GeminiUnavailableError(
"google-cloud-aiplatform is not installed. " "google-genai is not installed. "
"Install with: pip install google-cloud-aiplatform" "Install with: pip install google-genai"
) from exc ) from exc
except Exception as exc: except Exception as exc:
logger.exception("Vertex AI authentication failed") logger.exception("Gemini authentication failed: %s", type(exc).__name__)
raise GeminiUnavailableError( raise GeminiUnavailableError(
f"Vertex AI authentication failed: {exc}" f"Gemini authentication failed: {exc}"
) from exc ) from exc
def extract_maintenance( def extract_maintenance(
@@ -293,19 +286,23 @@ class GeminiEngine:
"inline processing. Upload to GCS and use a gs:// URI instead." "inline processing. Upload to GCS and use a gs:// URI instead."
) )
model = self._get_model() client = self._get_client()
try: try:
from vertexai.generative_models import Part # type: ignore[import-untyped] from google.genai import types # type: ignore[import-untyped]
pdf_part = Part.from_data( pdf_part = types.Part.from_bytes(
data=pdf_bytes, data=pdf_bytes,
mime_type="application/pdf", mime_type="application/pdf",
) )
response = model.generate_content( response = client.models.generate_content(
[pdf_part, _EXTRACTION_PROMPT], model=self._model_name,
generation_config=self._generation_config, contents=[pdf_part, _EXTRACTION_PROMPT],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
),
) )
raw = json.loads(response.text) raw = json.loads(response.text)
@@ -358,7 +355,7 @@ class GeminiEngine:
GeminiProcessingError: If Gemini fails to decode the VIN. GeminiProcessingError: If Gemini fails to decode the VIN.
GeminiUnavailableError: If the engine cannot be initialized. GeminiUnavailableError: If the engine cannot be initialized.
""" """
model = self._get_model() client = self._get_client()
# Resolve year deterministically from VIN structure # Resolve year deterministically from VIN structure
resolved_year = resolve_vin_year(vin) resolved_year = resolve_vin_year(vin)
@@ -371,21 +368,21 @@ class GeminiEngine:
) )
try: try:
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped] from google.genai import types # type: ignore[import-untyped]
vin_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
)
prompt = _VIN_DECODE_PROMPT.format( prompt = _VIN_DECODE_PROMPT.format(
vin=vin, vin=vin,
year=resolved_year or "unknown", year=resolved_year or "unknown",
year_code=year_code, year_code=year_code,
) )
response = model.generate_content( response = client.models.generate_content(
[prompt], model=self._model_name,
generation_config=vin_config, contents=[prompt],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
tools=[types.Tool(google_search=types.GoogleSearch())],
),
) )
raw = json.loads(response.text) raw = json.loads(response.text)
@@ -401,7 +398,12 @@ class GeminiEngine:
vin, vin,
) )
logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0)) logger.info(
"Gemini decoded VIN %s (confidence=%.2f) raw=%s",
vin,
raw.get("confidence", 0),
json.dumps(raw, default=str),
)
return VinDecodeResult( return VinDecodeResult(
year=resolved_year if resolved_year else raw.get("year"), year=resolved_year if resolved_year else raw.get("year"),

View File

@@ -14,6 +14,7 @@ import time
from typing import Any, Optional from typing import Any, Optional
from app.config import settings from app.config import settings
from app.engines.gemini_engine import GeminiUnavailableError
from app.extractors.receipt_extractor import ( from app.extractors.receipt_extractor import (
ExtractedField, ExtractedField,
ReceiptExtractionResult, ReceiptExtractionResult,
@@ -54,16 +55,16 @@ OCR Text:
""" """
_RECEIPT_RESPONSE_SCHEMA: dict[str, Any] = { _RECEIPT_RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object", "type": "OBJECT",
"properties": { "properties": {
"serviceName": {"type": "string", "nullable": True}, "serviceName": {"type": "STRING", "nullable": True},
"serviceDate": {"type": "string", "nullable": True}, "serviceDate": {"type": "STRING", "nullable": True},
"totalCost": {"type": "number", "nullable": True}, "totalCost": {"type": "NUMBER", "nullable": True},
"shopName": {"type": "string", "nullable": True}, "shopName": {"type": "STRING", "nullable": True},
"laborCost": {"type": "number", "nullable": True}, "laborCost": {"type": "NUMBER", "nullable": True},
"partsCost": {"type": "number", "nullable": True}, "partsCost": {"type": "NUMBER", "nullable": True},
"odometerReading": {"type": "number", "nullable": True}, "odometerReading": {"type": "NUMBER", "nullable": True},
"vehicleInfo": {"type": "string", "nullable": True}, "vehicleInfo": {"type": "STRING", "nullable": True},
}, },
"required": [ "required": [
"serviceName", "serviceName",
@@ -87,8 +88,8 @@ class MaintenanceReceiptExtractor:
""" """
def __init__(self) -> None: def __init__(self) -> None:
self._model: Any | None = None self._client: Any | None = None
self._generation_config: Any | None = None self._model_name: str = ""
def extract( def extract(
self, self,
@@ -169,47 +170,52 @@ class MaintenanceReceiptExtractor:
processing_time_ms=processing_time_ms, processing_time_ms=processing_time_ms,
) )
def _get_model(self) -> Any: def _get_client(self) -> Any:
"""Lazy-initialize Vertex AI Gemini model. """Lazy-initialize google-genai Gemini client.
Uses the same authentication pattern as GeminiEngine. Uses the same authentication pattern as GeminiEngine.
""" """
if self._model is not None: if self._client is not None:
return self._model return self._client
key_path = settings.google_vision_key_path key_path = settings.google_vision_key_path
if not os.path.isfile(key_path): if not os.path.isfile(key_path):
raise RuntimeError( raise GeminiUnavailableError(
f"Google credential config not found at {key_path}. " f"Google credential config not found at {key_path}. "
"Set GOOGLE_VISION_KEY_PATH or mount the secret." "Set GOOGLE_VISION_KEY_PATH or mount the secret."
) )
from google.cloud import aiplatform # type: ignore[import-untyped] try:
from vertexai.generative_models import ( # type: ignore[import-untyped] from google import genai # type: ignore[import-untyped]
GenerationConfig,
GenerativeModel,
)
# Point ADC at the WIF credential config (must be set BEFORE Client construction)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1" os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
aiplatform.init( self._client = genai.Client(
vertexai=True,
project=settings.vertex_ai_project, project=settings.vertex_ai_project,
location=settings.vertex_ai_location, location=settings.vertex_ai_location,
) )
self._model_name = settings.gemini_model
model_name = settings.gemini_model
self._model = GenerativeModel(model_name)
self._generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_RECEIPT_RESPONSE_SCHEMA,
)
logger.info( logger.info(
"Maintenance receipt Gemini model initialized (model=%s)", "Maintenance receipt Gemini client initialized (model=%s)",
model_name, self._model_name,
) )
return self._model return self._client
except ImportError as exc:
logger.exception("google-genai SDK import failed")
raise GeminiUnavailableError(
"google-genai is not installed. "
"Install with: pip install google-genai"
) from exc
except Exception as exc:
logger.exception("Gemini authentication failed: %s", type(exc).__name__)
raise GeminiUnavailableError(
f"Gemini authentication failed: {exc}"
) from exc
def _extract_with_gemini(self, ocr_text: str) -> dict: def _extract_with_gemini(self, ocr_text: str) -> dict:
"""Send OCR text to Gemini for semantic field extraction. """Send OCR text to Gemini for semantic field extraction.
@@ -220,13 +226,19 @@ class MaintenanceReceiptExtractor:
Returns: Returns:
Dictionary of field_name -> extracted_value from Gemini. Dictionary of field_name -> extracted_value from Gemini.
""" """
model = self._get_model() client = self._get_client()
from google.genai import types # type: ignore[import-untyped]
prompt = _RECEIPT_EXTRACTION_PROMPT.format(ocr_text=ocr_text) prompt = _RECEIPT_EXTRACTION_PROMPT.format(ocr_text=ocr_text)
response = model.generate_content( response = client.models.generate_content(
[prompt], model=self._model_name,
generation_config=self._generation_config, contents=[prompt],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_RECEIPT_RESPONSE_SCHEMA,
),
) )
raw = json.loads(response.text) raw = json.loads(response.text)

View File

@@ -21,8 +21,8 @@ google-cloud-vision>=3.7.0
# PDF Processing # PDF Processing
PyMuPDF>=1.23.0 PyMuPDF>=1.23.0
# Vertex AI / Gemini (maintenance schedule extraction) # Google GenAI / Gemini (maintenance schedule extraction, VIN decode)
google-cloud-aiplatform>=1.40.0 google-genai>=1.0.0
# Redis for job queue # Redis for job queue
redis>=5.0.0 redis>=5.0.0

View File

@@ -2,11 +2,11 @@
Covers: GeminiEngine initialization, PDF size validation, Covers: GeminiEngine initialization, PDF size validation,
successful extraction, empty results, and error handling. successful extraction, empty results, and error handling.
All Vertex AI SDK calls are mocked. All google-genai SDK calls are mocked.
""" """
import json import json
from unittest.mock import MagicMock, patch, PropertyMock from unittest.mock import MagicMock, patch
import pytest import pytest
@@ -156,22 +156,16 @@ class TestExtractMaintenance:
}, },
] ]
mock_model = MagicMock() mock_client = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response(schedule) mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
with ( with patch.dict("sys.modules", {
patch( "google.genai": MagicMock(),
"app.engines.gemini_engine.importlib_vertex_ai" "google.genai.types": MagicMock(),
) if False else patch.dict("sys.modules", { }):
"google.cloud": MagicMock(),
"google.cloud.aiplatform": MagicMock(),
"vertexai": MagicMock(),
"vertexai.generative_models": MagicMock(),
}),
):
engine = GeminiEngine() engine = GeminiEngine()
engine._model = mock_model engine._client = mock_client
engine._generation_config = MagicMock() engine._model_name = "gemini-2.5-flash"
result = engine.extract_maintenance(_make_pdf_bytes()) result = engine.extract_maintenance(_make_pdf_bytes())
@@ -200,12 +194,12 @@ class TestExtractMaintenance:
mock_settings.vertex_ai_location = "us-central1" mock_settings.vertex_ai_location = "us-central1"
mock_settings.gemini_model = "gemini-2.5-flash" mock_settings.gemini_model = "gemini-2.5-flash"
mock_model = MagicMock() mock_client = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response([]) mock_client.models.generate_content.return_value = _make_gemini_response([])
engine = GeminiEngine() engine = GeminiEngine()
engine._model = mock_model engine._client = mock_client
engine._generation_config = MagicMock() engine._model_name = "gemini-2.5-flash"
result = engine.extract_maintenance(_make_pdf_bytes()) result = engine.extract_maintenance(_make_pdf_bytes())
@@ -223,12 +217,12 @@ class TestExtractMaintenance:
schedule = [{"serviceName": "Brake Fluid Replacement"}] schedule = [{"serviceName": "Brake Fluid Replacement"}]
mock_model = MagicMock() mock_client = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response(schedule) mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
engine = GeminiEngine() engine = GeminiEngine()
engine._model = mock_model engine._client = mock_client
engine._generation_config = MagicMock() engine._model_name = "gemini-2.5-flash"
result = engine.extract_maintenance(_make_pdf_bytes()) result = engine.extract_maintenance(_make_pdf_bytes())
@@ -264,7 +258,8 @@ class TestErrorHandling:
with ( with (
patch("app.engines.gemini_engine.settings") as mock_settings, patch("app.engines.gemini_engine.settings") as mock_settings,
patch.dict("sys.modules", { patch.dict("sys.modules", {
"google.cloud.aiplatform": None, "google": None,
"google.genai": None,
}), }),
): ):
mock_settings.google_vision_key_path = "/fake/creds.json" mock_settings.google_vision_key_path = "/fake/creds.json"
@@ -283,12 +278,12 @@ class TestErrorHandling:
mock_settings.vertex_ai_location = "us-central1" mock_settings.vertex_ai_location = "us-central1"
mock_settings.gemini_model = "gemini-2.5-flash" mock_settings.gemini_model = "gemini-2.5-flash"
mock_model = MagicMock() mock_client = MagicMock()
mock_model.generate_content.side_effect = RuntimeError("API quota exceeded") mock_client.models.generate_content.side_effect = RuntimeError("API quota exceeded")
engine = GeminiEngine() engine = GeminiEngine()
engine._model = mock_model engine._client = mock_client
engine._generation_config = MagicMock() engine._model_name = "gemini-2.5-flash"
with pytest.raises(GeminiProcessingError, match="maintenance extraction failed"): with pytest.raises(GeminiProcessingError, match="maintenance extraction failed"):
engine.extract_maintenance(_make_pdf_bytes()) engine.extract_maintenance(_make_pdf_bytes())
@@ -307,12 +302,12 @@ class TestErrorHandling:
mock_response = MagicMock() mock_response = MagicMock()
mock_response.text = "not valid json {{" mock_response.text = "not valid json {{"
mock_model = MagicMock() mock_client = MagicMock()
mock_model.generate_content.return_value = mock_response mock_client.models.generate_content.return_value = mock_response
engine = GeminiEngine() engine = GeminiEngine()
engine._model = mock_model engine._client = mock_client
engine._generation_config = MagicMock() engine._model_name = "gemini-2.5-flash"
with pytest.raises(GeminiProcessingError, match="invalid JSON"): with pytest.raises(GeminiProcessingError, match="invalid JSON"):
engine.extract_maintenance(_make_pdf_bytes()) engine.extract_maintenance(_make_pdf_bytes())
@@ -322,32 +317,32 @@ class TestErrorHandling:
class TestLazyInitialization: class TestLazyInitialization:
"""Verify the model is not created until first use.""" """Verify the client is not created until first use."""
def test_model_is_none_after_construction(self): def test_client_is_none_after_construction(self):
"""GeminiEngine should not initialize the model in __init__.""" """GeminiEngine should not initialize the client in __init__."""
engine = GeminiEngine() engine = GeminiEngine()
assert engine._model is None assert engine._client is None
@patch("app.engines.gemini_engine.settings") @patch("app.engines.gemini_engine.settings")
@patch("app.engines.gemini_engine.os.path.isfile", return_value=True) @patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
def test_model_reused_on_second_call(self, mock_isfile, mock_settings): def test_client_reused_on_second_call(self, mock_isfile, mock_settings):
"""Once initialized, the same model instance is reused.""" """Once initialized, the same client instance is reused."""
mock_settings.google_vision_key_path = "/fake/creds.json" mock_settings.google_vision_key_path = "/fake/creds.json"
mock_settings.vertex_ai_project = "test-project" mock_settings.vertex_ai_project = "test-project"
mock_settings.vertex_ai_location = "us-central1" mock_settings.vertex_ai_location = "us-central1"
mock_settings.gemini_model = "gemini-2.5-flash" mock_settings.gemini_model = "gemini-2.5-flash"
schedule = [{"serviceName": "Oil Change", "intervalMiles": 5000}] schedule = [{"serviceName": "Oil Change", "intervalMiles": 5000}]
mock_model = MagicMock() mock_client = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response(schedule) mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
engine = GeminiEngine() engine = GeminiEngine()
engine._model = mock_model engine._client = mock_client
engine._generation_config = MagicMock() engine._model_name = "gemini-2.5-flash"
engine.extract_maintenance(_make_pdf_bytes()) engine.extract_maintenance(_make_pdf_bytes())
engine.extract_maintenance(_make_pdf_bytes()) engine.extract_maintenance(_make_pdf_bytes())
# Model's generate_content should have been called twice # Client's generate_content should have been called twice
assert mock_model.generate_content.call_count == 2 assert mock_client.models.generate_content.call_count == 2

View File

@@ -12,86 +12,86 @@ from app.engines.gemini_engine import resolve_vin_year
class TestSecondCycle: class TestSecondCycle:
"""Position 7 numeric -> 2010-2039 cycle.""" """Position 7 alphabetic -> 2010-2039 cycle (NHTSA MY2010+ requirement)."""
def test_p_with_numeric_pos7_returns_2023(self): def test_p_with_alpha_pos7_returns_2023(self):
"""P=2023 when position 7 is numeric (the bug that triggered this fix).""" """P=2023 when position 7 is alphabetic (the bug that triggered this fix)."""
# VIN: 1G1YE2D32P5602473 -- pos7='2' (numeric), pos10='P' # VIN: 1G1YE2D32P5602473 -- pos7='D' (alphabetic), pos10='P'
assert resolve_vin_year("1G1YE2D32P5602473") == 2023 assert resolve_vin_year("1G1YE2D32P5602473") == 2023
def test_a_with_numeric_pos7_returns_2010(self): def test_a_with_alpha_pos7_returns_2010(self):
"""A=2010 when position 7 is numeric.""" """A=2010 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2112A5602473") == 2010 assert resolve_vin_year("1G1YE2D12A5602473") == 2010
def test_l_with_numeric_pos7_returns_2020(self): def test_l_with_alpha_pos7_returns_2020(self):
"""L=2020 when position 7 is numeric.""" """L=2020 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2112L5602473") == 2020 assert resolve_vin_year("1G1YE2D12L5602473") == 2020
def test_9_with_numeric_pos7_returns_2039(self): def test_9_with_alpha_pos7_returns_2039(self):
"""9=2039 when position 7 is numeric.""" """9=2039 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE211295602473") == 2039 assert resolve_vin_year("1G1YE2D1295602473") == 2039
def test_digit_1_with_numeric_pos7_returns_2031(self): def test_digit_1_with_alpha_pos7_returns_2031(self):
"""1=2031 when position 7 is numeric.""" """1=2031 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE211215602473") == 2031 assert resolve_vin_year("1G1YE2D1215602473") == 2031
def test_s_with_numeric_pos7_returns_2025(self): def test_s_with_alpha_pos7_returns_2025(self):
"""S=2025 when position 7 is numeric.""" """S=2025 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2112S5602473") == 2025 assert resolve_vin_year("1G1YE2D12S5602473") == 2025
def test_t_with_numeric_pos7_returns_2026(self): def test_t_with_alpha_pos7_returns_2026(self):
"""T=2026 when position 7 is numeric.""" """T=2026 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2112T5602473") == 2026 assert resolve_vin_year("1G1YE2D12T5602473") == 2026
class TestFirstCycle: class TestFirstCycle:
"""Position 7 alphabetic -> 1980-2009 cycle (when 2040+ is not yet plausible).""" """Position 7 numeric -> 1980-2009 cycle."""
def test_m_with_alpha_pos7_returns_1991(self): def test_m_with_numeric_pos7_returns_1991(self):
"""M=1991 when position 7 is alphabetic (third cycle 2051 is not plausible).""" """M=1991 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2J32M5602473") == 1991 assert resolve_vin_year("1G1YE2132M5602473") == 1991
def test_n_with_alpha_pos7_returns_1992(self): def test_n_with_numeric_pos7_returns_1992(self):
"""N=1992 when position 7 is alphabetic.""" """N=1992 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2J32N5602473") == 1992 assert resolve_vin_year("1G1YE2132N5602473") == 1992
def test_p_with_alpha_pos7_returns_1993(self): def test_p_with_numeric_pos7_returns_1993(self):
"""P=1993 when position 7 is alphabetic (third cycle 2053 not plausible).""" """P=1993 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2J32P5602473") == 1993 assert resolve_vin_year("1G1YE2132P5602473") == 1993
def test_y_with_alpha_pos7_returns_2000(self): def test_y_with_numeric_pos7_returns_2000(self):
"""Y=2000 when position 7 is alphabetic.""" """Y=2000 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2J32Y5602473") == 2000 assert resolve_vin_year("1G1YE2132Y5602473") == 2000
class TestThirdCycle: class TestThirdCycle:
"""Position 7 alphabetic + third cycle year (2040-2050) is plausible.""" """Position 7 numeric + third cycle year (2040-2050) is plausible."""
@patch("app.engines.gemini_engine.datetime") @patch("app.engines.gemini_engine.datetime")
def test_a_with_alpha_pos7_returns_2040_when_plausible(self, mock_dt): def test_a_with_numeric_pos7_returns_2040_when_plausible(self, mock_dt):
"""A=2040 when position 7 is alphabetic and year 2040 is plausible.""" """A=2040 when position 7 is numeric and year 2040 is plausible."""
mock_dt.now.return_value = datetime(2039, 1, 1) mock_dt.now.return_value = datetime(2039, 1, 1)
# 2039 + 2 = 2041 >= 2040, so third cycle is plausible # 2039 + 2 = 2041 >= 2040, so third cycle is plausible
assert resolve_vin_year("1G1YE2J32A5602473") == 2040 assert resolve_vin_year("1G1YE2132A5602473") == 2040
@patch("app.engines.gemini_engine.datetime") @patch("app.engines.gemini_engine.datetime")
def test_l_with_alpha_pos7_returns_2050_when_plausible(self, mock_dt): def test_l_with_numeric_pos7_returns_2050_when_plausible(self, mock_dt):
"""L=2050 when position 7 is alphabetic and year 2050 is plausible.""" """L=2050 when position 7 is numeric and year 2050 is plausible."""
mock_dt.now.return_value = datetime(2049, 6, 1) mock_dt.now.return_value = datetime(2049, 6, 1)
assert resolve_vin_year("1G1YE2J32L5602473") == 2050 assert resolve_vin_year("1G1YE2132L5602473") == 2050
@patch("app.engines.gemini_engine.datetime") @patch("app.engines.gemini_engine.datetime")
def test_a_with_alpha_pos7_returns_1980_when_2040_not_plausible(self, mock_dt): def test_a_with_numeric_pos7_returns_1980_when_2040_not_plausible(self, mock_dt):
"""A=1980 when third cycle year (2040) exceeds max plausible.""" """A=1980 when third cycle year (2040) exceeds max plausible."""
mock_dt.now.return_value = datetime(2026, 2, 20) mock_dt.now.return_value = datetime(2026, 2, 20)
# 2026 + 2 = 2028 < 2040, so third cycle not plausible -> first cycle # 2026 + 2 = 2028 < 2040, so third cycle not plausible -> first cycle
assert resolve_vin_year("1G1YE2J32A5602473") == 1980 assert resolve_vin_year("1G1YE2132A5602473") == 1980
@patch("app.engines.gemini_engine.datetime") @patch("app.engines.gemini_engine.datetime")
def test_k_with_alpha_pos7_returns_2049_when_plausible(self, mock_dt): def test_k_with_numeric_pos7_returns_2049_when_plausible(self, mock_dt):
"""K=2049 when position 7 is alphabetic and year is plausible.""" """K=2049 when position 7 is numeric and year is plausible."""
mock_dt.now.return_value = datetime(2048, 1, 1) mock_dt.now.return_value = datetime(2048, 1, 1)
assert resolve_vin_year("1G1YE2J32K5602473") == 2049 assert resolve_vin_year("1G1YE2132K5602473") == 2049
class TestEdgeCases: class TestEdgeCases: