feat: migrate GeminiEngine to google-genai SDK with Google Search grounding (refs #233)
Replace vertexai.generative_models with the google.genai client pattern. Add the Google Search grounding tool to VIN decode for improved accuracy. Convert response schema types to uppercase per the Vertex AI Schema spec.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
Standalone module (does NOT extend OcrEngine) because Gemini performs
|
||||
semantic document understanding, not traditional OCR word-box extraction.
|
||||
Uses Vertex AI SDK with structured JSON output enforcement.
|
||||
Uses google-genai SDK with structured JSON output enforcement.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -117,34 +117,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type,
|
||||
"""
|
||||
|
||||
_VIN_DECODE_SCHEMA: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"type": "OBJECT",
|
||||
"properties": {
|
||||
"year": {"type": "integer", "nullable": True},
|
||||
"make": {"type": "string", "nullable": True},
|
||||
"model": {"type": "string", "nullable": True},
|
||||
"trimLevel": {"type": "string", "nullable": True},
|
||||
"bodyType": {"type": "string", "nullable": True},
|
||||
"driveType": {"type": "string", "nullable": True},
|
||||
"fuelType": {"type": "string", "nullable": True},
|
||||
"engine": {"type": "string", "nullable": True},
|
||||
"transmission": {"type": "string", "nullable": True},
|
||||
"confidence": {"type": "number"},
|
||||
"year": {"type": "INTEGER", "nullable": True},
|
||||
"make": {"type": "STRING", "nullable": True},
|
||||
"model": {"type": "STRING", "nullable": True},
|
||||
"trimLevel": {"type": "STRING", "nullable": True},
|
||||
"bodyType": {"type": "STRING", "nullable": True},
|
||||
"driveType": {"type": "STRING", "nullable": True},
|
||||
"fuelType": {"type": "STRING", "nullable": True},
|
||||
"engine": {"type": "STRING", "nullable": True},
|
||||
"transmission": {"type": "STRING", "nullable": True},
|
||||
"confidence": {"type": "NUMBER"},
|
||||
},
|
||||
"required": ["confidence"],
|
||||
}
|
||||
|
||||
_RESPONSE_SCHEMA: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"type": "OBJECT",
|
||||
"properties": {
|
||||
"maintenanceSchedule": {
|
||||
"type": "array",
|
||||
"type": "ARRAY",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"type": "OBJECT",
|
||||
"properties": {
|
||||
"serviceName": {"type": "string"},
|
||||
"intervalMiles": {"type": "number", "nullable": True},
|
||||
"intervalMonths": {"type": "number", "nullable": True},
|
||||
"details": {"type": "string", "nullable": True},
|
||||
"serviceName": {"type": "STRING"},
|
||||
"intervalMiles": {"type": "NUMBER", "nullable": True},
|
||||
"intervalMonths": {"type": "NUMBER", "nullable": True},
|
||||
"details": {"type": "STRING", "nullable": True},
|
||||
},
|
||||
"required": ["serviceName"],
|
||||
},
|
||||
@@ -206,20 +206,21 @@ class GeminiEngine:
|
||||
Standalone class (not an OcrEngine subclass) because Gemini performs
|
||||
semantic document understanding rather than traditional OCR.
|
||||
|
||||
Uses lazy initialization: the Vertex AI client is not created until
|
||||
Uses lazy initialization: the Gemini client is not created until
|
||||
the first call to ``extract_maintenance()`` or ``decode_vin()``.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._model: Any | None = None
|
||||
self._client: Any | None = None
|
||||
self._model_name: str = ""
|
||||
|
||||
def _get_model(self) -> Any:
|
||||
"""Create the GenerativeModel on first use.
|
||||
def _get_client(self) -> Any:
|
||||
"""Create the genai.Client on first use.
|
||||
|
||||
Authentication uses the same WIF credential path as Google Vision.
|
||||
"""
|
||||
if self._model is not None:
|
||||
return self._model
|
||||
if self._client is not None:
|
||||
return self._client
|
||||
|
||||
key_path = settings.google_vision_key_path
|
||||
if not os.path.isfile(key_path):
|
||||
@@ -229,46 +230,37 @@ class GeminiEngine:
|
||||
)
|
||||
|
||||
try:
|
||||
from google.cloud import aiplatform # type: ignore[import-untyped]
|
||||
from vertexai.generative_models import ( # type: ignore[import-untyped]
|
||||
GenerationConfig,
|
||||
GenerativeModel,
|
||||
)
|
||||
from google import genai # type: ignore[import-untyped]
|
||||
|
||||
# Point ADC at the WIF credential config
|
||||
# Point ADC at the WIF credential config (must be set BEFORE Client construction)
|
||||
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
|
||||
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
|
||||
|
||||
aiplatform.init(
|
||||
self._client = genai.Client(
|
||||
vertexai=True,
|
||||
project=settings.vertex_ai_project,
|
||||
location=settings.vertex_ai_location,
|
||||
)
|
||||
|
||||
model_name = settings.gemini_model
|
||||
self._model = GenerativeModel(model_name)
|
||||
self._generation_config = GenerationConfig(
|
||||
response_mime_type="application/json",
|
||||
response_schema=_RESPONSE_SCHEMA,
|
||||
)
|
||||
self._model_name = settings.gemini_model
|
||||
|
||||
logger.info(
|
||||
"Gemini engine initialized (model=%s, project=%s, location=%s)",
|
||||
model_name,
|
||||
self._model_name,
|
||||
settings.vertex_ai_project,
|
||||
settings.vertex_ai_location,
|
||||
)
|
||||
return self._model
|
||||
return self._client
|
||||
|
||||
except ImportError as exc:
|
||||
logger.exception("Vertex AI SDK import failed")
|
||||
logger.exception("google-genai SDK import failed")
|
||||
raise GeminiUnavailableError(
|
||||
"google-cloud-aiplatform is not installed. "
|
||||
"Install with: pip install google-cloud-aiplatform"
|
||||
"google-genai is not installed. "
|
||||
"Install with: pip install google-genai"
|
||||
) from exc
|
||||
except Exception as exc:
|
||||
logger.exception("Vertex AI authentication failed")
|
||||
logger.exception("Gemini authentication failed: %s", type(exc).__name__)
|
||||
raise GeminiUnavailableError(
|
||||
f"Vertex AI authentication failed: {exc}"
|
||||
f"Gemini authentication failed: {exc}"
|
||||
) from exc
|
||||
|
||||
def extract_maintenance(
|
||||
@@ -293,19 +285,23 @@ class GeminiEngine:
|
||||
"inline processing. Upload to GCS and use a gs:// URI instead."
|
||||
)
|
||||
|
||||
model = self._get_model()
|
||||
client = self._get_client()
|
||||
|
||||
try:
|
||||
from vertexai.generative_models import Part # type: ignore[import-untyped]
|
||||
from google.genai import types # type: ignore[import-untyped]
|
||||
|
||||
pdf_part = Part.from_data(
|
||||
pdf_part = types.Part.from_bytes(
|
||||
data=pdf_bytes,
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
response = model.generate_content(
|
||||
[pdf_part, _EXTRACTION_PROMPT],
|
||||
generation_config=self._generation_config,
|
||||
response = client.models.generate_content(
|
||||
model=self._model_name,
|
||||
contents=[pdf_part, _EXTRACTION_PROMPT],
|
||||
config=types.GenerateContentConfig(
|
||||
response_mime_type="application/json",
|
||||
response_schema=_RESPONSE_SCHEMA,
|
||||
),
|
||||
)
|
||||
|
||||
raw = json.loads(response.text)
|
||||
@@ -358,7 +354,7 @@ class GeminiEngine:
|
||||
GeminiProcessingError: If Gemini fails to decode the VIN.
|
||||
GeminiUnavailableError: If the engine cannot be initialized.
|
||||
"""
|
||||
model = self._get_model()
|
||||
client = self._get_client()
|
||||
|
||||
# Resolve year deterministically from VIN structure
|
||||
resolved_year = resolve_vin_year(vin)
|
||||
@@ -371,21 +367,21 @@ class GeminiEngine:
|
||||
)
|
||||
|
||||
try:
|
||||
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
|
||||
|
||||
vin_config = GenerationConfig(
|
||||
response_mime_type="application/json",
|
||||
response_schema=_VIN_DECODE_SCHEMA,
|
||||
)
|
||||
from google.genai import types # type: ignore[import-untyped]
|
||||
|
||||
prompt = _VIN_DECODE_PROMPT.format(
|
||||
vin=vin,
|
||||
year=resolved_year or "unknown",
|
||||
year_code=year_code,
|
||||
)
|
||||
response = model.generate_content(
|
||||
[prompt],
|
||||
generation_config=vin_config,
|
||||
response = client.models.generate_content(
|
||||
model=self._model_name,
|
||||
contents=[prompt],
|
||||
config=types.GenerateContentConfig(
|
||||
response_mime_type="application/json",
|
||||
response_schema=_VIN_DECODE_SCHEMA,
|
||||
tools=[types.Tool(google_search=types.GoogleSearch())],
|
||||
),
|
||||
)
|
||||
|
||||
raw = json.loads(response.text)
|
||||
|
||||
Reference in New Issue
Block a user