feat: migrate GeminiEngine to google-genai SDK with Google Search grounding (refs #233)
Replace vertexai.generative_models with the google.genai client pattern.
Add the Google Search grounding tool to VIN decode for improved accuracy.
Convert response schema types to uppercase per the Vertex AI Schema spec.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -29,7 +29,7 @@ class Settings:
|
|||||||
os.getenv("VISION_MONTHLY_LIMIT", "1000")
|
os.getenv("VISION_MONTHLY_LIMIT", "1000")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Vertex AI / Gemini configuration
|
# Google GenAI / Gemini configuration
|
||||||
self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
|
self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
|
||||||
self.vertex_ai_location: str = os.getenv(
|
self.vertex_ai_location: str = os.getenv(
|
||||||
"VERTEX_AI_LOCATION", "global"
|
"VERTEX_AI_LOCATION", "global"
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Standalone module (does NOT extend OcrEngine) because Gemini performs
|
Standalone module (does NOT extend OcrEngine) because Gemini performs
|
||||||
semantic document understanding, not traditional OCR word-box extraction.
|
semantic document understanding, not traditional OCR word-box extraction.
|
||||||
Uses Vertex AI SDK with structured JSON output enforcement.
|
Uses google-genai SDK with structured JSON output enforcement.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
@@ -117,34 +117,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type,
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_VIN_DECODE_SCHEMA: dict[str, Any] = {
|
_VIN_DECODE_SCHEMA: dict[str, Any] = {
|
||||||
"type": "object",
|
"type": "OBJECT",
|
||||||
"properties": {
|
"properties": {
|
||||||
"year": {"type": "integer", "nullable": True},
|
"year": {"type": "INTEGER", "nullable": True},
|
||||||
"make": {"type": "string", "nullable": True},
|
"make": {"type": "STRING", "nullable": True},
|
||||||
"model": {"type": "string", "nullable": True},
|
"model": {"type": "STRING", "nullable": True},
|
||||||
"trimLevel": {"type": "string", "nullable": True},
|
"trimLevel": {"type": "STRING", "nullable": True},
|
||||||
"bodyType": {"type": "string", "nullable": True},
|
"bodyType": {"type": "STRING", "nullable": True},
|
||||||
"driveType": {"type": "string", "nullable": True},
|
"driveType": {"type": "STRING", "nullable": True},
|
||||||
"fuelType": {"type": "string", "nullable": True},
|
"fuelType": {"type": "STRING", "nullable": True},
|
||||||
"engine": {"type": "string", "nullable": True},
|
"engine": {"type": "STRING", "nullable": True},
|
||||||
"transmission": {"type": "string", "nullable": True},
|
"transmission": {"type": "STRING", "nullable": True},
|
||||||
"confidence": {"type": "number"},
|
"confidence": {"type": "NUMBER"},
|
||||||
},
|
},
|
||||||
"required": ["confidence"],
|
"required": ["confidence"],
|
||||||
}
|
}
|
||||||
|
|
||||||
_RESPONSE_SCHEMA: dict[str, Any] = {
|
_RESPONSE_SCHEMA: dict[str, Any] = {
|
||||||
"type": "object",
|
"type": "OBJECT",
|
||||||
"properties": {
|
"properties": {
|
||||||
"maintenanceSchedule": {
|
"maintenanceSchedule": {
|
||||||
"type": "array",
|
"type": "ARRAY",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "object",
|
"type": "OBJECT",
|
||||||
"properties": {
|
"properties": {
|
||||||
"serviceName": {"type": "string"},
|
"serviceName": {"type": "STRING"},
|
||||||
"intervalMiles": {"type": "number", "nullable": True},
|
"intervalMiles": {"type": "NUMBER", "nullable": True},
|
||||||
"intervalMonths": {"type": "number", "nullable": True},
|
"intervalMonths": {"type": "NUMBER", "nullable": True},
|
||||||
"details": {"type": "string", "nullable": True},
|
"details": {"type": "STRING", "nullable": True},
|
||||||
},
|
},
|
||||||
"required": ["serviceName"],
|
"required": ["serviceName"],
|
||||||
},
|
},
|
||||||
@@ -206,20 +206,21 @@ class GeminiEngine:
|
|||||||
Standalone class (not an OcrEngine subclass) because Gemini performs
|
Standalone class (not an OcrEngine subclass) because Gemini performs
|
||||||
semantic document understanding rather than traditional OCR.
|
semantic document understanding rather than traditional OCR.
|
||||||
|
|
||||||
Uses lazy initialization: the Vertex AI client is not created until
|
Uses lazy initialization: the Gemini client is not created until
|
||||||
the first call to ``extract_maintenance()`` or ``decode_vin()``.
|
the first call to ``extract_maintenance()`` or ``decode_vin()``.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._model: Any | None = None
|
self._client: Any | None = None
|
||||||
|
self._model_name: str = ""
|
||||||
|
|
||||||
def _get_model(self) -> Any:
|
def _get_client(self) -> Any:
|
||||||
"""Create the GenerativeModel on first use.
|
"""Create the genai.Client on first use.
|
||||||
|
|
||||||
Authentication uses the same WIF credential path as Google Vision.
|
Authentication uses the same WIF credential path as Google Vision.
|
||||||
"""
|
"""
|
||||||
if self._model is not None:
|
if self._client is not None:
|
||||||
return self._model
|
return self._client
|
||||||
|
|
||||||
key_path = settings.google_vision_key_path
|
key_path = settings.google_vision_key_path
|
||||||
if not os.path.isfile(key_path):
|
if not os.path.isfile(key_path):
|
||||||
@@ -229,46 +230,37 @@ class GeminiEngine:
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from google.cloud import aiplatform # type: ignore[import-untyped]
|
from google import genai # type: ignore[import-untyped]
|
||||||
from vertexai.generative_models import ( # type: ignore[import-untyped]
|
|
||||||
GenerationConfig,
|
|
||||||
GenerativeModel,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Point ADC at the WIF credential config
|
# Point ADC at the WIF credential config (must be set BEFORE Client construction)
|
||||||
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
|
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
|
||||||
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
|
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
|
||||||
|
|
||||||
aiplatform.init(
|
self._client = genai.Client(
|
||||||
|
vertexai=True,
|
||||||
project=settings.vertex_ai_project,
|
project=settings.vertex_ai_project,
|
||||||
location=settings.vertex_ai_location,
|
location=settings.vertex_ai_location,
|
||||||
)
|
)
|
||||||
|
self._model_name = settings.gemini_model
|
||||||
model_name = settings.gemini_model
|
|
||||||
self._model = GenerativeModel(model_name)
|
|
||||||
self._generation_config = GenerationConfig(
|
|
||||||
response_mime_type="application/json",
|
|
||||||
response_schema=_RESPONSE_SCHEMA,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Gemini engine initialized (model=%s, project=%s, location=%s)",
|
"Gemini engine initialized (model=%s, project=%s, location=%s)",
|
||||||
model_name,
|
self._model_name,
|
||||||
settings.vertex_ai_project,
|
settings.vertex_ai_project,
|
||||||
settings.vertex_ai_location,
|
settings.vertex_ai_location,
|
||||||
)
|
)
|
||||||
return self._model
|
return self._client
|
||||||
|
|
||||||
except ImportError as exc:
|
except ImportError as exc:
|
||||||
logger.exception("Vertex AI SDK import failed")
|
logger.exception("google-genai SDK import failed")
|
||||||
raise GeminiUnavailableError(
|
raise GeminiUnavailableError(
|
||||||
"google-cloud-aiplatform is not installed. "
|
"google-genai is not installed. "
|
||||||
"Install with: pip install google-cloud-aiplatform"
|
"Install with: pip install google-genai"
|
||||||
) from exc
|
) from exc
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.exception("Vertex AI authentication failed")
|
logger.exception("Gemini authentication failed: %s", type(exc).__name__)
|
||||||
raise GeminiUnavailableError(
|
raise GeminiUnavailableError(
|
||||||
f"Vertex AI authentication failed: {exc}"
|
f"Gemini authentication failed: {exc}"
|
||||||
) from exc
|
) from exc
|
||||||
|
|
||||||
def extract_maintenance(
|
def extract_maintenance(
|
||||||
@@ -293,19 +285,23 @@ class GeminiEngine:
|
|||||||
"inline processing. Upload to GCS and use a gs:// URI instead."
|
"inline processing. Upload to GCS and use a gs:// URI instead."
|
||||||
)
|
)
|
||||||
|
|
||||||
model = self._get_model()
|
client = self._get_client()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from vertexai.generative_models import Part # type: ignore[import-untyped]
|
from google.genai import types # type: ignore[import-untyped]
|
||||||
|
|
||||||
pdf_part = Part.from_data(
|
pdf_part = types.Part.from_bytes(
|
||||||
data=pdf_bytes,
|
data=pdf_bytes,
|
||||||
mime_type="application/pdf",
|
mime_type="application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
response = model.generate_content(
|
response = client.models.generate_content(
|
||||||
[pdf_part, _EXTRACTION_PROMPT],
|
model=self._model_name,
|
||||||
generation_config=self._generation_config,
|
contents=[pdf_part, _EXTRACTION_PROMPT],
|
||||||
|
config=types.GenerateContentConfig(
|
||||||
|
response_mime_type="application/json",
|
||||||
|
response_schema=_RESPONSE_SCHEMA,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
raw = json.loads(response.text)
|
raw = json.loads(response.text)
|
||||||
@@ -358,7 +354,7 @@ class GeminiEngine:
|
|||||||
GeminiProcessingError: If Gemini fails to decode the VIN.
|
GeminiProcessingError: If Gemini fails to decode the VIN.
|
||||||
GeminiUnavailableError: If the engine cannot be initialized.
|
GeminiUnavailableError: If the engine cannot be initialized.
|
||||||
"""
|
"""
|
||||||
model = self._get_model()
|
client = self._get_client()
|
||||||
|
|
||||||
# Resolve year deterministically from VIN structure
|
# Resolve year deterministically from VIN structure
|
||||||
resolved_year = resolve_vin_year(vin)
|
resolved_year = resolve_vin_year(vin)
|
||||||
@@ -371,21 +367,21 @@ class GeminiEngine:
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
|
from google.genai import types # type: ignore[import-untyped]
|
||||||
|
|
||||||
vin_config = GenerationConfig(
|
|
||||||
response_mime_type="application/json",
|
|
||||||
response_schema=_VIN_DECODE_SCHEMA,
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt = _VIN_DECODE_PROMPT.format(
|
prompt = _VIN_DECODE_PROMPT.format(
|
||||||
vin=vin,
|
vin=vin,
|
||||||
year=resolved_year or "unknown",
|
year=resolved_year or "unknown",
|
||||||
year_code=year_code,
|
year_code=year_code,
|
||||||
)
|
)
|
||||||
response = model.generate_content(
|
response = client.models.generate_content(
|
||||||
[prompt],
|
model=self._model_name,
|
||||||
generation_config=vin_config,
|
contents=[prompt],
|
||||||
|
config=types.GenerateContentConfig(
|
||||||
|
response_mime_type="application/json",
|
||||||
|
response_schema=_VIN_DECODE_SCHEMA,
|
||||||
|
tools=[types.Tool(google_search=types.GoogleSearch())],
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
raw = json.loads(response.text)
|
raw = json.loads(response.text)
|
||||||
|
|||||||
Reference in New Issue
Block a user