feat: Migrate Gemini SDK to google-genai (#231) #236

Merged
egullickson merged 8 commits from issue-231-migrate-gemini-sdk-google-genai into main 2026-03-01 04:08:11 +00:00
2 changed files with 59 additions and 63 deletions
Showing only changes of commit b7f472b3e8 - Show all commits

View File

@@ -29,7 +29,7 @@ class Settings:
os.getenv("VISION_MONTHLY_LIMIT", "1000")
)
# Vertex AI / Gemini configuration
# Google GenAI / Gemini configuration
self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
self.vertex_ai_location: str = os.getenv(
"VERTEX_AI_LOCATION", "global"

View File

@@ -2,7 +2,7 @@
Standalone module (does NOT extend OcrEngine) because Gemini performs
semantic document understanding, not traditional OCR word-box extraction.
Uses Vertex AI SDK with structured JSON output enforcement.
Uses google-genai SDK with structured JSON output enforcement.
"""
import json
@@ -117,34 +117,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type,
"""
_VIN_DECODE_SCHEMA: dict[str, Any] = {
"type": "object",
"type": "OBJECT",
"properties": {
"year": {"type": "integer", "nullable": True},
"make": {"type": "string", "nullable": True},
"model": {"type": "string", "nullable": True},
"trimLevel": {"type": "string", "nullable": True},
"bodyType": {"type": "string", "nullable": True},
"driveType": {"type": "string", "nullable": True},
"fuelType": {"type": "string", "nullable": True},
"engine": {"type": "string", "nullable": True},
"transmission": {"type": "string", "nullable": True},
"confidence": {"type": "number"},
"year": {"type": "INTEGER", "nullable": True},
"make": {"type": "STRING", "nullable": True},
"model": {"type": "STRING", "nullable": True},
"trimLevel": {"type": "STRING", "nullable": True},
"bodyType": {"type": "STRING", "nullable": True},
"driveType": {"type": "STRING", "nullable": True},
"fuelType": {"type": "STRING", "nullable": True},
"engine": {"type": "STRING", "nullable": True},
"transmission": {"type": "STRING", "nullable": True},
"confidence": {"type": "NUMBER"},
},
"required": ["confidence"],
}
_RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object",
"type": "OBJECT",
"properties": {
"maintenanceSchedule": {
"type": "array",
"type": "ARRAY",
"items": {
"type": "object",
"type": "OBJECT",
"properties": {
"serviceName": {"type": "string"},
"intervalMiles": {"type": "number", "nullable": True},
"intervalMonths": {"type": "number", "nullable": True},
"details": {"type": "string", "nullable": True},
"serviceName": {"type": "STRING"},
"intervalMiles": {"type": "NUMBER", "nullable": True},
"intervalMonths": {"type": "NUMBER", "nullable": True},
"details": {"type": "STRING", "nullable": True},
},
"required": ["serviceName"],
},
@@ -206,20 +206,21 @@ class GeminiEngine:
Standalone class (not an OcrEngine subclass) because Gemini performs
semantic document understanding rather than traditional OCR.
Uses lazy initialization: the Vertex AI client is not created until
Uses lazy initialization: the Gemini client is not created until
the first call to ``extract_maintenance()`` or ``decode_vin()``.
"""
def __init__(self) -> None:
self._model: Any | None = None
self._client: Any | None = None
self._model_name: str = ""
def _get_model(self) -> Any:
"""Create the GenerativeModel on first use.
def _get_client(self) -> Any:
"""Create the genai.Client on first use.
Authentication uses the same WIF credential path as Google Vision.
"""
if self._model is not None:
return self._model
if self._client is not None:
return self._client
key_path = settings.google_vision_key_path
if not os.path.isfile(key_path):
@@ -229,46 +230,37 @@ class GeminiEngine:
)
try:
from google.cloud import aiplatform # type: ignore[import-untyped]
from vertexai.generative_models import ( # type: ignore[import-untyped]
GenerationConfig,
GenerativeModel,
)
from google import genai # type: ignore[import-untyped]
# Point ADC at the WIF credential config
# Point ADC at the WIF credential config (must be set BEFORE Client construction)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
aiplatform.init(
self._client = genai.Client(
vertexai=True,
project=settings.vertex_ai_project,
location=settings.vertex_ai_location,
)
model_name = settings.gemini_model
self._model = GenerativeModel(model_name)
self._generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
)
self._model_name = settings.gemini_model
logger.info(
"Gemini engine initialized (model=%s, project=%s, location=%s)",
model_name,
self._model_name,
settings.vertex_ai_project,
settings.vertex_ai_location,
)
return self._model
return self._client
except ImportError as exc:
logger.exception("Vertex AI SDK import failed")
logger.exception("google-genai SDK import failed")
raise GeminiUnavailableError(
"google-cloud-aiplatform is not installed. "
"Install with: pip install google-cloud-aiplatform"
"google-genai is not installed. "
"Install with: pip install google-genai"
) from exc
except Exception as exc:
logger.exception("Vertex AI authentication failed")
logger.exception("Gemini authentication failed: %s", type(exc).__name__)
raise GeminiUnavailableError(
f"Vertex AI authentication failed: {exc}"
f"Gemini authentication failed: {exc}"
) from exc
def extract_maintenance(
@@ -293,19 +285,23 @@ class GeminiEngine:
"inline processing. Upload to GCS and use a gs:// URI instead."
)
model = self._get_model()
client = self._get_client()
try:
from vertexai.generative_models import Part # type: ignore[import-untyped]
from google.genai import types # type: ignore[import-untyped]
pdf_part = Part.from_data(
pdf_part = types.Part.from_bytes(
data=pdf_bytes,
mime_type="application/pdf",
)
response = model.generate_content(
[pdf_part, _EXTRACTION_PROMPT],
generation_config=self._generation_config,
response = client.models.generate_content(
model=self._model_name,
contents=[pdf_part, _EXTRACTION_PROMPT],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
),
)
raw = json.loads(response.text)
@@ -358,7 +354,7 @@ class GeminiEngine:
GeminiProcessingError: If Gemini fails to decode the VIN.
GeminiUnavailableError: If the engine cannot be initialized.
"""
model = self._get_model()
client = self._get_client()
# Resolve year deterministically from VIN structure
resolved_year = resolve_vin_year(vin)
@@ -371,21 +367,21 @@ class GeminiEngine:
)
try:
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
vin_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
)
from google.genai import types # type: ignore[import-untyped]
prompt = _VIN_DECODE_PROMPT.format(
vin=vin,
year=resolved_year or "unknown",
year_code=year_code,
)
response = model.generate_content(
[prompt],
generation_config=vin_config,
response = client.models.generate_content(
model=self._model_name,
contents=[prompt],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
tools=[types.Tool(google_search=types.GoogleSearch())],
),
)
raw = json.loads(response.text)