feat: Migrate Gemini SDK to google-genai (#231) #236

Merged
egullickson merged 8 commits from issue-231-migrate-gemini-sdk-google-genai into main 2026-03-01 04:08:11 +00:00
11 changed files with 227 additions and 214 deletions

View File

@@ -416,7 +416,10 @@ export class VehiclesController {
userId,
hasYear: !!decodedData.year.value,
hasMake: !!decodedData.make.value,
hasModel: !!decodedData.model.value
hasModel: !!decodedData.model.value,
hasTrim: !!decodedData.trimLevel.value,
hasEngine: !!decodedData.engine.value,
hasTransmission: !!decodedData.transmission.value,
});
return reply.code(200).send(decodedData);

View File

@@ -679,7 +679,8 @@ export class VehiclesService {
logger.debug('VIN decode raw values', {
vin: response.vin,
year: sourceYear, make: sourceMake, model: sourceModel,
trim: sourceTrim, confidence: response.confidence
trim: sourceTrim, engine: sourceEngine, transmission: sourceTransmission,
confidence: response.confidence
});
// Year is always high confidence if present (exact numeric match)

View File

@@ -87,7 +87,7 @@ export const vehiclesApi = {
*/
decodeVin: async (vin: string): Promise<DecodedVehicleData> => {
const response = await apiClient.post('/vehicles/decode-vin', { vin }, {
timeout: 60000 // 60 seconds for Gemini cold start
timeout: 120000 // 120 seconds for Gemini + Google Search grounding
});
return response.data;
}

View File

@@ -7,7 +7,7 @@ Python OCR microservice (FastAPI). Primary engine: PaddleOCR PP-OCRv4 with optio
| File | What | When to read |
| ---- | ---- | ------------ |
| `main.py` | FastAPI application entry point | Route registration, app setup |
| `config.py` | Configuration settings (OCR engines, Vertex AI, Redis, Vision API limits) | Environment variables, settings |
| `config.py` | Configuration settings (OCR engines, Google GenAI, Redis, Vision API limits) | Environment variables, settings |
| `__init__.py` | Package init | Package structure |
## Subdirectories

View File

@@ -29,7 +29,7 @@ class Settings:
os.getenv("VISION_MONTHLY_LIMIT", "1000")
)
# Vertex AI / Gemini configuration
# Google GenAI / Gemini configuration
self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
self.vertex_ai_location: str = os.getenv(
"VERTEX_AI_LOCATION", "global"

View File

@@ -3,7 +3,7 @@
OCR engine abstraction layer. Two categories of engines:
1. **OcrEngine subclasses** (image-to-text): PaddleOCR, Google Vision, Hybrid. Accept image bytes, return text + confidence + word boxes.
2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via Vertex AI. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via the google-genai SDK (running in Vertex AI mode). Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
## Files
@@ -15,7 +15,7 @@ OCR engine abstraction layer. Two categories of engines:
| `cloud_engine.py` | Google Vision TEXT_DETECTION fallback engine (WIF authentication) | Cloud OCR configuration, API quota |
| `hybrid_engine.py` | Combines primary + fallback engine with confidence threshold switching | Engine selection logic, fallback behavior |
| `engine_factory.py` | Factory function and engine registry for instantiation | Adding new engine types |
| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, VIN decode, Gemini configuration |
| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (google-genai SDK, 20MB PDF limit, structured JSON output, Google Search grounding for VIN decode) | Manual extraction debugging, VIN decode, Gemini configuration |
## Engine Selection

View File

@@ -2,7 +2,7 @@
Standalone module (does NOT extend OcrEngine) because Gemini performs
semantic document understanding, not traditional OCR word-box extraction.
Uses Vertex AI SDK with structured JSON output enforcement.
Uses the google-genai SDK with structured JSON output enforcement.
"""
import json
@@ -40,8 +40,9 @@ Return the results as a JSON object with a single "maintenanceSchedule" array.\
# VIN year code lookup: position 10 character -> base year (first cycle, 1980-2009).
# The 30-year cycle repeats: +30 for 2010-2039, +60 for 2040-2069.
# Disambiguation uses position 7: numeric -> 2010+ cycle, alphabetic -> 1980s cycle.
# For the 2040+ cycle (when position 7 is alphabetic again), we pick the most
# Disambiguation uses position 7: alphabetic -> 2010+ cycle, numeric -> 1980s cycle.
# Per NHTSA VIN requirements (49 CFR Part 565, formerly FMVSS No. 115): MY2010+
# passenger cars and MPVs/trucks with GVWR <= 10,000 lb must use alphabetic position 7.
# For the 2040+ cycle (when position 7 is numeric again), we pick the most
# recent plausible year (not more than 2 years in the future).
_VIN_YEAR_CODES: dict[str, int] = {
"A": 1980, "B": 1981, "C": 1982, "D": 1983, "E": 1984,
@@ -58,10 +59,10 @@ def resolve_vin_year(vin: str) -> int | None:
"""Deterministically resolve model year from VIN positions 7 and 10.
VIN year codes repeat on a 30-year cycle. Position 7 disambiguates:
- Numeric position 7 -> 2010-2039 cycle
- Alphabetic position 7 -> 1980-2009 or 2040-2050+ cycle
- Alphabetic position 7 -> 2010-2039 cycle (NHTSA MY2010+ requirement)
- Numeric position 7 -> 1980-2009 or 2040-2069 cycle
For the alphabetic case with three possible cycles, picks the most recent
For the numeric case with two possible cycles, picks the most recent
year that is not more than 2 years in the future.
Returns None if the VIN is too short or position 10 is not a valid year code.
@@ -76,11 +77,11 @@ def resolve_vin_year(vin: str) -> int | None:
if base_year is None:
return None
if pos7.isdigit():
# Numeric position 7 -> second cycle (2010-2039)
if pos7.isalpha():
# Alphabetic position 7 -> second cycle (2010-2039)
return base_year + 30
# Alphabetic position 7 -> first cycle (1980-2009) or third cycle (2040-2069)
# Numeric position 7 -> first cycle (1980-2009) or third cycle (2040-2069)
# Pick the most recent plausible year
max_plausible = datetime.now().year + 2
@@ -117,34 +118,34 @@ Return the vehicle's make, model, trim level, body type, drive type, fuel type,
"""
_VIN_DECODE_SCHEMA: dict[str, Any] = {
"type": "object",
"type": "OBJECT",
"properties": {
"year": {"type": "integer", "nullable": True},
"make": {"type": "string", "nullable": True},
"model": {"type": "string", "nullable": True},
"trimLevel": {"type": "string", "nullable": True},
"bodyType": {"type": "string", "nullable": True},
"driveType": {"type": "string", "nullable": True},
"fuelType": {"type": "string", "nullable": True},
"engine": {"type": "string", "nullable": True},
"transmission": {"type": "string", "nullable": True},
"confidence": {"type": "number"},
"year": {"type": "INTEGER", "nullable": True},
"make": {"type": "STRING", "nullable": True},
"model": {"type": "STRING", "nullable": True},
"trimLevel": {"type": "STRING", "nullable": True},
"bodyType": {"type": "STRING", "nullable": True},
"driveType": {"type": "STRING", "nullable": True},
"fuelType": {"type": "STRING", "nullable": True},
"engine": {"type": "STRING", "nullable": True},
"transmission": {"type": "STRING", "nullable": True},
"confidence": {"type": "NUMBER"},
},
"required": ["confidence"],
}
_RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object",
"type": "OBJECT",
"properties": {
"maintenanceSchedule": {
"type": "array",
"type": "ARRAY",
"items": {
"type": "object",
"type": "OBJECT",
"properties": {
"serviceName": {"type": "string"},
"intervalMiles": {"type": "number", "nullable": True},
"intervalMonths": {"type": "number", "nullable": True},
"details": {"type": "string", "nullable": True},
"serviceName": {"type": "STRING"},
"intervalMiles": {"type": "NUMBER", "nullable": True},
"intervalMonths": {"type": "NUMBER", "nullable": True},
"details": {"type": "STRING", "nullable": True},
},
"required": ["serviceName"],
},
@@ -206,20 +207,21 @@ class GeminiEngine:
Standalone class (not an OcrEngine subclass) because Gemini performs
semantic document understanding rather than traditional OCR.
Uses lazy initialization: the Vertex AI client is not created until
Uses lazy initialization: the Gemini client is not created until
the first call to ``extract_maintenance()`` or ``decode_vin()``.
"""
def __init__(self) -> None:
self._model: Any | None = None
self._client: Any | None = None
self._model_name: str = ""
def _get_model(self) -> Any:
"""Create the GenerativeModel on first use.
def _get_client(self) -> Any:
"""Create the genai.Client on first use.
Authentication uses the same WIF credential path as Google Vision.
"""
if self._model is not None:
return self._model
if self._client is not None:
return self._client
key_path = settings.google_vision_key_path
if not os.path.isfile(key_path):
@@ -229,46 +231,37 @@ class GeminiEngine:
)
try:
from google.cloud import aiplatform # type: ignore[import-untyped]
from vertexai.generative_models import ( # type: ignore[import-untyped]
GenerationConfig,
GenerativeModel,
)
from google import genai # type: ignore[import-untyped]
# Point ADC at the WIF credential config
# Point ADC at the WIF credential config (must be set BEFORE Client construction)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
aiplatform.init(
self._client = genai.Client(
vertexai=True,
project=settings.vertex_ai_project,
location=settings.vertex_ai_location,
)
model_name = settings.gemini_model
self._model = GenerativeModel(model_name)
self._generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
)
self._model_name = settings.gemini_model
logger.info(
"Gemini engine initialized (model=%s, project=%s, location=%s)",
model_name,
self._model_name,
settings.vertex_ai_project,
settings.vertex_ai_location,
)
return self._model
return self._client
except ImportError as exc:
logger.exception("Vertex AI SDK import failed")
logger.exception("google-genai SDK import failed")
raise GeminiUnavailableError(
"google-cloud-aiplatform is not installed. "
"Install with: pip install google-cloud-aiplatform"
"google-genai is not installed. "
"Install with: pip install google-genai"
) from exc
except Exception as exc:
logger.exception("Vertex AI authentication failed")
logger.exception("Gemini authentication failed: %s", type(exc).__name__)
raise GeminiUnavailableError(
f"Vertex AI authentication failed: {exc}"
f"Gemini authentication failed: {exc}"
) from exc
def extract_maintenance(
@@ -293,19 +286,23 @@ class GeminiEngine:
"inline processing. Upload to GCS and use a gs:// URI instead."
)
model = self._get_model()
client = self._get_client()
try:
from vertexai.generative_models import Part # type: ignore[import-untyped]
from google.genai import types # type: ignore[import-untyped]
pdf_part = Part.from_data(
pdf_part = types.Part.from_bytes(
data=pdf_bytes,
mime_type="application/pdf",
)
response = model.generate_content(
[pdf_part, _EXTRACTION_PROMPT],
generation_config=self._generation_config,
response = client.models.generate_content(
model=self._model_name,
contents=[pdf_part, _EXTRACTION_PROMPT],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_RESPONSE_SCHEMA,
),
)
raw = json.loads(response.text)
@@ -358,7 +355,7 @@ class GeminiEngine:
GeminiProcessingError: If Gemini fails to decode the VIN.
GeminiUnavailableError: If the engine cannot be initialized.
"""
model = self._get_model()
client = self._get_client()
# Resolve year deterministically from VIN structure
resolved_year = resolve_vin_year(vin)
@@ -371,21 +368,21 @@ class GeminiEngine:
)
try:
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
vin_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
)
from google.genai import types # type: ignore[import-untyped]
prompt = _VIN_DECODE_PROMPT.format(
vin=vin,
year=resolved_year or "unknown",
year_code=year_code,
)
response = model.generate_content(
[prompt],
generation_config=vin_config,
response = client.models.generate_content(
model=self._model_name,
contents=[prompt],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_VIN_DECODE_SCHEMA,
tools=[types.Tool(google_search=types.GoogleSearch())],
),
)
raw = json.loads(response.text)
@@ -401,7 +398,12 @@ class GeminiEngine:
vin,
)
logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0))
logger.info(
"Gemini decoded VIN %s (confidence=%.2f) raw=%s",
vin,
raw.get("confidence", 0),
json.dumps(raw, default=str),
)
return VinDecodeResult(
year=resolved_year if resolved_year else raw.get("year"),

View File

@@ -14,6 +14,7 @@ import time
from typing import Any, Optional
from app.config import settings
from app.engines.gemini_engine import GeminiUnavailableError
from app.extractors.receipt_extractor import (
ExtractedField,
ReceiptExtractionResult,
@@ -54,16 +55,16 @@ OCR Text:
"""
_RECEIPT_RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object",
"type": "OBJECT",
"properties": {
"serviceName": {"type": "string", "nullable": True},
"serviceDate": {"type": "string", "nullable": True},
"totalCost": {"type": "number", "nullable": True},
"shopName": {"type": "string", "nullable": True},
"laborCost": {"type": "number", "nullable": True},
"partsCost": {"type": "number", "nullable": True},
"odometerReading": {"type": "number", "nullable": True},
"vehicleInfo": {"type": "string", "nullable": True},
"serviceName": {"type": "STRING", "nullable": True},
"serviceDate": {"type": "STRING", "nullable": True},
"totalCost": {"type": "NUMBER", "nullable": True},
"shopName": {"type": "STRING", "nullable": True},
"laborCost": {"type": "NUMBER", "nullable": True},
"partsCost": {"type": "NUMBER", "nullable": True},
"odometerReading": {"type": "NUMBER", "nullable": True},
"vehicleInfo": {"type": "STRING", "nullable": True},
},
"required": [
"serviceName",
@@ -87,8 +88,8 @@ class MaintenanceReceiptExtractor:
"""
def __init__(self) -> None:
self._model: Any | None = None
self._generation_config: Any | None = None
self._client: Any | None = None
self._model_name: str = ""
def extract(
self,
@@ -169,47 +170,52 @@ class MaintenanceReceiptExtractor:
processing_time_ms=processing_time_ms,
)
def _get_model(self) -> Any:
"""Lazy-initialize Vertex AI Gemini model.
def _get_client(self) -> Any:
"""Lazy-initialize google-genai Gemini client.
Uses the same authentication pattern as GeminiEngine.
"""
if self._model is not None:
return self._model
if self._client is not None:
return self._client
key_path = settings.google_vision_key_path
if not os.path.isfile(key_path):
raise RuntimeError(
raise GeminiUnavailableError(
f"Google credential config not found at {key_path}. "
"Set GOOGLE_VISION_KEY_PATH or mount the secret."
)
from google.cloud import aiplatform # type: ignore[import-untyped]
from vertexai.generative_models import ( # type: ignore[import-untyped]
GenerationConfig,
GenerativeModel,
)
try:
from google import genai # type: ignore[import-untyped]
# Point ADC at the WIF credential config (must be set BEFORE Client construction)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
aiplatform.init(
self._client = genai.Client(
vertexai=True,
project=settings.vertex_ai_project,
location=settings.vertex_ai_location,
)
model_name = settings.gemini_model
self._model = GenerativeModel(model_name)
self._generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=_RECEIPT_RESPONSE_SCHEMA,
)
self._model_name = settings.gemini_model
logger.info(
"Maintenance receipt Gemini model initialized (model=%s)",
model_name,
"Maintenance receipt Gemini client initialized (model=%s)",
self._model_name,
)
return self._model
return self._client
except ImportError as exc:
logger.exception("google-genai SDK import failed")
raise GeminiUnavailableError(
"google-genai is not installed. "
"Install with: pip install google-genai"
) from exc
except Exception as exc:
logger.exception("Gemini authentication failed: %s", type(exc).__name__)
raise GeminiUnavailableError(
f"Gemini authentication failed: {exc}"
) from exc
def _extract_with_gemini(self, ocr_text: str) -> dict:
"""Send OCR text to Gemini for semantic field extraction.
@@ -220,13 +226,19 @@ class MaintenanceReceiptExtractor:
Returns:
Dictionary of field_name -> extracted_value from Gemini.
"""
model = self._get_model()
client = self._get_client()
from google.genai import types # type: ignore[import-untyped]
prompt = _RECEIPT_EXTRACTION_PROMPT.format(ocr_text=ocr_text)
response = model.generate_content(
[prompt],
generation_config=self._generation_config,
response = client.models.generate_content(
model=self._model_name,
contents=[prompt],
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=_RECEIPT_RESPONSE_SCHEMA,
),
)
raw = json.loads(response.text)

View File

@@ -21,8 +21,8 @@ google-cloud-vision>=3.7.0
# PDF Processing
PyMuPDF>=1.23.0
# Vertex AI / Gemini (maintenance schedule extraction)
google-cloud-aiplatform>=1.40.0
# Google GenAI / Gemini (maintenance schedule extraction, VIN decode)
google-genai>=1.0.0
# Redis for job queue
redis>=5.0.0

View File

@@ -2,11 +2,11 @@
Covers: GeminiEngine initialization, PDF size validation,
successful extraction, empty results, and error handling.
All Vertex AI SDK calls are mocked.
All google-genai SDK calls are mocked.
"""
import json
from unittest.mock import MagicMock, patch, PropertyMock
from unittest.mock import MagicMock, patch
import pytest
@@ -156,22 +156,16 @@ class TestExtractMaintenance:
},
]
mock_model = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response(schedule)
mock_client = MagicMock()
mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
with (
patch(
"app.engines.gemini_engine.importlib_vertex_ai"
) if False else patch.dict("sys.modules", {
"google.cloud": MagicMock(),
"google.cloud.aiplatform": MagicMock(),
"vertexai": MagicMock(),
"vertexai.generative_models": MagicMock(),
}),
):
with patch.dict("sys.modules", {
"google.genai": MagicMock(),
"google.genai.types": MagicMock(),
}):
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
result = engine.extract_maintenance(_make_pdf_bytes())
@@ -200,12 +194,12 @@ class TestExtractMaintenance:
mock_settings.vertex_ai_location = "us-central1"
mock_settings.gemini_model = "gemini-2.5-flash"
mock_model = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response([])
mock_client = MagicMock()
mock_client.models.generate_content.return_value = _make_gemini_response([])
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
result = engine.extract_maintenance(_make_pdf_bytes())
@@ -223,12 +217,12 @@ class TestExtractMaintenance:
schedule = [{"serviceName": "Brake Fluid Replacement"}]
mock_model = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response(schedule)
mock_client = MagicMock()
mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
result = engine.extract_maintenance(_make_pdf_bytes())
@@ -264,7 +258,8 @@ class TestErrorHandling:
with (
patch("app.engines.gemini_engine.settings") as mock_settings,
patch.dict("sys.modules", {
"google.cloud.aiplatform": None,
"google": None,
"google.genai": None,
}),
):
mock_settings.google_vision_key_path = "/fake/creds.json"
@@ -283,12 +278,12 @@ class TestErrorHandling:
mock_settings.vertex_ai_location = "us-central1"
mock_settings.gemini_model = "gemini-2.5-flash"
mock_model = MagicMock()
mock_model.generate_content.side_effect = RuntimeError("API quota exceeded")
mock_client = MagicMock()
mock_client.models.generate_content.side_effect = RuntimeError("API quota exceeded")
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
with pytest.raises(GeminiProcessingError, match="maintenance extraction failed"):
engine.extract_maintenance(_make_pdf_bytes())
@@ -307,12 +302,12 @@ class TestErrorHandling:
mock_response = MagicMock()
mock_response.text = "not valid json {{"
mock_model = MagicMock()
mock_model.generate_content.return_value = mock_response
mock_client = MagicMock()
mock_client.models.generate_content.return_value = mock_response
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
with pytest.raises(GeminiProcessingError, match="invalid JSON"):
engine.extract_maintenance(_make_pdf_bytes())
@@ -322,32 +317,32 @@ class TestErrorHandling:
class TestLazyInitialization:
"""Verify the model is not created until first use."""
"""Verify the client is not created until first use."""
def test_model_is_none_after_construction(self):
"""GeminiEngine should not initialize the model in __init__."""
def test_client_is_none_after_construction(self):
"""GeminiEngine should not initialize the client in __init__."""
engine = GeminiEngine()
assert engine._model is None
assert engine._client is None
@patch("app.engines.gemini_engine.settings")
@patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
def test_model_reused_on_second_call(self, mock_isfile, mock_settings):
"""Once initialized, the same model instance is reused."""
def test_client_reused_on_second_call(self, mock_isfile, mock_settings):
"""Once initialized, the same client instance is reused."""
mock_settings.google_vision_key_path = "/fake/creds.json"
mock_settings.vertex_ai_project = "test-project"
mock_settings.vertex_ai_location = "us-central1"
mock_settings.gemini_model = "gemini-2.5-flash"
schedule = [{"serviceName": "Oil Change", "intervalMiles": 5000}]
mock_model = MagicMock()
mock_model.generate_content.return_value = _make_gemini_response(schedule)
mock_client = MagicMock()
mock_client.models.generate_content.return_value = _make_gemini_response(schedule)
engine = GeminiEngine()
engine._model = mock_model
engine._generation_config = MagicMock()
engine._client = mock_client
engine._model_name = "gemini-2.5-flash"
engine.extract_maintenance(_make_pdf_bytes())
engine.extract_maintenance(_make_pdf_bytes())
# Model's generate_content should have been called twice
assert mock_model.generate_content.call_count == 2
# Client's generate_content should have been called twice
assert mock_client.models.generate_content.call_count == 2

View File

@@ -12,86 +12,86 @@ from app.engines.gemini_engine import resolve_vin_year
class TestSecondCycle:
"""Position 7 numeric -> 2010-2039 cycle."""
"""Position 7 alphabetic -> 2010-2039 cycle (NHTSA MY2010+ requirement)."""
def test_p_with_numeric_pos7_returns_2023(self):
"""P=2023 when position 7 is numeric (the bug that triggered this fix)."""
# VIN: 1G1YE2D32P5602473 -- pos7='2' (numeric), pos10='P'
def test_p_with_alpha_pos7_returns_2023(self):
"""P=2023 when position 7 is alphabetic (the bug that triggered this fix)."""
# VIN: 1G1YE2D32P5602473 -- pos7='D' (alphabetic), pos10='P'
assert resolve_vin_year("1G1YE2D32P5602473") == 2023
def test_a_with_numeric_pos7_returns_2010(self):
"""A=2010 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2112A5602473") == 2010
def test_a_with_alpha_pos7_returns_2010(self):
"""A=2010 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2D12A5602473") == 2010
def test_l_with_numeric_pos7_returns_2020(self):
"""L=2020 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2112L5602473") == 2020
def test_l_with_alpha_pos7_returns_2020(self):
"""L=2020 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2D12L5602473") == 2020
def test_9_with_numeric_pos7_returns_2039(self):
"""9=2039 when position 7 is numeric."""
assert resolve_vin_year("1G1YE211295602473") == 2039
def test_9_with_alpha_pos7_returns_2039(self):
"""9=2039 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2D1295602473") == 2039
def test_digit_1_with_numeric_pos7_returns_2031(self):
"""1=2031 when position 7 is numeric."""
assert resolve_vin_year("1G1YE211215602473") == 2031
def test_digit_1_with_alpha_pos7_returns_2031(self):
"""1=2031 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2D1215602473") == 2031
def test_s_with_numeric_pos7_returns_2025(self):
"""S=2025 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2112S5602473") == 2025
def test_s_with_alpha_pos7_returns_2025(self):
"""S=2025 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2D12S5602473") == 2025
def test_t_with_numeric_pos7_returns_2026(self):
"""T=2026 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2112T5602473") == 2026
def test_t_with_alpha_pos7_returns_2026(self):
"""T=2026 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2D12T5602473") == 2026
class TestFirstCycle:
"""Position 7 alphabetic -> 1980-2009 cycle (when 2040+ is not yet plausible)."""
"""Position 7 numeric -> 1980-2009 cycle."""
def test_m_with_alpha_pos7_returns_1991(self):
"""M=1991 when position 7 is alphabetic (third cycle 2051 is not plausible)."""
assert resolve_vin_year("1G1YE2J32M5602473") == 1991
def test_m_with_numeric_pos7_returns_1991(self):
"""M=1991 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2132M5602473") == 1991
def test_n_with_alpha_pos7_returns_1992(self):
"""N=1992 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2J32N5602473") == 1992
def test_n_with_numeric_pos7_returns_1992(self):
"""N=1992 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2132N5602473") == 1992
def test_p_with_alpha_pos7_returns_1993(self):
"""P=1993 when position 7 is alphabetic (third cycle 2053 not plausible)."""
assert resolve_vin_year("1G1YE2J32P5602473") == 1993
def test_p_with_numeric_pos7_returns_1993(self):
"""P=1993 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2132P5602473") == 1993
def test_y_with_alpha_pos7_returns_2000(self):
"""Y=2000 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2J32Y5602473") == 2000
def test_y_with_numeric_pos7_returns_2000(self):
"""Y=2000 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2132Y5602473") == 2000
class TestThirdCycle:
"""Position 7 alphabetic + third cycle year (2040-2050) is plausible."""
"""Position 7 numeric + third cycle year (2040-2050) is plausible."""
@patch("app.engines.gemini_engine.datetime")
def test_a_with_alpha_pos7_returns_2040_when_plausible(self, mock_dt):
"""A=2040 when position 7 is alphabetic and year 2040 is plausible."""
def test_a_with_numeric_pos7_returns_2040_when_plausible(self, mock_dt):
"""A=2040 when position 7 is numeric and year 2040 is plausible."""
mock_dt.now.return_value = datetime(2039, 1, 1)
# 2039 + 2 = 2041 >= 2040, so third cycle is plausible
assert resolve_vin_year("1G1YE2J32A5602473") == 2040
assert resolve_vin_year("1G1YE2132A5602473") == 2040
@patch("app.engines.gemini_engine.datetime")
def test_l_with_alpha_pos7_returns_2050_when_plausible(self, mock_dt):
"""L=2050 when position 7 is alphabetic and year 2050 is plausible."""
def test_l_with_numeric_pos7_returns_2050_when_plausible(self, mock_dt):
"""L=2050 when position 7 is numeric and year 2050 is plausible."""
mock_dt.now.return_value = datetime(2049, 6, 1)
assert resolve_vin_year("1G1YE2J32L5602473") == 2050
assert resolve_vin_year("1G1YE2132L5602473") == 2050
@patch("app.engines.gemini_engine.datetime")
def test_a_with_alpha_pos7_returns_1980_when_2040_not_plausible(self, mock_dt):
def test_a_with_numeric_pos7_returns_1980_when_2040_not_plausible(self, mock_dt):
"""A=1980 when third cycle year (2040) exceeds max plausible."""
mock_dt.now.return_value = datetime(2026, 2, 20)
# 2026 + 2 = 2028 < 2040, so third cycle not plausible -> first cycle
assert resolve_vin_year("1G1YE2J32A5602473") == 1980
assert resolve_vin_year("1G1YE2132A5602473") == 1980
@patch("app.engines.gemini_engine.datetime")
def test_k_with_alpha_pos7_returns_2049_when_plausible(self, mock_dt):
"""K=2049 when position 7 is alphabetic and year is plausible."""
def test_k_with_numeric_pos7_returns_2049_when_plausible(self, mock_dt):
"""K=2049 when position 7 is numeric and year is plausible."""
mock_dt.now.return_value = datetime(2048, 1, 1)
assert resolve_vin_year("1G1YE2J32K5602473") == 2049
assert resolve_vin_year("1G1YE2132K5602473") == 2049
class TestEdgeCases: