fix: VIN decoding year fixes
All checks were successful
Deploy to Staging / Build Images (push) Successful in 35s
Deploy to Staging / Deploy to Staging (push) Successful in 53s
Deploy to Staging / Verify Staging (push) Successful in 9s
Deploy to Staging / Notify Staging Ready (push) Successful in 9s
Deploy to Staging / Notify Staging Failure (push) Has been skipped
Mirror Base Images / Mirror Base Images (push) Successful in 1m2s

This commit is contained in:
Eric Gullickson
2026-02-28 11:09:46 -06:00
parent 9dc56a3773
commit 0055d9f0f3
2 changed files with 218 additions and 11 deletions

View File

@@ -9,6 +9,7 @@ import json
import logging
import os
from dataclasses import dataclass
from datetime import datetime
from typing import Any
from app.config import settings
@@ -37,27 +38,82 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati
Return the results as a JSON object with a single "maintenanceSchedule" array.\
"""
# VIN year code lookup: position 10 character -> base year (first cycle, 1980-2009).
# The 30-year cycle repeats: +30 for 2010-2039, +60 for 2040-2069.
# Disambiguation uses position 7: numeric -> 2010+ cycle, alphabetic -> 1980s cycle.
# For the 2040+ cycle (when position 7 is alphabetic again), we pick the most
# recent plausible year (not more than 2 years in the future).
_VIN_YEAR_CODES: dict[str, int] = {
"A": 1980, "B": 1981, "C": 1982, "D": 1983, "E": 1984,
"F": 1985, "G": 1986, "H": 1987, "J": 1988, "K": 1989,
"L": 1990, "M": 1991, "N": 1992, "P": 1993, "R": 1994,
"S": 1995, "T": 1996, "V": 1997, "W": 1998, "X": 1999,
"Y": 2000,
"1": 2001, "2": 2002, "3": 2003, "4": 2004, "5": 2005,
"6": 2006, "7": 2007, "8": 2008, "9": 2009,
}
def resolve_vin_year(vin: str) -> int | None:
"""Deterministically resolve model year from VIN positions 7 and 10.
VIN year codes repeat on a 30-year cycle. Position 7 disambiguates:
- Numeric position 7 -> 2010-2039 cycle
- Alphabetic position 7 -> 1980-2009 or 2040-2050+ cycle
For the alphabetic case with three possible cycles, picks the most recent
year that is not more than 2 years in the future.
Returns None if the VIN is too short or position 10 is not a valid year code.
"""
if len(vin) < 17:
return None
code = vin[9].upper() # position 10 (0-indexed)
pos7 = vin[6].upper() # position 7 (0-indexed)
base_year = _VIN_YEAR_CODES.get(code)
if base_year is None:
return None
if pos7.isdigit():
# Numeric position 7 -> second cycle (2010-2039)
return base_year + 30
# Alphabetic position 7 -> first cycle (1980-2009) or third cycle (2040-2069)
# Pick the most recent plausible year
max_plausible = datetime.now().year + 2
third_cycle = base_year + 60 # 2040-2069
if third_cycle <= max_plausible:
return third_cycle
return base_year
_VIN_DECODE_PROMPT = """\
Decode the following VIN (Vehicle Identification Number) using standard VIN structure rules.
VIN: {vin}
Model year: {year} (determined from position 10 code '{year_code}')
The model year has already been resolved deterministically. Use {year} as the year.
VIN position reference:
- Positions 1-3 (WMI): World Manufacturer Identifier (country + manufacturer)
- Positions 4-8 (VDS): Vehicle attributes (model, body, engine, etc.)
- Position 9: Check digit
- Position 10: Model year code. Codes repeat on a 30-year cycle:
A=1980/2010 B=1981/2011 C=1982/2012 D=1983/2013 E=1984/2014
F=1985/2015 G=1986/2016 H=1987/2017 J=1988/2018 K=1989/2019
L=1990/2020 M=1991/2021 N=1992/2022 P=1993/2023 R=1994/2024
- Position 10: Model year code (30-year cycle, extended through 2050):
A=1980/2010/2040 B=1981/2011/2041 C=1982/2012/2042 D=1983/2013/2043 E=1984/2014/2044
F=1985/2015/2045 G=1986/2016/2046 H=1987/2017/2047 J=1988/2018/2048 K=1989/2019/2049
L=1990/2020/2050 M=1991/2021 N=1992/2022 P=1993/2023 R=1994/2024
S=1995/2025 T=1996/2026 V=1997/2027 W=1998/2028 X=1999/2029
Y=2000/2030 1=2001/2031 2=2002/2032 3=2003/2033 4=2004/2034
5=2005/2035 6=2006/2036 7=2007/2037 8=2008/2038 9=2009/2039
To disambiguate: if position 7 is numeric, use the 2010+ year; if alphabetic, use the 1980-2009 year.
- Position 11: Assembly plant
- Positions 12-17: Sequential production number
Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. The year MUST be derived from position 10 using the table above. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
Return the vehicle's make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
"""
_VIN_DECODE_SCHEMA: dict[str, Any] = {
@@ -288,6 +344,10 @@ class GeminiEngine:
def decode_vin(self, vin: str) -> VinDecodeResult:
"""Decode a VIN string into structured vehicle data via Gemini.
The model year is resolved deterministically from VIN positions 7
and 10 -- never delegated to the LLM. Gemini handles make, model,
trim, and other fields that require manufacturer knowledge.
Args:
vin: A 17-character Vehicle Identification Number.
@@ -300,6 +360,16 @@ class GeminiEngine:
"""
model = self._get_model()
# Resolve year deterministically from VIN structure
resolved_year = resolve_vin_year(vin)
year_code = vin[9].upper() if len(vin) >= 10 else "?"
logger.info(
"VIN year resolved: code=%s pos7=%s -> year=%s",
year_code,
vin[6] if len(vin) >= 7 else "?",
resolved_year,
)
try:
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
@@ -308,7 +378,11 @@ class GeminiEngine:
response_schema=_VIN_DECODE_SCHEMA,
)
prompt = _VIN_DECODE_PROMPT.format(vin=vin)
prompt = _VIN_DECODE_PROMPT.format(
vin=vin,
year=resolved_year or "unknown",
year_code=year_code,
)
response = model.generate_content(
[prompt],
generation_config=vin_config,
@@ -316,10 +390,21 @@ class GeminiEngine:
raw = json.loads(response.text)
# Override year with deterministic value -- never trust the LLM
# for a mechanical lookup
gemini_year = raw.get("year")
if resolved_year and gemini_year != resolved_year:
logger.warning(
"Gemini returned year %s but resolved year is %s for VIN %s -- overriding",
gemini_year,
resolved_year,
vin,
)
logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0))
return VinDecodeResult(
year=raw.get("year"),
year=resolved_year if resolved_year else raw.get("year"),
make=raw.get("make"),
model=raw.get("model"),
trim_level=raw.get("trimLevel"),

View File

@@ -0,0 +1,122 @@
"""Tests for deterministic VIN year resolution.
Covers: all three 30-year cycles (1980-2009, 2010-2039, 2040-2050),
position 7 disambiguation, edge cases, and invalid inputs.
"""
import pytest
from unittest.mock import patch
from datetime import datetime
from app.engines.gemini_engine import resolve_vin_year
class TestSecondCycle:
"""Position 7 numeric -> 2010-2039 cycle."""
def test_p_with_numeric_pos7_returns_2023(self):
"""P=2023 when position 7 is numeric (the bug that triggered this fix)."""
# VIN: 1G1YE2D32P5602473 -- pos7='2' (numeric), pos10='P'
assert resolve_vin_year("1G1YE2D32P5602473") == 2023
def test_a_with_numeric_pos7_returns_2010(self):
"""A=2010 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2112A5602473") == 2010
def test_l_with_numeric_pos7_returns_2020(self):
"""L=2020 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2112L5602473") == 2020
def test_9_with_numeric_pos7_returns_2039(self):
"""9=2039 when position 7 is numeric."""
assert resolve_vin_year("1G1YE211295602473") == 2039
def test_digit_1_with_numeric_pos7_returns_2031(self):
"""1=2031 when position 7 is numeric."""
assert resolve_vin_year("1G1YE211215602473") == 2031
def test_s_with_numeric_pos7_returns_2025(self):
"""S=2025 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2112S5602473") == 2025
def test_t_with_numeric_pos7_returns_2026(self):
"""T=2026 when position 7 is numeric."""
assert resolve_vin_year("1G1YE2112T5602473") == 2026
class TestFirstCycle:
"""Position 7 alphabetic -> 1980-2009 cycle (when 2040+ is not yet plausible)."""
def test_m_with_alpha_pos7_returns_1991(self):
"""M=1991 when position 7 is alphabetic (third cycle 2051 is not plausible)."""
assert resolve_vin_year("1G1YE2J32M5602473") == 1991
def test_n_with_alpha_pos7_returns_1992(self):
"""N=1992 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2J32N5602473") == 1992
def test_p_with_alpha_pos7_returns_1993(self):
"""P=1993 when position 7 is alphabetic (third cycle 2053 not plausible)."""
assert resolve_vin_year("1G1YE2J32P5602473") == 1993
def test_y_with_alpha_pos7_returns_2000(self):
"""Y=2000 when position 7 is alphabetic."""
assert resolve_vin_year("1G1YE2J32Y5602473") == 2000
class TestThirdCycle:
"""Position 7 alphabetic + third cycle year (2040-2050) is plausible."""
@patch("app.engines.gemini_engine.datetime")
def test_a_with_alpha_pos7_returns_2040_when_plausible(self, mock_dt):
"""A=2040 when position 7 is alphabetic and year 2040 is plausible."""
mock_dt.now.return_value = datetime(2039, 1, 1)
# 2039 + 2 = 2041 >= 2040, so third cycle is plausible
assert resolve_vin_year("1G1YE2J32A5602473") == 2040
@patch("app.engines.gemini_engine.datetime")
def test_l_with_alpha_pos7_returns_2050_when_plausible(self, mock_dt):
"""L=2050 when position 7 is alphabetic and year 2050 is plausible."""
mock_dt.now.return_value = datetime(2049, 6, 1)
assert resolve_vin_year("1G1YE2J32L5602473") == 2050
@patch("app.engines.gemini_engine.datetime")
def test_a_with_alpha_pos7_returns_1980_when_2040_not_plausible(self, mock_dt):
"""A=1980 when third cycle year (2040) exceeds max plausible."""
mock_dt.now.return_value = datetime(2026, 2, 20)
# 2026 + 2 = 2028 < 2040, so third cycle not plausible -> first cycle
assert resolve_vin_year("1G1YE2J32A5602473") == 1980
@patch("app.engines.gemini_engine.datetime")
def test_k_with_alpha_pos7_returns_2049_when_plausible(self, mock_dt):
"""K=2049 when position 7 is alphabetic and year is plausible."""
mock_dt.now.return_value = datetime(2048, 1, 1)
assert resolve_vin_year("1G1YE2J32K5602473") == 2049
class TestEdgeCases:
"""Invalid inputs and boundary conditions."""
def test_short_vin_returns_none(self):
"""VIN shorter than 17 chars returns None."""
assert resolve_vin_year("1G1YE2D32") is None
def test_empty_vin_returns_none(self):
"""Empty string returns None."""
assert resolve_vin_year("") is None
def test_invalid_year_code_returns_none(self):
"""Position 10 with invalid code (e.g., 'Z') returns None."""
# Z is not a valid year code
assert resolve_vin_year("1G1YE2D32Z5602473") is None
def test_lowercase_vin_handled(self):
"""Lowercase VIN characters are handled correctly."""
assert resolve_vin_year("1g1ye2d32p5602473") == 2023
def test_i_o_q_not_valid_year_codes(self):
"""Letters I, O, Q are not valid VIN year codes."""
# These are excluded from VINs entirely but test graceful handling
assert resolve_vin_year("1G1YE2D32I5602473") is None
assert resolve_vin_year("1G1YE2D32O5602473") is None
assert resolve_vin_year("1G1YE2D32Q5602473") is None