fix: VIN decoding year fixes
All checks were successful
Deploy to Staging / Build Images (push) Successful in 35s
Deploy to Staging / Deploy to Staging (push) Successful in 53s
Deploy to Staging / Verify Staging (push) Successful in 9s
Deploy to Staging / Notify Staging Ready (push) Successful in 9s
Deploy to Staging / Notify Staging Failure (push) Has been skipped
Mirror Base Images / Mirror Base Images (push) Successful in 1m2s

This commit is contained in:
Eric Gullickson
2026-02-28 11:09:46 -06:00
parent 9dc56a3773
commit 0055d9f0f3
2 changed files with 218 additions and 11 deletions

View File

@@ -9,6 +9,7 @@ import json
import logging
import os
from dataclasses import dataclass
from datetime import datetime
from typing import Any
from app.config import settings
@@ -37,27 +38,82 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati
Return the results as a JSON object with a single "maintenanceSchedule" array.\
"""
# VIN year code lookup: position 10 character -> base year (first cycle, 1980-2009).
# The 30-year cycle repeats: +30 for 2010-2039, +60 for 2040-2069.
# Disambiguation uses position 7: numeric -> 2010+ cycle, alphabetic -> 1980s cycle.
# For the 2040+ cycle (when position 7 is alphabetic again), we pick the most
# recent plausible year (not more than 2 years in the future).
_VIN_YEAR_CODES: dict[str, int] = {
"A": 1980, "B": 1981, "C": 1982, "D": 1983, "E": 1984,
"F": 1985, "G": 1986, "H": 1987, "J": 1988, "K": 1989,
"L": 1990, "M": 1991, "N": 1992, "P": 1993, "R": 1994,
"S": 1995, "T": 1996, "V": 1997, "W": 1998, "X": 1999,
"Y": 2000,
"1": 2001, "2": 2002, "3": 2003, "4": 2004, "5": 2005,
"6": 2006, "7": 2007, "8": 2008, "9": 2009,
}
def resolve_vin_year(vin: str) -> int | None:
"""Deterministically resolve model year from VIN positions 7 and 10.
VIN year codes repeat on a 30-year cycle. Position 7 disambiguates:
- Numeric position 7 -> 2010-2039 cycle
- Alphabetic position 7 -> 1980-2009 or 2040-2050+ cycle
For the alphabetic case with three possible cycles, picks the most recent
year that is not more than 2 years in the future.
Returns None if the VIN is too short or position 10 is not a valid year code.
"""
if len(vin) < 17:
return None
code = vin[9].upper() # position 10 (0-indexed)
pos7 = vin[6].upper() # position 7 (0-indexed)
base_year = _VIN_YEAR_CODES.get(code)
if base_year is None:
return None
if pos7.isdigit():
# Numeric position 7 -> second cycle (2010-2039)
return base_year + 30
# Alphabetic position 7 -> first cycle (1980-2009) or third cycle (2040-2069)
# Pick the most recent plausible year
max_plausible = datetime.now().year + 2
third_cycle = base_year + 60 # 2040-2069
if third_cycle <= max_plausible:
return third_cycle
return base_year
# Prompt template sent to Gemini for VIN decoding. Placeholders: {vin} (the raw
# VIN), {year} (the pre-resolved model year) and {year_code} (the position-10
# character). The template instructs the model to use the supplied year as-is.
# NOTE(review): the text below carries two "Position 10" tables and two closing
# "Return the vehicle's ..." sentences that disagree on whether the model
# derives the year itself -- confirm which pair is meant to ship.
# NOTE(review): the "To disambiguate" sentence appears inverted relative to
# 49 CFR 565.15 (numeric position 7 -> 1980-2009, alphabetic -> 2010-2039) --
# verify before relying on it.
_VIN_DECODE_PROMPT = """\
Decode the following VIN (Vehicle Identification Number) using standard VIN structure rules.
VIN: {vin}
Model year: {year} (determined from position 10 code '{year_code}')
The model year has already been resolved deterministically. Use {year} as the year.
VIN position reference:
- Positions 1-3 (WMI): World Manufacturer Identifier (country + manufacturer)
- Positions 4-8 (VDS): Vehicle attributes (model, body, engine, etc.)
- Position 9: Check digit
- Position 10: Model year code. Codes repeat on a 30-year cycle:
A=1980/2010 B=1981/2011 C=1982/2012 D=1983/2013 E=1984/2014
F=1985/2015 G=1986/2016 H=1987/2017 J=1988/2018 K=1989/2019
L=1990/2020 M=1991/2021 N=1992/2022 P=1993/2023 R=1994/2024
S=1995/2025 T=1996/2026 V=1997/2027 W=1998/2028 X=1999/2029
Y=2000/2030 1=2001/2031 2=2002/2032 3=2003/2033 4=2004/2034
5=2005/2035 6=2006/2036 7=2007/2037 8=2008/2038 9=2009/2039
To disambiguate: if position 7 is numeric, use the 2010+ year; if alphabetic, use the 1980-2009 year.
- Position 10: Model year code (30-year cycle, extended through 2050):
A=1980/2010/2040 B=1981/2011/2041 C=1982/2012/2042 D=1983/2013/2043 E=1984/2014/2044
F=1985/2015/2045 G=1986/2016/2046 H=1987/2017/2047 J=1988/2018/2048 K=1989/2019/2049
L=1990/2020/2050 M=1991/2021 N=1992/2022 P=1993/2023 R=1994/2024
S=1995/2025 T=1996/2026 V=1997/2027 W=1998/2028 X=1999/2029
Y=2000/2030 1=2001/2031 2=2002/2032 3=2003/2033 4=2004/2034
5=2005/2035 6=2006/2036 7=2007/2037 8=2008/2038 9=2009/2039
- Position 11: Assembly plant
- Positions 12-17: Sequential production number
Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. The year MUST be derived from position 10 using the table above. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
Return the vehicle's make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
"""
_VIN_DECODE_SCHEMA: dict[str, Any] = {
@@ -288,6 +344,10 @@ class GeminiEngine:
def decode_vin(self, vin: str) -> VinDecodeResult:
"""Decode a VIN string into structured vehicle data via Gemini.
The model year is resolved deterministically from VIN positions 7
and 10 -- never delegated to the LLM. Gemini handles make, model,
trim, and other fields that require manufacturer knowledge.
Args:
vin: A 17-character Vehicle Identification Number.
@@ -300,6 +360,16 @@ class GeminiEngine:
"""
model = self._get_model()
# Resolve year deterministically from VIN structure
resolved_year = resolve_vin_year(vin)
year_code = vin[9].upper() if len(vin) >= 10 else "?"
logger.info(
"VIN year resolved: code=%s pos7=%s -> year=%s",
year_code,
vin[6] if len(vin) >= 7 else "?",
resolved_year,
)
try:
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
@@ -308,7 +378,11 @@ class GeminiEngine:
response_schema=_VIN_DECODE_SCHEMA,
)
prompt = _VIN_DECODE_PROMPT.format(vin=vin)
prompt = _VIN_DECODE_PROMPT.format(
vin=vin,
year=resolved_year or "unknown",
year_code=year_code,
)
response = model.generate_content(
[prompt],
generation_config=vin_config,
@@ -316,10 +390,21 @@ class GeminiEngine:
raw = json.loads(response.text)
# Override year with deterministic value -- never trust the LLM
# for a mechanical lookup
gemini_year = raw.get("year")
if resolved_year and gemini_year != resolved_year:
logger.warning(
"Gemini returned year %s but resolved year is %s for VIN %s -- overriding",
gemini_year,
resolved_year,
vin,
)
logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0))
return VinDecodeResult(
year=raw.get("year"),
year=resolved_year if resolved_year else raw.get("year"),
make=raw.get("make"),
model=raw.get("model"),
trim_level=raw.get("trimLevel"),