fix: VIN decoding year fixes
All checks were successful
Deploy to Staging / Build Images (push) Successful in 35s
Deploy to Staging / Deploy to Staging (push) Successful in 53s
Deploy to Staging / Verify Staging (push) Successful in 9s
Deploy to Staging / Notify Staging Ready (push) Successful in 9s
Deploy to Staging / Notify Staging Failure (push) Has been skipped
Mirror Base Images / Mirror Base Images (push) Successful in 1m2s
All checks were successful
Deploy to Staging / Build Images (push) Successful in 35s
Deploy to Staging / Deploy to Staging (push) Successful in 53s
Deploy to Staging / Verify Staging (push) Successful in 9s
Deploy to Staging / Notify Staging Ready (push) Successful in 9s
Deploy to Staging / Notify Staging Failure (push) Has been skipped
Mirror Base Images / Mirror Base Images (push) Successful in 1m2s
This commit is contained in:
@@ -9,6 +9,7 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
@@ -37,27 +38,82 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati
|
|||||||
Return the results as a JSON object with a single "maintenanceSchedule" array.\
|
Return the results as a JSON object with a single "maintenanceSchedule" array.\
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# VIN year code lookup: position 10 character -> base year (first cycle, 1980-2009).
|
||||||
|
# The 30-year cycle repeats: +30 for 2010-2039, +60 for 2040-2069.
|
||||||
|
# Disambiguation uses position 7: numeric -> 2010+ cycle, alphabetic -> 1980s cycle.
|
||||||
|
# For the 2040+ cycle (when position 7 is alphabetic again), we pick the most
|
||||||
|
# recent plausible year (not more than 2 years in the future).
|
||||||
|
_VIN_YEAR_CODES: dict[str, int] = {
|
||||||
|
"A": 1980, "B": 1981, "C": 1982, "D": 1983, "E": 1984,
|
||||||
|
"F": 1985, "G": 1986, "H": 1987, "J": 1988, "K": 1989,
|
||||||
|
"L": 1990, "M": 1991, "N": 1992, "P": 1993, "R": 1994,
|
||||||
|
"S": 1995, "T": 1996, "V": 1997, "W": 1998, "X": 1999,
|
||||||
|
"Y": 2000,
|
||||||
|
"1": 2001, "2": 2002, "3": 2003, "4": 2004, "5": 2005,
|
||||||
|
"6": 2006, "7": 2007, "8": 2008, "9": 2009,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_vin_year(vin: str) -> int | None:
|
||||||
|
"""Deterministically resolve model year from VIN positions 7 and 10.
|
||||||
|
|
||||||
|
VIN year codes repeat on a 30-year cycle. Position 7 disambiguates:
|
||||||
|
- Numeric position 7 -> 2010-2039 cycle
|
||||||
|
- Alphabetic position 7 -> 1980-2009 or 2040-2050+ cycle
|
||||||
|
|
||||||
|
For the alphabetic case with three possible cycles, picks the most recent
|
||||||
|
year that is not more than 2 years in the future.
|
||||||
|
|
||||||
|
Returns None if the VIN is too short or position 10 is not a valid year code.
|
||||||
|
"""
|
||||||
|
if len(vin) < 17:
|
||||||
|
return None
|
||||||
|
|
||||||
|
code = vin[9].upper() # position 10 (0-indexed)
|
||||||
|
pos7 = vin[6].upper() # position 7 (0-indexed)
|
||||||
|
|
||||||
|
base_year = _VIN_YEAR_CODES.get(code)
|
||||||
|
if base_year is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if pos7.isdigit():
|
||||||
|
# Numeric position 7 -> second cycle (2010-2039)
|
||||||
|
return base_year + 30
|
||||||
|
|
||||||
|
# Alphabetic position 7 -> first cycle (1980-2009) or third cycle (2040-2069)
|
||||||
|
# Pick the most recent plausible year
|
||||||
|
max_plausible = datetime.now().year + 2
|
||||||
|
|
||||||
|
third_cycle = base_year + 60 # 2040-2069
|
||||||
|
if third_cycle <= max_plausible:
|
||||||
|
return third_cycle
|
||||||
|
|
||||||
|
return base_year
|
||||||
|
|
||||||
|
|
||||||
_VIN_DECODE_PROMPT = """\
|
_VIN_DECODE_PROMPT = """\
|
||||||
Decode the following VIN (Vehicle Identification Number) using standard VIN structure rules.
|
Decode the following VIN (Vehicle Identification Number) using standard VIN structure rules.
|
||||||
|
|
||||||
VIN: {vin}
|
VIN: {vin}
|
||||||
|
Model year: {year} (determined from position 10 code '{year_code}')
|
||||||
|
|
||||||
|
The model year has already been resolved deterministically. Use {year} as the year.
|
||||||
|
|
||||||
VIN position reference:
|
VIN position reference:
|
||||||
- Positions 1-3 (WMI): World Manufacturer Identifier (country + manufacturer)
|
- Positions 1-3 (WMI): World Manufacturer Identifier (country + manufacturer)
|
||||||
- Positions 4-8 (VDS): Vehicle attributes (model, body, engine, etc.)
|
- Positions 4-8 (VDS): Vehicle attributes (model, body, engine, etc.)
|
||||||
- Position 9: Check digit
|
- Position 9: Check digit
|
||||||
- Position 10: Model year code. Codes repeat on a 30-year cycle:
|
- Position 10: Model year code (30-year cycle, extended through 2050):
|
||||||
A=1980/2010 B=1981/2011 C=1982/2012 D=1983/2013 E=1984/2014
|
A=1980/2010/2040 B=1981/2011/2041 C=1982/2012/2042 D=1983/2013/2043 E=1984/2014/2044
|
||||||
F=1985/2015 G=1986/2016 H=1987/2017 J=1988/2018 K=1989/2019
|
F=1985/2015/2045 G=1986/2016/2046 H=1987/2017/2047 J=1988/2018/2048 K=1989/2019/2049
|
||||||
L=1990/2020 M=1991/2021 N=1992/2022 P=1993/2023 R=1994/2024
|
L=1990/2020/2050 M=1991/2021 N=1992/2022 P=1993/2023 R=1994/2024
|
||||||
S=1995/2025 T=1996/2026 V=1997/2027 W=1998/2028 X=1999/2029
|
S=1995/2025 T=1996/2026 V=1997/2027 W=1998/2028 X=1999/2029
|
||||||
Y=2000/2030 1=2001/2031 2=2002/2032 3=2003/2033 4=2004/2034
|
Y=2000/2030 1=2001/2031 2=2002/2032 3=2003/2033 4=2004/2034
|
||||||
5=2005/2035 6=2006/2036 7=2007/2037 8=2008/2038 9=2009/2039
|
5=2005/2035 6=2006/2036 7=2007/2037 8=2008/2038 9=2009/2039
|
||||||
To disambiguate: if position 7 is numeric, use the 2010+ year; if alphabetic, use the 1980-2009 year.
|
|
||||||
- Position 11: Assembly plant
|
- Position 11: Assembly plant
|
||||||
- Positions 12-17: Sequential production number
|
- Positions 12-17: Sequential production number
|
||||||
|
|
||||||
Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. The year MUST be derived from position 10 using the table above. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
|
Return the vehicle's make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability.\
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_VIN_DECODE_SCHEMA: dict[str, Any] = {
|
_VIN_DECODE_SCHEMA: dict[str, Any] = {
|
||||||
@@ -288,6 +344,10 @@ class GeminiEngine:
|
|||||||
def decode_vin(self, vin: str) -> VinDecodeResult:
|
def decode_vin(self, vin: str) -> VinDecodeResult:
|
||||||
"""Decode a VIN string into structured vehicle data via Gemini.
|
"""Decode a VIN string into structured vehicle data via Gemini.
|
||||||
|
|
||||||
|
The model year is resolved deterministically from VIN positions 7
|
||||||
|
and 10 -- never delegated to the LLM. Gemini handles make, model,
|
||||||
|
trim, and other fields that require manufacturer knowledge.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
vin: A 17-character Vehicle Identification Number.
|
vin: A 17-character Vehicle Identification Number.
|
||||||
|
|
||||||
@@ -300,6 +360,16 @@ class GeminiEngine:
|
|||||||
"""
|
"""
|
||||||
model = self._get_model()
|
model = self._get_model()
|
||||||
|
|
||||||
|
# Resolve year deterministically from VIN structure
|
||||||
|
resolved_year = resolve_vin_year(vin)
|
||||||
|
year_code = vin[9].upper() if len(vin) >= 10 else "?"
|
||||||
|
logger.info(
|
||||||
|
"VIN year resolved: code=%s pos7=%s -> year=%s",
|
||||||
|
year_code,
|
||||||
|
vin[6] if len(vin) >= 7 else "?",
|
||||||
|
resolved_year,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
|
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
|
||||||
|
|
||||||
@@ -308,7 +378,11 @@ class GeminiEngine:
|
|||||||
response_schema=_VIN_DECODE_SCHEMA,
|
response_schema=_VIN_DECODE_SCHEMA,
|
||||||
)
|
)
|
||||||
|
|
||||||
prompt = _VIN_DECODE_PROMPT.format(vin=vin)
|
prompt = _VIN_DECODE_PROMPT.format(
|
||||||
|
vin=vin,
|
||||||
|
year=resolved_year or "unknown",
|
||||||
|
year_code=year_code,
|
||||||
|
)
|
||||||
response = model.generate_content(
|
response = model.generate_content(
|
||||||
[prompt],
|
[prompt],
|
||||||
generation_config=vin_config,
|
generation_config=vin_config,
|
||||||
@@ -316,10 +390,21 @@ class GeminiEngine:
|
|||||||
|
|
||||||
raw = json.loads(response.text)
|
raw = json.loads(response.text)
|
||||||
|
|
||||||
|
# Override year with deterministic value -- never trust the LLM
|
||||||
|
# for a mechanical lookup
|
||||||
|
gemini_year = raw.get("year")
|
||||||
|
if resolved_year and gemini_year != resolved_year:
|
||||||
|
logger.warning(
|
||||||
|
"Gemini returned year %s but resolved year is %s for VIN %s -- overriding",
|
||||||
|
gemini_year,
|
||||||
|
resolved_year,
|
||||||
|
vin,
|
||||||
|
)
|
||||||
|
|
||||||
logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0))
|
logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0))
|
||||||
|
|
||||||
return VinDecodeResult(
|
return VinDecodeResult(
|
||||||
year=raw.get("year"),
|
year=resolved_year if resolved_year else raw.get("year"),
|
||||||
make=raw.get("make"),
|
make=raw.get("make"),
|
||||||
model=raw.get("model"),
|
model=raw.get("model"),
|
||||||
trim_level=raw.get("trimLevel"),
|
trim_level=raw.get("trimLevel"),
|
||||||
|
|||||||
122
ocr/tests/test_resolve_vin_year.py
Normal file
122
ocr/tests/test_resolve_vin_year.py
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
"""Tests for deterministic VIN year resolution.
|
||||||
|
|
||||||
|
Covers: all three 30-year cycles (1980-2009, 2010-2039, 2040-2050),
|
||||||
|
position 7 disambiguation, edge cases, and invalid inputs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from app.engines.gemini_engine import resolve_vin_year
|
||||||
|
|
||||||
|
|
||||||
|
class TestSecondCycle:
    """Position 7 alphabetic -> 2010-2039 cycle (49 CFR 565.15)."""

    def test_p_with_alpha_pos7_returns_2023(self):
        """P=2023 when position 7 is alphabetic (the bug that triggered this fix)."""
        # VIN: 1G1YE2D32P5602473 -- pos7='D' (alphabetic), pos10='P'
        assert resolve_vin_year("1G1YE2D32P5602473") == 2023

    def test_a_with_alpha_pos7_returns_2010(self):
        """A=2010 when position 7 is alphabetic."""
        assert resolve_vin_year("1G1YE2D32A5602473") == 2010

    def test_l_with_alpha_pos7_returns_2020(self):
        """L=2020 when position 7 is alphabetic."""
        assert resolve_vin_year("1G1YE2D32L5602473") == 2020

    def test_9_with_alpha_pos7_returns_2039(self):
        """9=2039 when position 7 is alphabetic."""
        assert resolve_vin_year("1G1YE2D3295602473") == 2039

    def test_digit_1_with_alpha_pos7_returns_2031(self):
        """1=2031 when position 7 is alphabetic."""
        assert resolve_vin_year("1G1YE2D3215602473") == 2031

    def test_s_with_alpha_pos7_returns_2025(self):
        """S=2025 when position 7 is alphabetic."""
        assert resolve_vin_year("1G1YE2D32S5602473") == 2025

    def test_t_with_alpha_pos7_returns_2026(self):
        """T=2026 when position 7 is alphabetic."""
        assert resolve_vin_year("1G1YE2D32T5602473") == 2026
|
||||||
|
|
||||||
|
|
||||||
|
class TestFirstCycle:
    """Position 7 numeric -> 1980-2009 cycle (when 2040+ is not yet plausible)."""

    def test_m_with_numeric_pos7_returns_1991(self):
        """M=1991 when position 7 is numeric (third cycle 2051 is not plausible)."""
        # VIN: 1G1YE2132M5602473 -- pos7='1' (numeric), pos10='M'
        assert resolve_vin_year("1G1YE2132M5602473") == 1991

    def test_n_with_numeric_pos7_returns_1992(self):
        """N=1992 when position 7 is numeric."""
        assert resolve_vin_year("1G1YE2132N5602473") == 1992

    def test_p_with_numeric_pos7_returns_1993(self):
        """P=1993 when position 7 is numeric (third cycle 2053 not plausible)."""
        assert resolve_vin_year("1G1YE2132P5602473") == 1993

    def test_y_with_numeric_pos7_returns_2000(self):
        """Y=2000 when position 7 is numeric."""
        assert resolve_vin_year("1G1YE2132Y5602473") == 2000
|
||||||
|
|
||||||
|
|
||||||
|
class TestThirdCycle:
    """Position 7 numeric + third cycle year (2040-2050) is plausible.

    The 2040+ cycle is an extrapolation: 49 CFR 565.15 only defines position 7
    semantics through 2039. These tests pin the resolver's chosen behaviour of
    reusing numeric position 7 for the third cycle.
    """

    @patch("app.engines.gemini_engine.datetime")
    def test_a_with_numeric_pos7_returns_2040_when_plausible(self, mock_dt):
        """A=2040 when position 7 is numeric and year 2040 is plausible."""
        mock_dt.now.return_value = datetime(2039, 1, 1)
        # 2039 + 2 = 2041 >= 2040, so third cycle is plausible
        assert resolve_vin_year("1G1YE2132A5602473") == 2040

    @patch("app.engines.gemini_engine.datetime")
    def test_l_with_numeric_pos7_returns_2050_when_plausible(self, mock_dt):
        """L=2050 when position 7 is numeric and year 2050 is plausible."""
        mock_dt.now.return_value = datetime(2049, 6, 1)
        assert resolve_vin_year("1G1YE2132L5602473") == 2050

    @patch("app.engines.gemini_engine.datetime")
    def test_a_with_numeric_pos7_returns_1980_when_2040_not_plausible(self, mock_dt):
        """A=1980 when third cycle year (2040) exceeds max plausible."""
        mock_dt.now.return_value = datetime(2026, 2, 20)
        # 2026 + 2 = 2028 < 2040, so third cycle not plausible -> first cycle
        assert resolve_vin_year("1G1YE2132A5602473") == 1980

    @patch("app.engines.gemini_engine.datetime")
    def test_k_with_numeric_pos7_returns_2049_when_plausible(self, mock_dt):
        """K=2049 when position 7 is numeric and year is plausible."""
        mock_dt.now.return_value = datetime(2048, 1, 1)
        assert resolve_vin_year("1G1YE2132K5602473") == 2049
|
||||||
|
|
||||||
|
|
||||||
|
class TestEdgeCases:
    """Malformed input and boundary behaviour of the year resolver."""

    def test_short_vin_returns_none(self):
        """A VIN with fewer than 17 characters cannot be resolved."""
        truncated_vin = "1G1YE2D32"
        assert resolve_vin_year(truncated_vin) is None

    def test_empty_vin_returns_none(self):
        """An empty VIN cannot be resolved."""
        result = resolve_vin_year("")
        assert result is None

    def test_invalid_year_code_returns_none(self):
        """An unknown position-10 code (here 'Z') cannot be resolved."""
        vin_with_z_code = "1G1YE2D32Z5602473"
        assert resolve_vin_year(vin_with_z_code) is None

    def test_lowercase_vin_handled(self):
        """Resolution is case-insensitive."""
        lowercase_vin = "1g1ye2d32p5602473"
        assert resolve_vin_year(lowercase_vin) == 2023

    def test_i_o_q_not_valid_year_codes(self):
        """I, O and Q in position 10 are rejected rather than guessed at."""
        # These letters never appear in a VIN; the resolver should still
        # degrade gracefully when handed one.
        excluded_code_vins = (
            "1G1YE2D32I5602473",
            "1G1YE2D32O5602473",
            "1G1YE2D32Q5602473",
        )
        for candidate in excluded_code_vins:
            assert resolve_vin_year(candidate) is None
|
||||||
Reference in New Issue
Block a user