From a75f7b558314d0fca036303202461ef3b8e8e544 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Wed, 18 Feb 2026 21:40:10 -0600 Subject: [PATCH 1/9] feat: add VIN decode endpoint to OCR Python service (refs #224) Add POST /decode/vin endpoint using Gemini 2.5 Flash for VIN string decoding. Returns structured vehicle data (year, make, model, trim, body/drive/fuel type, engine, transmission) with confidence score. Co-Authored-By: Claude Opus 4.6 --- ocr/app/engines/gemini_engine.py | 104 +++++++++++++++- ocr/app/main.py | 4 +- ocr/app/models/__init__.py | 4 + ocr/app/models/schemas.py | 27 +++++ ocr/app/routers/__init__.py | 3 +- ocr/app/routers/decode.py | 67 +++++++++++ ocr/tests/test_vin_decode.py | 199 +++++++++++++++++++++++++++++++ 7 files changed, 403 insertions(+), 5 deletions(-) create mode 100644 ocr/app/routers/decode.py create mode 100644 ocr/tests/test_vin_decode.py diff --git a/ocr/app/engines/gemini_engine.py b/ocr/app/engines/gemini_engine.py index b26a3c0..6fa347a 100644 --- a/ocr/app/engines/gemini_engine.py +++ b/ocr/app/engines/gemini_engine.py @@ -1,4 +1,4 @@ -"""Gemini 2.5 Flash engine for maintenance schedule extraction from PDFs. +"""Gemini 2.5 Flash engine for document understanding and VIN decode. Standalone module (does NOT extend OcrEngine) because Gemini performs semantic document understanding, not traditional OCR word-box extraction. @@ -37,6 +37,31 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati Return the results as a JSON object with a single "maintenanceSchedule" array.\ """ +_VIN_DECODE_PROMPT = """\ +Given the VIN (Vehicle Identification Number) below, decode it and return the vehicle specifications. + +VIN: {vin} + +Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. 
Return a confidence score (0.0-1.0) indicating overall decode reliability.\ +""" + +_VIN_DECODE_SCHEMA: dict[str, Any] = { + "type": "object", + "properties": { + "year": {"type": "integer", "nullable": True}, + "make": {"type": "string", "nullable": True}, + "model": {"type": "string", "nullable": True}, + "trimLevel": {"type": "string", "nullable": True}, + "bodyType": {"type": "string", "nullable": True}, + "driveType": {"type": "string", "nullable": True}, + "fuelType": {"type": "string", "nullable": True}, + "engine": {"type": "string", "nullable": True}, + "transmission": {"type": "string", "nullable": True}, + "confidence": {"type": "number"}, + }, + "required": ["confidence"], +} + _RESPONSE_SCHEMA: dict[str, Any] = { "type": "object", "properties": { @@ -70,6 +95,22 @@ class GeminiProcessingError(GeminiEngineError): """Raised when Gemini fails to process a document.""" +@dataclass +class VinDecodeResult: + """Result from Gemini VIN decode.""" + + year: int | None = None + make: str | None = None + model: str | None = None + trim_level: str | None = None + body_type: str | None = None + drive_type: str | None = None + fuel_type: str | None = None + engine: str | None = None + transmission: str | None = None + confidence: float = 0.0 + + @dataclass class MaintenanceItem: """A single extracted maintenance schedule item.""" @@ -89,13 +130,13 @@ class MaintenanceExtractionResult: class GeminiEngine: - """Gemini 2.5 Flash wrapper for maintenance schedule extraction. + """Gemini 2.5 Flash wrapper for maintenance schedule extraction and VIN decode. Standalone class (not an OcrEngine subclass) because Gemini performs semantic document understanding rather than traditional OCR. Uses lazy initialization: the Vertex AI client is not created until - the first ``extract_maintenance()`` call. + the first call to ``extract_maintenance()`` or ``decode_vin()``. 
""" def __init__(self) -> None: @@ -228,3 +269,60 @@ class GeminiEngine: raise GeminiProcessingError( f"Gemini maintenance extraction failed: {exc}" ) from exc + + def decode_vin(self, vin: str) -> VinDecodeResult: + """Decode a VIN string into structured vehicle data via Gemini. + + Args: + vin: A 17-character Vehicle Identification Number. + + Returns: + Structured vehicle specification result. + + Raises: + GeminiProcessingError: If Gemini fails to decode the VIN. + GeminiUnavailableError: If the engine cannot be initialized. + """ + model = self._get_model() + + try: + from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped] + + vin_config = GenerationConfig( + response_mime_type="application/json", + response_schema=_VIN_DECODE_SCHEMA, + ) + + prompt = _VIN_DECODE_PROMPT.format(vin=vin) + response = model.generate_content( + [prompt], + generation_config=vin_config, + ) + + raw = json.loads(response.text) + + logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0)) + + return VinDecodeResult( + year=raw.get("year"), + make=raw.get("make"), + model=raw.get("model"), + trim_level=raw.get("trimLevel"), + body_type=raw.get("bodyType"), + drive_type=raw.get("driveType"), + fuel_type=raw.get("fuelType"), + engine=raw.get("engine"), + transmission=raw.get("transmission"), + confidence=raw.get("confidence", 0.0), + ) + + except (GeminiEngineError,): + raise + except json.JSONDecodeError as exc: + raise GeminiProcessingError( + f"Gemini returned invalid JSON for VIN decode: {exc}" + ) from exc + except Exception as exc: + raise GeminiProcessingError( + f"Gemini VIN decode failed: {exc}" + ) from exc diff --git a/ocr/app/main.py b/ocr/app/main.py index d5c87ab..df0a9f2 100644 --- a/ocr/app/main.py +++ b/ocr/app/main.py @@ -6,7 +6,7 @@ from typing import AsyncIterator from fastapi import FastAPI from app.config import settings -from app.routers import extract_router, jobs_router +from app.routers import 
decode_router, extract_router, jobs_router from app.services import job_queue # Configure logging @@ -36,6 +36,7 @@ app = FastAPI( ) # Include routers +app.include_router(decode_router) app.include_router(extract_router) app.include_router(jobs_router) @@ -54,6 +55,7 @@ async def root() -> dict: "version": "1.0.0", "log_level": settings.log_level, "endpoints": [ + "POST /decode/vin - VIN string decode via Gemini", "POST /extract - Synchronous OCR extraction", "POST /extract/vin - VIN-specific extraction with validation", "POST /extract/receipt - Receipt extraction (fuel, general)", diff --git a/ocr/app/models/__init__.py b/ocr/app/models/__init__.py index 6c5a8aa..c72cdc0 100644 --- a/ocr/app/models/__init__.py +++ b/ocr/app/models/__init__.py @@ -14,6 +14,8 @@ from .schemas import ( ReceiptExtractedField, ReceiptExtractionResponse, VinAlternative, + VinDecodeRequest, + VinDecodeResponse, VinExtractionResponse, ) @@ -32,5 +34,7 @@ __all__ = [ "ReceiptExtractedField", "ReceiptExtractionResponse", "VinAlternative", + "VinDecodeRequest", + "VinDecodeResponse", "VinExtractionResponse", ] diff --git a/ocr/app/models/schemas.py b/ocr/app/models/schemas.py index d6a8737..826a27c 100644 --- a/ocr/app/models/schemas.py +++ b/ocr/app/models/schemas.py @@ -169,3 +169,30 @@ class ManualJobResponse(BaseModel): error: Optional[str] = None model_config = {"populate_by_name": True} + + +class VinDecodeRequest(BaseModel): + """Request body for VIN decode endpoint.""" + + vin: str + + +class VinDecodeResponse(BaseModel): + """Response from VIN decode endpoint.""" + + success: bool + vin: str + year: Optional[int] = None + make: Optional[str] = None + model: Optional[str] = None + trim_level: Optional[str] = Field(default=None, alias="trimLevel") + body_type: Optional[str] = Field(default=None, alias="bodyType") + drive_type: Optional[str] = Field(default=None, alias="driveType") + fuel_type: Optional[str] = Field(default=None, alias="fuelType") + engine: Optional[str] = None + 
transmission: Optional[str] = None + confidence: float = Field(ge=0.0, le=1.0) + processing_time_ms: int = Field(alias="processingTimeMs") + error: Optional[str] = None + + model_config = {"populate_by_name": True} diff --git a/ocr/app/routers/__init__.py b/ocr/app/routers/__init__.py index ded0afd..df35f23 100644 --- a/ocr/app/routers/__init__.py +++ b/ocr/app/routers/__init__.py @@ -1,5 +1,6 @@ """OCR API routers.""" +from .decode import router as decode_router from .extract import router as extract_router from .jobs import router as jobs_router -__all__ = ["extract_router", "jobs_router"] +__all__ = ["decode_router", "extract_router", "jobs_router"] diff --git a/ocr/app/routers/decode.py b/ocr/app/routers/decode.py new file mode 100644 index 0000000..7d737a7 --- /dev/null +++ b/ocr/app/routers/decode.py @@ -0,0 +1,67 @@ +"""VIN decode router - Gemini-powered VIN string decoding.""" +import logging +import re +import time + +from fastapi import APIRouter, HTTPException + +from app.engines.gemini_engine import ( + GeminiEngine, + GeminiProcessingError, + GeminiUnavailableError, +) +from app.models import VinDecodeRequest, VinDecodeResponse + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/decode", tags=["decode"]) + +_VIN_REGEX = re.compile(r"^[A-HJ-NPR-Z0-9]{17}$") + +# Shared engine instance (lazy init on first request) +_gemini_engine = GeminiEngine() + + +@router.post("/vin", response_model=VinDecodeResponse) +async def decode_vin(request: VinDecodeRequest) -> VinDecodeResponse: + """Decode a VIN string into structured vehicle data using Gemini. + + Accepts a 17-character VIN and returns year, make, model, trim, etc. + """ + vin = request.vin.upper().strip() + + if not _VIN_REGEX.match(vin): + raise HTTPException( + status_code=400, + detail=f"Invalid VIN format: must be 17 alphanumeric characters (excluding I, O, Q). 
Got: {vin}", + ) + + start_ms = time.monotonic_ns() // 1_000_000 + + try: + result = _gemini_engine.decode_vin(vin) + except GeminiUnavailableError as exc: + logger.error("Gemini unavailable for VIN decode: %s", exc) + raise HTTPException(status_code=503, detail=str(exc)) from exc + except GeminiProcessingError as exc: + logger.error("Gemini processing error for VIN %s: %s", vin, exc) + raise HTTPException(status_code=422, detail=str(exc)) from exc + + elapsed_ms = (time.monotonic_ns() // 1_000_000) - start_ms + + return VinDecodeResponse( + success=True, + vin=vin, + year=result.year, + make=result.make, + model=result.model, + trimLevel=result.trim_level, + bodyType=result.body_type, + driveType=result.drive_type, + fuelType=result.fuel_type, + engine=result.engine, + transmission=result.transmission, + confidence=result.confidence, + processingTimeMs=elapsed_ms, + error=None, + ) diff --git a/ocr/tests/test_vin_decode.py b/ocr/tests/test_vin_decode.py new file mode 100644 index 0000000..2a1fa56 --- /dev/null +++ b/ocr/tests/test_vin_decode.py @@ -0,0 +1,199 @@ +"""Tests for the VIN decode endpoint (POST /decode/vin). + +Covers: valid VIN returns 200 with correct response shape, +invalid VIN format returns 400, Gemini unavailable returns 503, +and Gemini processing error returns 422. +All GeminiEngine calls are mocked. 
+""" + +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient + +from app.engines.gemini_engine import ( + GeminiProcessingError, + GeminiUnavailableError, + VinDecodeResult, +) +from app.main import app + +client = TestClient(app) + +# A valid 17-character VIN (no I, O, Q) +_VALID_VIN = "1HGBH41JXMN109186" + +_FULL_RESULT = VinDecodeResult( + year=2021, + make="Honda", + model="Civic", + trim_level="EX", + body_type="Sedan", + drive_type="FWD", + fuel_type="Gasoline", + engine="2.0L I4", + transmission="CVT", + confidence=0.95, +) + + +# --- Valid VIN --- + + +class TestDecodeVinSuccess: + """Verify successful VIN decode returns 200 with correct response shape.""" + + @patch("app.routers.decode._gemini_engine") + def test_valid_vin_returns_200(self, mock_engine): + """Normal: Valid VIN returns 200 with all vehicle fields populated.""" + mock_engine.decode_vin.return_value = _FULL_RESULT + + response = client.post("/decode/vin", json={"vin": _VALID_VIN}) + + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert data["vin"] == _VALID_VIN + assert data["year"] == 2021 + assert data["make"] == "Honda" + assert data["model"] == "Civic" + assert data["trimLevel"] == "EX" + assert data["bodyType"] == "Sedan" + assert data["driveType"] == "FWD" + assert data["fuelType"] == "Gasoline" + assert data["engine"] == "2.0L I4" + assert data["transmission"] == "CVT" + assert data["confidence"] == 0.95 + assert "processingTimeMs" in data + assert data["error"] is None + + @patch("app.routers.decode._gemini_engine") + def test_vin_uppercased_before_decode(self, mock_engine): + """VIN submitted in lowercase is normalised to uppercase before decoding.""" + mock_engine.decode_vin.return_value = _FULL_RESULT + + response = client.post("/decode/vin", json={"vin": _VALID_VIN.lower()}) + + assert response.status_code == 200 + data = response.json() + assert data["vin"] == _VALID_VIN + 
mock_engine.decode_vin.assert_called_once_with(_VALID_VIN) + + @patch("app.routers.decode._gemini_engine") + def test_nullable_fields_allowed(self, mock_engine): + """Edge: VIN decode with only confidence set returns valid response.""" + mock_engine.decode_vin.return_value = VinDecodeResult(confidence=0.3) + + response = client.post("/decode/vin", json={"vin": _VALID_VIN}) + + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert data["year"] is None + assert data["make"] is None + assert data["confidence"] == 0.3 + + +# --- Invalid VIN format --- + + +class TestDecodeVinValidation: + """Verify invalid VIN formats return 400.""" + + def test_too_short_vin_returns_400(self): + """VIN shorter than 17 characters is rejected.""" + response = client.post("/decode/vin", json={"vin": "1HGBH41JXM"}) + + assert response.status_code == 400 + assert "Invalid VIN format" in response.json()["detail"] + + def test_too_long_vin_returns_400(self): + """VIN longer than 17 characters is rejected.""" + response = client.post("/decode/vin", json={"vin": "1HGBH41JXMN109186X"}) + + assert response.status_code == 400 + + def test_vin_with_letter_i_returns_400(self): + """VIN containing the letter I (invalid character) is rejected.""" + # Replace position 0 with I to create invalid VIN + invalid_vin = "IHGBH41JXMN109186" + response = client.post("/decode/vin", json={"vin": invalid_vin}) + + assert response.status_code == 400 + assert "Invalid VIN format" in response.json()["detail"] + + def test_vin_with_letter_o_returns_400(self): + """VIN containing the letter O (invalid character) is rejected.""" + invalid_vin = "OHGBH41JXMN109186" + response = client.post("/decode/vin", json={"vin": invalid_vin}) + + assert response.status_code == 400 + + def test_vin_with_letter_q_returns_400(self): + """VIN containing the letter Q (invalid character) is rejected.""" + invalid_vin = "QHGBH41JXMN109186" + response = client.post("/decode/vin", 
json={"vin": invalid_vin}) + + assert response.status_code == 400 + + def test_empty_vin_returns_400(self): + """Empty VIN string is rejected.""" + response = client.post("/decode/vin", json={"vin": ""}) + + assert response.status_code == 400 + + def test_vin_with_special_chars_returns_400(self): + """VIN containing special characters is rejected.""" + response = client.post("/decode/vin", json={"vin": "1HGBH41J-MN109186"}) + + assert response.status_code == 400 + + +# --- Gemini unavailable --- + + +class TestDecodeVinGeminiUnavailable: + """Verify Gemini service unavailability returns 503.""" + + @patch("app.routers.decode._gemini_engine") + def test_gemini_unavailable_returns_503(self, mock_engine): + """When Gemini cannot be initialized, endpoint returns 503.""" + mock_engine.decode_vin.side_effect = GeminiUnavailableError( + "Google credential config not found" + ) + + response = client.post("/decode/vin", json={"vin": _VALID_VIN}) + + assert response.status_code == 503 + assert "Google credential config not found" in response.json()["detail"] + + +# --- Gemini processing error --- + + +class TestDecodeVinGeminiProcessingError: + """Verify Gemini processing failures return 422.""" + + @patch("app.routers.decode._gemini_engine") + def test_gemini_processing_error_returns_422(self, mock_engine): + """When Gemini returns invalid output, endpoint returns 422.""" + mock_engine.decode_vin.side_effect = GeminiProcessingError( + "Gemini returned invalid JSON for VIN decode: ..." 
+ ) + + response = client.post("/decode/vin", json={"vin": _VALID_VIN}) + + assert response.status_code == 422 + assert "Gemini returned invalid JSON" in response.json()["detail"] + + @patch("app.routers.decode._gemini_engine") + def test_gemini_api_failure_returns_422(self, mock_engine): + """When Gemini API call fails at runtime, endpoint returns 422.""" + mock_engine.decode_vin.side_effect = GeminiProcessingError( + "Gemini VIN decode failed: API quota exceeded" + ) + + response = client.post("/decode/vin", json={"vin": _VALID_VIN}) + + assert response.status_code == 422 + assert "Gemini VIN decode failed" in response.json()["detail"] From 3cd61256ba26af0d2df5b980b2581be63de8c199 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Wed, 18 Feb 2026 21:40:47 -0600 Subject: [PATCH 2/9] feat: add backend OCR client method for VIN decode (refs #225) Add VinDecodeResponse type and OcrClient.decodeVin() method that sends JSON POST to the new /decode/vin OCR endpoint. Unlike other OCR methods, this uses JSON body instead of multipart since there is no file upload. 
Co-Authored-By: Claude Opus 4.6 --- backend/src/features/ocr/domain/ocr.types.ts | 18 +++++++ .../src/features/ocr/external/ocr-client.ts | 51 ++++++++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/backend/src/features/ocr/domain/ocr.types.ts b/backend/src/features/ocr/domain/ocr.types.ts index 2f00b4c..1be5c5c 100644 --- a/backend/src/features/ocr/domain/ocr.types.ts +++ b/backend/src/features/ocr/domain/ocr.types.ts @@ -131,3 +131,21 @@ export interface ManualJobResponse { result?: ManualExtractionResult; error?: string; } + +/** Response from VIN decode via Gemini (OCR service) */ +export interface VinDecodeResponse { + success: boolean; + vin: string; + year: number | null; + make: string | null; + model: string | null; + trimLevel: string | null; + bodyType: string | null; + driveType: string | null; + fuelType: string | null; + engine: string | null; + transmission: string | null; + confidence: number; + processingTimeMs: number; + error: string | null; +} diff --git a/backend/src/features/ocr/external/ocr-client.ts b/backend/src/features/ocr/external/ocr-client.ts index 627abf7..4cc3fca 100644 --- a/backend/src/features/ocr/external/ocr-client.ts +++ b/backend/src/features/ocr/external/ocr-client.ts @@ -2,7 +2,7 @@ * @ai-summary HTTP client for OCR service communication */ import { logger } from '../../../core/logging/logger'; -import type { JobResponse, ManualJobResponse, OcrResponse, ReceiptExtractionResponse, VinExtractionResponse } from '../domain/ocr.types'; +import type { JobResponse, ManualJobResponse, OcrResponse, ReceiptExtractionResponse, VinDecodeResponse, VinExtractionResponse } from '../domain/ocr.types'; /** OCR service configuration */ const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000'; @@ -373,6 +373,55 @@ export class OcrClient { return result; } + /** + * Decode a VIN string into structured vehicle data via Gemini. 
+ * + * Unlike other OCR methods, this sends JSON (not multipart) because + * VIN decode has no file upload. + * + * @param vin - 17-character Vehicle Identification Number + * @returns Structured vehicle data from Gemini decode + */ + async decodeVin(vin: string): Promise { + const url = `${this.baseUrl}/decode/vin`; + + logger.info('OCR VIN decode request', { + operation: 'ocr.client.decodeVin', + url, + vin, + }); + + const response = await this.fetchWithTimeout(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ vin }), + }); + + if (!response.ok) { + const errorText = await response.text(); + logger.error('OCR VIN decode failed', { + operation: 'ocr.client.decodeVin.error', + status: response.status, + error: errorText, + }); + const err: any = new Error(`OCR service error: ${response.status} - ${errorText}`); + err.statusCode = response.status; + throw err; + } + + const result = (await response.json()) as VinDecodeResponse; + + logger.info('OCR VIN decode completed', { + operation: 'ocr.client.decodeVin.success', + success: result.success, + vin: result.vin, + confidence: result.confidence, + processingTimeMs: result.processingTimeMs, + }); + + return result; + } + /** * Check if the OCR service is healthy. * From 5cbf9c764dce84ba99f05cd10a7946393c07f1b6 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Wed, 18 Feb 2026 21:47:47 -0600 Subject: [PATCH 3/9] feat: rewire vehicles controller to OCR VIN decode (refs #226) Replace NHTSAClient with OcrClient in vehicles controller. Move cache logic into VehiclesService with format-aware reads (Gemini vs legacy NHTSA entries). Rename nhtsaValue to sourceValue in MatchedField. Remove vpic config from Zod schema and YAML config files. 
Co-Authored-By: Claude Opus 4.6 --- backend/src/core/config/config-loader.ts | 8 - backend/src/core/config/feature-tiers.ts | 2 +- .../vehicles/api/vehicles.controller.ts | 63 +++++-- .../features/vehicles/api/vehicles.routes.ts | 2 +- .../vehicles/domain/vehicles.service.ts | 178 ++++++++++++------ .../vehicles/domain/vehicles.types.ts | 50 +++++ config/app/ci.yml | 5 - config/app/production.yml.example | 2 - config/shared/production.yml | 3 - 9 files changed, 220 insertions(+), 93 deletions(-) diff --git a/backend/src/core/config/config-loader.ts b/backend/src/core/config/config-loader.ts index 30f7ca4..1de41e3 100644 --- a/backend/src/core/config/config-loader.ts +++ b/backend/src/core/config/config-loader.ts @@ -41,14 +41,6 @@ const configSchema = z.object({ audience: z.string(), }), - // External APIs configuration (optional) - external: z.object({ - vpic: z.object({ - url: z.string(), - timeout: z.string(), - }).optional(), - }).optional(), - // Service configuration service: z.object({ name: z.string(), diff --git a/backend/src/core/config/feature-tiers.ts b/backend/src/core/config/feature-tiers.ts index ca803df..1260c8c 100644 --- a/backend/src/core/config/feature-tiers.ts +++ b/backend/src/core/config/feature-tiers.ts @@ -29,7 +29,7 @@ export const FEATURE_TIERS: Record = { 'vehicle.vinDecode': { minTier: 'pro', name: 'VIN Decode', - upgradePrompt: 'Upgrade to Pro to automatically decode VIN and populate vehicle details from the NHTSA database.', + upgradePrompt: 'Upgrade to Pro to automatically decode VIN and populate vehicle details from the vehicle database.', }, 'fuelLog.receiptScan': { minTier: 'pro', diff --git a/backend/src/features/vehicles/api/vehicles.controller.ts b/backend/src/features/vehicles/api/vehicles.controller.ts index 475abc6..79db275 100644 --- a/backend/src/features/vehicles/api/vehicles.controller.ts +++ b/backend/src/features/vehicles/api/vehicles.controller.ts @@ -10,19 +10,18 @@ import { pool } from 
'../../../core/config/database'; import { logger } from '../../../core/logging/logger'; import { CreateVehicleBody, UpdateVehicleBody, VehicleParams } from '../domain/vehicles.types'; import { getStorageService } from '../../../core/storage/storage.service'; -import { NHTSAClient, DecodeVinRequest } from '../external/nhtsa'; +import { ocrClient } from '../../ocr/external/ocr-client'; +import type { DecodeVinRequest } from '../domain/vehicles.types'; import crypto from 'crypto'; import FileType from 'file-type'; import path from 'path'; export class VehiclesController { private vehiclesService: VehiclesService; - private nhtsaClient: NHTSAClient; constructor() { const repository = new VehiclesRepository(pool); this.vehiclesService = new VehiclesService(repository, pool); - this.nhtsaClient = new NHTSAClient(pool); } async getUserVehicles(request: FastifyRequest, reply: FastifyReply) { @@ -378,7 +377,7 @@ export class VehiclesController { } /** - * Decode VIN using NHTSA vPIC API + * Decode VIN using OCR service (Gemini) * POST /api/vehicles/decode-vin * Requires Pro or Enterprise tier */ @@ -395,13 +394,34 @@ export class VehiclesController { }); } - logger.info('VIN decode requested', { userId, vin: vin.substring(0, 6) + '...' }); + // Validate VIN format + const sanitizedVin = vin.trim().toUpperCase(); + const VIN_REGEX = /^[A-HJ-NPR-Z0-9]{17}$/; + if (!VIN_REGEX.test(sanitizedVin)) { + return reply.code(400).send({ + error: 'INVALID_VIN', + message: 'Invalid VIN format. VIN must be exactly 17 characters and contain only letters (except I, O, Q) and numbers.' + }); + } - // Validate and decode VIN - const response = await this.nhtsaClient.decodeVin(vin); + logger.info('VIN decode requested', { userId, vin: sanitizedVin.substring(0, 6) + '...' 
}); - // Extract and map fields from NHTSA response - const decodedData = await this.vehiclesService.mapNHTSAResponse(response); + // Check cache first + const cached = await this.vehiclesService.getVinCached(sanitizedVin); + if (cached) { + logger.info('VIN decode cache hit', { userId }); + const decodedData = await this.vehiclesService.mapVinDecodeResponse(cached); + return reply.code(200).send(decodedData); + } + + // Call OCR service for VIN decode + const response = await ocrClient.decodeVin(sanitizedVin); + + // Cache the response + await this.vehiclesService.saveVinCache(sanitizedVin, response); + + // Map response to decoded vehicle data with dropdown matching + const decodedData = await this.vehiclesService.mapVinDecodeResponse(response); logger.info('VIN decode successful', { userId, @@ -414,7 +434,7 @@ export class VehiclesController { } catch (error: any) { logger.error('VIN decode failed', { error, userId }); - // Handle validation errors + // Handle VIN validation errors if (error.message?.includes('Invalid VIN')) { return reply.code(400).send({ error: 'INVALID_VIN', @@ -422,16 +442,25 @@ export class VehiclesController { }); } - // Handle timeout - if (error.message?.includes('timed out')) { - return reply.code(504).send({ - error: 'VIN_DECODE_TIMEOUT', - message: 'NHTSA API request timed out. Please try again.' + // Handle OCR service errors by status code + if (error.statusCode === 503 || error.statusCode === 422) { + return reply.code(502).send({ + error: 'VIN_DECODE_FAILED', + message: 'VIN decode service unavailable', + details: error.message }); } - // Handle NHTSA API errors - if (error.message?.includes('NHTSA')) { + // Handle timeout + if (error.message?.includes('timed out') || error.message?.includes('aborted')) { + return reply.code(504).send({ + error: 'VIN_DECODE_TIMEOUT', + message: 'VIN decode service timed out. Please try again.' 
+ }); + } + + // Handle OCR service errors + if (error.message?.includes('OCR service error')) { return reply.code(502).send({ error: 'VIN_DECODE_FAILED', message: 'Unable to decode VIN from external service', diff --git a/backend/src/features/vehicles/api/vehicles.routes.ts b/backend/src/features/vehicles/api/vehicles.routes.ts index e82cb6c..9d404a3 100644 --- a/backend/src/features/vehicles/api/vehicles.routes.ts +++ b/backend/src/features/vehicles/api/vehicles.routes.ts @@ -75,7 +75,7 @@ export const vehiclesRoutes: FastifyPluginAsync = async ( handler: vehiclesController.getDropdownOptions.bind(vehiclesController) }); - // POST /api/vehicles/decode-vin - Decode VIN using NHTSA vPIC API (Pro/Enterprise only) + // POST /api/vehicles/decode-vin - Decode VIN via OCR service (Pro/Enterprise only) fastify.post<{ Body: { vin: string } }>('/vehicles/decode-vin', { preHandler: [fastify.authenticate, fastify.requireTier({ featureKey: 'vehicle.vinDecode' })], handler: vehiclesController.decodeVin.bind(vehiclesController) diff --git a/backend/src/features/vehicles/domain/vehicles.service.ts b/backend/src/features/vehicles/domain/vehicles.service.ts index ca7864d..bb5b668 100644 --- a/backend/src/features/vehicles/domain/vehicles.service.ts +++ b/backend/src/features/vehicles/domain/vehicles.service.ts @@ -24,7 +24,8 @@ import { isValidVIN, isValidPreModernVIN } from '../../../shared-minimal/utils/v import { normalizeMakeName, normalizeModelName } from './name-normalizer'; import { getVehicleDataService, getPool } from '../../platform'; import { auditLogService } from '../../audit-log'; -import { NHTSAClient, NHTSADecodeResponse, DecodedVehicleData, MatchedField } from '../external/nhtsa'; +import type { VinDecodeResponse } from '../../ocr/domain/ocr.types'; +import type { DecodedVehicleData, MatchedField } from './vehicles.types'; import { canAddVehicle, getVehicleLimitConfig } from '../../../core/config/feature-tiers'; import { UserProfileRepository } from 
'../../user-profile/data/user-profile.repository'; import { SubscriptionTier } from '../../user-profile/domain/user-profile.types'; @@ -592,6 +593,71 @@ export class VehiclesService { const cacheKey = `${this.cachePrefix}:user:${userId}`; await cacheService.del(cacheKey); } + + /** + * Check vin_cache for existing VIN data. + * Format-aware: validates raw_data has `success` field (Gemini format). + * Old NHTSA-format entries are treated as cache misses and expire via TTL. + */ + async getVinCached(vin: string): Promise { + try { + const result = await this.pool.query<{ + raw_data: any; + cached_at: Date; + }>( + `SELECT raw_data, cached_at + FROM vin_cache + WHERE vin = $1 + AND cached_at > NOW() - INTERVAL '365 days'`, + [vin] + ); + + if (result.rows.length === 0) { + return null; + } + + const rawData = result.rows[0].raw_data; + + // Format-aware check: Gemini responses have `success` field, + // old NHTSA responses do not. Treat old format as cache miss. + if (!rawData || typeof rawData !== 'object' || !('success' in rawData)) { + logger.debug('VIN cache format mismatch (legacy NHTSA entry), treating as miss', { vin }); + return null; + } + + logger.debug('VIN cache hit', { vin }); + return rawData as VinDecodeResponse; + } catch (error) { + logger.error('Failed to check VIN cache', { vin, error }); + return null; + } + } + + /** + * Save VIN decode response to cache with ON CONFLICT upsert. 
+ */ + async saveVinCache(vin: string, response: VinDecodeResponse): Promise { + try { + await this.pool.query( + `INSERT INTO vin_cache (vin, make, model, year, engine_type, body_type, raw_data, cached_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, NOW()) + ON CONFLICT (vin) DO UPDATE SET + make = EXCLUDED.make, + model = EXCLUDED.model, + year = EXCLUDED.year, + engine_type = EXCLUDED.engine_type, + body_type = EXCLUDED.body_type, + raw_data = EXCLUDED.raw_data, + cached_at = NOW()`, + [vin, response.make, response.model, response.year, response.engine, response.bodyType, JSON.stringify(response)] + ); + + logger.debug('VIN cached', { vin }); + } catch (error) { + logger.error('Failed to cache VIN data', { vin, error }); + // Don't throw - caching failure shouldn't break the decode flow + } + } async getDropdownMakes(year: number): Promise { const vehicleDataService = getVehicleDataService(); @@ -657,82 +723,82 @@ export class VehiclesService { } /** - * Map NHTSA decode response to internal decoded vehicle data format + * Map VIN decode response to internal decoded vehicle data format * with dropdown matching and confidence levels */ - async mapNHTSAResponse(response: NHTSADecodeResponse): Promise { + async mapVinDecodeResponse(response: VinDecodeResponse): Promise { const vehicleDataService = getVehicleDataService(); const pool = getPool(); - // Extract raw values from NHTSA response - const nhtsaYear = NHTSAClient.extractYear(response); - const nhtsaMake = NHTSAClient.extractValue(response, 'Make'); - const nhtsaModel = NHTSAClient.extractValue(response, 'Model'); - const nhtsaTrim = NHTSAClient.extractValue(response, 'Trim'); - const nhtsaBodyType = NHTSAClient.extractValue(response, 'Body Class'); - const nhtsaDriveType = NHTSAClient.extractValue(response, 'Drive Type'); - const nhtsaFuelType = NHTSAClient.extractValue(response, 'Fuel Type - Primary'); - const nhtsaEngine = NHTSAClient.extractEngine(response); - const nhtsaTransmission = 
NHTSAClient.extractValue(response, 'Transmission Style'); + // Read flat fields directly from Gemini response + const sourceYear = response.year; + const sourceMake = response.make; + const sourceModel = response.model; + const sourceTrim = response.trimLevel; + const sourceBodyType = response.bodyType; + const sourceDriveType = response.driveType; + const sourceFuelType = response.fuelType; + const sourceEngine = response.engine; + const sourceTransmission = response.transmission; // Year is always high confidence if present (exact numeric match) const year: MatchedField = { - value: nhtsaYear, - nhtsaValue: nhtsaYear?.toString() || null, - confidence: nhtsaYear ? 'high' : 'none' + value: sourceYear, + sourceValue: sourceYear?.toString() || null, + confidence: sourceYear ? 'high' : 'none' }; // Match make against dropdown options - let make: MatchedField = { value: null, nhtsaValue: nhtsaMake, confidence: 'none' }; - if (nhtsaYear && nhtsaMake) { - const makes = await vehicleDataService.getMakes(pool, nhtsaYear); - make = this.matchField(nhtsaMake, makes); + let make: MatchedField = { value: null, sourceValue: sourceMake, confidence: 'none' }; + if (sourceYear && sourceMake) { + const makes = await vehicleDataService.getMakes(pool, sourceYear); + make = this.matchField(sourceMake, makes); } // Match model against dropdown options - let model: MatchedField = { value: null, nhtsaValue: nhtsaModel, confidence: 'none' }; - if (nhtsaYear && make.value && nhtsaModel) { - const models = await vehicleDataService.getModels(pool, nhtsaYear, make.value); - model = this.matchField(nhtsaModel, models); + let model: MatchedField = { value: null, sourceValue: sourceModel, confidence: 'none' }; + if (sourceYear && make.value && sourceModel) { + const models = await vehicleDataService.getModels(pool, sourceYear, make.value); + model = this.matchField(sourceModel, models); } // Match trim against dropdown options - let trimLevel: MatchedField = { value: null, nhtsaValue: nhtsaTrim, 
confidence: 'none' }; - if (nhtsaYear && make.value && model.value && nhtsaTrim) { - const trims = await vehicleDataService.getTrims(pool, nhtsaYear, make.value, model.value); - trimLevel = this.matchField(nhtsaTrim, trims); + let trimLevel: MatchedField = { value: null, sourceValue: sourceTrim, confidence: 'none' }; + if (sourceYear && make.value && model.value && sourceTrim) { + const trims = await vehicleDataService.getTrims(pool, sourceYear, make.value, model.value); + trimLevel = this.matchField(sourceTrim, trims); } // Match engine against dropdown options - let engine: MatchedField = { value: null, nhtsaValue: nhtsaEngine, confidence: 'none' }; - if (nhtsaYear && make.value && model.value && trimLevel.value && nhtsaEngine) { - const engines = await vehicleDataService.getEngines(pool, nhtsaYear, make.value, model.value, trimLevel.value); - engine = this.matchField(nhtsaEngine, engines); + let engine: MatchedField = { value: null, sourceValue: sourceEngine, confidence: 'none' }; + if (sourceYear && make.value && model.value && trimLevel.value && sourceEngine) { + const engines = await vehicleDataService.getEngines(pool, sourceYear, make.value, model.value, trimLevel.value); + engine = this.matchField(sourceEngine, engines); } // Match transmission against dropdown options - let transmission: MatchedField = { value: null, nhtsaValue: nhtsaTransmission, confidence: 'none' }; - if (nhtsaYear && make.value && model.value && trimLevel.value && nhtsaTransmission) { - const transmissions = await vehicleDataService.getTransmissionsForTrim(pool, nhtsaYear, make.value, model.value, trimLevel.value); - transmission = this.matchField(nhtsaTransmission, transmissions); + let transmission: MatchedField = { value: null, sourceValue: sourceTransmission, confidence: 'none' }; + if (sourceYear && make.value && model.value && trimLevel.value && sourceTransmission) { + const transmissions = await vehicleDataService.getTransmissionsForTrim(pool, sourceYear, make.value, 
model.value, trimLevel.value); + transmission = this.matchField(sourceTransmission, transmissions); } // Body type, drive type, and fuel type are display-only (no dropdown matching) const bodyType: MatchedField = { value: null, - nhtsaValue: nhtsaBodyType, + sourceValue: sourceBodyType, confidence: 'none' }; const driveType: MatchedField = { value: null, - nhtsaValue: nhtsaDriveType, + sourceValue: sourceDriveType, confidence: 'none' }; const fuelType: MatchedField = { value: null, - nhtsaValue: nhtsaFuelType, + sourceValue: sourceFuelType, confidence: 'none' }; @@ -754,42 +820,42 @@ export class VehiclesService { * Returns the matched dropdown value with confidence level * Matching order: exact -> normalized -> prefix -> contains */ - private matchField(nhtsaValue: string, options: string[]): MatchedField { - if (!nhtsaValue || options.length === 0) { - return { value: null, nhtsaValue, confidence: 'none' }; + private matchField(sourceValue: string, options: string[]): MatchedField { + if (!sourceValue || options.length === 0) { + return { value: null, sourceValue, confidence: 'none' }; } - const normalizedNhtsa = nhtsaValue.toLowerCase().trim(); + const normalizedSource = sourceValue.toLowerCase().trim(); // Try exact case-insensitive match - const exactMatch = options.find(opt => opt.toLowerCase().trim() === normalizedNhtsa); + const exactMatch = options.find(opt => opt.toLowerCase().trim() === normalizedSource); if (exactMatch) { - return { value: exactMatch, nhtsaValue, confidence: 'high' }; + return { value: exactMatch, sourceValue, confidence: 'high' }; } // Try normalized comparison (remove special chars) const normalizeForCompare = (s: string) => s.toLowerCase().replace(/[^a-z0-9]/g, ''); - const normalizedNhtsaClean = normalizeForCompare(nhtsaValue); + const normalizedSourceClean = normalizeForCompare(sourceValue); - const normalizedMatch = options.find(opt => normalizeForCompare(opt) === normalizedNhtsaClean); + const normalizedMatch = options.find(opt 
=> normalizeForCompare(opt) === normalizedSourceClean); if (normalizedMatch) { - return { value: normalizedMatch, nhtsaValue, confidence: 'medium' }; + return { value: normalizedMatch, sourceValue, confidence: 'medium' }; } - // Try prefix match - option starts with NHTSA value - const prefixMatch = options.find(opt => opt.toLowerCase().trim().startsWith(normalizedNhtsa)); + // Try prefix match - option starts with source value + const prefixMatch = options.find(opt => opt.toLowerCase().trim().startsWith(normalizedSource)); if (prefixMatch) { - return { value: prefixMatch, nhtsaValue, confidence: 'medium' }; + return { value: prefixMatch, sourceValue, confidence: 'medium' }; } - // Try contains match - option contains NHTSA value - const containsMatch = options.find(opt => opt.toLowerCase().trim().includes(normalizedNhtsa)); + // Try contains match - option contains source value + const containsMatch = options.find(opt => opt.toLowerCase().trim().includes(normalizedSource)); if (containsMatch) { - return { value: containsMatch, nhtsaValue, confidence: 'medium' }; + return { value: containsMatch, sourceValue, confidence: 'medium' }; } - // No match found - return NHTSA value as hint with no match - return { value: null, nhtsaValue, confidence: 'none' }; + // No match found - return source value as hint with no match + return { value: null, sourceValue, confidence: 'none' }; } private toResponse(vehicle: Vehicle): VehicleResponse { diff --git a/backend/src/features/vehicles/domain/vehicles.types.ts b/backend/src/features/vehicles/domain/vehicles.types.ts index e1380de..94a69a3 100644 --- a/backend/src/features/vehicles/domain/vehicles.types.ts +++ b/backend/src/features/vehicles/domain/vehicles.types.ts @@ -215,3 +215,53 @@ export interface TCOResponse { distanceUnit: string; currencyCode: string; } + +/** Confidence level for matched dropdown values */ +export type MatchConfidence = 'high' | 'medium' | 'none'; + +/** Matched field with confidence indicator */ 
+export interface MatchedField { + value: T | null; + sourceValue: string | null; + confidence: MatchConfidence; +} + +/** + * Decoded vehicle data with match confidence per field. + * Maps VIN decode response fields to internal field names. + */ +export interface DecodedVehicleData { + year: MatchedField; + make: MatchedField; + model: MatchedField; + trimLevel: MatchedField; + bodyType: MatchedField; + driveType: MatchedField; + fuelType: MatchedField; + engine: MatchedField; + transmission: MatchedField; +} + +/** Cached VIN data from vin_cache table */ +export interface VinCacheEntry { + vin: string; + make: string | null; + model: string | null; + year: number | null; + engineType: string | null; + bodyType: string | null; + rawData: import('../../ocr/domain/ocr.types').VinDecodeResponse; + cachedAt: Date; +} + +/** VIN decode request body */ +export interface DecodeVinRequest { + vin: string; +} + +/** VIN decode error response */ +export interface VinDecodeError { + error: 'INVALID_VIN' | 'VIN_DECODE_FAILED' | 'TIER_REQUIRED'; + message: string; + details?: string; +} diff --git a/config/app/ci.yml b/config/app/ci.yml index b84efff..8bbc4c3 100755 --- a/config/app/ci.yml +++ b/config/app/ci.yml @@ -22,11 +22,6 @@ platform: url: http://mvp-platform-vehicles-api:8000 timeout: 5s -external: - vpic: - url: https://vpic.nhtsa.dot.gov/api/vehicles - timeout: 10s - service: name: mvp-backend diff --git a/config/app/production.yml.example b/config/app/production.yml.example index 7d6226b..a956cec 100755 --- a/config/app/production.yml.example +++ b/config/app/production.yml.example @@ -21,5 +21,3 @@ auth0: domain: motovaultpro.us.auth0.com audience: https://api.motovaultpro.com -external: - vpic_api_url: https://vpic.nhtsa.dot.gov/api/vehicles diff --git a/config/shared/production.yml b/config/shared/production.yml index 0748010..b121295 100755 --- a/config/shared/production.yml +++ b/config/shared/production.yml @@ -107,9 +107,6 @@ external_services: google_maps: 
base_url: https://maps.googleapis.com/maps/api - vpic: - base_url: https://vpic.nhtsa.dot.gov/api/vehicles - # Development Configuration development: debug_enabled: false From f59042105829d54d1f0c8c190bd7cb408297bfe9 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Wed, 18 Feb 2026 21:51:38 -0600 Subject: [PATCH 4/9] chore: remove NHTSA code and update documentation (refs #227) Delete vehicles/external/nhtsa/ directory (3 files), remove VPICVariable and VPICResponse from platform models. Update all documentation to reflect Gemini VIN decode via OCR service architecture. Co-Authored-By: Claude Opus 4.6 --- backend/src/features/ocr/CLAUDE.md | 2 +- backend/src/features/platform/README.md | 6 +- .../src/features/platform/models/responses.ts | 16 -- backend/src/features/vehicles/CLAUDE.md | 2 +- backend/src/features/vehicles/README.md | 36 ++- .../src/features/vehicles/external/CLAUDE.md | 6 - .../src/features/vehicles/external/README.md | 2 +- .../features/vehicles/external/nhtsa/index.ts | 16 -- .../vehicles/external/nhtsa/nhtsa.client.ts | 235 ------------------ .../vehicles/external/nhtsa/nhtsa.types.ts | 96 ------- docs/PLATFORM-SERVICES.md | 2 +- docs/TESTING.md | 10 +- docs/USER-GUIDE.md | 4 +- ocr/CLAUDE.md | 2 +- ocr/app/CLAUDE.md | 2 +- ocr/app/engines/CLAUDE.md | 6 +- 16 files changed, 35 insertions(+), 408 deletions(-) delete mode 100644 backend/src/features/vehicles/external/nhtsa/index.ts delete mode 100644 backend/src/features/vehicles/external/nhtsa/nhtsa.client.ts delete mode 100644 backend/src/features/vehicles/external/nhtsa/nhtsa.types.ts diff --git a/backend/src/features/ocr/CLAUDE.md b/backend/src/features/ocr/CLAUDE.md index e57bce8..4ceaa93 100644 --- a/backend/src/features/ocr/CLAUDE.md +++ b/backend/src/features/ocr/CLAUDE.md @@ -37,7 +37,7 @@ Backend proxy for the Python OCR microservice. 
Handles authentication, tier gati | File | What | When to read | | ---- | ---- | ------------ | -| `ocr-client.ts` | HTTP client to mvp-ocr Python service (extract, extractVin, extractReceipt, submitJob, submitManualJob, getJobStatus, isHealthy) | OCR service communication, error handling | +| `ocr-client.ts` | HTTP client to mvp-ocr Python service (extract, extractVin, extractReceipt, decodeVin, submitJob, submitManualJob, getJobStatus, isHealthy) | OCR service communication, error handling | ## tests/ diff --git a/backend/src/features/platform/README.md b/backend/src/features/platform/README.md index 267de43..5c9ec55 100644 --- a/backend/src/features/platform/README.md +++ b/backend/src/features/platform/README.md @@ -117,7 +117,7 @@ platform/ When implemented, VIN decoding will use: 1. **Cache First**: Check Redis (7-day TTL for success, 1-hour for failures) 2. **PostgreSQL**: Database function for high-confidence decode -3. **vPIC Fallback**: NHTSA vPIC API with circuit breaker protection +3. **OCR Service Fallback**: Gemini VIN decode via OCR service 4. 
**Graceful Degradation**: Return meaningful errors when all sources fail ### Database Schema @@ -164,7 +164,7 @@ When VIN decoding is implemented: ### External APIs (Planned/Future) When VIN decoding is implemented: -- **NHTSA vPIC**: https://vpic.nhtsa.dot.gov/api (VIN decoding fallback) +- **OCR Service**: Gemini VIN decode via mvp-ocr (VIN decoding fallback) ### Database Tables - **vehicle_options** - Hierarchical vehicle data (years, makes, models, trims, engines, transmissions) @@ -269,7 +269,7 @@ npm run lint ## Future Considerations ### Planned Features -- VIN decoding endpoint with PostgreSQL + vPIC fallback +- VIN decoding endpoint with PostgreSQL + Gemini/OCR service fallback - Circuit breaker pattern for external API resilience ### Potential Enhancements diff --git a/backend/src/features/platform/models/responses.ts b/backend/src/features/platform/models/responses.ts index c2770c8..ea50117 100644 --- a/backend/src/features/platform/models/responses.ts +++ b/backend/src/features/platform/models/responses.ts @@ -61,19 +61,3 @@ export interface VINDecodeResponse { error?: string; } -/** - * vPIC API response structure (NHTSA) - */ -export interface VPICVariable { - Variable: string; - Value: string | null; - ValueId: string | null; - VariableId: number; -} - -export interface VPICResponse { - Count: number; - Message: string; - SearchCriteria: string; - Results: VPICVariable[]; -} diff --git a/backend/src/features/vehicles/CLAUDE.md b/backend/src/features/vehicles/CLAUDE.md index da8c492..75c5a3f 100644 --- a/backend/src/features/vehicles/CLAUDE.md +++ b/backend/src/features/vehicles/CLAUDE.md @@ -16,6 +16,6 @@ | `data/` | Repository, database queries | Database operations | | `docs/` | Feature-specific documentation | Vehicle design details | | `events/` | Event handlers and emitters | Cross-feature event integration | -| `external/` | External service integrations (NHTSA) | VIN decoding, third-party APIs | +| `external/` | External service integrations | 
VIN decoding, third-party APIs | | `migrations/` | Database schema | Schema changes | | `tests/` | Unit and integration tests | Adding or modifying tests | diff --git a/backend/src/features/vehicles/README.md b/backend/src/features/vehicles/README.md index f8ce50d..f198079 100644 --- a/backend/src/features/vehicles/README.md +++ b/backend/src/features/vehicles/README.md @@ -13,7 +13,7 @@ Primary entity for vehicle management consuming MVP Platform Vehicles Service. H - `DELETE /api/vehicles/:id` - Soft delete vehicle ### VIN Decoding (Pro/Enterprise Only) -- `POST /api/vehicles/decode-vin` - Decode VIN using NHTSA vPIC API +- `POST /api/vehicles/decode-vin` - Decode VIN using Gemini via OCR service ### Hierarchical Vehicle Dropdowns **Status**: Vehicles service now proxies the platform vehicle catalog to provide fully dynamic dropdowns. Each selection step filters the next list, ensuring only valid combinations are shown. @@ -104,11 +104,7 @@ vehicles/ ├── data/ # Database layer │ └── vehicles.repository.ts ├── external/ # External service integrations -│ ├── CLAUDE.md # Integration pattern docs -│ └── nhtsa/ # NHTSA vPIC API client -│ ├── nhtsa.client.ts -│ ├── nhtsa.types.ts -│ └── index.ts +│ └── CLAUDE.md # Integration pattern docs ├── migrations/ # Feature schema │ └── 001_create_vehicles_tables.sql ├── tests/ # All tests @@ -121,14 +117,14 @@ vehicles/ ## Key Features -### 🔍 VIN Decoding (NHTSA vPIC API) +### VIN Decoding (Gemini via OCR Service) - **Tier Gating**: Pro and Enterprise users only (`vehicle.vinDecode` feature key) -- **NHTSA API**: Calls official NHTSA vPIC API for authoritative vehicle data +- **Gemini**: Calls OCR service Gemini VIN decode for authoritative vehicle data - **Caching**: Results cached in `vin_cache` table (1-year TTL, VIN data is static) - **Validation**: 17-character VIN format, excludes I/O/Q characters - **Matching**: Case-insensitive exact match against dropdown options - **Confidence Levels**: High (exact match), Medium 
(normalized match), None (hint only) -- **Timeout**: 5-second timeout for NHTSA API calls +- **Timeout**: 5-second timeout for OCR service calls #### Decode VIN Request ```json @@ -140,15 +136,15 @@ Authorization: Bearer Response (200): { - "year": { "value": 2021, "nhtsaValue": "2021", "confidence": "high" }, - "make": { "value": "Honda", "nhtsaValue": "HONDA", "confidence": "high" }, - "model": { "value": "Civic", "nhtsaValue": "Civic", "confidence": "high" }, - "trimLevel": { "value": "EX", "nhtsaValue": "EX", "confidence": "high" }, - "engine": { "value": null, "nhtsaValue": "2.0L L4 DOHC 16V", "confidence": "none" }, - "transmission": { "value": null, "nhtsaValue": "CVT", "confidence": "none" }, - "bodyType": { "value": null, "nhtsaValue": "Sedan", "confidence": "none" }, - "driveType": { "value": null, "nhtsaValue": "FWD", "confidence": "none" }, - "fuelType": { "value": null, "nhtsaValue": "Gasoline", "confidence": "none" } + "year": { "value": 2021, "sourceValue": "2021", "confidence": "high" }, + "make": { "value": "Honda", "sourceValue": "HONDA", "confidence": "high" }, + "model": { "value": "Civic", "sourceValue": "Civic", "confidence": "high" }, + "trimLevel": { "value": "EX", "sourceValue": "EX", "confidence": "high" }, + "engine": { "value": null, "sourceValue": "2.0L L4 DOHC 16V", "confidence": "none" }, + "transmission": { "value": null, "sourceValue": "CVT", "confidence": "none" }, + "bodyType": { "value": null, "sourceValue": "Sedan", "confidence": "none" }, + "driveType": { "value": null, "sourceValue": "FWD", "confidence": "none" }, + "fuelType": { "value": null, "sourceValue": "Gasoline", "confidence": "none" } } Error (400 - Invalid VIN): @@ -157,7 +153,7 @@ Error (400 - Invalid VIN): Error (403 - Tier Required): { "error": "TIER_REQUIRED", "requiredTier": "pro", "currentTier": "free", ... 
} -Error (502 - NHTSA Failure): +Error (502 - OCR Service Failure): { "error": "VIN_DECODE_FAILED", "message": "Unable to decode VIN from external service" } ``` @@ -230,7 +226,7 @@ Error (502 - NHTSA Failure): ## Testing ### Unit Tests -- `vehicles.service.test.ts` - Business logic with mocked dependencies (VIN decode, caching, CRUD operations) +- `vehicles.service.test.ts` - Business logic with mocked dependencies (VIN decode via OCR service mock, caching, CRUD operations) ### Integration Tests - `vehicles.integration.test.ts` - Complete API workflow with test database (create, read, update, delete vehicles) diff --git a/backend/src/features/vehicles/external/CLAUDE.md b/backend/src/features/vehicles/external/CLAUDE.md index fdcb3c7..bc2a3e6 100644 --- a/backend/src/features/vehicles/external/CLAUDE.md +++ b/backend/src/features/vehicles/external/CLAUDE.md @@ -5,9 +5,3 @@ | File | What | When to read | | ---- | ---- | ------------ | | `README.md` | Integration patterns, adding new services | Understanding external service conventions | - -## Subdirectories - -| Directory | What | When to read | -| --------- | ---- | ------------ | -| `nhtsa/` | NHTSA vPIC API client for VIN decoding | VIN decode feature work | diff --git a/backend/src/features/vehicles/external/README.md b/backend/src/features/vehicles/external/README.md index 0198ff9..1fc6401 100644 --- a/backend/src/features/vehicles/external/README.md +++ b/backend/src/features/vehicles/external/README.md @@ -15,7 +15,7 @@ Each integration follows this structure: ## Adding New Integrations 1. Create subdirectory: `external/{service}/` -2. Add client: `{service}.client.ts` following NHTSAClient pattern +2. Add client: `{service}.client.ts` following the axios-based client pattern 3. Add types: `{service}.types.ts` 4. Update `CLAUDE.md` with new directory 5. 
Add tests in `tests/unit/{service}.client.test.ts` diff --git a/backend/src/features/vehicles/external/nhtsa/index.ts b/backend/src/features/vehicles/external/nhtsa/index.ts deleted file mode 100644 index 9b2a5d4..0000000 --- a/backend/src/features/vehicles/external/nhtsa/index.ts +++ /dev/null @@ -1,16 +0,0 @@ -/** - * @ai-summary NHTSA vPIC integration exports - * @ai-context Public API for VIN decoding functionality - */ - -export { NHTSAClient } from './nhtsa.client'; -export type { - NHTSADecodeResponse, - NHTSAResult, - DecodedVehicleData, - MatchedField, - MatchConfidence, - VinCacheEntry, - DecodeVinRequest, - VinDecodeError, -} from './nhtsa.types'; diff --git a/backend/src/features/vehicles/external/nhtsa/nhtsa.client.ts b/backend/src/features/vehicles/external/nhtsa/nhtsa.client.ts deleted file mode 100644 index 1a3a8a2..0000000 --- a/backend/src/features/vehicles/external/nhtsa/nhtsa.client.ts +++ /dev/null @@ -1,235 +0,0 @@ -/** - * @ai-summary NHTSA vPIC API client for VIN decoding - * @ai-context Fetches vehicle data from NHTSA and caches results - */ - -import axios, { AxiosError } from 'axios'; -import { logger } from '../../../../core/logging/logger'; -import { NHTSADecodeResponse, VinCacheEntry } from './nhtsa.types'; -import { Pool } from 'pg'; - -/** - * VIN validation regex - * - 17 characters - * - Excludes I, O, Q (not used in VINs) - * - Alphanumeric only - */ -const VIN_REGEX = /^[A-HJ-NPR-Z0-9]{17}$/; - -/** - * Cache TTL: 1 year (VIN data is static - vehicle specs don't change) - */ -const CACHE_TTL_SECONDS = 365 * 24 * 60 * 60; - -export class NHTSAClient { - private readonly baseURL = 'https://vpic.nhtsa.dot.gov/api'; - private readonly timeout = 5000; // 5 seconds - - constructor(private readonly pool: Pool) {} - - /** - * Validate VIN format - * @throws Error if VIN format is invalid - */ - validateVin(vin: string): string { - const sanitized = vin.trim().toUpperCase(); - - if (!sanitized) { - throw new Error('VIN is required'); - } 
- - if (!VIN_REGEX.test(sanitized)) { - throw new Error('Invalid VIN format. VIN must be exactly 17 characters and contain only letters (except I, O, Q) and numbers.'); - } - - return sanitized; - } - - /** - * Check cache for existing VIN data - */ - async getCached(vin: string): Promise { - try { - const result = await this.pool.query<{ - vin: string; - make: string | null; - model: string | null; - year: number | null; - engine_type: string | null; - body_type: string | null; - raw_data: NHTSADecodeResponse; - cached_at: Date; - }>( - `SELECT vin, make, model, year, engine_type, body_type, raw_data, cached_at - FROM vin_cache - WHERE vin = $1 - AND cached_at > NOW() - INTERVAL '${CACHE_TTL_SECONDS} seconds'`, - [vin] - ); - - if (result.rows.length === 0) { - return null; - } - - const row = result.rows[0]; - return { - vin: row.vin, - make: row.make, - model: row.model, - year: row.year, - engineType: row.engine_type, - bodyType: row.body_type, - rawData: row.raw_data, - cachedAt: row.cached_at, - }; - } catch (error) { - logger.error('Failed to check VIN cache', { vin, error }); - return null; - } - } - - /** - * Save VIN data to cache - */ - async saveToCache(vin: string, response: NHTSADecodeResponse): Promise { - try { - const findValue = (variable: string): string | null => { - const result = response.Results.find(r => r.Variable === variable); - return result?.Value || null; - }; - - const year = findValue('Model Year'); - const make = findValue('Make'); - const model = findValue('Model'); - const engineType = findValue('Engine Model'); - const bodyType = findValue('Body Class'); - - await this.pool.query( - `INSERT INTO vin_cache (vin, make, model, year, engine_type, body_type, raw_data, cached_at) - VALUES ($1, $2, $3, $4, $5, $6, $7, NOW()) - ON CONFLICT (vin) DO UPDATE SET - make = EXCLUDED.make, - model = EXCLUDED.model, - year = EXCLUDED.year, - engine_type = EXCLUDED.engine_type, - body_type = EXCLUDED.body_type, - raw_data = EXCLUDED.raw_data, - 
cached_at = NOW()`, - [vin, make, model, year ? parseInt(year) : null, engineType, bodyType, JSON.stringify(response)] - ); - - logger.debug('VIN cached', { vin }); - } catch (error) { - logger.error('Failed to cache VIN data', { vin, error }); - // Don't throw - caching failure shouldn't break the decode flow - } - } - - /** - * Decode VIN using NHTSA vPIC API - * @param vin - 17-character VIN - * @returns Raw NHTSA decode response - * @throws Error if VIN is invalid or API call fails - */ - async decodeVin(vin: string): Promise { - // Validate and sanitize VIN - const sanitizedVin = this.validateVin(vin); - - // Check cache first - const cached = await this.getCached(sanitizedVin); - if (cached) { - logger.debug('VIN cache hit', { vin: sanitizedVin }); - return cached.rawData; - } - - // Call NHTSA API - logger.info('Calling NHTSA vPIC API', { vin: sanitizedVin }); - - try { - const response = await axios.get( - `${this.baseURL}/vehicles/decodevin/${sanitizedVin}`, - { - params: { format: 'json' }, - timeout: this.timeout, - } - ); - - // Check for NHTSA-level errors - if (response.data.Count === 0) { - throw new Error('NHTSA returned no results for this VIN'); - } - - // Check for error messages in results - const errorResult = response.data.Results.find( - r => r.Variable === 'Error Code' && r.Value && r.Value !== '0' - ); - if (errorResult) { - const errorText = response.data.Results.find(r => r.Variable === 'Error Text'); - throw new Error(`NHTSA error: ${errorText?.Value || 'Unknown error'}`); - } - - // Cache the successful response - await this.saveToCache(sanitizedVin, response.data); - - return response.data; - } catch (error) { - if (axios.isAxiosError(error)) { - const axiosError = error as AxiosError; - if (axiosError.code === 'ECONNABORTED') { - logger.error('NHTSA API timeout', { vin: sanitizedVin }); - throw new Error('NHTSA API request timed out. 
Please try again.'); - } - if (axiosError.response) { - logger.error('NHTSA API error response', { - vin: sanitizedVin, - status: axiosError.response.status, - data: axiosError.response.data, - }); - throw new Error(`NHTSA API error: ${axiosError.response.status}`); - } - logger.error('NHTSA API network error', { vin: sanitizedVin, message: axiosError.message }); - throw new Error('Unable to connect to NHTSA API. Please try again later.'); - } - throw error; - } - } - - /** - * Extract a specific value from NHTSA response - */ - static extractValue(response: NHTSADecodeResponse, variable: string): string | null { - const result = response.Results.find(r => r.Variable === variable); - return result?.Value?.trim() || null; - } - - /** - * Extract year from NHTSA response - */ - static extractYear(response: NHTSADecodeResponse): number | null { - const value = NHTSAClient.extractValue(response, 'Model Year'); - if (!value) return null; - const parsed = parseInt(value, 10); - return isNaN(parsed) ? null : parsed; - } - - /** - * Extract engine description from NHTSA response - * Combines multiple engine-related fields - */ - static extractEngine(response: NHTSADecodeResponse): string | null { - const engineModel = NHTSAClient.extractValue(response, 'Engine Model'); - if (engineModel) return engineModel; - - // Build engine description from components - const cylinders = NHTSAClient.extractValue(response, 'Engine Number of Cylinders'); - const displacement = NHTSAClient.extractValue(response, 'Displacement (L)'); - const fuelType = NHTSAClient.extractValue(response, 'Fuel Type - Primary'); - - const parts: string[] = []; - if (cylinders) parts.push(`${cylinders}-Cylinder`); - if (displacement) parts.push(`${displacement}L`); - if (fuelType && fuelType !== 'Gasoline') parts.push(fuelType); - - return parts.length > 0 ? 
parts.join(' ') : null; - } -} diff --git a/backend/src/features/vehicles/external/nhtsa/nhtsa.types.ts b/backend/src/features/vehicles/external/nhtsa/nhtsa.types.ts deleted file mode 100644 index 23a18fb..0000000 --- a/backend/src/features/vehicles/external/nhtsa/nhtsa.types.ts +++ /dev/null @@ -1,96 +0,0 @@ -/** - * @ai-summary Type definitions for NHTSA vPIC API - * @ai-context Defines request/response types for VIN decoding - */ - -/** - * Individual result from NHTSA DecodeVin API - */ -export interface NHTSAResult { - Value: string | null; - ValueId: string | null; - Variable: string; - VariableId: number; -} - -/** - * Raw response from NHTSA DecodeVin API - * GET https://vpic.nhtsa.dot.gov/api/vehicles/decodevin/{VIN}?format=json - */ -export interface NHTSADecodeResponse { - Count: number; - Message: string; - SearchCriteria: string; - Results: NHTSAResult[]; -} - -/** - * Confidence level for matched dropdown values - */ -export type MatchConfidence = 'high' | 'medium' | 'none'; - -/** - * Matched field with confidence indicator - */ -export interface MatchedField { - value: T | null; - nhtsaValue: string | null; - confidence: MatchConfidence; -} - -/** - * Decoded vehicle data with match confidence per field - * Maps NHTSA response fields to internal field names (camelCase) - * - * NHTSA Field Mappings: - * - ModelYear -> year - * - Make -> make - * - Model -> model - * - Trim -> trimLevel - * - BodyClass -> bodyType - * - DriveType -> driveType - * - FuelTypePrimary -> fuelType - * - EngineModel / EngineCylinders + EngineDisplacementL -> engine - * - TransmissionStyle -> transmission - */ -export interface DecodedVehicleData { - year: MatchedField; - make: MatchedField; - model: MatchedField; - trimLevel: MatchedField; - bodyType: MatchedField; - driveType: MatchedField; - fuelType: MatchedField; - engine: MatchedField; - transmission: MatchedField; -} - -/** - * Cached VIN data from vin_cache table - */ -export interface VinCacheEntry { - vin: string; 
- make: string | null; - model: string | null; - year: number | null; - engineType: string | null; - bodyType: string | null; - rawData: NHTSADecodeResponse; - cachedAt: Date; -} - -/** - * VIN decode request body - */ -export interface DecodeVinRequest { - vin: string; -} - -/** - * VIN decode error response - */ -export interface VinDecodeError { - error: 'INVALID_VIN' | 'VIN_DECODE_FAILED' | 'TIER_REQUIRED'; - message: string; - details?: string; -} diff --git a/docs/PLATFORM-SERVICES.md b/docs/PLATFORM-SERVICES.md index 8fe3686..d603bf9 100644 --- a/docs/PLATFORM-SERVICES.md +++ b/docs/PLATFORM-SERVICES.md @@ -35,7 +35,7 @@ The platform provides vehicle hierarchical data lookups: VIN decoding is planned but not yet implemented. Future capabilities will include: - `GET /api/platform/vehicle?vin={vin}` - Decode VIN to vehicle details - PostgreSQL-based VIN decode function -- NHTSA vPIC API fallback with circuit breaker +- Gemini VIN decode via OCR service - Redis caching (7-day TTL for successful decodes) **Data Source**: Vehicle data from standardized sources diff --git a/docs/TESTING.md b/docs/TESTING.md index a3a3cd0..166ca2b 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -74,7 +74,7 @@ docker compose exec mvp-frontend npm test -- --coverage Example: `vehicles.service.test.ts` - Tests VIN validation logic -- Tests vehicle creation with mocked vPIC responses +- Tests vehicle creation with mocked OCR service responses - Tests caching behavior with mocked Redis - Tests error handling paths @@ -194,7 +194,7 @@ All 15 features have test suites with unit and/or integration tests: - `vehicles` - Unit + integration tests ### Mock Strategy -- **External APIs**: Completely mocked (vPIC, Google Maps) +- **External APIs**: Completely mocked (OCR service, Google Maps) - **Database**: Real database with transactions - **Redis**: Mocked for unit tests, real for integration - **Auth**: Mocked JWT tokens for protected endpoints @@ -319,9 +319,9 @@ describe('Error 
Handling', () => { ).rejects.toThrow('Invalid VIN format'); }); - it('should handle vPIC API failure', async () => { - mockVpicClient.decode.mockRejectedValue(new Error('API down')); - + it('should handle OCR service failure', async () => { + mockOcrClient.decodeVin.mockRejectedValue(new Error('API down')); + const result = await vehicleService.create(validVehicle, 'user123'); expect(result.make).toBeNull(); // Graceful degradation }); diff --git a/docs/USER-GUIDE.md b/docs/USER-GUIDE.md index 50163f5..86eda4a 100644 --- a/docs/USER-GUIDE.md +++ b/docs/USER-GUIDE.md @@ -644,7 +644,7 @@ When you attempt to use a Pro feature on the Free tier, an **Upgrade Required** ### VIN Camera Scanning and Decode (Pro) -**What it does:** Use your device camera to photograph your vehicle's VIN plate, and the system automatically reads the VIN using OCR (Optical Character Recognition) and decodes it from the NHTSA database. +**What it does:** Use your device camera to photograph your vehicle's VIN plate, and the system automatically reads the VIN using OCR (Optical Character Recognition) and decodes it from the vehicle database. **How to use it:** @@ -655,7 +655,7 @@ When you attempt to use a Pro feature on the Free tier, an **Upgrade Required** 5. A **VIN OCR Review modal** appears showing the detected VIN with confidence indicators 6. Confirm or correct the VIN, then click **Accept** 7. Click the **Decode VIN** button -8. The system queries the NHTSA database and auto-populates: Year, Make, Model, Engine, Transmission, and Trim +8. The system queries the vehicle database and auto-populates: Year, Make, Model, Engine, Transmission, and Trim 9. Review the pre-filled fields and complete the remaining details This eliminates manual data entry errors and ensures accurate vehicle specifications. diff --git a/ocr/CLAUDE.md b/ocr/CLAUDE.md index e25dc65..44c3f1f 100644 --- a/ocr/CLAUDE.md +++ b/ocr/CLAUDE.md @@ -1,6 +1,6 @@ # ocr/ -Python OCR microservice. 
Primary engine: PaddleOCR PP-OCRv4 with optional Google Vision cloud fallback. Gemini 2.5 Flash for maintenance manual PDF extraction. Pluggable engine abstraction in `app/engines/`. +Python OCR microservice. Primary engine: PaddleOCR PP-OCRv4 with optional Google Vision cloud fallback. Gemini 2.5 Flash for maintenance manual PDF extraction and VIN decode. Pluggable engine abstraction in `app/engines/`. ## Files diff --git a/ocr/app/CLAUDE.md b/ocr/app/CLAUDE.md index a91a2be..bc0dfcc 100644 --- a/ocr/app/CLAUDE.md +++ b/ocr/app/CLAUDE.md @@ -19,7 +19,7 @@ Python OCR microservice (FastAPI). Primary engine: PaddleOCR PP-OCRv4 with optio | `models/` | Data models and schemas | Request/response types | | `patterns/` | Regex patterns and service name mapping (27 maintenance subtypes) | Pattern matching rules, service categorization | | `preprocessors/` | Image preprocessing pipeline | Image preparation before OCR | -| `routers/` | FastAPI route handlers (/extract, /extract/receipt, /extract/manual, /jobs) | API endpoint changes | +| `routers/` | FastAPI route handlers (/extract, /extract/receipt, /extract/manual, /decode, /jobs) | API endpoint changes | | `services/` | Business logic services (job queue with Redis) | Core OCR processing, async job management | | `table_extraction/` | Table detection and parsing | Structured data extraction from images | | `validators/` | Input validation | Validation rules | diff --git a/ocr/app/engines/CLAUDE.md b/ocr/app/engines/CLAUDE.md index 7df7de1..68a4e82 100644 --- a/ocr/app/engines/CLAUDE.md +++ b/ocr/app/engines/CLAUDE.md @@ -3,7 +3,7 @@ OCR engine abstraction layer. Two categories of engines: 1. **OcrEngine subclasses** (image-to-text): PaddleOCR, Google Vision, Hybrid. Accept image bytes, return text + confidence + word boxes. -2. **GeminiEngine** (PDF-to-structured-data): Standalone module for maintenance schedule extraction via Vertex AI. Accepts PDF bytes, returns structured JSON. 
Not an OcrEngine subclass because the interface signatures differ. +2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via Vertex AI. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ. ## Files @@ -15,7 +15,7 @@ OCR engine abstraction layer. Two categories of engines: | `cloud_engine.py` | Google Vision TEXT_DETECTION fallback engine (WIF authentication) | Cloud OCR configuration, API quota | | `hybrid_engine.py` | Combines primary + fallback engine with confidence threshold switching | Engine selection logic, fallback behavior | | `engine_factory.py` | Factory function and engine registry for instantiation | Adding new engine types | -| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, Gemini configuration | +| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, VIN decode, Gemini configuration | ## Engine Selection @@ -30,4 +30,4 @@ create_engine(config) HybridEngine (tries primary, falls back if confidence < threshold) ``` -GeminiEngine is created independently by ManualExtractor, not through the engine factory. +GeminiEngine is created independently by ManualExtractor and the VIN decode router, not through the engine factory. From d96736789e7a5f5957b2f6db8899d7672cc8b71f Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Wed, 18 Feb 2026 21:51:45 -0600 Subject: [PATCH 5/9] feat: update frontend for Gemini VIN decode (refs #228) Rename nhtsaValue to sourceValue in frontend MatchedField type and VinOcrReviewModal component. 
Update NHTSA references to vehicle database across guide pages, hooks, and API documentation. Co-Authored-By: Claude Opus 4.6 --- .../src/features/vehicles/api/vehicles.api.ts | 2 +- .../vehicles/components/VehicleForm.tsx | 2 +- .../vehicles/components/VinOcrReviewModal.tsx | 36 +++++++++---------- .../src/features/vehicles/hooks/useVinOcr.ts | 6 ++-- .../features/vehicles/types/vehicles.types.ts | 4 +-- .../sections/SubscriptionSection.tsx | 4 +-- .../GuidePage/sections/VehiclesSection.tsx | 2 +- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/frontend/src/features/vehicles/api/vehicles.api.ts b/frontend/src/features/vehicles/api/vehicles.api.ts index ed409c7..d4c93f1 100644 --- a/frontend/src/features/vehicles/api/vehicles.api.ts +++ b/frontend/src/features/vehicles/api/vehicles.api.ts @@ -82,7 +82,7 @@ export const vehiclesApi = { }, /** - * Decode VIN using NHTSA vPIC API + * Decode VIN using VIN decode service * Requires Pro or Enterprise tier */ decodeVin: async (vin: string): Promise => { diff --git a/frontend/src/features/vehicles/components/VehicleForm.tsx b/frontend/src/features/vehicles/components/VehicleForm.tsx index cba6faa..bfe2955 100644 --- a/frontend/src/features/vehicles/components/VehicleForm.tsx +++ b/frontend/src/features/vehicles/components/VehicleForm.tsx @@ -507,7 +507,7 @@ export const VehicleForm: React.FC = ({ /** * Handle VIN decode button click - * Calls NHTSA API and populates empty form fields + * Calls VIN decode service and populates empty form fields */ const handleDecodeVin = async () => { // Check tier access first diff --git a/frontend/src/features/vehicles/components/VinOcrReviewModal.tsx b/frontend/src/features/vehicles/components/VinOcrReviewModal.tsx index acc05b3..b45dae2 100644 --- a/frontend/src/features/vehicles/components/VinOcrReviewModal.tsx +++ b/frontend/src/features/vehicles/components/VinOcrReviewModal.tsx @@ -95,8 +95,8 @@ const ReviewContent: React.FC<{ const [selectedEngine, setSelectedEngine] 
= useState(''); const [selectedTransmission, setSelectedTransmission] = useState(''); - // NHTSA reference values for unmatched fields - const [nhtsaRefs, setNhtsaRefs] = useState>({}); + // Source reference values for unmatched fields + const [sourceRefs, setSourceRefs] = useState>({}); // Initialize dropdown options and pre-select decoded values useEffect(() => { @@ -109,13 +109,13 @@ const ReviewContent: React.FC<{ if (!decodedVehicle) return; - // Store NHTSA reference values for unmatched fields - setNhtsaRefs({ - make: decodedVehicle.make.confidence === 'none' ? decodedVehicle.make.nhtsaValue : null, - model: decodedVehicle.model.confidence === 'none' ? decodedVehicle.model.nhtsaValue : null, - trim: decodedVehicle.trimLevel.confidence === 'none' ? decodedVehicle.trimLevel.nhtsaValue : null, - engine: decodedVehicle.engine.confidence === 'none' ? decodedVehicle.engine.nhtsaValue : null, - transmission: decodedVehicle.transmission.confidence === 'none' ? decodedVehicle.transmission.nhtsaValue : null, + // Store source reference values for unmatched fields + setSourceRefs({ + make: decodedVehicle.make.confidence === 'none' ? decodedVehicle.make.sourceValue : null, + model: decodedVehicle.model.confidence === 'none' ? decodedVehicle.model.sourceValue : null, + trim: decodedVehicle.trimLevel.confidence === 'none' ? decodedVehicle.trimLevel.sourceValue : null, + engine: decodedVehicle.engine.confidence === 'none' ? decodedVehicle.engine.sourceValue : null, + transmission: decodedVehicle.transmission.confidence === 'none' ? 
decodedVehicle.transmission.sourceValue : null, }); const yearValue = decodedVehicle.year.value; @@ -277,9 +277,9 @@ const ReviewContent: React.FC<{ }); }; - /** Show NHTSA reference when field had no dropdown match */ - const nhtsaHint = (field: string) => { - const ref = nhtsaRefs[field]; + /** Show source reference when field had no dropdown match */ + const sourceHint = (field: string) => { + const ref = sourceRefs[field]; if (!ref) return null; // Only show hint when no value is currently selected const selected: Record = { @@ -292,7 +292,7 @@ const ReviewContent: React.FC<{ if (selected[field]) return null; return (

- NHTSA returned: {ref} + Decoded value: {ref}

); }; @@ -409,7 +409,7 @@ const ReviewContent: React.FC<{ ))} - {nhtsaHint('make')} + {sourceHint('make')} {/* Model */} @@ -439,7 +439,7 @@ const ReviewContent: React.FC<{ ))} - {nhtsaHint('model')} + {sourceHint('model')} {/* Trim */} @@ -469,7 +469,7 @@ const ReviewContent: React.FC<{ ))} - {nhtsaHint('trim')} + {sourceHint('trim')} {/* Engine */} @@ -499,7 +499,7 @@ const ReviewContent: React.FC<{ ))} - {nhtsaHint('engine')} + {sourceHint('engine')} {/* Transmission */} @@ -529,7 +529,7 @@ const ReviewContent: React.FC<{ ))} - {nhtsaHint('transmission')} + {sourceHint('transmission')} diff --git a/frontend/src/features/vehicles/hooks/useVinOcr.ts b/frontend/src/features/vehicles/hooks/useVinOcr.ts index 9554259..11d7e0b 100644 --- a/frontend/src/features/vehicles/hooks/useVinOcr.ts +++ b/frontend/src/features/vehicles/hooks/useVinOcr.ts @@ -1,5 +1,5 @@ /** - * @ai-summary Hook to orchestrate VIN OCR extraction and NHTSA decode + * @ai-summary Hook to orchestrate VIN OCR extraction and VIN decode * @ai-context Handles camera capture -> OCR extraction -> VIN decode flow */ @@ -109,7 +109,7 @@ export function useVinOcr(): UseVinOcrReturn { ); } - // Step 2: Decode VIN using NHTSA + // Step 2: Decode VIN setProcessingStep('decoding'); let decodedVehicle: DecodedVehicleData | null = null; let decodeError: string | null = null; @@ -121,7 +121,7 @@ export function useVinOcr(): UseVinOcrReturn { if (err.response?.data?.error === 'TIER_REQUIRED') { decodeError = 'VIN decode requires Pro or Enterprise subscription'; } else if (err.response?.data?.error === 'INVALID_VIN') { - decodeError = 'VIN format is not recognized by NHTSA'; + decodeError = 'VIN format is not recognized'; } else { decodeError = 'Unable to decode vehicle information'; } diff --git a/frontend/src/features/vehicles/types/vehicles.types.ts b/frontend/src/features/vehicles/types/vehicles.types.ts index 4276714..01d8642 100644 --- a/frontend/src/features/vehicles/types/vehicles.types.ts +++ 
b/frontend/src/features/vehicles/types/vehicles.types.ts @@ -72,12 +72,12 @@ export type MatchConfidence = 'high' | 'medium' | 'none'; */ export interface MatchedField { value: T | null; - nhtsaValue: string | null; + sourceValue: string | null; confidence: MatchConfidence; } /** - * Decoded vehicle data from NHTSA vPIC API + * Decoded vehicle data from VIN decode * with match confidence per field */ export interface DecodedVehicleData { diff --git a/frontend/src/pages/GuidePage/sections/SubscriptionSection.tsx b/frontend/src/pages/GuidePage/sections/SubscriptionSection.tsx index 5a6c77a..c76a0b9 100644 --- a/frontend/src/pages/GuidePage/sections/SubscriptionSection.tsx +++ b/frontend/src/pages/GuidePage/sections/SubscriptionSection.tsx @@ -43,7 +43,7 @@ export const SubscriptionSection = () => {

- What it does: Use your device camera to photograph your vehicle's VIN plate, and the system automatically reads the VIN using OCR (Optical Character Recognition) and decodes it from the NHTSA database. + What it does: Use your device camera to photograph your vehicle's VIN plate, and the system automatically reads the VIN using OCR (Optical Character Recognition) and decodes it from the vehicle database.

@@ -58,7 +58,7 @@ export const SubscriptionSection = () => {

  • A VIN OCR Review modal appears showing the detected VIN with confidence indicators
  • Confirm or correct the VIN, then click Accept
  • Click the Decode VIN button
  • -
  • The system queries the NHTSA database and auto-populates: Year, Make, Model, Engine, Transmission, and Trim
  • +
  • The system queries the vehicle database and auto-populates: Year, Make, Model, Engine, Transmission, and Trim
  • Review the pre-filled fields and complete the remaining details
  • diff --git a/frontend/src/pages/GuidePage/sections/VehiclesSection.tsx b/frontend/src/pages/GuidePage/sections/VehiclesSection.tsx index b363a54..09099cc 100644 --- a/frontend/src/pages/GuidePage/sections/VehiclesSection.tsx +++ b/frontend/src/pages/GuidePage/sections/VehiclesSection.tsx @@ -141,7 +141,7 @@ export const VehiclesSection = () => { From 361f58d7c600c656a16a0e57d0bcfcaafe4b834f Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Thu, 19 Feb 2026 20:14:54 -0600 Subject: [PATCH 6/9] fix: resolve VIN decode cache race, fuzzy matching, and silent failure (refs #229) Prevent lower-confidence Gemini results from overwriting higher-confidence cache entries, add reverse-contains matching so values like "X5 xDrive35i" match DB option "X5", and show amber hint when dropdown matching fails. Co-Authored-By: Claude Opus 4.6 --- .../vehicles/domain/vehicles.service.ts | 33 +++++++++++++++++-- .../vehicles/components/VehicleForm.tsx | 20 +++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/backend/src/features/vehicles/domain/vehicles.service.ts b/backend/src/features/vehicles/domain/vehicles.service.ts index bb5b668..cbce71a 100644 --- a/backend/src/features/vehicles/domain/vehicles.service.ts +++ b/backend/src/features/vehicles/domain/vehicles.service.ts @@ -648,11 +648,12 @@ export class VehiclesService { engine_type = EXCLUDED.engine_type, body_type = EXCLUDED.body_type, raw_data = EXCLUDED.raw_data, - cached_at = NOW()`, - [vin, response.make, response.model, response.year, response.engine, response.bodyType, JSON.stringify(response)] + cached_at = NOW() + WHERE (vin_cache.raw_data->>'confidence')::float <= $8`, + [vin, response.make, response.model, response.year, response.engine, response.bodyType, JSON.stringify(response), response.confidence ?? 
1] ); - logger.debug('VIN cached', { vin }); + logger.debug('VIN cached', { vin, confidence: response.confidence }); } catch (error) { logger.error('Failed to cache VIN data', { vin, error }); // Don't throw - caching failure shouldn't break the decode flow @@ -741,6 +742,12 @@ export class VehiclesService { const sourceEngine = response.engine; const sourceTransmission = response.transmission; + logger.debug('VIN decode raw values', { + vin: response.vin, + year: sourceYear, make: sourceMake, model: sourceModel, + trim: sourceTrim, confidence: response.confidence + }); + // Year is always high confidence if present (exact numeric match) const year: MatchedField = { value: sourceYear, @@ -854,6 +861,26 @@ export class VehiclesService { return { value: containsMatch, sourceValue, confidence: 'medium' }; } + // Try reverse contains - source value contains option (e.g., source "X5 xDrive35i" contains option "X5") + // Prefer the longest matching option to avoid false positives (e.g., "X5 M" over "X5") + const reverseMatches = options.filter(opt => { + const normalizedOpt = opt.toLowerCase().trim(); + return normalizedSource.includes(normalizedOpt) && normalizedOpt.length > 0; + }); + if (reverseMatches.length > 0) { + const bestMatch = reverseMatches.reduce((a, b) => a.length >= b.length ? 
a : b); + return { value: bestMatch, sourceValue, confidence: 'medium' }; + } + + // Try word-start match - source starts with option + separator (e.g., "X5 xDrive" starts with "X5 ") + const wordStartMatch = options.find(opt => { + const normalizedOpt = opt.toLowerCase().trim(); + return normalizedSource.startsWith(normalizedOpt + ' ') || normalizedSource.startsWith(normalizedOpt + '-'); + }); + if (wordStartMatch) { + return { value: wordStartMatch, sourceValue, confidence: 'medium' }; + } + // No match found - return source value as hint with no match return { value: null, sourceValue, confidence: 'none' }; } diff --git a/frontend/src/features/vehicles/components/VehicleForm.tsx b/frontend/src/features/vehicles/components/VehicleForm.tsx index bfe2955..624fbbc 100644 --- a/frontend/src/features/vehicles/components/VehicleForm.tsx +++ b/frontend/src/features/vehicles/components/VehicleForm.tsx @@ -114,6 +114,7 @@ export const VehicleForm: React.FC = ({ const [isDecoding, setIsDecoding] = useState(false); const [showUpgradeDialog, setShowUpgradeDialog] = useState(false); const [decodeError, setDecodeError] = useState(null); + const [decodeHint, setDecodeHint] = useState(null); // VIN OCR capture hook const vinOcr = useVinOcr(); @@ -524,6 +525,7 @@ export const VehicleForm: React.FC = ({ setIsDecoding(true); setDecodeError(null); + setDecodeHint(null); try { const decoded = await vehiclesApi.decodeVin(vin); @@ -588,6 +590,21 @@ export const VehicleForm: React.FC = ({ setValue('transmission', decoded.transmission.value); } + // Check if decode returned data but matching failed for key fields + const hasMatchedValue = decoded.year.value || decoded.make.value || decoded.model.value; + const hasSourceValue = decoded.year.sourceValue || decoded.make.sourceValue || decoded.model.sourceValue; + if (!hasMatchedValue && hasSourceValue) { + const parts = [ + decoded.year.sourceValue, + decoded.make.sourceValue, + decoded.model.sourceValue, + decoded.trimLevel.sourceValue + 
].filter(Boolean); + setDecodeHint( + `Could not match VIN data to dropdowns. Decoded as: ${parts.join(' ')}. Please select values manually.` + ); + } + setLoadingDropdowns(false); isVinDecoding.current = false; } catch (error: any) { @@ -671,6 +688,9 @@ export const VehicleForm: React.FC = ({ {decodeError && (

    {decodeError}

    )} + {decodeHint && ( +

    {decodeHint}

    + )} {vinOcr.error && (

    {vinOcr.error}

    )} From 5bb44be8bc5afd51cd6f80aa40ac387afde6861c Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Thu, 19 Feb 2026 20:35:06 -0600 Subject: [PATCH 7/9] chore: Change to Gemini 3.0 Flash --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index ed526ce..8f953ac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -207,7 +207,7 @@ services: # Vertex AI / Gemini configuration (maintenance schedule extraction) VERTEX_AI_PROJECT: motovaultpro VERTEX_AI_LOCATION: us-central1 - GEMINI_MODEL: gemini-2.5-flash + GEMINI_MODEL: gemini-3.0-flash volumes: - /tmp/vin-debug:/tmp/vin-debug - ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro From bf6742f6ea633ef747c3aa6d69d20fb999d5cc5d Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Thu, 19 Feb 2026 20:36:34 -0600 Subject: [PATCH 8/9] chore: Gemini 3.0 Flash Preview model --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8f953ac..3f3e04f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -207,7 +207,7 @@ services: # Vertex AI / Gemini configuration (maintenance schedule extraction) VERTEX_AI_PROJECT: motovaultpro VERTEX_AI_LOCATION: us-central1 - GEMINI_MODEL: gemini-3.0-flash + GEMINI_MODEL: gemini-3-flash-preview volumes: - /tmp/vin-debug:/tmp/vin-debug - ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro From 781241966c4a4bf0e6c7207e73e834fa8c9e4e69 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Thu, 19 Feb 2026 20:59:40 -0600 Subject: [PATCH 9/9] chore: change google region --- docker-compose.yml | 2 +- ocr/app/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 
3f3e04f..26552e8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -206,7 +206,7 @@ services: VISION_MONTHLY_LIMIT: "1000" # Vertex AI / Gemini configuration (maintenance schedule extraction) VERTEX_AI_PROJECT: motovaultpro - VERTEX_AI_LOCATION: us-central1 + VERTEX_AI_LOCATION: global GEMINI_MODEL: gemini-3-flash-preview volumes: - /tmp/vin-debug:/tmp/vin-debug diff --git a/ocr/app/config.py b/ocr/app/config.py index f1e7826..c1f1041 100644 --- a/ocr/app/config.py +++ b/ocr/app/config.py @@ -32,7 +32,7 @@ class Settings: # Vertex AI / Gemini configuration self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "") self.vertex_ai_location: str = os.getenv( - "VERTEX_AI_LOCATION", "us-central1" + "VERTEX_AI_LOCATION", "global" ) self.gemini_model: str = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")