feat: Replace NHTSA VIN decode with Google Gemini via OCR service (#223) #229
@@ -41,14 +41,6 @@ const configSchema = z.object({
|
||||
audience: z.string(),
|
||||
}),
|
||||
|
||||
// External APIs configuration (optional)
|
||||
external: z.object({
|
||||
vpic: z.object({
|
||||
url: z.string(),
|
||||
timeout: z.string(),
|
||||
}).optional(),
|
||||
}).optional(),
|
||||
|
||||
// Service configuration
|
||||
service: z.object({
|
||||
name: z.string(),
|
||||
|
||||
@@ -29,7 +29,7 @@ export const FEATURE_TIERS: Record<string, FeatureConfig> = {
|
||||
'vehicle.vinDecode': {
|
||||
minTier: 'pro',
|
||||
name: 'VIN Decode',
|
||||
upgradePrompt: 'Upgrade to Pro to automatically decode VIN and populate vehicle details from the NHTSA database.',
|
||||
upgradePrompt: 'Upgrade to Pro to automatically decode VIN and populate vehicle details from the vehicle database.',
|
||||
},
|
||||
'fuelLog.receiptScan': {
|
||||
minTier: 'pro',
|
||||
|
||||
@@ -37,7 +37,7 @@ Backend proxy for the Python OCR microservice. Handles authentication, tier gati
|
||||
|
||||
| File | What | When to read |
|
||||
| ---- | ---- | ------------ |
|
||||
| `ocr-client.ts` | HTTP client to mvp-ocr Python service (extract, extractVin, extractReceipt, submitJob, submitManualJob, getJobStatus, isHealthy) | OCR service communication, error handling |
|
||||
| `ocr-client.ts` | HTTP client to mvp-ocr Python service (extract, extractVin, extractReceipt, decodeVin, submitJob, submitManualJob, getJobStatus, isHealthy) | OCR service communication, error handling |
|
||||
|
||||
## tests/
|
||||
|
||||
|
||||
@@ -131,3 +131,21 @@ export interface ManualJobResponse {
|
||||
result?: ManualExtractionResult;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/** Response from VIN decode via Gemini (OCR service) */
|
||||
export interface VinDecodeResponse {
|
||||
success: boolean;
|
||||
vin: string;
|
||||
year: number | null;
|
||||
make: string | null;
|
||||
model: string | null;
|
||||
trimLevel: string | null;
|
||||
bodyType: string | null;
|
||||
driveType: string | null;
|
||||
fuelType: string | null;
|
||||
engine: string | null;
|
||||
transmission: string | null;
|
||||
confidence: number;
|
||||
processingTimeMs: number;
|
||||
error: string | null;
|
||||
}
|
||||
|
||||
51
backend/src/features/ocr/external/ocr-client.ts
vendored
51
backend/src/features/ocr/external/ocr-client.ts
vendored
@@ -2,7 +2,7 @@
|
||||
* @ai-summary HTTP client for OCR service communication
|
||||
*/
|
||||
import { logger } from '../../../core/logging/logger';
|
||||
import type { JobResponse, ManualJobResponse, OcrResponse, ReceiptExtractionResponse, VinExtractionResponse } from '../domain/ocr.types';
|
||||
import type { JobResponse, ManualJobResponse, OcrResponse, ReceiptExtractionResponse, VinDecodeResponse, VinExtractionResponse } from '../domain/ocr.types';
|
||||
|
||||
/** OCR service configuration */
|
||||
const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000';
|
||||
@@ -373,6 +373,55 @@ export class OcrClient {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Decode a VIN string into structured vehicle data via Gemini.
 *
 * Unlike other OCR methods, this sends JSON (not multipart) because
 * VIN decode has no file upload.
 *
 * Only a masked VIN (first 6 characters followed by `...`) is written to the
 * logs, matching how the vehicles controller logs decode requests.
 *
 * @param vin - 17-character Vehicle Identification Number
 * @returns Structured vehicle data from Gemini decode
 * @throws Error whose message starts with `OCR service error:` and which
 *   carries a numeric `statusCode` property when the service responds with a
 *   non-OK status. Callers branch on both, so neither may change.
 */
async decodeVin(vin: string): Promise<VinDecodeResponse> {
  const url = `${this.baseUrl}/decode/vin`;
  // Never log the complete VIN; keep just enough to correlate log lines.
  const maskVin = (value: string): string => `${value.substring(0, 6)}...`;

  logger.info('OCR VIN decode request', {
    operation: 'ocr.client.decodeVin',
    url,
    vin: maskVin(vin),
  });

  const response = await this.fetchWithTimeout(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ vin }),
  });

  if (!response.ok) {
    const errorText = await response.text();
    logger.error('OCR VIN decode failed', {
      operation: 'ocr.client.decodeVin.error',
      status: response.status,
      error: errorText,
    });
    // Attach the upstream HTTP status so callers can map it to their own response code.
    throw Object.assign(
      new Error(`OCR service error: ${response.status} - ${errorText}`),
      { statusCode: response.status }
    );
  }

  const result = (await response.json()) as VinDecodeResponse;

  logger.info('OCR VIN decode completed', {
    operation: 'ocr.client.decodeVin.success',
    success: result.success,
    // Mask the requested VIN rather than echoing the service-provided value.
    vin: maskVin(vin),
    confidence: result.confidence,
    processingTimeMs: result.processingTimeMs,
  });

  return result;
}
|
||||
|
||||
/**
|
||||
* Check if the OCR service is healthy.
|
||||
*
|
||||
|
||||
@@ -117,7 +117,7 @@ platform/
|
||||
When implemented, VIN decoding will use:
|
||||
1. **Cache First**: Check Redis (7-day TTL for success, 1-hour for failures)
|
||||
2. **PostgreSQL**: Database function for high-confidence decode
|
||||
3. **vPIC Fallback**: NHTSA vPIC API with circuit breaker protection
|
||||
3. **OCR Service Fallback**: Gemini VIN decode via OCR service
|
||||
4. **Graceful Degradation**: Return meaningful errors when all sources fail
|
||||
|
||||
### Database Schema
|
||||
@@ -164,7 +164,7 @@ When VIN decoding is implemented:
|
||||
|
||||
### External APIs (Planned/Future)
|
||||
When VIN decoding is implemented:
|
||||
- **NHTSA vPIC**: https://vpic.nhtsa.dot.gov/api (VIN decoding fallback)
|
||||
- **OCR Service**: Gemini VIN decode via mvp-ocr (VIN decoding fallback)
|
||||
|
||||
### Database Tables
|
||||
- **vehicle_options** - Hierarchical vehicle data (years, makes, models, trims, engines, transmissions)
|
||||
@@ -269,7 +269,7 @@ npm run lint
|
||||
## Future Considerations
|
||||
|
||||
### Planned Features
|
||||
- VIN decoding endpoint with PostgreSQL + vPIC fallback
|
||||
- VIN decoding endpoint with PostgreSQL + Gemini/OCR service fallback
|
||||
- Circuit breaker pattern for external API resilience
|
||||
|
||||
### Potential Enhancements
|
||||
|
||||
@@ -61,19 +61,3 @@ export interface VINDecodeResponse {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* vPIC API response structure (NHTSA)
|
||||
*/
|
||||
export interface VPICVariable {
|
||||
Variable: string;
|
||||
Value: string | null;
|
||||
ValueId: string | null;
|
||||
VariableId: number;
|
||||
}
|
||||
|
||||
export interface VPICResponse {
|
||||
Count: number;
|
||||
Message: string;
|
||||
SearchCriteria: string;
|
||||
Results: VPICVariable[];
|
||||
}
|
||||
|
||||
@@ -16,6 +16,6 @@
|
||||
| `data/` | Repository, database queries | Database operations |
|
||||
| `docs/` | Feature-specific documentation | Vehicle design details |
|
||||
| `events/` | Event handlers and emitters | Cross-feature event integration |
|
||||
| `external/` | External service integrations (NHTSA) | VIN decoding, third-party APIs |
|
||||
| `external/` | External service integrations | VIN decoding, third-party APIs |
|
||||
| `migrations/` | Database schema | Schema changes |
|
||||
| `tests/` | Unit and integration tests | Adding or modifying tests |
|
||||
|
||||
@@ -13,7 +13,7 @@ Primary entity for vehicle management consuming MVP Platform Vehicles Service. H
|
||||
- `DELETE /api/vehicles/:id` - Soft delete vehicle
|
||||
|
||||
### VIN Decoding (Pro/Enterprise Only)
|
||||
- `POST /api/vehicles/decode-vin` - Decode VIN using NHTSA vPIC API
|
||||
- `POST /api/vehicles/decode-vin` - Decode VIN using Gemini via OCR service
|
||||
|
||||
### Hierarchical Vehicle Dropdowns
|
||||
**Status**: Vehicles service now proxies the platform vehicle catalog to provide fully dynamic dropdowns. Each selection step filters the next list, ensuring only valid combinations are shown.
|
||||
@@ -104,11 +104,7 @@ vehicles/
|
||||
├── data/ # Database layer
|
||||
│ └── vehicles.repository.ts
|
||||
├── external/ # External service integrations
|
||||
│ ├── CLAUDE.md # Integration pattern docs
|
||||
│ └── nhtsa/ # NHTSA vPIC API client
|
||||
│ ├── nhtsa.client.ts
|
||||
│ ├── nhtsa.types.ts
|
||||
│ └── index.ts
|
||||
│ └── CLAUDE.md # Integration pattern docs
|
||||
├── migrations/ # Feature schema
|
||||
│ └── 001_create_vehicles_tables.sql
|
||||
├── tests/ # All tests
|
||||
@@ -121,14 +117,14 @@ vehicles/
|
||||
|
||||
## Key Features
|
||||
|
||||
### 🔍 VIN Decoding (NHTSA vPIC API)
|
||||
### VIN Decoding (Gemini via OCR Service)
|
||||
- **Tier Gating**: Pro and Enterprise users only (`vehicle.vinDecode` feature key)
|
||||
- **NHTSA API**: Calls official NHTSA vPIC API for authoritative vehicle data
|
||||
- **Gemini**: Calls the OCR service's Gemini-backed VIN decode endpoint; decoded fields are model-generated and returned with a confidence score
|
||||
- **Caching**: Results cached in `vin_cache` table (1-year TTL, VIN data is static)
|
||||
- **Validation**: 17-character VIN format, excludes I/O/Q characters
|
||||
- **Matching**: Case-insensitive match against dropdown options (exact match first, then normalized, prefix, and substring fallbacks)
|
||||
- **Confidence Levels**: High (exact match), Medium (normalized, prefix, or substring match), None (hint only)
|
||||
- **Timeout**: 5-second timeout for NHTSA API calls
|
||||
- **Timeout**: 5-second timeout for OCR service calls
|
||||
|
||||
#### Decode VIN Request
|
||||
```json
|
||||
@@ -140,15 +136,15 @@ Authorization: Bearer <jwt>
|
||||
|
||||
Response (200):
|
||||
{
|
||||
"year": { "value": 2021, "nhtsaValue": "2021", "confidence": "high" },
|
||||
"make": { "value": "Honda", "nhtsaValue": "HONDA", "confidence": "high" },
|
||||
"model": { "value": "Civic", "nhtsaValue": "Civic", "confidence": "high" },
|
||||
"trimLevel": { "value": "EX", "nhtsaValue": "EX", "confidence": "high" },
|
||||
"engine": { "value": null, "nhtsaValue": "2.0L L4 DOHC 16V", "confidence": "none" },
|
||||
"transmission": { "value": null, "nhtsaValue": "CVT", "confidence": "none" },
|
||||
"bodyType": { "value": null, "nhtsaValue": "Sedan", "confidence": "none" },
|
||||
"driveType": { "value": null, "nhtsaValue": "FWD", "confidence": "none" },
|
||||
"fuelType": { "value": null, "nhtsaValue": "Gasoline", "confidence": "none" }
|
||||
"year": { "value": 2021, "sourceValue": "2021", "confidence": "high" },
|
||||
"make": { "value": "Honda", "sourceValue": "HONDA", "confidence": "high" },
|
||||
"model": { "value": "Civic", "sourceValue": "Civic", "confidence": "high" },
|
||||
"trimLevel": { "value": "EX", "sourceValue": "EX", "confidence": "high" },
|
||||
"engine": { "value": null, "sourceValue": "2.0L L4 DOHC 16V", "confidence": "none" },
|
||||
"transmission": { "value": null, "sourceValue": "CVT", "confidence": "none" },
|
||||
"bodyType": { "value": null, "sourceValue": "Sedan", "confidence": "none" },
|
||||
"driveType": { "value": null, "sourceValue": "FWD", "confidence": "none" },
|
||||
"fuelType": { "value": null, "sourceValue": "Gasoline", "confidence": "none" }
|
||||
}
|
||||
|
||||
Error (400 - Invalid VIN):
|
||||
@@ -157,7 +153,7 @@ Error (400 - Invalid VIN):
|
||||
Error (403 - Tier Required):
|
||||
{ "error": "TIER_REQUIRED", "requiredTier": "pro", "currentTier": "free", ... }
|
||||
|
||||
Error (502 - NHTSA Failure):
|
||||
Error (502 - OCR Service Failure):
|
||||
{ "error": "VIN_DECODE_FAILED", "message": "Unable to decode VIN from external service" }
|
||||
```
|
||||
|
||||
@@ -230,7 +226,7 @@ Error (502 - NHTSA Failure):
|
||||
## Testing
|
||||
|
||||
### Unit Tests
|
||||
- `vehicles.service.test.ts` - Business logic with mocked dependencies (VIN decode, caching, CRUD operations)
|
||||
- `vehicles.service.test.ts` - Business logic with mocked dependencies (VIN decode via OCR service mock, caching, CRUD operations)
|
||||
|
||||
### Integration Tests
|
||||
- `vehicles.integration.test.ts` - Complete API workflow with test database (create, read, update, delete vehicles)
|
||||
|
||||
@@ -10,19 +10,18 @@ import { pool } from '../../../core/config/database';
|
||||
import { logger } from '../../../core/logging/logger';
|
||||
import { CreateVehicleBody, UpdateVehicleBody, VehicleParams } from '../domain/vehicles.types';
|
||||
import { getStorageService } from '../../../core/storage/storage.service';
|
||||
import { NHTSAClient, DecodeVinRequest } from '../external/nhtsa';
|
||||
import { ocrClient } from '../../ocr/external/ocr-client';
|
||||
import type { DecodeVinRequest } from '../domain/vehicles.types';
|
||||
import crypto from 'crypto';
|
||||
import FileType from 'file-type';
|
||||
import path from 'path';
|
||||
|
||||
export class VehiclesController {
|
||||
private vehiclesService: VehiclesService;
|
||||
private nhtsaClient: NHTSAClient;
|
||||
|
||||
constructor() {
|
||||
const repository = new VehiclesRepository(pool);
|
||||
this.vehiclesService = new VehiclesService(repository, pool);
|
||||
this.nhtsaClient = new NHTSAClient(pool);
|
||||
}
|
||||
|
||||
async getUserVehicles(request: FastifyRequest, reply: FastifyReply) {
|
||||
@@ -378,7 +377,7 @@ export class VehiclesController {
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode VIN using NHTSA vPIC API
|
||||
* Decode VIN using OCR service (Gemini)
|
||||
* POST /api/vehicles/decode-vin
|
||||
* Requires Pro or Enterprise tier
|
||||
*/
|
||||
@@ -395,13 +394,34 @@ export class VehiclesController {
|
||||
});
|
||||
}
|
||||
|
||||
logger.info('VIN decode requested', { userId, vin: vin.substring(0, 6) + '...' });
|
||||
// Validate VIN format
|
||||
const sanitizedVin = vin.trim().toUpperCase();
|
||||
const VIN_REGEX = /^[A-HJ-NPR-Z0-9]{17}$/;
|
||||
if (!VIN_REGEX.test(sanitizedVin)) {
|
||||
return reply.code(400).send({
|
||||
error: 'INVALID_VIN',
|
||||
message: 'Invalid VIN format. VIN must be exactly 17 characters and contain only letters (except I, O, Q) and numbers.'
|
||||
});
|
||||
}
|
||||
|
||||
// Validate and decode VIN
|
||||
const response = await this.nhtsaClient.decodeVin(vin);
|
||||
logger.info('VIN decode requested', { userId, vin: sanitizedVin.substring(0, 6) + '...' });
|
||||
|
||||
// Extract and map fields from NHTSA response
|
||||
const decodedData = await this.vehiclesService.mapNHTSAResponse(response);
|
||||
// Check cache first
|
||||
const cached = await this.vehiclesService.getVinCached(sanitizedVin);
|
||||
if (cached) {
|
||||
logger.info('VIN decode cache hit', { userId });
|
||||
const decodedData = await this.vehiclesService.mapVinDecodeResponse(cached);
|
||||
return reply.code(200).send(decodedData);
|
||||
}
|
||||
|
||||
// Call OCR service for VIN decode
|
||||
const response = await ocrClient.decodeVin(sanitizedVin);
|
||||
|
||||
// Cache the response
|
||||
await this.vehiclesService.saveVinCache(sanitizedVin, response);
|
||||
|
||||
// Map response to decoded vehicle data with dropdown matching
|
||||
const decodedData = await this.vehiclesService.mapVinDecodeResponse(response);
|
||||
|
||||
logger.info('VIN decode successful', {
|
||||
userId,
|
||||
@@ -414,7 +434,7 @@ export class VehiclesController {
|
||||
} catch (error: any) {
|
||||
logger.error('VIN decode failed', { error, userId });
|
||||
|
||||
// Handle validation errors
|
||||
// Handle VIN validation errors
|
||||
if (error.message?.includes('Invalid VIN')) {
|
||||
return reply.code(400).send({
|
||||
error: 'INVALID_VIN',
|
||||
@@ -422,16 +442,25 @@ export class VehiclesController {
|
||||
});
|
||||
}
|
||||
|
||||
// Handle timeout
|
||||
if (error.message?.includes('timed out')) {
|
||||
return reply.code(504).send({
|
||||
error: 'VIN_DECODE_TIMEOUT',
|
||||
message: 'NHTSA API request timed out. Please try again.'
|
||||
// Handle OCR service errors by status code
|
||||
if (error.statusCode === 503 || error.statusCode === 422) {
|
||||
return reply.code(502).send({
|
||||
error: 'VIN_DECODE_FAILED',
|
||||
message: 'VIN decode service unavailable',
|
||||
details: error.message
|
||||
});
|
||||
}
|
||||
|
||||
// Handle NHTSA API errors
|
||||
if (error.message?.includes('NHTSA')) {
|
||||
// Handle timeout
|
||||
if (error.message?.includes('timed out') || error.message?.includes('aborted')) {
|
||||
return reply.code(504).send({
|
||||
error: 'VIN_DECODE_TIMEOUT',
|
||||
message: 'VIN decode service timed out. Please try again.'
|
||||
});
|
||||
}
|
||||
|
||||
// Handle OCR service errors
|
||||
if (error.message?.includes('OCR service error')) {
|
||||
return reply.code(502).send({
|
||||
error: 'VIN_DECODE_FAILED',
|
||||
message: 'Unable to decode VIN from external service',
|
||||
|
||||
@@ -75,7 +75,7 @@ export const vehiclesRoutes: FastifyPluginAsync = async (
|
||||
handler: vehiclesController.getDropdownOptions.bind(vehiclesController)
|
||||
});
|
||||
|
||||
// POST /api/vehicles/decode-vin - Decode VIN using NHTSA vPIC API (Pro/Enterprise only)
|
||||
// POST /api/vehicles/decode-vin - Decode VIN via OCR service (Pro/Enterprise only)
|
||||
fastify.post<{ Body: { vin: string } }>('/vehicles/decode-vin', {
|
||||
preHandler: [fastify.authenticate, fastify.requireTier({ featureKey: 'vehicle.vinDecode' })],
|
||||
handler: vehiclesController.decodeVin.bind(vehiclesController)
|
||||
|
||||
@@ -24,7 +24,8 @@ import { isValidVIN, isValidPreModernVIN } from '../../../shared-minimal/utils/v
|
||||
import { normalizeMakeName, normalizeModelName } from './name-normalizer';
|
||||
import { getVehicleDataService, getPool } from '../../platform';
|
||||
import { auditLogService } from '../../audit-log';
|
||||
import { NHTSAClient, NHTSADecodeResponse, DecodedVehicleData, MatchedField } from '../external/nhtsa';
|
||||
import type { VinDecodeResponse } from '../../ocr/domain/ocr.types';
|
||||
import type { DecodedVehicleData, MatchedField } from './vehicles.types';
|
||||
import { canAddVehicle, getVehicleLimitConfig } from '../../../core/config/feature-tiers';
|
||||
import { UserProfileRepository } from '../../user-profile/data/user-profile.repository';
|
||||
import { SubscriptionTier } from '../../user-profile/domain/user-profile.types';
|
||||
@@ -593,6 +594,72 @@ export class VehiclesService {
|
||||
await cacheService.del(cacheKey);
|
||||
}
|
||||
|
||||
/**
 * Look up a previously decoded VIN in the `vin_cache` table.
 *
 * Only rows cached within the last 365 days are read. A row counts as a hit
 * only when its `raw_data` is a Gemini-format object whose `success` flag is
 * `true`:
 * - legacy NHTSA-format rows (no `success` field) are treated as misses;
 * - rows holding a failed decode (`success !== true`) are treated as misses,
 *   so a stored failure is not replayed for the whole cache window.
 *
 * Lookup errors are logged and reported as a miss rather than thrown.
 *
 * @param vin - VIN to look up; compared verbatim against `vin_cache.vin`
 * @returns The cached decode response, or `null` on any miss or lookup error
 */
async getVinCached(vin: string): Promise<VinDecodeResponse | null> {
  try {
    const result = await this.pool.query<{
      raw_data: any;
      cached_at: Date;
    }>(
      `SELECT raw_data, cached_at
       FROM vin_cache
       WHERE vin = $1
         AND cached_at > NOW() - INTERVAL '365 days'`,
      [vin]
    );

    if (result.rows.length === 0) {
      return null;
    }

    const rawData = result.rows[0].raw_data;

    // Format-aware check: Gemini responses have `success` field,
    // old NHTSA responses do not. Treat old format as cache miss.
    if (!rawData || typeof rawData !== 'object' || !('success' in rawData)) {
      logger.debug('VIN cache format mismatch (legacy NHTSA entry), treating as miss', { vin });
      return null;
    }

    // A stored failed decode must not be served as a hit; let the caller retry the decode.
    if (rawData.success !== true) {
      logger.debug('VIN cache entry holds a failed decode, treating as miss', { vin });
      return null;
    }

    logger.debug('VIN cache hit', { vin });
    return rawData as VinDecodeResponse;
  } catch (error) {
    logger.error('Failed to check VIN cache', { vin, error });
    return null;
  }
}
|
||||
|
||||
/**
 * Save a VIN decode response to `vin_cache` using an `ON CONFLICT (vin)` upsert.
 *
 * Failed decodes (`success` false) are not persisted, so a failure is never
 * replayed from the cache.
 *
 * An existing row is overwritten when any of the following holds:
 * - its stored `raw_data.confidence` is less than or equal to the new confidence;
 * - it has no `confidence` at all (e.g. a legacy NHTSA-format row), which is
 *   treated as confidence 0 - without the COALESCE the comparison yields NULL
 *   and the row would never be replaced;
 * - it is older than the 365-day window the cache lookup reads, so it can no
 *   longer be served and must be refreshed regardless of confidence.
 *
 * Database errors are logged and swallowed: a caching failure must not break
 * the decode flow.
 *
 * @param vin - VIN used as the cache key
 * @param response - Decode response to persist; stored whole in `raw_data`
 */
async saveVinCache(vin: string, response: VinDecodeResponse): Promise<void> {
  if (!response.success) {
    logger.debug('Skipping VIN cache write for unsuccessful decode', { vin });
    return;
  }

  try {
    await this.pool.query(
      `INSERT INTO vin_cache (vin, make, model, year, engine_type, body_type, raw_data, cached_at)
       VALUES ($1, $2, $3, $4, $5, $6, $7, NOW())
       ON CONFLICT (vin) DO UPDATE SET
         make = EXCLUDED.make,
         model = EXCLUDED.model,
         year = EXCLUDED.year,
         engine_type = EXCLUDED.engine_type,
         body_type = EXCLUDED.body_type,
         raw_data = EXCLUDED.raw_data,
         cached_at = NOW()
       WHERE COALESCE((vin_cache.raw_data->>'confidence')::float, 0) <= $8
          OR vin_cache.cached_at <= NOW() - INTERVAL '365 days'`,
      [
        vin,
        response.make,
        response.model,
        response.year,
        response.engine,
        response.bodyType,
        JSON.stringify(response),
        response.confidence ?? 1,
      ]
    );

    logger.debug('VIN cached', { vin, confidence: response.confidence });
  } catch (error) {
    logger.error('Failed to cache VIN data', { vin, error });
    // Don't throw - caching failure shouldn't break the decode flow
  }
}
|
||||
|
||||
async getDropdownMakes(year: number): Promise<string[]> {
|
||||
const vehicleDataService = getVehicleDataService();
|
||||
const pool = getPool();
|
||||
@@ -657,82 +724,88 @@ export class VehiclesService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Map NHTSA decode response to internal decoded vehicle data format
|
||||
* Map VIN decode response to internal decoded vehicle data format
|
||||
* with dropdown matching and confidence levels
|
||||
*/
|
||||
async mapNHTSAResponse(response: NHTSADecodeResponse): Promise<DecodedVehicleData> {
|
||||
async mapVinDecodeResponse(response: VinDecodeResponse): Promise<DecodedVehicleData> {
|
||||
const vehicleDataService = getVehicleDataService();
|
||||
const pool = getPool();
|
||||
|
||||
// Extract raw values from NHTSA response
|
||||
const nhtsaYear = NHTSAClient.extractYear(response);
|
||||
const nhtsaMake = NHTSAClient.extractValue(response, 'Make');
|
||||
const nhtsaModel = NHTSAClient.extractValue(response, 'Model');
|
||||
const nhtsaTrim = NHTSAClient.extractValue(response, 'Trim');
|
||||
const nhtsaBodyType = NHTSAClient.extractValue(response, 'Body Class');
|
||||
const nhtsaDriveType = NHTSAClient.extractValue(response, 'Drive Type');
|
||||
const nhtsaFuelType = NHTSAClient.extractValue(response, 'Fuel Type - Primary');
|
||||
const nhtsaEngine = NHTSAClient.extractEngine(response);
|
||||
const nhtsaTransmission = NHTSAClient.extractValue(response, 'Transmission Style');
|
||||
// Read flat fields directly from Gemini response
|
||||
const sourceYear = response.year;
|
||||
const sourceMake = response.make;
|
||||
const sourceModel = response.model;
|
||||
const sourceTrim = response.trimLevel;
|
||||
const sourceBodyType = response.bodyType;
|
||||
const sourceDriveType = response.driveType;
|
||||
const sourceFuelType = response.fuelType;
|
||||
const sourceEngine = response.engine;
|
||||
const sourceTransmission = response.transmission;
|
||||
|
||||
logger.debug('VIN decode raw values', {
|
||||
vin: response.vin,
|
||||
year: sourceYear, make: sourceMake, model: sourceModel,
|
||||
trim: sourceTrim, confidence: response.confidence
|
||||
});
|
||||
|
||||
// Year is always high confidence if present (exact numeric match)
|
||||
const year: MatchedField<number> = {
|
||||
value: nhtsaYear,
|
||||
nhtsaValue: nhtsaYear?.toString() || null,
|
||||
confidence: nhtsaYear ? 'high' : 'none'
|
||||
value: sourceYear,
|
||||
sourceValue: sourceYear?.toString() || null,
|
||||
confidence: sourceYear ? 'high' : 'none'
|
||||
};
|
||||
|
||||
// Match make against dropdown options
|
||||
let make: MatchedField<string> = { value: null, nhtsaValue: nhtsaMake, confidence: 'none' };
|
||||
if (nhtsaYear && nhtsaMake) {
|
||||
const makes = await vehicleDataService.getMakes(pool, nhtsaYear);
|
||||
make = this.matchField(nhtsaMake, makes);
|
||||
let make: MatchedField<string> = { value: null, sourceValue: sourceMake, confidence: 'none' };
|
||||
if (sourceYear && sourceMake) {
|
||||
const makes = await vehicleDataService.getMakes(pool, sourceYear);
|
||||
make = this.matchField(sourceMake, makes);
|
||||
}
|
||||
|
||||
// Match model against dropdown options
|
||||
let model: MatchedField<string> = { value: null, nhtsaValue: nhtsaModel, confidence: 'none' };
|
||||
if (nhtsaYear && make.value && nhtsaModel) {
|
||||
const models = await vehicleDataService.getModels(pool, nhtsaYear, make.value);
|
||||
model = this.matchField(nhtsaModel, models);
|
||||
let model: MatchedField<string> = { value: null, sourceValue: sourceModel, confidence: 'none' };
|
||||
if (sourceYear && make.value && sourceModel) {
|
||||
const models = await vehicleDataService.getModels(pool, sourceYear, make.value);
|
||||
model = this.matchField(sourceModel, models);
|
||||
}
|
||||
|
||||
// Match trim against dropdown options
|
||||
let trimLevel: MatchedField<string> = { value: null, nhtsaValue: nhtsaTrim, confidence: 'none' };
|
||||
if (nhtsaYear && make.value && model.value && nhtsaTrim) {
|
||||
const trims = await vehicleDataService.getTrims(pool, nhtsaYear, make.value, model.value);
|
||||
trimLevel = this.matchField(nhtsaTrim, trims);
|
||||
let trimLevel: MatchedField<string> = { value: null, sourceValue: sourceTrim, confidence: 'none' };
|
||||
if (sourceYear && make.value && model.value && sourceTrim) {
|
||||
const trims = await vehicleDataService.getTrims(pool, sourceYear, make.value, model.value);
|
||||
trimLevel = this.matchField(sourceTrim, trims);
|
||||
}
|
||||
|
||||
// Match engine against dropdown options
|
||||
let engine: MatchedField<string> = { value: null, nhtsaValue: nhtsaEngine, confidence: 'none' };
|
||||
if (nhtsaYear && make.value && model.value && trimLevel.value && nhtsaEngine) {
|
||||
const engines = await vehicleDataService.getEngines(pool, nhtsaYear, make.value, model.value, trimLevel.value);
|
||||
engine = this.matchField(nhtsaEngine, engines);
|
||||
let engine: MatchedField<string> = { value: null, sourceValue: sourceEngine, confidence: 'none' };
|
||||
if (sourceYear && make.value && model.value && trimLevel.value && sourceEngine) {
|
||||
const engines = await vehicleDataService.getEngines(pool, sourceYear, make.value, model.value, trimLevel.value);
|
||||
engine = this.matchField(sourceEngine, engines);
|
||||
}
|
||||
|
||||
// Match transmission against dropdown options
|
||||
let transmission: MatchedField<string> = { value: null, nhtsaValue: nhtsaTransmission, confidence: 'none' };
|
||||
if (nhtsaYear && make.value && model.value && trimLevel.value && nhtsaTransmission) {
|
||||
const transmissions = await vehicleDataService.getTransmissionsForTrim(pool, nhtsaYear, make.value, model.value, trimLevel.value);
|
||||
transmission = this.matchField(nhtsaTransmission, transmissions);
|
||||
let transmission: MatchedField<string> = { value: null, sourceValue: sourceTransmission, confidence: 'none' };
|
||||
if (sourceYear && make.value && model.value && trimLevel.value && sourceTransmission) {
|
||||
const transmissions = await vehicleDataService.getTransmissionsForTrim(pool, sourceYear, make.value, model.value, trimLevel.value);
|
||||
transmission = this.matchField(sourceTransmission, transmissions);
|
||||
}
|
||||
|
||||
// Body type, drive type, and fuel type are display-only (no dropdown matching)
|
||||
const bodyType: MatchedField<string> = {
|
||||
value: null,
|
||||
nhtsaValue: nhtsaBodyType,
|
||||
sourceValue: sourceBodyType,
|
||||
confidence: 'none'
|
||||
};
|
||||
|
||||
const driveType: MatchedField<string> = {
|
||||
value: null,
|
||||
nhtsaValue: nhtsaDriveType,
|
||||
sourceValue: sourceDriveType,
|
||||
confidence: 'none'
|
||||
};
|
||||
|
||||
const fuelType: MatchedField<string> = {
|
||||
value: null,
|
||||
nhtsaValue: nhtsaFuelType,
|
||||
sourceValue: sourceFuelType,
|
||||
confidence: 'none'
|
||||
};
|
||||
|
||||
@@ -754,42 +827,62 @@ export class VehiclesService {
|
||||
* Returns the matched dropdown value with confidence level
|
||||
* Matching order: exact -> normalized -> prefix -> contains -> reverse contains (longest option found inside the source value)
|
||||
*/
|
||||
private matchField(sourceValue: string, options: string[]): MatchedField<string> {
  // Resolves a decoded source value to one of the dropdown `options`.
  // Steps run from strictest to loosest and the first hit wins:
  //   1. exact (case-insensitive, trimmed)      -> 'high'
  //   2. normalized (alphanumerics only)        -> 'medium'
  //   3. prefix (option starts with source)     -> 'medium'
  //   4. contains (option contains source)      -> 'medium'
  //   5. reverse contains (source contains option, longest option wins) -> 'medium'
  // When nothing matches, the source value is returned as a hint with
  // `value: null` and confidence 'none'.
  if (!sourceValue || options.length === 0) {
    return { value: null, sourceValue, confidence: 'none' };
  }

  const normalizedSource = sourceValue.toLowerCase().trim();

  // Try exact case-insensitive match
  const exactMatch = options.find(opt => opt.toLowerCase().trim() === normalizedSource);
  if (exactMatch) {
    return { value: exactMatch, sourceValue, confidence: 'high' };
  }

  // Try normalized comparison (remove special chars)
  const normalizeForCompare = (s: string) => s.toLowerCase().replace(/[^a-z0-9]/g, '');
  const normalizedSourceClean = normalizeForCompare(sourceValue);

  const normalizedMatch = options.find(opt => normalizeForCompare(opt) === normalizedSourceClean);
  if (normalizedMatch) {
    return { value: normalizedMatch, sourceValue, confidence: 'medium' };
  }

  // Try prefix match - option starts with source value
  const prefixMatch = options.find(opt => opt.toLowerCase().trim().startsWith(normalizedSource));
  if (prefixMatch) {
    return { value: prefixMatch, sourceValue, confidence: 'medium' };
  }

  // Try contains match - option contains source value
  const containsMatch = options.find(opt => opt.toLowerCase().trim().includes(normalizedSource));
  if (containsMatch) {
    return { value: containsMatch, sourceValue, confidence: 'medium' };
  }

  // Try reverse contains - source value contains option (e.g., source "X5 xDrive35i" contains option "X5")
  // Prefer the longest matching option to avoid false positives (e.g., "X5 M" over "X5")
  // This also covers the "source starts with option + separator" case, so no
  // separate word-start step is needed: any such option is already a substring
  // of the source and is found here.
  const reverseMatches = options.filter(opt => {
    const normalizedOpt = opt.toLowerCase().trim();
    return normalizedSource.includes(normalizedOpt) && normalizedOpt.length > 0;
  });
  if (reverseMatches.length > 0) {
    const bestMatch = reverseMatches.reduce((a, b) => a.length >= b.length ? a : b);
    return { value: bestMatch, sourceValue, confidence: 'medium' };
  }

  // No match found - return source value as hint with no match
  return { value: null, sourceValue, confidence: 'none' };
}
|
||||
|
||||
private toResponse(vehicle: Vehicle): VehicleResponse {
|
||||
|
||||
@@ -215,3 +215,53 @@ export interface TCOResponse {
|
||||
distanceUnit: string;
|
||||
currencyCode: string;
|
||||
}
|
||||
|
||||
/** Confidence level for matched dropdown values */
|
||||
export type MatchConfidence = 'high' | 'medium' | 'none';
|
||||
|
||||
/**
 * Matched field with confidence indicator.
 * Pairs the dropdown option selected for a decoded field with the raw decoded text.
 */
|
||||
export interface MatchedField<T> {
|
||||
/** Matched dropdown value, or null when no option matched */
value: T | null;
|
||||
/** Raw value reported by the decode source, kept as a hint; may be null */
sourceValue: string | null;
|
||||
/** How reliable the match is; the matcher pairs 'none' with a null `value` */
confidence: MatchConfidence;
|
||||
}
|
||||
|
||||
/**
 * Decoded vehicle data with match confidence per field.
 * Maps VIN decode response fields to internal field names.
 *
 * Every property is a MatchedField, so each one carries the matched dropdown
 * value, the raw source value and a confidence level. `year` is the only
 * numeric field; all others are strings.
 */
|
||||
export interface DecodedVehicleData {
|
||||
year: MatchedField<number>;
|
||||
make: MatchedField<string>;
|
||||
model: MatchedField<string>;
|
||||
trimLevel: MatchedField<string>;
|
||||
bodyType: MatchedField<string>;
|
||||
driveType: MatchedField<string>;
|
||||
fuelType: MatchedField<string>;
|
||||
engine: MatchedField<string>;
|
||||
transmission: MatchedField<string>;
|
||||
}
|
||||
|
||||
/**
 * Cached VIN data from vin_cache table.
 * NOTE(review): property names are camelCase; the table columns are presumably
 * snake_case (engine_type, body_type, raw_data, cached_at) — confirm in the repository layer.
 */
|
||||
export interface VinCacheEntry {
|
||||
/** VIN the entry was cached under */
vin: string;
|
||||
make: string | null;
|
||||
model: string | null;
|
||||
year: number | null;
|
||||
engineType: string | null;
|
||||
bodyType: string | null;
|
||||
/** Full decode response, typed through an inline import() of the OCR domain types */
rawData: import('../../ocr/domain/ocr.types').VinDecodeResponse;
|
||||
/** Timestamp at which the entry was written to the cache */
cachedAt: Date;
|
||||
}
|
||||
|
||||
/** VIN decode request body: the only field is the VIN to decode. */
|
||||
export interface DecodeVinRequest {
|
||||
/** VIN string as submitted by the client */
vin: string;
|
||||
}
|
||||
|
||||
/**
 * VIN decode error response.
 * `error` is a machine-readable code that clients branch on; `message` is the
 * human-readable explanation.
 */
|
||||
export interface VinDecodeError {
|
||||
/**
 * Error code: 'INVALID_VIN' (VIN format rejected), 'VIN_DECODE_FAILED'
 * (decode attempt failed), 'TIER_REQUIRED' (feature needs a higher subscription tier).
 */
error: 'INVALID_VIN' | 'VIN_DECODE_FAILED' | 'TIER_REQUIRED';
|
||||
message: string;
|
||||
/** Optional extra detail about the failure */
details?: string;
|
||||
}
|
||||
|
||||
@@ -5,9 +5,3 @@
|
||||
| File | What | When to read |
|
||||
| ---- | ---- | ------------ |
|
||||
| `README.md` | Integration patterns, adding new services | Understanding external service conventions |
|
||||
|
||||
## Subdirectories
|
||||
|
||||
| Directory | What | When to read |
|
||||
| --------- | ---- | ------------ |
|
||||
| `nhtsa/` | NHTSA vPIC API client for VIN decoding | VIN decode feature work |
|
||||
|
||||
@@ -15,7 +15,7 @@ Each integration follows this structure:
|
||||
## Adding New Integrations
|
||||
|
||||
1. Create subdirectory: `external/{service}/`
|
||||
2. Add client: `{service}.client.ts` following NHTSAClient pattern
|
||||
2. Add client: `{service}.client.ts` following the axios-based client pattern
|
||||
3. Add types: `{service}.types.ts`
|
||||
4. Update `CLAUDE.md` with new directory
|
||||
5. Add tests in `tests/unit/{service}.client.test.ts`
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
/**
|
||||
* @ai-summary NHTSA vPIC integration exports
|
||||
* @ai-context Public API for VIN decoding functionality
|
||||
*/
|
||||
|
||||
export { NHTSAClient } from './nhtsa.client';
|
||||
export type {
|
||||
NHTSADecodeResponse,
|
||||
NHTSAResult,
|
||||
DecodedVehicleData,
|
||||
MatchedField,
|
||||
MatchConfidence,
|
||||
VinCacheEntry,
|
||||
DecodeVinRequest,
|
||||
VinDecodeError,
|
||||
} from './nhtsa.types';
|
||||
@@ -1,235 +0,0 @@
|
||||
/**
|
||||
* @ai-summary NHTSA vPIC API client for VIN decoding
|
||||
* @ai-context Fetches vehicle data from NHTSA and caches results
|
||||
*/
|
||||
|
||||
import axios, { AxiosError } from 'axios';
|
||||
import { logger } from '../../../../core/logging/logger';
|
||||
import { NHTSADecodeResponse, VinCacheEntry } from './nhtsa.types';
|
||||
import { Pool } from 'pg';
|
||||
|
||||
/**
 * VIN validation regex
 * - 17 characters
 * - Excludes I, O, Q (not used in VINs)
 * - Alphanumeric only
 * - Anchored and upper-case only, so input must be upper-cased before testing
 *   (validateVin trims and upper-cases first)
 */
|
||||
const VIN_REGEX = /^[A-HJ-NPR-Z0-9]{17}$/;
|
||||
|
||||
/**
 * Cache TTL: 1 year (VIN data is static - vehicle specs don't change)
 * Expressed in seconds (31,536,000) because it is interpolated into the
 * vin_cache freshness check as `INTERVAL '<n> seconds'`.
 */
|
||||
const CACHE_TTL_SECONDS = 365 * 24 * 60 * 60;
|
||||
|
||||
/**
 * Client for the NHTSA vPIC VIN-decode API with a Postgres-backed cache.
 *
 * Decode flow: sanitize and validate the VIN, return the cached raw response
 * when a fresh `vin_cache` row exists, otherwise call vPIC, reject NHTSA-level
 * errors, cache the successful response and return it.
 */
export class NHTSAClient {
  private readonly baseURL = 'https://vpic.nhtsa.dot.gov/api';
  private readonly timeout = 5000; // 5 seconds

  constructor(private readonly pool: Pool) {}

  /**
   * Validate VIN format.
   * @param vin - raw input; surrounding whitespace and lower-case letters are tolerated
   * @returns the trimmed, upper-cased VIN
   * @throws Error if the VIN is empty or does not match VIN_REGEX
   */
  validateVin(vin: string): string {
    const sanitized = vin.trim().toUpperCase();

    if (!sanitized) {
      throw new Error('VIN is required');
    }

    if (!VIN_REGEX.test(sanitized)) {
      throw new Error('Invalid VIN format. VIN must be exactly 17 characters and contain only letters (except I, O, Q) and numbers.');
    }

    return sanitized;
  }

  /**
   * Check cache for existing VIN data.
   * @param vin - sanitized VIN used as the cache key
   * @returns the cached entry, or null when there is no row younger than
   *          CACHE_TTL_SECONDS or when the lookup itself fails
   */
  async getCached(vin: string): Promise<VinCacheEntry | null> {
    try {
      const result = await this.pool.query<{
        vin: string;
        make: string | null;
        model: string | null;
        year: number | null;
        engine_type: string | null;
        body_type: string | null;
        raw_data: NHTSADecodeResponse;
        cached_at: Date;
      }>(
        `SELECT vin, make, model, year, engine_type, body_type, raw_data, cached_at
         FROM vin_cache
         WHERE vin = $1
         AND cached_at > NOW() - INTERVAL '${CACHE_TTL_SECONDS} seconds'`,
        [vin]
      );

      if (result.rows.length === 0) {
        return null;
      }

      const row = result.rows[0];
      return {
        vin: row.vin,
        make: row.make,
        model: row.model,
        year: row.year,
        engineType: row.engine_type,
        bodyType: row.body_type,
        rawData: row.raw_data,
        cachedAt: row.cached_at,
      };
    } catch (error) {
      // A cache failure is treated as a miss so the decode can still proceed.
      logger.error('Failed to check VIN cache', { vin, error });
      return null;
    }
  }

  /**
   * Save VIN data to cache (insert, or refresh the existing row for the VIN).
   * Never throws: failures are logged and swallowed.
   * @param vin - sanitized VIN used as the cache key
   * @param response - raw NHTSA response; stored whole as JSON plus a few extracted columns
   */
  async saveToCache(vin: string, response: NHTSADecodeResponse): Promise<void> {
    try {
      // Reuse the static extractors so cached columns are trimmed and a
      // non-numeric model year becomes null instead of NaN.
      const year = NHTSAClient.extractYear(response);
      const make = NHTSAClient.extractValue(response, 'Make');
      const model = NHTSAClient.extractValue(response, 'Model');
      const engineType = NHTSAClient.extractValue(response, 'Engine Model');
      const bodyType = NHTSAClient.extractValue(response, 'Body Class');

      await this.pool.query(
        `INSERT INTO vin_cache (vin, make, model, year, engine_type, body_type, raw_data, cached_at)
         VALUES ($1, $2, $3, $4, $5, $6, $7, NOW())
         ON CONFLICT (vin) DO UPDATE SET
         make = EXCLUDED.make,
         model = EXCLUDED.model,
         year = EXCLUDED.year,
         engine_type = EXCLUDED.engine_type,
         body_type = EXCLUDED.body_type,
         raw_data = EXCLUDED.raw_data,
         cached_at = NOW()`,
        [vin, make, model, year, engineType, bodyType, JSON.stringify(response)]
      );

      logger.debug('VIN cached', { vin });
    } catch (error) {
      logger.error('Failed to cache VIN data', { vin, error });
      // Don't throw - caching failure shouldn't break the decode flow
    }
  }

  /**
   * Decode VIN using NHTSA vPIC API
   * @param vin - 17-character VIN
   * @returns Raw NHTSA decode response (from cache when available)
   * @throws Error if VIN is invalid, NHTSA reports an error, or the API call fails
   */
  async decodeVin(vin: string): Promise<NHTSADecodeResponse> {
    // Validate and sanitize VIN
    const sanitizedVin = this.validateVin(vin);

    // Check cache first
    const cached = await this.getCached(sanitizedVin);
    if (cached) {
      logger.debug('VIN cache hit', { vin: sanitizedVin });
      return cached.rawData;
    }

    // Call NHTSA API
    logger.info('Calling NHTSA vPIC API', { vin: sanitizedVin });

    try {
      const response = await axios.get<NHTSADecodeResponse>(
        `${this.baseURL}/vehicles/decodevin/${sanitizedVin}`,
        {
          params: { format: 'json' },
          timeout: this.timeout,
        }
      );

      // Check for NHTSA-level errors
      if (response.data.Count === 0) {
        throw new Error('NHTSA returned no results for this VIN');
      }

      // Check for error messages in results
      const errorResult = response.data.Results.find(
        r => r.Variable === 'Error Code' && r.Value && r.Value !== '0'
      );
      if (errorResult) {
        const errorText = response.data.Results.find(r => r.Variable === 'Error Text');
        throw new Error(`NHTSA error: ${errorText?.Value || 'Unknown error'}`);
      }

      // Cache the successful response
      await this.saveToCache(sanitizedVin, response.data);

      return response.data;
    } catch (error) {
      throw this.toDecodeError(error, sanitizedVin);
    }
  }

  /**
   * Log an axios failure and translate it into a user-facing Error.
   * Non-axios errors (including the NHTSA-level errors thrown by decodeVin)
   * are returned unchanged so the caller rethrows them as-is.
   */
  private toDecodeError(error: unknown, vin: string): unknown {
    if (!axios.isAxiosError(error)) {
      return error;
    }

    const axiosError: AxiosError = error;

    if (axiosError.code === 'ECONNABORTED') {
      logger.error('NHTSA API timeout', { vin });
      return new Error('NHTSA API request timed out. Please try again.');
    }

    if (axiosError.response) {
      logger.error('NHTSA API error response', {
        vin,
        status: axiosError.response.status,
        data: axiosError.response.data,
      });
      return new Error(`NHTSA API error: ${axiosError.response.status}`);
    }

    logger.error('NHTSA API network error', { vin, message: axiosError.message });
    return new Error('Unable to connect to NHTSA API. Please try again later.');
  }

  /**
   * Extract a specific value from NHTSA response
   * @param variable - the `Variable` label to look up (e.g. 'Make')
   * @returns the trimmed value, or null when the variable is absent or empty
   */
  static extractValue(response: NHTSADecodeResponse, variable: string): string | null {
    const result = response.Results.find(r => r.Variable === variable);
    return result?.Value?.trim() || null;
  }

  /**
   * Extract year from NHTSA response
   * @returns the 'Model Year' parsed as a base-10 integer, or null when missing or not numeric
   */
  static extractYear(response: NHTSADecodeResponse): number | null {
    const value = NHTSAClient.extractValue(response, 'Model Year');
    if (!value) return null;
    const parsed = parseInt(value, 10);
    return isNaN(parsed) ? null : parsed;
  }

  /**
   * Extract engine description from NHTSA response
   * Prefers 'Engine Model'; otherwise combines cylinder count, displacement and
   * (non-gasoline) primary fuel type.
   * @returns the description, or null when none of those fields has a value
   */
  static extractEngine(response: NHTSADecodeResponse): string | null {
    const engineModel = NHTSAClient.extractValue(response, 'Engine Model');
    if (engineModel) return engineModel;

    // Build engine description from components
    const cylinders = NHTSAClient.extractValue(response, 'Engine Number of Cylinders');
    const displacement = NHTSAClient.extractValue(response, 'Displacement (L)');
    const fuelType = NHTSAClient.extractValue(response, 'Fuel Type - Primary');

    const parts: string[] = [];
    if (cylinders) parts.push(`${cylinders}-Cylinder`);
    if (displacement) parts.push(`${displacement}L`);
    // Gasoline is treated as the default and left out of the description.
    if (fuelType && fuelType !== 'Gasoline') parts.push(fuelType);

    return parts.length > 0 ? parts.join(' ') : null;
  }
}
|
||||
@@ -1,96 +0,0 @@
|
||||
/**
|
||||
* @ai-summary Type definitions for NHTSA vPIC API
|
||||
* @ai-context Defines request/response types for VIN decoding
|
||||
*/
|
||||
|
||||
/**
 * Individual result from NHTSA DecodeVin API.
 * The client looks entries up by their `Variable` label.
 */
|
||||
export interface NHTSAResult {
|
||||
/** Decoded value for this variable; may be null */
Value: string | null;
|
||||
/** NOTE(review): presumably NHTSA's identifier for `Value` — confirm against the vPIC docs */
ValueId: string | null;
|
||||
/** Variable label, e.g. 'Model Year', 'Make', 'Body Class', 'Error Code' */
Variable: string;
|
||||
/** NOTE(review): presumably NHTSA's numeric identifier for `Variable` — confirm */
VariableId: number;
|
||||
}
|
||||
|
||||
/**
 * Raw response from NHTSA DecodeVin API
 * GET https://vpic.nhtsa.dot.gov/api/vehicles/decodevin/{VIN}?format=json
 */
|
||||
export interface NHTSADecodeResponse {
|
||||
/** Result count; the client treats 0 as "no results for this VIN" */
Count: number;
|
||||
Message: string;
|
||||
SearchCriteria: string;
|
||||
/** One entry per decoded variable, including 'Error Code' / 'Error Text' entries */
Results: NHTSAResult[];
|
||||
}
|
||||
|
||||
/**
 * Confidence level for matched dropdown values.
 * 'none' means no dropdown option matched and only the raw NHTSA value is available.
 * NOTE(review): 'high' vs 'medium' presumably distinguishes exact from fuzzy matches —
 * confirm against the matcher.
 */
|
||||
export type MatchConfidence = 'high' | 'medium' | 'none';
|
||||
|
||||
/**
 * Matched field with confidence indicator.
 * Pairs the dropdown option selected for a field with the raw NHTSA text.
 */
|
||||
export interface MatchedField<T> {
|
||||
/** Matched dropdown value, or null when no option matched */
value: T | null;
|
||||
/** Raw value as returned by NHTSA, kept as a hint; may be null */
nhtsaValue: string | null;
|
||||
/** How reliable the match is */
confidence: MatchConfidence;
|
||||
}
|
||||
|
||||
/**
 * Decoded vehicle data with match confidence per field
 * Maps NHTSA response fields to internal field names (camelCase)
 *
 * NHTSA Field Mappings:
 * - ModelYear -> year
 * - Make -> make
 * - Model -> model
 * - Trim -> trimLevel
 * - BodyClass -> bodyType
 * - DriveType -> driveType
 * - FuelTypePrimary -> fuelType
 * - EngineModel / EngineCylinders + EngineDisplacementL -> engine
 * - TransmissionStyle -> transmission
 *
 * The names above are condensed; the client looks results up by their
 * `Variable` label (e.g. 'Model Year', 'Body Class', 'Engine Model',
 * 'Engine Number of Cylinders', 'Displacement (L)', 'Fuel Type - Primary').
 */
|
||||
export interface DecodedVehicleData {
|
||||
year: MatchedField<number>;
|
||||
make: MatchedField<string>;
|
||||
model: MatchedField<string>;
|
||||
trimLevel: MatchedField<string>;
|
||||
bodyType: MatchedField<string>;
|
||||
driveType: MatchedField<string>;
|
||||
fuelType: MatchedField<string>;
|
||||
engine: MatchedField<string>;
|
||||
transmission: MatchedField<string>;
|
||||
}
|
||||
|
||||
/**
 * Cached VIN data from vin_cache table.
 * camelCase view of a row: the client maps engine_type, body_type, raw_data
 * and cached_at onto engineType, bodyType, rawData and cachedAt.
 */
|
||||
export interface VinCacheEntry {
|
||||
/** VIN the entry was cached under */
vin: string;
|
||||
make: string | null;
|
||||
model: string | null;
|
||||
year: number | null;
|
||||
engineType: string | null;
|
||||
bodyType: string | null;
|
||||
/** Full NHTSA response as stored; returned directly on a cache hit */
rawData: NHTSADecodeResponse;
|
||||
/** Timestamp at which the entry was written to the cache */
cachedAt: Date;
|
||||
}
|
||||
|
||||
/**
 * VIN decode request body: the only field is the VIN to decode.
 */
|
||||
export interface DecodeVinRequest {
|
||||
/** VIN string as submitted by the client */
vin: string;
|
||||
}
|
||||
|
||||
/**
 * VIN decode error response.
 * `error` is a machine-readable code that clients branch on; `message` is the
 * human-readable explanation.
 */
|
||||
export interface VinDecodeError {
|
||||
/**
 * Error code: 'INVALID_VIN' (VIN format rejected), 'VIN_DECODE_FAILED'
 * (decode attempt failed), 'TIER_REQUIRED' (feature needs a higher subscription tier).
 */
error: 'INVALID_VIN' | 'VIN_DECODE_FAILED' | 'TIER_REQUIRED';
|
||||
message: string;
|
||||
/** Optional extra detail about the failure */
details?: string;
|
||||
}
|
||||
@@ -22,11 +22,6 @@ platform:
|
||||
url: http://mvp-platform-vehicles-api:8000
|
||||
timeout: 5s
|
||||
|
||||
external:
|
||||
vpic:
|
||||
url: https://vpic.nhtsa.dot.gov/api/vehicles
|
||||
timeout: 10s
|
||||
|
||||
service:
|
||||
name: mvp-backend
|
||||
|
||||
|
||||
@@ -21,5 +21,3 @@ auth0:
|
||||
domain: motovaultpro.us.auth0.com
|
||||
audience: https://api.motovaultpro.com
|
||||
|
||||
external:
|
||||
vpic_api_url: https://vpic.nhtsa.dot.gov/api/vehicles
|
||||
|
||||
@@ -107,9 +107,6 @@ external_services:
|
||||
google_maps:
|
||||
base_url: https://maps.googleapis.com/maps/api
|
||||
|
||||
vpic:
|
||||
base_url: https://vpic.nhtsa.dot.gov/api/vehicles
|
||||
|
||||
# Development Configuration
|
||||
development:
|
||||
debug_enabled: false
|
||||
|
||||
@@ -206,8 +206,8 @@ services:
|
||||
VISION_MONTHLY_LIMIT: "1000"
|
||||
# Vertex AI / Gemini configuration (maintenance schedule extraction)
|
||||
VERTEX_AI_PROJECT: motovaultpro
|
||||
VERTEX_AI_LOCATION: us-central1
|
||||
GEMINI_MODEL: gemini-2.5-flash
|
||||
VERTEX_AI_LOCATION: global
|
||||
GEMINI_MODEL: gemini-3-flash-preview
|
||||
volumes:
|
||||
- /tmp/vin-debug:/tmp/vin-debug
|
||||
- ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro
|
||||
|
||||
@@ -35,7 +35,7 @@ The platform provides vehicle hierarchical data lookups:
|
||||
VIN decoding is planned but not yet implemented. Future capabilities will include:
|
||||
- `GET /api/platform/vehicle?vin={vin}` - Decode VIN to vehicle details
|
||||
- PostgreSQL-based VIN decode function
|
||||
- NHTSA vPIC API fallback with circuit breaker
|
||||
- Gemini VIN decode via OCR service
|
||||
- Redis caching (7-day TTL for successful decodes)
|
||||
|
||||
**Data Source**: Vehicle data from standardized sources
|
||||
|
||||
@@ -74,7 +74,7 @@ docker compose exec mvp-frontend npm test -- --coverage
|
||||
|
||||
Example: `vehicles.service.test.ts`
|
||||
- Tests VIN validation logic
|
||||
- Tests vehicle creation with mocked vPIC responses
|
||||
- Tests vehicle creation with mocked OCR service responses
|
||||
- Tests caching behavior with mocked Redis
|
||||
- Tests error handling paths
|
||||
|
||||
@@ -194,7 +194,7 @@ All 15 features have test suites with unit and/or integration tests:
|
||||
- `vehicles` - Unit + integration tests
|
||||
|
||||
### Mock Strategy
|
||||
- **External APIs**: Completely mocked (vPIC, Google Maps)
|
||||
- **External APIs**: Completely mocked (OCR service, Google Maps)
|
||||
- **Database**: Real database with transactions
|
||||
- **Redis**: Mocked for unit tests, real for integration
|
||||
- **Auth**: Mocked JWT tokens for protected endpoints
|
||||
@@ -319,8 +319,8 @@ describe('Error Handling', () => {
|
||||
).rejects.toThrow('Invalid VIN format');
|
||||
});
|
||||
|
||||
it('should handle vPIC API failure', async () => {
|
||||
mockVpicClient.decode.mockRejectedValue(new Error('API down'));
|
||||
it('should handle OCR service failure', async () => {
|
||||
mockOcrClient.decodeVin.mockRejectedValue(new Error('API down'));
|
||||
|
||||
const result = await vehicleService.create(validVehicle, 'user123');
|
||||
expect(result.make).toBeNull(); // Graceful degradation
|
||||
|
||||
@@ -644,7 +644,7 @@ When you attempt to use a Pro feature on the Free tier, an **Upgrade Required**
|
||||
|
||||
### VIN Camera Scanning and Decode (Pro)
|
||||
|
||||
**What it does:** Use your device camera to photograph your vehicle's VIN plate, and the system automatically reads the VIN using OCR (Optical Character Recognition) and decodes it from the NHTSA database.
|
||||
**What it does:** Use your device camera to photograph your vehicle's VIN plate, and the system automatically reads the VIN using OCR (Optical Character Recognition) and decodes it from the vehicle database.
|
||||
|
||||
**How to use it:**
|
||||
|
||||
@@ -655,7 +655,7 @@ When you attempt to use a Pro feature on the Free tier, an **Upgrade Required**
|
||||
5. A **VIN OCR Review modal** appears showing the detected VIN with confidence indicators
|
||||
6. Confirm or correct the VIN, then click **Accept**
|
||||
7. Click the **Decode VIN** button
|
||||
8. The system queries the NHTSA database and auto-populates: Year, Make, Model, Engine, Transmission, and Trim
|
||||
8. The system queries the vehicle database and auto-populates: Year, Make, Model, Engine, Transmission, and Trim
|
||||
9. Review the pre-filled fields and complete the remaining details
|
||||
|
||||
This eliminates manual data entry errors and ensures accurate vehicle specifications.
|
||||
|
||||
@@ -82,7 +82,7 @@ export const vehiclesApi = {
|
||||
},
|
||||
|
||||
/**
|
||||
* Decode VIN using NHTSA vPIC API
|
||||
* Decode VIN using VIN decode service
|
||||
* Requires Pro or Enterprise tier
|
||||
*/
|
||||
decodeVin: async (vin: string): Promise<DecodedVehicleData> => {
|
||||
|
||||
@@ -114,6 +114,7 @@ export const VehicleForm: React.FC<VehicleFormProps> = ({
|
||||
const [isDecoding, setIsDecoding] = useState(false);
|
||||
const [showUpgradeDialog, setShowUpgradeDialog] = useState(false);
|
||||
const [decodeError, setDecodeError] = useState<string | null>(null);
|
||||
const [decodeHint, setDecodeHint] = useState<string | null>(null);
|
||||
|
||||
// VIN OCR capture hook
|
||||
const vinOcr = useVinOcr();
|
||||
@@ -507,7 +508,7 @@ export const VehicleForm: React.FC<VehicleFormProps> = ({
|
||||
|
||||
/**
|
||||
* Handle VIN decode button click
|
||||
* Calls NHTSA API and populates empty form fields
|
||||
* Calls VIN decode service and populates empty form fields
|
||||
*/
|
||||
const handleDecodeVin = async () => {
|
||||
// Check tier access first
|
||||
@@ -524,6 +525,7 @@ export const VehicleForm: React.FC<VehicleFormProps> = ({
|
||||
|
||||
setIsDecoding(true);
|
||||
setDecodeError(null);
|
||||
setDecodeHint(null);
|
||||
|
||||
try {
|
||||
const decoded = await vehiclesApi.decodeVin(vin);
|
||||
@@ -588,6 +590,21 @@ export const VehicleForm: React.FC<VehicleFormProps> = ({
|
||||
setValue('transmission', decoded.transmission.value);
|
||||
}
|
||||
|
||||
// Check if decode returned data but matching failed for key fields
|
||||
const hasMatchedValue = decoded.year.value || decoded.make.value || decoded.model.value;
|
||||
const hasSourceValue = decoded.year.sourceValue || decoded.make.sourceValue || decoded.model.sourceValue;
|
||||
if (!hasMatchedValue && hasSourceValue) {
|
||||
const parts = [
|
||||
decoded.year.sourceValue,
|
||||
decoded.make.sourceValue,
|
||||
decoded.model.sourceValue,
|
||||
decoded.trimLevel.sourceValue
|
||||
].filter(Boolean);
|
||||
setDecodeHint(
|
||||
`Could not match VIN data to dropdowns. Decoded as: ${parts.join(' ')}. Please select values manually.`
|
||||
);
|
||||
}
|
||||
|
||||
setLoadingDropdowns(false);
|
||||
isVinDecoding.current = false;
|
||||
} catch (error: any) {
|
||||
@@ -671,6 +688,9 @@ export const VehicleForm: React.FC<VehicleFormProps> = ({
|
||||
{decodeError && (
|
||||
<p className="mt-1 text-sm text-red-600 dark:text-red-400">{decodeError}</p>
|
||||
)}
|
||||
{decodeHint && (
|
||||
<p className="mt-1 text-sm text-amber-600 dark:text-amber-400">{decodeHint}</p>
|
||||
)}
|
||||
{vinOcr.error && (
|
||||
<p className="mt-1 text-sm text-red-600 dark:text-red-400">{vinOcr.error}</p>
|
||||
)}
|
||||
|
||||
@@ -95,8 +95,8 @@ const ReviewContent: React.FC<{
|
||||
const [selectedEngine, setSelectedEngine] = useState('');
|
||||
const [selectedTransmission, setSelectedTransmission] = useState('');
|
||||
|
||||
// NHTSA reference values for unmatched fields
|
||||
const [nhtsaRefs, setNhtsaRefs] = useState<Record<string, string | null>>({});
|
||||
// Source reference values for unmatched fields
|
||||
const [sourceRefs, setSourceRefs] = useState<Record<string, string | null>>({});
|
||||
|
||||
// Initialize dropdown options and pre-select decoded values
|
||||
useEffect(() => {
|
||||
@@ -109,13 +109,13 @@ const ReviewContent: React.FC<{
|
||||
|
||||
if (!decodedVehicle) return;
|
||||
|
||||
// Store NHTSA reference values for unmatched fields
|
||||
setNhtsaRefs({
|
||||
make: decodedVehicle.make.confidence === 'none' ? decodedVehicle.make.nhtsaValue : null,
|
||||
model: decodedVehicle.model.confidence === 'none' ? decodedVehicle.model.nhtsaValue : null,
|
||||
trim: decodedVehicle.trimLevel.confidence === 'none' ? decodedVehicle.trimLevel.nhtsaValue : null,
|
||||
engine: decodedVehicle.engine.confidence === 'none' ? decodedVehicle.engine.nhtsaValue : null,
|
||||
transmission: decodedVehicle.transmission.confidence === 'none' ? decodedVehicle.transmission.nhtsaValue : null,
|
||||
// Store source reference values for unmatched fields
|
||||
setSourceRefs({
|
||||
make: decodedVehicle.make.confidence === 'none' ? decodedVehicle.make.sourceValue : null,
|
||||
model: decodedVehicle.model.confidence === 'none' ? decodedVehicle.model.sourceValue : null,
|
||||
trim: decodedVehicle.trimLevel.confidence === 'none' ? decodedVehicle.trimLevel.sourceValue : null,
|
||||
engine: decodedVehicle.engine.confidence === 'none' ? decodedVehicle.engine.sourceValue : null,
|
||||
transmission: decodedVehicle.transmission.confidence === 'none' ? decodedVehicle.transmission.sourceValue : null,
|
||||
});
|
||||
|
||||
const yearValue = decodedVehicle.year.value;
|
||||
@@ -277,9 +277,9 @@ const ReviewContent: React.FC<{
|
||||
});
|
||||
};
|
||||
|
||||
/** Show NHTSA reference when field had no dropdown match */
|
||||
const nhtsaHint = (field: string) => {
|
||||
const ref = nhtsaRefs[field];
|
||||
/** Show source reference when field had no dropdown match */
|
||||
const sourceHint = (field: string) => {
|
||||
const ref = sourceRefs[field];
|
||||
if (!ref) return null;
|
||||
// Only show hint when no value is currently selected
|
||||
const selected: Record<string, string> = {
|
||||
@@ -292,7 +292,7 @@ const ReviewContent: React.FC<{
|
||||
if (selected[field]) return null;
|
||||
return (
|
||||
<p className="mt-1 text-xs text-gray-500 dark:text-titanio">
|
||||
NHTSA returned: {ref}
|
||||
Decoded value: {ref}
|
||||
</p>
|
||||
);
|
||||
};
|
||||
@@ -409,7 +409,7 @@ const ReviewContent: React.FC<{
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
{nhtsaHint('make')}
|
||||
{sourceHint('make')}
|
||||
</div>
|
||||
|
||||
{/* Model */}
|
||||
@@ -439,7 +439,7 @@ const ReviewContent: React.FC<{
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
{nhtsaHint('model')}
|
||||
{sourceHint('model')}
|
||||
</div>
|
||||
|
||||
{/* Trim */}
|
||||
@@ -469,7 +469,7 @@ const ReviewContent: React.FC<{
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
{nhtsaHint('trim')}
|
||||
{sourceHint('trim')}
|
||||
</div>
|
||||
|
||||
{/* Engine */}
|
||||
@@ -499,7 +499,7 @@ const ReviewContent: React.FC<{
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
{nhtsaHint('engine')}
|
||||
{sourceHint('engine')}
|
||||
</div>
|
||||
|
||||
{/* Transmission */}
|
||||
@@ -529,7 +529,7 @@ const ReviewContent: React.FC<{
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
{nhtsaHint('transmission')}
|
||||
{sourceHint('transmission')}
|
||||
</div>
|
||||
</div>
|
||||
</Box>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* @ai-summary Hook to orchestrate VIN OCR extraction and NHTSA decode
|
||||
* @ai-summary Hook to orchestrate VIN OCR extraction and VIN decode
|
||||
* @ai-context Handles camera capture -> OCR extraction -> VIN decode flow
|
||||
*/
|
||||
|
||||
@@ -109,7 +109,7 @@ export function useVinOcr(): UseVinOcrReturn {
|
||||
);
|
||||
}
|
||||
|
||||
// Step 2: Decode VIN using NHTSA
|
||||
// Step 2: Decode VIN
|
||||
setProcessingStep('decoding');
|
||||
let decodedVehicle: DecodedVehicleData | null = null;
|
||||
let decodeError: string | null = null;
|
||||
@@ -121,7 +121,7 @@ export function useVinOcr(): UseVinOcrReturn {
|
||||
if (err.response?.data?.error === 'TIER_REQUIRED') {
|
||||
decodeError = 'VIN decode requires Pro or Enterprise subscription';
|
||||
} else if (err.response?.data?.error === 'INVALID_VIN') {
|
||||
decodeError = 'VIN format is not recognized by NHTSA';
|
||||
decodeError = 'VIN format is not recognized';
|
||||
} else {
|
||||
decodeError = 'Unable to decode vehicle information';
|
||||
}
|
||||
|
||||
@@ -72,12 +72,12 @@ export type MatchConfidence = 'high' | 'medium' | 'none';
|
||||
*/
|
||||
export interface MatchedField<T> {
|
||||
value: T | null;
|
||||
nhtsaValue: string | null;
|
||||
sourceValue: string | null;
|
||||
confidence: MatchConfidence;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decoded vehicle data from NHTSA vPIC API
|
||||
* Decoded vehicle data from VIN decode
|
||||
* with match confidence per field
|
||||
*/
|
||||
export interface DecodedVehicleData {
|
||||
|
||||
@@ -43,7 +43,7 @@ export const SubscriptionSection = () => {
|
||||
</h3>
|
||||
|
||||
<p className="text-titanio/70 leading-relaxed mb-4">
|
||||
<strong className="text-avus">What it does:</strong> Use your device camera to photograph your vehicle's VIN plate, and the system automatically reads the VIN using OCR (Optical Character Recognition) and decodes it from the NHTSA database.
|
||||
<strong className="text-avus">What it does:</strong> Use your device camera to photograph your vehicle's VIN plate, and the system automatically reads the VIN using OCR (Optical Character Recognition) and decodes it from the vehicle database.
|
||||
</p>
|
||||
|
||||
<p className="text-titanio/70 leading-relaxed mb-4">
|
||||
@@ -58,7 +58,7 @@ export const SubscriptionSection = () => {
|
||||
<li>A <strong className="text-avus">VIN OCR Review modal</strong> appears showing the detected VIN with confidence indicators</li>
|
||||
<li>Confirm or correct the VIN, then click <strong className="text-avus">Accept</strong></li>
|
||||
<li>Click the <strong className="text-avus">Decode VIN</strong> button</li>
|
||||
<li>The system queries the NHTSA database and auto-populates: Year, Make, Model, Engine, Transmission, and Trim</li>
|
||||
<li>The system queries the vehicle database and auto-populates: Year, Make, Model, Engine, Transmission, and Trim</li>
|
||||
<li>Review the pre-filled fields and complete the remaining details</li>
|
||||
</ol>
|
||||
|
||||
|
||||
@@ -141,7 +141,7 @@ export const VehiclesSection = () => {
|
||||
<GuideScreenshot
|
||||
src="/guide/vin-decode-desktop.png"
|
||||
alt="VIN Decode feature showing auto-populated vehicle specifications"
|
||||
caption="The VIN Decode feature automatically fills in vehicle details from the NHTSA database"
|
||||
caption="The VIN Decode feature automatically fills in vehicle details from the vehicle database"
|
||||
/>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ocr/
|
||||
|
||||
Python OCR microservice. Primary engine: PaddleOCR PP-OCRv4 with optional Google Vision cloud fallback. Gemini 2.5 Flash for maintenance manual PDF extraction. Pluggable engine abstraction in `app/engines/`.
|
||||
Python OCR microservice. Primary engine: PaddleOCR PP-OCRv4 with optional Google Vision cloud fallback. Gemini 2.5 Flash for maintenance manual PDF extraction and VIN decode. Pluggable engine abstraction in `app/engines/`.
|
||||
|
||||
## Files
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ Python OCR microservice (FastAPI). Primary engine: PaddleOCR PP-OCRv4 with optio
|
||||
| `models/` | Data models and schemas | Request/response types |
|
||||
| `patterns/` | Regex patterns and service name mapping (27 maintenance subtypes) | Pattern matching rules, service categorization |
|
||||
| `preprocessors/` | Image preprocessing pipeline | Image preparation before OCR |
|
||||
| `routers/` | FastAPI route handlers (/extract, /extract/receipt, /extract/manual, /jobs) | API endpoint changes |
|
||||
| `routers/` | FastAPI route handlers (/extract, /extract/receipt, /extract/manual, /decode, /jobs) | API endpoint changes |
|
||||
| `services/` | Business logic services (job queue with Redis) | Core OCR processing, async job management |
|
||||
| `table_extraction/` | Table detection and parsing | Structured data extraction from images |
|
||||
| `validators/` | Input validation | Validation rules |
|
||||
|
||||
@@ -32,7 +32,7 @@ class Settings:
|
||||
# Vertex AI / Gemini configuration
|
||||
self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
|
||||
self.vertex_ai_location: str = os.getenv(
|
||||
"VERTEX_AI_LOCATION", "us-central1"
|
||||
"VERTEX_AI_LOCATION", "global"
|
||||
)
|
||||
self.gemini_model: str = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
OCR engine abstraction layer. Two categories of engines:
|
||||
|
||||
1. **OcrEngine subclasses** (image-to-text): PaddleOCR, Google Vision, Hybrid. Accept image bytes, return text + confidence + word boxes.
|
||||
2. **GeminiEngine** (PDF-to-structured-data): Standalone module for maintenance schedule extraction via Vertex AI. Accepts PDF bytes, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
|
||||
2. **GeminiEngine** (PDF-to-structured-data and VIN decode): Standalone module for maintenance schedule extraction and VIN decoding via Vertex AI. Accepts PDF bytes or VIN strings, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
|
||||
|
||||
## Files
|
||||
|
||||
@@ -15,7 +15,7 @@ OCR engine abstraction layer. Two categories of engines:
|
||||
| `cloud_engine.py` | Google Vision TEXT_DETECTION fallback engine (WIF authentication) | Cloud OCR configuration, API quota |
|
||||
| `hybrid_engine.py` | Combines primary + fallback engine with confidence threshold switching | Engine selection logic, fallback behavior |
|
||||
| `engine_factory.py` | Factory function and engine registry for instantiation | Adding new engine types |
|
||||
| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, Gemini configuration |
|
||||
| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction and VIN decoding (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, VIN decode, Gemini configuration |
|
||||
|
||||
## Engine Selection
|
||||
|
||||
@@ -30,4 +30,4 @@ create_engine(config)
|
||||
HybridEngine (tries primary, falls back if confidence < threshold)
|
||||
```
|
||||
|
||||
GeminiEngine is created independently by ManualExtractor, not through the engine factory.
|
||||
GeminiEngine is created independently by ManualExtractor and the VIN decode router, not through the engine factory.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Gemini 2.5 Flash engine for maintenance schedule extraction from PDFs.
|
||||
"""Gemini 2.5 Flash engine for document understanding and VIN decode.
|
||||
|
||||
Standalone module (does NOT extend OcrEngine) because Gemini performs
|
||||
semantic document understanding, not traditional OCR word-box extraction.
|
||||
@@ -37,6 +37,31 @@ Do not include one-time procedures, troubleshooting steps, or warranty informati
|
||||
Return the results as a JSON object with a single "maintenanceSchedule" array.\
|
||||
"""
|
||||
|
||||
# Prompt template sent to Gemini for VIN decoding. ``{vin}`` is substituted via
# ``str.format``; the resulting text has no leading or trailing newline.
_VIN_DECODE_PROMPT = (
    "Given the VIN (Vehicle Identification Number) below, decode it and return the vehicle specifications.\n"
    "\n"
    "VIN: {vin}\n"
    "\n"
    "Return the vehicle's year, make, model, trim level, body type, drive type, fuel type, engine description, and transmission type. If a field cannot be determined from the VIN, return null for that field. Return a confidence score (0.0-1.0) indicating overall decode reliability."
)
|
||||
|
||||
# Structured-output schema handed to Gemini for VIN decoding. Every vehicle
# attribute is nullable; only ``confidence`` is required in the reply.
_VIN_DECODE_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "year": {"type": "integer", "nullable": True},
        # The remaining descriptive attributes all share one nullable-string shape.
        **{
            field_name: {"type": "string", "nullable": True}
            for field_name in (
                "make",
                "model",
                "trimLevel",
                "bodyType",
                "driveType",
                "fuelType",
                "engine",
                "transmission",
            )
        },
        "confidence": {"type": "number"},
    },
    "required": ["confidence"],
}
|
||||
|
||||
_RESPONSE_SCHEMA: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -70,6 +95,22 @@ class GeminiProcessingError(GeminiEngineError):
|
||||
"""Raised when Gemini fails to process a document."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class VinDecodeResult:
|
||||
"""Result from Gemini VIN decode."""
|
||||
|
||||
year: int | None = None
|
||||
make: str | None = None
|
||||
model: str | None = None
|
||||
trim_level: str | None = None
|
||||
body_type: str | None = None
|
||||
drive_type: str | None = None
|
||||
fuel_type: str | None = None
|
||||
engine: str | None = None
|
||||
transmission: str | None = None
|
||||
confidence: float = 0.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class MaintenanceItem:
|
||||
"""A single extracted maintenance schedule item."""
|
||||
@@ -89,13 +130,13 @@ class MaintenanceExtractionResult:
|
||||
|
||||
|
||||
class GeminiEngine:
|
||||
"""Gemini 2.5 Flash wrapper for maintenance schedule extraction.
|
||||
"""Gemini 2.5 Flash wrapper for maintenance schedule extraction and VIN decode.
|
||||
|
||||
Standalone class (not an OcrEngine subclass) because Gemini performs
|
||||
semantic document understanding rather than traditional OCR.
|
||||
|
||||
Uses lazy initialization: the Vertex AI client is not created until
|
||||
the first ``extract_maintenance()`` call.
|
||||
the first call to ``extract_maintenance()`` or ``decode_vin()``.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
@@ -228,3 +269,60 @@ class GeminiEngine:
|
||||
raise GeminiProcessingError(
|
||||
f"Gemini maintenance extraction failed: {exc}"
|
||||
) from exc
|
||||
|
||||
def decode_vin(self, vin: str) -> VinDecodeResult:
|
||||
"""Decode a VIN string into structured vehicle data via Gemini.
|
||||
|
||||
Args:
|
||||
vin: A 17-character Vehicle Identification Number.
|
||||
|
||||
Returns:
|
||||
Structured vehicle specification result.
|
||||
|
||||
Raises:
|
||||
GeminiProcessingError: If Gemini fails to decode the VIN.
|
||||
GeminiUnavailableError: If the engine cannot be initialized.
|
||||
"""
|
||||
model = self._get_model()
|
||||
|
||||
try:
|
||||
from vertexai.generative_models import GenerationConfig # type: ignore[import-untyped]
|
||||
|
||||
vin_config = GenerationConfig(
|
||||
response_mime_type="application/json",
|
||||
response_schema=_VIN_DECODE_SCHEMA,
|
||||
)
|
||||
|
||||
prompt = _VIN_DECODE_PROMPT.format(vin=vin)
|
||||
response = model.generate_content(
|
||||
[prompt],
|
||||
generation_config=vin_config,
|
||||
)
|
||||
|
||||
raw = json.loads(response.text)
|
||||
|
||||
logger.info("Gemini decoded VIN %s (confidence=%.2f)", vin, raw.get("confidence", 0))
|
||||
|
||||
return VinDecodeResult(
|
||||
year=raw.get("year"),
|
||||
make=raw.get("make"),
|
||||
model=raw.get("model"),
|
||||
trim_level=raw.get("trimLevel"),
|
||||
body_type=raw.get("bodyType"),
|
||||
drive_type=raw.get("driveType"),
|
||||
fuel_type=raw.get("fuelType"),
|
||||
engine=raw.get("engine"),
|
||||
transmission=raw.get("transmission"),
|
||||
confidence=raw.get("confidence", 0.0),
|
||||
)
|
||||
|
||||
except (GeminiEngineError,):
|
||||
raise
|
||||
except json.JSONDecodeError as exc:
|
||||
raise GeminiProcessingError(
|
||||
f"Gemini returned invalid JSON for VIN decode: {exc}"
|
||||
) from exc
|
||||
except Exception as exc:
|
||||
raise GeminiProcessingError(
|
||||
f"Gemini VIN decode failed: {exc}"
|
||||
) from exc
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import AsyncIterator
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app.config import settings
|
||||
from app.routers import extract_router, jobs_router
|
||||
from app.routers import decode_router, extract_router, jobs_router
|
||||
from app.services import job_queue
|
||||
|
||||
# Configure logging
|
||||
@@ -36,6 +36,7 @@ app = FastAPI(
|
||||
)
|
||||
|
||||
# Include routers
|
||||
app.include_router(decode_router)
|
||||
app.include_router(extract_router)
|
||||
app.include_router(jobs_router)
|
||||
|
||||
@@ -54,6 +55,7 @@ async def root() -> dict:
|
||||
"version": "1.0.0",
|
||||
"log_level": settings.log_level,
|
||||
"endpoints": [
|
||||
"POST /decode/vin - VIN string decode via Gemini",
|
||||
"POST /extract - Synchronous OCR extraction",
|
||||
"POST /extract/vin - VIN-specific extraction with validation",
|
||||
"POST /extract/receipt - Receipt extraction (fuel, general)",
|
||||
|
||||
@@ -14,6 +14,8 @@ from .schemas import (
|
||||
ReceiptExtractedField,
|
||||
ReceiptExtractionResponse,
|
||||
VinAlternative,
|
||||
VinDecodeRequest,
|
||||
VinDecodeResponse,
|
||||
VinExtractionResponse,
|
||||
)
|
||||
|
||||
@@ -32,5 +34,7 @@ __all__ = [
|
||||
"ReceiptExtractedField",
|
||||
"ReceiptExtractionResponse",
|
||||
"VinAlternative",
|
||||
"VinDecodeRequest",
|
||||
"VinDecodeResponse",
|
||||
"VinExtractionResponse",
|
||||
]
|
||||
|
||||
@@ -169,3 +169,30 @@ class ManualJobResponse(BaseModel):
|
||||
error: Optional[str] = None
|
||||
|
||||
model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class VinDecodeRequest(BaseModel):
    """JSON payload accepted by the VIN decode endpoint."""

    # VIN exactly as supplied by the caller; the schema itself applies no
    # length or character constraints to this string.
    vin: str
|
||||
|
||||
|
||||
class VinDecodeResponse(BaseModel):
    """Payload returned by the VIN decode endpoint.

    Multi-word attributes carry camelCase aliases, and ``populate_by_name``
    lets instances be built with either the alias or the snake_case name.
    """

    # Outcome flag and the VIN this response refers to.
    success: bool
    vin: str

    # Decoded vehicle attributes; each one may be absent.
    year: Optional[int] = None
    make: Optional[str] = None
    model: Optional[str] = None
    trim_level: Optional[str] = Field(default=None, alias="trimLevel")
    body_type: Optional[str] = Field(default=None, alias="bodyType")
    drive_type: Optional[str] = Field(default=None, alias="driveType")
    fuel_type: Optional[str] = Field(default=None, alias="fuelType")
    engine: Optional[str] = None
    transmission: Optional[str] = None

    # Reliability score, validated to lie within [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)

    # Processing duration in milliseconds and an optional error message.
    processing_time_ms: int = Field(alias="processingTimeMs")
    error: Optional[str] = None

    model_config = {"populate_by_name": True}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""OCR API routers."""
|
||||
from .decode import router as decode_router
|
||||
from .extract import router as extract_router
|
||||
from .jobs import router as jobs_router
|
||||
|
||||
__all__ = ["extract_router", "jobs_router"]
|
||||
__all__ = ["decode_router", "extract_router", "jobs_router"]
|
||||
|
||||
67
ocr/app/routers/decode.py
Normal file
67
ocr/app/routers/decode.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""VIN decode router - Gemini-powered VIN string decoding."""
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.engines.gemini_engine import (
|
||||
GeminiEngine,
|
||||
GeminiProcessingError,
|
||||
GeminiUnavailableError,
|
||||
)
|
||||
from app.models import VinDecodeRequest, VinDecodeResponse
|
||||
|
||||
# Module-scoped logger named after this module.
logger = logging.getLogger(__name__)

# Routes registered on this router are served under the ``/decode`` prefix.
router = APIRouter(prefix="/decode", tags=["decode"])

# Exactly 17 characters: digits plus uppercase letters other than I, O and Q.
_VIN_REGEX = re.compile(r"^[A-HJ-NPR-Z0-9]{17}$")

# Shared engine instance (lazy init on first request)
_gemini_engine = GeminiEngine()
|
||||
|
||||
|
||||
@router.post("/vin", response_model=VinDecodeResponse)
async def decode_vin(request: VinDecodeRequest) -> VinDecodeResponse:
    """Decode a VIN string into structured vehicle data using Gemini.

    Accepts a 17-character VIN and returns year, make, model, trim, etc.
    The VIN is upper-cased and stripped of surrounding whitespace before it
    is validated and decoded.

    Args:
        request: Request body carrying the VIN to decode.

    Returns:
        The decoded vehicle fields, a confidence score within [0.0, 1.0]
        and the elapsed processing time in milliseconds.

    Raises:
        HTTPException: 400 when the VIN is malformed, 503 when Gemini cannot
            be initialised, 422 when Gemini fails to process the VIN.
    """
    # Function-scope import: only this handler needs asyncio.
    import asyncio

    vin = request.vin.upper().strip()

    if not _VIN_REGEX.match(vin):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid VIN format: must be 17 alphanumeric characters (excluding I, O, Q). Got: {vin}",
        )

    start_ms = time.monotonic_ns() // 1_000_000

    try:
        # The engine call is synchronous; running it in a worker thread keeps
        # the event loop free to serve other requests while Gemini responds.
        result = await asyncio.to_thread(_gemini_engine.decode_vin, vin)
    except GeminiUnavailableError as exc:
        logger.error("Gemini unavailable for VIN decode: %s", exc)
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    except GeminiProcessingError as exc:
        logger.error("Gemini processing error for VIN %s: %s", vin, exc)
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    elapsed_ms = (time.monotonic_ns() // 1_000_000) - start_ms

    # The response model rejects confidence outside [0.0, 1.0]; normalise the
    # model-reported score so an odd value cannot surface as a 500.
    reported = result.confidence
    if isinstance(reported, (int, float)):
        confidence = float(min(1.0, max(0.0, reported)))
    else:
        confidence = 0.0

    return VinDecodeResponse(
        success=True,
        vin=vin,
        year=result.year,
        make=result.make,
        model=result.model,
        trimLevel=result.trim_level,
        bodyType=result.body_type,
        driveType=result.drive_type,
        fuelType=result.fuel_type,
        engine=result.engine,
        transmission=result.transmission,
        confidence=confidence,
        processingTimeMs=elapsed_ms,
        error=None,
    )
|
||||
199
ocr/tests/test_vin_decode.py
Normal file
199
ocr/tests/test_vin_decode.py
Normal file
@@ -0,0 +1,199 @@
|
||||
"""Tests for the VIN decode endpoint (POST /decode/vin).
|
||||
|
||||
Covers: valid VIN returns 200 with correct response shape,
|
||||
invalid VIN format returns 400, Gemini unavailable returns 503,
|
||||
and Gemini processing error returns 422.
|
||||
All GeminiEngine calls are mocked.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.engines.gemini_engine import (
|
||||
GeminiProcessingError,
|
||||
GeminiUnavailableError,
|
||||
VinDecodeResult,
|
||||
)
|
||||
from app.main import app
|
||||
|
||||
# HTTP test client bound to the FastAPI application under test.
client = TestClient(app)

# A valid 17-character VIN (no I, O, Q)
_VALID_VIN = "1HGBH41JXMN109186"

# Fully populated decode result used as the mocked engine's return value.
_FULL_RESULT = VinDecodeResult(
    year=2021,
    make="Honda",
    model="Civic",
    trim_level="EX",
    body_type="Sedan",
    drive_type="FWD",
    fuel_type="Gasoline",
    engine="2.0L I4",
    transmission="CVT",
    confidence=0.95,
)
|
||||
|
||||
|
||||
# --- Valid VIN ---
|
||||
|
||||
|
||||
class TestDecodeVinSuccess:
    """Happy path: a well-formed VIN yields HTTP 200 and the expected payload."""

    def test_valid_vin_returns_200(self):
        """Normal: every decoded vehicle field is returned under its camelCase key."""
        with patch("app.routers.decode._gemini_engine") as engine:
            engine.decode_vin.return_value = _FULL_RESULT
            resp = client.post("/decode/vin", json={"vin": _VALID_VIN})

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True

        expected_fields = {
            "vin": _VALID_VIN,
            "year": 2021,
            "make": "Honda",
            "model": "Civic",
            "trimLevel": "EX",
            "bodyType": "Sedan",
            "driveType": "FWD",
            "fuelType": "Gasoline",
            "engine": "2.0L I4",
            "transmission": "CVT",
            "confidence": 0.95,
        }
        for key, value in expected_fields.items():
            assert body[key] == value

        assert "processingTimeMs" in body
        assert body["error"] is None

    def test_vin_uppercased_before_decode(self):
        """A lowercase VIN is normalised to uppercase before reaching the engine."""
        with patch("app.routers.decode._gemini_engine") as engine:
            engine.decode_vin.return_value = _FULL_RESULT
            resp = client.post("/decode/vin", json={"vin": _VALID_VIN.lower()})

        assert resp.status_code == 200
        assert resp.json()["vin"] == _VALID_VIN
        engine.decode_vin.assert_called_once_with(_VALID_VIN)

    def test_nullable_fields_allowed(self):
        """Edge: a result carrying only a confidence score is still a valid response."""
        sparse_result = VinDecodeResult(confidence=0.3)
        with patch("app.routers.decode._gemini_engine") as engine:
            engine.decode_vin.return_value = sparse_result
            resp = client.post("/decode/vin", json={"vin": _VALID_VIN})

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert body["year"] is None
        assert body["make"] is None
        assert body["confidence"] == 0.3
|
||||
|
||||
|
||||
# --- Invalid VIN format ---
|
||||
|
||||
|
||||
class TestDecodeVinValidation:
    """Malformed VINs are rejected with HTTP 400."""

    def test_too_short_vin_returns_400(self):
        """A VIN with fewer than 17 characters is refused."""
        payload = {"vin": "1HGBH41JXM"}
        resp = client.post("/decode/vin", json=payload)

        assert resp.status_code == 400
        assert "Invalid VIN format" in resp.json()["detail"]

    def test_too_long_vin_returns_400(self):
        """A VIN with more than 17 characters is refused."""
        payload = {"vin": "1HGBH41JXMN109186X"}
        resp = client.post("/decode/vin", json=payload)

        assert resp.status_code == 400

    def test_vin_with_letter_i_returns_400(self):
        """The letter I is not a legal VIN character."""
        # First position holds an I, which makes the VIN invalid.
        payload = {"vin": "IHGBH41JXMN109186"}
        resp = client.post("/decode/vin", json=payload)

        assert resp.status_code == 400
        assert "Invalid VIN format" in resp.json()["detail"]

    def test_vin_with_letter_o_returns_400(self):
        """The letter O is not a legal VIN character."""
        payload = {"vin": "OHGBH41JXMN109186"}
        resp = client.post("/decode/vin", json=payload)

        assert resp.status_code == 400

    def test_vin_with_letter_q_returns_400(self):
        """The letter Q is not a legal VIN character."""
        payload = {"vin": "QHGBH41JXMN109186"}
        resp = client.post("/decode/vin", json=payload)

        assert resp.status_code == 400

    def test_empty_vin_returns_400(self):
        """An empty string is refused."""
        payload = {"vin": ""}
        resp = client.post("/decode/vin", json=payload)

        assert resp.status_code == 400

    def test_vin_with_special_chars_returns_400(self):
        """Punctuation inside the VIN is refused."""
        payload = {"vin": "1HGBH41J-MN109186"}
        resp = client.post("/decode/vin", json=payload)

        assert resp.status_code == 400
|
||||
|
||||
|
||||
# --- Gemini unavailable ---
|
||||
|
||||
|
||||
class TestDecodeVinGeminiUnavailable:
    """An unavailable Gemini backend surfaces as HTTP 503."""

    def test_gemini_unavailable_returns_503(self):
        """Engine initialisation failure is reported as 503 with the error text."""
        failure = GeminiUnavailableError("Google credential config not found")
        with patch("app.routers.decode._gemini_engine") as engine:
            engine.decode_vin.side_effect = failure
            resp = client.post("/decode/vin", json={"vin": _VALID_VIN})

        assert resp.status_code == 503
        assert "Google credential config not found" in resp.json()["detail"]
|
||||
|
||||
|
||||
# --- Gemini processing error ---
|
||||
|
||||
|
||||
class TestDecodeVinGeminiProcessingError:
    """Gemini processing failures surface as HTTP 422."""

    def test_gemini_processing_error_returns_422(self):
        """Invalid output from Gemini is reported as 422."""
        failure = GeminiProcessingError(
            "Gemini returned invalid JSON for VIN decode: ..."
        )
        with patch("app.routers.decode._gemini_engine") as engine:
            engine.decode_vin.side_effect = failure
            resp = client.post("/decode/vin", json={"vin": _VALID_VIN})

        assert resp.status_code == 422
        assert "Gemini returned invalid JSON" in resp.json()["detail"]

    def test_gemini_api_failure_returns_422(self):
        """A runtime failure of the Gemini API call is reported as 422."""
        failure = GeminiProcessingError(
            "Gemini VIN decode failed: API quota exceeded"
        )
        with patch("app.routers.decode._gemini_engine") as engine:
            engine.decode_vin.side_effect = failure
            resp = client.post("/decode/vin", json={"vin": _VALID_VIN})

        assert resp.status_code == 422
        assert "Gemini VIN decode failed" in resp.json()["detail"]
|
||||
Reference in New Issue
Block a user