Merge pull request 'feat: Expand OCR with fuel receipt scanning and maintenance extraction (#129)' (#147) from issue-129-expand-ocr-fuel-receipt-maintenance into main
All checks were successful
Deploy to Staging / Build Images (push) Successful in 36s
Deploy to Staging / Deploy to Staging (push) Successful in 51s
Deploy to Staging / Verify Staging (push) Successful in 8s
Deploy to Staging / Notify Staging Ready (push) Successful in 7s
Deploy to Staging / Notify Staging Failure (push) Has been skipped

Reviewed-on: #147
This commit was merged in pull request #147.
This commit is contained in:
2026-02-13 02:25:54 +00:00
56 changed files with 4617 additions and 1288 deletions

View File

@@ -0,0 +1,23 @@
{
"testModules": [
{
"moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/maintenance/components/MaintenanceScheduleReviewScreen.test.tsx",
"tests": [
{
"name": "Module failed to load (Error)",
"fullName": "Module failed to load (Error)",
"state": "failed",
"errors": [
{
"message": "File not found: tsconfig.json (resolved as: /Users/egullickson/Documents/Technology/coding/motovaultpro/tsconfig.json)",
"name": "Error",
"stack": "Error: File not found: tsconfig.json (resolved as: /Users/egullickson/Documents/Technology/coding/motovaultpro/tsconfig.json)\n at ConfigSet.resolvePath (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/ts-jest/dist/legacy/config/config-set.js:616:19)\n at ConfigSet._setupConfigSet (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/ts-jest/dist/legacy/config/config-set.js:322:71)\n at new ConfigSet (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/ts-jest/dist/legacy/config/config-set.js:206:14)\n at TsJestTransformer._createConfigSet (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/ts-jest/dist/legacy/ts-jest-transformer.js:119:16)\n at TsJestTransformer._configsFor (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/ts-jest/dist/legacy/ts-jest-transformer.js:98:34)\n at TsJestTransformer.getCacheKey (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/ts-jest/dist/legacy/ts-jest-transformer.js:249:30)\n at ScriptTransformer._getCacheKey (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/@jest/transform/build/index.js:195:41)\n at ScriptTransformer._getFileCachePath (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/@jest/transform/build/index.js:231:27)\n at ScriptTransformer.transformSource (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/@jest/transform/build/index.js:402:32)\n at ScriptTransformer._transformAndBuildScript (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/@jest/transform/build/index.js:519:40)\n at ScriptTransformer.transform (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/@jest/transform/build/index.js:558:19)\n at Runtime.transformFile 
(/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/jest-runtime/build/index.js:1290:53)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/jest-runtime/build/index.js:1243:34)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/jest-runtime/build/index.js:944:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/jest-runtime/build/index.js:832:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/jest-circus/build/runner.js:84:33)\n at processTicksAndRejections (node:internal/process/task_queues:104:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/jest-runner/build/index.js:275:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/node_modules/jest-runner/build/index.js:343:7)"
}
]
}
]
}
],
"unhandledErrors": [],
"reason": "failed"
}

View File

@@ -11,10 +11,10 @@
| Directory | What | When to read | | Directory | What | When to read |
| --------- | ---- | ------------ | | --------- | ---- | ------------ |
| `auth/` | Authentication utilities | JWT handling, user context | | `auth/` | Authentication utilities | JWT handling, user context |
| `config/` | Configuration loading (env, database, redis) | Environment setup, connection pools | | `config/` | Configuration loading (env, database, redis) and feature tier gating (fuelLog.receiptScan, document.scanMaintenanceSchedule, vehicle.vinDecode) | Environment setup, connection pools, tier requirements |
| `logging/` | Winston structured logging | Log configuration, debugging | | `logging/` | Winston structured logging | Log configuration, debugging |
| `middleware/` | Fastify middleware | Request processing, user extraction | | `middleware/` | Fastify middleware | Request processing, user extraction |
| `plugins/` | Fastify plugins (auth, error, logging) | Plugin registration, hooks | | `plugins/` | Fastify plugins (auth, error, logging, tier guard) | Plugin registration, hooks, tier gating |
| `scheduler/` | Job scheduling infrastructure | Scheduled tasks, cron jobs | | `scheduler/` | Job scheduling infrastructure | Scheduled tasks, cron jobs |
| `storage/` | Storage abstraction and adapters | File storage, S3/filesystem | | `storage/` | Storage abstraction and adapters | File storage, S3/filesystem |
| `user-preferences/` | User preferences data and migrations | User settings storage | | `user-preferences/` | User preferences data and migrations | User settings storage |

View File

@@ -31,6 +31,11 @@ export const FEATURE_TIERS: Record<string, FeatureConfig> = {
name: 'VIN Decode', name: 'VIN Decode',
upgradePrompt: 'Upgrade to Pro to automatically decode VIN and populate vehicle details from the NHTSA database.', upgradePrompt: 'Upgrade to Pro to automatically decode VIN and populate vehicle details from the NHTSA database.',
}, },
'fuelLog.receiptScan': {
minTier: 'pro',
name: 'Receipt Scan',
upgradePrompt: 'Upgrade to Pro to scan fuel receipts and auto-fill your fuel log entries.',
},
} as const; } as const;
/** /**

View File

@@ -34,6 +34,30 @@ describe('feature-tiers', () => {
expect(feature.name).toBe('Scan for Maintenance Schedule'); expect(feature.name).toBe('Scan for Maintenance Schedule');
expect(feature.upgradePrompt).toBeTruthy(); expect(feature.upgradePrompt).toBeTruthy();
}); });
it('includes fuelLog.receiptScan feature', () => {
const feature = FEATURE_TIERS['fuelLog.receiptScan'];
expect(feature).toBeDefined();
expect(feature.minTier).toBe('pro');
expect(feature.name).toBe('Receipt Scan');
expect(feature.upgradePrompt).toBeTruthy();
});
});
describe('canAccessFeature - fuelLog.receiptScan', () => {
const featureKey = 'fuelLog.receiptScan';
it('denies access for free tier user', () => {
expect(canAccessFeature('free', featureKey)).toBe(false);
});
it('allows access for pro tier user', () => {
expect(canAccessFeature('pro', featureKey)).toBe(true);
});
it('allows access for enterprise tier user (inherits pro)', () => {
expect(canAccessFeature('enterprise', featureKey)).toBe(true);
});
}); });
describe('getTierLevel', () => { describe('getTierLevel', () => {

View File

@@ -0,0 +1,191 @@
import { FastifyRequest, FastifyReply } from 'fastify';
import { requireTier } from './require-tier';
// Mock logger to suppress output during tests
jest.mock('../logging/logger', () => ({
logger: {
error: jest.fn(),
warn: jest.fn(),
debug: jest.fn(),
info: jest.fn(),
},
}));
/**
 * Builds a minimal request stub for the tier guard tests.
 *
 * With no argument the stub carries `userContext: undefined`; otherwise it
 * carries a fixed user context whose `subscriptionTier` is the given value.
 */
const createRequest = (subscriptionTier?: string): Partial<FastifyRequest> => {
  const userContext =
    subscriptionTier === undefined
      ? undefined
      : {
          userId: 'auth0|user123456789',
          email: 'user@example.com',
          emailVerified: true,
          onboardingCompleted: true,
          isAdmin: false,
          // Tests pass arbitrary strings, so the tier type is deliberately bypassed.
          subscriptionTier: subscriptionTier as any,
        };
  return { userContext };
};
/**
 * Builds a chainable reply stub.
 *
 * `code()` stores the status on the stub as `statusCode`; `send()` stores the
 * body as `payload` and sets `sent` to true. Both return the stub so calls chain.
 */
const createReply = (): Partial<FastifyReply> & { statusCode?: number; payload?: unknown } => {
  const code = jest.fn(function (this: any, status: number) {
    this.statusCode = status;
    return this;
  });
  const send = jest.fn(function (this: any, payload: unknown) {
    this.payload = payload;
    this.sent = true;
    return this;
  });
  const reply: any = { sent: false, code, send };
  return reply;
};
describe('requireTier middleware', () => {
  // Runs the guard once for a feature key / tier pair and returns the reply mock.
  // Omitting `tier` produces a request whose userContext is undefined.
  const runGuard = async (featureKey: string, tier?: string) => {
    const handler = requireTier(featureKey);
    const request = createRequest(tier);
    const reply = createReply();
    await handler(request as FastifyRequest, reply as FastifyReply);
    return reply;
  };

  // A request that passes the guard must leave the reply untouched.
  const expectUntouched = (reply: ReturnType<typeof createReply>) => {
    expect(reply.code).not.toHaveBeenCalled();
    expect(reply.send).not.toHaveBeenCalled();
  };

  // A blocked request must get a 403 whose body contains at least `body`.
  const expectDenied = (reply: ReturnType<typeof createReply>, body: Record<string, unknown>) => {
    expect(reply.code).toHaveBeenCalledWith(403);
    expect(reply.send).toHaveBeenCalledWith(expect.objectContaining(body));
  };

  afterEach(() => {
    jest.clearAllMocks();
  });

  describe('pro user passes fuelLog.receiptScan check', () => {
    it('allows pro user through without sending a response', async () => {
      expectUntouched(await runGuard('fuelLog.receiptScan', 'pro'));
    });
  });

  describe('enterprise user passes all checks (tier inheritance)', () => {
    it('allows enterprise user access to pro-gated features', async () => {
      expectUntouched(await runGuard('fuelLog.receiptScan', 'enterprise'));
    });

    it('allows enterprise user access to document.scanMaintenanceSchedule', async () => {
      expectUntouched(await runGuard('document.scanMaintenanceSchedule', 'enterprise'));
    });

    it('allows enterprise user access to vehicle.vinDecode', async () => {
      expectUntouched(await runGuard('vehicle.vinDecode', 'enterprise'));
    });
  });

  describe('free user blocked with 403 and correct response body', () => {
    it('blocks free user from fuelLog.receiptScan', async () => {
      const reply = await runGuard('fuelLog.receiptScan', 'free');
      expectDenied(reply, {
        error: 'TIER_REQUIRED',
        requiredTier: 'pro',
        currentTier: 'free',
        featureName: 'Receipt Scan',
        upgradePrompt: expect.any(String),
      });
    });

    it('blocks free user from document.scanMaintenanceSchedule', async () => {
      const reply = await runGuard('document.scanMaintenanceSchedule', 'free');
      expectDenied(reply, {
        error: 'TIER_REQUIRED',
        requiredTier: 'pro',
        currentTier: 'free',
        featureName: 'Scan for Maintenance Schedule',
        upgradePrompt: expect.any(String),
      });
    });

    it('response body includes all required fields', async () => {
      const reply = await runGuard('fuelLog.receiptScan', 'free');
      // Inspect the first argument of the first send() call directly.
      const body = (reply.send as jest.Mock).mock.calls[0][0];
      expect(body).toHaveProperty('requiredTier', 'pro');
      expect(body).toHaveProperty('currentTier', 'free');
      expect(body).toHaveProperty('featureName', 'Receipt Scan');
      expect(body).toHaveProperty('upgradePrompt');
      expect(typeof body.upgradePrompt).toBe('string');
      expect(body.upgradePrompt.length).toBeGreaterThan(0);
    });
  });

  describe('unknown feature key returns 500', () => {
    it('returns 500 INTERNAL_ERROR for unregistered feature', async () => {
      const reply = await runGuard('unknown.nonexistent.feature', 'pro');
      expect(reply.code).toHaveBeenCalledWith(500);
      expect(reply.send).toHaveBeenCalledWith(
        expect.objectContaining({
          error: 'INTERNAL_ERROR',
          message: 'Unknown feature configuration',
        }),
      );
    });
  });

  describe('missing user.tier on request returns 403', () => {
    it('defaults to free tier when userContext is undefined', async () => {
      const reply = await runGuard('fuelLog.receiptScan'); // no tier = undefined userContext
      expectDenied(reply, {
        error: 'TIER_REQUIRED',
        currentTier: 'free',
        requiredTier: 'pro',
      });
    });
  });
});

View File

@@ -0,0 +1,64 @@
/**
* @ai-summary Standalone tier guard middleware for route-level feature gating
* @ai-context Returns a Fastify preHandler that checks user subscription tier against feature requirements.
* Must be composed AFTER requireAuth in preHandler arrays.
*/
import { FastifyRequest, FastifyReply } from 'fastify';
import { canAccessFeature, getFeatureConfig } from '../config/feature-tiers';
import { logger } from '../logging/logger';
/**
 * Masks a user identifier for log output by keeping only its first 8 characters.
 *
 * Returns undefined when no identifier is available, so log entries for
 * requests without a user context do not carry the literal string "undefined...".
 */
function maskUserId(userId: string | null | undefined): string | undefined {
  return userId == null ? undefined : userId.substring(0, 8) + '...';
}

/**
 * Creates a preHandler middleware that enforces subscription tier requirements.
 *
 * Reads the user's tier from request.userContext.subscriptionTier (set by auth middleware).
 * Must be placed AFTER requireAuth in the preHandler chain.
 *
 * Outcomes, decided per request:
 *  - unknown feature key: logs an error and replies 500 `INTERNAL_ERROR`
 *  - tier too low (a missing tier counts as 'free'): logs a warning and replies
 *    403 `TIER_REQUIRED` with requiredTier, currentTier, featureName and upgradePrompt
 *  - otherwise: logs at debug level and sends nothing
 *
 * Usage:
 *   fastify.post('/premium-route', {
 *     preHandler: [requireAuth, requireTier('fuelLog.receiptScan')],
 *     handler: controller.method
 *   });
 *
 * @param featureKey - Key from FEATURE_TIERS registry (e.g. 'fuelLog.receiptScan')
 * @returns Fastify preHandler function
 */
export function requireTier(featureKey: string) {
  return async (request: FastifyRequest, reply: FastifyReply): Promise<void> => {
    // Validate feature key exists in registry
    const featureConfig = getFeatureConfig(featureKey);
    if (!featureConfig) {
      logger.error('requireTier: unknown feature key', { featureKey });
      return reply.code(500).send({
        error: 'INTERNAL_ERROR',
        message: 'Unknown feature configuration',
      });
    }

    // Get user tier from userContext (populated by auth middleware).
    // `||` is intentional: any falsy tier, including an empty string, is treated as 'free'.
    const currentTier = request.userContext?.subscriptionTier || 'free';
    const userId = maskUserId(request.userContext?.userId);

    if (!canAccessFeature(currentTier, featureKey)) {
      logger.warn('requireTier: access denied', {
        userId,
        currentTier,
        requiredTier: featureConfig.minTier,
        featureKey,
      });
      return reply.code(403).send({
        error: 'TIER_REQUIRED',
        requiredTier: featureConfig.minTier,
        currentTier,
        featureName: featureConfig.name,
        upgradePrompt: featureConfig.upgradePrompt,
      });
    }

    logger.debug('requireTier: access granted', {
      userId,
      currentTier,
      featureKey,
    });
  };
}

View File

@@ -12,7 +12,7 @@
| `fuel-logs/` | Fuel consumption tracking | Fuel log CRUD, statistics | | `fuel-logs/` | Fuel consumption tracking | Fuel log CRUD, statistics |
| `maintenance/` | Maintenance record management | Service records, reminders | | `maintenance/` | Maintenance record management | Service records, reminders |
| `notifications/` | Email and push notifications | Alert system, email templates | | `notifications/` | Email and push notifications | Alert system, email templates |
| `ocr/` | OCR proxy to mvp-ocr service | Image text extraction, async jobs | | `ocr/` | OCR proxy to mvp-ocr service (VIN, receipt, manual extraction) | Image text extraction, receipt scanning, manual PDF extraction, async jobs |
| `onboarding/` | User onboarding flow | First-time user setup | | `onboarding/` | User onboarding flow | First-time user setup |
| `ownership-costs/` | Ownership cost tracking and reports | Cost aggregation, expense analysis | | `ownership-costs/` | Ownership cost tracking and reports | Cost aggregation, expense analysis |
| `platform/` | Vehicle data and VIN decoding | Make/model lookup, VIN validation | | `platform/` | Vehicle data and VIN decoding | Make/model lookup, VIN validation |

View File

@@ -1,16 +1,47 @@
# ocr/ # ocr/
Backend proxy for the Python OCR microservice. Handles authentication, tier gating, file validation, and request forwarding for VIN extraction, fuel receipt scanning, and maintenance manual extraction.
## Files ## Files
| File | What | When to read | | File | What | When to read |
| ---- | ---- | ------------ | | ---- | ---- | ------------ |
| `README.md` | Feature documentation | Understanding OCR proxy | | `README.md` | Feature documentation with architecture diagrams | Understanding OCR proxy, data flows |
| `index.ts` | Feature barrel export | Importing OCR services | | `index.ts` | Feature barrel export | Importing OCR services |
## Subdirectories ## Subdirectories
| Directory | What | When to read | | Directory | What | When to read |
| --------- | ---- | ------------ | | --------- | ---- | ------------ |
| `api/` | HTTP endpoints and routes | API changes | | `api/` | HTTP endpoints, routes, request validation | API changes, adding endpoints |
| `domain/` | Business logic, types | Core OCR proxy logic | | `domain/` | Business logic, TypeScript types | Core OCR proxy logic, type definitions |
| `external/` | External OCR service client | OCR service integration | | `external/` | HTTP client to Python OCR service | OCR service integration, error handling |
| `tests/` | Unit tests for receipt and manual extraction | Test changes, adding test coverage |
## api/
| File | What | When to read |
| ---- | ---- | ------------ |
| `ocr.controller.ts` | Request handlers for all OCR endpoints (extract, extractVin, extractReceipt, extractManual, submitJob, getJobStatus) | Adding/modifying endpoint behavior |
| `ocr.routes.ts` | Fastify route registration with auth and tier guard preHandlers | Route configuration, middleware changes |
| `ocr.validation.ts` | Request/response type definitions for route schemas | Changing request/response shapes |
## domain/
| File | What | When to read |
| ---- | ---- | ------------ |
| `ocr.service.ts` | Business logic layer: file validation, size limits (10MB sync, 200MB async), content type checks, service delegation | Core logic changes, validation rules |
| `ocr.types.ts` | TypeScript types: OcrResponse, VinExtractionResponse, ReceiptExtractionResponse, ManualExtractionResult, JobResponse, ManualJobResponse | Type changes, adding new response shapes |
## external/
| File | What | When to read |
| ---- | ---- | ------------ |
| `ocr-client.ts` | HTTP client to mvp-ocr Python service (extract, extractVin, extractReceipt, submitJob, submitManualJob, getJobStatus, isHealthy) | OCR service communication, error handling |
## tests/
| File | What | When to read |
| ---- | ---- | ------------ |
| `unit/ocr-receipt.test.ts` | Receipt extraction tests with mock client | Receipt flow changes |
| `unit/ocr-manual.test.ts` | Manual PDF extraction tests | Manual extraction flow changes |

View File

@@ -1,54 +1,180 @@
# OCR Feature # OCR Feature
Backend proxy for OCR service communication. Handles authentication, validation, and file streaming to the OCR container. Backend proxy for the Python OCR microservice. Handles authentication, tier gating, file validation, and request forwarding for three extraction types: VIN extraction, fuel receipt scanning, and maintenance manual extraction.
## API Endpoints ## API Endpoints
| Method | Endpoint | Description | | Method | Endpoint | Description | Auth | Tier | Max Size |
|--------|----------|-------------| |--------|----------|-------------|------|------|----------|
| POST | `/api/ocr/extract` | Synchronous OCR extraction (max 10MB) | | POST | `/api/ocr/extract` | Synchronous general OCR extraction | Required | - | 10MB |
| POST | `/api/ocr/jobs` | Submit async OCR job (max 200MB) | | POST | `/api/ocr/extract/vin` | VIN-specific extraction | Required | - | 10MB |
| GET | `/api/ocr/jobs/:jobId` | Poll async job status | | POST | `/api/ocr/extract/receipt` | Fuel receipt extraction | Required | Pro | 10MB |
| POST | `/api/ocr/extract/manual` | Async maintenance manual extraction | Required | Pro | 200MB |
| POST | `/api/ocr/jobs` | Submit async OCR job | Required | - | 200MB |
| GET | `/api/ocr/jobs/:jobId` | Poll async job status | Required | - | - |
## Architecture ## Architecture
``` ```
api/ Frontend
ocr.controller.ts # Request handlers |
ocr.routes.ts # Route registration v
ocr.validation.ts # Request validation types Backend Proxy (this feature)
domain/ |
ocr.service.ts # Business logic +-- ocr.routes.ts --------> Route registration (auth + tier preHandlers)
ocr.types.ts # TypeScript types |
external/ +-- ocr.controller.ts ----> Request handlers (file validation, size checks)
ocr-client.ts # HTTP client to OCR service |
+-- ocr.service.ts -------> Business logic (content type validation, delegation)
|
+-- ocr-client.ts --------> HTTP client to mvp-ocr:8000
|
v
Python OCR Service
``` ```
## Receipt OCR Flow
```
Mobile Camera / File Upload
|
v
POST /api/ocr/extract/receipt (multipart/form-data)
|
v
OcrController.extractReceipt()
- Validates file size (<= 10MB)
- Validates content type (JPEG, PNG, HEIC)
|
v
OcrService.extractReceipt()
|
v
OcrClient.extractReceipt() --> HTTP POST --> Python /extract/receipt
| |
v v
ReceiptExtractionResponse ReceiptExtractor + HybridEngine
| (Vision API / PaddleOCR fallback)
v
Frontend receives extractedFields:
merchantName, transactionDate, totalAmount,
fuelQuantity, pricePerUnit, fuelGrade
```
After receipt extraction, the frontend calls `POST /api/stations/match` with the `merchantName` to auto-match a gas station via Google Places API. The station match is a separate request handled by the stations feature.
## Manual Extraction Flow
```
PDF Upload + "Scan for Maintenance Schedule"
|
v
POST /api/ocr/extract/manual (multipart/form-data)
- Requires Pro tier (document.scanMaintenanceSchedule)
- Validates file size (<= 200MB)
- Validates content type (application/pdf)
- Validates PDF magic bytes (%PDF header)
|
v
OcrService.submitManualJob()
|
v
OcrClient.submitManualJob() --> HTTP POST --> Python /extract/manual
| |
v v
{ jobId, status: 'pending' } GeminiEngine (Vertex AI)
Gemini 2.5 Flash
Frontend polls: (structured JSON output)
GET /api/ocr/jobs/:jobId |
(progress: 10% -> 50% -> 95% -> 100%) v
| ManualExtractionResult
v { vehicleInfo, maintenanceSchedules[] }
ManualJobResponse with result
|
v
Frontend displays MaintenanceScheduleReviewScreen
- User selects/edits items
- Batch creates maintenance schedules
```
Jobs expire after 2 hours (Redis TTL). Expired job polling returns HTTP 410 Gone.
## Supported File Types ## Supported File Types
### Sync Endpoints (extract, extractVin, extractReceipt)
- HEIC (converted server-side) - HEIC (converted server-side)
- JPEG - JPEG
- PNG - PNG
- PDF (first page only)
## Response Format ### Async Endpoints (extractManual)
- PDF (validated via magic bytes)
## Response Types
### ReceiptExtractionResponse
```typescript ```typescript
interface OcrResponse { {
success: boolean; success: boolean;
documentType: 'vin' | 'receipt' | 'manual' | 'unknown'; receiptType: string;
extractedFields: {
merchantName: { value: string; confidence: number };
transactionDate: { value: string; confidence: number };
totalAmount: { value: string; confidence: number };
fuelQuantity: { value: string; confidence: number };
pricePerUnit: { value: string; confidence: number };
fuelGrade: { value: string; confidence: number };
};
rawText: string; rawText: string;
confidence: number; // 0.0 - 1.0
extractedFields: Record<string, { value: string; confidence: number }>;
processingTimeMs: number; processingTimeMs: number;
} }
``` ```
## Async Job Flow ### ManualJobResponse
```typescript
{
jobId: string;
status: 'pending' | 'processing' | 'completed' | 'failed';
progress?: { percent: number; message: string };
estimatedSeconds?: number;
result?: ManualExtractionResult;
error?: string;
}
```
1. POST `/api/ocr/jobs` with file ### ManualExtractionResult
2. Receive `{ jobId, status: 'pending' }` ```typescript
3. Poll GET `/api/ocr/jobs/:jobId` {
4. When `status: 'completed'`, result contains OCR data success: boolean;
vehicleInfo?: { make: string; model: string; year: number };
maintenanceSchedules: Array<{
serviceName: string;
intervalMiles: number | null;
intervalMonths: number | null;
details: string;
confidence: number;
subtypes: string[];
}>;
rawTables: any[];
processingTimeMs: number;
totalPages: number;
pagesProcessed: number;
}
```
Jobs expire after 1 hour. ## Error Handling
The backend proxy translates Python service error codes:
| Python Status | Backend Status | Meaning |
|---------------|----------------|---------|
| 413 | 413 | File too large |
| 415 | 415 | Unsupported media type |
| 422 | 422 | Extraction failed |
| 410 | 410 | Job expired (TTL) |
| Other | 500 | Internal server error |
## Tier Gating
Manual extraction requires Pro tier (`document.scanMaintenanceSchedule`). The tier guard middleware (`requireTier` plugin) validates the user's subscription tier before processing. Free-tier users receive HTTP 403 with `TIER_REQUIRED` error code and an upgrade prompt.
VIN extraction is available to all tiers. Receipt extraction requires Pro tier (`fuelLog.receiptScan`).

View File

@@ -15,6 +15,14 @@ const SUPPORTED_TYPES = new Set([
'application/pdf', 'application/pdf',
]); ]);
/**
 * MIME types accepted for receipt extraction.
 * Images only: JPEG, PNG and HEIC/HEIF. PDF is deliberately excluded.
 */
const SUPPORTED_IMAGE_TYPES = new Set<string>(['image/jpeg', 'image/png', 'image/heic', 'image/heif']);
export class OcrController { export class OcrController {
/** /**
* POST /api/ocr/extract * POST /api/ocr/extract
@@ -223,6 +231,242 @@ export class OcrController {
} }
} }
/**
 * POST /api/ocr/extract/receipt
 * Runs receipt-specific OCR on a single uploaded image.
 *
 * Replies 400 when no file or an empty file is uploaded and 415 when the MIME
 * type is not in SUPPORTED_IMAGE_TYPES. Service errors carrying statusCode
 * 413, 415 or 422 are passed through with their message; anything else is
 * logged and answered with a generic 500.
 */
async extractReceipt(
  request: FastifyRequest,
  reply: FastifyReply
) {
  const userId = (request as any).user?.sub as string;
  logger.info('Receipt extract requested', {
    operation: 'ocr.controller.extractReceipt',
    userId,
  });

  const upload = await (request as any).file({ limits: { files: 1 } });
  if (!upload) {
    logger.warn('No file provided for receipt extraction', {
      operation: 'ocr.controller.extractReceipt.no_file',
      userId,
    });
    return reply.code(400).send({
      error: 'Bad Request',
      message: 'No file provided',
    });
  }

  // Reject unsupported types before reading the upload stream.
  const contentType = upload.mimetype as string;
  const isSupportedImage = SUPPORTED_IMAGE_TYPES.has(contentType);
  if (!isSupportedImage) {
    logger.warn('Unsupported file type for receipt extraction', {
      operation: 'ocr.controller.extractReceipt.unsupported_type',
      userId,
      contentType,
      fileName: upload.filename,
    });
    return reply.code(415).send({
      error: 'Unsupported Media Type',
      message: `Unsupported file type: ${contentType}. Supported: JPEG, PNG, HEIC`,
    });
  }

  // Buffer the whole upload in memory.
  const parts: Buffer[] = [];
  for await (const part of upload.file) {
    parts.push(part);
  }
  const fileBuffer = Buffer.concat(parts);

  if (fileBuffer.length === 0) {
    logger.warn('Empty file provided for receipt extraction', {
      operation: 'ocr.controller.extractReceipt.empty_file',
      userId,
      fileName: upload.filename,
    });
    return reply.code(400).send({
      error: 'Bad Request',
      message: 'Empty file provided',
    });
  }

  // Optional receipt_type form field; read only after the stream has been consumed.
  const receiptType = upload.fields?.receipt_type?.value as string | undefined;

  try {
    const result = await ocrService.extractReceipt(userId, {
      fileBuffer,
      contentType,
      receiptType,
    });

    logger.info('Receipt extract completed', {
      operation: 'ocr.controller.extractReceipt.success',
      userId,
      success: result.success,
      receiptType: result.receiptType,
      processingTimeMs: result.processingTimeMs,
    });

    return reply.code(200).send(result);
  } catch (error: any) {
    // Status codes forwarded to the client as-is, with their response labels.
    const passthroughLabels = new Map<number, string>([
      [413, 'Payload Too Large'],
      [415, 'Unsupported Media Type'],
      [422, 'Unprocessable Entity'],
    ]);
    const status = error.statusCode;
    const label = passthroughLabels.get(status);
    if (label !== undefined) {
      return reply.code(status).send({
        error: label,
        message: error.message,
      });
    }

    logger.error('Receipt extract failed', {
      operation: 'ocr.controller.extractReceipt.error',
      userId,
      error: error.message,
    });
    return reply.code(500).send({
      error: 'Internal Server Error',
      message: 'Receipt extraction failed',
    });
  }
}
/**
 * POST /api/ocr/extract/manual
 * Submits an async extraction job for a PDF owner's manual.
 * Requires Pro tier (document.scanMaintenanceSchedule).
 *
 * Replies 400 when no file is uploaded, the file is empty, or it is neither
 * declared as application/pdf nor named *.pdf. Replies 415 when the content
 * does not start with the %PDF header, and 202 with the job descriptor on
 * success. Service errors carrying statusCode 413 or 400 are passed through;
 * anything else is logged and answered with a generic 500.
 */
async extractManual(
  request: FastifyRequest,
  reply: FastifyReply
) {
  const userId = (request as any).user?.sub as string;
  logger.info('Manual extract requested', {
    operation: 'ocr.controller.extractManual',
    userId,
  });

  const upload = await (request as any).file({ limits: { files: 1 } });
  if (!upload) {
    logger.warn('No file provided for manual extraction', {
      operation: 'ocr.controller.extractManual.no_file',
      userId,
    });
    return reply.code(400).send({
      error: 'Bad Request',
      message: 'No file provided',
    });
  }

  // Accept the upload if either the declared MIME type or the file extension says PDF.
  const contentType = upload.mimetype as string;
  const fileName = upload.filename as string | undefined;
  const isPdfMime = contentType === 'application/pdf';
  const isPdfExtension = fileName?.toLowerCase().endsWith('.pdf') ?? false;
  if (!(isPdfMime || isPdfExtension)) {
    logger.warn('Non-PDF file provided for manual extraction', {
      operation: 'ocr.controller.extractManual.not_pdf',
      userId,
      contentType,
      fileName,
    });
    return reply.code(400).send({
      error: 'Bad Request',
      message: `Manual extraction requires PDF files. Received: ${contentType}`,
    });
  }

  // Buffer the whole upload in memory.
  const parts: Buffer[] = [];
  for await (const part of upload.file) {
    parts.push(part);
  }
  const fileBuffer = Buffer.concat(parts);

  if (fileBuffer.length === 0) {
    logger.warn('Empty file provided for manual extraction', {
      operation: 'ocr.controller.extractManual.empty_file',
      userId,
      fileName,
    });
    return reply.code(400).send({
      error: 'Bad Request',
      message: 'Empty file provided',
    });
  }

  // Validate PDF magic bytes (%PDF): the first four bytes must match exactly.
  const PDF_MAGIC = Buffer.from('%PDF');
  const header = fileBuffer.subarray(0, 4);
  const hasPdfHeader = fileBuffer.length >= 4 && header.equals(PDF_MAGIC);
  if (!hasPdfHeader) {
    logger.warn('File lacks PDF magic bytes', {
      operation: 'ocr.controller.extractManual.invalid_magic',
      userId,
      fileName,
      firstBytes: header.toString('hex'),
    });
    return reply.code(415).send({
      error: 'Unsupported Media Type',
      message: 'File does not appear to be a valid PDF (missing %PDF header)',
    });
  }

  // Optional vehicle_id form field; read only after the stream has been consumed.
  const vehicleId = upload.fields?.vehicle_id?.value as string | undefined;

  try {
    const result = await ocrService.submitManualJob(userId, {
      fileBuffer,
      contentType,
      vehicleId,
    });

    logger.info('Manual extract job submitted', {
      operation: 'ocr.controller.extractManual.success',
      userId,
      jobId: result.jobId,
      status: result.status,
      estimatedSeconds: result.estimatedSeconds,
    });

    return reply.code(202).send(result);
  } catch (error: any) {
    // Status codes forwarded to the client as-is, with their response labels.
    const passthroughLabels = new Map<number, string>([
      [413, 'Payload Too Large'],
      [400, 'Bad Request'],
    ]);
    const status = error.statusCode;
    const label = passthroughLabels.get(status);
    if (label !== undefined) {
      return reply.code(status).send({
        error: label,
        message: error.message,
      });
    }

    logger.error('Manual extract failed', {
      operation: 'ocr.controller.extractManual.error',
      userId,
      error: error.message,
    });
    return reply.code(500).send({
      error: 'Internal Server Error',
      message: 'Manual extraction submission failed',
    });
  }
}
/** /**
* POST /api/ocr/jobs * POST /api/ocr/jobs
* Submit an async OCR job for large files. * Submit an async OCR job for large files.
@@ -352,9 +596,9 @@ export class OcrController {
return reply.code(200).send(result); return reply.code(200).send(result);
} catch (error: any) { } catch (error: any) {
if (error.statusCode === 404) { if (error.statusCode === 410) {
return reply.code(404).send({ return reply.code(410).send({
error: 'Not Found', error: 'Gone',
message: error.message, message: error.message,
}); });
} }

View File

@@ -2,6 +2,7 @@
* @ai-summary Fastify routes for OCR API * @ai-summary Fastify routes for OCR API
*/ */
import { FastifyInstance, FastifyPluginAsync, FastifyPluginOptions } from 'fastify'; import { FastifyInstance, FastifyPluginAsync, FastifyPluginOptions } from 'fastify';
import { requireTier } from '../../../core/middleware/require-tier';
import { OcrController } from './ocr.controller'; import { OcrController } from './ocr.controller';
export const ocrRoutes: FastifyPluginAsync = async ( export const ocrRoutes: FastifyPluginAsync = async (
@@ -23,6 +24,18 @@ export const ocrRoutes: FastifyPluginAsync = async (
handler: ctrl.extractVin.bind(ctrl), handler: ctrl.extractVin.bind(ctrl),
}); });
// POST /api/ocr/extract/receipt - Receipt-specific OCR extraction (Pro tier required)
// preHandlers run in order: authentication first, then the tier guard for the feature key.
fastify.post('/ocr/extract/receipt', {
  preHandler: [requireAuth, requireTier('fuelLog.receiptScan')],
  handler: ctrl.extractReceipt.bind(ctrl),
});
// POST /api/ocr/extract/manual - Manual extraction (Pro tier required)
// Uses the same imported requireTier(featureKey) guard as the receipt route so both
// tier-gated OCR routes are wired identically.
fastify.post('/ocr/extract/manual', {
  preHandler: [requireAuth, requireTier('document.scanMaintenanceSchedule')],
  handler: ctrl.extractManual.bind(ctrl),
});
// POST /api/ocr/jobs - Submit async OCR job // POST /api/ocr/jobs - Submit async OCR job
fastify.post('/ocr/jobs', { fastify.post('/ocr/jobs', {
preHandler: [requireAuth], preHandler: [requireAuth],

View File

@@ -5,9 +5,13 @@ import { logger } from '../../../core/logging/logger';
import { ocrClient, JobNotFoundError } from '../external/ocr-client'; import { ocrClient, JobNotFoundError } from '../external/ocr-client';
import type { import type {
JobResponse, JobResponse,
ManualJobResponse,
ManualJobSubmitRequest,
OcrExtractRequest, OcrExtractRequest,
OcrJobSubmitRequest, OcrJobSubmitRequest,
OcrResponse, OcrResponse,
ReceiptExtractRequest,
ReceiptExtractionResponse,
VinExtractionResponse, VinExtractionResponse,
} from './ocr.types'; } from './ocr.types';
@@ -26,6 +30,14 @@ const SUPPORTED_TYPES = new Set([
'application/pdf', 'application/pdf',
]); ]);
/**
 * Image-only MIME types for receipt extraction (no PDF).
 * extractReceipt rejects any content type that is not in this set with statusCode 415.
 */
const SUPPORTED_IMAGE_TYPES = new Set([
'image/jpeg',
'image/png',
'image/heic',
'image/heif',
]);
/** /**
* Domain service for OCR operations. * Domain service for OCR operations.
* Handles business logic and validation for OCR requests. * Handles business logic and validation for OCR requests.
@@ -150,6 +162,65 @@ export class OcrService {
} }
} }
/**
 * Extract data from a receipt image using receipt-specific OCR.
 *
 * The request is validated before the OCR client is called: the size is checked
 * first, then the MIME type against SUPPORTED_IMAGE_TYPES (images only).
 *
 * @param userId - User ID for logging
 * @param request - Receipt extraction request (file buffer, MIME type, optional receipt type hint)
 * @returns Receipt extraction result returned by the OCR client
 * @throws Error with `statusCode` 413 when the file exceeds MAX_SYNC_SIZE
 * @throws Error with `statusCode` 415 when the content type is not a supported image type
 * @throws Any error raised by the OCR client; it is logged and rethrown unchanged
 */
async extractReceipt(userId: string, request: ReceiptExtractRequest): Promise<ReceiptExtractionResponse> {
  const { fileBuffer, contentType, receiptType } = request;

  if (fileBuffer.length > MAX_SYNC_SIZE) {
    throw Object.assign(
      new Error(`File too large. Max: ${MAX_SYNC_SIZE / (1024 * 1024)}MB.`),
      { statusCode: 413 }
    );
  }

  if (!SUPPORTED_IMAGE_TYPES.has(contentType)) {
    throw Object.assign(
      new Error(
        `Unsupported file type: ${contentType}. Supported: ${[...SUPPORTED_IMAGE_TYPES].join(', ')}`
      ),
      { statusCode: 415 }
    );
  }

  logger.info('Receipt extract requested', {
    operation: 'ocr.service.extractReceipt',
    userId,
    contentType,
    fileSize: fileBuffer.length,
    receiptType,
  });

  try {
    const result = await ocrClient.extractReceipt(fileBuffer, contentType, receiptType);

    logger.info('Receipt extract completed', {
      operation: 'ocr.service.extractReceipt.success',
      userId,
      success: result.success,
      receiptType: result.receiptType,
      // The payload comes from another service and is only type-cast, not validated;
      // a missing extractedFields must not make this log line throw and fail the request.
      fieldCount: Object.keys(result.extractedFields ?? {}).length,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  } catch (error) {
    logger.error('Receipt extract failed', {
      operation: 'ocr.service.extractReceipt.error',
      userId,
      error: error instanceof Error ? error.message : 'Unknown error',
    });
    throw error;
  }
}
/** /**
* Submit an async OCR job for large files. * Submit an async OCR job for large files.
* *
@@ -209,6 +280,66 @@ export class OcrService {
} }
} }
/**
 * Submit an async manual extraction job for a PDF owner's manual.
 *
 * Size is validated first (413), then the content type (400); only then is the
 * job handed to the OCR client.
 *
 * @param userId - User ID for logging
 * @param request - Manual job submission request (file buffer, MIME type, optional vehicle ID)
 * @returns Manual job response with job ID
 * @throws Error with `statusCode` 413 when the file exceeds MAX_ASYNC_SIZE
 * @throws Error with `statusCode` 400 when the content type is not application/pdf
 * @throws Any error raised by the OCR client; it is logged and rethrown unchanged
 */
async submitManualJob(userId: string, request: ManualJobSubmitRequest): Promise<ManualJobResponse> {
  const { fileBuffer, contentType, vehicleId } = request;

  // Validate file size for async processing (200MB max)
  if (fileBuffer.length > MAX_ASYNC_SIZE) {
    throw Object.assign(
      new Error(`File too large. Max: ${MAX_ASYNC_SIZE / (1024 * 1024)}MB.`),
      { statusCode: 413 }
    );
  }

  // Manual extraction only supports PDF
  const isPdf = contentType === 'application/pdf';
  if (!isPdf) {
    throw Object.assign(
      new Error(`Unsupported file type: ${contentType}. Manual extraction requires PDF files.`),
      { statusCode: 400 }
    );
  }

  logger.info('Manual job submit requested', {
    operation: 'ocr.service.submitManualJob',
    userId,
    contentType,
    fileSize: fileBuffer.length,
    hasVehicleId: !!vehicleId,
  });

  try {
    const job = await ocrClient.submitManualJob(fileBuffer, contentType, vehicleId);

    logger.info('Manual job submitted', {
      operation: 'ocr.service.submitManualJob.success',
      userId,
      jobId: job.jobId,
      status: job.status,
      estimatedSeconds: job.estimatedSeconds,
    });

    return job;
  } catch (error) {
    const reason = error instanceof Error ? error.message : 'Unknown error';
    logger.error('Manual job submit failed', {
      operation: 'ocr.service.submitManualJob.error',
      userId,
      error: reason,
    });
    throw error;
  }
}
/** /**
* Get the status of an async OCR job. * Get the status of an async OCR job.
* *
@@ -237,8 +368,8 @@ export class OcrService {
return result; return result;
} catch (error) { } catch (error) {
if (error instanceof JobNotFoundError) { if (error instanceof JobNotFoundError) {
const err: any = new Error(`Job ${jobId} not found. Jobs expire after 1 hour.`); const err: any = new Error('Job expired (max 2 hours). Please resubmit.');
err.statusCode = 404; err.statusCode = 410;
throw err; throw err;
} }

View File

@@ -45,6 +45,23 @@ export interface OcrExtractRequest {
preprocess?: boolean; preprocess?: boolean;
} }
/**
 * Response from receipt-specific extraction.
 * The OCR client casts the OCR service's JSON body to this shape without runtime validation.
 */
export interface ReceiptExtractionResponse {
success: boolean;
receiptType: string;
// Extracted values keyed by field name.
extractedFields: Record<string, ExtractedField>;
rawText: string;
processingTimeMs: number;
// NOTE(review): presumably null when extraction succeeded - confirm against the OCR service.
error: string | null;
}
/** Request for receipt extraction */
export interface ReceiptExtractRequest {
// Raw file bytes; the service checks the length against its sync size limit.
fileBuffer: Buffer;
// MIME type of the upload; the service accepts image types only.
contentType: string;
// Optional receipt type hint (e.g. 'fuel'); passed through to the OCR client when set.
receiptType?: string;
}
/** Response from VIN-specific extraction */ /** Response from VIN-specific extraction */
export interface VinExtractionResponse { export interface VinExtractionResponse {
success: boolean; success: boolean;
@@ -62,3 +79,49 @@ export interface OcrJobSubmitRequest {
contentType: string; contentType: string;
callbackUrl?: string; callbackUrl?: string;
} }
/** Request to submit a manual extraction job */
export interface ManualJobSubmitRequest {
// Raw file bytes of the uploaded manual.
fileBuffer: Buffer;
// MIME type of the upload; the service rejects anything other than 'application/pdf'.
contentType: string;
// Optional vehicle ID, passed through to the OCR client when set.
vehicleId?: string;
}
/** Vehicle info extracted from a manual; each field is nullable. */
export interface ManualVehicleInfo {
make: string | null;
model: string | null;
year: number | null;
}
/** A single maintenance schedule item extracted from a manual */
export interface MaintenanceScheduleItem {
service: string;
// Either interval may be null.
intervalMiles: number | null;
intervalMonths: number | null;
details: string | null;
// NOTE(review): test fixtures use values such as 0.95, suggesting a 0-1 score - confirm.
confidence: number;
subtypes: string[];
}
/** Result of manual extraction (nested in ManualJobResponse.result) */
export interface ManualExtractionResult {
success: boolean;
vehicleInfo: ManualVehicleInfo;
maintenanceSchedules: MaintenanceScheduleItem[];
// Left as unknown[]; narrow before use.
rawTables: unknown[];
processingTimeMs: number;
totalPages: number;
pagesProcessed: number;
error: string | null;
}
/**
 * Response for async manual extraction job.
 * Only jobId and status are required; the remaining fields are optional.
 */
export interface ManualJobResponse {
jobId: string;
status: JobStatus;
progress?: number;
estimatedSeconds?: number;
result?: ManualExtractionResult;
error?: string;
}

View File

@@ -2,7 +2,7 @@
* @ai-summary HTTP client for OCR service communication * @ai-summary HTTP client for OCR service communication
*/ */
import { logger } from '../../../core/logging/logger'; import { logger } from '../../../core/logging/logger';
import type { JobResponse, OcrResponse, VinExtractionResponse } from '../domain/ocr.types'; import type { JobResponse, ManualJobResponse, OcrResponse, ReceiptExtractionResponse, VinExtractionResponse } from '../domain/ocr.types';
/** OCR service configuration */ /** OCR service configuration */
const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000'; const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000';
@@ -119,6 +119,64 @@ export class OcrClient {
return result; return result;
} }
/**
 * Extract data from a receipt image using receipt-specific OCR.
 *
 * Posts the file as multipart form data to `{baseUrl}/extract/receipt`.
 *
 * @param fileBuffer - Image file buffer
 * @param contentType - MIME type of the file
 * @param receiptType - Optional receipt type hint (e.g., 'fuel'); sent as `receipt_type` when truthy
 * @returns Receipt extraction result
 * @throws Error carrying `statusCode` (the upstream HTTP status) when the OCR service
 *         responds with a non-2xx status
 */
async extractReceipt(
  fileBuffer: Buffer,
  contentType: string,
  receiptType?: string
): Promise<ReceiptExtractionResponse> {
  const formData = this.buildFormData(fileBuffer, contentType);
  if (receiptType) {
    formData.append('receipt_type', receiptType);
  }

  const url = `${this.baseUrl}/extract/receipt`;
  logger.info('OCR receipt extract request', {
    operation: 'ocr.client.extractReceipt',
    url,
    contentType,
    fileSize: fileBuffer.length,
    receiptType,
  });

  const response = await this.fetchWithTimeout(url, {
    method: 'POST',
    body: formData,
  });

  if (!response.ok) {
    const errorText = await response.text();
    logger.error('OCR receipt extract failed', {
      operation: 'ocr.client.extractReceipt.error',
      status: response.status,
      error: errorText,
    });
    // Attach the upstream status so callers can forward it instead of a generic failure.
    throw Object.assign(
      new Error(`OCR service error: ${response.status} - ${errorText}`),
      { statusCode: response.status }
    );
  }

  const result = (await response.json()) as ReceiptExtractionResponse;
  logger.info('OCR receipt extract completed', {
    operation: 'ocr.client.extractReceipt.success',
    success: result.success,
    receiptType: result.receiptType,
    // The body is cast, not validated: tolerate a missing extractedFields so that
    // logging cannot turn a completed upstream call into a thrown TypeError.
    fieldCount: Object.keys(result.extractedFields ?? {}).length,
    processingTimeMs: result.processingTimeMs,
  });

  return result;
}
/** /**
* Submit an async OCR job for large files. * Submit an async OCR job for large files.
* *
@@ -209,6 +267,61 @@ export class OcrClient {
return (await response.json()) as JobResponse; return (await response.json()) as JobResponse;
} }
/**
 * Submit an async manual extraction job for a PDF owner's manual.
 *
 * Posts the file as multipart form data to `{baseUrl}/extract/manual`.
 *
 * @param fileBuffer - PDF file buffer
 * @param contentType - MIME type of the file (must be application/pdf)
 * @param vehicleId - Optional vehicle ID for context; sent as `vehicle_id` when truthy
 * @returns Manual job submission response
 * @throws Error when the OCR service responds with a non-2xx status
 */
async submitManualJob(
  fileBuffer: Buffer,
  contentType: string,
  vehicleId?: string
): Promise<ManualJobResponse> {
  const payload = this.buildFormData(fileBuffer, contentType);
  if (vehicleId) {
    payload.append('vehicle_id', vehicleId);
  }

  const url = `${this.baseUrl}/extract/manual`;
  logger.info('OCR manual job submit request', {
    operation: 'ocr.client.submitManualJob',
    url,
    contentType,
    fileSize: fileBuffer.length,
    hasVehicleId: !!vehicleId,
  });

  const response = await this.fetchWithTimeout(url, { method: 'POST', body: payload });

  if (response.ok) {
    const job = (await response.json()) as ManualJobResponse;
    const { jobId, status, estimatedSeconds } = job;
    logger.info('OCR manual job submitted', {
      operation: 'ocr.client.submitManualJob.success',
      jobId,
      status,
      estimatedSeconds,
    });
    return job;
  }

  // Non-2xx: log the upstream body and surface it in the thrown error message.
  const errorText = await response.text();
  logger.error('OCR manual job submit failed', {
    operation: 'ocr.client.submitManualJob.error',
    status: response.status,
    error: errorText,
  });
  throw new Error(`OCR service error: ${response.status} - ${errorText}`);
}
/** /**
* Check if the OCR service is healthy. * Check if the OCR service is healthy.
* *

View File

@@ -8,4 +8,5 @@ export type {
JobResponse, JobResponse,
JobStatus, JobStatus,
OcrResponse, OcrResponse,
ReceiptExtractionResponse,
} from './domain/ocr.types'; } from './domain/ocr.types';

View File

@@ -0,0 +1,295 @@
/**
* @ai-summary Unit tests for OCR manual extraction endpoint
*/
import { OcrService } from '../../domain/ocr.service';
import { ocrClient, JobNotFoundError } from '../../external/ocr-client';
import type { ManualJobResponse } from '../../domain/ocr.types';
jest.mock('../../external/ocr-client');
jest.mock('../../../../core/logging/logger');
const mockSubmitManualJob = ocrClient.submitManualJob as jest.MockedFunction<
typeof ocrClient.submitManualJob
>;
const mockGetJobStatus = ocrClient.getJobStatus as jest.MockedFunction<
typeof ocrClient.getJobStatus
>;
// Unit tests for OcrService.submitManualJob.
// ocrClient is replaced via jest.mock, so no request leaves the process; these tests cover
// the service's own validation (size, content type) and its pass-through of client results.
describe('OcrService.submitManualJob', () => {
let service: OcrService;
const userId = 'test-user-id';
// Fixture: a freshly accepted job that has no result yet.
const mockManualJobResponse: ManualJobResponse = {
jobId: 'manual-job-123',
status: 'pending',
progress: 0,
estimatedSeconds: 45,
result: undefined,
error: undefined,
};
// Fixture: a finished job carrying a full extraction result.
const mockCompletedJobResponse: ManualJobResponse = {
jobId: 'manual-job-123',
status: 'completed',
progress: 100,
result: {
success: true,
vehicleInfo: {
make: 'Honda',
model: 'Civic',
year: 2023,
},
maintenanceSchedules: [
{
service: 'Engine Oil Change',
intervalMiles: 5000,
intervalMonths: 6,
details: 'Use 0W-20 full synthetic oil',
confidence: 0.95,
subtypes: ['oil_change'],
},
{
service: 'Tire Rotation',
intervalMiles: 7500,
intervalMonths: 6,
details: null,
confidence: 0.90,
subtypes: ['tire_rotation'],
},
],
rawTables: [],
processingTimeMs: 45000,
totalPages: 120,
pagesProcessed: 120,
error: null,
},
error: undefined,
};
beforeEach(() => {
jest.clearAllMocks();
service = new OcrService();
});
describe('valid manual job submission', () => {
it('should return 202-style response with jobId for PDF submission', async () => {
mockSubmitManualJob.mockResolvedValue(mockManualJobResponse);
const result = await service.submitManualJob(userId, {
fileBuffer: Buffer.from('fake-pdf-data'),
contentType: 'application/pdf',
});
expect(result.jobId).toBe('manual-job-123');
expect(result.status).toBe('pending');
expect(result.progress).toBe(0);
expect(result.estimatedSeconds).toBe(45);
expect(result.result).toBeUndefined();
});
it('should pass vehicleId to client when provided', async () => {
mockSubmitManualJob.mockResolvedValue(mockManualJobResponse);
await service.submitManualJob(userId, {
fileBuffer: Buffer.from('fake-pdf-data'),
contentType: 'application/pdf',
vehicleId: 'vehicle-abc',
});
expect(mockSubmitManualJob).toHaveBeenCalledWith(
expect.any(Buffer),
'application/pdf',
'vehicle-abc'
);
});
it('should call client without vehicleId when not provided', async () => {
mockSubmitManualJob.mockResolvedValue(mockManualJobResponse);
await service.submitManualJob(userId, {
fileBuffer: Buffer.from('fake-pdf-data'),
contentType: 'application/pdf',
});
expect(mockSubmitManualJob).toHaveBeenCalledWith(
expect.any(Buffer),
'application/pdf',
undefined
);
});
});
describe('completed job result', () => {
// The service returns whatever the client resolves with, including a nested result.
it('should return completed result with maintenanceSchedules', async () => {
mockSubmitManualJob.mockResolvedValue(mockCompletedJobResponse);
const result = await service.submitManualJob(userId, {
fileBuffer: Buffer.from('fake-pdf-data'),
contentType: 'application/pdf',
});
expect(result.status).toBe('completed');
expect(result.result).toBeDefined();
expect(result.result!.success).toBe(true);
expect(result.result!.maintenanceSchedules).toHaveLength(2);
expect(result.result!.maintenanceSchedules[0].service).toBe('Engine Oil Change');
expect(result.result!.maintenanceSchedules[0].intervalMiles).toBe(5000);
expect(result.result!.maintenanceSchedules[0].subtypes).toEqual(['oil_change']);
expect(result.result!.vehicleInfo.make).toBe('Honda');
});
});
describe('error handling', () => {
// The service rejects non-PDF content types with statusCode 400 before calling the client.
it('should throw 400 for non-PDF file (JPEG)', async () => {
await expect(
service.submitManualJob(userId, {
fileBuffer: Buffer.from('fake-image-data'),
contentType: 'image/jpeg',
})
).rejects.toMatchObject({
statusCode: 400,
});
});
it('should throw 400 for non-PDF file (PNG)', async () => {
await expect(
service.submitManualJob(userId, {
fileBuffer: Buffer.from('fake-image-data'),
contentType: 'image/png',
})
).rejects.toMatchObject({
statusCode: 400,
});
});
it('should throw 400 for text/plain', async () => {
await expect(
service.submitManualJob(userId, {
fileBuffer: Buffer.from('not a pdf'),
contentType: 'text/plain',
})
).rejects.toMatchObject({
statusCode: 400,
});
});
// Allocates a real 201MB buffer because the service checks fileBuffer.length.
it('should throw 413 for oversized file', async () => {
const largeBuffer = Buffer.alloc(201 * 1024 * 1024); // 201MB
await expect(
service.submitManualJob(userId, {
fileBuffer: largeBuffer,
contentType: 'application/pdf',
})
).rejects.toMatchObject({
statusCode: 413,
});
});
// The size check uses a strict '>' comparison, so a file of exactly the limit is accepted.
it('should accept file at 200MB boundary', async () => {
mockSubmitManualJob.mockResolvedValue(mockManualJobResponse);
const exactBuffer = Buffer.alloc(200 * 1024 * 1024); // exactly 200MB
const result = await service.submitManualJob(userId, {
fileBuffer: exactBuffer,
contentType: 'application/pdf',
});
expect(result.jobId).toBe('manual-job-123');
});
// Client failures are rethrown by the service.
it('should propagate OCR service errors', async () => {
mockSubmitManualJob.mockRejectedValue(
new Error('OCR service error: 500 - Internal error')
);
await expect(
service.submitManualJob(userId, {
fileBuffer: Buffer.from('fake-pdf-data'),
contentType: 'application/pdf',
})
).rejects.toThrow('OCR service error: 500 - Internal error');
});
});
});
// Unit tests for OcrService.getJobStatus as used to poll manual extraction jobs.
// ocrClient.getJobStatus is mocked; only the service's pass-through and error mapping run.
describe('OcrService.getJobStatus (manual job polling)', () => {
let service: OcrService;
const userId = 'test-user-id';
beforeEach(() => {
jest.clearAllMocks();
service = new OcrService();
});
// NOTE(review): despite the test name, the mocked response carries no schedules;
// only jobId, status and progress are asserted.
it('should return completed manual job with schedules', async () => {
mockGetJobStatus.mockResolvedValue({
jobId: 'manual-job-123',
status: 'completed',
progress: 100,
});
const result = await service.getJobStatus(userId, 'manual-job-123');
expect(result.jobId).toBe('manual-job-123');
expect(result.status).toBe('completed');
expect(result.progress).toBe(100);
});
it('should return processing status with progress', async () => {
mockGetJobStatus.mockResolvedValue({
jobId: 'manual-job-456',
status: 'processing',
progress: 50,
});
const result = await service.getJobStatus(userId, 'manual-job-456');
expect(result.status).toBe('processing');
expect(result.progress).toBe(50);
});
// JobNotFoundError from the client is expected to surface as statusCode 410 with a fixed message.
it('should throw 410 Gone for expired/missing job', async () => {
mockGetJobStatus.mockRejectedValue(new JobNotFoundError('expired-job-789'));
await expect(
service.getJobStatus(userId, 'expired-job-789')
).rejects.toMatchObject({
statusCode: 410,
message: 'Job expired (max 2 hours). Please resubmit.',
});
});
});
// NOTE(review): these tests do not invoke the controller. They re-check the primitives the
// comments say the controller relies on (a 4-byte '%PDF' comparison and a '.pdf' suffix
// check), so they would still pass if the controller's own validation changed.
describe('Manual extraction controller validations', () => {
it('PDF magic bytes validation rejects non-PDF content', () => {
// Controller validates first 4 bytes match %PDF (0x25504446)
// Files without %PDF header receive 415 Unsupported Media Type
const pdfMagic = Buffer.from('%PDF');
const notPdf = Buffer.from('JFIF');
expect(pdfMagic.subarray(0, 4).equals(Buffer.from('%PDF'))).toBe(true);
expect(notPdf.subarray(0, 4).equals(Buffer.from('%PDF'))).toBe(false);
});
it('accepts files with .pdf extension even if mimetype is octet-stream', () => {
// Controller checks: contentType === 'application/pdf' OR filename.endsWith('.pdf')
// This allows uploads where browser sends generic content type
const filename = 'owners-manual.pdf';
expect(filename.toLowerCase().endsWith('.pdf')).toBe(true);
});
});
// NOTE(review): this only verifies that requireTier(featureKey) returns a function. It does
// not inspect the registered route, so it cannot detect a route wired with a different guard.
describe('Manual route tier guard', () => {
it('route is configured with tier guard for document.scanMaintenanceSchedule', async () => {
// Tier guard is enforced at route level via requireTier('document.scanMaintenanceSchedule')
// preHandler: [requireAuth, requireTier('document.scanMaintenanceSchedule')]
// Free-tier users receive 403 TIER_REQUIRED before the handler executes.
// Middleware behavior is tested in core/middleware/require-tier.test.ts
const { requireTier } = await import('../../../../core/middleware/require-tier');
const handler = requireTier('document.scanMaintenanceSchedule');
expect(typeof handler).toBe('function');
});
});

View File

@@ -0,0 +1,209 @@
/**
* @ai-summary Unit tests for OCR receipt extraction endpoint
*/
import { OcrService } from '../../domain/ocr.service';
import { ocrClient } from '../../external/ocr-client';
import type { ReceiptExtractionResponse } from '../../domain/ocr.types';
jest.mock('../../external/ocr-client');
jest.mock('../../../../core/logging/logger');
const mockExtractReceipt = ocrClient.extractReceipt as jest.MockedFunction<
typeof ocrClient.extractReceipt
>;
// Unit tests for OcrService.extractReceipt.
// ocrClient is replaced via jest.mock; these tests cover the service's validation
// (size, image-only content types) and its pass-through of client results and errors.
describe('OcrService.extractReceipt', () => {
let service: OcrService;
const userId = 'test-user-id';
// Fixture: a fully populated fuel receipt extraction.
const mockReceiptResponse: ReceiptExtractionResponse = {
success: true,
receiptType: 'fuel',
extractedFields: {
merchantName: { value: 'Shell Gas Station', confidence: 0.92 },
transactionDate: { value: '2026-02-10', confidence: 0.88 },
totalAmount: { value: '45.67', confidence: 0.95 },
fuelQuantity: { value: '12.345', confidence: 0.87 },
pricePerUnit: { value: '3.699', confidence: 0.90 },
fuelGrade: { value: 'Regular 87', confidence: 0.85 },
},
rawText: 'SHELL\n02/10/2026\nREGULAR 87\n12.345 GAL\n$3.699/GAL\nTOTAL $45.67',
processingTimeMs: 1250,
error: null,
};
beforeEach(() => {
jest.clearAllMocks();
service = new OcrService();
});
describe('valid receipt extraction', () => {
it('should return receipt extraction response for valid image', async () => {
mockExtractReceipt.mockResolvedValue(mockReceiptResponse);
const result = await service.extractReceipt(userId, {
fileBuffer: Buffer.from('fake-image-data'),
contentType: 'image/jpeg',
});
expect(result.success).toBe(true);
expect(result.receiptType).toBe('fuel');
expect(result.extractedFields.merchantName.value).toBe('Shell Gas Station');
expect(result.extractedFields.totalAmount.value).toBe('45.67');
expect(result.extractedFields.fuelQuantity.value).toBe('12.345');
expect(result.extractedFields.pricePerUnit.value).toBe('3.699');
expect(result.extractedFields.fuelGrade.value).toBe('Regular 87');
expect(result.extractedFields.transactionDate.value).toBe('2026-02-10');
expect(result.processingTimeMs).toBe(1250);
});
it('should pass receipt_type hint to client when provided', async () => {
mockExtractReceipt.mockResolvedValue(mockReceiptResponse);
await service.extractReceipt(userId, {
fileBuffer: Buffer.from('fake-image-data'),
contentType: 'image/jpeg',
receiptType: 'fuel',
});
expect(mockExtractReceipt).toHaveBeenCalledWith(
expect.any(Buffer),
'image/jpeg',
'fuel'
);
});
it('should support PNG images', async () => {
mockExtractReceipt.mockResolvedValue(mockReceiptResponse);
const result = await service.extractReceipt(userId, {
fileBuffer: Buffer.from('fake-png-data'),
contentType: 'image/png',
});
expect(result.success).toBe(true);
});
it('should support HEIC images', async () => {
mockExtractReceipt.mockResolvedValue(mockReceiptResponse);
const result = await service.extractReceipt(userId, {
fileBuffer: Buffer.from('fake-heic-data'),
contentType: 'image/heic',
});
expect(result.success).toBe(true);
});
});
describe('missing optional fields', () => {
// extractedFields is a keyed record, so undetected fields are simply absent keys.
it('should handle response with some fields not detected', async () => {
const partialResponse: ReceiptExtractionResponse = {
success: true,
receiptType: 'fuel',
extractedFields: {
merchantName: { value: 'Unknown Station', confidence: 0.60 },
totalAmount: { value: '30.00', confidence: 0.88 },
},
rawText: 'UNKNOWN STATION\nTOTAL $30.00',
processingTimeMs: 980,
error: null,
};
mockExtractReceipt.mockResolvedValue(partialResponse);
const result = await service.extractReceipt(userId, {
fileBuffer: Buffer.from('fake-image-data'),
contentType: 'image/jpeg',
});
expect(result.success).toBe(true);
expect(result.extractedFields.merchantName).toBeDefined();
expect(result.extractedFields.totalAmount).toBeDefined();
expect(result.extractedFields.fuelQuantity).toBeUndefined();
expect(result.extractedFields.pricePerUnit).toBeUndefined();
expect(result.extractedFields.fuelGrade).toBeUndefined();
expect(result.extractedFields.transactionDate).toBeUndefined();
});
});
describe('error handling', () => {
// Receipt extraction is image-only: PDF and other non-image types yield statusCode 415.
it('should throw 415 for unsupported file type (PDF)', async () => {
await expect(
service.extractReceipt(userId, {
fileBuffer: Buffer.from('fake-pdf-data'),
contentType: 'application/pdf',
})
).rejects.toMatchObject({
statusCode: 415,
});
});
it('should throw 415 for text/plain', async () => {
await expect(
service.extractReceipt(userId, {
fileBuffer: Buffer.from('not an image'),
contentType: 'text/plain',
})
).rejects.toMatchObject({
statusCode: 415,
});
});
// NOTE(review): assumes 11MB exceeds the service's sync size limit - confirm against MAX_SYNC_SIZE.
it('should throw 413 for oversized file', async () => {
const largeBuffer = Buffer.alloc(11 * 1024 * 1024); // 11MB
await expect(
service.extractReceipt(userId, {
fileBuffer: largeBuffer,
contentType: 'image/jpeg',
})
).rejects.toMatchObject({
statusCode: 413,
});
});
// The service rethrows the client's error object, so statusCode and message must survive.
it('should propagate Python 422 with statusCode for controller forwarding', async () => {
const err: any = new Error('OCR service error: 422 - Failed to extract receipt data');
err.statusCode = 422;
mockExtractReceipt.mockRejectedValue(err);
await expect(
service.extractReceipt(userId, {
fileBuffer: Buffer.from('fake-image-data'),
contentType: 'image/jpeg',
})
).rejects.toMatchObject({
statusCode: 422,
message: 'OCR service error: 422 - Failed to extract receipt data',
});
});
it('should propagate OCR service errors', async () => {
mockExtractReceipt.mockRejectedValue(
new Error('OCR service error: 500 - Internal error')
);
await expect(
service.extractReceipt(userId, {
fileBuffer: Buffer.from('fake-image-data'),
contentType: 'image/jpeg',
})
).rejects.toThrow('OCR service error: 500 - Internal error');
});
});
});
// NOTE(review): this only verifies that requireTier(featureKey) returns a function. It does
// not inspect the registered route, so it cannot detect a route wired with a different guard.
describe('Receipt route tier guard', () => {
it('route is configured with requireTier fuelLog.receiptScan', async () => {
// Tier guard is enforced at route level via requireTier('fuelLog.receiptScan')
// preHandler: [requireAuth, requireTier('fuelLog.receiptScan')]
// Free-tier users receive 403 TIER_REQUIRED before the handler executes.
// Middleware behavior is tested in core/middleware/require-tier.test.ts
const { requireTier } = await import('../../../../core/middleware/require-tier');
const handler = requireTier('fuelLog.receiptScan');
expect(typeof handler).toBe('function');
});
});

View File

@@ -10,6 +10,7 @@ import { pool } from '../../../core/config/database';
import { logger } from '../../../core/logging/logger'; import { logger } from '../../../core/logging/logger';
import { import {
StationSearchBody, StationSearchBody,
StationMatchBody,
SaveStationBody, SaveStationBody,
StationParams, StationParams,
UpdateSavedStationBody UpdateSavedStationBody
@@ -53,6 +54,29 @@ export class StationsController {
} }
} }
/**
 * POST /api/stations/match - match a receipt merchant name to a station.
 *
 * Replies 400 when `merchantName` is missing, not a string, or blank; 200 with the
 * service's match result otherwise; 500 when the lookup throws.
 *
 * @param request - Fastify request whose body should carry `merchantName`
 * @param reply - Fastify reply used to send the response
 */
async matchStation(request: FastifyRequest<{ Body: StationMatchBody }>, reply: FastifyReply) {
  // The body is client-supplied JSON: it can be absent or carry a non-string value, so
  // read it defensively and validate the runtime type instead of trusting the static one.
  const merchantName: unknown = request.body?.merchantName;
  try {
    if (typeof merchantName !== 'string' || !merchantName.trim()) {
      return reply.code(400).send({
        error: 'Bad Request',
        message: 'Merchant name is required',
      });
    }

    const result = await this.stationsService.matchStationFromReceipt(merchantName);
    return reply.code(200).send(result);
  } catch (error: unknown) {
    logger.error('Error matching station from receipt', { error, merchantName });
    return reply.code(500).send({
      error: 'Internal server error',
      message: 'Failed to match station',
    });
  }
}
async saveStation(request: FastifyRequest<{ Body: SaveStationBody }>, reply: FastifyReply) { async saveStation(request: FastifyRequest<{ Body: SaveStationBody }>, reply: FastifyReply) {
try { try {
const userId = (request as any).user.sub; const userId = (request as any).user.sub;

View File

@@ -7,6 +7,7 @@ import { FastifyInstance, FastifyPluginOptions } from 'fastify';
import { FastifyPluginAsync } from 'fastify'; import { FastifyPluginAsync } from 'fastify';
import { import {
StationSearchBody, StationSearchBody,
StationMatchBody,
SaveStationBody, SaveStationBody,
StationParams, StationParams,
UpdateSavedStationBody UpdateSavedStationBody
@@ -25,6 +26,12 @@ export const stationsRoutes: FastifyPluginAsync = async (
handler: stationsController.searchStations.bind(stationsController) handler: stationsController.searchStations.bind(stationsController)
}); });
// POST /api/stations/match - Match station from receipt merchant name
// The only preHandler is fastify.authenticate; no tier guard is attached to this route.
fastify.post<{ Body: StationMatchBody }>('/stations/match', {
preHandler: [fastify.authenticate],
handler: stationsController.matchStation.bind(stationsController)
});
// POST /api/stations/save - Save a station to user's favorites // POST /api/stations/save - Save a station to user's favorites
fastify.post<{ Body: SaveStationBody }>('/stations/save', { fastify.post<{ Body: SaveStationBody }>('/stations/save', {
preHandler: [fastify.authenticate], preHandler: [fastify.authenticate],

View File

@@ -7,6 +7,7 @@ import { googleMapsClient } from '../external/google-maps/google-maps.client';
import { import {
StationSearchRequest, StationSearchRequest,
StationSearchResponse, StationSearchResponse,
StationMatchResponse,
SavedStation, SavedStation,
StationSavedMetadata, StationSavedMetadata,
UpdateSavedStationBody UpdateSavedStationBody
@@ -154,6 +155,27 @@ export class StationsService {
return enriched; return enriched;
} }
/**
 * Match a receipt merchant name to a station via the Google Maps client.
 *
 * A blank name short-circuits to "no match" without any lookup. When a station is
 * found it is also written to the repository cache before the result is returned.
 *
 * @param merchantName - Merchant name as read from the receipt
 * @returns Match result; `station` is the value returned by the lookup
 */
async matchStationFromReceipt(merchantName: string): Promise<StationMatchResponse> {
  const name = merchantName.trim();
  if (!name) {
    return { matched: false, station: null };
  }

  logger.info('Matching station from receipt merchant name', { merchantName: name });

  const station = await googleMapsClient.searchStationByName(name);
  const response: StationMatchResponse = { matched: station !== null, station };

  if (station) {
    // Cache matched station for future reference (e.g. saveStation)
    await this.repository.cacheStation(station);
  }

  return response;
}
async removeSavedStation(placeId: string, userId: string) { async removeSavedStation(placeId: string, userId: string) {
const removed = await this.repository.deleteSavedStation(userId, placeId); const removed = await this.repository.deleteSavedStation(userId, placeId);

View File

@@ -89,3 +89,12 @@ export interface StationSavedMetadata {
has93Octane: boolean; has93Octane: boolean;
has93OctaneEthanolFree: boolean; has93OctaneEthanolFree: boolean;
} }
/** Request body for POST /stations/match. */
export interface StationMatchBody {
// Merchant name taken from a receipt; the controller rejects a missing or blank value with 400.
merchantName: string;
}
/** Result of matching a receipt merchant name to a station. */
export interface StationMatchResponse {
// The service sets this to `station !== null`.
matched: boolean;
station: Station | null;
}

View File

@@ -7,7 +7,7 @@ import axios from 'axios';
import { appConfig } from '../../../../core/config/config-loader'; import { appConfig } from '../../../../core/config/config-loader';
import { logger } from '../../../../core/logging/logger'; import { logger } from '../../../../core/logging/logger';
import { cacheService } from '../../../../core/config/redis'; import { cacheService } from '../../../../core/config/redis';
import { GooglePlacesResponse, GooglePlace } from './google-maps.types'; import { GooglePlacesResponse, GoogleTextSearchResponse, GooglePlace } from './google-maps.types';
import { Station } from '../../domain/stations.types'; import { Station } from '../../domain/stations.types';
export class GoogleMapsClient { export class GoogleMapsClient {
@@ -103,6 +103,92 @@ export class GoogleMapsClient {
return station; return station;
} }
/**
 * Search for a gas station by merchant name using Google Places Text Search API.
 * Used to match receipt merchant names (e.g. "Shell", "COSTCO #123") to actual stations.
 *
 * Best-effort: any failure (API error status, timeout, network or cache error) is
 * logged and reported as `null` rather than thrown.
 *
 * @param merchantName - Merchant name to search for
 * @returns The top search result as a Station, or null when nothing matched or the lookup failed
 */
async searchStationByName(merchantName: string): Promise<Station | null> {
  const timeoutMs = 5000;
  const query = `${merchantName} gas station`;
  const cacheKey = `station-match:${query.toLowerCase().trim()}`;

  try {
    // NOTE(review): the empty-result branch below stores null, but this hit check ignores
    // null, so a stored null is treated as a miss - confirm whether negative caching is intended.
    const cached = await cacheService.get<Station | null>(cacheKey);
    if (cached != null) {
      logger.debug('Station name match cache hit', { merchantName });
      return cached;
    }

    logger.info('Searching Google Places Text Search for station', { merchantName, query });

    const requestUrl = `${this.baseURL}/textsearch/json`;
    const response = await axios.get<GoogleTextSearchResponse>(requestUrl, {
      params: {
        query,
        type: 'gas_station',
        key: this.apiKey,
      },
      timeout: timeoutMs,
    });

    const { status, results } = response.data;
    const acceptableStatuses = ['OK', 'ZERO_RESULTS'];
    if (!acceptableStatuses.includes(status)) {
      throw new Error(`Google Places Text Search API error: ${status}`);
    }

    if (results.length === 0) {
      await cacheService.set(cacheKey, null, this.cacheTTL);
      return null;
    }

    // Only the first (top-ranked) result is used.
    const station = this.transformTextSearchResult(results[0]);
    await cacheService.set(cacheKey, station, this.cacheTTL);
    return station;
  } catch (error: any) {
    const timedOut = error.code === 'ECONNABORTED' || error.message?.includes('timeout');
    if (timedOut) {
      logger.warn('Station name search timed out', { merchantName, timeoutMs });
    } else {
      logger.error('Station name search failed', { error, merchantName });
    }
    return null;
  }
}
/**
 * Convert a single Places Text Search result into the domain `Station` shape.
 *
 * Optional attributes (photo reference, open-now flag, rating) are only set on
 * the returned object when the API supplied them, so absent values stay absent
 * instead of becoming explicit `undefined` properties.
 *
 * @param place - One entry from the Text Search `results` array.
 * @returns The station built from that entry.
 */
private transformTextSearchResult(place: GooglePlace): Station {
  // Text Search returns formatted_address instead of vicinity. The field is
  // read through a narrow structural type so the rest of `place` stays
  // type-checked, unlike a blanket `any` cast.
  const { formatted_address: formattedAddress } = place as GooglePlace & {
    formatted_address?: string;
  };

  const station: Station = {
    id: place.place_id,
    placeId: place.place_id,
    name: place.name,
    address: formattedAddress || place.vicinity || '',
    latitude: place.geometry.location.lat,
    longitude: place.geometry.location.lng,
  };

  // Only the first photo is used.
  const photoReference = place.photos?.[0]?.photo_reference;
  if (photoReference !== undefined) {
    station.photoReference = photoReference;
  }

  const openNow = place.opening_hours?.open_now;
  if (openNow !== undefined) {
    station.isOpen = openNow;
  }

  if (place.rating !== undefined) {
    station.rating = place.rating;
  }

  return station;
}
/** /**
* Fetch photo from Google Maps API using photo reference * Fetch photo from Google Maps API using photo reference
* Used by photo proxy endpoint to serve photos without exposing API key * Used by photo proxy endpoint to serve photos without exposing API key

View File

@@ -53,3 +53,9 @@ export interface GooglePlaceDetails {
}; };
status: string; status: string;
} }
/**
 * Response envelope of the Google Places Text Search endpoint (`textsearch/json`),
 * as consumed by `GoogleMapsClient.searchStationByName`.
 */
export interface GoogleTextSearchResponse {
// Matching places; the client only uses the first entry.
results: GooglePlace[];
// API status string. The client accepts 'OK' and 'ZERO_RESULTS' and treats any other value as an error.
status: string;
// Presumably the pagination token for requesting further results; not read by the visible client code.
next_page_token?: string;
}

View File

@@ -0,0 +1,276 @@
/**
* @ai-summary Unit tests for station matching from receipt merchant names
*/
// Mock config-loader before any imports that use it
// The stub supplies a fake Google Maps API key plus placeholder database/Redis accessors.
jest.mock('../../../../core/config/config-loader', () => ({
appConfig: {
secrets: { google_maps_api_key: 'mock-api-key' },
getDatabaseUrl: () => 'postgresql://mock:mock@localhost/mock',
getRedisUrl: () => 'redis://localhost',
get: () => ({}),
},
}));
// Auto-mocked collaborators: HTTP client, cache, logger and the stations repository.
jest.mock('axios');
jest.mock('../../../../core/config/redis');
jest.mock('../../../../core/logging/logger');
jest.mock('../../data/stations.repository');
// Partial mock: keep the real GoogleMapsClient class (exercised directly by the
// client tests) but replace the exported `googleMapsClient` singleton with stubs
// so the service tests can control its results.
jest.mock('../../external/google-maps/google-maps.client', () => {
const { GoogleMapsClient } = jest.requireActual('../../external/google-maps/google-maps.client');
return {
GoogleMapsClient,
googleMapsClient: {
searchNearbyStations: jest.fn(),
searchStationByName: jest.fn(),
fetchPhoto: jest.fn(),
},
};
});
import axios from 'axios';
import { GoogleMapsClient } from '../../external/google-maps/google-maps.client';
import { StationsService } from '../../domain/stations.service';
import { StationsRepository } from '../../data/stations.repository';
import { googleMapsClient } from '../../external/google-maps/google-maps.client';
import { logger } from '../../../../core/logging/logger';
import { mockStations } from '../fixtures/mock-stations';
describe('Station Matching from Receipt', () => {
// Exercises the real GoogleMapsClient.searchStationByName against a mocked axios.
describe('GoogleMapsClient.searchStationByName', () => {
let client: GoogleMapsClient;
let mockAxios: jest.Mocked<typeof axios>;
// Each test starts with cleared mock call history and a fresh client instance.
beforeEach(() => {
jest.clearAllMocks();
mockAxios = axios as jest.Mocked<typeof axios>;
client = new GoogleMapsClient();
});
// Happy path: the top result is mapped to a Station and the request targets
// the Text Search endpoint with the "<merchant> gas station" query.
it('should match a known station name like "Shell"', async () => {
mockAxios.get.mockResolvedValue({
data: {
results: [
{
place_id: 'ChIJ_shell_match',
name: 'Shell Gas Station',
formatted_address: '123 Main St, San Francisco, CA 94105',
geometry: { location: { lat: 37.7749, lng: -122.4194 } },
rating: 4.2,
photos: [{ photo_reference: 'shell-photo-ref' }],
opening_hours: { open_now: true },
types: ['gas_station'],
},
],
status: 'OK',
},
});
const result = await client.searchStationByName('Shell');
expect(result).not.toBeNull();
expect(result?.placeId).toBe('ChIJ_shell_match');
expect(result?.name).toBe('Shell Gas Station');
expect(result?.address).toBe('123 Main St, San Francisco, CA 94105');
expect(mockAxios.get).toHaveBeenCalledWith(
expect.stringContaining('textsearch/json'),
expect.objectContaining({
params: expect.objectContaining({
query: 'Shell gas station',
type: 'gas_station',
}),
})
);
});
// Receipt-style merchant strings (store numbers, upper case) still resolve to the top result.
it('should match abbreviated names like "COSTCO #123"', async () => {
mockAxios.get.mockResolvedValue({
data: {
results: [
{
place_id: 'ChIJ_costco_match',
name: 'Costco Gasoline',
formatted_address: '2000 El Camino Real, Redwood City, CA',
geometry: { location: { lat: 37.4849, lng: -122.2278 } },
rating: 4.5,
types: ['gas_station'],
},
],
status: 'OK',
},
});
const result = await client.searchStationByName('COSTCO #123');
expect(result).not.toBeNull();
expect(result?.name).toBe('Costco Gasoline');
expect(result?.placeId).toBe('ChIJ_costco_match');
});
// Minimal result payload: no rating, photos or opening hours.
it('should match "BP" station name', async () => {
mockAxios.get.mockResolvedValue({
data: {
results: [
{
place_id: 'ChIJ_bp_match',
name: 'BP',
formatted_address: '500 Market St, San Francisco, CA',
geometry: { location: { lat: 37.79, lng: -122.40 } },
types: ['gas_station'],
},
],
status: 'OK',
},
});
const result = await client.searchStationByName('BP');
expect(result).not.toBeNull();
expect(result?.name).toBe('BP');
});
// ZERO_RESULTS is a valid outcome and yields null rather than an error.
it('should return null when no match is found', async () => {
mockAxios.get.mockResolvedValue({
data: {
results: [],
status: 'ZERO_RESULTS',
},
});
const result = await client.searchStationByName('Unknown Station XYZ123');
expect(result).toBeNull();
});
// Transport failures are swallowed by the client and reported as null.
it('should return null gracefully on API error', async () => {
mockAxios.get.mockRejectedValue(new Error('Network error'));
const result = await client.searchStationByName('Shell');
expect(result).toBeNull();
});
// A non-OK, non-ZERO_RESULTS status is treated as a failure and reported as null.
it('should return null on API denial', async () => {
mockAxios.get.mockResolvedValue({
data: {
results: [],
status: 'REQUEST_DENIED',
error_message: 'Invalid key',
},
});
const result = await client.searchStationByName('Shell');
expect(result).toBeNull();
});
// Timeouts are logged at warn level (not error) and reported as null.
it('should return null with logged warning on Places API timeout', async () => {
const timeoutError = new Error('timeout of 5000ms exceeded') as any;
timeoutError.code = 'ECONNABORTED';
mockAxios.get.mockRejectedValue(timeoutError);
const mockLogger = logger as jest.Mocked<typeof logger>;
const result = await client.searchStationByName('Shell');
expect(result).toBeNull();
expect(mockLogger.warn).toHaveBeenCalledWith(
'Station name search timed out',
expect.objectContaining({ merchantName: 'Shell', timeoutMs: 5000 })
);
expect(mockLogger.error).not.toHaveBeenCalled();
});
// Optional fields from the API are carried over onto the Station, including open_now: false.
it('should include rating and photo reference when available', async () => {
mockAxios.get.mockResolvedValue({
data: {
results: [
{
place_id: 'ChIJ_rated',
name: 'Chevron',
formatted_address: '789 Oak Ave, Portland, OR',
geometry: { location: { lat: 45.52, lng: -122.68 } },
rating: 4.7,
photos: [{ photo_reference: 'chevron-photo' }],
opening_hours: { open_now: false },
types: ['gas_station'],
},
],
status: 'OK',
},
});
const result = await client.searchStationByName('Chevron');
expect(result?.rating).toBe(4.7);
expect(result?.photoReference).toBe('chevron-photo');
expect(result?.isOpen).toBe(false);
});
});
// Exercises StationsService.matchStationFromReceipt with a stubbed repository
// and the mocked `googleMapsClient.searchStationByName` singleton method.
describe('StationsService.matchStationFromReceipt', () => {
let service: StationsService;
let mockRepository: jest.Mocked<StationsRepository>;
const mockSearchByName = googleMapsClient.searchStationByName as jest.Mock;
// Rebuild the repository stub and the service before every test.
// NOTE(review): jest.clearAllMocks() clears call history but not configured
// return values, so a mockResolvedValue set in one test stays active in later tests.
beforeEach(() => {
jest.clearAllMocks();
mockRepository = {
cacheStation: jest.fn().mockResolvedValue(undefined),
getCachedStation: jest.fn(),
saveStation: jest.fn(),
getUserSavedStations: jest.fn().mockResolvedValue([]),
updateSavedStation: jest.fn(),
deleteSavedStation: jest.fn(),
} as unknown as jest.Mocked<StationsRepository>;
service = new StationsService(mockRepository);
});
// A match from the client is returned as { matched: true, station } and cached via the repository.
it('should return matched station for known merchant name', async () => {
const matchedStation = mockStations[0]!;
mockSearchByName.mockResolvedValue(matchedStation);
const result = await service.matchStationFromReceipt('Shell');
expect(result.matched).toBe(true);
expect(result.station).not.toBeNull();
expect(result.station?.name).toBe('Shell Gas Station - Downtown');
expect(mockRepository.cacheStation).toHaveBeenCalledWith(matchedStation);
});
// A null result from the client yields { matched: false, station: null } and nothing is cached.
it('should return no match for unknown merchant', async () => {
mockSearchByName.mockResolvedValue(null);
const result = await service.matchStationFromReceipt('Unknown Store');
expect(result.matched).toBe(false);
expect(result.station).toBeNull();
expect(mockRepository.cacheStation).not.toHaveBeenCalled();
});
// Empty input is reported as "no match".
it('should handle empty merchant name', async () => {
const result = await service.matchStationFromReceipt('');
expect(result.matched).toBe(false);
expect(result.station).toBeNull();
});
// Whitespace-only input is reported as "no match".
it('should handle whitespace-only merchant name', async () => {
const result = await service.matchStationFromReceipt(' ');
expect(result.matched).toBe(false);
expect(result.station).toBeNull();
});
// The matched station is passed to repository.cacheStation, per the test title so a later saveStation can use it.
it('should cache matched station for future saveStation calls', async () => {
const matchedStation = mockStations[1]!;
mockSearchByName.mockResolvedValue(matchedStation);
await service.matchStationFromReceipt('Chevron');
expect(mockRepository.cacheStation).toHaveBeenCalledWith(matchedStation);
});
});
});

View File

@@ -56,6 +56,10 @@ services:
OCR_FALLBACK_THRESHOLD: "0.6" OCR_FALLBACK_THRESHOLD: "0.6"
GOOGLE_VISION_KEY_PATH: /run/secrets/google-wif-config.json GOOGLE_VISION_KEY_PATH: /run/secrets/google-wif-config.json
VISION_MONTHLY_LIMIT: "1000" VISION_MONTHLY_LIMIT: "1000"
# Vertex AI / Gemini configuration (maintenance schedule extraction)
VERTEX_AI_PROJECT: motovaultpro
VERTEX_AI_LOCATION: us-central1
GEMINI_MODEL: gemini-2.5-flash
# PostgreSQL - Remove dev ports, production log level # PostgreSQL - Remove dev ports, production log level
mvp-postgres: mvp-postgres:

View File

@@ -76,6 +76,10 @@ services:
OCR_FALLBACK_THRESHOLD: "0.6" OCR_FALLBACK_THRESHOLD: "0.6"
GOOGLE_VISION_KEY_PATH: /run/secrets/google-wif-config.json GOOGLE_VISION_KEY_PATH: /run/secrets/google-wif-config.json
VISION_MONTHLY_LIMIT: "1000" VISION_MONTHLY_LIMIT: "1000"
# Vertex AI / Gemini configuration (maintenance schedule extraction)
VERTEX_AI_PROJECT: motovaultpro
VERTEX_AI_LOCATION: us-central1
GEMINI_MODEL: gemini-2.5-flash
volumes: volumes:
- ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro - ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro
- ./secrets/app/auth0-ocr-client-secret.txt:/run/secrets/auth0-ocr-client-secret:ro - ./secrets/app/auth0-ocr-client-secret.txt:/run/secrets/auth0-ocr-client-secret:ro

View File

@@ -203,6 +203,10 @@ services:
OCR_FALLBACK_THRESHOLD: "0.6" OCR_FALLBACK_THRESHOLD: "0.6"
GOOGLE_VISION_KEY_PATH: /run/secrets/google-wif-config.json GOOGLE_VISION_KEY_PATH: /run/secrets/google-wif-config.json
VISION_MONTHLY_LIMIT: "1000" VISION_MONTHLY_LIMIT: "1000"
# Vertex AI / Gemini configuration (maintenance schedule extraction)
VERTEX_AI_PROJECT: motovaultpro
VERTEX_AI_LOCATION: us-central1
GEMINI_MODEL: gemini-2.5-flash
volumes: volumes:
- /tmp/vin-debug:/tmp/vin-debug - /tmp/vin-debug:/tmp/vin-debug
- ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro - ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro

File diff suppressed because one or more lines are too long

View File

@@ -7,9 +7,9 @@
| `admin/` | Admin panel and catalog management | Admin UI, user management | | `admin/` | Admin panel and catalog management | Admin UI, user management |
| `auth/` | Authentication pages and components | Login, logout, auth flows | | `auth/` | Authentication pages and components | Login, logout, auth flows |
| `dashboard/` | Dashboard and fleet overview | Home page, summary widgets | | `dashboard/` | Dashboard and fleet overview | Home page, summary widgets |
| `documents/` | Document management UI | File upload, document viewer | | `documents/` | Document management UI with maintenance manual extraction | File upload, document viewer, manual OCR extraction |
| `fuel-logs/` | Fuel log tracking UI | Fuel entry forms, statistics | | `fuel-logs/` | Fuel log tracking UI with receipt OCR scanning | Fuel entry forms, receipt scanning, statistics |
| `maintenance/` | Maintenance record UI | Service tracking, reminders | | `maintenance/` | Maintenance record and schedule UI with OCR batch creation | Service tracking, extraction review, schedule management |
| `notifications/` | Notification display | Alert UI, notification center | | `notifications/` | Notification display | Alert UI, notification center |
| `onboarding/` | Onboarding wizard | First-time user experience | | `onboarding/` | Onboarding wizard | First-time user experience |
| `ownership-costs/` | Ownership cost tracking UI | Cost displays, expense forms | | `ownership-costs/` | Ownership cost tracking UI | Cost displays, expense forms |

View File

@@ -0,0 +1,49 @@
# documents/
Document management UI with maintenance manual extraction. Handles file uploads, document viewing, and PDF-based maintenance schedule extraction via Gemini.
## Subdirectories
| Directory | What | When to read |
| --------- | ---- | ------------ |
| `api/` | Document API endpoints | API integration |
| `components/` | Document forms, dialogs, preview, metadata display | UI changes |
| `hooks/` | Document CRUD, manual extraction, upload progress | Business logic |
| `mobile/` | Mobile-specific document layout | Mobile UI |
| `pages/` | DocumentsPage, DocumentDetailPage | Page layout |
| `types/` | TypeScript type definitions | Type changes |
| `utils/` | Utility functions (vehicle label formatting) | Helper logic |
## Key Files
| File | What | When to read |
| ---- | ---- | ------------ |
| `hooks/useManualExtraction.ts` | Manual extraction orchestration: submit PDF to /ocr/extract/manual, poll job status via /ocr/jobs/:jobId, return extraction results | Manual extraction flow, job polling |
| `components/DocumentForm.tsx` | Document metadata form with "Scan for Maintenance Schedule" checkbox (Pro tier) | Document upload, extraction trigger |
| `components/AddDocumentDialog.tsx` | Add document dialog integrating DocumentForm, upload progress, and manual extraction trigger | Document creation flow |
| `hooks/useDocuments.ts` | CRUD operations for documents | Document data management |
| `hooks/useUploadWithProgress.ts` | File upload with progress tracking | Upload UI |
| `components/DocumentPreview.tsx` | Document viewer/preview | Document display |
| `components/EditDocumentDialog.tsx` | Edit document metadata | Document editing |
| `types/documents.types.ts` | DocumentType, DocumentRecord, CreateDocumentRequest | Type definitions |
## Manual Extraction Flow
```
DocumentForm ("Scan for Maintenance Schedule" checkbox, Pro tier)
|
v
AddDocumentDialog (hosts DocumentForm) -> on submit, DocumentForm calls useManualExtraction.submit(file, vehicleId)
|
v
POST /api/ocr/extract/manual (async job)
|
v
Poll GET /api/ocr/jobs/:jobId every 3 s (progress: 10% -> 50% -> 95% -> 100%)
|
v
Job completed -> MaintenanceScheduleReviewScreen (in maintenance/ feature)
|
v
User selects/edits items -> Batch create maintenance schedules
```

View File

@@ -4,7 +4,7 @@ import { UpgradeRequiredDialog } from '../../../shared-minimal/components/Upgrad
import { LocalizationProvider } from '@mui/x-date-pickers/LocalizationProvider'; import { LocalizationProvider } from '@mui/x-date-pickers/LocalizationProvider';
import { AdapterDayjs } from '@mui/x-date-pickers/AdapterDayjs'; import { AdapterDayjs } from '@mui/x-date-pickers/AdapterDayjs';
import { DatePicker } from '@mui/x-date-pickers/DatePicker'; import { DatePicker } from '@mui/x-date-pickers/DatePicker';
import { Checkbox, FormControlLabel } from '@mui/material'; import { Checkbox, FormControlLabel, LinearProgress } from '@mui/material';
import LockOutlinedIcon from '@mui/icons-material/LockOutlined'; import LockOutlinedIcon from '@mui/icons-material/LockOutlined';
import dayjs from 'dayjs'; import dayjs from 'dayjs';
import { useCreateDocument, useUpdateDocument, useAddSharedVehicle, useRemoveVehicleFromDocument } from '../hooks/useDocuments'; import { useCreateDocument, useUpdateDocument, useAddSharedVehicle, useRemoveVehicleFromDocument } from '../hooks/useDocuments';
@@ -13,6 +13,8 @@ import type { DocumentType, DocumentRecord } from '../types/documents.types';
import { useVehicles } from '../../vehicles/hooks/useVehicles'; import { useVehicles } from '../../vehicles/hooks/useVehicles';
import type { Vehicle } from '../../vehicles/types/vehicles.types'; import type { Vehicle } from '../../vehicles/types/vehicles.types';
import { useTierAccess } from '../../../core/hooks/useTierAccess'; import { useTierAccess } from '../../../core/hooks/useTierAccess';
import { useManualExtraction } from '../hooks/useManualExtraction';
import { MaintenanceScheduleReviewScreen } from '../../maintenance/components/MaintenanceScheduleReviewScreen';
interface DocumentFormProps { interface DocumentFormProps {
mode?: 'create' | 'edit'; mode?: 'create' | 'edit';
@@ -95,6 +97,31 @@ export const DocumentForm: React.FC<DocumentFormProps> = ({
const removeSharedVehicle = useRemoveVehicleFromDocument(); const removeSharedVehicle = useRemoveVehicleFromDocument();
const { hasAccess } = useTierAccess(); const { hasAccess } = useTierAccess();
const canScanMaintenance = hasAccess('document.scanMaintenanceSchedule'); const canScanMaintenance = hasAccess('document.scanMaintenanceSchedule');
const extraction = useManualExtraction();
const [reviewDialogOpen, setReviewDialogOpen] = React.useState(false);
// Open review dialog when extraction completes
React.useEffect(() => {
if (extraction.status === 'completed' && extraction.result) {
setReviewDialogOpen(true);
}
}, [extraction.status, extraction.result]);
const isExtracting = extraction.status === 'pending' || extraction.status === 'processing';
const handleReviewClose = () => {
setReviewDialogOpen(false);
extraction.reset();
resetForm();
onSuccess?.();
};
const handleSchedulesCreated = (_count: number) => {
setReviewDialogOpen(false);
extraction.reset();
resetForm();
onSuccess?.();
};
const resetForm = () => { const resetForm = () => {
setTitle(''); setTitle('');
@@ -234,6 +261,18 @@ export const DocumentForm: React.FC<DocumentFormProps> = ({
setError(uploadErr?.message || 'Failed to upload file'); setError(uploadErr?.message || 'Failed to upload file');
return; return;
} }
// Trigger manual extraction if scan checkbox was checked
if (scanForMaintenance && documentType === 'manual' && file.type === 'application/pdf') {
try {
await extraction.submit(file, vehicleID);
// Don't call onSuccess yet - wait for extraction and review
return;
} catch (extractionErr: any) {
setError(extractionErr?.message || 'Failed to start maintenance extraction');
return;
}
}
} }
resetForm(); resetForm();
@@ -538,8 +577,8 @@ export const DocumentForm: React.FC<DocumentFormProps> = ({
<LockOutlinedIcon fontSize="small" /> <LockOutlinedIcon fontSize="small" />
</button> </button>
)} )}
{canScanMaintenance && ( {canScanMaintenance && scanForMaintenance && (
<span className="ml-1 text-xs text-slate-500 dark:text-titanio">(Coming soon)</span> <span className="ml-1 text-xs text-slate-500 dark:text-titanio">PDF will be scanned after upload</span>
)} )}
</div> </div>
)} )}
@@ -569,6 +608,39 @@ export const DocumentForm: React.FC<DocumentFormProps> = ({
<div className="text-sm text-slate-600 dark:text-titanio mt-1">Uploading... {uploadProgress}%</div> <div className="text-sm text-slate-600 dark:text-titanio mt-1">Uploading... {uploadProgress}%</div>
)} )}
</div> </div>
{isExtracting && (
<div className="md:col-span-2 mt-2">
<div className="flex items-center gap-3 p-3 rounded-lg border border-primary-200 bg-primary-50 dark:border-abudhabi/30 dark:bg-scuro">
<div className="flex-1">
<div className="text-sm font-medium text-slate-700 dark:text-avus mb-1">
Scanning manual for maintenance schedules...
</div>
<LinearProgress
variant={extraction.progress > 0 ? 'determinate' : 'indeterminate'}
value={extraction.progress}
sx={{ borderRadius: 1 }}
/>
<div className="text-xs text-slate-500 dark:text-titanio mt-1">
{extraction.progress >= 100 ? '100% - Complete' :
extraction.progress >= 95 ? `${extraction.progress}% - Mapping maintenance schedules...` :
extraction.progress >= 50 ? `${extraction.progress}% - Processing maintenance data...` :
extraction.progress >= 10 ? `${extraction.progress}% - Preparing document...` :
extraction.progress > 0 ? `${extraction.progress}% complete` :
'Starting extraction...'}
</div>
</div>
</div>
</div>
)}
{extraction.status === 'failed' && extraction.error && (
<div className="md:col-span-2 mt-2">
<div className="text-red-600 dark:text-red-400 text-sm p-3 rounded-lg border border-red-200 dark:border-red-800 bg-red-50 dark:bg-red-900/20">
Extraction failed: {extraction.error}
</div>
</div>
)}
</div> </div>
{error && ( {error && (
@@ -576,10 +648,10 @@ export const DocumentForm: React.FC<DocumentFormProps> = ({
)} )}
<div className="flex flex-col sm:flex-row gap-2 mt-4"> <div className="flex flex-col sm:flex-row gap-2 mt-4">
<Button type="submit" className="min-h-[44px]"> <Button type="submit" className="min-h-[44px]" disabled={isExtracting}>
{mode === 'edit' ? 'Save Changes' : 'Create Document'} {isExtracting ? 'Scanning...' : mode === 'edit' ? 'Save Changes' : 'Create Document'}
</Button> </Button>
<Button type="button" variant="secondary" onClick={onCancel} className="min-h-[44px]">Cancel</Button> <Button type="button" variant="secondary" onClick={onCancel} className="min-h-[44px]" disabled={isExtracting}>Cancel</Button>
</div> </div>
<UpgradeRequiredDialog <UpgradeRequiredDialog
@@ -587,6 +659,16 @@ export const DocumentForm: React.FC<DocumentFormProps> = ({
open={upgradeDialogOpen} open={upgradeDialogOpen}
onClose={() => setUpgradeDialogOpen(false)} onClose={() => setUpgradeDialogOpen(false)}
/> />
{extraction.result && (
<MaintenanceScheduleReviewScreen
open={reviewDialogOpen}
items={extraction.result.maintenanceSchedules}
vehicleId={vehicleID}
onClose={handleReviewClose}
onCreated={handleSchedulesCreated}
/>
)}
</form> </form>
</LocalizationProvider> </LocalizationProvider>
); );

View File

@@ -0,0 +1,134 @@
/**
* @ai-summary Hook for submitting and polling manual maintenance extraction jobs
* @ai-context Submits PDF to OCR endpoint, polls for status, returns extraction results
*/
import { useState, useCallback } from 'react';
import { useQuery, useMutation } from '@tanstack/react-query';
import { apiClient } from '../../../core/api/client';
// Types matching backend ManualJobResponse / ManualExtractionResult
/** Vehicle identification fields in an extraction result; each field may be null. */
export interface ManualVehicleInfo {
make: string | null;
model: string | null;
year: number | null;
}
/** One extracted maintenance schedule entry, shown to the user for review. */
export interface MaintenanceScheduleItem {
service: string;
// Service interval by distance; null when not present.
intervalMiles: number | null;
// Service interval by time; null when not present.
intervalMonths: number | null;
details: string | null;
// Extraction confidence score; presumably 0-1 — TODO confirm range against the backend.
confidence: number;
subtypes: string[];
}
/** Full result payload of a manual extraction job. */
export interface ManualExtractionResult {
success: boolean;
vehicleInfo: ManualVehicleInfo;
maintenanceSchedules: MaintenanceScheduleItem[];
rawTables: unknown[];
processingTimeMs: number;
totalPages: number;
pagesProcessed: number;
error: string | null;
}
/** Job states reported by the job endpoint; 'completed' and 'failed' stop polling. */
export type JobStatus = 'pending' | 'processing' | 'completed' | 'failed';
/** Response of both the submit endpoint and the job status endpoint. */
export interface ManualJobResponse {
jobId: string;
status: JobStatus;
// Progress value rendered as a percentage (0-100) by the document form.
progress?: number;
estimatedSeconds?: number;
// Extraction payload; optional, so it may be absent on a given response.
result?: ManualExtractionResult;
// Job-level error message; optional.
error?: string;
}
/**
 * Upload a manual to the OCR service and start an asynchronous extraction job.
 *
 * @param file - The document to scan.
 * @param vehicleId - Vehicle the manual belongs to; sent as the `vehicle_id` form field.
 * @returns The initial job descriptor returned by `POST /ocr/extract/manual`.
 */
async function submitManualExtraction(file: File, vehicleId: string): Promise<ManualJobResponse> {
  const payload = new FormData();
  payload.append('file', file);
  payload.append('vehicle_id', vehicleId);

  // Allow up to two minutes for the upload request.
  const requestOptions = {
    headers: { 'Content-Type': 'multipart/form-data' },
    timeout: 120000,
  };

  const { data } = await apiClient.post<ManualJobResponse>(
    '/ocr/extract/manual',
    payload,
    requestOptions
  );
  return data;
}
/**
 * Fetch the current state of an extraction job.
 *
 * @param jobId - Identifier returned when the job was submitted.
 * @returns The job descriptor from `GET /ocr/jobs/:jobId`.
 */
async function getJobStatus(jobId: string): Promise<ManualJobResponse> {
  const { data: job } = await apiClient.get<ManualJobResponse>(`/ocr/jobs/${jobId}`);
  return job;
}
/** HTTP status that the hook reports as "Job expired". */
const JOB_EXPIRED_STATUS = 410;

/** Read `response.status` from an unknown (axios-style) error, if present. */
function getHttpStatus(err: unknown): number | undefined {
  if (typeof err !== 'object' || err === null) {
    return undefined;
  }
  const status = (err as { response?: { status?: unknown } }).response?.status;
  return typeof status === 'number' ? status : undefined;
}

/**
 * Submit a manual for maintenance-schedule extraction and track the resulting job.
 *
 * `submit` posts the file and stores the returned job id. While a job id is
 * set, the job endpoint is polled every 3 seconds until the job reports
 * `completed` or `failed`, or until polling itself ends in an error.
 *
 * @returns
 * - `submit(file, vehicleId)`: starts a job; rejects if the submit request fails.
 * - `isSubmitting`: true while the submit request is in flight.
 * - `jobId`: current job id, or null.
 * - `status`: `'idle'` without a job, `'failed'` on a poll error, otherwise the
 *   job's own status (`'pending'` until the first poll returns).
 * - `progress`: job progress, 0 when unknown.
 * - `result`: extraction result once available, else null.
 * - `error`: message from the job, the poll, or the submit request (in that order), else null.
 * - `reset()`: forget the current job and clear the submit state.
 */
export function useManualExtraction() {
  const [jobId, setJobId] = useState<string | null>(null);

  const submitMutation = useMutation({
    mutationFn: ({ file, vehicleId }: { file: File; vehicleId: string }) =>
      submitManualExtraction(file, vehicleId),
    onSuccess: (data) => {
      setJobId(data.jobId);
    },
  });

  const pollQuery = useQuery<ManualJobResponse>({
    queryKey: ['manualExtractionJob', jobId],
    // `enabled` guarantees jobId is set whenever this runs.
    queryFn: () => getJobStatus(jobId!),
    enabled: !!jobId,
    refetchInterval: (query) => {
      const { data, status: queryStatus } = query.state;
      // The hook reports 'failed' once polling errors out, so stop polling
      // too instead of hitting the endpoint indefinitely.
      if (queryStatus === 'error') {
        return false;
      }
      if (data?.status === 'completed' || data?.status === 'failed') {
        return false;
      }
      return 3000;
    },
    refetchIntervalInBackground: false,
    // Up to two retries for transient failures; an expired job is terminal.
    retry: (failureCount, err) =>
      getHttpStatus(err) !== JOB_EXPIRED_STATUS && failureCount < 2,
  });

  // Depend on the individual mutation functions rather than the whole result
  // object, which is recreated on every state change and would defeat useCallback.
  const { mutateAsync, reset: resetMutation } = submitMutation;

  const submit = useCallback(
    (file: File, vehicleId: string) => mutateAsync({ file, vehicleId }),
    [mutateAsync]
  );

  const reset = useCallback(() => {
    setJobId(null);
    resetMutation();
  }, [resetMutation]);

  const jobData = pollQuery.data;
  const pollError: unknown = pollQuery.error;
  const submitError: unknown = submitMutation.error;

  let status: JobStatus | 'idle';
  if (!jobId) {
    status = 'idle';
  } else if (pollError) {
    status = 'failed';
  } else {
    status = jobData?.status ?? 'pending';
  }

  const progress = jobData?.progress ?? 0;
  const result = jobData?.result ?? null;

  // Error precedence: job-reported error, then poll failure, then submit failure.
  let error: string | null = null;
  if (jobData?.error) {
    error = jobData.error;
  } else if (pollError) {
    error =
      getHttpStatus(pollError) === JOB_EXPIRED_STATUS
        ? 'Job expired. Please resubmit the document.'
        : String((pollError as { message?: unknown }).message || pollError);
  } else if (submitError) {
    error = String((submitError as { message?: unknown }).message || submitError);
  }

  return {
    submit,
    isSubmitting: submitMutation.isPending,
    jobId,
    status,
    progress,
    result,
    error,
    reset,
  };
}

View File

@@ -0,0 +1,48 @@
# fuel-logs/
Fuel log tracking UI with receipt OCR scanning. Captures fuel purchases, calculates statistics, and supports camera-based receipt scanning that auto-extracts fields and matches gas stations.
## Subdirectories
| Directory | What | When to read |
| --------- | ---- | ------------ |
| `api/` | Fuel log API endpoints | API integration |
| `components/` | Form components, receipt OCR UI, stats display | UI changes |
| `hooks/` | Data fetching, receipt OCR orchestration, user settings | Business logic |
| `pages/` | FuelLogsPage | Page layout |
| `types/` | TypeScript type definitions | Type changes |
## Key Files
| File | What | When to read |
| ---- | ---- | ------------ |
| `hooks/useReceiptOcr.ts` | Receipt OCR orchestration: camera capture, OCR extraction via /ocr/extract/receipt, station matching via /stations/match, field mapping | Receipt scanning flow, OCR integration |
| `components/ReceiptOcrReviewModal.tsx` | Modal for reviewing OCR-extracted receipt fields with confidence indicators, inline editing, station match display | Receipt review UI, field editing |
| `components/ReceiptCameraButton.tsx` | Button to trigger receipt camera capture (tier-gated) | Receipt capture entry point |
| `components/FuelLogForm.tsx` | Main fuel log form with OCR integration (setValue from accepted receipt) | Form fields, OCR field mapping |
| `components/ReceiptPreview.tsx` | Receipt image preview | Receipt display |
| `components/StationPicker.tsx` | Gas station selection with search | Station selection UI |
| `components/FuelLogsList.tsx` | Fuel log list display | Log listing |
| `components/FuelStatsCard.tsx` | Fuel statistics summary | Statistics display |
| `hooks/useFuelLogs.tsx` | CRUD operations for fuel logs | Data management |
| `types/fuel-logs.types.ts` | FuelLogResponse, CreateFuelLogRequest, LocationData, UnitSystem | Type definitions |
## Receipt OCR Flow
```
ReceiptCameraButton (tier check)
|
v
useReceiptOcr.startCapture() -> CameraCapture (shared component)
|
v
useReceiptOcr.processImage() -> POST /api/ocr/extract/receipt
|
v
ReceiptOcrReviewModal (display extracted fields, confidence indicators)
|
+-- POST /api/stations/match (merchantName -> station match)
|
v
useReceiptOcr.acceptResult() -> FuelLogForm.setValue() (pre-fill form)
```

View File

@@ -19,6 +19,8 @@ import { useFuelLogs } from '../hooks/useFuelLogs';
import { useUserSettings } from '../hooks/useUserSettings'; import { useUserSettings } from '../hooks/useUserSettings';
import { useReceiptOcr } from '../hooks/useReceiptOcr'; import { useReceiptOcr } from '../hooks/useReceiptOcr';
import { useGeolocation } from '../../stations/hooks/useGeolocation'; import { useGeolocation } from '../../stations/hooks/useGeolocation';
import { useTierAccess } from '../../../core/hooks/useTierAccess';
import { UpgradeRequiredDialog } from '../../../shared-minimal/components/UpgradeRequiredDialog';
import { CameraCapture } from '../../../shared/components/CameraCapture'; import { CameraCapture } from '../../../shared/components/CameraCapture';
import { CreateFuelLogRequest, FuelType } from '../types/fuel-logs.types'; import { CreateFuelLogRequest, FuelType } from '../types/fuel-logs.types';
@@ -48,6 +50,11 @@ const FuelLogFormComponent: React.FC<{ onSuccess?: () => void; initial?: Partial
// Get user location for nearby station search // Get user location for nearby station search
const { coordinates: userLocation } = useGeolocation(); const { coordinates: userLocation } = useGeolocation();
// Tier access check for receipt scan feature
const { hasAccess } = useTierAccess();
const hasReceiptScanAccess = hasAccess('fuelLog.receiptScan');
const [showUpgradeDialog, setShowUpgradeDialog] = useState(false);
// Receipt OCR integration // Receipt OCR integration
const { const {
isCapturing, isCapturing,
@@ -61,6 +68,7 @@ const FuelLogFormComponent: React.FC<{ onSuccess?: () => void; initial?: Partial
acceptResult, acceptResult,
reset: resetOcr, reset: resetOcr,
updateField, updateField,
clearMatchedStation,
} = useReceiptOcr(); } = useReceiptOcr();
const { control, handleSubmit, watch, setValue, reset, formState: { errors, isValid } } = useForm<CreateFuelLogRequest>({ const { control, handleSubmit, watch, setValue, reset, formState: { errors, isValid } } = useForm<CreateFuelLogRequest>({
@@ -152,13 +160,13 @@ const FuelLogFormComponent: React.FC<{ onSuccess?: () => void; initial?: Partial
if (mappedFields.fuelGrade) { if (mappedFields.fuelGrade) {
setValue('fuelGrade', mappedFields.fuelGrade); setValue('fuelGrade', mappedFields.fuelGrade);
} }
if (mappedFields.locationData?.stationName) { if (mappedFields.locationData) {
// Set station name in locationData if no station is already selected // Set location data from OCR + station matching if no station is already selected
const currentLocation = watch('locationData'); const currentLocation = watch('locationData');
if (!currentLocation?.stationName && !currentLocation?.googlePlaceId) { if (!currentLocation?.stationName && !currentLocation?.googlePlaceId) {
setValue('locationData', { setValue('locationData', {
...currentLocation, ...currentLocation,
stationName: mappedFields.locationData.stationName, ...mappedFields.locationData,
}); });
} }
} }
@@ -217,9 +225,16 @@ const FuelLogFormComponent: React.FC<{ onSuccess?: () => void; initial?: Partial
}} }}
> >
<ReceiptCameraButton <ReceiptCameraButton
onClick={startCapture} onClick={() => {
if (!hasReceiptScanAccess) {
setShowUpgradeDialog(true);
return;
}
startCapture();
}}
disabled={isProcessing || isLoading} disabled={isProcessing || isLoading}
variant="button" variant="button"
locked={!hasReceiptScanAccess}
/> />
</Box> </Box>
@@ -429,13 +444,22 @@ const FuelLogFormComponent: React.FC<{ onSuccess?: () => void; initial?: Partial
open={!!ocrResult} open={!!ocrResult}
extractedFields={ocrResult.extractedFields} extractedFields={ocrResult.extractedFields}
receiptImageUrl={receiptImageUrl} receiptImageUrl={receiptImageUrl}
matchedStation={ocrResult.matchedStation}
onAccept={handleAcceptOcrResult} onAccept={handleAcceptOcrResult}
onRetake={handleRetakePhoto} onRetake={handleRetakePhoto}
onCancel={resetOcr} onCancel={resetOcr}
onFieldEdit={updateField} onFieldEdit={updateField}
onClearMatchedStation={clearMatchedStation}
/> />
)} )}
{/* Upgrade Required Dialog for Receipt Scan */}
<UpgradeRequiredDialog
featureKey="fuelLog.receiptScan"
open={showUpgradeDialog}
onClose={() => setShowUpgradeDialog(false)}
/>
{/* OCR Error Display */} {/* OCR Error Display */}
{ocrError && ( {ocrError && (
<Dialog open={!!ocrError} onClose={resetOcr} maxWidth="xs"> <Dialog open={!!ocrError} onClose={resetOcr} maxWidth="xs">

View File

@@ -7,6 +7,7 @@ import React from 'react';
import { Button, IconButton, Tooltip, useTheme, useMediaQuery } from '@mui/material'; import { Button, IconButton, Tooltip, useTheme, useMediaQuery } from '@mui/material';
import CameraAltIcon from '@mui/icons-material/CameraAlt'; import CameraAltIcon from '@mui/icons-material/CameraAlt';
import ReceiptIcon from '@mui/icons-material/Receipt'; import ReceiptIcon from '@mui/icons-material/Receipt';
import LockOutlinedIcon from '@mui/icons-material/LockOutlined';
export interface ReceiptCameraButtonProps { export interface ReceiptCameraButtonProps {
/** Called when user clicks to start capture */ /** Called when user clicks to start capture */
@@ -17,6 +18,8 @@ export interface ReceiptCameraButtonProps {
variant?: 'icon' | 'button' | 'auto'; variant?: 'icon' | 'button' | 'auto';
/** Size of the button */ /** Size of the button */
size?: 'small' | 'medium' | 'large'; size?: 'small' | 'medium' | 'large';
/** Whether the feature is locked behind a tier gate */
locked?: boolean;
} }
export const ReceiptCameraButton: React.FC<ReceiptCameraButtonProps> = ({ export const ReceiptCameraButton: React.FC<ReceiptCameraButtonProps> = ({
@@ -24,6 +27,7 @@ export const ReceiptCameraButton: React.FC<ReceiptCameraButtonProps> = ({
disabled = false, disabled = false,
variant = 'auto', variant = 'auto',
size = 'medium', size = 'medium',
locked = false,
}) => { }) => {
const theme = useTheme(); const theme = useTheme();
const isMobile = useMediaQuery(theme.breakpoints.down('sm')); const isMobile = useMediaQuery(theme.breakpoints.down('sm'));
@@ -31,28 +35,32 @@ export const ReceiptCameraButton: React.FC<ReceiptCameraButtonProps> = ({
// Determine display variant // Determine display variant
const displayVariant = variant === 'auto' ? (isMobile ? 'icon' : 'button') : variant; const displayVariant = variant === 'auto' ? (isMobile ? 'icon' : 'button') : variant;
const tooltipTitle = locked ? 'Upgrade to Pro to scan receipts' : 'Scan Receipt';
if (displayVariant === 'icon') { if (displayVariant === 'icon') {
return ( return (
<Tooltip title="Scan Receipt"> <Tooltip title={tooltipTitle}>
<span> <span>
<IconButton <IconButton
onClick={onClick} onClick={onClick}
disabled={disabled} disabled={disabled}
color="primary" color="primary"
size={size} size={size}
aria-label="Scan receipt with camera" aria-label={locked ? 'Scan receipt (Pro feature)' : 'Scan receipt with camera'}
sx={{ sx={{
backgroundColor: 'primary.light', minWidth: 44,
color: 'primary.contrastText', minHeight: 44,
backgroundColor: locked ? 'action.disabledBackground' : 'primary.light',
color: locked ? 'text.secondary' : 'primary.contrastText',
'&:hover': { '&:hover': {
backgroundColor: 'primary.main', backgroundColor: locked ? 'action.hover' : 'primary.main',
}, },
'&.Mui-disabled': { '&.Mui-disabled': {
backgroundColor: 'action.disabledBackground', backgroundColor: 'action.disabledBackground',
}, },
}} }}
> >
<CameraAltIcon /> {locked ? <LockOutlinedIcon /> : <CameraAltIcon />}
</IconButton> </IconButton>
</span> </span>
</Tooltip> </Tooltip>
@@ -60,23 +68,28 @@ export const ReceiptCameraButton: React.FC<ReceiptCameraButtonProps> = ({
} }
return ( return (
<Tooltip title={locked ? tooltipTitle : ''}>
<span>
<Button <Button
onClick={onClick} onClick={onClick}
disabled={disabled} disabled={disabled}
variant="outlined" variant="outlined"
color="primary" color={locked ? 'inherit' : 'primary'}
size={size} size={size}
startIcon={<ReceiptIcon />} startIcon={locked ? <LockOutlinedIcon /> : <ReceiptIcon />}
endIcon={<CameraAltIcon />} endIcon={locked ? undefined : <CameraAltIcon />}
sx={{ sx={{
minHeight: 44,
borderStyle: 'dashed', borderStyle: 'dashed',
'&:hover': { '&:hover': {
borderStyle: 'solid', borderStyle: 'solid',
}, },
}} }}
> >
Scan Receipt {locked ? 'Scan Receipt (Pro)' : 'Scan Receipt'}
</Button> </Button>
</span>
</Tooltip>
); );
}; };

View File

@@ -24,9 +24,11 @@ import EditIcon from '@mui/icons-material/Edit';
import CheckIcon from '@mui/icons-material/Check'; import CheckIcon from '@mui/icons-material/Check';
import CloseIcon from '@mui/icons-material/Close'; import CloseIcon from '@mui/icons-material/Close';
import CameraAltIcon from '@mui/icons-material/CameraAlt'; import CameraAltIcon from '@mui/icons-material/CameraAlt';
import PlaceIcon from '@mui/icons-material/Place';
import { import {
ExtractedReceiptFields, ExtractedReceiptFields,
ExtractedReceiptField, ExtractedReceiptField,
MatchedStation,
LOW_CONFIDENCE_THRESHOLD, LOW_CONFIDENCE_THRESHOLD,
} from '../hooks/useReceiptOcr'; } from '../hooks/useReceiptOcr';
import { ReceiptPreview } from './ReceiptPreview'; import { ReceiptPreview } from './ReceiptPreview';
@@ -38,6 +40,8 @@ export interface ReceiptOcrReviewModalProps {
extractedFields: ExtractedReceiptFields; extractedFields: ExtractedReceiptFields;
/** Receipt image URL for preview */ /** Receipt image URL for preview */
receiptImageUrl: string | null; receiptImageUrl: string | null;
/** Matched station from merchant name (if any) */
matchedStation?: MatchedStation | null;
/** Called when user accepts the fields */ /** Called when user accepts the fields */
onAccept: () => void; onAccept: () => void;
/** Called when user wants to retake the photo */ /** Called when user wants to retake the photo */
@@ -46,6 +50,8 @@ export interface ReceiptOcrReviewModalProps {
onCancel: () => void; onCancel: () => void;
/** Called when user edits a field */ /** Called when user edits a field */
onFieldEdit: (fieldName: keyof ExtractedReceiptFields, value: string | number | null) => void; onFieldEdit: (fieldName: keyof ExtractedReceiptFields, value: string | number | null) => void;
/** Called when user clears the matched station */
onClearMatchedStation?: () => void;
} }
/** Confidence indicator component */ /** Confidence indicator component */
@@ -209,10 +215,12 @@ export const ReceiptOcrReviewModal: React.FC<ReceiptOcrReviewModalProps> = ({
open, open,
extractedFields, extractedFields,
receiptImageUrl, receiptImageUrl,
matchedStation,
onAccept, onAccept,
onRetake, onRetake,
onCancel, onCancel,
onFieldEdit, onFieldEdit,
onClearMatchedStation,
}) => { }) => {
const theme = useTheme(); const theme = useTheme();
const isMobile = useMediaQuery(theme.breakpoints.down('sm')); const isMobile = useMediaQuery(theme.breakpoints.down('sm'));
@@ -354,6 +362,40 @@ export const ReceiptOcrReviewModal: React.FC<ReceiptOcrReviewModalProps> = ({
onEdit={(value) => onFieldEdit('merchantName', value)} onEdit={(value) => onFieldEdit('merchantName', value)}
type="text" type="text"
/> />
{matchedStation && (
<Box
sx={{
display: 'flex',
alignItems: 'center',
py: 1,
px: 1,
ml: '100px',
gap: 1,
backgroundColor: 'success.light',
borderRadius: 1,
mb: 0.5,
}}
>
<PlaceIcon fontSize="small" color="success" />
<Box sx={{ flex: 1, minWidth: 0 }}>
<Typography variant="body2" fontWeight={500} noWrap>
{matchedStation.name}
</Typography>
<Typography variant="caption" color="text.secondary" noWrap>
{matchedStation.address}
</Typography>
</Box>
{onClearMatchedStation && (
<IconButton
size="small"
onClick={onClearMatchedStation}
aria-label="Clear matched station"
>
<CloseIcon fontSize="small" />
</IconButton>
)}
</Box>
)}
</Collapse> </Collapse>
{isMobile && ( {isMobile && (
@@ -388,14 +430,14 @@ export const ReceiptOcrReviewModal: React.FC<ReceiptOcrReviewModalProps> = ({
<Button <Button
onClick={onRetake} onClick={onRetake}
startIcon={<CameraAltIcon />} startIcon={<CameraAltIcon />}
sx={{ order: isMobile ? 2 : 1 }} sx={{ order: isMobile ? 2 : 1, minHeight: 44 }}
> >
Retake Photo Retake Photo
</Button> </Button>
<Box sx={{ flex: 1, display: isMobile ? 'none' : 'block' }} /> <Box sx={{ flex: 1, display: isMobile ? 'none' : 'block' }} />
<Button <Button
onClick={onCancel} onClick={onCancel}
sx={{ order: isMobile ? 3 : 2 }} sx={{ order: isMobile ? 3 : 2, minHeight: 44 }}
> >
Cancel Cancel
</Button> </Button>
@@ -403,7 +445,7 @@ export const ReceiptOcrReviewModal: React.FC<ReceiptOcrReviewModalProps> = ({
variant="contained" variant="contained"
onClick={onAccept} onClick={onAccept}
startIcon={<CheckIcon />} startIcon={<CheckIcon />}
sx={{ order: isMobile ? 1 : 3, width: isMobile ? '100%' : 'auto' }} sx={{ order: isMobile ? 1 : 3, width: isMobile ? '100%' : 'auto', minHeight: 44 }}
> >
Accept Accept
</Button> </Button>

View File

@@ -31,15 +31,25 @@ export interface MappedFuelLogFields {
fuelGrade?: FuelGrade; fuelGrade?: FuelGrade;
locationData?: { locationData?: {
stationName?: string; stationName?: string;
googlePlaceId?: string;
address?: string;
}; };
} }
/**
 * Station matched from the merchant name read off a receipt.
 * Produced by matchStationFromMerchant and carried on ReceiptOcrResult.matchedStation.
 */
export interface MatchedStation {
/** Place identifier of the station; mapFieldsToFuelLog copies it into locationData.googlePlaceId. */
placeId: string;
/** Station name; mapFieldsToFuelLog uses it as locationData.stationName when a match exists. */
name: string;
/** Station address; mapFieldsToFuelLog copies it into locationData.address when a match exists. */
address: string;
}
/** Receipt OCR result */ /** Receipt OCR result */
export interface ReceiptOcrResult { export interface ReceiptOcrResult {
extractedFields: ExtractedReceiptFields; extractedFields: ExtractedReceiptFields;
mappedFields: MappedFuelLogFields; mappedFields: MappedFuelLogFields;
rawText: string; rawText: string;
overallConfidence: number; overallConfidence: number;
matchedStation: MatchedStation | null;
} }
/** Hook state */ /** Hook state */
@@ -59,6 +69,7 @@ export interface UseReceiptOcrReturn extends UseReceiptOcrState {
acceptResult: () => MappedFuelLogFields | null; acceptResult: () => MappedFuelLogFields | null;
reset: () => void; reset: () => void;
updateField: (fieldName: keyof ExtractedReceiptFields, value: string | number | null) => void; updateField: (fieldName: keyof ExtractedReceiptFields, value: string | number | null) => void;
clearMatchedStation: () => void;
} }
/** Confidence threshold for highlighting low-confidence fields */ /** Confidence threshold for highlighting low-confidence fields */
@@ -137,7 +148,7 @@ async function extractReceiptFromImage(file: File): Promise<{
const formData = new FormData(); const formData = new FormData();
formData.append('file', file); formData.append('file', file);
const response = await apiClient.post('/ocr/extract', formData, { const response = await apiClient.post('/ocr/extract/receipt', formData, {
headers: { 'Content-Type': 'multipart/form-data' }, headers: { 'Content-Type': 'multipart/form-data' },
timeout: 30000, // 30 seconds for OCR processing timeout: 30000, // 30 seconds for OCR processing
}); });
@@ -185,16 +196,48 @@ async function extractReceiptFromImage(file: File): Promise<{
}; };
} }
/**
 * Match a station from a receipt's merchant name via the backend.
 *
 * Matching is best-effort: any failure is logged and reported as "no match"
 * so that receipt processing can continue without a station.
 *
 * @param merchantName Merchant name extracted from the receipt.
 * @returns The matched station, or `null` when the name is blank, the backend
 *          reports no match, or the request fails.
 */
async function matchStationFromMerchant(merchantName: string): Promise<MatchedStation | null> {
  // Send the trimmed name so surrounding whitespace never reaches the matcher,
  // and skip the network round-trip entirely when nothing is left to match.
  const query = merchantName.trim();
  if (!query) {
    return null;
  }

  try {
    const response = await apiClient.post('/stations/match', { merchantName: query });
    const data = response?.data;
    // A missing body is treated the same as an explicit "not matched" answer.
    if (!data?.matched || !data.station) {
      return null;
    }

    // Copy only the fields declared on MatchedStation; drop anything else the backend sends.
    const { placeId, name, address } = data.station;
    return { placeId, name, address };
  } catch (err) {
    console.error('Station matching failed (non-blocking):', err);
    return null;
  }
}
/** Map extracted fields to fuel log form fields */ /** Map extracted fields to fuel log form fields */
function mapFieldsToFuelLog(fields: ExtractedReceiptFields): MappedFuelLogFields { function mapFieldsToFuelLog(
fields: ExtractedReceiptFields,
matchedStation?: MatchedStation | null
): MappedFuelLogFields {
// If station was matched, use matched data; otherwise fall back to merchant name
const locationData = matchedStation
? {
stationName: matchedStation.name,
googlePlaceId: matchedStation.placeId,
address: matchedStation.address,
}
: fields.merchantName.value
? { stationName: String(fields.merchantName.value) }
: undefined;
return { return {
dateTime: parseTransactionDate(fields.transactionDate.value), dateTime: parseTransactionDate(fields.transactionDate.value),
fuelUnits: parseNumber(fields.fuelQuantity.value), fuelUnits: parseNumber(fields.fuelQuantity.value),
costPerUnit: parseNumber(fields.pricePerUnit.value), costPerUnit: parseNumber(fields.pricePerUnit.value),
fuelGrade: mapFuelGrade(fields.fuelGrade.value), fuelGrade: mapFuelGrade(fields.fuelGrade.value),
locationData: fields.merchantName.value locationData,
? { stationName: String(fields.merchantName.value) }
: undefined,
}; };
} }
@@ -232,13 +275,22 @@ export function useReceiptOcr(): UseReceiptOcrReturn {
try { try {
const { extractedFields, rawText, confidence } = await extractReceiptFromImage(imageToProcess); const { extractedFields, rawText, confidence } = await extractReceiptFromImage(imageToProcess);
const mappedFields = mapFieldsToFuelLog(extractedFields);
// Attempt station matching from merchant name (non-blocking)
let matchedStation: MatchedStation | null = null;
const merchantName = extractedFields.merchantName.value;
if (merchantName && String(merchantName).trim()) {
matchedStation = await matchStationFromMerchant(String(merchantName));
}
const mappedFields = mapFieldsToFuelLog(extractedFields, matchedStation);
setResult({ setResult({
extractedFields, extractedFields,
mappedFields, mappedFields,
rawText, rawText,
overallConfidence: confidence, overallConfidence: confidence,
matchedStation,
}); });
} catch (err: any) { } catch (err: any) {
console.error('Receipt OCR processing failed:', err); console.error('Receipt OCR processing failed:', err);
@@ -268,10 +320,14 @@ export function useReceiptOcr(): UseReceiptOcrReturn {
}, },
}; };
// Clear matched station if merchant name was edited (user override)
const station = fieldName === 'merchantName' ? null : prev.matchedStation;
return { return {
...prev, ...prev,
extractedFields: updatedFields, extractedFields: updatedFields,
mappedFields: mapFieldsToFuelLog(updatedFields), mappedFields: mapFieldsToFuelLog(updatedFields, station),
matchedStation: station,
}; };
}); });
}, []); }, []);
@@ -291,6 +347,17 @@ export function useReceiptOcr(): UseReceiptOcrReturn {
return mappedFields; return mappedFields;
}, [result, receiptImageUrl]); }, [result, receiptImageUrl]);
// Drop the matched station and rebuild the mapped fields from the extracted
// fields alone; leaves the state as null when there is no OCR result yet.
const clearMatchedStation = useCallback(() => {
  setResult((current) =>
    current
      ? {
          ...current,
          mappedFields: mapFieldsToFuelLog(current.extractedFields, null),
          matchedStation: null,
        }
      : null
  );
}, []);
const reset = useCallback(() => { const reset = useCallback(() => {
setIsCapturing(false); setIsCapturing(false);
setIsProcessing(false); setIsProcessing(false);
@@ -314,5 +381,6 @@ export function useReceiptOcr(): UseReceiptOcrReturn {
acceptResult, acceptResult,
reset, reset,
updateField, updateField,
clearMatchedStation,
}; };
} }

View File

@@ -0,0 +1,51 @@
# maintenance/
Maintenance record and schedule management UI. Supports manual schedule creation and batch creation from OCR-extracted maintenance data. Three categories: routine maintenance, repair, performance upgrade.
## Subdirectories
| Directory | What | When to read |
| --------- | ---- | ------------ |
| `api/` | Maintenance API endpoints | API integration |
| `components/` | Forms, lists, review screen, subtype selection | UI changes |
| `hooks/` | Data fetching, batch schedule creation from extraction | Business logic |
| `mobile/` | Mobile-specific maintenance layout | Mobile UI |
| `pages/` | MaintenancePage (tabs: records, schedules) | Page layout |
| `types/` | TypeScript type definitions (categories, subtypes, schedules) | Type changes |
## Key Files
| File | What | When to read |
| ---- | ---- | ------------ |
| `hooks/useCreateSchedulesFromExtraction.ts` | Batch-creates maintenance schedules from OCR extraction results, maps MaintenanceScheduleItem to CreateScheduleRequest | OCR-to-schedule creation flow |
| `components/MaintenanceScheduleReviewScreen.tsx` | Dialog for reviewing OCR-extracted maintenance items: checkboxes for selection, confidence indicators, inline editing, batch create action | Extraction review UI, item editing |
| `components/MaintenanceScheduleForm.tsx` | Form for manual schedule creation | Schedule creation UI |
| `components/MaintenanceRecordForm.tsx` | Form for manual record creation | Record creation UI |
| `components/MaintenanceSchedulesList.tsx` | Schedule list with edit/delete | Schedule display |
| `components/MaintenanceRecordsList.tsx` | Record list display | Record display |
| `components/SubtypeCheckboxGroup.tsx` | Multi-select checkbox group for maintenance subtypes (27 routine, repair, performance) | Subtype selection UI |
| `hooks/useMaintenanceRecords.ts` | CRUD operations for maintenance records and schedules | Data management |
| `types/maintenance.types.ts` | MaintenanceCategory, ScheduleType, ROUTINE_MAINTENANCE_SUBTYPES, MaintenanceSchedule | Type definitions, subtype constants |
| `components/MaintenanceScheduleReviewScreen.test.tsx` | Tests for extraction review screen | Test changes |
## Extraction Review Flow
```
ManualExtractionResult (from documents/ feature useManualExtraction)
|
v
MaintenanceScheduleReviewScreen
- Displays extracted items with confidence scores
- Checkboxes for select/deselect
- Inline editing of service name, intervals, details
- Touch targets >= 44px for mobile
|
v
useCreateSchedulesFromExtraction.mutate(selectedItems)
|
v
POST /api/maintenance/schedules (batch create)
|
v
Query invalidation -> MaintenanceSchedulesList refreshes
```

View File

@@ -0,0 +1,432 @@
/**
* @ai-summary Unit tests for MaintenanceScheduleReviewScreen component
* @ai-context Tests rendering, selection, editing, empty state, and error handling
*/
import { render, screen, fireEvent } from '@testing-library/react';
import { MaintenanceScheduleReviewScreen } from './MaintenanceScheduleReviewScreen';
import type { MaintenanceScheduleItem } from '../../documents/hooks/useManualExtraction';
/**
 * Installs a stubbed `window.matchMedia` for responsive tests.
 * Every media query list it returns reports the supplied `matches` value and
 * echoes the queried string back as `media`; listener methods are jest mocks.
 */
function mockMatchMedia(matches: boolean) {
  const createQueryList = (query: string) => ({
    matches,
    media: query,
    onchange: null,
    addListener: jest.fn(),
    removeListener: jest.fn(),
    addEventListener: jest.fn(),
    removeEventListener: jest.fn(),
    dispatchEvent: jest.fn(),
  });
  const matchMediaStub = jest.fn().mockImplementation(createQueryList);

  Object.defineProperty(window, 'matchMedia', {
    value: matchMediaStub,
    writable: true,
  });
}
// Mock the create hook
// The real useCreateSchedulesFromExtraction is replaced by a stub that exposes
// the shared mockMutateAsync spy as mutateAsync and always reports isPending: false,
// so tests can inspect the arguments the component passes to mutateAsync and
// control whether it resolves or rejects.
const mockMutateAsync = jest.fn();
jest.mock('../hooks/useCreateSchedulesFromExtraction', () => ({
useCreateSchedulesFromExtraction: () => ({
mutateAsync: mockMutateAsync,
isPending: false,
}),
}));
// Track SubtypeCheckboxGroup onChange callbacks per instance
// Each render of the mocked component appends its onChange prop to this array
// (the suite's beforeEach empties it again), so entries accumulate in render order.
const subtypeOnChangeCallbacks: Array<(subtypes: string[]) => void> = [];
// Stand-in for SubtypeCheckboxGroup: renders one wrapper per instance
// (data-testid "subtype-checkbox-group") containing one span per selected
// subtype (data-testid "subtype-chip"). The category prop is typed but not used.
jest.mock('./SubtypeCheckboxGroup', () => ({
SubtypeCheckboxGroup: ({ selected, onChange }: { category: string; selected: string[]; onChange: (subtypes: string[]) => void }) => {
subtypeOnChangeCallbacks.push(onChange);
return (
<div data-testid="subtype-checkbox-group">
{selected.map((s: string) => (
<span key={s} data-testid="subtype-chip">{s}</span>
))}
</div>
);
},
}));
const sampleItems: MaintenanceScheduleItem[] = [
{
service: 'Engine Oil Change',
intervalMiles: 5000,
intervalMonths: 6,
details: 'Use 0W-20 full synthetic oil',
confidence: 0.95,
subtypes: ['Engine Oil'],
},
{
service: 'Tire Rotation',
intervalMiles: 5000,
intervalMonths: 6,
details: null,
confidence: 0.88,
subtypes: ['Tires'],
},
{
service: 'Cabin Air Filter Replacement',
intervalMiles: 15000,
intervalMonths: 12,
details: null,
confidence: 0.72,
subtypes: ['Cabin Air Filter / Purifier'],
},
];
const sampleItemsWithEmptySubtypes: MaintenanceScheduleItem[] = [
...sampleItems,
{
service: 'Brake Fluid',
intervalMiles: 30000,
intervalMonths: 24,
details: null,
confidence: 0.65,
subtypes: [],
},
];
const sampleItemsWithMissingIntervals: MaintenanceScheduleItem[] = [
...sampleItems,
{
service: 'Coolant Flush',
intervalMiles: null,
intervalMonths: null,
details: null,
confidence: 0.55,
subtypes: ['Coolant'],
},
];
describe('MaintenanceScheduleReviewScreen', () => {
const defaultProps = {
open: true,
items: sampleItems,
vehicleId: 'vehicle-123',
onClose: jest.fn(),
onCreated: jest.fn(),
};
beforeEach(() => {
jest.clearAllMocks();
mockMutateAsync.mockResolvedValue([]);
subtypeOnChangeCallbacks.length = 0;
});
describe('Rendering', () => {
it('should render extracted items with checkboxes', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
expect(screen.getByText('Extracted Maintenance Schedules')).toBeInTheDocument();
expect(screen.getByText('3 of 3 items selected')).toBeInTheDocument();
// All items should be visible
expect(screen.getByText('Engine Oil Change')).toBeInTheDocument();
expect(screen.getByText('Tire Rotation')).toBeInTheDocument();
expect(screen.getByText('Cabin Air Filter Replacement')).toBeInTheDocument();
// All checkboxes should be checked by default
const checkboxes = screen.getAllByRole('checkbox');
expect(checkboxes).toHaveLength(3);
checkboxes.forEach((cb) => {
expect(cb).toBeChecked();
});
});
it('should display interval information', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
expect(screen.getAllByText('5000 mi')).toHaveLength(2);
expect(screen.getAllByText('6 mo')).toHaveLength(2);
expect(screen.getByText('15000 mi')).toBeInTheDocument();
expect(screen.getByText('12 mo')).toBeInTheDocument();
});
it('should display details text when present', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
expect(screen.getByText('Use 0W-20 full synthetic oil')).toBeInTheDocument();
});
it('should display subtypes in SubtypeCheckboxGroup', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
const groups = screen.getAllByTestId('subtype-checkbox-group');
expect(groups).toHaveLength(3);
expect(screen.getByText('Engine Oil')).toBeInTheDocument();
expect(screen.getByText('Tires')).toBeInTheDocument();
expect(screen.getByText('Cabin Air Filter / Purifier')).toBeInTheDocument();
});
});
describe('Selection', () => {
it('should toggle item selection on checkbox click', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
const checkboxes = screen.getAllByRole('checkbox');
// Uncheck first item
fireEvent.click(checkboxes[0]);
expect(checkboxes[0]).not.toBeChecked();
expect(screen.getByText('2 of 3 items selected')).toBeInTheDocument();
// Re-check it
fireEvent.click(checkboxes[0]);
expect(checkboxes[0]).toBeChecked();
expect(screen.getByText('3 of 3 items selected')).toBeInTheDocument();
});
it('should deselect all items', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
fireEvent.click(screen.getByText('Deselect All'));
const checkboxes = screen.getAllByRole('checkbox');
checkboxes.forEach((cb) => {
expect(cb).not.toBeChecked();
});
expect(screen.getByText('0 of 3 items selected')).toBeInTheDocument();
});
it('should select all items after deselecting', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
// Deselect all first
fireEvent.click(screen.getByText('Deselect All'));
expect(screen.getByText('0 of 3 items selected')).toBeInTheDocument();
// Select all
fireEvent.click(screen.getByText('Select All'));
const checkboxes = screen.getAllByRole('checkbox');
checkboxes.forEach((cb) => {
expect(cb).toBeChecked();
});
expect(screen.getByText('3 of 3 items selected')).toBeInTheDocument();
});
it('should disable create button when no items selected', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
fireEvent.click(screen.getByText('Deselect All'));
const createButton = screen.getByRole('button', { name: /create/i });
expect(createButton).toBeDisabled();
});
});
describe('Empty state', () => {
it('should show no items found message for empty extraction', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} items={[]} />);
expect(screen.getByText('No maintenance items found')).toBeInTheDocument();
expect(screen.getByText(/did not contain any recognizable/)).toBeInTheDocument();
// Should show Close button instead of Create
expect(screen.getByText('Close')).toBeInTheDocument();
expect(screen.queryByText(/Create/)).not.toBeInTheDocument();
});
});
describe('Schedule creation', () => {
it('should create selected schedules on button click', async () => {
mockMutateAsync.mockResolvedValue([{ id: '1' }, { id: '2' }, { id: '3' }]);
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
fireEvent.click(screen.getByRole('button', { name: /create 3 schedules/i }));
expect(mockMutateAsync).toHaveBeenCalledWith({
vehicleId: 'vehicle-123',
items: expect.arrayContaining([
expect.objectContaining({ service: 'Engine Oil Change', selected: true }),
expect.objectContaining({ service: 'Tire Rotation', selected: true }),
expect.objectContaining({ service: 'Cabin Air Filter Replacement', selected: true }),
]),
});
});
it('should only create selected items', async () => {
mockMutateAsync.mockResolvedValue([{ id: '1' }]);
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
// Deselect last two items
const checkboxes = screen.getAllByRole('checkbox');
fireEvent.click(checkboxes[1]);
fireEvent.click(checkboxes[2]);
fireEvent.click(screen.getByRole('button', { name: /create 1 schedule$/i }));
expect(mockMutateAsync).toHaveBeenCalledWith({
vehicleId: 'vehicle-123',
items: expect.arrayContaining([
expect.objectContaining({ service: 'Engine Oil Change', selected: true }),
]),
});
// Should not include unselected items
const callArgs = mockMutateAsync.mock.calls[0][0];
expect(callArgs.items).toHaveLength(1);
});
it('should show error on creation failure', async () => {
mockMutateAsync.mockRejectedValue(new Error('Network error'));
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
fireEvent.click(screen.getByRole('button', { name: /create 3 schedules/i }));
// Wait for error to appear (async mutation)
await screen.findByText('Network error');
});
});
describe('Editing', () => {
it('should update item data via inline editing', async () => {
mockMutateAsync.mockResolvedValue([{ id: '1' }]);
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
// Click on the Months field of the third item (unique value: 12 mo)
const monthsField = screen.getByText('12 mo');
fireEvent.click(monthsField);
// Find the input that appeared and change its value
const monthsInput = screen.getByDisplayValue('12');
fireEvent.change(monthsInput, { target: { value: '24' } });
fireEvent.keyDown(monthsInput, { key: 'Enter' });
// Deselect items 1 and 2 to only create the edited item
const checkboxes = screen.getAllByRole('checkbox');
fireEvent.click(checkboxes[0]);
fireEvent.click(checkboxes[1]);
// Create the schedule and verify updated value is used
fireEvent.click(screen.getByRole('button', { name: /create 1 schedule$/i }));
expect(mockMutateAsync).toHaveBeenCalledWith({
vehicleId: 'vehicle-123',
items: [expect.objectContaining({
service: 'Cabin Air Filter Replacement',
intervalMonths: 24,
})],
});
});
});
describe('Subtype validation', () => {
it('should disable create button when selected item has empty subtypes', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} items={sampleItemsWithEmptySubtypes} />);
// All 4 items selected, but Brake Fluid has no subtypes
const createButton = screen.getByRole('button', { name: /create/i });
expect(createButton).toBeDisabled();
});
it('should enable create button after deselecting item with empty subtypes', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} items={sampleItemsWithEmptySubtypes} />);
// Deselect the 4th item (Brake Fluid with empty subtypes)
const checkboxes = screen.getAllByRole('checkbox');
fireEvent.click(checkboxes[3]);
const createButton = screen.getByRole('button', { name: /create 3 schedules/i });
expect(createButton).not.toBeDisabled();
});
it('should show warning alert for items missing subtypes', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} items={sampleItemsWithEmptySubtypes} />);
expect(screen.getByText(/missing subtypes/)).toBeInTheDocument();
});
it('should hide warning alert after deselecting items with empty subtypes', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} items={sampleItemsWithEmptySubtypes} />);
// Deselect the Brake Fluid item
const checkboxes = screen.getAllByRole('checkbox');
fireEvent.click(checkboxes[3]);
expect(screen.queryByText(/missing subtypes/)).not.toBeInTheDocument();
});
});
describe('Interval validation', () => {
it('should disable create button when selected item has no intervals', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} items={sampleItemsWithMissingIntervals} />);
const createButton = screen.getByRole('button', { name: /create/i });
expect(createButton).toBeDisabled();
});
it('should enable create button after deselecting item with missing intervals', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} items={sampleItemsWithMissingIntervals} />);
// Deselect the 4th item (Coolant Flush with null intervals)
const checkboxes = screen.getAllByRole('checkbox');
fireEvent.click(checkboxes[3]);
const createButton = screen.getByRole('button', { name: /create 3 schedules/i });
expect(createButton).not.toBeDisabled();
});
it('should show warning alert for items missing intervals', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} items={sampleItemsWithMissingIntervals} />);
expect(screen.getByText(/missing intervals/)).toBeInTheDocument();
});
it('should enable create button after editing interval on item', () => {
render(<MaintenanceScheduleReviewScreen {...defaultProps} items={sampleItemsWithMissingIntervals} />);
// The Coolant Flush item shows '-' for both intervals. Click the Miles '-' to edit.
// There are multiple '-' on screen, so find all and pick the right one.
const dashTexts = screen.getAllByText('-');
// Click the first dash (Miles field of the Coolant Flush item - last item's first dash)
fireEvent.click(dashTexts[dashTexts.length - 2]);
// Type a value and save
const input = screen.getByDisplayValue('');
fireEvent.change(input, { target: { value: '50000' } });
fireEvent.keyDown(input, { key: 'Enter' });
const createButton = screen.getByRole('button', { name: /create 4 schedules/i });
expect(createButton).not.toBeDisabled();
});
});
describe('Responsive layout', () => {
afterEach(() => {
// Reset matchMedia after each test
mockMatchMedia(false);
});
it('should render in fullscreen mode on mobile viewports', () => {
// Simulate mobile: breakpoints.down('sm') returns true
mockMatchMedia(true);
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
// On mobile, dialog renders with fullScreen prop - check that the MuiDialog-paperFullScreen
// class is applied. MUI renders the dialog in a portal, so query from document.
const paper = document.querySelector('.MuiDialog-paperFullScreen');
expect(paper).not.toBeNull();
});
it('should render as modal dialog on desktop viewports', () => {
// Simulate desktop: breakpoints.down('sm') returns false
mockMatchMedia(false);
render(<MaintenanceScheduleReviewScreen {...defaultProps} />);
// On desktop, dialog should NOT have fullScreen class
const fullScreenPaper = document.querySelector('.MuiDialog-paperFullScreen');
expect(fullScreenPaper).toBeNull();
// But the dialog should still render
expect(screen.getByText('Extracted Maintenance Schedules')).toBeInTheDocument();
});
});
});

View File

@@ -0,0 +1,442 @@
/**
* @ai-summary Review screen for extracted maintenance schedules from manual OCR
* @ai-context Dialog showing extracted items with checkboxes, inline editing, batch create
*/
import React, { useState, useCallback } from 'react';
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Button,
Box,
Typography,
TextField,
Checkbox,
IconButton,
Alert,
CircularProgress,
Tooltip,
useTheme,
useMediaQuery,
} from '@mui/material';
import EditIcon from '@mui/icons-material/Edit';
import CheckIcon from '@mui/icons-material/Check';
import CloseIcon from '@mui/icons-material/Close';
import SelectAllIcon from '@mui/icons-material/SelectAll';
import DeselectIcon from '@mui/icons-material/Deselect';
import WarningAmberIcon from '@mui/icons-material/WarningAmber';
import type { MaintenanceScheduleItem } from '../../documents/hooks/useManualExtraction';
import { useCreateSchedulesFromExtraction } from '../hooks/useCreateSchedulesFromExtraction';
import { SubtypeCheckboxGroup } from './SubtypeCheckboxGroup';
import { getSubtypesForCategory } from '../types/maintenance.types';
/** Props accepted by the MaintenanceScheduleReviewScreen dialog. */
export interface MaintenanceScheduleReviewScreenProps {
/** Controls whether the dialog is shown; forwarded to the MUI Dialog `open` prop. */
open: boolean;
/** Extracted schedule items used to seed the editable list when the component mounts. */
items: MaintenanceScheduleItem[];
/** Vehicle id passed to the create mutation together with the selected items. */
vehicleId: string;
/** Called when the dialog is dismissed (Dialog onClose, the title close icon, or the Skip/Close button). */
onClose: () => void;
/** Called after the create mutation resolves, with the number of items that were selected. */
onCreated: (count: number) => void;
}
/** An extracted schedule item plus the local checkbox state used by the review list. */
interface EditableItem extends MaintenanceScheduleItem {
/** True when the item is checked and will be included in the create request. */
selected: boolean;
}
/**
 * Four-dot visual indicator for an extraction confidence score.
 *
 * The number of filled dots is `Math.round(confidence * 4)`; filled dots use the
 * warning color when `confidence < 0.6` and the success color otherwise. The
 * accessible label reports `confidence * 100` rounded to a whole percent.
 */
const ConfidenceIndicator: React.FC<{ confidence: number }> = ({ confidence }) => {
  const filledDots = Math.round(confidence * 4);
  const isLow = confidence < 0.6;
  const filledColor = isLow ? 'warning.main' : 'success.main';

  // role="img" is required for the aria-label to be exposed: a plain <div> has the
  // generic role, on which accessible names are prohibited and typically ignored.
  return (
    <Box
      role="img"
      sx={{ display: 'flex', gap: 0.25, ml: 1 }}
      aria-label={`Confidence: ${Math.round(confidence * 100)}%`}
    >
      {[0, 1, 2, 3].map((i) => (
        <Box
          key={i}
          sx={{
            width: 6,
            height: 6,
            borderRadius: '50%',
            backgroundColor: i < filledDots ? filledColor : 'grey.300',
          }}
        />
      ))}
    </Box>
  );
};
/** Props for the click-to-edit InlineField component. */
interface InlineFieldProps {
/** Caption rendered before the value and used in the "Edit <label>" accessible name. */
label: string;
/** Current value; `null` is displayed as "-". */
value: string | number | null;
/** Input mode; 'number' parses the edited text with parseFloat on save. Defaults to 'text'. */
type?: 'text' | 'number';
/** Receives the committed value: the text, the parsed number, or `null` when empty or not a number. */
onSave: (value: string | number | null) => void;
/** Optional unit appended after the value in display mode (e.g. "mi"). */
suffix?: string;
}
/**
 * Click-to-edit field: shows `label: value` in display mode and swaps to a small
 * text input with Save/Cancel buttons while editing.
 *
 * - Enter or the check button commits the draft through `onSave`.
 * - Escape or the close button discards the draft.
 * - For `type="number"` the draft is parsed with `parseFloat`; an empty or
 *   non-numeric draft is committed as `null`.
 */
const InlineField: React.FC<InlineFieldProps> = ({ label, value, type = 'text', onSave, suffix }) => {
  const [isEditing, setIsEditing] = useState(false);
  const [editValue, setEditValue] = useState(value !== null ? String(value) : '');

  const displayValue = value !== null
    ? (suffix ? `${value} ${suffix}` : String(value))
    : '-';

  // Re-seed the draft from the current value every time editing starts, so the input
  // never shows stale raw text left over from a previous edit (e.g. "1e3" after the
  // committed value became 1000).
  const startEditing = () => {
    setEditValue(value !== null ? String(value) : '');
    setIsEditing(true);
  };

  const handleSave = () => {
    let parsed: string | number | null = editValue || null;
    if (type === 'number' && editValue) {
      const num = parseFloat(editValue);
      parsed = isNaN(num) ? null : num;
    }
    onSave(parsed);
    setIsEditing(false);
  };

  const handleCancel = () => {
    setEditValue(value !== null ? String(value) : '');
    setIsEditing(false);
  };

  if (isEditing) {
    return (
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
        <Typography variant="caption" color="text.secondary" sx={{ minWidth: 50, flexShrink: 0 }}>
          {label}:
        </Typography>
        <TextField
          size="small"
          value={editValue}
          onChange={(e) => setEditValue(e.target.value)}
          type={type === 'number' ? 'number' : 'text'}
          inputProps={{ step: type === 'number' ? 1 : undefined }}
          autoFocus
          sx={{ flex: 1, '& .MuiInputBase-input': { py: 0.5, px: 1, fontSize: '0.875rem' } }}
          onKeyDown={(e) => {
            if (e.key === 'Enter') handleSave();
            if (e.key === 'Escape') {
              // This field is rendered inside a Dialog, which closes itself on a
              // bubbled Escape keydown. Stop the event here so Escape only cancels
              // the inline edit instead of dismissing the whole dialog.
              e.stopPropagation();
              handleCancel();
            }
          }}
        />
        <IconButton
          size="small"
          onClick={handleSave}
          color="primary"
          aria-label={`Save ${label}`}
          sx={{ minWidth: 44, minHeight: 44 }}
        >
          <CheckIcon sx={{ fontSize: 16 }} />
        </IconButton>
        <IconButton
          size="small"
          onClick={handleCancel}
          aria-label={`Cancel editing ${label}`}
          sx={{ minWidth: 44, minHeight: 44 }}
        >
          <CloseIcon sx={{ fontSize: 16 }} />
        </IconButton>
      </Box>
    );
  }

  return (
    <Box
      sx={{
        display: 'flex',
        alignItems: 'center',
        gap: 0.5,
        cursor: 'pointer',
        minHeight: 44,
        '&:hover .edit-icon': { opacity: 1 },
      }}
      onClick={startEditing}
      role="button"
      tabIndex={0}
      aria-label={`Edit ${label}`}
      onKeyDown={(e) => {
        if (e.key === 'Enter' || e.key === ' ') {
          // preventDefault keeps Space from scrolling the dialog content.
          e.preventDefault();
          startEditing();
        }
      }}
    >
      <Typography variant="caption" color="text.secondary" sx={{ minWidth: 50, flexShrink: 0 }}>
        {label}:
      </Typography>
      <Typography
        variant="body2"
        sx={{
          fontWeight: value !== null ? 500 : 400,
          color: value !== null ? 'text.primary' : 'text.disabled',
        }}
      >
        {displayValue}
      </Typography>
      <EditIcon className="edit-icon" sx={{ fontSize: 14, opacity: 0, transition: 'opacity 0.2s', color: 'text.secondary' }} />
    </Box>
  );
};
/**
 * Dialog for reviewing maintenance schedule items extracted from a manual before
 * creating them for a vehicle.
 *
 * Each item can be selected/deselected, have its service name and intervals edited
 * inline, and have its subtypes changed. The create button is disabled while nothing
 * is selected, while any selected item has no subtype or no interval, or while the
 * mutation is pending. On success `onCreated` receives the number of selected items;
 * on failure the error message is shown in an alert inside the dialog.
 */
export const MaintenanceScheduleReviewScreen: React.FC<MaintenanceScheduleReviewScreenProps> = ({
  open,
  items,
  vehicleId,
  onClose,
  onCreated,
}) => {
  const theme = useTheme();
  const isMobile = useMediaQuery(theme.breakpoints.down('sm'));
  const createMutation = useCreateSchedulesFromExtraction();
  const validRoutineSubtypes = getSubtypesForCategory('routine_maintenance');

  // Local editable copy of the extracted items. The lazy initializer runs only on
  // mount: subtypes not valid for routine maintenance are dropped and every item
  // starts selected.
  // NOTE(review): later changes to the `items` prop are not reflected here — confirm
  // the parent remounts this component for each new extraction result.
  const [editableItems, setEditableItems] = useState<EditableItem[]>(() =>
    items.map((item) => ({
      ...item,
      subtypes: item.subtypes.filter((st) => validRoutineSubtypes.includes(st)),
      selected: true,
    }))
  );
  const [createError, setCreateError] = useState<string | null>(null);

  // Derived validation state; only selected items are validated.
  const selectedCount = editableItems.filter((i) => i.selected).length;
  const hasInvalidSubtypes = editableItems.some((i) => i.selected && i.subtypes.length === 0);
  const hasInvalidIntervals = editableItems.some(
    (i) => i.selected && i.intervalMiles === null && i.intervalMonths === null
  );

  const handleToggle = useCallback((index: number) => {
    setEditableItems((prev) =>
      prev.map((item, i) => (i === index ? { ...item, selected: !item.selected } : item))
    );
  }, []);

  const handleSelectAll = useCallback(() => {
    setEditableItems((prev) => prev.map((item) => ({ ...item, selected: true })));
  }, []);

  const handleDeselectAll = useCallback(() => {
    setEditableItems((prev) => prev.map((item) => ({ ...item, selected: false })));
  }, []);

  const handleFieldUpdate = useCallback((index: number, field: keyof MaintenanceScheduleItem, value: string | number | null) => {
    setEditableItems((prev) =>
      prev.map((item, i) => (i === index ? { ...item, [field]: value } : item))
    );
  }, []);

  const handleSubtypesChange = useCallback((index: number, subtypes: string[]) => {
    setEditableItems((prev) =>
      prev.map((item, i) => (i === index ? { ...item, subtypes } : item))
    );
  }, []);

  /** Submits the selected items; reports the count on success or stores the error message. */
  const handleCreate = async () => {
    setCreateError(null);
    const selectedItems = editableItems.filter((i) => i.selected);
    if (selectedItems.length === 0) return;
    try {
      await createMutation.mutateAsync({ vehicleId, items: selectedItems });
      onCreated(selectedItems.length);
    } catch (err: unknown) {
      // Narrow instead of using `any`: only a string `message` is shown to the user,
      // so a rejection with a non-Error value cannot put a non-renderable value
      // into the alert.
      const message =
        typeof err === 'object' && err !== null && 'message' in err && typeof err.message === 'string'
          ? err.message
          : '';
      setCreateError(message || 'Failed to create maintenance schedules');
    }
  };

  const isEmpty = items.length === 0;

  return (
    <Dialog
      open={open}
      onClose={onClose}
      maxWidth="md"
      fullWidth
      fullScreen={isMobile}
      PaperProps={{
        sx: { maxHeight: isMobile ? '100vh' : '90vh' },
      }}
    >
      <DialogTitle sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
        <Typography variant="h6" component="span">
          Extracted Maintenance Schedules
        </Typography>
        <IconButton onClick={onClose} size="small" aria-label="Close">
          <CloseIcon />
        </IconButton>
      </DialogTitle>
      <DialogContent dividers>
        {isEmpty ? (
          <Box sx={{ textAlign: 'center', py: 6 }}>
            <Typography variant="h6" color="text.secondary" gutterBottom>
              No maintenance items found
            </Typography>
            <Typography variant="body2" color="text.secondary">
              The manual did not contain any recognizable routine maintenance schedules.
            </Typography>
          </Box>
        ) : (
          <>
            {/* Selection summary and bulk select/deselect controls */}
            <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', mb: 2 }}>
              <Typography variant="body2" color="text.secondary">
                {selectedCount} of {editableItems.length} items selected
              </Typography>
              <Box sx={{ display: 'flex', gap: 1 }}>
                <Button
                  size="small"
                  startIcon={<SelectAllIcon />}
                  onClick={handleSelectAll}
                  disabled={selectedCount === editableItems.length}
                >
                  Select All
                </Button>
                <Button
                  size="small"
                  startIcon={<DeselectIcon />}
                  onClick={handleDeselectAll}
                  disabled={selectedCount === 0}
                >
                  Deselect All
                </Button>
              </Box>
            </Box>
            {/* One card per extracted item; handlers address items by list index */}
            <Box sx={{ display: 'flex', flexDirection: 'column', gap: 1 }}>
              {editableItems.map((item, index) => (
                <Box
                  key={index}
                  sx={{
                    display: 'flex',
                    alignItems: 'flex-start',
                    p: 1.5,
                    borderRadius: 1,
                    border: '1px solid',
                    borderColor: item.selected ? 'primary.light' : 'divider',
                    backgroundColor: item.selected ? 'primary.50' : 'transparent',
                    opacity: item.selected ? 1 : 0.6,
                    transition: 'all 0.15s ease',
                    '&:hover': { borderColor: 'primary.main' },
                  }}
                >
                  <Checkbox
                    checked={item.selected}
                    onChange={() => handleToggle(index)}
                    sx={{ mt: -0.5, mr: 1, '& .MuiSvgIcon-root': { fontSize: 24 }, minWidth: 44, minHeight: 44 }}
                    inputProps={{ 'aria-label': `Select ${item.service}` }}
                  />
                  <Box sx={{ flex: 1, minWidth: 0 }}>
                    <Box sx={{ display: 'flex', alignItems: 'center', mb: 0.5 }}>
                      <InlineField
                        label="Service"
                        value={item.service}
                        onSave={(v) => handleFieldUpdate(index, 'service', v)}
                      />
                      <ConfidenceIndicator confidence={item.confidence} />
                    </Box>
                    <Box sx={{
                      display: 'flex',
                      flexDirection: isMobile ? 'column' : 'row',
                      gap: isMobile ? 0.5 : 2,
                      alignItems: 'center',
                    }}>
                      <InlineField
                        label="Miles"
                        value={item.intervalMiles}
                        type="number"
                        onSave={(v) => handleFieldUpdate(index, 'intervalMiles', v)}
                        suffix="mi"
                      />
                      <InlineField
                        label="Months"
                        value={item.intervalMonths}
                        type="number"
                        onSave={(v) => handleFieldUpdate(index, 'intervalMonths', v)}
                        suffix="mo"
                      />
                      {item.selected && item.intervalMiles === null && item.intervalMonths === null && (
                        <Tooltip title="At least one interval (miles or months) is required">
                          <WarningAmberIcon color="warning" sx={{ fontSize: 18 }} />
                        </Tooltip>
                      )}
                    </Box>
                    {item.details && (
                      <Typography variant="caption" color="text.secondary" sx={{ display: 'block', mt: 0.5 }}>
                        {item.details}
                      </Typography>
                    )}
                    <Box sx={{ mt: 1 }}>
                      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 0.5 }}>
                        <Typography variant="caption" color="text.secondary">
                          Subtypes:
                        </Typography>
                        {item.subtypes.length === 0 && item.selected && (
                          <Tooltip title="At least one subtype is required">
                            <WarningAmberIcon color="warning" sx={{ fontSize: 18 }} />
                          </Tooltip>
                        )}
                      </Box>
                      <SubtypeCheckboxGroup
                        category="routine_maintenance"
                        selected={item.subtypes}
                        onChange={(subtypes) => handleSubtypesChange(index, subtypes)}
                      />
                    </Box>
                  </Box>
                </Box>
              ))}
            </Box>
            {hasInvalidSubtypes && (
              <Alert severity="warning" sx={{ mt: 2 }}>
                Some selected items are missing subtypes. Please select at least one subtype for each selected item.
              </Alert>
            )}
            {hasInvalidIntervals && (
              <Alert severity="warning" sx={{ mt: 2 }}>
                Some selected items are missing intervals. Please set at least one interval (miles or months) for each selected item.
              </Alert>
            )}
            <Typography variant="body2" color="text.secondary" sx={{ mt: 2, textAlign: 'center' }}>
              Tap any field to edit before creating schedules.
            </Typography>
          </>
        )}
        {createError && (
          <Alert severity="error" sx={{ mt: 2 }}>
            {createError}
          </Alert>
        )}
      </DialogContent>
      {/* On mobile the actions stack vertically with the primary button first */}
      <DialogActions
        sx={{
          flexDirection: isMobile ? 'column' : 'row',
          gap: 1,
          p: 2,
        }}
      >
        <Button
          onClick={onClose}
          sx={{ order: isMobile ? 2 : 1, width: isMobile ? '100%' : 'auto' }}
        >
          {isEmpty ? 'Close' : 'Skip'}
        </Button>
        {!isEmpty && (
          <>
            <Box sx={{ flex: 1, display: isMobile ? 'none' : 'block' }} />
            <Button
              variant="contained"
              onClick={handleCreate}
              disabled={selectedCount === 0 || hasInvalidSubtypes || hasInvalidIntervals || createMutation.isPending}
              startIcon={createMutation.isPending ? <CircularProgress size={16} /> : <CheckIcon />}
              sx={{ minHeight: 44, order: isMobile ? 1 : 2, width: isMobile ? '100%' : 'auto' }}
            >
              {createMutation.isPending
                ? 'Creating...'
                : `Create ${selectedCount} Schedule${selectedCount !== 1 ? 's' : ''}`}
            </Button>
          </>
        )}
      </DialogActions>
    </Dialog>
  );
};

export default MaintenanceScheduleReviewScreen;

View File

@@ -0,0 +1,43 @@
/**
* @ai-summary Hook for batch-creating maintenance schedules from manual extraction results
* @ai-context Maps extracted MaintenanceScheduleItem[] to CreateScheduleRequest[] and creates via API
*/
import { useMutation, useQueryClient } from '@tanstack/react-query';
import { maintenanceApi } from '../api/maintenance.api';
import type { CreateScheduleRequest, MaintenanceScheduleResponse } from '../types/maintenance.types';
import type { MaintenanceScheduleItem } from '../../documents/hooks/useManualExtraction';
/** Variables accepted by the mutation returned from useCreateSchedulesFromExtraction. */
interface CreateSchedulesParams {
/** Vehicle the schedules are created for; also used in the invalidated query keys. */
vehicleId: string;
/** Extracted items to turn into schedules; items without subtypes or without any interval are skipped. */
items: MaintenanceScheduleItem[];
}
/**
 * Mutation hook that creates one interval-based `routine_maintenance` schedule per
 * extracted item for the given vehicle.
 *
 * Items are sent one at a time, in order. Items with no subtypes, or with neither a
 * mileage nor a month interval, are skipped. Only `subtypes`, `intervalMiles` and
 * `intervalMonths` are copied into the request; other item fields are not sent.
 *
 * The mutation resolves with the schedules that were created. If a request fails the
 * mutation rejects, but schedules created by earlier iterations remain on the server.
 */
export function useCreateSchedulesFromExtraction() {
  const queryClient = useQueryClient();

  return useMutation<MaintenanceScheduleResponse[], Error, CreateSchedulesParams>({
    mutationFn: async ({ vehicleId, items }) => {
      const results: MaintenanceScheduleResponse[] = [];
      for (const item of items) {
        // Skip items that cannot form a valid interval schedule.
        if (item.subtypes.length === 0) continue;
        if (item.intervalMiles === null && item.intervalMonths === null) continue;

        const request: CreateScheduleRequest = {
          vehicleId,
          category: 'routine_maintenance',
          subtypes: item.subtypes,
          scheduleType: 'interval',
          // The request uses `undefined` rather than `null` for an absent interval.
          intervalMiles: item.intervalMiles ?? undefined,
          intervalMonths: item.intervalMonths ?? undefined,
        };
        const created = await maintenanceApi.createSchedule(request);
        results.push(created);
      }
      return results;
    },
    // Invalidate on both success and failure: when a request fails part-way through
    // the batch, the schedules created before it already exist on the server, so the
    // cached lists are stale even though the mutation rejected.
    onSettled: (_data, _error, variables) => {
      queryClient.invalidateQueries({ queryKey: ['maintenanceSchedules', variables.vehicleId] });
      queryClient.invalidateQueries({ queryKey: ['maintenanceUpcoming', variables.vehicleId] });
    },
  });
}

View File

@@ -2,12 +2,13 @@
* @ai-summary Mobile maintenance screen with tabs for records and schedules * @ai-summary Mobile maintenance screen with tabs for records and schedules
*/ */
import React, { useState } from 'react'; import React, { useState, useEffect } from 'react';
import { useQueryClient } from '@tanstack/react-query'; import { useQueryClient } from '@tanstack/react-query';
import { Box, Tabs, Tab } from '@mui/material'; import { Box, Tabs, Tab, FormControl, InputLabel, Select, MenuItem } from '@mui/material';
import { GlassCard } from '../../../shared-minimal/components/mobile/GlassCard'; import { GlassCard } from '../../../shared-minimal/components/mobile/GlassCard';
import { Button } from '../../../shared-minimal/components/Button'; import { Button } from '../../../shared-minimal/components/Button';
import { useMaintenanceRecords } from '../hooks/useMaintenanceRecords'; import { useMaintenanceRecords } from '../hooks/useMaintenanceRecords';
import { useVehicles } from '../../vehicles/hooks/useVehicles';
import { MaintenanceRecordForm } from '../components/MaintenanceRecordForm'; import { MaintenanceRecordForm } from '../components/MaintenanceRecordForm';
import { MaintenanceRecordsList } from '../components/MaintenanceRecordsList'; import { MaintenanceRecordsList } from '../components/MaintenanceRecordsList';
import { MaintenanceRecordEditDialog } from '../components/MaintenanceRecordEditDialog'; import { MaintenanceRecordEditDialog } from '../components/MaintenanceRecordEditDialog';
@@ -18,7 +19,17 @@ import type { MaintenanceRecordResponse, UpdateMaintenanceRecordRequest, Mainten
export const MaintenanceMobileScreen: React.FC = () => { export const MaintenanceMobileScreen: React.FC = () => {
const queryClient = useQueryClient(); const queryClient = useQueryClient();
const { records, schedules, isRecordsLoading, isSchedulesLoading, recordsError, schedulesError, updateRecord, deleteRecord, updateSchedule, deleteSchedule } = useMaintenanceRecords(); const { data: vehicles, isLoading: isLoadingVehicles } = useVehicles();
const [selectedVehicleId, setSelectedVehicleId] = useState<string>('');
// Auto-select first vehicle when vehicles load
useEffect(() => {
if (vehicles && vehicles.length > 0 && !selectedVehicleId) {
setSelectedVehicleId(vehicles[0].id);
}
}, [vehicles, selectedVehicleId]);
const { records, schedules, isRecordsLoading, isSchedulesLoading, recordsError, schedulesError, updateRecord, deleteRecord, updateSchedule, deleteSchedule } = useMaintenanceRecords(selectedVehicleId || undefined);
const [activeTab, setActiveTab] = useState<'records' | 'schedules'>('records'); const [activeTab, setActiveTab] = useState<'records' | 'schedules'>('records');
const [showForm, setShowForm] = useState(false); const [showForm, setShowForm] = useState(false);
@@ -52,7 +63,7 @@ export const MaintenanceMobileScreen: React.FC = () => {
const handleDelete = async (recordId: string) => { const handleDelete = async (recordId: string) => {
try { try {
await deleteRecord(recordId); await deleteRecord(recordId);
queryClient.refetchQueries({ queryKey: ['maintenanceRecords', 'all'] }); queryClient.refetchQueries({ queryKey: ['maintenanceRecords'] });
} catch (error) { } catch (error) {
console.error('Failed to delete maintenance record:', error); console.error('Failed to delete maintenance record:', error);
} }
@@ -99,6 +110,31 @@ export const MaintenanceMobileScreen: React.FC = () => {
<div className="p-4"> <div className="p-4">
<h2 className="text-lg font-semibold text-slate-800 dark:text-avus mb-3">Maintenance</h2> <h2 className="text-lg font-semibold text-slate-800 dark:text-avus mb-3">Maintenance</h2>
{/* Vehicle Selector */}
<Box sx={{ mb: 2 }}>
<FormControl fullWidth>
<InputLabel id="maintenance-mobile-vehicle-select-label">Vehicle</InputLabel>
<Select
labelId="maintenance-mobile-vehicle-select-label"
value={selectedVehicleId}
onChange={(e) => setSelectedVehicleId(e.target.value as string)}
label="Vehicle"
disabled={isLoadingVehicles}
sx={{ minHeight: 44 }}
>
{vehicles && vehicles.length > 0 ? (
vehicles.map((vehicle) => (
<MenuItem key={vehicle.id} value={vehicle.id} sx={{ minHeight: 44 }}>
{vehicle.year} {vehicle.make} {vehicle.model}
</MenuItem>
))
) : (
<MenuItem disabled>No vehicles available</MenuItem>
)}
</Select>
</FormControl>
</Box>
{/* Tabs */} {/* Tabs */}
<Box sx={{ borderBottom: 1, borderColor: 'divider', mb: 2 }}> <Box sx={{ borderBottom: 1, borderColor: 'divider', mb: 2 }}>
<Tabs <Tabs

View File

@@ -3,8 +3,8 @@
* @ai-context Two-column responsive layout following fuel-logs pattern * @ai-context Two-column responsive layout following fuel-logs pattern
*/ */
import React, { useState } from 'react'; import React, { useState, useEffect } from 'react';
import { Grid, Typography, Box, Tabs, Tab } from '@mui/material'; import { Grid, Typography, Box, Tabs, Tab, FormControl, InputLabel, Select, MenuItem } from '@mui/material';
import { useQueryClient } from '@tanstack/react-query'; import { useQueryClient } from '@tanstack/react-query';
import { MaintenanceRecordForm } from '../components/MaintenanceRecordForm'; import { MaintenanceRecordForm } from '../components/MaintenanceRecordForm';
import { MaintenanceRecordsList } from '../components/MaintenanceRecordsList'; import { MaintenanceRecordsList } from '../components/MaintenanceRecordsList';
@@ -13,13 +13,24 @@ import { MaintenanceScheduleForm } from '../components/MaintenanceScheduleForm';
import { MaintenanceSchedulesList } from '../components/MaintenanceSchedulesList'; import { MaintenanceSchedulesList } from '../components/MaintenanceSchedulesList';
import { MaintenanceScheduleEditDialog } from '../components/MaintenanceScheduleEditDialog'; import { MaintenanceScheduleEditDialog } from '../components/MaintenanceScheduleEditDialog';
import { useMaintenanceRecords } from '../hooks/useMaintenanceRecords'; import { useMaintenanceRecords } from '../hooks/useMaintenanceRecords';
import { useVehicles } from '../../vehicles/hooks/useVehicles';
import { FormSuspense } from '../../../components/SuspenseWrappers'; import { FormSuspense } from '../../../components/SuspenseWrappers';
import type { MaintenanceRecordResponse, UpdateMaintenanceRecordRequest, MaintenanceScheduleResponse, UpdateScheduleRequest } from '../types/maintenance.types'; import type { MaintenanceRecordResponse, UpdateMaintenanceRecordRequest, MaintenanceScheduleResponse, UpdateScheduleRequest } from '../types/maintenance.types';
export const MaintenancePage: React.FC = () => { export const MaintenancePage: React.FC = () => {
const { records, schedules, isRecordsLoading, isSchedulesLoading, recordsError, schedulesError, updateRecord, deleteRecord, updateSchedule, deleteSchedule } = useMaintenanceRecords(); const { data: vehicles, isLoading: isLoadingVehicles } = useVehicles();
const [selectedVehicleId, setSelectedVehicleId] = useState<string>('');
const queryClient = useQueryClient(); const queryClient = useQueryClient();
const [activeTab, setActiveTab] = useState<'records' | 'schedules'>('records'); const [activeTab, setActiveTab] = useState<'records' | 'schedules'>('records');
// Auto-select first vehicle when vehicles load
useEffect(() => {
if (vehicles && vehicles.length > 0 && !selectedVehicleId) {
setSelectedVehicleId(vehicles[0].id);
}
}, [vehicles, selectedVehicleId]);
const { records, schedules, isRecordsLoading, isSchedulesLoading, recordsError, schedulesError, updateRecord, deleteRecord, updateSchedule, deleteSchedule } = useMaintenanceRecords(selectedVehicleId || undefined);
const [editingRecord, setEditingRecord] = useState<MaintenanceRecordResponse | null>(null); const [editingRecord, setEditingRecord] = useState<MaintenanceRecordResponse | null>(null);
const [editDialogOpen, setEditDialogOpen] = useState(false); const [editDialogOpen, setEditDialogOpen] = useState(false);
const [editingSchedule, setEditingSchedule] = useState<MaintenanceScheduleResponse | null>(null); const [editingSchedule, setEditingSchedule] = useState<MaintenanceScheduleResponse | null>(null);
@@ -52,7 +63,7 @@ export const MaintenancePage: React.FC = () => {
try { try {
await deleteRecord(recordId); await deleteRecord(recordId);
// Refetch queries after delete // Refetch queries after delete
queryClient.refetchQueries({ queryKey: ['maintenanceRecords', 'all'] }); queryClient.refetchQueries({ queryKey: ['maintenanceRecords'] });
} catch (error) { } catch (error) {
console.error('Failed to delete maintenance record:', error); console.error('Failed to delete maintenance record:', error);
} }
@@ -130,6 +141,31 @@ export const MaintenancePage: React.FC = () => {
return ( return (
<FormSuspense> <FormSuspense>
{/* Vehicle Selector */}
<Box sx={{ mb: 3 }}>
<FormControl fullWidth>
<InputLabel id="maintenance-vehicle-select-label">Vehicle</InputLabel>
<Select
labelId="maintenance-vehicle-select-label"
value={selectedVehicleId}
onChange={(e) => setSelectedVehicleId(e.target.value as string)}
label="Vehicle"
disabled={isLoadingVehicles}
sx={{ minHeight: 56 }}
>
{vehicles && vehicles.length > 0 ? (
vehicles.map((vehicle) => (
<MenuItem key={vehicle.id} value={vehicle.id}>
{vehicle.year} {vehicle.make} {vehicle.model}
</MenuItem>
))
) : (
<MenuItem disabled>No vehicles available</MenuItem>
)}
</Select>
</FormControl>
</Box>
<Box sx={{ borderBottom: 1, borderColor: 'divider', mb: 3 }}> <Box sx={{ borderBottom: 1, borderColor: 'divider', mb: 3 }}>
<Tabs <Tabs
value={activeTab} value={activeTab}

View File

@@ -1,9 +1,10 @@
/** /**
* @ai-summary Email notification toggle component * @ai-summary Email notification toggle component
* @ai-context Mobile-first responsive toggle switch for email notifications * @ai-context Uses MUI Switch to match SettingsPage pill-style toggles
*/ */
import React from 'react'; import React from 'react';
import { Box, Switch, Typography } from '@mui/material';
interface EmailNotificationToggleProps { interface EmailNotificationToggleProps {
enabled: boolean; enabled: boolean;
@@ -19,32 +20,16 @@ export const EmailNotificationToggle: React.FC<EmailNotificationToggleProps> = (
className = '', className = '',
}) => { }) => {
return ( return (
<div className={`flex items-center justify-between gap-3 ${className}`}> <Box className={className} sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', gap: 2 }}>
<label className="text-sm font-medium text-slate-700 dark:text-slate-300"> <Typography variant="body2" fontWeight={500} color="text.secondary">
{label} {label}
</label> </Typography>
<button <Switch
type="button" checked={enabled}
role="switch" onChange={(e) => onChange(e.target.checked)}
aria-checked={enabled} color="primary"
aria-label={label} inputProps={{ 'aria-label': label }}
onClick={() => onChange(!enabled)}
className={`
relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full
border-2 border-transparent transition-colors duration-200 ease-in-out
focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2
${enabled ? 'bg-primary-600' : 'bg-slate-300 dark:bg-slate-600'}
`}
style={{ minWidth: '44px', minHeight: '44px', padding: '9px 0' }}
>
<span
className={`
pointer-events-none inline-block h-5 w-5 transform rounded-full
bg-white shadow ring-0 transition duration-200 ease-in-out
${enabled ? 'translate-x-5' : 'translate-x-0'}
`}
/> />
</button> </Box>
</div>
); );
}; };

View File

@@ -79,6 +79,8 @@ export const UpgradeRequiredDialog: React.FC<UpgradeRequiredDialogProps> = ({
position: 'absolute', position: 'absolute',
right: 8, right: 8,
top: 8, top: 8,
minWidth: 44,
minHeight: 44,
color: (theme) => theme.palette.grey[500], color: (theme) => theme.palette.grey[500],
}} }}
> >
@@ -157,7 +159,7 @@ export const UpgradeRequiredDialog: React.FC<UpgradeRequiredDialogProps> = ({
onClick={onClose} onClick={onClose}
variant="outlined" variant="outlined"
fullWidth={isSmall} fullWidth={isSmall}
sx={{ order: isSmall ? 2 : 1 }} sx={{ order: isSmall ? 2 : 1, minHeight: 44 }}
> >
Maybe Later Maybe Later
</Button> </Button>
@@ -166,7 +168,7 @@ export const UpgradeRequiredDialog: React.FC<UpgradeRequiredDialogProps> = ({
variant="contained" variant="contained"
color="primary" color="primary"
fullWidth={isSmall} fullWidth={isSmall}
sx={{ order: isSmall ? 1 : 2 }} sx={{ order: isSmall ? 1 : 2, minHeight: 44 }}
> >
Upgrade (Coming Soon) Upgrade (Coming Soon)
</Button> </Button>

View File

@@ -1,6 +1,6 @@
# ocr/ # ocr/
Python OCR microservice. Primary engine: PaddleOCR PP-OCRv4 with optional Google Vision cloud fallback. Pluggable engine abstraction in `app/engines/`. Python OCR microservice. Primary engine: PaddleOCR PP-OCRv4 with optional Google Vision cloud fallback. Gemini 2.5 Flash for maintenance manual PDF extraction. Pluggable engine abstraction in `app/engines/`.
## Files ## Files
@@ -14,5 +14,5 @@ Python OCR microservice. Primary engine: PaddleOCR PP-OCRv4 with optional Google
| Directory | What | When to read | | Directory | What | When to read |
| --------- | ---- | ------------ | | --------- | ---- | ------------ |
| `app/` | FastAPI application source | OCR endpoint development | | `app/` | FastAPI application source | OCR endpoint development |
| `app/engines/` | Engine abstraction layer (OcrEngine ABC, factory, hybrid) | Adding or changing OCR engines | | `app/engines/` | Engine abstraction layer (OcrEngine ABC, factory, hybrid) and Gemini module | Adding or changing OCR engines, Gemini integration |
| `tests/` | Test suite | Adding or modifying tests | | `tests/` | Test suite | Adding or modifying tests |

View File

@@ -1,23 +1,25 @@
# ocr/app/ # ocr/app/
Python OCR microservice (FastAPI). Primary engine: PaddleOCR PP-OCRv4 with optional Google Vision cloud fallback. Gemini 2.5 Flash for maintenance manual PDF extraction (standalone module, not an OcrEngine subclass).
## Files ## Files
| File | What | When to read | | File | What | When to read |
| ---- | ---- | ------------ | | ---- | ---- | ------------ |
| `main.py` | FastAPI application entry point | Route registration, app setup | | `main.py` | FastAPI application entry point | Route registration, app setup |
| `config.py` | Configuration settings | Environment variables, settings | | `config.py` | Configuration settings (OCR engines, Vertex AI, Redis, Vision API limits) | Environment variables, settings |
| `__init__.py` | Package init | Package structure | | `__init__.py` | Package init | Package structure |
## Subdirectories ## Subdirectories
| Directory | What | When to read | | Directory | What | When to read |
| --------- | ---- | ------------ | | --------- | ---- | ------------ |
| `engines/` | OCR engine abstraction (PaddleOCR primary, Google Vision fallback) | Engine changes, adding new engines | | `engines/` | OCR engine abstraction (PaddleOCR, Google Vision, Hybrid) and Gemini module | Engine changes, adding new engines |
| `extractors/` | Data extraction logic | Adding new extraction types | | `extractors/` | Domain-specific data extraction (receipts, fuel receipts, maintenance manuals) | Adding new extraction types, modifying extraction logic |
| `models/` | Data models and schemas | Request/response types | | `models/` | Data models and schemas | Request/response types |
| `patterns/` | Regex and parsing patterns | Pattern matching rules | | `patterns/` | Regex patterns and service name mapping (27 maintenance subtypes) | Pattern matching rules, service categorization |
| `preprocessors/` | Image preprocessing pipeline | Image preparation before OCR | | `preprocessors/` | Image preprocessing pipeline | Image preparation before OCR |
| `routers/` | FastAPI route handlers | API endpoint changes | | `routers/` | FastAPI route handlers (/extract, /extract/receipt, /extract/manual, /jobs) | API endpoint changes |
| `services/` | Business logic services | Core OCR processing | | `services/` | Business logic services (job queue with Redis) | Core OCR processing, async job management |
| `table_extraction/` | Table detection and parsing | Structured data extraction | | `table_extraction/` | Table detection and parsing | Structured data extraction from images |
| `validators/` | Input validation | Validation rules | | `validators/` | Input validation | Validation rules |

View File

@@ -29,6 +29,13 @@ class Settings:
os.getenv("VISION_MONTHLY_LIMIT", "1000") os.getenv("VISION_MONTHLY_LIMIT", "1000")
) )
# Vertex AI / Gemini configuration
self.vertex_ai_project: str = os.getenv("VERTEX_AI_PROJECT", "")
self.vertex_ai_location: str = os.getenv(
"VERTEX_AI_LOCATION", "us-central1"
)
self.gemini_model: str = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
# Redis configuration for job queue # Redis configuration for job queue
self.redis_host: str = os.getenv("REDIS_HOST", "mvp-redis") self.redis_host: str = os.getenv("REDIS_HOST", "mvp-redis")
self.redis_port: int = int(os.getenv("REDIS_PORT", "6379")) self.redis_port: int = int(os.getenv("REDIS_PORT", "6379"))

33
ocr/app/engines/CLAUDE.md Normal file
View File

@@ -0,0 +1,33 @@
# ocr/app/engines/
OCR engine abstraction layer. Two categories of engines:
1. **OcrEngine subclasses** (image-to-text): PaddleOCR, Google Vision, Hybrid. Accept image bytes, return text + confidence + word boxes.
2. **GeminiEngine** (PDF-to-structured-data): Standalone module for maintenance schedule extraction via Vertex AI. Accepts PDF bytes, returns structured JSON. Not an OcrEngine subclass because the interface signatures differ.
## Files
| File | What | When to read |
| ---- | ---- | ------------ |
| `__init__.py` | Public engine API exports (OcrEngine, create_engine, exceptions) | Importing engine interfaces |
| `base_engine.py` | OcrEngine ABC, OcrConfig, OcrEngineResult, WordBox, exception hierarchy | Engine interface contract, adding new engines |
| `paddle_engine.py` | PaddleOCR PP-OCRv4 primary engine | Local OCR debugging, accuracy tuning |
| `cloud_engine.py` | Google Vision TEXT_DETECTION fallback engine (WIF authentication) | Cloud OCR configuration, API quota |
| `hybrid_engine.py` | Combines primary + fallback engine with confidence threshold switching | Engine selection logic, fallback behavior |
| `engine_factory.py` | Factory function and engine registry for instantiation | Adding new engine types |
| `gemini_engine.py` | Gemini 2.5 Flash integration for maintenance schedule extraction (Vertex AI SDK, 20MB PDF limit, structured JSON output) | Manual extraction debugging, Gemini configuration |
## Engine Selection
```
create_engine(config)
|
+-- Primary: PaddleOCR (local, fast, no API limits)
|
+-- Fallback: Google Vision (cloud, 1000/month limit)
|
v
HybridEngine (tries primary, falls back if confidence < threshold)
```
GeminiEngine is created independently by ManualExtractor, not through the engine factory.

View File

@@ -0,0 +1,230 @@
"""Gemini 2.5 Flash engine for maintenance schedule extraction from PDFs.
Standalone module (does NOT extend OcrEngine) because Gemini performs
semantic document understanding, not traditional OCR word-box extraction.
Uses Vertex AI SDK with structured JSON output enforcement.
"""
import json
import logging
import os
from dataclasses import dataclass
from typing import Any
from app.config import settings
logger = logging.getLogger(__name__)
# 20 MB hard limit for inline base64 PDF delivery.
# extract_maintenance() compares len(pdf_bytes) against this value before the
# model is initialized, so oversized uploads fail without touching Vertex AI.
_MAX_PDF_BYTES = 20 * 1024 * 1024
# Instruction text passed to generate_content() together with the PDF part.
# The field names requested here (serviceName, intervalMiles, intervalMonths,
# details) are the same keys declared in _RESPONSE_SCHEMA and the keys read
# back when the response is turned into MaintenanceItem objects.
_EXTRACTION_PROMPT = """\
Extract all routine scheduled maintenance items from this vehicle owners manual.
For each maintenance item, extract:
- serviceName: The maintenance task name (e.g., "Engine Oil Change", "Tire Rotation", \
"Cabin Air Filter Replacement")
- intervalMiles: The mileage interval as a number, or null if not specified \
(e.g., 5000, 30000)
- intervalMonths: The time interval in months as a number, or null if not specified \
(e.g., 6, 12, 24)
- details: Any additional details such as fluid specifications, part numbers, \
or special instructions (e.g., "Use 0W-20 full synthetic oil")
Only include routine scheduled maintenance items with clear intervals. \
Do not include one-time procedures, troubleshooting steps, or warranty information.
Return the results as a JSON object with a single "maintenanceSchedule" array.\
"""
_RESPONSE_SCHEMA: dict[str, Any] = {
"type": "object",
"properties": {
"maintenanceSchedule": {
"type": "array",
"items": {
"type": "object",
"properties": {
"serviceName": {"type": "string"},
"intervalMiles": {"type": "number", "nullable": True},
"intervalMonths": {"type": "number", "nullable": True},
"details": {"type": "string", "nullable": True},
},
"required": ["serviceName"],
},
},
},
"required": ["maintenanceSchedule"],
}
class GeminiEngineError(Exception):
    """Base exception for Gemini engine errors.

    Parent of GeminiUnavailableError and GeminiProcessingError, so a single
    ``except GeminiEngineError`` covers every failure this module raises.
    """
class GeminiUnavailableError(GeminiEngineError):
    """Raised when the Gemini engine cannot be initialized.

    Covers a missing credential config file, a missing Vertex AI SDK, and any
    other error raised while the model is being set up.
    """
class GeminiProcessingError(GeminiEngineError):
    """Raised when Gemini fails to process a document.

    Covers PDFs over the inline size limit, responses that are not valid JSON,
    and any other error raised while the request is made or parsed.
    """
@dataclass
class MaintenanceItem:
    """A single extracted maintenance schedule item.

    Only ``service_name`` is required; the remaining fields default to None
    when the source response does not provide them.
    """

    # Task name, taken from the response's "serviceName" key.
    service_name: str
    # Mileage interval from "intervalMiles"; None when not specified.
    interval_miles: int | None = None
    # Time interval in months from "intervalMonths"; None when not specified.
    interval_months: int | None = None
    # Free-text notes from "details"; None when not specified.
    details: str | None = None
@dataclass
class MaintenanceExtractionResult:
    """Result from Gemini maintenance schedule extraction."""

    # Extracted schedule entries; may be empty.
    items: list[MaintenanceItem]
    # Name of the model that produced the items (the engine fills this from
    # settings.gemini_model).
    model: str
class GeminiEngine:
    """Gemini 2.5 Flash wrapper for maintenance schedule extraction.

    Standalone class (not an OcrEngine subclass) because Gemini performs
    semantic document understanding rather than traditional OCR.

    Uses lazy initialization: the Vertex AI client is not created until
    the first ``extract_maintenance()`` call.
    """

    def __init__(self) -> None:
        self._model: Any | None = None
        # Declared up front so the attribute always exists, even if model
        # initialization never runs or fails part-way.
        self._generation_config: Any | None = None

    def _get_model(self) -> Any:
        """Create the GenerativeModel on first use and cache it.

        Authentication uses the same WIF credential path as Google Vision.

        Returns:
            The cached model object.

        Raises:
            GeminiUnavailableError: If the credential config file is missing,
                the Vertex AI SDK cannot be imported, or initialization fails.
        """
        if self._model is not None:
            return self._model

        key_path = settings.google_vision_key_path
        if not os.path.isfile(key_path):
            raise GeminiUnavailableError(
                f"Google credential config not found at {key_path}. "
                "Set GOOGLE_VISION_KEY_PATH or mount the secret."
            )

        try:
            from google.cloud import aiplatform  # type: ignore[import-untyped]
            from vertexai.generative_models import (  # type: ignore[import-untyped]
                GenerationConfig,
                GenerativeModel,
            )

            # Point ADC at the WIF credential config
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
            os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"

            aiplatform.init(
                project=settings.vertex_ai_project,
                location=settings.vertex_ai_location,
            )

            model_name = settings.gemini_model
            model = GenerativeModel(model_name)
            generation_config = GenerationConfig(
                response_mime_type="application/json",
                response_schema=_RESPONSE_SCHEMA,
            )
        except ImportError as exc:
            logger.exception("Vertex AI SDK import failed")
            raise GeminiUnavailableError(
                "google-cloud-aiplatform is not installed. "
                "Install with: pip install google-cloud-aiplatform"
            ) from exc
        except Exception as exc:
            logger.exception("Vertex AI authentication failed")
            raise GeminiUnavailableError(
                f"Vertex AI authentication failed: {exc}"
            ) from exc

        # Publish both objects only after both were built. Caching the model
        # first would leave a half-initialized engine behind if building the
        # generation config raised: later calls would reuse the cached model
        # and then fail on the missing config.
        self._generation_config = generation_config
        self._model = model

        logger.info(
            "Gemini engine initialized (model=%s, project=%s, location=%s)",
            model_name,
            settings.vertex_ai_project,
            settings.vertex_ai_location,
        )
        return model

    @staticmethod
    def _coerce_interval(value: Any) -> int | None:
        """Convert a JSON interval value to ``int``; return None if unusable."""
        # bool is a subclass of int, so reject it explicitly.
        if value is None or isinstance(value, bool):
            return None
        if isinstance(value, (int, float)):
            try:
                return int(round(value))
            except (ValueError, OverflowError):
                # NaN or infinity, which json.loads accepts.
                return None
        return None

    @classmethod
    def _parse_items(cls, raw: Any) -> list[MaintenanceItem]:
        """Turn the decoded JSON payload into a list of MaintenanceItem."""
        if not isinstance(raw, dict):
            raise GeminiProcessingError(
                "Gemini returned an unexpected JSON structure: expected an "
                f"object, got {type(raw).__name__}"
            )

        # A missing or null schedule is treated as "nothing found".
        entries = raw.get("maintenanceSchedule") or []
        if not isinstance(entries, list):
            raise GeminiProcessingError(
                "Gemini returned an unexpected JSON structure: "
                '"maintenanceSchedule" is not an array'
            )

        items: list[MaintenanceItem] = []
        for entry in entries:
            name = entry.get("serviceName") if isinstance(entry, dict) else None
            if not isinstance(name, str) or not name.strip():
                # One malformed entry should not discard the rest of the
                # schedule; drop it and keep going.
                logger.warning(
                    "Skipping malformed maintenance item from Gemini: %r", entry
                )
                continue
            items.append(
                MaintenanceItem(
                    service_name=name,
                    interval_miles=cls._coerce_interval(entry.get("intervalMiles")),
                    interval_months=cls._coerce_interval(entry.get("intervalMonths")),
                    details=entry.get("details"),
                )
            )
        return items

    def extract_maintenance(
        self, pdf_bytes: bytes
    ) -> MaintenanceExtractionResult:
        """Extract maintenance schedules from a PDF owners manual.

        Args:
            pdf_bytes: Raw PDF file bytes (non-empty, <= 20 MB).

        Returns:
            Structured maintenance extraction result. Interval values are
            normalized to ``int``; entries without a usable service name are
            skipped.

        Raises:
            GeminiProcessingError: If the PDF is empty or too large, the
                response is not valid JSON of the expected shape, or the
                request fails.
            GeminiUnavailableError: If the engine cannot be initialized.
        """
        if not pdf_bytes:
            raise GeminiProcessingError("PDF is empty; nothing to extract.")

        if len(pdf_bytes) > _MAX_PDF_BYTES:
            size_mb = len(pdf_bytes) / (1024 * 1024)
            raise GeminiProcessingError(
                f"PDF size ({size_mb:.1f} MB) exceeds the 20 MB limit for "
                "inline processing. Upload to GCS and use a gs:// URI instead."
            )

        model = self._get_model()

        try:
            from vertexai.generative_models import Part  # type: ignore[import-untyped]

            pdf_part = Part.from_data(
                data=pdf_bytes,
                mime_type="application/pdf",
            )
            response = model.generate_content(
                [pdf_part, _EXTRACTION_PROMPT],
                generation_config=self._generation_config,
            )
            raw = json.loads(response.text)
            items = self._parse_items(raw)

            logger.info(
                "Gemini extracted %d maintenance items from PDF (%d bytes)",
                len(items),
                len(pdf_bytes),
            )
            return MaintenanceExtractionResult(
                items=items,
                model=settings.gemini_model,
            )
        except GeminiEngineError:
            # Already one of ours (e.g. from _parse_items); do not re-wrap.
            raise
        except json.JSONDecodeError as exc:
            raise GeminiProcessingError(
                f"Gemini returned invalid JSON: {exc}"
            ) from exc
        except Exception as exc:
            raise GeminiProcessingError(
                f"Gemini maintenance extraction failed: {exc}"
            ) from exc

View File

@@ -1,17 +1,11 @@
"""Owner's manual extractor for maintenance schedule extraction.""" """Owner's manual extractor for maintenance schedule extraction via Gemini."""
import io
import logging import logging
import time import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Callable, Optional from typing import Callable, Optional
from PIL import Image from app.engines.gemini_engine import GeminiEngine, GeminiEngineError
from app.patterns.service_mapping import service_mapper
from app.engines import create_engine, OcrConfig
from app.preprocessors.pdf_preprocessor import pdf_preprocessor, PdfInfo
from app.table_extraction.detector import table_detector, DetectedTable
from app.table_extraction.parser import table_parser, ParsedScheduleRow
from app.patterns.maintenance_patterns import maintenance_matcher
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -52,30 +46,26 @@ class ManualExtractionResult:
class ManualExtractor: class ManualExtractor:
"""Extract maintenance schedules from owner's manuals. """Extract maintenance schedules from owner's manuals using Gemini.
Processing pipeline: Processing pipeline:
1. Analyze PDF structure 1. Send entire PDF to Gemini for semantic extraction
2. Find maintenance section pages 2. Map extracted service names to system maintenance subtypes via fuzzy matching
3. Extract text (native) or OCR (scanned) 3. Return structured results
4. Detect tables
5. Parse schedules
6. Normalize and deduplicate
""" """
# Maximum pages to process for performance # Default confidence for Gemini-extracted items without a subtype match
MAX_PAGES_TO_PROCESS = 50 DEFAULT_CONFIDENCE = 0.85
# Minimum confidence to include schedule def __init__(self) -> None:
MIN_CONFIDENCE = 0.5 self._engine = GeminiEngine()
def extract( def extract(
self, self,
pdf_bytes: bytes, pdf_bytes: bytes,
progress_callback: Optional[Callable[[int, str], None]] = None, progress_callback: Optional[Callable[[int, str], None]] = None,
) -> ManualExtractionResult: ) -> ManualExtractionResult:
""" """Extract maintenance schedules from an owner's manual PDF.
Extract maintenance schedules from an owner's manual PDF.
Args: Args:
pdf_bytes: Raw PDF bytes pdf_bytes: Raw PDF bytes
@@ -92,97 +82,72 @@ class ManualExtractor:
logger.info(f"Progress {percent}%: {message}") logger.info(f"Progress {percent}%: {message}")
try: try:
update_progress(5, "Analyzing PDF structure") update_progress(10, "Preparing extraction")
# Get PDF info update_progress(50, "Processing with Gemini")
pdf_info = pdf_preprocessor.get_pdf_info(pdf_bytes) gemini_result = self._engine.extract_maintenance(pdf_bytes)
logger.info(
f"PDF: {pdf_info.total_pages} pages, "
f"has_text={pdf_info.has_text_layer}, "
f"is_scanned={pdf_info.is_scanned}"
)
update_progress(10, "Finding maintenance sections") update_progress(95, "Mapping results")
# Find pages likely to contain maintenance schedules schedules: list[ExtractedSchedule] = []
maintenance_pages = pdf_preprocessor.find_maintenance_section(pdf_bytes) for item in gemini_result.items:
mapping = service_mapper.map_service_fuzzy(item.service_name)
if not maintenance_pages: if mapping:
# If no specific pages found, process first N pages subtypes = mapping.subtypes
maintenance_pages = list(range(min(self.MAX_PAGES_TO_PROCESS, pdf_info.total_pages))) confidence = mapping.confidence
logger.info("No specific maintenance section found, processing all pages") service_name = mapping.normalized_name
else: else:
# Include pages before and after detected maintenance pages subtypes = []
expanded_pages: set[int] = set() confidence = self.DEFAULT_CONFIDENCE
for page in maintenance_pages: service_name = item.service_name
for offset in range(-2, 5): # Include 2 before, 4 after
new_page = page + offset
if 0 <= new_page < pdf_info.total_pages:
expanded_pages.add(new_page)
maintenance_pages = sorted(expanded_pages)[:self.MAX_PAGES_TO_PROCESS]
logger.info(f"Processing {len(maintenance_pages)} pages around maintenance section")
update_progress(15, "Extracting page content") schedules.append(
ExtractedSchedule(
# Extract content from pages service=service_name,
all_schedules: list[ParsedScheduleRow] = [] interval_miles=item.interval_miles,
all_tables: list[dict] = [] interval_months=item.interval_months,
pages_processed = 0 details=item.details,
confidence=confidence,
for i, page_num in enumerate(maintenance_pages): subtypes=subtypes,
page_progress = 15 + int((i / len(maintenance_pages)) * 60)
update_progress(page_progress, f"Processing page {page_num + 1}")
# Extract page content
page_content = pdf_preprocessor.extract_text_from_page(pdf_bytes, page_num)
pages_processed += 1
# Process based on content type
if page_content.has_text:
# Native PDF - use text directly
schedules, tables = self._process_text_page(
page_content.text_content, page_num
) )
elif page_content.image_bytes:
# Scanned PDF - OCR required
schedules, tables = self._process_scanned_page(
page_content.image_bytes, page_num
) )
else:
continue
all_schedules.extend(schedules)
all_tables.extend(tables)
update_progress(75, "Normalizing results")
# Deduplicate and normalize schedules
normalized_schedules = self._normalize_schedules(all_schedules)
update_progress(85, "Extracting vehicle information")
# Try to extract vehicle info from first few pages
vehicle_info = self._extract_vehicle_info(pdf_bytes, pdf_info)
update_progress(95, "Finalizing results")
processing_time_ms = int((time.time() - start_time) * 1000) processing_time_ms = int((time.time() - start_time) * 1000)
logger.info( logger.info(
f"Extraction complete: {len(normalized_schedules)} schedules from " f"Extraction complete: {len(schedules)} schedules in {processing_time_ms}ms"
f"{pages_processed} pages in {processing_time_ms}ms"
) )
update_progress(100, "Complete") # Note: do NOT send 100% progress here. The caller sets status=COMPLETED
# after this returns. Because this runs in a thread executor and the
# progress callback uses run_coroutine_threadsafe (fire-and-forget),
# a 100% update here races with complete_manual_job() and can overwrite
# COMPLETED back to PROCESSING.
return ManualExtractionResult( return ManualExtractionResult(
success=True, success=True,
vehicle_info=vehicle_info, vehicle_info=None,
maintenance_schedules=normalized_schedules, maintenance_schedules=schedules,
raw_tables=[{"page": t.get("page", 0), "rows": t.get("rows", 0)} for t in all_tables], raw_tables=[],
processing_time_ms=processing_time_ms, processing_time_ms=processing_time_ms,
total_pages=pdf_info.total_pages, total_pages=0,
pages_processed=pages_processed, pages_processed=0,
)
except GeminiEngineError as e:
logger.error(f"Gemini extraction failed: {e}", exc_info=True)
processing_time_ms = int((time.time() - start_time) * 1000)
return ManualExtractionResult(
success=False,
vehicle_info=None,
maintenance_schedules=[],
raw_tables=[],
processing_time_ms=processing_time_ms,
total_pages=0,
pages_processed=0,
error=str(e),
) )
except Exception as e: except Exception as e:
@@ -200,220 +165,6 @@ class ManualExtractor:
error=str(e), error=str(e),
) )
def _process_text_page(
    self, text: str, page_number: int
) -> tuple[list[ParsedScheduleRow], list[dict]]:
    """Extract schedule rows from the text of a native (non-scanned) page.

    Rows come from two sources: tables detected in ``text`` that are flagged
    as maintenance tables and have a header row, and a pass over the whole
    text as an unstructured block.

    Returns:
        ``(schedules, tables)`` where ``tables`` holds one summary dict per
        maintenance table that was parsed.
    """
    found_rows: list[ParsedScheduleRow] = []
    table_summaries: list[dict] = []

    for candidate in table_detector.detect_tables_in_text(text, page_number):
        # Only tables flagged as maintenance tables with a header are parsed.
        if not (candidate.is_maintenance_table and candidate.header_row):
            continue
        found_rows.extend(
            table_parser.parse_table(candidate.header_row, candidate.raw_content)
        )
        summary = {
            "page": page_number,
            "rows": len(candidate.raw_content),
            "is_maintenance": True,
        }
        table_summaries.append(summary)

    # The full text is parsed as well, regardless of what the tables yielded.
    found_rows.extend(table_parser.parse_text_block(text))
    return found_rows, table_summaries
def _process_scanned_page(
    self, image_bytes: bytes, page_number: int
) -> tuple[list[ParsedScheduleRow], list[dict]]:
    """Process a scanned PDF page with OCR.

    Runs table detection on the page image, OCRs the page, then extracts
    schedule rows from the OCR text (both from tables detected in that text
    and from the text as an unstructured block).

    Returns:
        ``(schedules, tables)``. Both are empty if OCR or parsing raises; the
        failure is logged as a warning and not propagated.
    """
    schedules: list[ParsedScheduleRow] = []
    tables: list[dict] = []
    # Detect tables in image
    detected_tables = table_detector.detect_tables_in_image(image_bytes, page_number)
    # OCR the full page
    try:
        engine = create_engine()
        ocr_result = engine.recognize(image_bytes, OcrConfig())
        ocr_text = ocr_result.text
        # Mark tables as maintenance if page contains maintenance keywords
        # NOTE(review): the image-detected tables are only flagged here; the
        # rows and table summaries below come from text_tables, not from
        # detected_tables. Confirm that is intended.
        for table in detected_tables:
            table.is_maintenance_table = table_detector.is_maintenance_table(
                table, ocr_text
            )
        # Try to extract from OCR text
        text_tables = table_detector.detect_tables_in_text(ocr_text, page_number)
        for table in text_tables:
            if table.is_maintenance_table and table.header_row:
                parsed = table_parser.parse_table(
                    table.header_row,
                    table.raw_content,
                )
                schedules.extend(parsed)
                tables.append({
                    "page": page_number,
                    "rows": len(table.raw_content),
                    "is_maintenance": True,
                })
        # Also try unstructured text
        text_schedules = table_parser.parse_text_block(ocr_text)
        schedules.extend(text_schedules)
    except Exception as e:
        # Best effort: a page that fails OCR contributes whatever was
        # collected before the failure instead of aborting the extraction.
        logger.warning(f"OCR failed for page {page_number}: {e}")
    return schedules, tables
def _normalize_schedules(
    self, schedules: list[ParsedScheduleRow]
) -> list[ExtractedSchedule]:
    """Collapse parsed rows into one ExtractedSchedule per service.

    Rows below ``MIN_CONFIDENCE`` are dropped. The remaining rows are grouped
    by normalized service name (falling back to the lower-cased raw name).
    Within a group the highest-confidence row wins, and any interval, details
    or fluid spec it lacks is filled from the other rows in confidence order.
    The result is ordered by descending confidence.
    """
    grouped: dict[str, list[ParsedScheduleRow]] = {}
    for row in schedules:
        if row.confidence < self.MIN_CONFIDENCE:
            continue
        group_key = row.normalized_service or row.service.lower()
        grouped.setdefault(group_key, []).append(row)

    merged: list[ExtractedSchedule] = []
    for rows in grouped.values():
        ranked = sorted(rows, key=lambda r: r.confidence, reverse=True)
        top = ranked[0]

        miles = top.interval_miles
        months = top.interval_months
        details = top.details
        fluid_spec = top.fluid_spec

        # Fill gaps from lower-ranked duplicates; first usable value wins.
        for other in ranked[1:]:
            if not miles and other.interval_miles:
                miles = other.interval_miles
            if not months and other.interval_months:
                months = other.interval_months
            if not details and other.details:
                details = other.details
            if not fluid_spec and other.fluid_spec:
                fluid_spec = other.fluid_spec

        pieces = []
        if details:
            pieces.append(details)
        if fluid_spec:
            pieces.append(f"Use {fluid_spec}")
        combined_details = " - ".join(pieces) if pieces else None

        merged.append(
            ExtractedSchedule(
                service=top.normalized_service or top.service,
                interval_miles=miles,
                interval_months=months,
                details=combined_details,
                confidence=top.confidence,
                subtypes=top.subtypes,
            )
        )

    return sorted(merged, key=lambda s: s.confidence, reverse=True)
def _extract_vehicle_info(
    self, pdf_bytes: bytes, pdf_info: PdfInfo
) -> Optional[VehicleInfo]:
    """Extract vehicle make/model/year from manual.

    Tries the PDF title first, then the text of page 0 (OCR-ing that page if
    it has no text but does have image bytes).

    Returns:
        The parsed vehicle info, or None if nothing could be parsed or the
        first-page lookup raised (the error is logged as a warning).
    """
    # Check metadata first
    if pdf_info.title:
        info = self._parse_vehicle_from_title(pdf_info.title)
        if info:
            return info
    # Try first page
    try:
        first_page = pdf_preprocessor.extract_text_from_page(pdf_bytes, 0)
        text = first_page.text_content
        if not text and first_page.image_bytes:
            # OCR first page
            engine = create_engine()
            ocr_result = engine.recognize(first_page.image_bytes, OcrConfig())
            text = ocr_result.text
        if text:
            return self._parse_vehicle_from_text(text)
    except Exception as e:
        # Vehicle info is optional; failing to find it must not fail the job.
        logger.warning(f"Failed to extract vehicle info: {e}")
    return None
def _parse_vehicle_from_title(self, title: str) -> Optional[VehicleInfo]:
    """Parse vehicle info from document title.

    Looks for a four-digit year (19xx/20xx) and for the first known make that
    appears in ``title`` as a whole word (case-insensitive). The word directly
    following the make is taken as the model.

    Makes are matched on word boundaries, not as raw substrings, so that
    ordinary words are not mistaken for makes ("program" for Ram, "audio" for
    Audi, "minimum" for Mini, "afford" for Ford).

    Returns:
        A VehicleInfo when a year or a make was found, otherwise None.
    """
    import re

    # Common patterns: "2024 Honda Civic Owner's Manual"
    year_match = re.search(r"(20\d{2}|19\d{2})", title)
    year = int(year_match.group(1)) if year_match else None

    # Common makes
    makes = [
        "Acura", "Alfa Romeo", "Audi", "BMW", "Buick", "Cadillac",
        "Chevrolet", "Chrysler", "Dodge", "Ferrari", "Fiat", "Ford",
        "Genesis", "GMC", "Honda", "Hyundai", "Infiniti", "Jaguar",
        "Jeep", "Kia", "Lamborghini", "Land Rover", "Lexus", "Lincoln",
        "Maserati", "Mazda", "McLaren", "Mercedes", "Mini", "Mitsubishi",
        "Nissan", "Porsche", "Ram", "Rolls-Royce", "Subaru", "Tesla",
        "Toyota", "Volkswagen", "Volvo",
    ]

    make = None
    model = None
    for candidate in makes:
        # Lookarounds instead of \b so makes containing spaces or hyphens
        # ("Land Rover", "Rolls-Royce") are handled uniformly.
        found = re.search(
            rf"(?<!\w){re.escape(candidate)}(?!\w)", title, re.IGNORECASE
        )
        if not found:
            continue
        make = candidate
        # Slice the original string at the match end. The previous code took
        # the offset from title.lower(), which is not guaranteed to have the
        # same length as title for every Unicode input.
        after = title[found.end():].strip()
        # First word after make is likely model
        model_match = re.match(r"^(\w+)", after)
        if model_match:
            model = model_match.group(1)
        break

    if year or make:
        return VehicleInfo(make=make, model=model, year=year)
    return None
def _parse_vehicle_from_text(self, text: str) -> Optional[VehicleInfo]:
    """Parse vehicle info from page text, using only its first 500 characters."""
    leading_text = text[:500]
    return self._parse_vehicle_from_title(leading_text)
# Singleton instance # Singleton instance
manual_extractor = ManualExtractor() manual_extractor = ManualExtractor()

View File

@@ -280,11 +280,9 @@ async def extract_manual(
the time required for large documents. the time required for large documents.
Pipeline: Pipeline:
1. Analyze PDF structure (text layer vs scanned) 1. Send entire PDF to Gemini for semantic extraction
2. Find maintenance schedule sections 2. Map extracted service names to system maintenance subtypes
3. Extract text or perform OCR on scanned pages 3. Return structured results with confidence scores
4. Detect and parse maintenance tables
5. Extract service intervals and fluid specifications
- **file**: Owner's manual PDF (max 200MB) - **file**: Owner's manual PDF (max 200MB)
- **vehicle_id**: Optional vehicle ID for context - **vehicle_id**: Optional vehicle ID for context
@@ -361,8 +359,8 @@ async def process_manual_job(job_id: str) -> None:
# Update status to processing # Update status to processing
await job_queue.update_manual_job_progress(job_id, 5, "Starting extraction") await job_queue.update_manual_job_progress(job_id, 5, "Starting extraction")
# Get job data # Get job data (must use manual-specific key prefix)
file_bytes = await job_queue.get_job_data(job_id) file_bytes = await job_queue.get_manual_job_data(job_id)
if not file_bytes: if not file_bytes:
await job_queue.fail_manual_job(job_id, "Job data not found") await job_queue.fail_manual_job(job_id, "Job data not found")
return return

View File

@@ -207,10 +207,15 @@ class JobQueue:
async def get_job_data(self, job_id: str) -> Optional[bytes]: async def get_job_data(self, job_id: str) -> Optional[bytes]:
"""Get the file data for a job.""" """Get the file data for a job."""
r = await self.get_redis() return await self._get_raw_data(f"{JOB_DATA_PREFIX}{job_id}")
data_key = f"{JOB_DATA_PREFIX}{job_id}"
# Get raw bytes (not decoded) async def get_manual_job_data(self, job_id: str) -> Optional[bytes]:
"""Get the file data for a manual extraction job."""
return await self._get_raw_data(f"{MANUAL_JOB_DATA_PREFIX}{job_id}")
async def _get_raw_data(self, data_key: str) -> Optional[bytes]:
"""Get raw binary data from Redis."""
# Need separate connection with decode_responses=False for binary data
raw_redis = redis.Redis( raw_redis = redis.Redis(
host=settings.redis_host, host=settings.redis_host,
port=settings.redis_port, port=settings.redis_port,

View File

@@ -21,6 +21,9 @@ google-cloud-vision>=3.7.0
# PDF Processing # PDF Processing
PyMuPDF>=1.23.0 PyMuPDF>=1.23.0
# Vertex AI / Gemini (maintenance schedule extraction)
google-cloud-aiplatform>=1.40.0
# Redis for job queue # Redis for job queue
redis>=5.0.0 redis>=5.0.0

View File

@@ -0,0 +1,353 @@
"""Tests for Gemini engine maintenance schedule extraction.
Covers: GeminiEngine initialization, PDF size validation,
successful extraction, empty results, and error handling.
All Vertex AI SDK calls are mocked.
"""
import json
from unittest.mock import MagicMock, patch, PropertyMock
import pytest
from app.engines.gemini_engine import (
GeminiEngine,
GeminiEngineError,
GeminiProcessingError,
GeminiUnavailableError,
MaintenanceExtractionResult,
MaintenanceItem,
_MAX_PDF_BYTES,
)
# --- Helpers ---
def _make_pdf_bytes(size: int = 1024) -> bytes:
"""Create fake PDF bytes of a given size."""
# Minimal PDF header so it looks plausible, padded to size
header = b"%PDF-1.4 fake"
return header + b"\x00" * max(0, size - len(header))
def _make_gemini_response(schedule: list[dict]) -> MagicMock:
"""Create a mock Gemini generate_content response."""
response = MagicMock()
response.text = json.dumps({"maintenanceSchedule": schedule})
return response
# --- Exception hierarchy ---
class TestExceptionHierarchy:
    """Both concrete Gemini errors derive from GeminiEngineError, itself an Exception."""

    def test_processing_error_is_engine_error(self):
        assert GeminiEngineError in GeminiProcessingError.__mro__

    def test_unavailable_error_is_engine_error(self):
        assert GeminiEngineError in GeminiUnavailableError.__mro__

    def test_engine_error_is_exception(self):
        assert Exception in GeminiEngineError.__mro__
# --- Data types ---
class TestMaintenanceItem:
    """MaintenanceItem stores what it is given and defaults the optional fields to None."""

    def test_required_fields_only(self):
        item = MaintenanceItem(service_name="Oil Change")
        assert item.service_name == "Oil Change"
        # Every optional field falls back to None.
        for optional in (item.interval_miles, item.interval_months, item.details):
            assert optional is None

    def test_all_fields(self):
        supplied = {
            "service_name": "Tire Rotation",
            "interval_miles": 5000,
            "interval_months": 6,
            "details": "Rotate front to rear on same side.",
        }
        item = MaintenanceItem(**supplied)
        for field_name, expected in supplied.items():
            assert getattr(item, field_name) == expected
class TestMaintenanceExtractionResult:
    """MaintenanceExtractionResult keeps the item list and model name it is built with."""

    def test_construction(self):
        single = MaintenanceItem(service_name="Oil Change")
        result = MaintenanceExtractionResult(model="gemini-2.5-flash", items=[single])
        assert len(result.items) == 1
        assert result.model == "gemini-2.5-flash"

    def test_empty_items(self):
        result = MaintenanceExtractionResult(model="gemini-2.5-flash", items=[])
        assert result.items == []
# --- PDF size validation ---
class TestPdfSizeValidation:
    """Verify the 20 MB PDF size limit."""

    def test_oversized_pdf_rejected(self):
        """PDFs exceeding 20 MB must be rejected with a clear error."""
        engine = GeminiEngine()
        oversized = _make_pdf_bytes(_MAX_PDF_BYTES + 1)
        with pytest.raises(GeminiProcessingError, match="exceeds the 20 MB limit"):
            engine.extract_maintenance(oversized)

    def test_exactly_at_limit_accepted(self):
        """PDFs exactly at 20 MB should pass size validation.

        Getting past the size check is observed indirectly: the call must
        reach model initialization and fail there. The credential lookup is
        forced to miss so the outcome does not depend on whether a credential
        file happens to exist on the machine running the tests.
        """
        engine = GeminiEngine()
        exact = _make_pdf_bytes(_MAX_PDF_BYTES)
        # Should fail at _get_model, not at size check
        with (
            patch("app.engines.gemini_engine.os.path.isfile", return_value=False),
            pytest.raises(GeminiUnavailableError, match="credential config not found"),
        ):
            engine.extract_maintenance(exact)
# --- Successful extraction ---
class TestExtractMaintenance:
    """Verify successful maintenance schedule extraction."""

    @pytest.fixture(autouse=True)
    def _stub_vertex_sdk(self):
        """Install stand-ins for the Vertex AI modules around every test here.

        extract_maintenance() imports ``Part`` from ``vertexai.generative_models``
        at call time. Stubbing the modules for all tests (not only the first
        one) keeps them independent of whether the SDK is installed.
        """
        stubs = {
            "vertexai": MagicMock(),
            "vertexai.generative_models": MagicMock(),
        }
        with patch.dict("sys.modules", stubs):
            yield

    @staticmethod
    def _configure_settings(mock_settings) -> None:
        """Give the patched settings object the values the engine reads."""
        mock_settings.google_vision_key_path = "/fake/creds.json"
        mock_settings.vertex_ai_project = "test-project"
        mock_settings.vertex_ai_location = "us-central1"
        mock_settings.gemini_model = "gemini-2.5-flash"

    @staticmethod
    def _engine_returning(schedule: list[dict]) -> GeminiEngine:
        """Build an engine whose injected model answers with ``schedule``."""
        mock_model = MagicMock()
        mock_model.generate_content.return_value = _make_gemini_response(schedule)
        engine = GeminiEngine()
        # Inject the model directly so lazy initialization is bypassed.
        engine._model = mock_model
        engine._generation_config = MagicMock()
        return engine

    @patch("app.engines.gemini_engine.settings")
    @patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
    def test_valid_pdf_returns_structured_schedules(
        self, mock_isfile, mock_settings
    ):
        """Normal: Valid PDF returns structured maintenance schedules."""
        self._configure_settings(mock_settings)
        schedule = [
            {
                "serviceName": "Engine Oil Change",
                "intervalMiles": 5000,
                "intervalMonths": 6,
                "details": "Use 0W-20 full synthetic oil.",
            },
            {
                "serviceName": "Tire Rotation",
                "intervalMiles": 5000,
                "intervalMonths": 6,
                "details": None,
            },
        ]

        result = self._engine_returning(schedule).extract_maintenance(_make_pdf_bytes())

        assert isinstance(result, MaintenanceExtractionResult)
        assert len(result.items) == 2
        assert result.model == "gemini-2.5-flash"

        oil = result.items[0]
        assert oil.service_name == "Engine Oil Change"
        assert oil.interval_miles == 5000
        assert oil.interval_months == 6
        assert oil.details == "Use 0W-20 full synthetic oil."

        tire = result.items[1]
        assert tire.service_name == "Tire Rotation"
        assert tire.details is None

    @patch("app.engines.gemini_engine.settings")
    @patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
    def test_no_maintenance_content_returns_empty_array(
        self, mock_isfile, mock_settings
    ):
        """Edge: PDF with no maintenance content returns empty array."""
        self._configure_settings(mock_settings)

        result = self._engine_returning([]).extract_maintenance(_make_pdf_bytes())

        assert isinstance(result, MaintenanceExtractionResult)
        assert result.items == []

    @patch("app.engines.gemini_engine.settings")
    @patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
    def test_nullable_fields_handled(self, mock_isfile, mock_settings):
        """Items with only serviceName (nullable fields omitted) parse correctly."""
        self._configure_settings(mock_settings)
        schedule = [{"serviceName": "Brake Fluid Replacement"}]

        result = self._engine_returning(schedule).extract_maintenance(_make_pdf_bytes())

        assert len(result.items) == 1
        item = result.items[0]
        assert item.service_name == "Brake Fluid Replacement"
        assert item.interval_miles is None
        assert item.interval_months is None
        assert item.details is None
# --- Error handling ---
class TestErrorHandling:
    """Verify error handling for various failure modes."""

    @staticmethod
    def _stub_vertex_sdk():
        """Return a context manager installing stand-ins for the Vertex AI modules.

        Used by the tests that inject a model and therefore reach the
        call-time ``Part`` import inside extract_maintenance(). Without the
        stubs, a missing SDK would itself raise inside the engine and the
        tests would pass or fail for a reason other than the one they target.
        """
        return patch.dict(
            "sys.modules",
            {"vertexai": MagicMock(), "vertexai.generative_models": MagicMock()},
        )

    @staticmethod
    def _configure_settings(mock_settings) -> None:
        """Give the patched settings object the values the engine reads."""
        mock_settings.google_vision_key_path = "/fake/creds.json"
        mock_settings.vertex_ai_project = "test-project"
        mock_settings.vertex_ai_location = "us-central1"
        mock_settings.gemini_model = "gemini-2.5-flash"

    def test_missing_credential_file_raises_unavailable(self):
        """Auth failure: Missing credential file raises GeminiUnavailableError."""
        engine = GeminiEngine()
        with (
            patch("app.engines.gemini_engine.os.path.isfile", return_value=False),
            pytest.raises(GeminiUnavailableError, match="credential config not found"),
        ):
            engine.extract_maintenance(_make_pdf_bytes())

    @patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
    def test_missing_sdk_raises_unavailable(self, mock_isfile):
        """Auth failure: Missing SDK raises GeminiUnavailableError."""
        engine = GeminiEngine()
        with (
            patch("app.engines.gemini_engine.settings") as mock_settings,
            # A None entry in sys.modules makes the import raise ImportError.
            patch.dict("sys.modules", {
                "google.cloud.aiplatform": None,
            }),
        ):
            mock_settings.google_vision_key_path = "/fake/creds.json"
            with pytest.raises(GeminiUnavailableError):
                engine.extract_maintenance(_make_pdf_bytes())

    @patch("app.engines.gemini_engine.settings")
    @patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
    def test_generate_content_exception_raises_processing_error(
        self, mock_isfile, mock_settings
    ):
        """Runtime error from Gemini API is wrapped as GeminiProcessingError."""
        self._configure_settings(mock_settings)
        mock_model = MagicMock()
        mock_model.generate_content.side_effect = RuntimeError("API quota exceeded")
        engine = GeminiEngine()
        engine._model = mock_model
        engine._generation_config = MagicMock()

        with (
            self._stub_vertex_sdk(),
            pytest.raises(GeminiProcessingError, match="maintenance extraction failed"),
        ):
            engine.extract_maintenance(_make_pdf_bytes())

        # The failure must come from the API call itself.
        mock_model.generate_content.assert_called_once()

    @patch("app.engines.gemini_engine.settings")
    @patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
    def test_invalid_json_response_raises_processing_error(
        self, mock_isfile, mock_settings
    ):
        """Gemini returning invalid JSON is caught and wrapped."""
        self._configure_settings(mock_settings)
        mock_response = MagicMock()
        mock_response.text = "not valid json {{"
        mock_model = MagicMock()
        mock_model.generate_content.return_value = mock_response
        engine = GeminiEngine()
        engine._model = mock_model
        engine._generation_config = MagicMock()

        with (
            self._stub_vertex_sdk(),
            pytest.raises(GeminiProcessingError, match="invalid JSON"),
        ):
            engine.extract_maintenance(_make_pdf_bytes())
# --- Lazy initialization ---
class TestLazyInitialization:
    """Verify the model is not created until first use."""

    def test_model_is_none_after_construction(self):
        """GeminiEngine should not initialize the model in __init__."""
        engine = GeminiEngine()
        assert engine._model is None

    @patch("app.engines.gemini_engine.settings")
    @patch("app.engines.gemini_engine.os.path.isfile", return_value=True)
    def test_model_reused_on_second_call(self, mock_isfile, mock_settings):
        """Once initialized, the same model instance is reused."""
        mock_settings.google_vision_key_path = "/fake/creds.json"
        mock_settings.vertex_ai_project = "test-project"
        mock_settings.vertex_ai_location = "us-central1"
        mock_settings.gemini_model = "gemini-2.5-flash"

        schedule = [{"serviceName": "Oil Change", "intervalMiles": 5000}]
        mock_model = MagicMock()
        mock_model.generate_content.return_value = _make_gemini_response(schedule)

        engine = GeminiEngine()
        engine._model = mock_model
        engine._generation_config = MagicMock()

        # Stub the modules behind the call-time ``Part`` import so the test
        # does not depend on the SDK being installed.
        sdk_stubs = {
            "vertexai": MagicMock(),
            "vertexai.generative_models": MagicMock(),
        }
        with patch.dict("sys.modules", sdk_stubs):
            engine.extract_maintenance(_make_pdf_bytes())
            engine.extract_maintenance(_make_pdf_bytes())

        # Model's generate_content should have been called twice
        assert mock_model.generate_content.call_count == 2
        # ...on the very same instance, with no re-initialization in between:
        # the credential check is the first step of initialization, so it
        # must never have run.
        assert engine._model is mock_model
        mock_isfile.assert_not_called()

View File

@@ -0,0 +1,271 @@
"""Tests for ManualExtractor Gemini-based maintenance schedule extraction.
Covers: normal extraction with subtype mapping, unusual service names,
empty Gemini response, and Gemini call failure.
All GeminiEngine calls are mocked.
"""
from unittest.mock import MagicMock, patch
import pytest
from app.engines.gemini_engine import (
GeminiProcessingError,
MaintenanceExtractionResult,
MaintenanceItem,
)
from app.extractors.manual_extractor import (
ExtractedSchedule,
ManualExtractionResult,
ManualExtractor,
)
# --- Helpers ---
def _make_pdf_bytes(size: int = 1024) -> bytes:
"""Create fake PDF bytes of a given size."""
header = b"%PDF-1.4 fake"
return header + b"\x00" * max(0, size - len(header))
def _make_gemini_result(items: list[MaintenanceItem]) -> MaintenanceExtractionResult:
    """Wrap ``items`` in a MaintenanceExtractionResult labelled with the gemini-2.5-flash model."""
    extraction = MaintenanceExtractionResult(model="gemini-2.5-flash", items=items)
    return extraction
# --- Successful extraction ---
class TestNormalExtraction:
    """Happy path: Gemini items come back as schedules carrying mapped subtypes."""

    def test_pdf_with_maintenance_schedule_returns_mapped_items(self):
        """Normal: three common services are returned in order with their subtypes."""
        gemini_items = [
            MaintenanceItem(
                service_name="Engine Oil Change",
                interval_miles=5000,
                interval_months=6,
                details="Use 0W-20 full synthetic oil",
            ),
            MaintenanceItem(
                service_name="Tire Rotation",
                interval_miles=5000,
                interval_months=6,
                details=None,
            ),
            MaintenanceItem(
                service_name="Cabin Filter",
                interval_miles=15000,
                interval_months=12,
                details=None,
            ),
        ]
        extractor = ManualExtractor()
        engine_stub = MagicMock()
        engine_stub.extract_maintenance.return_value = _make_gemini_result(gemini_items)
        extractor._engine = engine_stub

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is True
        assert result.error is None
        assert len(result.maintenance_schedules) == 3
        oil, tire, cabin = result.maintenance_schedules

        # First item: every field is carried through and the Engine Oil subtype is attached.
        assert oil.service == "Engine Oil Change"
        assert oil.interval_miles == 5000
        assert oil.interval_months == 6
        assert oil.details == "Use 0W-20 full synthetic oil"
        assert "Engine Oil" in oil.subtypes
        assert oil.confidence > 0.0

        # Second item: expected to carry the Tires subtype.
        assert tire.service == "Tire Rotation"
        assert "Tires" in tire.subtypes

        # Third item: expected to carry the Cabin Air Filter / Purifier subtype.
        assert "Cabin Air Filter / Purifier" in cabin.subtypes

    def test_progress_callbacks_fire_at_intervals(self):
        """The progress callback is invoked with 10, 50 and 95 percent, in non-decreasing order."""
        extractor = ManualExtractor()
        engine_stub = MagicMock()
        engine_stub.extract_maintenance.return_value = _make_gemini_result(
            [MaintenanceItem(service_name="Oil Change", interval_miles=5000)]
        )
        extractor._engine = engine_stub

        reported: list[tuple[int, str]] = []
        extractor.extract(
            _make_pdf_bytes(),
            progress_callback=lambda percent, message: reported.append((percent, message)),
        )

        # 100% is not expected here; it is set by complete_manual_job.
        percents = [entry[0] for entry in reported]
        for milestone in (10, 50, 95):
            assert milestone in percents

        # Reported progress must never go backwards.
        assert percents == sorted(percents)
# --- Unusual service names ---
class TestUnusualServiceNames:
    """Service names that are not canonical subtype names."""

    def test_unusual_names_fuzzy_match_to_subtypes(self):
        """Edge: descriptive service phrases still resolve to the expected subtype."""
        gemini_items = [
            MaintenanceItem(
                service_name="Replace engine air cleaner element",
                interval_miles=30000,
            ),
            MaintenanceItem(
                service_name="Inspect drive belt for cracks",
                interval_miles=60000,
            ),
        ]
        extractor = ManualExtractor()
        engine_stub = MagicMock()
        engine_stub.extract_maintenance.return_value = _make_gemini_result(gemini_items)
        extractor._engine = engine_stub

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is True
        schedules = result.maintenance_schedules
        assert len(schedules) == 2

        # "air cleaner element" is expected to land on Air Filter Element.
        assert "Air Filter Element" in schedules[0].subtypes
        # "drive belt" is expected to land on Drive Belt.
        assert "Drive Belt" in schedules[1].subtypes

    def test_unmapped_service_uses_gemini_name_directly(self):
        """Edge: an unmatched name is kept verbatim, with no subtypes and default confidence."""
        extractor = ManualExtractor()
        engine_stub = MagicMock()
        engine_stub.extract_maintenance.return_value = _make_gemini_result(
            [
                MaintenanceItem(
                    service_name="Recalibrate Quantum Flux Capacitor",
                    interval_miles=100000,
                ),
            ]
        )
        extractor._engine = engine_stub

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is True
        assert len(result.maintenance_schedules) == 1
        unmapped = result.maintenance_schedules[0]
        assert unmapped.service == "Recalibrate Quantum Flux Capacitor"
        assert unmapped.subtypes == []
        assert unmapped.confidence == ManualExtractor.DEFAULT_CONFIDENCE
# --- Empty response ---
class TestEmptyResponse:
    """Behaviour when Gemini returns no maintenance items."""

    def test_empty_gemini_response_returns_empty_schedules(self):
        """Edge: zero items still counts as success, with an empty schedule list."""
        extractor = ManualExtractor()
        engine_stub = MagicMock()
        engine_stub.extract_maintenance.return_value = _make_gemini_result([])
        extractor._engine = engine_stub

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is True
        assert result.maintenance_schedules == []
        assert result.error is None
        assert result.processing_time_ms >= 0
# --- Error handling ---
class TestErrorHandling:
    """Failures raised by the engine are reported through the result object."""

    def test_gemini_failure_returns_error_result(self):
        """Error: a GeminiProcessingError becomes an unsuccessful result with a message."""
        failure = GeminiProcessingError(
            "Gemini maintenance extraction failed: API quota exceeded"
        )
        extractor = ManualExtractor()
        engine_stub = MagicMock()
        engine_stub.extract_maintenance.side_effect = failure
        extractor._engine = engine_stub

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is False
        assert result.maintenance_schedules == []
        assert result.error is not None
        assert "quota exceeded" in result.error.lower()

    def test_unexpected_exception_returns_error_result(self):
        """Error: a non-Gemini exception is also surfaced in the result's error text."""
        extractor = ManualExtractor()
        engine_stub = MagicMock()
        engine_stub.extract_maintenance.side_effect = RuntimeError("Unexpected failure")
        extractor._engine = engine_stub

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is False
        assert result.error is not None
        assert "Unexpected failure" in result.error
# --- Job queue integration ---
class TestJobQueueIntegration:
    """Shape checks on the extraction result consumed by the job queue flow."""

    def test_extract_returns_all_required_fields(self):
        """The result and its schedules expose every attribute process_manual_job reads."""
        extractor = ManualExtractor()
        engine_stub = MagicMock()
        engine_stub.extract_maintenance.return_value = _make_gemini_result(
            [MaintenanceItem(service_name="Oil Change", interval_miles=5000)]
        )
        extractor._engine = engine_stub

        result = extractor.extract(_make_pdf_bytes())

        # Attributes that must exist on the top-level result.
        result_fields = (
            "success",
            "vehicle_info",
            "maintenance_schedules",
            "raw_tables",
            "processing_time_ms",
            "total_pages",
            "pages_processed",
            "error",
        )
        for field_name in result_fields:
            assert hasattr(result, field_name)

        # Attributes that must exist on each schedule entry.
        first_schedule = result.maintenance_schedules[0]
        schedule_fields = (
            "service",
            "interval_miles",
            "interval_months",
            "details",
            "confidence",
            "subtypes",
        )
        for field_name in schedule_fields:
            assert hasattr(first_schedule, field_name)