feat: add receipt extraction proxy endpoint (refs #130)

Add POST /api/ocr/extract/receipt endpoint that proxies to the Python
OCR service's /extract/receipt for receipt-specific field extraction.

- ReceiptExtractionResponse type with receiptType, extractedFields, rawText
- OcrClient.extractReceipt() with optional receipt_type form field
- OcrService.extractReceipt() with 10MB max, image-only validation
- OcrController.extractReceipt() with file upload and error mapping
- Route with auth middleware
- 9 unit tests covering normal, edge, and error scenarios

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-11 09:26:57 -06:00
parent e98b45eb3a
commit e0e578a627
7 changed files with 444 additions and 1 deletions

View File

@@ -8,6 +8,8 @@ import type {
OcrExtractRequest,
OcrJobSubmitRequest,
OcrResponse,
ReceiptExtractRequest,
ReceiptExtractionResponse,
VinExtractionResponse,
} from './ocr.types';
@@ -26,6 +28,14 @@ const SUPPORTED_TYPES = new Set([
'application/pdf',
]);
/**
 * MIME types accepted for receipt extraction.
 *
 * Receipts must be uploaded as images, so 'application/pdf' is deliberately
 * absent from this set.
 */
const SUPPORTED_IMAGE_TYPES = new Set<string>([
  'image/jpeg', 'image/png',
  'image/heic', 'image/heif',
]);
/**
* Domain service for OCR operations.
* Handles business logic and validation for OCR requests.
@@ -150,6 +160,65 @@ export class OcrService {
}
}
/**
 * Run receipt-specific OCR on an uploaded image.
 *
 * The upload is validated before the OCR client is contacted: the buffer must
 * not exceed MAX_SYNC_SIZE and the content type must be one of
 * SUPPORTED_IMAGE_TYPES. The request and its outcome are logged; failures are
 * logged and rethrown unchanged.
 *
 * @param userId - User ID, used only as log context
 * @param request - Receipt extraction request (file buffer, content type, receipt type)
 * @returns The extraction result exactly as returned by the OCR client
 * @throws Error with `statusCode` 413 when the file exceeds MAX_SYNC_SIZE
 * @throws Error with `statusCode` 415 when the content type is not a supported image type
 * @throws Any error raised while calling the OCR client
 */
async extractReceipt(userId: string, request: ReceiptExtractRequest): Promise<ReceiptExtractionResponse> {
  const { fileBuffer, contentType, receiptType } = request;

  if (fileBuffer.length > MAX_SYNC_SIZE) {
    // Attach the HTTP status without resorting to an `any`-typed error.
    throw Object.assign(
      new Error(`File too large. Max: ${MAX_SYNC_SIZE / (1024 * 1024)}MB.`),
      { statusCode: 413 }
    );
  }

  if (!SUPPORTED_IMAGE_TYPES.has(contentType)) {
    throw Object.assign(
      new Error(
        `Unsupported file type: ${contentType}. Supported: ${[...SUPPORTED_IMAGE_TYPES].join(', ')}`
      ),
      { statusCode: 415 }
    );
  }

  logger.info('Receipt extract requested', {
    operation: 'ocr.service.extractReceipt',
    userId,
    contentType,
    fileSize: fileBuffer.length,
    receiptType,
  });

  try {
    const result = await ocrClient.extractReceipt(fileBuffer, contentType, receiptType);

    logger.info('Receipt extract completed', {
      operation: 'ocr.service.extractReceipt.success',
      userId,
      success: result.success,
      receiptType: result.receiptType,
      // The response comes from an external service and is not validated here;
      // a missing extractedFields must not turn a completed call into a failure
      // just because the log line threw.
      fieldCount: Object.keys(result.extractedFields ?? {}).length,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  } catch (error) {
    logger.error('Receipt extract failed', {
      operation: 'ocr.service.extractReceipt.error',
      userId,
      error: error instanceof Error ? error.message : 'Unknown error',
    });
    throw error;
  }
}
/**
* Submit an async OCR job for large files.
*