feat: add receipt extraction proxy endpoint (refs #130)
Add POST /api/ocr/extract/receipt endpoint that proxies to the Python OCR service's /extract/receipt for receipt-specific field extraction.

- ReceiptExtractionResponse type with receiptType, extractedFields, rawText
- OcrClient.extractReceipt() with optional receipt_type form field
- OcrService.extractReceipt() with 10MB max, image-only validation
- OcrController.extractReceipt() with file upload and error mapping
- Route with auth middleware
- 9 unit tests covering normal, edge, and error scenarios

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,8 @@ import type {
|
||||
OcrExtractRequest,
|
||||
OcrJobSubmitRequest,
|
||||
OcrResponse,
|
||||
ReceiptExtractRequest,
|
||||
ReceiptExtractionResponse,
|
||||
VinExtractionResponse,
|
||||
} from './ocr.types';
|
||||
|
||||
@@ -26,6 +28,14 @@ const SUPPORTED_TYPES = new Set([
|
||||
'application/pdf',
|
||||
]);
|
||||
|
||||
/**
 * MIME types accepted for receipt extraction.
 *
 * Only image formats are listed; PDF is intentionally absent.
 */
const SUPPORTED_IMAGE_TYPES = new Set<string>(
  ['jpeg', 'png', 'heic', 'heif'].map((subtype) => `image/${subtype}`)
);
|
||||
|
||||
/**
|
||||
* Domain service for OCR operations.
|
||||
* Handles business logic and validation for OCR requests.
|
||||
@@ -150,6 +160,65 @@ export class OcrService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Extract data from a receipt image using receipt-specific OCR.
 *
 * The upload is validated before the OCR client is called: the buffer must
 * not exceed MAX_SYNC_SIZE and the content type must be one of
 * SUPPORTED_IMAGE_TYPES. The request, the outcome and any failure are logged
 * with the user ID as context.
 *
 * @param userId - User ID for logging
 * @param request - Receipt extraction request (file buffer, content type, receipt type)
 * @returns Receipt extraction result as returned by the OCR client
 * @throws Error with `statusCode` 413 when the file is larger than MAX_SYNC_SIZE
 * @throws Error with `statusCode` 415 when the content type is not a supported image type
 * @throws Any error raised by the OCR client, rethrown unchanged after logging
 */
async extractReceipt(userId: string, request: ReceiptExtractRequest): Promise<ReceiptExtractionResponse> {
  const { fileBuffer, contentType, receiptType } = request;

  if (fileBuffer.length > MAX_SYNC_SIZE) {
    // Object.assign attaches the HTTP status without resorting to an `any`-typed error.
    throw Object.assign(
      new Error(`File too large. Max: ${MAX_SYNC_SIZE / (1024 * 1024)}MB.`),
      { statusCode: 413 }
    );
  }

  if (!SUPPORTED_IMAGE_TYPES.has(contentType)) {
    throw Object.assign(
      new Error(
        `Unsupported file type: ${contentType}. Supported: ${[...SUPPORTED_IMAGE_TYPES].join(', ')}`
      ),
      { statusCode: 415 }
    );
  }

  logger.info('Receipt extract requested', {
    operation: 'ocr.service.extractReceipt',
    userId,
    contentType,
    fileSize: fileBuffer.length,
    receiptType,
  });

  try {
    const result = await ocrClient.extractReceipt(fileBuffer, contentType, receiptType);

    // The payload comes from an external service; if extractedFields is missing
    // at runtime, Object.keys would throw here and a completed extraction would
    // be logged and rethrown as a failure. Fall back to an empty object instead.
    const fieldCount = Object.keys(result.extractedFields ?? {}).length;

    logger.info('Receipt extract completed', {
      operation: 'ocr.service.extractReceipt.success',
      userId,
      success: result.success,
      receiptType: result.receiptType,
      fieldCount,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  } catch (error) {
    logger.error('Receipt extract failed', {
      operation: 'ocr.service.extractReceipt.error',
      userId,
      error: error instanceof Error ? error.message : 'Unknown error',
    });
    // Rethrow so the caller still sees the original error (and any statusCode on it).
    throw error;
  }
}
|
||||
|
||||
/**
|
||||
* Submit an async OCR job for large files.
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user