feat: add receipt extraction proxy endpoint (refs #130)

Add POST /api/ocr/extract/receipt endpoint that proxies to the Python
OCR service's /extract/receipt for receipt-specific field extraction.

- ReceiptExtractionResponse type with receiptType, extractedFields, rawText
- OcrClient.extractReceipt() with optional receipt_type form field
- OcrService.extractReceipt() with 10MB max, image-only validation
- OcrController.extractReceipt() with file upload and error mapping
- Route with auth middleware
- 9 unit tests covering normal, edge, and error scenarios

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-11 09:26:57 -06:00
parent e98b45eb3a
commit e0e578a627
7 changed files with 444 additions and 1 deletions

View File

@@ -2,7 +2,7 @@
* @ai-summary HTTP client for OCR service communication
*/
import { logger } from '../../../core/logging/logger';
import type { JobResponse, OcrResponse, VinExtractionResponse } from '../domain/ocr.types';
import type { JobResponse, OcrResponse, ReceiptExtractionResponse, VinExtractionResponse } from '../domain/ocr.types';
/** OCR service configuration */
const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000';
@@ -119,6 +119,62 @@ export class OcrClient {
return result;
}
/**
 * Extract data from a receipt image using receipt-specific OCR.
 *
 * POSTs the file as form data to `${baseUrl}/extract/receipt` and returns the
 * parsed JSON body. The body is cast to `ReceiptExtractionResponse` without
 * runtime validation, so callers should not assume every field is present.
 *
 * @param fileBuffer - Image file buffer
 * @param contentType - MIME type of the file
 * @param receiptType - Optional receipt type hint (e.g., 'fuel'); sent as the
 *   `receipt_type` form field only when a non-empty value is given
 * @returns Receipt extraction result
 * @throws Error when the OCR service responds with a non-OK status; the
 *   message contains the HTTP status and the response body text
 */
async extractReceipt(
  fileBuffer: Buffer,
  contentType: string,
  receiptType?: string
): Promise<ReceiptExtractionResponse> {
  const formData = this.buildFormData(fileBuffer, contentType);
  if (receiptType) {
    formData.append('receipt_type', receiptType);
  }

  const url = `${this.baseUrl}/extract/receipt`;
  logger.info('OCR receipt extract request', {
    operation: 'ocr.client.extractReceipt',
    url,
    contentType,
    fileSize: fileBuffer.length,
    receiptType,
  });

  const response = await this.fetchWithTimeout(url, {
    method: 'POST',
    body: formData,
  });

  if (!response.ok) {
    const errorText = await response.text();
    logger.error('OCR receipt extract failed', {
      operation: 'ocr.client.extractReceipt.error',
      status: response.status,
      error: errorText,
    });
    throw new Error(`OCR service error: ${response.status} - ${errorText}`);
  }

  const result = (await response.json()) as ReceiptExtractionResponse;

  // The payload is unvalidated JSON: `extractedFields` may be missing or null
  // (e.g. on an unsuccessful extraction). Guard it so that logging can never
  // turn an otherwise successful call into a TypeError.
  const fieldCount = Object.keys(result.extractedFields ?? {}).length;

  logger.info('OCR receipt extract completed', {
    operation: 'ocr.client.extractReceipt.success',
    success: result.success,
    receiptType: result.receiptType,
    fieldCount,
    processingTimeMs: result.processingTimeMs,
  });

  return result;
}
/**
* Submit an async OCR job for large files.
*