feat: add receipt extraction proxy endpoint (refs #130)

Add POST /api/ocr/extract/receipt endpoint that proxies to the Python
OCR service's /extract/receipt for receipt-specific field extraction.

- ReceiptExtractionResponse type with receiptType, extractedFields, rawText
- OcrClient.extractReceipt() with optional receipt_type form field
- OcrService.extractReceipt() with 10MB max, image-only validation
- OcrController.extractReceipt() with file upload and error mapping
- Route with auth middleware
- 9 unit tests covering normal, edge, and error scenarios

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-11 09:26:57 -06:00
parent e98b45eb3a
commit e0e578a627
7 changed files with 444 additions and 1 deletion

View File

@@ -0,0 +1,181 @@
/**
 * @ai-summary Unit tests for OcrService.extractReceipt (receipt extraction at the service layer, with the OCR client mocked)
 */
import { OcrService } from '../../domain/ocr.service';
import { ocrClient } from '../../external/ocr-client';
import type { ReceiptExtractionResponse } from '../../domain/ocr.types';
// Replace the OCR client and logger modules with Jest mocks (no factory is
// supplied, so Jest generates the mock for each module).
jest.mock('../../external/ocr-client');
jest.mock('../../../../core/logging/logger');

/** Signature of the client method that the tests stub. */
type ExtractReceiptFn = typeof ocrClient.extractReceipt;

// Typed view of the mocked client method so tests can call mockResolvedValue /
// mockRejectedValue on it without further casts.
const mockExtractReceipt =
  ocrClient.extractReceipt as jest.MockedFunction<ExtractReceiptFn>;
/**
 * Service-level tests for `OcrService.extractReceipt`.
 *
 * `mockExtractReceipt` stands in for the OCR client, so each case decides what
 * the client resolves or rejects with and then checks what the service returns
 * or throws.
 */
describe('OcrService.extractReceipt', () => {
  /** Request shape accepted by the method under test, derived from its signature. */
  type ReceiptRequest = Parameters<OcrService['extractReceipt']>[1];

  const userId = 'test-user-id';
  let service: OcrService;

  // Successful fuel-receipt reply with all six fields populated.
  const mockReceiptResponse: ReceiptExtractionResponse = {
    success: true,
    receiptType: 'fuel',
    extractedFields: {
      merchantName: { value: 'Shell Gas Station', confidence: 0.92 },
      transactionDate: { value: '2026-02-10', confidence: 0.88 },
      totalAmount: { value: '45.67', confidence: 0.95 },
      fuelQuantity: { value: '12.345', confidence: 0.87 },
      pricePerUnit: { value: '3.699', confidence: 0.90 },
      fuelGrade: { value: 'Regular 87', confidence: 0.85 },
    },
    rawText: 'SHELL\n02/10/2026\nREGULAR 87\n12.345 GAL\n$3.699/GAL\nTOTAL $45.67',
    processingTimeMs: 1250,
    error: null,
  };

  /** Calls the method under test on behalf of the fixed test user. */
  const extract = (request: ReceiptRequest) =>
    service.extractReceipt(userId, request);

  /** Builds a JPEG request around a small placeholder payload. */
  const jpegRequest = (): ReceiptRequest => ({
    fileBuffer: Buffer.from('fake-image-data'),
    contentType: 'image/jpeg',
  });

  beforeEach(() => {
    // Drop recorded calls from the previous test and start from a fresh service.
    jest.clearAllMocks();
    service = new OcrService();
  });

  describe('valid receipt extraction', () => {
    // Every case in this group expects the client to succeed with the full reply.
    beforeEach(() => {
      mockExtractReceipt.mockResolvedValue(mockReceiptResponse);
    });

    it('should return receipt extraction response for valid image', async () => {
      const result = await extract(jpegRequest());

      expect(result).toMatchObject({
        success: true,
        receiptType: 'fuel',
        extractedFields: {
          merchantName: { value: 'Shell Gas Station' },
          totalAmount: { value: '45.67' },
          fuelQuantity: { value: '12.345' },
          pricePerUnit: { value: '3.699' },
          fuelGrade: { value: 'Regular 87' },
          transactionDate: { value: '2026-02-10' },
        },
        processingTimeMs: 1250,
      });
    });

    it('should pass receipt_type hint to client when provided', async () => {
      await extract({ ...jpegRequest(), receiptType: 'fuel' });

      expect(mockExtractReceipt).toHaveBeenCalledWith(
        expect.any(Buffer),
        'image/jpeg',
        'fuel'
      );
    });

    // Only the first column is interpolated into the title; the remaining
    // columns are passed to the test body.
    it.each([
      ['PNG', 'image/png', 'fake-png-data'],
      ['HEIC', 'image/heic', 'fake-heic-data'],
    ])('should support %s images', async (_label, contentType, payload) => {
      const result = await extract({
        fileBuffer: Buffer.from(payload),
        contentType,
      });

      expect(result.success).toBe(true);
    });
  });

  describe('missing optional fields', () => {
    it('should handle response with some fields not detected', async () => {
      // Reply in which only the merchant and the total were recognised.
      const partialResponse: ReceiptExtractionResponse = {
        success: true,
        receiptType: 'fuel',
        extractedFields: {
          merchantName: { value: 'Unknown Station', confidence: 0.60 },
          totalAmount: { value: '30.00', confidence: 0.88 },
        },
        rawText: 'UNKNOWN STATION\nTOTAL $30.00',
        processingTimeMs: 980,
        error: null,
      };
      mockExtractReceipt.mockResolvedValue(partialResponse);

      const result = await extract(jpegRequest());
      const {
        merchantName,
        totalAmount,
        fuelQuantity,
        pricePerUnit,
        fuelGrade,
        transactionDate,
      } = result.extractedFields;

      expect(result.success).toBe(true);
      expect(merchantName).toBeDefined();
      expect(totalAmount).toBeDefined();
      expect(fuelQuantity).toBeUndefined();
      expect(pricePerUnit).toBeUndefined();
      expect(fuelGrade).toBeUndefined();
      expect(transactionDate).toBeUndefined();
    });
  });

  describe('error handling', () => {
    /** Asserts that the extraction rejects with an error carrying `statusCode`. */
    const expectRejectionStatus = (request: ReceiptRequest, statusCode: number) =>
      expect(extract(request)).rejects.toMatchObject({ statusCode });

    it('should throw 415 for unsupported file type (PDF)', async () => {
      await expectRejectionStatus(
        {
          fileBuffer: Buffer.from('fake-pdf-data'),
          contentType: 'application/pdf',
        },
        415
      );
    });

    it('should throw 415 for text/plain', async () => {
      await expectRejectionStatus(
        {
          fileBuffer: Buffer.from('not an image'),
          contentType: 'text/plain',
        },
        415
      );
    });

    it('should throw 413 for oversized file', async () => {
      const largeBuffer = Buffer.alloc(11 * 1024 * 1024); // 11MB

      await expectRejectionStatus(
        { fileBuffer: largeBuffer, contentType: 'image/jpeg' },
        413
      );
    });

    it('should propagate OCR service errors', async () => {
      const upstreamFailure = new Error('OCR service error: 500 - Internal error');
      mockExtractReceipt.mockRejectedValue(upstreamFailure);

      await expect(extract(jpegRequest())).rejects.toThrow(
        'OCR service error: 500 - Internal error'
      );
    });
  });
});