feat: add receipt extraction proxy endpoint (refs #130)
Add POST /api/ocr/extract/receipt endpoint that proxies to the Python OCR service's /extract/receipt for receipt-specific field extraction.

- ReceiptExtractionResponse type with receiptType, extractedFields, rawText
- OcrClient.extractReceipt() with optional receipt_type form field
- OcrService.extractReceipt() with 10MB max, image-only validation
- OcrController.extractReceipt() with file upload and error mapping
- Route with auth middleware
- 9 unit tests covering normal, edge, and error scenarios

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,14 @@ const SUPPORTED_TYPES = new Set([
|
||||
'application/pdf',
|
||||
]);
|
||||
|
||||
/**
 * MIME types accepted for receipt extraction.
 * Restricted to image formats; PDF is not accepted here.
 */
const SUPPORTED_IMAGE_TYPES = new Set<string>(['image/jpeg', 'image/png', 'image/heic', 'image/heif']);
|
||||
|
||||
export class OcrController {
|
||||
/**
|
||||
* POST /api/ocr/extract
|
||||
@@ -223,6 +231,111 @@ export class OcrController {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * POST /api/ocr/extract/receipt
 * Extract data from a receipt image using receipt-specific OCR.
 *
 * Reads a single uploaded file from the multipart request, validates it,
 * and forwards it (with the optional `receipt_type` form field) to
 * `ocrService.extractReceipt`.
 *
 * Responses sent by this handler:
 * - 200: the result object returned by `ocrService.extractReceipt`
 * - 400: no file part was provided, or the file is empty
 * - 413: the upload was cut off by the multipart size limit, or the
 *        service rejected with `statusCode === 413`
 * - 415: the MIME type is not in `SUPPORTED_IMAGE_TYPES`, or the service
 *        rejected with `statusCode === 415`
 * - 500: any other failure raised by the service call
 *
 * @param request - Incoming request; the user id is read from `request.user.sub`.
 * @param reply - Reply used to send the JSON response.
 */
async extractReceipt(
  request: FastifyRequest,
  reply: FastifyReply
) {
  const userId = (request as any).user?.sub as string;

  logger.info('Receipt extract requested', {
    operation: 'ocr.controller.extractReceipt',
    userId,
  });

  const file = await (request as any).file({ limits: { files: 1 } });
  if (!file) {
    logger.warn('No file provided for receipt extraction', {
      operation: 'ocr.controller.extractReceipt.no_file',
      userId,
    });
    return reply.code(400).send({
      error: 'Bad Request',
      message: 'No file provided',
    });
  }

  const contentType = file.mimetype as string;
  if (!SUPPORTED_IMAGE_TYPES.has(contentType)) {
    logger.warn('Unsupported file type for receipt extraction', {
      operation: 'ocr.controller.extractReceipt.unsupported_type',
      userId,
      contentType,
      fileName: file.filename,
    });
    return reply.code(415).send({
      error: 'Unsupported Media Type',
      message: `Unsupported file type: ${contentType}. Supported: JPEG, PNG, HEIC`,
    });
  }

  // Drain the upload stream into memory.
  const chunks: Buffer[] = [];
  for await (const chunk of file.file) {
    chunks.push(chunk);
  }
  const fileBuffer = Buffer.concat(chunks);

  // NOTE(review): assumes the @fastify/multipart file API, where a stream
  // that hits the configured size limit ends normally with `truncated` set.
  // Without this check a partial image would be forwarded to the OCR service.
  if (file.file.truncated) {
    logger.warn('File exceeded upload size limit for receipt extraction', {
      operation: 'ocr.controller.extractReceipt.file_too_large',
      userId,
      fileName: file.filename,
    });
    return reply.code(413).send({
      error: 'Payload Too Large',
      message: 'File exceeds the maximum upload size',
    });
  }

  if (fileBuffer.length === 0) {
    logger.warn('Empty file provided for receipt extraction', {
      operation: 'ocr.controller.extractReceipt.empty_file',
      userId,
      fileName: file.filename,
    });
    return reply.code(400).send({
      error: 'Bad Request',
      message: 'Empty file provided',
    });
  }

  // Get optional receipt_type from form fields. Only a plain string value is
  // forwarded; anything else (missing field, repeated field, non-text part)
  // is treated as "not provided".
  const receiptTypeValue: unknown = file.fields?.receipt_type?.value;
  const receiptType =
    typeof receiptTypeValue === 'string' ? receiptTypeValue : undefined;

  try {
    const result = await ocrService.extractReceipt(userId, {
      fileBuffer,
      contentType,
      receiptType,
    });

    logger.info('Receipt extract completed', {
      operation: 'ocr.controller.extractReceipt.success',
      userId,
      success: result.success,
      receiptType: result.receiptType,
      processingTimeMs: result.processingTimeMs,
    });

    return reply.code(200).send(result);
  } catch (error: unknown) {
    // Narrow the thrown value before reading from it, so a non-object throw
    // cannot cause a secondary TypeError inside the error handler.
    const details = (
      typeof error === 'object' && error !== null ? error : {}
    ) as { statusCode?: unknown; message?: unknown };
    const message =
      typeof details.message === 'string' ? details.message : String(error);

    if (details.statusCode === 413) {
      return reply.code(413).send({
        error: 'Payload Too Large',
        message,
      });
    }
    if (details.statusCode === 415) {
      return reply.code(415).send({
        error: 'Unsupported Media Type',
        message,
      });
    }

    logger.error('Receipt extract failed', {
      operation: 'ocr.controller.extractReceipt.error',
      userId,
      error: message,
    });

    return reply.code(500).send({
      error: 'Internal Server Error',
      message: 'Receipt extraction failed',
    });
  }
}
|
||||
|
||||
/**
|
||||
* POST /api/ocr/jobs
|
||||
* Submit an async OCR job for large files.
|
||||
|
||||
Reference in New Issue
Block a user