feat: add receipt extraction proxy endpoint (refs #130)

Add POST /api/ocr/extract/receipt endpoint that proxies to the Python OCR service's /extract/receipt for receipt-specific field extraction.

- ReceiptExtractionResponse type with receiptType, extractedFields, rawText
- OcrClient.extractReceipt() with optional receipt_type form field
- OcrService.extractReceipt() with 10MB max, image-only validation
- OcrController.extractReceipt() with file upload and error mapping
- Route with auth middleware
- 9 unit tests covering normal, edge, and error scenarios

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 09:26:57 -06:00
parent e98b45eb3a
commit e0e578a627
7 changed files with 444 additions and 1 deletions
--- a/backend/src/features/ocr/domain/ocr.service.ts
+++ b/backend/src/features/ocr/domain/ocr.service.ts
@@ -8,6 +8,8 @@ import type {
  OcrExtractRequest,
  OcrJobSubmitRequest,
  OcrResponse,
+  ReceiptExtractRequest,
+  ReceiptExtractionResponse,
  VinExtractionResponse,
 } from './ocr.types';

@@ -26,6 +28,14 @@ const SUPPORTED_TYPES = new Set([
  'application/pdf',
 ]);

+/** MIME types accepted by extractReceipt: JPEG, PNG, HEIC and HEIF images only. PDF, which SUPPORTED_TYPES allows, is deliberately absent. */
+const SUPPORTED_IMAGE_TYPES = new Set([
+  'image/jpeg',
+  'image/png',
+  'image/heic',
+  'image/heif',
+]);
+
 /**
 * Domain service for OCR operations.
 * Handles business logic and validation for OCR requests.
@@ -150,6 +160,65 @@ export class OcrService {
    }
  }

+  /**
+   * Validate a receipt image (size first, then MIME type) and forward it to the OCR client for receipt-specific extraction.
+   * @param userId - Only used as a field in the log entries written by this method
+   * @param request - Supplies fileBuffer, contentType and receiptType; receiptType is passed to the client without validation here
+   * @returns The result of ocrClient.extractReceipt, returned as-is
+   * @throws Error with statusCode 413 when the buffer exceeds MAX_SYNC_SIZE, or 415 when contentType is not in SUPPORTED_IMAGE_TYPES; client errors are logged and rethrown
+   */
+  async extractReceipt(userId: string, request: ReceiptExtractRequest): Promise<ReceiptExtractionResponse> {
+    if (request.fileBuffer.length > MAX_SYNC_SIZE) { // size is checked before the MIME type
+      const err: any = new Error(
+        `File too large. Max: ${MAX_SYNC_SIZE / (1024 * 1024)}MB.`
+      );
+      err.statusCode = 413; // presumably mapped to the HTTP status by the controller - TODO confirm
+      throw err;
+    }
+
+    if (!SUPPORTED_IMAGE_TYPES.has(request.contentType)) { // exact string match against the set entries
+      const err: any = new Error(
+        `Unsupported file type: ${request.contentType}. Supported: ${[...SUPPORTED_IMAGE_TYPES].join(', ')}`
+      );
+      err.statusCode = 415; // presumably mapped to the HTTP status by the controller - TODO confirm
+      throw err;
+    }
+
+    logger.info('Receipt extract requested', { // logged only after both validations pass; metadata only, not the file bytes
+      operation: 'ocr.service.extractReceipt',
+      userId,
+      contentType: request.contentType,
+      fileSize: request.fileBuffer.length,
+      receiptType: request.receiptType,
+    });
+
+    try {
+      const result = await ocrClient.extractReceipt(
+        request.fileBuffer,
+        request.contentType,
+        request.receiptType
+      );
+
+      logger.info('Receipt extract completed', { // logged for every resolved call, whatever result.success is
+        operation: 'ocr.service.extractReceipt.success',
+        userId,
+        success: result.success,
+        receiptType: result.receiptType,
+        fieldCount: Object.keys(result.extractedFields).length, // NOTE(review): throws if extractedFields is missing; the catch below would then log and rethrow it as a failure
+        processingTimeMs: result.processingTimeMs,
+      });
+
+      return result;
+    } catch (error) {
+      logger.error('Receipt extract failed', {
+        operation: 'ocr.service.extractReceipt.error',
+        userId,
+        error: error instanceof Error ? error.message : 'Unknown error', // only the message is logged, not the stack
+      });
+      throw error; // rethrown unchanged so the caller receives the original error object
+    }
+  }
+
  /**
   * Submit an async OCR job for large files.
   *