/**
 * @ai-summary Domain service for OCR operations
 */

import { logger } from '../../../core/logging/logger';
import { ocrClient, JobNotFoundError } from '../external/ocr-client';
import type {
  JobResponse,
  ManualJobResponse,
  ManualJobSubmitRequest,
  OcrExtractRequest,
  OcrJobSubmitRequest,
  OcrResponse,
  ReceiptExtractRequest,
  ReceiptExtractionResponse,
  VinExtractionResponse,
} from './ocr.types';

/** Number of bytes in one megabyte, used for limits and for error messages. */
const BYTES_PER_MB = 1024 * 1024;

/** Maximum file size for sync processing (10MB) */
const MAX_SYNC_SIZE = 10 * BYTES_PER_MB;

/** Maximum file size for async processing (200MB) */
const MAX_ASYNC_SIZE = 200 * BYTES_PER_MB;

/** Supported MIME types */
const SUPPORTED_TYPES: ReadonlySet<string> = new Set([
  'image/jpeg',
  'image/png',
  'image/heic',
  'image/heif',
  'application/pdf',
]);

/** Image-only MIME types for receipt extraction (no PDF) */
const SUPPORTED_IMAGE_TYPES: ReadonlySet<string> = new Set([
  'image/jpeg',
  'image/png',
  'image/heic',
  'image/heif',
]);

/** An `Error` that additionally carries a numeric `statusCode` property. */
type StatusCodeError = Error & { statusCode: number };

/**
 * Build an `Error` with a `statusCode` own property attached.
 *
 * Replaces the `const err: any = new Error(...); err.statusCode = n` pattern
 * so the property is attached without opting out of type checking.
 */
function httpError(statusCode: number, message: string): StatusCodeError {
  return Object.assign(new Error(message), { statusCode });
}

/** Reduce an unknown thrown value to the string that is written to the error log. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : 'Unknown error';
}

/**
 * Throw a 413 error when `size` exceeds `maxBytes`.
 *
 * @param size - Size of the uploaded payload in bytes
 * @param maxBytes - Inclusive upper bound in bytes
 * @param message - Error message; defaults to the generic "File too large" text
 * @throws Error with `statusCode` 413 when `size > maxBytes`
 */
function assertFileSize(
  size: number,
  maxBytes: number,
  message = `File too large. Max: ${maxBytes / BYTES_PER_MB}MB.`
): void {
  if (size > maxBytes) {
    throw httpError(413, message);
  }
}

/**
 * Throw a 415 error when `contentType` is not a member of `allowed`.
 *
 * @param contentType - MIME type supplied with the request
 * @param allowed - Set of accepted MIME types; also listed in the error message
 * @throws Error with `statusCode` 415 when the type is not allowed
 */
function assertSupportedType(contentType: string, allowed: ReadonlySet<string>): void {
  if (!allowed.has(contentType)) {
    throw httpError(
      415,
      `Unsupported file type: ${contentType}. Supported: ${[...allowed].join(', ')}`
    );
  }
}

/**
 * Domain service for OCR operations.
 * Handles business logic and validation for OCR requests.
 *
 * Each operation validates the request, logs the attempt, delegates to
 * `ocrClient`, and logs the outcome. Errors thrown by the client are logged
 * and rethrown unchanged, except where noted on `getJobStatus`.
 */
export class OcrService {
  /**
   * Extract text from an image using synchronous OCR.
   *
   * @param userId - User ID for logging
   * @param request - OCR extraction request
   * @returns OCR extraction result
   * @throws Error with `statusCode` 413 if the file exceeds the sync size limit
   * @throws Error with `statusCode` 415 if the content type is not supported
   */
  async extract(userId: string, request: OcrExtractRequest): Promise<OcrResponse> {
    // Validate file size for sync processing
    assertFileSize(
      request.fileBuffer.length,
      MAX_SYNC_SIZE,
      `File too large for sync processing. Max: ${MAX_SYNC_SIZE / BYTES_PER_MB}MB. Use async job submission for larger files.`
    );

    // Validate content type
    assertSupportedType(request.contentType, SUPPORTED_TYPES);

    // Preprocessing is enabled unless the caller explicitly turns it off.
    const preprocess = request.preprocess ?? true;

    logger.info('OCR extract requested', {
      operation: 'ocr.service.extract',
      userId,
      contentType: request.contentType,
      fileSize: request.fileBuffer.length,
      preprocess,
    });

    try {
      const result = await ocrClient.extract(
        request.fileBuffer,
        request.contentType,
        preprocess
      );

      logger.info('OCR extract completed', {
        operation: 'ocr.service.extract.success',
        userId,
        success: result.success,
        documentType: result.documentType,
        confidence: result.confidence,
        processingTimeMs: result.processingTimeMs,
        textLength: result.rawText.length,
      });

      return result;
    } catch (error) {
      logger.error('OCR extract failed', {
        operation: 'ocr.service.extract.error',
        userId,
        error: describeError(error),
      });
      throw error;
    }
  }

  /**
   * Extract VIN from an image using VIN-specific OCR.
   *
   * @param userId - User ID for logging
   * @param request - OCR extraction request
   * @returns VIN extraction result
   * @throws Error with `statusCode` 413 if the file exceeds the sync size limit
   * @throws Error with `statusCode` 415 if the content type is not supported
   */
  async extractVin(userId: string, request: OcrExtractRequest): Promise<VinExtractionResponse> {
    assertFileSize(request.fileBuffer.length, MAX_SYNC_SIZE);
    assertSupportedType(request.contentType, SUPPORTED_TYPES);

    logger.info('VIN extract requested', {
      operation: 'ocr.service.extractVin',
      userId,
      contentType: request.contentType,
      fileSize: request.fileBuffer.length,
    });

    try {
      const result = await ocrClient.extractVin(
        request.fileBuffer,
        request.contentType
      );

      logger.info('VIN extract completed', {
        operation: 'ocr.service.extractVin.success',
        userId,
        success: result.success,
        vin: result.vin,
        confidence: result.confidence,
        processingTimeMs: result.processingTimeMs,
      });

      return result;
    } catch (error) {
      logger.error('VIN extract failed', {
        operation: 'ocr.service.extractVin.error',
        userId,
        error: describeError(error),
      });
      throw error;
    }
  }

  /**
   * Extract data from a receipt image using receipt-specific OCR.
   *
   * @param userId - User ID for logging
   * @param request - Receipt extraction request
   * @returns Receipt extraction result
   * @throws Error with `statusCode` 413 if the file exceeds the sync size limit
   * @throws Error with `statusCode` 415 if the content type is not an image type
   */
  async extractReceipt(
    userId: string,
    request: ReceiptExtractRequest
  ): Promise<ReceiptExtractionResponse> {
    assertFileSize(request.fileBuffer.length, MAX_SYNC_SIZE);
    // Receipts are image-only: PDF is rejected here even though other operations accept it.
    assertSupportedType(request.contentType, SUPPORTED_IMAGE_TYPES);

    logger.info('Receipt extract requested', {
      operation: 'ocr.service.extractReceipt',
      userId,
      contentType: request.contentType,
      fileSize: request.fileBuffer.length,
      receiptType: request.receiptType,
    });

    try {
      const result = await ocrClient.extractReceipt(
        request.fileBuffer,
        request.contentType,
        request.receiptType
      );

      logger.info('Receipt extract completed', {
        operation: 'ocr.service.extractReceipt.success',
        userId,
        success: result.success,
        receiptType: result.receiptType,
        fieldCount: Object.keys(result.extractedFields).length,
        processingTimeMs: result.processingTimeMs,
      });

      return result;
    } catch (error) {
      logger.error('Receipt extract failed', {
        operation: 'ocr.service.extractReceipt.error',
        userId,
        error: describeError(error),
      });
      throw error;
    }
  }

  /**
   * Submit an async OCR job for large files.
   *
   * @param userId - User ID for logging
   * @param request - Job submission request
   * @returns Job response with job ID
   * @throws Error with `statusCode` 413 if the file exceeds the async size limit
   * @throws Error with `statusCode` 415 if the content type is not supported
   */
  async submitJob(userId: string, request: OcrJobSubmitRequest): Promise<JobResponse> {
    // Validate file size for async processing
    assertFileSize(request.fileBuffer.length, MAX_ASYNC_SIZE);

    // Validate content type
    assertSupportedType(request.contentType, SUPPORTED_TYPES);

    logger.info('OCR job submit requested', {
      operation: 'ocr.service.submitJob',
      userId,
      contentType: request.contentType,
      fileSize: request.fileBuffer.length,
      // Only the presence of a callback URL is logged, not the URL itself.
      hasCallback: !!request.callbackUrl,
    });

    try {
      const result = await ocrClient.submitJob(
        request.fileBuffer,
        request.contentType,
        request.callbackUrl
      );

      logger.info('OCR job submitted', {
        operation: 'ocr.service.submitJob.success',
        userId,
        jobId: result.jobId,
        status: result.status,
      });

      return result;
    } catch (error) {
      logger.error('OCR job submit failed', {
        operation: 'ocr.service.submitJob.error',
        userId,
        error: describeError(error),
      });
      throw error;
    }
  }

  /**
   * Submit an async manual extraction job for PDF owner's manuals.
   *
   * @param userId - User ID for logging
   * @param request - Manual job submission request
   * @returns Manual job response with job ID
   * @throws Error with `statusCode` 413 if the file exceeds the async size limit
   * @throws Error with `statusCode` 400 if the content type is not `application/pdf`
   */
  async submitManualJob(
    userId: string,
    request: ManualJobSubmitRequest
  ): Promise<ManualJobResponse> {
    // Validate file size for async processing (200MB max)
    assertFileSize(request.fileBuffer.length, MAX_ASYNC_SIZE);

    // Manual extraction only supports PDF.
    // NOTE(review): the other operations reject unsupported types with 415; this one
    // uses 400. Kept as-is because callers may depend on it — confirm which is intended.
    if (request.contentType !== 'application/pdf') {
      throw httpError(
        400,
        `Unsupported file type: ${request.contentType}. Manual extraction requires PDF files.`
      );
    }

    logger.info('Manual job submit requested', {
      operation: 'ocr.service.submitManualJob',
      userId,
      contentType: request.contentType,
      fileSize: request.fileBuffer.length,
      hasVehicleId: !!request.vehicleId,
    });

    try {
      const result = await ocrClient.submitManualJob(
        request.fileBuffer,
        request.contentType,
        request.vehicleId
      );

      logger.info('Manual job submitted', {
        operation: 'ocr.service.submitManualJob.success',
        userId,
        jobId: result.jobId,
        status: result.status,
        estimatedSeconds: result.estimatedSeconds,
      });

      return result;
    } catch (error) {
      logger.error('Manual job submit failed', {
        operation: 'ocr.service.submitManualJob.error',
        userId,
        error: describeError(error),
      });
      throw error;
    }
  }

  /**
   * Get the status of an async OCR job.
   *
   * @param userId - User ID for logging
   * @param jobId - Job ID to check
   * @returns Job status response
   * @throws Error with `statusCode` 410 when the client reports `JobNotFoundError`
   */
  async getJobStatus(userId: string, jobId: string): Promise<JobResponse> {
    logger.debug('OCR job status requested', {
      operation: 'ocr.service.getJobStatus',
      userId,
      jobId,
    });

    try {
      const result = await ocrClient.getJobStatus(jobId);

      logger.debug('OCR job status retrieved', {
        operation: 'ocr.service.getJobStatus.success',
        userId,
        jobId,
        status: result.status,
        progress: result.progress,
      });

      return result;
    } catch (error) {
      // A missing job is translated to a 410 and is not written to the error log.
      if (error instanceof JobNotFoundError) {
        throw httpError(410, 'Job expired (max 2 hours). Please resubmit.');
      }

      logger.error('OCR job status failed', {
        operation: 'ocr.service.getJobStatus.error',
        userId,
        jobId,
        error: describeError(error),
      });
      throw error;
    }
  }

  /**
   * Check if the OCR service is available.
   *
   * @returns true if OCR service is healthy
   */
  async isServiceHealthy(): Promise<boolean> {
    return ocrClient.isHealthy();
  }
}

/** Singleton instance */
export const ocrService = new OcrService();