/**
 * @ai-summary HTTP client for OCR service communication
 */

import { logger } from '../../../core/logging/logger';
import type {
  JobResponse,
  ManualJobResponse,
  OcrResponse,
  ReceiptExtractionResponse,
  VinExtractionResponse,
} from '../domain/ocr.types';

/** OCR service configuration */
const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000';
const OCR_TIMEOUT_MS = 120000; // 120 seconds for sync operations (PaddleOCR model loading on first call)

/**
 * HTTP client for communicating with the OCR service.
 *
 * Every request goes through {@link OcrClient.fetchWithTimeout}, so a request
 * that has not produced a response within `OCR_TIMEOUT_MS` is aborted and the
 * resulting fetch rejection propagates to the caller.
 *
 * Non-2xx responses are logged and surfaced as an `Error` whose message is
 * `OCR service error: <status> - <body>` and which carries the HTTP status in
 * a `statusCode` property.
 */
export class OcrClient {
  private readonly baseUrl: string;

  /**
   * @param baseUrl - Root URL of the OCR service (defaults to `OCR_SERVICE_URL`)
   */
  constructor(baseUrl: string = OCR_SERVICE_URL) {
    this.baseUrl = baseUrl;
  }

  /**
   * Extract text from an image using OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @param preprocess - Whether to apply preprocessing (default: true)
   * @returns OCR extraction result
   * @throws Error with `statusCode` when the service responds with a non-2xx status
   */
  async extract(
    fileBuffer: Buffer,
    contentType: string,
    preprocess: boolean = true
  ): Promise<OcrResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    const url = `${this.baseUrl}/extract?preprocess=${preprocess}`;

    logger.info('OCR extract request', {
      operation: 'ocr.client.extract',
      url,
      contentType,
      fileSize: fileBuffer.length,
      preprocess,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });
    await this.ensureOk(response, 'OCR extract failed', 'ocr.client.extract.error');

    const result = (await response.json()) as OcrResponse;

    logger.info('OCR extract completed', {
      operation: 'ocr.client.extract.success',
      success: result.success,
      documentType: result.documentType,
      confidence: result.confidence,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  }

  /**
   * Extract VIN from an image using VIN-specific OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @returns VIN extraction result
   * @throws Error with `statusCode` when the service responds with a non-2xx status
   */
  async extractVin(
    fileBuffer: Buffer,
    contentType: string
  ): Promise<VinExtractionResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    const url = `${this.baseUrl}/extract/vin`;

    logger.info('OCR VIN extract request', {
      operation: 'ocr.client.extractVin',
      url,
      contentType,
      fileSize: fileBuffer.length,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });
    await this.ensureOk(response, 'OCR VIN extract failed', 'ocr.client.extractVin.error');

    const result = (await response.json()) as VinExtractionResponse;

    logger.info('OCR VIN extract completed', {
      operation: 'ocr.client.extractVin.success',
      success: result.success,
      vin: result.vin,
      confidence: result.confidence,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  }

  /**
   * Extract data from a receipt image using receipt-specific OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @param receiptType - Optional receipt type hint (e.g., 'fuel'); sent as the
   *   `receipt_type` form field only when truthy
   * @returns Receipt extraction result
   * @throws Error with `statusCode` when the service responds with a non-2xx status
   */
  async extractReceipt(
    fileBuffer: Buffer,
    contentType: string,
    receiptType?: string
  ): Promise<ReceiptExtractionResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    if (receiptType) {
      formData.append('receipt_type', receiptType);
    }
    const url = `${this.baseUrl}/extract/receipt`;

    logger.info('OCR receipt extract request', {
      operation: 'ocr.client.extractReceipt',
      url,
      contentType,
      fileSize: fileBuffer.length,
      receiptType,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });
    await this.ensureOk(
      response,
      'OCR receipt extract failed',
      'ocr.client.extractReceipt.error'
    );

    const result = (await response.json()) as ReceiptExtractionResponse;

    logger.info('OCR receipt extract completed', {
      operation: 'ocr.client.extractReceipt.success',
      success: result.success,
      receiptType: result.receiptType,
      // Guard so a payload without extractedFields cannot make logging throw.
      fieldCount: Object.keys(result.extractedFields ?? {}).length,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  }

  /**
   * Extract data from a maintenance receipt image using maintenance-specific OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @returns Receipt extraction result (receiptType: "maintenance")
   * @throws Error with `statusCode` when the service responds with a non-2xx status
   */
  async extractMaintenanceReceipt(
    fileBuffer: Buffer,
    contentType: string
  ): Promise<ReceiptExtractionResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    const url = `${this.baseUrl}/extract/maintenance-receipt`;

    logger.info('OCR maintenance receipt extract request', {
      operation: 'ocr.client.extractMaintenanceReceipt',
      url,
      contentType,
      fileSize: fileBuffer.length,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });
    await this.ensureOk(
      response,
      'OCR maintenance receipt extract failed',
      'ocr.client.extractMaintenanceReceipt.error'
    );

    const result = (await response.json()) as ReceiptExtractionResponse;

    logger.info('OCR maintenance receipt extract completed', {
      operation: 'ocr.client.extractMaintenanceReceipt.success',
      success: result.success,
      receiptType: result.receiptType,
      // Guard so a payload without extractedFields cannot make logging throw.
      fieldCount: Object.keys(result.extractedFields ?? {}).length,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  }

  /**
   * Submit an async OCR job for large files.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @param callbackUrl - Optional URL to call when job completes; sent as the
   *   `callback_url` form field only when truthy
   * @returns Job submission response
   * @throws Error with `statusCode` when the service responds with a non-2xx status
   */
  async submitJob(
    fileBuffer: Buffer,
    contentType: string,
    callbackUrl?: string
  ): Promise<JobResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    if (callbackUrl) {
      formData.append('callback_url', callbackUrl);
    }
    const url = `${this.baseUrl}/jobs`;

    logger.info('OCR job submit request', {
      operation: 'ocr.client.submitJob',
      url,
      contentType,
      fileSize: fileBuffer.length,
      hasCallback: !!callbackUrl,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });
    await this.ensureOk(response, 'OCR job submit failed', 'ocr.client.submitJob.error');

    const result = (await response.json()) as JobResponse;

    logger.info('OCR job submitted', {
      operation: 'ocr.client.submitJob.success',
      jobId: result.jobId,
      status: result.status,
    });

    return result;
  }

  /**
   * Get the status of an async OCR job.
   *
   * @param jobId - Job ID to check
   * @returns Job status response
   * @throws JobNotFoundError when the service responds with 404
   * @throws Error with `statusCode` for any other non-2xx status
   */
  async getJobStatus(jobId: string): Promise<JobResponse> {
    // Encode the ID so characters such as '/', '?' or '#' cannot alter the request path.
    const url = `${this.baseUrl}/jobs/${encodeURIComponent(jobId)}`;

    logger.debug('OCR job status request', {
      operation: 'ocr.client.getJobStatus',
      jobId,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'GET',
    });

    if (response.status === 404) {
      throw new JobNotFoundError(jobId);
    }
    await this.ensureOk(response, 'OCR job status failed', 'ocr.client.getJobStatus.error', {
      jobId,
    });

    return (await response.json()) as JobResponse;
  }

  /**
   * Submit an async manual extraction job for PDF owner's manuals.
   *
   * @param fileBuffer - PDF file buffer
   * @param contentType - MIME type of the file (must be application/pdf)
   * @param vehicleId - Optional vehicle ID for context; sent as the
   *   `vehicle_id` form field only when truthy
   * @returns Manual job submission response
   * @throws Error with `statusCode` when the service responds with a non-2xx status
   */
  async submitManualJob(
    fileBuffer: Buffer,
    contentType: string,
    vehicleId?: string
  ): Promise<ManualJobResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    if (vehicleId) {
      formData.append('vehicle_id', vehicleId);
    }
    const url = `${this.baseUrl}/extract/manual`;

    logger.info('OCR manual job submit request', {
      operation: 'ocr.client.submitManualJob',
      url,
      contentType,
      fileSize: fileBuffer.length,
      hasVehicleId: !!vehicleId,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });
    await this.ensureOk(
      response,
      'OCR manual job submit failed',
      'ocr.client.submitManualJob.error'
    );

    const result = (await response.json()) as ManualJobResponse;

    logger.info('OCR manual job submitted', {
      operation: 'ocr.client.submitManualJob.success',
      jobId: result.jobId,
      status: result.status,
      estimatedSeconds: result.estimatedSeconds,
    });

    return result;
  }

  /**
   * Check if the OCR service is healthy.
   *
   * @returns true if `GET /health` answers with a 2xx status; false for any
   *   other status or if the request throws (network error, timeout abort)
   */
  async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetchWithTimeout(`${this.baseUrl}/health`, {
        method: 'GET',
      });
      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * Log and throw when the service answered with a non-2xx status; otherwise
   * do nothing. The response body is consumed only on the failure path.
   *
   * @param response - Response to inspect
   * @param failureMessage - Log message for the failure entry
   * @param operation - Value of the `operation` field in the failure log entry
   * @param context - Extra fields merged into the failure log entry
   * @throws Error with message `OCR service error: <status> - <body>` and a
   *   numeric `statusCode` property
   */
  private async ensureOk(
    response: Response,
    failureMessage: string,
    operation: string,
    context: Record<string, unknown> = {}
  ): Promise<void> {
    if (response.ok) {
      return;
    }

    const errorText = await response.text();
    logger.error(failureMessage, {
      operation,
      ...context,
      status: response.status,
      error: errorText,
    });

    // Attach the upstream status so callers can map it to their own response.
    throw Object.assign(
      new Error(`OCR service error: ${response.status} - ${errorText}`),
      { statusCode: response.status }
    );
  }

  /**
   * Perform a fetch that is aborted if no response arrives within `OCR_TIMEOUT_MS`.
   *
   * The timer is cleared as soon as `fetch` settles, so reading the response
   * body afterwards is not covered by this timeout.
   *
   * @param url - Absolute request URL
   * @param options - Fetch options; any `signal` is replaced by the timeout signal
   * @returns The fetch response
   */
  private async fetchWithTimeout(
    url: string,
    options: RequestInit & { headers?: Record<string, string> }
  ): Promise<Response> {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), OCR_TIMEOUT_MS);

    try {
      return await fetch(url, {
        ...options,
        signal: controller.signal,
      });
    } finally {
      clearTimeout(timeout);
    }
  }

  /**
   * Build the multipart body: a single `file` part holding the buffer, typed
   * with `contentType` and named according to that content type.
   */
  private buildFormData(fileBuffer: Buffer, contentType: string): FormData {
    const filename = this.getFilenameFromContentType(contentType);
    const blob = new Blob([fileBuffer], { type: contentType });
    const formData = new FormData();
    formData.append('file', blob, filename);
    return formData;
  }

  /**
   * Map a MIME type to the upload filename; unknown types fall back to `file.bin`.
   */
  private getFilenameFromContentType(contentType: string): string {
    const extensions: Record<string, string> = {
      'image/jpeg': 'image.jpg',
      'image/png': 'image.png',
      'image/heic': 'image.heic',
      'image/heif': 'image.heif',
      'application/pdf': 'document.pdf',
    };
    return extensions[contentType] || 'file.bin';
  }
}

/** Error thrown when a job is not found */
export class JobNotFoundError extends Error {
  /**
   * @param jobId - ID of the job the service reported as missing
   */
  constructor(jobId: string) {
    super(`Job ${jobId} not found`);
    this.name = 'JobNotFoundError';
  }
}

/** Singleton instance */
export const ocrClient = new OcrClient();