Files
motovaultpro/backend/src/features/ocr/external/ocr-client.ts
Eric Gullickson 88d23d2745 feat: add backend migration and API for maintenance receipt linking (refs #151)
Add receipt_document_id FK on maintenance_records, update types/repo/service
to support receipt linking on create and return document metadata on GET.
Add OCR proxy endpoint POST /api/ocr/extract/maintenance-receipt with
tier gating (maintenance.receiptScan) through full chain: routes -> controller
-> service -> client.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 21:24:24 -06:00

439 lines
12 KiB
TypeScript

/**
* @ai-summary HTTP client for OCR service communication
*/
import { logger } from '../../../core/logging/logger';
import type { JobResponse, ManualJobResponse, OcrResponse, ReceiptExtractionResponse, VinExtractionResponse } from '../domain/ocr.types';
/** Address used for the OCR service when the OCR_SERVICE_URL environment variable is unset or empty. */
const DEFAULT_OCR_SERVICE_URL = 'http://mvp-ocr:8000';

/** Base URL of the OCR service; taken from the environment, otherwise the default above. */
const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || DEFAULT_OCR_SERVICE_URL;

/**
 * Request timeout in milliseconds: 120 seconds for sync operations
 * (PaddleOCR model loading on first call).
 */
const OCR_TIMEOUT_MS = 120 * 1000;
/**
 * HTTP client for communicating with the OCR service.
 *
 * Files are uploaded as multipart form data under the `file` field. Every
 * request goes through `fetchWithTimeout`, which aborts the request if `fetch`
 * has not settled within `OCR_TIMEOUT_MS`. Responses whose status is not OK
 * are logged and surfaced as an `Error` whose message contains the status
 * code and the response body text.
 */
export class OcrClient {
  /** Upload filename for each supported MIME type (a Map, so only listed keys match). */
  private static readonly FILENAMES_BY_MIME_TYPE: ReadonlyMap<string, string> =
    new Map<string, string>([
      ['image/jpeg', 'image.jpg'],
      ['image/png', 'image.png'],
      ['image/heic', 'image.heic'],
      ['image/heif', 'image.heif'],
      ['application/pdf', 'document.pdf'],
    ]);

  private readonly baseUrl: string;

  /**
   * @param baseUrl - Root URL of the OCR service (defaults to `OCR_SERVICE_URL`).
   *   Trailing slashes are removed so that joined paths never contain `//`.
   */
  constructor(baseUrl: string = OCR_SERVICE_URL) {
    this.baseUrl = baseUrl.replace(/\/+$/, '');
  }

  /**
   * Extract text from an image using OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @param preprocess - Whether to apply preprocessing (default: true)
   * @returns OCR extraction result
   * @throws Error when the service responds with a non-OK status
   */
  async extract(
    fileBuffer: Buffer,
    contentType: string,
    preprocess: boolean = true
  ): Promise<OcrResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    const url = `${this.baseUrl}/extract?preprocess=${preprocess}`;

    logger.info('OCR extract request', {
      operation: 'ocr.client.extract',
      url,
      contentType,
      fileSize: fileBuffer.length,
      preprocess,
    });

    const response = await this.postForm(url, formData);
    if (!response.ok) {
      throw await this.toServiceError(
        response,
        'OCR extract failed',
        'ocr.client.extract.error'
      );
    }

    const result = (await response.json()) as OcrResponse;
    logger.info('OCR extract completed', {
      operation: 'ocr.client.extract.success',
      success: result.success,
      documentType: result.documentType,
      confidence: result.confidence,
      processingTimeMs: result.processingTimeMs,
    });
    return result;
  }

  /**
   * Extract VIN from an image using VIN-specific OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @returns VIN extraction result
   * @throws Error when the service responds with a non-OK status
   */
  async extractVin(
    fileBuffer: Buffer,
    contentType: string
  ): Promise<VinExtractionResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    const url = `${this.baseUrl}/extract/vin`;

    logger.info('OCR VIN extract request', {
      operation: 'ocr.client.extractVin',
      url,
      contentType,
      fileSize: fileBuffer.length,
    });

    const response = await this.postForm(url, formData);
    if (!response.ok) {
      throw await this.toServiceError(
        response,
        'OCR VIN extract failed',
        'ocr.client.extractVin.error'
      );
    }

    const result = (await response.json()) as VinExtractionResponse;
    logger.info('OCR VIN extract completed', {
      operation: 'ocr.client.extractVin.success',
      success: result.success,
      vin: result.vin,
      confidence: result.confidence,
      processingTimeMs: result.processingTimeMs,
    });
    return result;
  }

  /**
   * Extract data from a receipt image using receipt-specific OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @param receiptType - Optional receipt type hint (e.g., 'fuel'); sent as
   *   the `receipt_type` form field when provided
   * @returns Receipt extraction result
   * @throws Error carrying a `statusCode` property (the upstream HTTP status)
   *   when the service responds with a non-OK status
   */
  async extractReceipt(
    fileBuffer: Buffer,
    contentType: string,
    receiptType?: string
  ): Promise<ReceiptExtractionResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    if (receiptType) {
      formData.append('receipt_type', receiptType);
    }
    const url = `${this.baseUrl}/extract/receipt`;

    logger.info('OCR receipt extract request', {
      operation: 'ocr.client.extractReceipt',
      url,
      contentType,
      fileSize: fileBuffer.length,
      receiptType,
    });

    const response = await this.postForm(url, formData);
    if (!response.ok) {
      const failure = await this.toServiceError(
        response,
        'OCR receipt extract failed',
        'ocr.client.extractReceipt.error'
      );
      // Expose the upstream status so callers can map it onto their own response.
      throw Object.assign(failure, { statusCode: response.status });
    }

    const result = (await response.json()) as ReceiptExtractionResponse;
    logger.info('OCR receipt extract completed', {
      operation: 'ocr.client.extractReceipt.success',
      success: result.success,
      receiptType: result.receiptType,
      // The payload is not validated; a missing field must not fail a successful call.
      fieldCount: Object.keys(result.extractedFields ?? {}).length,
      processingTimeMs: result.processingTimeMs,
    });
    return result;
  }

  /**
   * Extract data from a maintenance receipt image using maintenance-specific OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @returns Receipt extraction result (receiptType: "maintenance")
   * @throws Error carrying a `statusCode` property (the upstream HTTP status)
   *   when the service responds with a non-OK status
   */
  async extractMaintenanceReceipt(
    fileBuffer: Buffer,
    contentType: string
  ): Promise<ReceiptExtractionResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    const url = `${this.baseUrl}/extract/maintenance-receipt`;

    logger.info('OCR maintenance receipt extract request', {
      operation: 'ocr.client.extractMaintenanceReceipt',
      url,
      contentType,
      fileSize: fileBuffer.length,
    });

    const response = await this.postForm(url, formData);
    if (!response.ok) {
      const failure = await this.toServiceError(
        response,
        'OCR maintenance receipt extract failed',
        'ocr.client.extractMaintenanceReceipt.error'
      );
      // Expose the upstream status so callers can map it onto their own response.
      throw Object.assign(failure, { statusCode: response.status });
    }

    const result = (await response.json()) as ReceiptExtractionResponse;
    logger.info('OCR maintenance receipt extract completed', {
      operation: 'ocr.client.extractMaintenanceReceipt.success',
      success: result.success,
      receiptType: result.receiptType,
      // The payload is not validated; a missing field must not fail a successful call.
      fieldCount: Object.keys(result.extractedFields ?? {}).length,
      processingTimeMs: result.processingTimeMs,
    });
    return result;
  }

  /**
   * Submit an async OCR job for large files.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @param callbackUrl - Optional URL to call when job completes; sent as the
   *   `callback_url` form field when provided
   * @returns Job submission response
   * @throws Error when the service responds with a non-OK status
   */
  async submitJob(
    fileBuffer: Buffer,
    contentType: string,
    callbackUrl?: string
  ): Promise<JobResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    if (callbackUrl) {
      formData.append('callback_url', callbackUrl);
    }
    const url = `${this.baseUrl}/jobs`;

    logger.info('OCR job submit request', {
      operation: 'ocr.client.submitJob',
      url,
      contentType,
      fileSize: fileBuffer.length,
      hasCallback: !!callbackUrl,
    });

    const response = await this.postForm(url, formData);
    if (!response.ok) {
      throw await this.toServiceError(
        response,
        'OCR job submit failed',
        'ocr.client.submitJob.error'
      );
    }

    const result = (await response.json()) as JobResponse;
    logger.info('OCR job submitted', {
      operation: 'ocr.client.submitJob.success',
      jobId: result.jobId,
      status: result.status,
    });
    return result;
  }

  /**
   * Get the status of an async OCR job.
   *
   * @param jobId - Job ID to check
   * @returns Job status response
   * @throws JobNotFoundError when the service responds with 404
   * @throws Error when the service responds with any other non-OK status
   */
  async getJobStatus(jobId: string): Promise<JobResponse> {
    // Encode the ID so characters such as `/`, `?` or `#` cannot change the request target.
    const url = `${this.baseUrl}/jobs/${encodeURIComponent(jobId)}`;

    logger.debug('OCR job status request', {
      operation: 'ocr.client.getJobStatus',
      jobId,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'GET',
    });

    if (response.status === 404) {
      throw new JobNotFoundError(jobId);
    }
    if (!response.ok) {
      throw await this.toServiceError(
        response,
        'OCR job status failed',
        'ocr.client.getJobStatus.error',
        { jobId }
      );
    }

    return (await response.json()) as JobResponse;
  }

  /**
   * Submit an async manual extraction job for PDF owner's manuals.
   *
   * @param fileBuffer - PDF file buffer
   * @param contentType - MIME type of the file (must be application/pdf)
   * @param vehicleId - Optional vehicle ID for context; sent as the
   *   `vehicle_id` form field when provided
   * @returns Manual job submission response
   * @throws Error when the service responds with a non-OK status
   */
  async submitManualJob(
    fileBuffer: Buffer,
    contentType: string,
    vehicleId?: string
  ): Promise<ManualJobResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    if (vehicleId) {
      formData.append('vehicle_id', vehicleId);
    }
    const url = `${this.baseUrl}/extract/manual`;

    logger.info('OCR manual job submit request', {
      operation: 'ocr.client.submitManualJob',
      url,
      contentType,
      fileSize: fileBuffer.length,
      hasVehicleId: !!vehicleId,
    });

    const response = await this.postForm(url, formData);
    if (!response.ok) {
      throw await this.toServiceError(
        response,
        'OCR manual job submit failed',
        'ocr.client.submitManualJob.error'
      );
    }

    const result = (await response.json()) as ManualJobResponse;
    logger.info('OCR manual job submitted', {
      operation: 'ocr.client.submitManualJob.success',
      jobId: result.jobId,
      status: result.status,
      estimatedSeconds: result.estimatedSeconds,
    });
    return result;
  }

  /**
   * Check if the OCR service is healthy.
   *
   * @returns true if `GET /health` answers with an OK status; false on any
   *   other status or if the request throws (network failure, timeout)
   */
  async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetchWithTimeout(`${this.baseUrl}/health`, {
        method: 'GET',
      });
      return response.ok;
    } catch {
      return false;
    }
  }

  /** POST a multipart form to the OCR service, subject to the shared timeout. */
  private postForm(url: string, formData: FormData): Promise<Response> {
    return this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });
  }

  /**
   * Read the body of a failed response, log it, and build the error to throw.
   *
   * @param response - Response whose status is not OK
   * @param logMessage - Message for the error log entry
   * @param operation - Value of the `operation` field in the log entry
   * @param context - Extra fields to include in the log entry
   * @returns Error with the status code and body text in its message
   */
  private async toServiceError(
    response: Response,
    logMessage: string,
    operation: string,
    context: Record<string, unknown> = {}
  ): Promise<Error> {
    const errorText = await response.text();
    logger.error(logMessage, {
      operation,
      ...context,
      status: response.status,
      error: errorText,
    });
    return new Error(`OCR service error: ${response.status} - ${errorText}`);
  }

  /**
   * Run `fetch` with an abort signal that fires after `OCR_TIMEOUT_MS`.
   *
   * The timer is cleared as soon as `fetch` settles, so it bounds the wait for
   * the response, not the later reading of the response body.
   */
  private async fetchWithTimeout(
    url: string,
    options: RequestInit & { headers?: Record<string, string> }
  ): Promise<Response> {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), OCR_TIMEOUT_MS);
    try {
      return await fetch(url, {
        ...options,
        signal: controller.signal,
      });
    } finally {
      // Always release the timer so it cannot keep the event loop alive.
      clearTimeout(timeout);
    }
  }

  /** Wrap the buffer in a Blob and attach it to a new form as the `file` field. */
  private buildFormData(fileBuffer: Buffer, contentType: string): FormData {
    const filename = this.getFilenameFromContentType(contentType);
    const blob = new Blob([fileBuffer], { type: contentType });
    const formData = new FormData();
    formData.append('file', blob, filename);
    return formData;
  }

  /**
   * Choose an upload filename for a MIME type.
   *
   * Matching ignores case, surrounding whitespace and any parameters after
   * `;` (e.g. `image/jpeg; charset=binary`). Unknown types map to `file.bin`.
   */
  private getFilenameFromContentType(contentType: string): string {
    const mimeType = (contentType.split(';', 1)[0] ?? '').trim().toLowerCase();
    return OcrClient.FILENAMES_BY_MIME_TYPE.get(mimeType) ?? 'file.bin';
  }
}
/**
 * Raised when a job lookup fails because the requested job does not exist.
 * The message has the form `Job <jobId> not found`.
 */
export class JobNotFoundError extends Error {
  constructor(jobId: string) {
    const message = `Job ${jobId} not found`;
    super(message);
    // Give the error its own name so logs and `error.name` checks can tell it apart.
    this.name = 'JobNotFoundError';
  }
}
/**
 * Singleton instance, created at module load with no arguments so it uses
 * the default base URL (OCR_SERVICE_URL).
 */
export const ocrClient = new OcrClient();