Add receipt_document_id FK on maintenance_records, update types/repo/service to support receipt linking on create and return document metadata on GET. Add OCR proxy endpoint POST /api/ocr/extract/maintenance-receipt with tier gating (maintenance.receiptScan) through full chain: routes -> controller -> service -> client. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
439 lines
12 KiB
TypeScript
439 lines
12 KiB
TypeScript
/**
 * @ai-summary HTTP client for OCR service communication
 */
|
|
import { logger } from '../../../core/logging/logger';
|
|
import type { JobResponse, ManualJobResponse, OcrResponse, ReceiptExtractionResponse, VinExtractionResponse } from '../domain/ocr.types';
|
|
|
|
/**
 * OCR service configuration.
 *
 * Base URL of the OCR service: read from the `OCR_SERVICE_URL` environment variable,
 * falling back to the hard-coded default when the variable is unset or empty.
 */
const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000';

/** Request timeout: 120 seconds for sync operations (PaddleOCR model loading on first call). */
const OCR_TIMEOUT_MS = 120000;
|
|
|
|
/**
 * HTTP client for communicating with the OCR service.
 *
 * Every request is sent through `fetchWithTimeout` against `baseUrl`. The extraction and
 * job methods log the request and its outcome and return the parsed JSON body. A non-2xx
 * reply is logged and thrown as an `Error` whose message is
 * `OCR service error: <status> - <body>` and whose `statusCode` property holds the
 * upstream HTTP status.
 */
export class OcrClient {
  private readonly baseUrl: string;

  /**
   * @param baseUrl - Root URL of the OCR service (defaults to `OCR_SERVICE_URL`)
   */
  constructor(baseUrl: string = OCR_SERVICE_URL) {
    this.baseUrl = baseUrl;
  }

  /**
   * Extract text from an image using OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @param preprocess - Whether to apply preprocessing (default: true)
   * @returns OCR extraction result
   * @throws Error with `statusCode` when the OCR service replies with a non-2xx status
   */
  async extract(
    fileBuffer: Buffer,
    contentType: string,
    preprocess: boolean = true
  ): Promise<OcrResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    const url = `${this.baseUrl}/extract?preprocess=${preprocess}`;

    logger.info('OCR extract request', {
      operation: 'ocr.client.extract',
      url,
      contentType,
      fileSize: fileBuffer.length,
      preprocess,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      throw await this.serviceError(response, 'OCR extract failed', 'ocr.client.extract.error');
    }

    const result = (await response.json()) as OcrResponse;

    logger.info('OCR extract completed', {
      operation: 'ocr.client.extract.success',
      success: result.success,
      documentType: result.documentType,
      confidence: result.confidence,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  }

  /**
   * Extract VIN from an image using VIN-specific OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @returns VIN extraction result
   * @throws Error with `statusCode` when the OCR service replies with a non-2xx status
   */
  async extractVin(
    fileBuffer: Buffer,
    contentType: string
  ): Promise<VinExtractionResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    const url = `${this.baseUrl}/extract/vin`;

    logger.info('OCR VIN extract request', {
      operation: 'ocr.client.extractVin',
      url,
      contentType,
      fileSize: fileBuffer.length,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      throw await this.serviceError(
        response,
        'OCR VIN extract failed',
        'ocr.client.extractVin.error'
      );
    }

    const result = (await response.json()) as VinExtractionResponse;

    logger.info('OCR VIN extract completed', {
      operation: 'ocr.client.extractVin.success',
      success: result.success,
      vin: result.vin,
      confidence: result.confidence,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  }

  /**
   * Extract data from a receipt image using receipt-specific OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @param receiptType - Optional receipt type hint (e.g., 'fuel')
   * @returns Receipt extraction result
   * @throws Error with `statusCode` when the OCR service replies with a non-2xx status
   */
  async extractReceipt(
    fileBuffer: Buffer,
    contentType: string,
    receiptType?: string
  ): Promise<ReceiptExtractionResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    if (receiptType) {
      formData.append('receipt_type', receiptType);
    }

    const url = `${this.baseUrl}/extract/receipt`;

    logger.info('OCR receipt extract request', {
      operation: 'ocr.client.extractReceipt',
      url,
      contentType,
      fileSize: fileBuffer.length,
      receiptType,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      throw await this.serviceError(
        response,
        'OCR receipt extract failed',
        'ocr.client.extractReceipt.error'
      );
    }

    const result = (await response.json()) as ReceiptExtractionResponse;

    logger.info('OCR receipt extract completed', {
      operation: 'ocr.client.extractReceipt.success',
      success: result.success,
      receiptType: result.receiptType,
      // The body is unvalidated JSON; tolerate a reply that omits extractedFields.
      fieldCount: Object.keys(result.extractedFields ?? {}).length,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  }

  /**
   * Extract data from a maintenance receipt image using maintenance-specific OCR.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @returns Receipt extraction result (receiptType: "maintenance")
   * @throws Error with `statusCode` when the OCR service replies with a non-2xx status
   */
  async extractMaintenanceReceipt(
    fileBuffer: Buffer,
    contentType: string
  ): Promise<ReceiptExtractionResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    const url = `${this.baseUrl}/extract/maintenance-receipt`;

    logger.info('OCR maintenance receipt extract request', {
      operation: 'ocr.client.extractMaintenanceReceipt',
      url,
      contentType,
      fileSize: fileBuffer.length,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      throw await this.serviceError(
        response,
        'OCR maintenance receipt extract failed',
        'ocr.client.extractMaintenanceReceipt.error'
      );
    }

    const result = (await response.json()) as ReceiptExtractionResponse;

    logger.info('OCR maintenance receipt extract completed', {
      operation: 'ocr.client.extractMaintenanceReceipt.success',
      success: result.success,
      receiptType: result.receiptType,
      // The body is unvalidated JSON; tolerate a reply that omits extractedFields.
      fieldCount: Object.keys(result.extractedFields ?? {}).length,
      processingTimeMs: result.processingTimeMs,
    });

    return result;
  }

  /**
   * Submit an async OCR job for large files.
   *
   * @param fileBuffer - Image file buffer
   * @param contentType - MIME type of the file
   * @param callbackUrl - Optional URL to call when job completes
   * @returns Job submission response
   * @throws Error with `statusCode` when the OCR service replies with a non-2xx status
   */
  async submitJob(
    fileBuffer: Buffer,
    contentType: string,
    callbackUrl?: string
  ): Promise<JobResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    if (callbackUrl) {
      formData.append('callback_url', callbackUrl);
    }

    const url = `${this.baseUrl}/jobs`;

    logger.info('OCR job submit request', {
      operation: 'ocr.client.submitJob',
      url,
      contentType,
      fileSize: fileBuffer.length,
      hasCallback: !!callbackUrl,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      throw await this.serviceError(
        response,
        'OCR job submit failed',
        'ocr.client.submitJob.error'
      );
    }

    const result = (await response.json()) as JobResponse;

    logger.info('OCR job submitted', {
      operation: 'ocr.client.submitJob.success',
      jobId: result.jobId,
      status: result.status,
    });

    return result;
  }

  /**
   * Get the status of an async OCR job.
   *
   * @param jobId - Job ID to check
   * @returns Job status response
   * @throws JobNotFoundError when the OCR service replies 404
   * @throws Error with `statusCode` for any other non-2xx status
   */
  async getJobStatus(jobId: string): Promise<JobResponse> {
    // Percent-encode the id so characters such as '/', '?' or '#' cannot alter the request path.
    const url = `${this.baseUrl}/jobs/${encodeURIComponent(jobId)}`;

    logger.debug('OCR job status request', {
      operation: 'ocr.client.getJobStatus',
      jobId,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'GET',
    });

    if (response.status === 404) {
      throw new JobNotFoundError(jobId);
    }

    if (!response.ok) {
      throw await this.serviceError(
        response,
        'OCR job status failed',
        'ocr.client.getJobStatus.error',
        { jobId }
      );
    }

    return (await response.json()) as JobResponse;
  }

  /**
   * Submit an async manual extraction job for PDF owner's manuals.
   *
   * @param fileBuffer - PDF file buffer
   * @param contentType - MIME type of the file (must be application/pdf)
   * @param vehicleId - Optional vehicle ID for context
   * @returns Manual job submission response
   * @throws Error with `statusCode` when the OCR service replies with a non-2xx status
   */
  async submitManualJob(
    fileBuffer: Buffer,
    contentType: string,
    vehicleId?: string
  ): Promise<ManualJobResponse> {
    const formData = this.buildFormData(fileBuffer, contentType);
    if (vehicleId) {
      formData.append('vehicle_id', vehicleId);
    }

    const url = `${this.baseUrl}/extract/manual`;

    logger.info('OCR manual job submit request', {
      operation: 'ocr.client.submitManualJob',
      url,
      contentType,
      fileSize: fileBuffer.length,
      hasVehicleId: !!vehicleId,
    });

    const response = await this.fetchWithTimeout(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      throw await this.serviceError(
        response,
        'OCR manual job submit failed',
        'ocr.client.submitManualJob.error'
      );
    }

    const result = (await response.json()) as ManualJobResponse;

    logger.info('OCR manual job submitted', {
      operation: 'ocr.client.submitManualJob.success',
      jobId: result.jobId,
      status: result.status,
      estimatedSeconds: result.estimatedSeconds,
    });

    return result;
  }

  /**
   * Check if the OCR service is healthy.
   *
   * @returns true if `GET /health` answers with a 2xx status, false on any other
   *          status or when the request itself fails
   */
  async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetchWithTimeout(`${this.baseUrl}/health`, {
        method: 'GET',
      });
      return response.ok;
    } catch {
      // Deliberate best-effort: any network/abort failure simply means "not healthy".
      return false;
    }
  }

  /**
   * Read the body of a failed response, log it, and build the error to throw.
   *
   * @param response - Non-2xx response from the OCR service
   * @param message - Log message for the failure
   * @param operation - Value of the `operation` log field
   * @param context - Extra log fields placed between `operation` and `status`
   * @returns Error carrying the upstream status in `statusCode`
   */
  private async serviceError(
    response: Response,
    message: string,
    operation: string,
    context: Record<string, unknown> = {}
  ): Promise<Error & { statusCode: number }> {
    const errorText = await response.text();

    logger.error(message, {
      operation,
      ...context,
      status: response.status,
      error: errorText,
    });

    return Object.assign(new Error(`OCR service error: ${response.status} - ${errorText}`), {
      statusCode: response.status,
    });
  }

  /**
   * Run `fetch` with an abort signal that fires after `OCR_TIMEOUT_MS`.
   *
   * The timer is cleared as soon as the `fetch` promise settles, so reading the
   * response body afterwards is not covered by this timeout.
   */
  private async fetchWithTimeout(
    url: string,
    options: RequestInit & { headers?: Record<string, string> }
  ): Promise<Response> {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), OCR_TIMEOUT_MS);

    try {
      return await fetch(url, {
        ...options,
        signal: controller.signal,
      });
    } finally {
      clearTimeout(timeout);
    }
  }

  /** Wrap the buffer in a multipart form under the `file` field, with a filename derived from the MIME type. */
  private buildFormData(fileBuffer: Buffer, contentType: string): FormData {
    const filename = this.getFilenameFromContentType(contentType);
    const blob = new Blob([fileBuffer], { type: contentType });
    const formData = new FormData();
    formData.append('file', blob, filename);
    return formData;
  }

  /** Map a known MIME type to a placeholder filename; unknown types get `file.bin`. */
  private getFilenameFromContentType(contentType: string): string {
    const extensions: Record<string, string> = {
      'image/jpeg': 'image.jpg',
      'image/png': 'image.png',
      'image/heic': 'image.heic',
      'image/heif': 'image.heif',
      'application/pdf': 'document.pdf',
    };
    return extensions[contentType] || 'file.bin';
  }
}
|
|
|
|
/**
 * Error thrown when a job is not found.
 *
 * The message is `Job <jobId> not found`; the id is also exposed as `jobId` so
 * handlers do not have to parse it back out of the message.
 */
export class JobNotFoundError extends Error {
  /**
   * @param jobId - ID of the job that could not be found
   */
  constructor(public readonly jobId: string) {
    super(`Job ${jobId} not found`);
    this.name = 'JobNotFoundError';
    // Keep `instanceof JobNotFoundError` working when compiled to targets that
    // down-level classes extending built-ins.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
|
|
|
|
/** Singleton instance */
|
|
// Constructed without arguments, so it targets the constructor's default base URL.
export const ocrClient = new OcrClient();
|