/**
 * @ai-summary Controller for OCR API endpoints
 */

import { FastifyReply, FastifyRequest } from 'fastify';
import { logger } from '../../../core/logging/logger';
import { ocrService } from '../domain/ocr.service';
import type { ExtractQuery, JobIdParams, JobSubmitBody } from './ocr.validation';

/** Supported MIME types for OCR */
const SUPPORTED_TYPES = new Set([
  'image/jpeg',
  'image/png',
  'image/heic',
  'image/heif',
  'application/pdf',
]);

/** Image-only MIME types for receipt extraction (no PDF) */
const SUPPORTED_IMAGE_TYPES = new Set([
  'image/jpeg',
  'image/png',
  'image/heic',
  'image/heif',
]);

/** Leading bytes every accepted owner's-manual upload must start with (`%PDF`). */
const PDF_MAGIC = Buffer.from('%PDF');

/** Client-facing statuses this controller sends with a fixed `error` label. */
type ClientStatus = 400 | 410 | 413 | 415 | 422;

/** `error` label sent in the response body for each client-facing status. */
const STATUS_LABELS: Record<ClientStatus, string> = {
  400: 'Bad Request',
  410: 'Gone',
  413: 'Payload Too Large',
  415: 'Unsupported Media Type',
  422: 'Unprocessable Entity',
};

/** The parts of a multipart file entry that this controller reads. */
interface MultipartUpload {
  mimetype: string;
  filename?: string;
  file: AsyncIterable<Buffer>;
  fields?: Record<string, { value?: unknown } | undefined>;
}

/** Request members this controller relies on that are not declared on `FastifyRequest` here. */
interface RequestExtras {
  user?: { sub?: string };
  file(options: { limits: { files: number } }): Promise<MultipartUpload | undefined>;
}

/** Describes why an upload's type was refused and how to report it. */
interface TypeRejection {
  status: 400 | 415;
  logMessage: string;
  operationSuffix: string;
  message: string;
}

/** Per-endpoint settings for {@link receiveUpload}. */
interface UploadPolicy {
  /** Log `operation` prefix, e.g. `ocr.controller.extract`. */
  operation: string;
  /** Tail of the log messages, e.g. `OCR` in "No file provided for OCR". */
  subject: string;
  /** Returns a rejection when the upload's type is not acceptable, otherwise `undefined`. */
  rejectType(contentType: string, fileName: string | undefined): TypeRejection | undefined;
}

/** A fully buffered, non-empty upload that passed the endpoint's type check. */
interface ReceivedUpload {
  fileBuffer: Buffer;
  contentType: string;
  fileName: string | undefined;
  fields: MultipartUpload['fields'];
}

/** Reads `request.user.sub`; the value is `undefined` at runtime when no user is attached. */
function userIdOf(request: object): string {
  return (request as RequestExtras).user?.sub as string;
}

/** Builds a type check that answers 415 for any MIME type outside `allowed`. */
function allowList(
  allowed: ReadonlySet<string>,
  supportedLabel: string,
  subject: string
): UploadPolicy['rejectType'] {
  return (contentType) =>
    allowed.has(contentType)
      ? undefined
      : {
          status: 415,
          logMessage: `Unsupported file type for ${subject}`,
          operationSuffix: 'unsupported_type',
          message: `Unsupported file type: ${contentType}. Supported: ${supportedLabel}`,
        };
}

/** Reads the `value` of an optional multipart form field that accompanied the file. */
function fieldValue(upload: ReceivedUpload, name: string): string | undefined {
  return upload.fields?.[name]?.value as string | undefined;
}

/**
 * Fetches the single uploaded file, applies the endpoint's type check and
 * buffers the content.
 *
 * @returns the buffered upload, or `undefined` when a 4xx reply has already
 *   been sent on `reply` (no file, rejected type, or empty file).
 */
async function receiveUpload(
  request: object,
  reply: FastifyReply,
  userId: string,
  policy: UploadPolicy
): Promise<ReceivedUpload | undefined> {
  // Called as a method on the request so the original `this` binding is kept.
  const file = await (request as RequestExtras).file({ limits: { files: 1 } });
  if (!file) {
    logger.warn(`No file provided for ${policy.subject}`, {
      operation: `${policy.operation}.no_file`,
      userId,
    });
    reply.code(400).send({
      error: 'Bad Request',
      message: 'No file provided',
    });
    return undefined;
  }

  const contentType = file.mimetype;
  const fileName = file.filename;

  // The type is checked before the stream is read so rejected uploads are never buffered.
  const rejection = policy.rejectType(contentType, fileName);
  if (rejection) {
    logger.warn(rejection.logMessage, {
      operation: `${policy.operation}.${rejection.operationSuffix}`,
      userId,
      contentType,
      fileName,
    });
    reply.code(rejection.status).send({
      error: STATUS_LABELS[rejection.status],
      message: rejection.message,
    });
    return undefined;
  }

  const chunks: Buffer[] = [];
  for await (const chunk of file.file) {
    chunks.push(chunk);
  }
  const fileBuffer = Buffer.concat(chunks);

  if (fileBuffer.length === 0) {
    logger.warn(`Empty file provided for ${policy.subject}`, {
      operation: `${policy.operation}.empty_file`,
      userId,
      fileName,
    });
    reply.code(400).send({
      error: 'Bad Request',
      message: 'Empty file provided',
    });
    return undefined;
  }

  // `fields` is read only after the stream has been consumed, as the original handlers did.
  return { fileBuffer, contentType, fileName, fields: file.fields };
}

/**
 * Pulls `statusCode` and `message` off a caught value without assuming it is
 * an `Error`. Thrown primitives (including `null`/`undefined`) are stringified
 * for the log instead of triggering a second exception inside the catch block.
 */
function errorDetails(error: unknown): { statusCode: unknown; message: unknown } {
  if (typeof error === 'object' && error !== null) {
    const { statusCode, message } = error as { statusCode?: unknown; message?: unknown };
    return { statusCode, message };
  }
  return { statusCode: undefined, message: String(error) };
}

/**
 * Translates an error thrown by `ocrService` into a reply.
 *
 * Errors whose `statusCode` is listed in `passthrough` are forwarded with
 * their own message and are not logged; anything else is logged at error
 * level and answered with a generic 500 carrying `publicMessage`.
 */
function sendServiceError(
  reply: FastifyReply,
  error: unknown,
  options: {
    passthrough: readonly ClientStatus[];
    logMessage: string;
    logContext: Record<string, unknown>;
    publicMessage: string;
  }
) {
  const { statusCode, message } = errorDetails(error);

  const forwarded = options.passthrough.find((code) => code === statusCode);
  if (forwarded !== undefined) {
    return reply.code(forwarded).send({
      error: STATUS_LABELS[forwarded],
      message,
    });
  }

  logger.error(options.logMessage, { ...options.logContext, error: message });
  return reply.code(500).send({
    error: 'Internal Server Error',
    message: options.publicMessage,
  });
}

/**
 * HTTP handlers for the OCR endpoints.
 *
 * None of the methods touch `this`; shared logic lives in module-level
 * functions so the handlers keep working when registered unbound.
 */
export class OcrController {
  /**
   * POST /api/ocr/extract
   * Extract text from an uploaded image using synchronous OCR.
   *
   * Replies 200 with the service result; 400 for a missing or empty file;
   * 415 for an unsupported MIME type; 413/415 when the service reports them;
   * 500 for any other service failure.
   */
  async extract(
    request: FastifyRequest<{ Querystring: ExtractQuery }>,
    reply: FastifyReply
  ) {
    const userId = userIdOf(request);
    // Preprocessing is on unless the query explicitly passes `false`.
    const preprocess = request.query.preprocess !== false;

    logger.info('OCR extract requested', {
      operation: 'ocr.controller.extract',
      userId,
      preprocess,
    });

    const upload = await receiveUpload(request, reply, userId, {
      operation: 'ocr.controller.extract',
      subject: 'OCR',
      rejectType: allowList(SUPPORTED_TYPES, 'JPEG, PNG, HEIC, PDF', 'OCR'),
    });
    if (!upload) {
      return reply;
    }

    try {
      const result = await ocrService.extract(userId, {
        fileBuffer: upload.fileBuffer,
        contentType: upload.contentType,
        preprocess,
      });

      logger.info('OCR extract completed', {
        operation: 'ocr.controller.extract.success',
        userId,
        success: result.success,
        documentType: result.documentType,
        processingTimeMs: result.processingTimeMs,
      });

      return reply.code(200).send(result);
    } catch (error: unknown) {
      return sendServiceError(reply, error, {
        passthrough: [413, 415],
        logMessage: 'OCR extract failed',
        logContext: { operation: 'ocr.controller.extract.error', userId },
        publicMessage: 'OCR processing failed',
      });
    }
  }

  /**
   * POST /api/ocr/extract/vin
   * Extract VIN from an uploaded image using VIN-specific OCR.
   *
   * Replies 200 with the service result; 400 for a missing or empty file;
   * 415 for an unsupported MIME type; 413/415 when the service reports them;
   * 500 for any other service failure.
   */
  async extractVin(
    request: FastifyRequest,
    reply: FastifyReply
  ) {
    const userId = userIdOf(request);

    logger.info('VIN extract requested', {
      operation: 'ocr.controller.extractVin',
      userId,
    });

    const upload = await receiveUpload(request, reply, userId, {
      operation: 'ocr.controller.extractVin',
      subject: 'VIN extraction',
      rejectType: allowList(SUPPORTED_TYPES, 'JPEG, PNG, HEIC, PDF', 'VIN extraction'),
    });
    if (!upload) {
      return reply;
    }

    try {
      const result = await ocrService.extractVin(userId, {
        fileBuffer: upload.fileBuffer,
        contentType: upload.contentType,
      });

      logger.info('VIN extract completed', {
        operation: 'ocr.controller.extractVin.success',
        userId,
        success: result.success,
        processingTimeMs: result.processingTimeMs,
      });

      return reply.code(200).send(result);
    } catch (error: unknown) {
      return sendServiceError(reply, error, {
        passthrough: [413, 415],
        logMessage: 'VIN extract failed',
        logContext: { operation: 'ocr.controller.extractVin.error', userId },
        publicMessage: 'VIN extraction failed',
      });
    }
  }

  /**
   * POST /api/ocr/extract/receipt
   * Extract data from a receipt image using receipt-specific OCR.
   *
   * Only image uploads are accepted (no PDF). An optional `receipt_type`
   * form field is forwarded to the service. Replies 200 with the service
   * result; 400 for a missing or empty file; 415 for an unsupported MIME
   * type; 413/415/422 when the service reports them; 500 otherwise.
   */
  async extractReceipt(
    request: FastifyRequest,
    reply: FastifyReply
  ) {
    const userId = userIdOf(request);

    logger.info('Receipt extract requested', {
      operation: 'ocr.controller.extractReceipt',
      userId,
    });

    const upload = await receiveUpload(request, reply, userId, {
      operation: 'ocr.controller.extractReceipt',
      subject: 'receipt extraction',
      rejectType: allowList(SUPPORTED_IMAGE_TYPES, 'JPEG, PNG, HEIC', 'receipt extraction'),
    });
    if (!upload) {
      return reply;
    }

    // Get optional receipt_type from form fields
    const receiptType = fieldValue(upload, 'receipt_type');

    try {
      const result = await ocrService.extractReceipt(userId, {
        fileBuffer: upload.fileBuffer,
        contentType: upload.contentType,
        receiptType,
      });

      logger.info('Receipt extract completed', {
        operation: 'ocr.controller.extractReceipt.success',
        userId,
        success: result.success,
        receiptType: result.receiptType,
        processingTimeMs: result.processingTimeMs,
      });

      return reply.code(200).send(result);
    } catch (error: unknown) {
      return sendServiceError(reply, error, {
        passthrough: [413, 415, 422],
        logMessage: 'Receipt extract failed',
        logContext: { operation: 'ocr.controller.extractReceipt.error', userId },
        publicMessage: 'Receipt extraction failed',
      });
    }
  }

  /**
   * POST /api/ocr/extract/manual
   * Submit an async manual extraction job for PDF owner's manuals.
   * Requires Pro tier (document.scanMaintenanceSchedule); the tier check is
   * not performed in this controller.
   *
   * The upload is accepted when either its MIME type is `application/pdf` or
   * its file name ends in `.pdf`, and its content starts with `%PDF`. An
   * optional `vehicle_id` form field is forwarded to the service. Replies
   * 202 with the job descriptor; 400 for a missing, non-PDF or empty file;
   * 415 when the `%PDF` header is absent; 413/400 when the service reports
   * them; 500 otherwise.
   */
  async extractManual(
    request: FastifyRequest,
    reply: FastifyReply
  ) {
    const userId = userIdOf(request);

    logger.info('Manual extract requested', {
      operation: 'ocr.controller.extractManual',
      userId,
    });

    const upload = await receiveUpload(request, reply, userId, {
      operation: 'ocr.controller.extractManual',
      subject: 'manual extraction',
      rejectType: (contentType, fileName) => {
        const isPdfMime = contentType === 'application/pdf';
        const isPdfExtension = fileName?.toLowerCase().endsWith('.pdf') ?? false;
        if (isPdfMime || isPdfExtension) {
          return undefined;
        }
        return {
          status: 400,
          logMessage: 'Non-PDF file provided for manual extraction',
          operationSuffix: 'not_pdf',
          message: `Manual extraction requires PDF files. Received: ${contentType}`,
        };
      },
    });
    if (!upload) {
      return reply;
    }

    const { fileBuffer, contentType, fileName } = upload;

    // Validate PDF magic bytes (%PDF)
    if (fileBuffer.length < 4 || !fileBuffer.subarray(0, 4).equals(PDF_MAGIC)) {
      logger.warn('File lacks PDF magic bytes', {
        operation: 'ocr.controller.extractManual.invalid_magic',
        userId,
        fileName,
        firstBytes: fileBuffer.subarray(0, 4).toString('hex'),
      });
      return reply.code(415).send({
        error: 'Unsupported Media Type',
        message: 'File does not appear to be a valid PDF (missing %PDF header)',
      });
    }

    // Get optional vehicle_id from form fields
    const vehicleId = fieldValue(upload, 'vehicle_id');

    try {
      // NOTE(review): a file accepted via its `.pdf` extension is forwarded with
      // its original (non-PDF) MIME type — confirm the service tolerates that.
      const result = await ocrService.submitManualJob(userId, {
        fileBuffer,
        contentType,
        vehicleId,
      });

      logger.info('Manual extract job submitted', {
        operation: 'ocr.controller.extractManual.success',
        userId,
        jobId: result.jobId,
        status: result.status,
        estimatedSeconds: result.estimatedSeconds,
      });

      return reply.code(202).send(result);
    } catch (error: unknown) {
      return sendServiceError(reply, error, {
        passthrough: [413, 400],
        logMessage: 'Manual extract failed',
        logContext: { operation: 'ocr.controller.extractManual.error', userId },
        publicMessage: 'Manual extraction submission failed',
      });
    }
  }

  /**
   * POST /api/ocr/jobs
   * Submit an async OCR job for large files.
   *
   * An optional `callbackUrl` form field is forwarded to the service.
   * Replies 202 with the job descriptor; 400 for a missing or empty file;
   * 415 for an unsupported MIME type; 413/415 when the service reports
   * them; 500 otherwise.
   */
  async submitJob(
    request: FastifyRequest<{ Body: JobSubmitBody }>,
    reply: FastifyReply
  ) {
    const userId = userIdOf(request);

    logger.info('OCR job submit requested', {
      operation: 'ocr.controller.submitJob',
      userId,
    });

    const upload = await receiveUpload(request, reply, userId, {
      operation: 'ocr.controller.submitJob',
      subject: 'OCR job',
      rejectType: allowList(SUPPORTED_TYPES, 'JPEG, PNG, HEIC, PDF', 'OCR job'),
    });
    if (!upload) {
      return reply;
    }

    // Get callback URL from form data (if present)
    const callbackUrl = fieldValue(upload, 'callbackUrl');

    try {
      const result = await ocrService.submitJob(userId, {
        fileBuffer: upload.fileBuffer,
        contentType: upload.contentType,
        callbackUrl,
      });

      logger.info('OCR job submitted', {
        operation: 'ocr.controller.submitJob.success',
        userId,
        jobId: result.jobId,
        status: result.status,
      });

      return reply.code(202).send(result);
    } catch (error: unknown) {
      return sendServiceError(reply, error, {
        passthrough: [413, 415],
        logMessage: 'OCR job submit failed',
        logContext: { operation: 'ocr.controller.submitJob.error', userId },
        publicMessage: 'Job submission failed',
      });
    }
  }

  /**
   * GET /api/ocr/jobs/:jobId
   * Get the status of an async OCR job.
   *
   * Replies 200 with the service result; 410 when the service reports it;
   * 500 for any other service failure.
   */
  async getJobStatus(
    request: FastifyRequest<{ Params: JobIdParams }>,
    reply: FastifyReply
  ) {
    const userId = userIdOf(request);
    const { jobId } = request.params;

    logger.debug('OCR job status requested', {
      operation: 'ocr.controller.getJobStatus',
      userId,
      jobId,
    });

    try {
      const result = await ocrService.getJobStatus(userId, jobId);
      return reply.code(200).send(result);
    } catch (error: unknown) {
      return sendServiceError(reply, error, {
        passthrough: [410],
        logMessage: 'OCR job status failed',
        logContext: { operation: 'ocr.controller.getJobStatus.error', userId, jobId },
        publicMessage: 'Failed to retrieve job status',
      });
    }
  }
}