feat: add backend OCR manual proxy endpoint (refs #135)

Add a POST /api/ocr/extract/manual endpoint that proxies to the Python OCR service's manual extraction pipeline. Includes Pro tier gating via document.scanMaintenanceSchedule, PDF-only validation, a 200MB file size limit, and an async 202 job response for polling via the existing job status endpoint.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 10:37:18 -06:00
parent 57ed04d955
commit a281cea9c5
6 changed files with 489 additions and 1 deletions
--- a/backend/src/features/ocr/api/ocr.controller.ts
+++ b/backend/src/features/ocr/api/ocr.controller.ts
@@ -336,6 +336,112 @@ export class OcrController {
    }
  }

+  /**
+   * POST /api/ocr/extract/manual
+   * Submit an async manual extraction job for PDF owner's manuals.
+   * Requires Pro tier (document.scanMaintenanceSchedule).
+   * NOTE(review): tier gating is not performed inside this handler; it is
+   * presumably enforced by the route configuration - confirm.
+   *
+   * Reads a single multipart file part, rejects anything that is not a
+   * complete, non-empty `application/pdf` upload, buffers it in memory and
+   * hands the bytes to `ocrService.submitManualJob`.
+   *
+   * Responses sent by this handler:
+   * - 202 with the service result once the job has been accepted
+   * - 400 when no file is sent, the file is not a PDF, the file is empty,
+   *   or the service fails with `statusCode === 400`
+   * - 413 when the upload stream is flagged as truncated, or the service
+   *   fails with `statusCode === 413`
+   * - 500 for any other failure while submitting the job
+   *
+   * @param request Fastify request carrying the multipart upload.
+   * @param reply Fastify reply used to send the response.
+   */
+  async extractManual(
+    request: FastifyRequest,
+    reply: FastifyReply
+  ) {
+    const userId = (request as any).user?.sub as string;
+
+    logger.info('Manual extract requested', {
+      operation: 'ocr.controller.extractManual',
+      userId,
+    });
+
+    const file = await (request as any).file({ limits: { files: 1 } });
+    if (!file) {
+      logger.warn('No file provided for manual extraction', {
+        operation: 'ocr.controller.extractManual.no_file',
+        userId,
+      });
+      return reply.code(400).send({
+        error: 'Bad Request',
+        message: 'No file provided',
+      });
+    }
+
+    const contentType = file.mimetype as string;
+    if (contentType !== 'application/pdf') {
+      // The upload stream has not been read on this path. Discard its bytes
+      // so the multipart parser is not left waiting on an unconsumed stream.
+      file.file.resume();
+
+      logger.warn('Non-PDF file provided for manual extraction', {
+        operation: 'ocr.controller.extractManual.not_pdf',
+        userId,
+        contentType,
+        fileName: file.filename,
+      });
+      return reply.code(400).send({
+        error: 'Bad Request',
+        message: `Manual extraction requires PDF files. Received: ${contentType}`,
+      });
+    }
+
+    // Buffer the whole upload in memory; the service call takes a Buffer.
+    const chunks: Buffer[] = [];
+    for await (const chunk of file.file) {
+      chunks.push(chunk);
+    }
+
+    // A stream flagged as truncated stopped at the parser's size limit, so
+    // the buffered bytes are only a prefix of the PDF. Reject it instead of
+    // forwarding a partial document.
+    // NOTE(review): whether the parser truncates or throws at the limit
+    // depends on the multipart plugin configuration - confirm.
+    if (file.file.truncated) {
+      logger.warn('Truncated file provided for manual extraction', {
+        operation: 'ocr.controller.extractManual.truncated',
+        userId,
+        fileName: file.filename,
+      });
+      return reply.code(413).send({
+        error: 'Payload Too Large',
+        message: 'File exceeds the maximum allowed size',
+      });
+    }
+
+    const fileBuffer = Buffer.concat(chunks);
+    if (fileBuffer.length === 0) {
+      logger.warn('Empty file provided for manual extraction', {
+        operation: 'ocr.controller.extractManual.empty_file',
+        userId,
+        fileName: file.filename,
+      });
+      return reply.code(400).send({
+        error: 'Bad Request',
+        message: 'Empty file provided',
+      });
+    }
+
+    // Get optional vehicle_id from form fields. It is read after the file
+    // stream has been consumed, and only forwarded when it really is a
+    // string (a repeated or non-text field would otherwise slip through the
+    // type cast).
+    const rawVehicleId: unknown = file.fields?.vehicle_id?.value;
+    const vehicleId = typeof rawVehicleId === 'string' ? rawVehicleId : undefined;
+
+    try {
+      const result = await ocrService.submitManualJob(userId, {
+        fileBuffer,
+        contentType,
+        vehicleId,
+      });
+
+      logger.info('Manual extract job submitted', {
+        operation: 'ocr.controller.extractManual.success',
+        userId,
+        jobId: result.jobId,
+        status: result.status,
+        estimatedSeconds: result.estimatedSeconds,
+      });
+
+      return reply.code(202).send(result);
+    } catch (error: unknown) {
+      // Anything can be thrown, so read statusCode/message defensively
+      // rather than assuming an Error-shaped object.
+      const details = (typeof error === 'object' && error !== null ? error : {}) as {
+        statusCode?: unknown;
+        message?: unknown;
+      };
+      const statusCode = details.statusCode;
+      const message = typeof details.message === 'string' ? details.message : String(error);
+
+      if (statusCode === 413) {
+        logger.warn('Manual extract rejected', {
+          operation: 'ocr.controller.extractManual.rejected',
+          userId,
+          statusCode,
+          error: message,
+        });
+        return reply.code(413).send({
+          error: 'Payload Too Large',
+          message,
+        });
+      }
+      if (statusCode === 400) {
+        logger.warn('Manual extract rejected', {
+          operation: 'ocr.controller.extractManual.rejected',
+          userId,
+          statusCode,
+          error: message,
+        });
+        return reply.code(400).send({
+          error: 'Bad Request',
+          message,
+        });
+      }
+
+      logger.error('Manual extract failed', {
+        operation: 'ocr.controller.extractManual.error',
+        userId,
+        error: message,
+      });
+
+      // Do not leak internal failure details to the client.
+      return reply.code(500).send({
+        error: 'Internal Server Error',
+        message: 'Manual extraction submission failed',
+      });
+    }
+  }
+
  /**
   * POST /api/ocr/jobs
   * Submit an async OCR job for large files.