feat: add backend OCR manual proxy endpoint (refs #135)
Add POST /api/ocr/extract/manual endpoint that proxies to the Python OCR service's manual extraction pipeline. Includes Pro tier gating via document.scanMaintenanceSchedule, PDF-only validation, 200MB file size limit, and async 202 job response for polling via existing job status endpoint.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -336,6 +336,112 @@ export class OcrController {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * POST /api/ocr/extract/manual
 * Submit an async manual extraction job for PDF owner's manuals.
 * Requires Pro tier (document.scanMaintenanceSchedule).
 * NOTE(review): no tier check happens inside this method; presumably the
 * gating is enforced where the route is registered — confirm.
 *
 * Flow: take a single multipart file upload, reject it when it is missing,
 * not `application/pdf`, or empty, then buffer it and pass it (together with
 * the optional `vehicle_id` form field) to `ocrService.submitManualJob`.
 *
 * Responses sent by this handler:
 * - 202 with the service result when the job was submitted
 * - 400 when no file, a non-PDF file, or an empty file is provided, or when
 *   the thrown error carries `statusCode === 400`
 * - 413 when the thrown error carries `statusCode === 413`
 * - 500 for every other failure (logged with the error message)
 *
 * @param request Fastify request; expected to expose `user.sub` and a
 *   multipart `file()` accessor (both accessed through `any` casts here).
 * @param reply Fastify reply used to send the response.
 */
async extractManual(
  request: FastifyRequest,
  reply: FastifyReply
) {
  const userId = (request as any).user?.sub as string;

  logger.info('Manual extract requested', {
    operation: 'ocr.controller.extractManual',
    userId,
  });

  const file = await (request as any).file({ limits: { files: 1 } });
  if (!file) {
    logger.warn('No file provided for manual extraction', {
      operation: 'ocr.controller.extractManual.no_file',
      userId,
    });
    return reply.code(400).send({
      error: 'Bad Request',
      message: 'No file provided',
    });
  }

  const contentType = file.mimetype as string;
  if (contentType !== 'application/pdf') {
    logger.warn('Non-PDF file provided for manual extraction', {
      operation: 'ocr.controller.extractManual.not_pdf',
      userId,
      contentType,
      fileName: file.filename,
    });
    return reply.code(400).send({
      error: 'Bad Request',
      message: `Manual extraction requires PDF files. Received: ${contentType}`,
    });
  }

  try {
    // Buffer the upload inside the try block so that a failing upload stream
    // goes through the same status mapping and logging as a failing
    // submission instead of escaping this handler.
    const chunks: Buffer[] = [];
    for await (const chunk of file.file) {
      chunks.push(chunk);
    }
    const fileBuffer = Buffer.concat(chunks);

    if (fileBuffer.length === 0) {
      logger.warn('Empty file provided for manual extraction', {
        operation: 'ocr.controller.extractManual.empty_file',
        userId,
        fileName: file.filename,
      });
      return reply.code(400).send({
        error: 'Bad Request',
        message: 'Empty file provided',
      });
    }

    // Get optional vehicle_id from form fields. Only a plain string value is
    // forwarded; anything else is treated as "not provided".
    const rawVehicleId: unknown = file.fields?.vehicle_id?.value;
    const vehicleId = typeof rawVehicleId === 'string' ? rawVehicleId : undefined;

    const result = await ocrService.submitManualJob(userId, {
      fileBuffer,
      contentType,
      vehicleId,
    });

    logger.info('Manual extract job submitted', {
      operation: 'ocr.controller.extractManual.success',
      userId,
      jobId: result.jobId,
      status: result.status,
      estimatedSeconds: result.estimatedSeconds,
    });

    return reply.code(202).send(result);
  } catch (error: unknown) {
    // Narrow the thrown value instead of trusting it to be an Error-like
    // object; anything without a string `message` is stringified.
    const details = (typeof error === 'object' && error !== null ? error : {}) as {
      statusCode?: unknown;
      message?: unknown;
    };
    const statusCode = details.statusCode;
    const message = typeof details.message === 'string' ? details.message : String(error);

    if (statusCode === 413) {
      return reply.code(413).send({
        error: 'Payload Too Large',
        message,
      });
    }
    if (statusCode === 400) {
      return reply.code(400).send({
        error: 'Bad Request',
        message,
      });
    }

    logger.error('Manual extract failed', {
      operation: 'ocr.controller.extractManual.error',
      userId,
      error: message,
    });

    return reply.code(500).send({
      error: 'Internal Server Error',
      message: 'Manual extraction submission failed',
    });
  }
}
|
||||
|
||||
/**
|
||||
* POST /api/ocr/jobs
|
||||
* Submit an async OCR job for large files.
|
||||
|
||||
Reference in New Issue
Block a user