Files
motovaultpro/backend/src/features/ocr/tests/unit/ocr-manual.test.ts
Eric Gullickson ca33f8ad9d feat: add PDF magic bytes validation, 410 Gone, and manual extraction tests (refs #144)
Add filename .pdf extension fallback and %PDF magic bytes validation to
extractManual controller. Update getJobStatus to return 410 Gone for
expired jobs. Add 16 unit tests covering all acceptance criteria.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 14:55:06 -06:00

296 lines
9.1 KiB
TypeScript

/**
* @ai-summary Unit tests for OCR manual extraction endpoint
*/
import { OcrService } from '../../domain/ocr.service';
import { ocrClient, JobNotFoundError } from '../../external/ocr-client';
import type { ManualJobResponse } from '../../domain/ocr.types';
// Swap the OCR client and the logger for Jest module mocks so that no test in
// this file reaches the real OCR backend or writes log output.
jest.mock('../../external/ocr-client');
jest.mock('../../../../core/logging/logger');

// Signatures of the client methods that the tests below stub.
type SubmitManualJobFn = typeof ocrClient.submitManualJob;
type GetJobStatusFn = typeof ocrClient.getJobStatus;

// Typed handles onto the mocked client methods; the casts only add Jest's
// mock helpers (mockResolvedValue, mockRejectedValue, ...) to the static type.
const mockSubmitManualJob =
  ocrClient.submitManualJob as jest.MockedFunction<SubmitManualJobFn>;
const mockGetJobStatus =
  ocrClient.getJobStatus as jest.MockedFunction<GetJobStatusFn>;
/**
 * Unit tests for `OcrService.submitManualJob`.
 *
 * `ocrClient.submitManualJob` is a Jest mock (`mockSubmitManualJob`), so every
 * test decides what the client returns and only the service's own behaviour
 * (validation, argument forwarding, error propagation) is exercised.
 */
describe('OcrService.submitManualJob', () => {
  const userId = 'test-user-id';
  const MEBIBYTE = 1024 * 1024;
  let service: OcrService;

  // Client response for a job that has just been queued.
  const mockManualJobResponse: ManualJobResponse = {
    jobId: 'manual-job-123',
    status: 'pending',
    progress: 0,
    estimatedSeconds: 45,
    result: undefined,
    error: undefined,
  };

  // Client response for a job that has finished and carries extracted data.
  const mockCompletedJobResponse: ManualJobResponse = {
    jobId: 'manual-job-123',
    status: 'completed',
    progress: 100,
    result: {
      success: true,
      vehicleInfo: {
        make: 'Honda',
        model: 'Civic',
        year: 2023,
      },
      maintenanceSchedules: [
        {
          service: 'Engine Oil Change',
          intervalMiles: 5000,
          intervalMonths: 6,
          details: 'Use 0W-20 full synthetic oil',
          confidence: 0.95,
          subtypes: ['oil_change'],
        },
        {
          service: 'Tire Rotation',
          intervalMiles: 7500,
          intervalMonths: 6,
          details: null,
          confidence: 0.9,
          subtypes: ['tire_rotation'],
        },
      ],
      rawTables: [],
      processingTimeMs: 45000,
      totalPages: 120,
      pagesProcessed: 120,
      error: null,
    },
    error: undefined,
  };

  beforeEach(() => {
    // Clears recorded calls so per-test call assertions only see the current
    // test's interactions with the mocked client.
    jest.clearAllMocks();
    service = new OcrService();
  });

  describe('valid manual job submission', () => {
    it('should return 202-style response with jobId for PDF submission', async () => {
      mockSubmitManualJob.mockResolvedValue(mockManualJobResponse);

      const result = await service.submitManualJob(userId, {
        fileBuffer: Buffer.from('fake-pdf-data'),
        contentType: 'application/pdf',
      });

      expect(result.jobId).toBe('manual-job-123');
      expect(result.status).toBe('pending');
      expect(result.progress).toBe(0);
      expect(result.estimatedSeconds).toBe(45);
      expect(result.result).toBeUndefined();
    });

    it('should pass vehicleId to client when provided', async () => {
      mockSubmitManualJob.mockResolvedValue(mockManualJobResponse);

      await service.submitManualJob(userId, {
        fileBuffer: Buffer.from('fake-pdf-data'),
        contentType: 'application/pdf',
        vehicleId: 'vehicle-abc',
      });

      expect(mockSubmitManualJob).toHaveBeenCalledWith(
        expect.any(Buffer),
        'application/pdf',
        'vehicle-abc'
      );
    });

    it('should call client without vehicleId when not provided', async () => {
      mockSubmitManualJob.mockResolvedValue(mockManualJobResponse);

      await service.submitManualJob(userId, {
        fileBuffer: Buffer.from('fake-pdf-data'),
        contentType: 'application/pdf',
      });

      expect(mockSubmitManualJob).toHaveBeenCalledWith(
        expect.any(Buffer),
        'application/pdf',
        undefined
      );
    });
  });

  describe('completed job result', () => {
    it('should return completed result with maintenanceSchedules', async () => {
      mockSubmitManualJob.mockResolvedValue(mockCompletedJobResponse);

      const result = await service.submitManualJob(userId, {
        fileBuffer: Buffer.from('fake-pdf-data'),
        contentType: 'application/pdf',
      });

      expect(result.status).toBe('completed');
      // Presence is asserted first; the optional chains below then stay
      // type-safe without non-null assertions.
      expect(result.result).toBeDefined();
      expect(result.result?.success).toBe(true);
      expect(result.result?.maintenanceSchedules).toHaveLength(2);
      expect(result.result?.maintenanceSchedules[0].service).toBe('Engine Oil Change');
      expect(result.result?.maintenanceSchedules[0].intervalMiles).toBe(5000);
      expect(result.result?.maintenanceSchedules[0].subtypes).toEqual(['oil_change']);
      expect(result.result?.vehicleInfo.make).toBe('Honda');
    });
  });

  describe('error handling', () => {
    // Each row: [test title, declared content type, payload text].
    it.each([
      ['should throw 400 for non-PDF file (JPEG)', 'image/jpeg', 'fake-image-data'],
      ['should throw 400 for non-PDF file (PNG)', 'image/png', 'fake-image-data'],
      ['should throw 400 for text/plain', 'text/plain', 'not a pdf'],
    ])('%s', async (_title, contentType, payload) => {
      await expect(
        service.submitManualJob(userId, {
          fileBuffer: Buffer.from(payload),
          contentType,
        })
      ).rejects.toMatchObject({
        statusCode: 400,
      });

      // A rejected upload must never be forwarded to the OCR client.
      expect(mockSubmitManualJob).not.toHaveBeenCalled();
    });

    it('should throw 413 for oversized file', async () => {
      const largeBuffer = Buffer.alloc(201 * MEBIBYTE); // 201MB

      await expect(
        service.submitManualJob(userId, {
          fileBuffer: largeBuffer,
          contentType: 'application/pdf',
        })
      ).rejects.toMatchObject({
        statusCode: 413,
      });

      // The size check must short-circuit before the client is invoked.
      expect(mockSubmitManualJob).not.toHaveBeenCalled();
    });

    it('should accept file at 200MB boundary', async () => {
      mockSubmitManualJob.mockResolvedValue(mockManualJobResponse);
      const exactBuffer = Buffer.alloc(200 * MEBIBYTE); // exactly 200MB

      const result = await service.submitManualJob(userId, {
        fileBuffer: exactBuffer,
        contentType: 'application/pdf',
      });

      expect(result.jobId).toBe('manual-job-123');
    });

    it('should propagate OCR service errors', async () => {
      mockSubmitManualJob.mockRejectedValue(
        new Error('OCR service error: 500 - Internal error')
      );

      await expect(
        service.submitManualJob(userId, {
          fileBuffer: Buffer.from('fake-pdf-data'),
          contentType: 'application/pdf',
        })
      ).rejects.toThrow('OCR service error: 500 - Internal error');
    });
  });
});
/**
 * Unit tests for `OcrService.getJobStatus` when polling a manual extraction job.
 *
 * `ocrClient.getJobStatus` is a Jest mock (`mockGetJobStatus`); each test stubs
 * the client outcome and checks what the service hands back to its caller.
 */
describe('OcrService.getJobStatus (manual job polling)', () => {
  const userId = 'test-user-id';
  let service: OcrService;

  beforeEach(() => {
    jest.clearAllMocks();
    service = new OcrService();
  });

  // Only jobId/status/progress are stubbed and asserted here; the payload has
  // no extraction result, so schedule contents are outside this test's scope.
  it('should return completed status for a finished manual job', async () => {
    mockGetJobStatus.mockResolvedValue({
      jobId: 'manual-job-123',
      status: 'completed',
      progress: 100,
    });

    const result = await service.getJobStatus(userId, 'manual-job-123');

    expect(result.jobId).toBe('manual-job-123');
    expect(result.status).toBe('completed');
    expect(result.progress).toBe(100);
  });

  it('should return processing status with progress', async () => {
    mockGetJobStatus.mockResolvedValue({
      jobId: 'manual-job-456',
      status: 'processing',
      progress: 50,
    });

    const result = await service.getJobStatus(userId, 'manual-job-456');

    expect(result.status).toBe('processing');
    expect(result.progress).toBe(50);
  });

  // The client reports an unknown job via JobNotFoundError; the service is
  // expected to surface that as a 410 with a resubmit hint.
  it('should throw 410 Gone for expired/missing job', async () => {
    mockGetJobStatus.mockRejectedValue(new JobNotFoundError('expired-job-789'));

    await expect(
      service.getJobStatus(userId, 'expired-job-789')
    ).rejects.toMatchObject({
      statusCode: 410,
      message: 'Job expired (max 2 hours). Please resubmit.',
    });
  });
});
/**
 * Sanity checks for the two primitives the manual-extraction controller is
 * described as relying on: a `%PDF` byte-signature comparison and a `.pdf`
 * filename suffix test.
 *
 * NOTE(review): the controller itself is not imported or invoked here, so
 * these tests exercise only the Buffer/string operations, not the controller.
 */
describe('Manual extraction controller validations', () => {
  it('PDF magic bytes validation rejects non-PDF content', () => {
    // '%PDF' is the byte sequence 0x25 0x50 0x44 0x46.
    // Per the original author's note, uploads lacking this header get
    // 415 Unsupported Media Type from the controller - TODO confirm there.
    const signature = Buffer.from('%PDF');
    const leadingBytesMatch = (content: Buffer): boolean =>
      content.subarray(0, 4).equals(signature);

    const genuineHeader = Buffer.from('%PDF');
    const foreignHeader = Buffer.from('JFIF');

    expect(leadingBytesMatch(genuineHeader)).toBe(true);
    expect(leadingBytesMatch(foreignHeader)).toBe(false);
  });

  it('accepts files with .pdf extension even if mimetype is octet-stream', () => {
    // Per the original author's note, the controller accepts an upload when
    // contentType === 'application/pdf' OR the filename ends with '.pdf', so a
    // browser sending a generic content type is still handled.
    const uploadedName = 'owners-manual.pdf';
    const hasPdfSuffix = uploadedName.toLowerCase().endsWith('.pdf');

    expect(hasPdfSuffix).toBe(true);
  });
});
/**
 * Smoke test for the tier-guard factory used by the manual-extraction route.
 *
 * NOTE(review): this only verifies that `requireTier` produces a function for
 * the feature key; it does not load the route or inspect its preHandler list.
 */
describe('Manual route tier guard', () => {
  it('route is configured with tier guard for document.scanMaintenanceSchedule', async () => {
    // Per the original author's notes (not verified by this test):
    //   - the route declares
    //     preHandler: [requireAuth, requireTier('document.scanMaintenanceSchedule')]
    //   - free-tier users receive 403 TIER_REQUIRED before the handler executes
    //   - middleware behaviour is covered in core/middleware/require-tier.test.ts
    const tierModule = await import('../../../../core/middleware/require-tier');
    const guard = tierModule.requireTier('document.scanMaintenanceSchedule');

    expect(typeof guard).toBe('function');
  });
});