feat: Core OCR API Integration (#65) #74
3
.gitignore
vendored
3
.gitignore
vendored
@@ -12,6 +12,9 @@ coverage/
|
||||
*.swo
|
||||
.venv
|
||||
.playwright-mcp
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# K8s-aligned secret mounts (real files ignored; examples committed)
|
||||
secrets/**
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
"fastify": "^5.2.0",
|
||||
"fastify-plugin": "^5.0.1",
|
||||
"file-type": "^16.5.4",
|
||||
"form-data": "^4.0.0",
|
||||
"get-jwks": "^11.0.3",
|
||||
"ioredis": "^5.4.2",
|
||||
"js-yaml": "^4.1.0",
|
||||
|
||||
@@ -34,6 +34,7 @@ import { userExportRoutes } from './features/user-export';
|
||||
import { userImportRoutes } from './features/user-import';
|
||||
import { ownershipCostsRoutes } from './features/ownership-costs';
|
||||
import { subscriptionsRoutes, donationsRoutes, webhooksRoutes } from './features/subscriptions';
|
||||
import { ocrRoutes } from './features/ocr';
|
||||
import { pool } from './core/config/database';
|
||||
import { configRoutes } from './core/config/config.routes';
|
||||
|
||||
@@ -95,7 +96,7 @@ async function buildApp(): Promise<FastifyInstance> {
|
||||
status: 'healthy',
|
||||
timestamp: new Date().toISOString(),
|
||||
environment: process.env['NODE_ENV'],
|
||||
features: ['admin', 'auth', 'config', 'onboarding', 'vehicles', 'documents', 'fuel-logs', 'stations', 'maintenance', 'platform', 'notifications', 'user-profile', 'user-preferences', 'user-export', 'user-import', 'ownership-costs', 'subscriptions', 'donations']
|
||||
features: ['admin', 'auth', 'config', 'onboarding', 'vehicles', 'documents', 'fuel-logs', 'stations', 'maintenance', 'platform', 'notifications', 'user-profile', 'user-preferences', 'user-export', 'user-import', 'ownership-costs', 'subscriptions', 'donations', 'ocr']
|
||||
});
|
||||
});
|
||||
|
||||
@@ -105,7 +106,7 @@ async function buildApp(): Promise<FastifyInstance> {
|
||||
status: 'healthy',
|
||||
scope: 'api',
|
||||
timestamp: new Date().toISOString(),
|
||||
features: ['admin', 'auth', 'config', 'onboarding', 'vehicles', 'documents', 'fuel-logs', 'stations', 'maintenance', 'platform', 'notifications', 'user-profile', 'user-preferences', 'user-export', 'user-import', 'ownership-costs', 'subscriptions', 'donations']
|
||||
features: ['admin', 'auth', 'config', 'onboarding', 'vehicles', 'documents', 'fuel-logs', 'stations', 'maintenance', 'platform', 'notifications', 'user-profile', 'user-preferences', 'user-export', 'user-import', 'ownership-costs', 'subscriptions', 'donations', 'ocr']
|
||||
});
|
||||
});
|
||||
|
||||
@@ -151,6 +152,7 @@ async function buildApp(): Promise<FastifyInstance> {
|
||||
await app.register(subscriptionsRoutes, { prefix: '/api' });
|
||||
await app.register(donationsRoutes, { prefix: '/api' });
|
||||
await app.register(webhooksRoutes, { prefix: '/api' });
|
||||
await app.register(ocrRoutes, { prefix: '/api' });
|
||||
await app.register(configRoutes, { prefix: '/api' });
|
||||
|
||||
// 404 handler
|
||||
|
||||
@@ -14,6 +14,7 @@ Feature capsule directory. Each feature is 100% self-contained with api/, domain
|
||||
| `fuel-logs/` | Fuel consumption tracking | Fuel log CRUD, statistics |
|
||||
| `maintenance/` | Maintenance record management | Service records, reminders |
|
||||
| `notifications/` | Email and push notifications | Alert system, email templates |
|
||||
| `ocr/` | OCR proxy to mvp-ocr service | Image text extraction, async jobs |
|
||||
| `onboarding/` | User onboarding flow | First-time user setup |
|
||||
| `platform/` | Vehicle data and VIN decoding | Make/model lookup, VIN validation |
|
||||
| `stations/` | Gas station search and favorites | Google Maps integration, station data |
|
||||
|
||||
54
backend/src/features/ocr/README.md
Normal file
54
backend/src/features/ocr/README.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# OCR Feature
|
||||
|
||||
Backend proxy for OCR service communication. Handles authentication, validation, and file streaming to the OCR container.
|
||||
|
||||
## API Endpoints
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
|--------|----------|-------------|
|
||||
| POST | `/api/ocr/extract` | Synchronous OCR extraction (max 10MB) |
|
||||
| POST | `/api/ocr/jobs` | Submit async OCR job (max 200MB) |
|
||||
| GET | `/api/ocr/jobs/:jobId` | Poll async job status |
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
api/
|
||||
ocr.controller.ts # Request handlers
|
||||
ocr.routes.ts # Route registration
|
||||
ocr.validation.ts # Request validation types
|
||||
domain/
|
||||
ocr.service.ts # Business logic
|
||||
ocr.types.ts # TypeScript types
|
||||
external/
|
||||
ocr-client.ts # HTTP client to OCR service
|
||||
```
|
||||
|
||||
## Supported File Types
|
||||
|
||||
- HEIC (converted server-side)
|
||||
- JPEG
|
||||
- PNG
|
||||
- PDF (first page only)
|
||||
|
||||
## Response Format
|
||||
|
||||
```typescript
|
||||
interface OcrResponse {
|
||||
success: boolean;
|
||||
documentType: 'vin' | 'receipt' | 'manual' | 'unknown';
|
||||
rawText: string;
|
||||
confidence: number; // 0.0 - 1.0
|
||||
extractedFields: Record<string, { value: string; confidence: number }>;
|
||||
processingTimeMs: number;
|
||||
}
|
||||
```
|
||||
|
||||
## Async Job Flow
|
||||
|
||||
1. POST `/api/ocr/jobs` with file
|
||||
2. Receive `{ jobId, status: 'pending' }`
|
||||
3. Poll GET `/api/ocr/jobs/:jobId`
|
||||
4. When `status: 'completed'`, result contains OCR data
|
||||
|
||||
Jobs expire after 1 hour.
|
||||
275
backend/src/features/ocr/api/ocr.controller.ts
Normal file
275
backend/src/features/ocr/api/ocr.controller.ts
Normal file
@@ -0,0 +1,275 @@
|
||||
/**
|
||||
* @ai-summary Controller for OCR API endpoints
|
||||
*/
|
||||
import { FastifyReply, FastifyRequest } from 'fastify';
|
||||
import { logger } from '../../../core/logging/logger';
|
||||
import { ocrService } from '../domain/ocr.service';
|
||||
import type { ExtractQuery, JobIdParams, JobSubmitBody } from './ocr.validation';
|
||||
|
||||
/** Supported MIME types for OCR */
|
||||
const SUPPORTED_TYPES = new Set([
|
||||
'image/jpeg',
|
||||
'image/png',
|
||||
'image/heic',
|
||||
'image/heif',
|
||||
'application/pdf',
|
||||
]);
|
||||
|
||||
export class OcrController {
|
||||
/**
|
||||
* POST /api/ocr/extract
|
||||
* Extract text from an uploaded image using synchronous OCR.
|
||||
*/
|
||||
async extract(
|
||||
request: FastifyRequest<{ Querystring: ExtractQuery }>,
|
||||
reply: FastifyReply
|
||||
) {
|
||||
const userId = (request as any).user?.sub as string;
|
||||
const preprocess = request.query.preprocess !== false;
|
||||
|
||||
logger.info('OCR extract requested', {
|
||||
operation: 'ocr.controller.extract',
|
||||
userId,
|
||||
preprocess,
|
||||
});
|
||||
|
||||
// Get uploaded file
|
||||
const file = await (request as any).file({ limits: { files: 1 } });
|
||||
if (!file) {
|
||||
logger.warn('No file provided for OCR', {
|
||||
operation: 'ocr.controller.extract.no_file',
|
||||
userId,
|
||||
});
|
||||
return reply.code(400).send({
|
||||
error: 'Bad Request',
|
||||
message: 'No file provided',
|
||||
});
|
||||
}
|
||||
|
||||
// Validate content type
|
||||
const contentType = file.mimetype as string;
|
||||
if (!SUPPORTED_TYPES.has(contentType)) {
|
||||
logger.warn('Unsupported file type for OCR', {
|
||||
operation: 'ocr.controller.extract.unsupported_type',
|
||||
userId,
|
||||
contentType,
|
||||
fileName: file.filename,
|
||||
});
|
||||
return reply.code(415).send({
|
||||
error: 'Unsupported Media Type',
|
||||
message: `Unsupported file type: ${contentType}. Supported: JPEG, PNG, HEIC, PDF`,
|
||||
});
|
||||
}
|
||||
|
||||
// Read file content
|
||||
const chunks: Buffer[] = [];
|
||||
for await (const chunk of file.file) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
const fileBuffer = Buffer.concat(chunks);
|
||||
|
||||
if (fileBuffer.length === 0) {
|
||||
logger.warn('Empty file provided for OCR', {
|
||||
operation: 'ocr.controller.extract.empty_file',
|
||||
userId,
|
||||
fileName: file.filename,
|
||||
});
|
||||
return reply.code(400).send({
|
||||
error: 'Bad Request',
|
||||
message: 'Empty file provided',
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await ocrService.extract(userId, {
|
||||
fileBuffer,
|
||||
contentType,
|
||||
preprocess,
|
||||
});
|
||||
|
||||
logger.info('OCR extract completed', {
|
||||
operation: 'ocr.controller.extract.success',
|
||||
userId,
|
||||
success: result.success,
|
||||
documentType: result.documentType,
|
||||
processingTimeMs: result.processingTimeMs,
|
||||
});
|
||||
|
||||
return reply.code(200).send(result);
|
||||
} catch (error: any) {
|
||||
if (error.statusCode === 413) {
|
||||
return reply.code(413).send({
|
||||
error: 'Payload Too Large',
|
||||
message: error.message,
|
||||
});
|
||||
}
|
||||
if (error.statusCode === 415) {
|
||||
return reply.code(415).send({
|
||||
error: 'Unsupported Media Type',
|
||||
message: error.message,
|
||||
});
|
||||
}
|
||||
|
||||
logger.error('OCR extract failed', {
|
||||
operation: 'ocr.controller.extract.error',
|
||||
userId,
|
||||
error: error.message,
|
||||
});
|
||||
|
||||
return reply.code(500).send({
|
||||
error: 'Internal Server Error',
|
||||
message: 'OCR processing failed',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* POST /api/ocr/jobs
|
||||
* Submit an async OCR job for large files.
|
||||
*/
|
||||
async submitJob(
|
||||
request: FastifyRequest<{ Body: JobSubmitBody }>,
|
||||
reply: FastifyReply
|
||||
) {
|
||||
const userId = (request as any).user?.sub as string;
|
||||
|
||||
logger.info('OCR job submit requested', {
|
||||
operation: 'ocr.controller.submitJob',
|
||||
userId,
|
||||
});
|
||||
|
||||
// Get uploaded file
|
||||
const file = await (request as any).file({ limits: { files: 1 } });
|
||||
if (!file) {
|
||||
logger.warn('No file provided for OCR job', {
|
||||
operation: 'ocr.controller.submitJob.no_file',
|
||||
userId,
|
||||
});
|
||||
return reply.code(400).send({
|
||||
error: 'Bad Request',
|
||||
message: 'No file provided',
|
||||
});
|
||||
}
|
||||
|
||||
// Validate content type
|
||||
const contentType = file.mimetype as string;
|
||||
if (!SUPPORTED_TYPES.has(contentType)) {
|
||||
logger.warn('Unsupported file type for OCR job', {
|
||||
operation: 'ocr.controller.submitJob.unsupported_type',
|
||||
userId,
|
||||
contentType,
|
||||
fileName: file.filename,
|
||||
});
|
||||
return reply.code(415).send({
|
||||
error: 'Unsupported Media Type',
|
||||
message: `Unsupported file type: ${contentType}. Supported: JPEG, PNG, HEIC, PDF`,
|
||||
});
|
||||
}
|
||||
|
||||
// Read file content
|
||||
const chunks: Buffer[] = [];
|
||||
for await (const chunk of file.file) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
const fileBuffer = Buffer.concat(chunks);
|
||||
|
||||
if (fileBuffer.length === 0) {
|
||||
logger.warn('Empty file provided for OCR job', {
|
||||
operation: 'ocr.controller.submitJob.empty_file',
|
||||
userId,
|
||||
fileName: file.filename,
|
||||
});
|
||||
return reply.code(400).send({
|
||||
error: 'Bad Request',
|
||||
message: 'Empty file provided',
|
||||
});
|
||||
}
|
||||
|
||||
// Get callback URL from form data (if present)
|
||||
const callbackUrl = file.fields?.callbackUrl?.value as string | undefined;
|
||||
|
||||
try {
|
||||
const result = await ocrService.submitJob(userId, {
|
||||
fileBuffer,
|
||||
contentType,
|
||||
callbackUrl,
|
||||
});
|
||||
|
||||
logger.info('OCR job submitted', {
|
||||
operation: 'ocr.controller.submitJob.success',
|
||||
userId,
|
||||
jobId: result.jobId,
|
||||
status: result.status,
|
||||
});
|
||||
|
||||
return reply.code(202).send(result);
|
||||
} catch (error: any) {
|
||||
if (error.statusCode === 413) {
|
||||
return reply.code(413).send({
|
||||
error: 'Payload Too Large',
|
||||
message: error.message,
|
||||
});
|
||||
}
|
||||
if (error.statusCode === 415) {
|
||||
return reply.code(415).send({
|
||||
error: 'Unsupported Media Type',
|
||||
message: error.message,
|
||||
});
|
||||
}
|
||||
|
||||
logger.error('OCR job submit failed', {
|
||||
operation: 'ocr.controller.submitJob.error',
|
||||
userId,
|
||||
error: error.message,
|
||||
});
|
||||
|
||||
return reply.code(500).send({
|
||||
error: 'Internal Server Error',
|
||||
message: 'Job submission failed',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* GET /api/ocr/jobs/:jobId
|
||||
* Get the status of an async OCR job.
|
||||
*/
|
||||
async getJobStatus(
|
||||
request: FastifyRequest<{ Params: JobIdParams }>,
|
||||
reply: FastifyReply
|
||||
) {
|
||||
const userId = (request as any).user?.sub as string;
|
||||
const { jobId } = request.params;
|
||||
|
||||
logger.debug('OCR job status requested', {
|
||||
operation: 'ocr.controller.getJobStatus',
|
||||
userId,
|
||||
jobId,
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await ocrService.getJobStatus(userId, jobId);
|
||||
|
||||
return reply.code(200).send(result);
|
||||
} catch (error: any) {
|
||||
if (error.statusCode === 404) {
|
||||
return reply.code(404).send({
|
||||
error: 'Not Found',
|
||||
message: error.message,
|
||||
});
|
||||
}
|
||||
|
||||
logger.error('OCR job status failed', {
|
||||
operation: 'ocr.controller.getJobStatus.error',
|
||||
userId,
|
||||
jobId,
|
||||
error: error.message,
|
||||
});
|
||||
|
||||
return reply.code(500).send({
|
||||
error: 'Internal Server Error',
|
||||
message: 'Failed to retrieve job status',
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
31
backend/src/features/ocr/api/ocr.routes.ts
Normal file
31
backend/src/features/ocr/api/ocr.routes.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* @ai-summary Fastify routes for OCR API
|
||||
*/
|
||||
import { FastifyInstance, FastifyPluginAsync, FastifyPluginOptions } from 'fastify';
|
||||
import { OcrController } from './ocr.controller';
|
||||
|
||||
export const ocrRoutes: FastifyPluginAsync = async (
|
||||
fastify: FastifyInstance,
|
||||
_opts: FastifyPluginOptions
|
||||
) => {
|
||||
const ctrl = new OcrController();
|
||||
const requireAuth = fastify.authenticate.bind(fastify);
|
||||
|
||||
// POST /api/ocr/extract - Synchronous OCR extraction
|
||||
fastify.post('/ocr/extract', {
|
||||
preHandler: [requireAuth],
|
||||
handler: ctrl.extract.bind(ctrl),
|
||||
});
|
||||
|
||||
// POST /api/ocr/jobs - Submit async OCR job
|
||||
fastify.post('/ocr/jobs', {
|
||||
preHandler: [requireAuth],
|
||||
handler: ctrl.submitJob.bind(ctrl),
|
||||
});
|
||||
|
||||
// GET /api/ocr/jobs/:jobId - Get job status
|
||||
fastify.get<{ Params: { jobId: string } }>('/ocr/jobs/:jobId', {
|
||||
preHandler: [requireAuth],
|
||||
handler: ctrl.getJobStatus.bind(ctrl),
|
||||
});
|
||||
};
|
||||
18
backend/src/features/ocr/api/ocr.validation.ts
Normal file
18
backend/src/features/ocr/api/ocr.validation.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* @ai-summary Validation types for OCR API
|
||||
*/
|
||||
|
||||
/** Query parameters for OCR extract endpoint */
|
||||
export interface ExtractQuery {
|
||||
preprocess?: boolean;
|
||||
}
|
||||
|
||||
/** Path parameters for job status endpoint */
|
||||
export interface JobIdParams {
|
||||
jobId: string;
|
||||
}
|
||||
|
||||
/** Form data for job submission */
|
||||
export interface JobSubmitBody {
|
||||
callbackUrl?: string;
|
||||
}
|
||||
208
backend/src/features/ocr/domain/ocr.service.ts
Normal file
208
backend/src/features/ocr/domain/ocr.service.ts
Normal file
@@ -0,0 +1,208 @@
|
||||
/**
|
||||
* @ai-summary Domain service for OCR operations
|
||||
*/
|
||||
import { logger } from '../../../core/logging/logger';
|
||||
import { ocrClient, JobNotFoundError } from '../external/ocr-client';
|
||||
import type {
|
||||
JobResponse,
|
||||
OcrExtractRequest,
|
||||
OcrJobSubmitRequest,
|
||||
OcrResponse,
|
||||
} from './ocr.types';
|
||||
|
||||
/** Maximum file size for sync processing (10MB) */
|
||||
const MAX_SYNC_SIZE = 10 * 1024 * 1024;
|
||||
|
||||
/** Maximum file size for async processing (200MB) */
|
||||
const MAX_ASYNC_SIZE = 200 * 1024 * 1024;
|
||||
|
||||
/** Supported MIME types */
|
||||
const SUPPORTED_TYPES = new Set([
|
||||
'image/jpeg',
|
||||
'image/png',
|
||||
'image/heic',
|
||||
'image/heif',
|
||||
'application/pdf',
|
||||
]);
|
||||
|
||||
/**
|
||||
* Domain service for OCR operations.
|
||||
* Handles business logic and validation for OCR requests.
|
||||
*/
|
||||
export class OcrService {
|
||||
/**
|
||||
* Extract text from an image using synchronous OCR.
|
||||
*
|
||||
* @param userId - User ID for logging
|
||||
* @param request - OCR extraction request
|
||||
* @returns OCR extraction result
|
||||
*/
|
||||
async extract(userId: string, request: OcrExtractRequest): Promise<OcrResponse> {
|
||||
// Validate file size for sync processing
|
||||
if (request.fileBuffer.length > MAX_SYNC_SIZE) {
|
||||
const err: any = new Error(
|
||||
`File too large for sync processing. Max: ${MAX_SYNC_SIZE / (1024 * 1024)}MB. Use async job submission for larger files.`
|
||||
);
|
||||
err.statusCode = 413;
|
||||
throw err;
|
||||
}
|
||||
|
||||
// Validate content type
|
||||
if (!SUPPORTED_TYPES.has(request.contentType)) {
|
||||
const err: any = new Error(
|
||||
`Unsupported file type: ${request.contentType}. Supported: ${[...SUPPORTED_TYPES].join(', ')}`
|
||||
);
|
||||
err.statusCode = 415;
|
||||
throw err;
|
||||
}
|
||||
|
||||
logger.info('OCR extract requested', {
|
||||
operation: 'ocr.service.extract',
|
||||
userId,
|
||||
contentType: request.contentType,
|
||||
fileSize: request.fileBuffer.length,
|
||||
preprocess: request.preprocess ?? true,
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await ocrClient.extract(
|
||||
request.fileBuffer,
|
||||
request.contentType,
|
||||
request.preprocess ?? true
|
||||
);
|
||||
|
||||
logger.info('OCR extract completed', {
|
||||
operation: 'ocr.service.extract.success',
|
||||
userId,
|
||||
success: result.success,
|
||||
documentType: result.documentType,
|
||||
confidence: result.confidence,
|
||||
processingTimeMs: result.processingTimeMs,
|
||||
textLength: result.rawText.length,
|
||||
});
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
logger.error('OCR extract failed', {
|
||||
operation: 'ocr.service.extract.error',
|
||||
userId,
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Submit an async OCR job for large files.
|
||||
*
|
||||
* @param userId - User ID for logging
|
||||
* @param request - Job submission request
|
||||
* @returns Job response with job ID
|
||||
*/
|
||||
async submitJob(userId: string, request: OcrJobSubmitRequest): Promise<JobResponse> {
|
||||
// Validate file size for async processing
|
||||
if (request.fileBuffer.length > MAX_ASYNC_SIZE) {
|
||||
const err: any = new Error(
|
||||
`File too large. Max: ${MAX_ASYNC_SIZE / (1024 * 1024)}MB.`
|
||||
);
|
||||
err.statusCode = 413;
|
||||
throw err;
|
||||
}
|
||||
|
||||
// Validate content type
|
||||
if (!SUPPORTED_TYPES.has(request.contentType)) {
|
||||
const err: any = new Error(
|
||||
`Unsupported file type: ${request.contentType}. Supported: ${[...SUPPORTED_TYPES].join(', ')}`
|
||||
);
|
||||
err.statusCode = 415;
|
||||
throw err;
|
||||
}
|
||||
|
||||
logger.info('OCR job submit requested', {
|
||||
operation: 'ocr.service.submitJob',
|
||||
userId,
|
||||
contentType: request.contentType,
|
||||
fileSize: request.fileBuffer.length,
|
||||
hasCallback: !!request.callbackUrl,
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await ocrClient.submitJob(
|
||||
request.fileBuffer,
|
||||
request.contentType,
|
||||
request.callbackUrl
|
||||
);
|
||||
|
||||
logger.info('OCR job submitted', {
|
||||
operation: 'ocr.service.submitJob.success',
|
||||
userId,
|
||||
jobId: result.jobId,
|
||||
status: result.status,
|
||||
});
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
logger.error('OCR job submit failed', {
|
||||
operation: 'ocr.service.submitJob.error',
|
||||
userId,
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the status of an async OCR job.
|
||||
*
|
||||
* @param userId - User ID for logging
|
||||
* @param jobId - Job ID to check
|
||||
* @returns Job status response
|
||||
*/
|
||||
async getJobStatus(userId: string, jobId: string): Promise<JobResponse> {
|
||||
logger.debug('OCR job status requested', {
|
||||
operation: 'ocr.service.getJobStatus',
|
||||
userId,
|
||||
jobId,
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await ocrClient.getJobStatus(jobId);
|
||||
|
||||
logger.debug('OCR job status retrieved', {
|
||||
operation: 'ocr.service.getJobStatus.success',
|
||||
userId,
|
||||
jobId,
|
||||
status: result.status,
|
||||
progress: result.progress,
|
||||
});
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
if (error instanceof JobNotFoundError) {
|
||||
const err: any = new Error(`Job ${jobId} not found. Jobs expire after 1 hour.`);
|
||||
err.statusCode = 404;
|
||||
throw err;
|
||||
}
|
||||
|
||||
logger.error('OCR job status failed', {
|
||||
operation: 'ocr.service.getJobStatus.error',
|
||||
userId,
|
||||
jobId,
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the OCR service is available.
|
||||
*
|
||||
* @returns true if OCR service is healthy
|
||||
*/
|
||||
async isServiceHealthy(): Promise<boolean> {
|
||||
return ocrClient.isHealthy();
|
||||
}
|
||||
}
|
||||
|
||||
/** Singleton instance */
|
||||
export const ocrService = new OcrService();
|
||||
53
backend/src/features/ocr/domain/ocr.types.ts
Normal file
53
backend/src/features/ocr/domain/ocr.types.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
/**
|
||||
* @ai-summary TypeScript types for OCR feature
|
||||
*/
|
||||
|
||||
/** Types of documents that can be processed by OCR */
|
||||
export type DocumentType = 'vin' | 'receipt' | 'manual' | 'unknown';
|
||||
|
||||
/** A single extracted field with confidence score */
|
||||
export interface ExtractedField {
|
||||
value: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/** Response from OCR extraction */
|
||||
export interface OcrResponse {
|
||||
success: boolean;
|
||||
documentType: DocumentType;
|
||||
rawText: string;
|
||||
confidence: number;
|
||||
extractedFields: Record<string, ExtractedField>;
|
||||
processingTimeMs: number;
|
||||
}
|
||||
|
||||
/** Status of an async OCR job */
|
||||
export type JobStatus = 'pending' | 'processing' | 'completed' | 'failed';
|
||||
|
||||
/** Response for async job status */
|
||||
export interface JobResponse {
|
||||
jobId: string;
|
||||
status: JobStatus;
|
||||
progress?: number;
|
||||
result?: OcrResponse;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/** Request to submit an async OCR job */
|
||||
export interface JobSubmitRequest {
|
||||
callbackUrl?: string;
|
||||
}
|
||||
|
||||
/** Internal request to OCR service */
|
||||
export interface OcrExtractRequest {
|
||||
fileBuffer: Buffer;
|
||||
contentType: string;
|
||||
preprocess?: boolean;
|
||||
}
|
||||
|
||||
/** Internal request to submit async job */
|
||||
export interface OcrJobSubmitRequest {
|
||||
fileBuffer: Buffer;
|
||||
contentType: string;
|
||||
callbackUrl?: string;
|
||||
}
|
||||
229
backend/src/features/ocr/external/ocr-client.ts
vendored
Normal file
229
backend/src/features/ocr/external/ocr-client.ts
vendored
Normal file
@@ -0,0 +1,229 @@
|
||||
/**
|
||||
* @ai-summary HTTP client for OCR service communication
|
||||
*/
|
||||
import FormData from 'form-data';
|
||||
import { logger } from '../../../core/logging/logger';
|
||||
import type { JobResponse, OcrResponse } from '../domain/ocr.types';
|
||||
|
||||
/** OCR service configuration */
|
||||
const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000';
|
||||
const OCR_TIMEOUT_MS = 30000; // 30 seconds for sync operations
|
||||
|
||||
/**
|
||||
* HTTP client for communicating with the OCR service.
|
||||
*/
|
||||
export class OcrClient {
|
||||
private readonly baseUrl: string;
|
||||
|
||||
constructor(baseUrl: string = OCR_SERVICE_URL) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract text from an image using OCR.
|
||||
*
|
||||
* @param fileBuffer - Image file buffer
|
||||
* @param contentType - MIME type of the file
|
||||
* @param preprocess - Whether to apply preprocessing (default: true)
|
||||
* @returns OCR extraction result
|
||||
*/
|
||||
async extract(
|
||||
fileBuffer: Buffer,
|
||||
contentType: string,
|
||||
preprocess: boolean = true
|
||||
): Promise<OcrResponse> {
|
||||
const formData = new FormData();
|
||||
formData.append('file', fileBuffer, {
|
||||
filename: this.getFilenameFromContentType(contentType),
|
||||
contentType,
|
||||
});
|
||||
|
||||
const url = `${this.baseUrl}/extract?preprocess=${preprocess}`;
|
||||
|
||||
logger.info('OCR extract request', {
|
||||
operation: 'ocr.client.extract',
|
||||
url,
|
||||
contentType,
|
||||
fileSize: fileBuffer.length,
|
||||
preprocess,
|
||||
});
|
||||
|
||||
const response = await this.fetchWithTimeout(url, {
|
||||
method: 'POST',
|
||||
body: formData as any,
|
||||
headers: formData.getHeaders(),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
logger.error('OCR extract failed', {
|
||||
operation: 'ocr.client.extract.error',
|
||||
status: response.status,
|
||||
error: errorText,
|
||||
});
|
||||
throw new Error(`OCR service error: ${response.status} - ${errorText}`);
|
||||
}
|
||||
|
||||
const result = (await response.json()) as OcrResponse;
|
||||
|
||||
logger.info('OCR extract completed', {
|
||||
operation: 'ocr.client.extract.success',
|
||||
success: result.success,
|
||||
documentType: result.documentType,
|
||||
confidence: result.confidence,
|
||||
processingTimeMs: result.processingTimeMs,
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Submit an async OCR job for large files.
|
||||
*
|
||||
* @param fileBuffer - Image file buffer
|
||||
* @param contentType - MIME type of the file
|
||||
* @param callbackUrl - Optional URL to call when job completes
|
||||
* @returns Job submission response
|
||||
*/
|
||||
async submitJob(
|
||||
fileBuffer: Buffer,
|
||||
contentType: string,
|
||||
callbackUrl?: string
|
||||
): Promise<JobResponse> {
|
||||
const formData = new FormData();
|
||||
formData.append('file', fileBuffer, {
|
||||
filename: this.getFilenameFromContentType(contentType),
|
||||
contentType,
|
||||
});
|
||||
if (callbackUrl) {
|
||||
formData.append('callback_url', callbackUrl);
|
||||
}
|
||||
|
||||
const url = `${this.baseUrl}/jobs`;
|
||||
|
||||
logger.info('OCR job submit request', {
|
||||
operation: 'ocr.client.submitJob',
|
||||
url,
|
||||
contentType,
|
||||
fileSize: fileBuffer.length,
|
||||
hasCallback: !!callbackUrl,
|
||||
});
|
||||
|
||||
const response = await this.fetchWithTimeout(url, {
|
||||
method: 'POST',
|
||||
body: formData as any,
|
||||
headers: formData.getHeaders(),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
logger.error('OCR job submit failed', {
|
||||
operation: 'ocr.client.submitJob.error',
|
||||
status: response.status,
|
||||
error: errorText,
|
||||
});
|
||||
throw new Error(`OCR service error: ${response.status} - ${errorText}`);
|
||||
}
|
||||
|
||||
const result = (await response.json()) as JobResponse;
|
||||
|
||||
logger.info('OCR job submitted', {
|
||||
operation: 'ocr.client.submitJob.success',
|
||||
jobId: result.jobId,
|
||||
status: result.status,
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the status of an async OCR job.
|
||||
*
|
||||
* @param jobId - Job ID to check
|
||||
* @returns Job status response
|
||||
*/
|
||||
async getJobStatus(jobId: string): Promise<JobResponse> {
|
||||
const url = `${this.baseUrl}/jobs/${jobId}`;
|
||||
|
||||
logger.debug('OCR job status request', {
|
||||
operation: 'ocr.client.getJobStatus',
|
||||
jobId,
|
||||
});
|
||||
|
||||
const response = await this.fetchWithTimeout(url, {
|
||||
method: 'GET',
|
||||
});
|
||||
|
||||
if (response.status === 404) {
|
||||
throw new JobNotFoundError(jobId);
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
logger.error('OCR job status failed', {
|
||||
operation: 'ocr.client.getJobStatus.error',
|
||||
jobId,
|
||||
status: response.status,
|
||||
error: errorText,
|
||||
});
|
||||
throw new Error(`OCR service error: ${response.status} - ${errorText}`);
|
||||
}
|
||||
|
||||
return (await response.json()) as JobResponse;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the OCR service is healthy.
|
||||
*
|
||||
* @returns true if healthy, false otherwise
|
||||
*/
|
||||
async isHealthy(): Promise<boolean> {
|
||||
try {
|
||||
const response = await this.fetchWithTimeout(`${this.baseUrl}/health`, {
|
||||
method: 'GET',
|
||||
});
|
||||
return response.ok;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private async fetchWithTimeout(
|
||||
url: string,
|
||||
options: RequestInit & { headers?: Record<string, string> }
|
||||
): Promise<Response> {
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), OCR_TIMEOUT_MS);
|
||||
|
||||
try {
|
||||
return await fetch(url, {
|
||||
...options,
|
||||
signal: controller.signal,
|
||||
});
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
private getFilenameFromContentType(contentType: string): string {
|
||||
const extensions: Record<string, string> = {
|
||||
'image/jpeg': 'image.jpg',
|
||||
'image/png': 'image.png',
|
||||
'image/heic': 'image.heic',
|
||||
'image/heif': 'image.heif',
|
||||
'application/pdf': 'document.pdf',
|
||||
};
|
||||
return extensions[contentType] || 'file.bin';
|
||||
}
|
||||
}
|
||||
|
||||
/** Error thrown when a job is not found */
|
||||
export class JobNotFoundError extends Error {
|
||||
constructor(jobId: string) {
|
||||
super(`Job ${jobId} not found`);
|
||||
this.name = 'JobNotFoundError';
|
||||
}
|
||||
}
|
||||
|
||||
/** Singleton instance */
|
||||
export const ocrClient = new OcrClient();
|
||||
11
backend/src/features/ocr/index.ts
Normal file
11
backend/src/features/ocr/index.ts
Normal file
@@ -0,0 +1,11 @@
|
||||
/**
|
||||
* @ai-summary Public API for OCR feature capsule
|
||||
*/
|
||||
export { ocrRoutes } from './api/ocr.routes';
|
||||
export type {
|
||||
DocumentType,
|
||||
ExtractedField,
|
||||
JobResponse,
|
||||
JobStatus,
|
||||
OcrResponse,
|
||||
} from './domain/ocr.types';
|
||||
@@ -173,8 +173,14 @@ services:
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
LOG_LEVEL: info
|
||||
REDIS_HOST: mvp-redis
|
||||
REDIS_PORT: 6379
|
||||
REDIS_DB: 1
|
||||
networks:
|
||||
- backend
|
||||
- database
|
||||
depends_on:
|
||||
- mvp-redis
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||
interval: 30s
|
||||
|
||||
@@ -11,5 +11,10 @@ class Settings:
|
||||
self.port: int = int(os.getenv("PORT", "8000"))
|
||||
self.tesseract_cmd: str = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract")
|
||||
|
||||
# Redis configuration for job queue
|
||||
self.redis_host: str = os.getenv("REDIS_HOST", "mvp-redis")
|
||||
self.redis_port: int = int(os.getenv("REDIS_PORT", "6379"))
|
||||
self.redis_db: int = int(os.getenv("REDIS_DB", "1"))
|
||||
|
||||
|
||||
settings = Settings()
|
||||
|
||||
@@ -1,14 +1,44 @@
|
||||
"""OCR Service FastAPI Application."""
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import AsyncIterator
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app.config import settings
|
||||
from app.routers import extract_router, jobs_router
|
||||
from app.services import job_queue
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=getattr(logging, settings.log_level.upper(), logging.INFO),
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Application lifespan handler for startup/shutdown."""
    # Startup: nothing to initialize eagerly — connections are created lazily.
    logger.info("OCR service starting up")
    yield
    # Shutdown: release the shared Redis connection held by the job queue.
    logger.info("OCR service shutting down")
    await job_queue.close()
|
||||
|
||||
|
||||
# FastAPI application instance; `lifespan` handles startup/shutdown hooks.
app = FastAPI(
    title="MotoVaultPro OCR Service",
    description="OCR processing service for vehicle documents",
    version="1.0.0",
    lifespan=lifespan,
)

# Include routers: POST /extract (sync) and /jobs (async queue + polling).
app.include_router(extract_router)
app.include_router(jobs_router)
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check() -> dict:
|
||||
@@ -23,4 +53,9 @@ async def root() -> dict:
|
||||
"service": "mvp-ocr",
|
||||
"version": "1.0.0",
|
||||
"log_level": settings.log_level,
|
||||
"endpoints": [
|
||||
"POST /extract - Synchronous OCR extraction",
|
||||
"POST /jobs - Submit async OCR job",
|
||||
"GET /jobs/{job_id} - Get async job status",
|
||||
],
|
||||
}
|
||||
|
||||
18
ocr/app/models/__init__.py
Normal file
18
ocr/app/models/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""Pydantic models for OCR service."""
|
||||
from .schemas import (
|
||||
DocumentType,
|
||||
ExtractedField,
|
||||
JobResponse,
|
||||
JobStatus,
|
||||
JobSubmitRequest,
|
||||
OcrResponse,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"DocumentType",
|
||||
"ExtractedField",
|
||||
"JobResponse",
|
||||
"JobStatus",
|
||||
"JobSubmitRequest",
|
||||
"OcrResponse",
|
||||
]
|
||||
65
ocr/app/models/schemas.py
Normal file
65
ocr/app/models/schemas.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""Pydantic models for OCR API request/response validation."""
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class DocumentType(str, Enum):
    """Types of documents that can be processed.

    str-valued so members serialize directly into JSON responses.
    """

    VIN = "vin"          # title/registration documents containing a VIN
    RECEIPT = "receipt"  # purchase/fuel receipts
    MANUAL = "manual"    # owner's-manual / maintenance-schedule pages
    UNKNOWN = "unknown"  # no recognized document indicators
|
||||
|
||||
|
||||
class ExtractedField(BaseModel):
    """A single extracted field with confidence score."""

    value: str
    # Normalized confidence in [0, 1] (the OCR service divides
    # Tesseract's 0-100 score by 100; heuristic extractors assign fixed values).
    confidence: float = Field(ge=0.0, le=1.0)
|
||||
|
||||
|
||||
class OcrResponse(BaseModel):
    """Response from OCR extraction.

    Field aliases are camelCase to match the Node backend's JSON
    conventions; ``populate_by_name`` also allows snake_case construction.
    """

    success: bool
    document_type: DocumentType = Field(alias="documentType")
    raw_text: str = Field(alias="rawText")
    confidence: float = Field(ge=0.0, le=1.0)
    # Keyed by field name ("vin", "total", "date", "gallons", ...).
    extracted_fields: dict[str, ExtractedField] = Field(
        default_factory=dict, alias="extractedFields"
    )
    processing_time_ms: int = Field(alias="processingTimeMs")

    model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class JobStatus(str, Enum):
    """Status of an async OCR job."""

    PENDING = "pending"        # queued, not yet picked up by a worker
    PROCESSING = "processing"  # OCR running; progress percentage available
    COMPLETED = "completed"    # finished successfully; result stored
    FAILED = "failed"          # errored; error message stored
|
||||
|
||||
|
||||
class JobResponse(BaseModel):
    """Response for async job status."""

    job_id: str = Field(alias="jobId")
    status: JobStatus
    # Percentage 0-100; populated only while status == PROCESSING.
    progress: Optional[int] = Field(default=None, ge=0, le=100)
    # Present only when status == COMPLETED.
    result: Optional[OcrResponse] = None
    # Present only when status == FAILED.
    error: Optional[str] = None

    model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class JobSubmitRequest(BaseModel):
    """Request to submit an async OCR job."""

    # Optional webhook POSTed with the job result on completion.
    callback_url: Optional[str] = Field(default=None, alias="callbackUrl")

    model_config = {"populate_by_name": True}
|
||||
5
ocr/app/routers/__init__.py
Normal file
5
ocr/app/routers/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""OCR API routers."""
|
||||
from .extract import router as extract_router
|
||||
from .jobs import router as jobs_router
|
||||
|
||||
__all__ = ["extract_router", "jobs_router"]
|
||||
69
ocr/app/routers/extract.py
Normal file
69
ocr/app/routers/extract.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""OCR extraction endpoints."""
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
|
||||
|
||||
from app.models import OcrResponse
|
||||
from app.services import ocr_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/extract", tags=["extract"])
|
||||
|
||||
# Maximum file size for synchronous processing (10MB)
|
||||
MAX_SYNC_SIZE = 10 * 1024 * 1024
|
||||
|
||||
|
||||
@router.post("", response_model=OcrResponse)
async def extract_text(
    file: UploadFile = File(..., description="Image file to process"),
    preprocess: bool = Query(True, description="Apply image preprocessing"),
) -> OcrResponse:
    """
    Extract text from an uploaded image using OCR.

    Supports HEIC, JPEG, PNG, and PDF (first page only) formats.
    Processing time target: <3 seconds for typical photos.

    - **file**: Image file (max 10MB for sync processing)
    - **preprocess**: Whether to apply deskew/denoise preprocessing (default: true)
    """
    # Guard clause: the multipart part must carry a filename.
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    payload = await file.read()
    size = len(payload)

    # Oversized uploads are redirected to the async endpoint.
    if size > MAX_SYNC_SIZE:
        raise HTTPException(
            status_code=413,
            detail=f"File too large for sync processing. Max: {MAX_SYNC_SIZE // (1024*1024)}MB. Use /jobs for larger files.",
        )

    # A zero-byte upload cannot contain anything to OCR.
    if size == 0:
        raise HTTPException(status_code=400, detail="Empty file provided")

    logger.info(
        f"Processing file: {file.filename}, size: {size} bytes, content_type: {file.content_type}"
    )

    # Run the synchronous OCR pipeline (format conversion, preprocessing,
    # Tesseract, field extraction).
    result = ocr_service.extract(
        file_bytes=payload,
        content_type=file.content_type,
        preprocess=preprocess,
    )

    if not result.success:
        logger.warning(f"OCR extraction failed for {file.filename}")
        raise HTTPException(
            status_code=422,
            detail="Failed to extract text from image. Ensure the file is a valid image format.",
        )

    return result
|
||||
142
ocr/app/routers/jobs.py
Normal file
142
ocr/app/routers/jobs.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""Async OCR job endpoints."""
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, UploadFile
|
||||
|
||||
from app.models import JobResponse, JobSubmitRequest
|
||||
from app.services import job_queue, ocr_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/jobs", tags=["jobs"])
|
||||
|
||||
# Maximum file size for async processing (200MB)
|
||||
MAX_ASYNC_SIZE = 200 * 1024 * 1024
|
||||
|
||||
|
||||
@router.post("", response_model=JobResponse)
async def submit_job(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(..., description="Image file to process"),
    callback_url: Optional[str] = Form(None, description="URL to call when job completes"),
) -> JobResponse:
    """
    Submit an async OCR job for large files.

    Use this endpoint for files larger than 10MB or when you don't want to wait
    for processing to complete. Poll GET /jobs/{job_id} for status.

    - **file**: Image file (max 200MB)
    - **callback_url**: Optional webhook URL to receive job completion notification
    """
    # Guard clause: the multipart part must carry a filename.
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    payload = await file.read()
    size = len(payload)

    # Enforce the async upload ceiling.
    if size > MAX_ASYNC_SIZE:
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Max: {MAX_ASYNC_SIZE // (1024*1024)}MB.",
        )

    if size == 0:
        raise HTTPException(status_code=400, detail="Empty file provided")

    logger.info(
        f"Submitting async job: {file.filename}, size: {size} bytes, content_type: {file.content_type}"
    )

    # Persist the job and its bytes in Redis, then hand processing to a
    # FastAPI background task that runs after the response is sent.
    job_id = await job_queue.submit_job(
        file_bytes=payload,
        content_type=file.content_type or "application/octet-stream",
        callback_url=callback_url,
    )
    background_tasks.add_task(process_job, job_id)

    # The caller immediately receives a pollable pending status.
    return JobResponse(jobId=job_id, status="pending", progress=0)
|
||||
|
||||
|
||||
@router.get("/{job_id}", response_model=JobResponse)
async def get_job_status(job_id: str) -> JobResponse:
    """
    Get the status of an async OCR job.

    Poll this endpoint to check job progress and retrieve results.

    Returns:
        - **pending**: Job is queued
        - **processing**: Job is being processed (includes progress %)
        - **completed**: Job finished successfully (includes result)
        - **failed**: Job failed (includes error message)
    """
    job = await job_queue.get_job_status(job_id)
    if job is not None:
        return job

    # Unknown id, or the job's 1-hour TTL has lapsed.
    raise HTTPException(
        status_code=404,
        detail=f"Job {job_id} not found. Jobs expire after 1 hour.",
    )
|
||||
|
||||
|
||||
async def process_job(job_id: str) -> None:
    """Background task to process an OCR job.

    Writes progress milestones (10/30/50/90) to Redis so pollers of
    GET /jobs/{job_id} can observe advancement. Any exception marks the
    job failed with the exception text; this function never raises.
    """
    logger.info(f"Starting job {job_id}")

    try:
        # Mark the job as picked up.
        await job_queue.update_job_progress(job_id, 10)

        # Fetch the raw bytes stored at submission time.
        file_bytes = await job_queue.get_job_data(job_id)
        if not file_bytes:
            await job_queue.fail_job(job_id, "Job data not found")
            return

        await job_queue.update_job_progress(job_id, 30)

        # Bail out quietly if the job metadata expired in the meantime.
        # NOTE(review): the stored content_type is never forwarded to
        # ocr_service.extract below, so the MIME type is re-detected from
        # the bytes — confirm this is intentional.
        status = await job_queue.get_job_status(job_id)
        if not status:
            return

        await job_queue.update_job_progress(job_id, 50)

        # Tesseract OCR is CPU-bound; run it in the default thread pool so
        # the event loop stays responsive. get_running_loop() is the
        # supported API inside a coroutine (get_event_loop() is deprecated
        # in this context since Python 3.10).
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            lambda: ocr_service.extract(
                file_bytes=file_bytes,
                preprocess=True,
            ),
        )

        await job_queue.update_job_progress(job_id, 90)

        if result.success:
            await job_queue.complete_job(job_id, result)
        else:
            await job_queue.fail_job(job_id, "OCR extraction failed")

    except Exception as e:
        logger.error(f"Job {job_id} failed: {e}", exc_info=True)
        await job_queue.fail_job(job_id, str(e))
|
||||
6
ocr/app/services/__init__.py
Normal file
6
ocr/app/services/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""OCR service layer."""
|
||||
from .job_queue import job_queue
|
||||
from .ocr_service import ocr_service
|
||||
from .preprocessor import preprocessor
|
||||
|
||||
__all__ = ["job_queue", "ocr_service", "preprocessor"]
|
||||
233
ocr/app/services/job_queue.py
Normal file
233
ocr/app/services/job_queue.py
Normal file
@@ -0,0 +1,233 @@
|
||||
"""Redis-based job queue for async OCR processing."""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
import redis.asyncio as redis
|
||||
|
||||
from app.config import settings
|
||||
from app.models import JobResponse, JobStatus, OcrResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Job TTL in seconds (1 hour)
|
||||
JOB_TTL = 3600
|
||||
|
||||
# Key prefixes
|
||||
JOB_PREFIX = "ocr:job:"
|
||||
JOB_DATA_PREFIX = "ocr:job:data:"
|
||||
JOB_RESULT_PREFIX = "ocr:job:result:"
|
||||
|
||||
|
||||
class JobQueue:
    """Manages async OCR jobs using Redis.

    Per job, three Redis entries are kept (each with a 1-hour TTL):
    the metadata hash (JOB_PREFIX), the raw upload bytes (JOB_DATA_PREFIX,
    deleted once the job finishes), and the serialized result
    (JOB_RESULT_PREFIX, written on completion).
    """

    def __init__(self) -> None:
        """Initialize job queue; the connection is created lazily on first use."""
        self._redis: Optional[redis.Redis] = None

    async def get_redis(self) -> redis.Redis:
        """Get or create the shared Redis connection (decode_responses=True)."""
        if self._redis is None:
            self._redis = redis.Redis(
                host=settings.redis_host,
                port=settings.redis_port,
                db=settings.redis_db,
                decode_responses=True,
            )
        return self._redis

    async def close(self) -> None:
        """Close Redis connection (invoked from the app's lifespan shutdown)."""
        if self._redis:
            await self._redis.close()
            self._redis = None

    async def submit_job(
        self,
        file_bytes: bytes,
        content_type: str,
        callback_url: Optional[str] = None,
    ) -> str:
        """
        Submit a new OCR job.

        Args:
            file_bytes: Raw file bytes to process
            content_type: MIME type of the file
            callback_url: Optional URL to call when job completes

        Returns:
            Job ID (random UUID4 string)
        """
        r = await self.get_redis()
        job_id = str(uuid.uuid4())

        # Job starts as PENDING with zero progress. An empty string stands
        # in for "no callback" since Redis hashes cannot store None.
        job_meta = {
            "status": JobStatus.PENDING.value,
            "progress": 0,
            "content_type": content_type,
            "callback_url": callback_url or "",
        }

        # Metadata hash and binary payload live under separate keys.
        data_key = f"{JOB_DATA_PREFIX}{job_id}"
        meta_key = f"{JOB_PREFIX}{job_id}"

        # Pipeline so metadata + data (and their TTLs) are written together.
        async with r.pipeline() as pipe:
            # Store metadata as hash
            await pipe.hset(meta_key, mapping=job_meta)  # type: ignore
            await pipe.expire(meta_key, JOB_TTL)

            # Store binary data
            await pipe.set(data_key, file_bytes)
            await pipe.expire(data_key, JOB_TTL)

            await pipe.execute()

        logger.info(f"Job {job_id} submitted")
        return job_id

    async def get_job_status(self, job_id: str) -> Optional[JobResponse]:
        """
        Get the status of a job.

        Args:
            job_id: Job ID to check

        Returns:
            JobResponse or None if job doesn't exist (unknown id or TTL lapsed)
        """
        r = await self.get_redis()
        meta_key = f"{JOB_PREFIX}{job_id}"
        result_key = f"{JOB_RESULT_PREFIX}{job_id}"

        # HGETALL returns an empty dict for missing/expired keys.
        meta = await r.hgetall(meta_key)  # type: ignore
        if not meta:
            return None

        status = JobStatus(meta.get("status", JobStatus.PENDING.value))
        progress = int(meta.get("progress", 0))
        error = meta.get("error")

        # The serialized OcrResponse exists only for completed jobs.
        result = None
        if status == JobStatus.COMPLETED:
            result_json = await r.get(result_key)
            if result_json:
                result_dict = json.loads(result_json)
                result = OcrResponse(**result_dict)

        # progress/error are surfaced only for the states they apply to.
        return JobResponse(
            jobId=job_id,
            status=status,
            progress=progress if status == JobStatus.PROCESSING else None,
            result=result,
            error=error if status == JobStatus.FAILED else None,
        )

    async def update_job_progress(self, job_id: str, progress: int) -> None:
        """Update job progress percentage (also forces status to PROCESSING)."""
        r = await self.get_redis()
        meta_key = f"{JOB_PREFIX}{job_id}"

        await r.hset(meta_key, mapping={  # type: ignore
            "status": JobStatus.PROCESSING.value,
            "progress": progress,
        })

    async def complete_job(self, job_id: str, result: OcrResponse) -> None:
        """Mark job as completed, store its result, and free the payload."""
        r = await self.get_redis()
        meta_key = f"{JOB_PREFIX}{job_id}"
        result_key = f"{JOB_RESULT_PREFIX}{job_id}"
        data_key = f"{JOB_DATA_PREFIX}{job_id}"

        # Serialize with camelCase aliases to match the API contract.
        result_dict = result.model_dump(by_alias=True)
        result_json = json.dumps(result_dict)

        async with r.pipeline() as pipe:
            # Update status
            await pipe.hset(meta_key, mapping={  # type: ignore
                "status": JobStatus.COMPLETED.value,
                "progress": 100,
            })

            # Store result
            await pipe.set(result_key, result_json)
            await pipe.expire(result_key, JOB_TTL)

            # Delete file data (no longer needed)
            await pipe.delete(data_key)

            await pipe.execute()

        logger.info(f"Job {job_id} completed")

        # TODO: Trigger callback if configured
        meta = await r.hgetall(meta_key)  # type: ignore
        callback_url = meta.get("callback_url")
        if callback_url:
            # Fire-and-forget callback (don't block).
            # NOTE(review): the task reference is not retained, so the task
            # could in principle be garbage-collected mid-flight — confirm.
            asyncio.create_task(self._send_callback(callback_url, job_id, result_dict))

    async def fail_job(self, job_id: str, error: str) -> None:
        """Mark job as failed with error message and free the payload."""
        r = await self.get_redis()
        meta_key = f"{JOB_PREFIX}{job_id}"
        data_key = f"{JOB_DATA_PREFIX}{job_id}"

        async with r.pipeline() as pipe:
            await pipe.hset(meta_key, mapping={  # type: ignore
                "status": JobStatus.FAILED.value,
                "error": error,
            })
            # Delete file data
            await pipe.delete(data_key)
            await pipe.execute()

        logger.error(f"Job {job_id} failed: {error}")

    async def get_job_data(self, job_id: str) -> Optional[bytes]:
        """Get the file data for a job (None if missing, expired, or consumed)."""
        r = await self.get_redis()
        data_key = f"{JOB_DATA_PREFIX}{job_id}"

        # The shared client decodes responses to str, which would corrupt a
        # binary payload; use a short-lived raw (bytes) client instead.
        raw_redis = redis.Redis(
            host=settings.redis_host,
            port=settings.redis_port,
            db=settings.redis_db,
            decode_responses=False,
        )
        try:
            data = await raw_redis.get(data_key)
            return data  # type: ignore
        finally:
            await raw_redis.close()

    async def _send_callback(
        self, url: str, job_id: str, result: dict
    ) -> None:
        """POST {"jobId", "result"} to the callback URL; failures are logged only."""
        try:
            # Imported lazily so the queue works even without httpx installed.
            import httpx

            async with httpx.AsyncClient(timeout=10.0) as client:
                await client.post(
                    url,
                    json={"jobId": job_id, "result": result},
                )
            logger.info(f"Callback sent for job {job_id}")
        except Exception as e:
            logger.error(f"Callback failed for job {job_id}: {e}")


# Singleton instance shared by the routers.
job_queue = JobQueue()
|
||||
275
ocr/app/services/ocr_service.py
Normal file
275
ocr/app/services/ocr_service.py
Normal file
@@ -0,0 +1,275 @@
|
||||
"""Core OCR service using Tesseract with HEIC support."""
|
||||
import io
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import magic
|
||||
import pytesseract
|
||||
from PIL import Image
|
||||
from pillow_heif import register_heif_opener
|
||||
|
||||
from app.config import settings
|
||||
from app.models import DocumentType, ExtractedField, OcrResponse
|
||||
from app.services.preprocessor import preprocessor
|
||||
|
||||
# Register HEIF/HEIC opener with Pillow
|
||||
register_heif_opener()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OcrService:
|
||||
"""Core OCR processing service."""
|
||||
|
||||
# Supported MIME types
|
||||
SUPPORTED_TYPES = {
|
||||
"image/jpeg",
|
||||
"image/png",
|
||||
"image/heic",
|
||||
"image/heif",
|
||||
"application/pdf",
|
||||
}
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize OCR service."""
|
||||
pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
|
||||
|
||||
def extract(
|
||||
self,
|
||||
file_bytes: bytes,
|
||||
content_type: Optional[str] = None,
|
||||
preprocess: bool = True,
|
||||
) -> OcrResponse:
|
||||
"""
|
||||
Extract text from an image file.
|
||||
|
||||
Args:
|
||||
file_bytes: Raw file bytes
|
||||
content_type: MIME type (optional, will be detected if not provided)
|
||||
preprocess: Whether to apply preprocessing
|
||||
|
||||
Returns:
|
||||
OcrResponse with extracted text and metadata
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
# Detect file type if not provided
|
||||
if not content_type:
|
||||
content_type = self._detect_mime_type(file_bytes)
|
||||
|
||||
# Validate file type
|
||||
if content_type not in self.SUPPORTED_TYPES:
|
||||
return OcrResponse(
|
||||
success=False,
|
||||
documentType=DocumentType.UNKNOWN,
|
||||
rawText="",
|
||||
confidence=0.0,
|
||||
extractedFields={},
|
||||
processingTimeMs=int((time.time() - start_time) * 1000),
|
||||
)
|
||||
|
||||
try:
|
||||
# Convert HEIC/HEIF to standard format
|
||||
if content_type in ("image/heic", "image/heif"):
|
||||
file_bytes = self._convert_heic(file_bytes)
|
||||
content_type = "image/png"
|
||||
|
||||
# Handle PDF (extract first page as image)
|
||||
if content_type == "application/pdf":
|
||||
file_bytes = self._extract_pdf_first_page(file_bytes)
|
||||
content_type = "image/png"
|
||||
|
||||
# Apply preprocessing if enabled
|
||||
if preprocess:
|
||||
file_bytes = preprocessor.preprocess(
|
||||
file_bytes, deskew=True, denoise=True
|
||||
)
|
||||
|
||||
# Perform OCR
|
||||
image = Image.open(io.BytesIO(file_bytes))
|
||||
ocr_data = pytesseract.image_to_data(
|
||||
image, output_type=pytesseract.Output.DICT
|
||||
)
|
||||
|
||||
# Extract text and calculate confidence
|
||||
raw_text, confidence = self._process_ocr_data(ocr_data)
|
||||
|
||||
# Detect document type from content
|
||||
document_type = self._detect_document_type(raw_text)
|
||||
|
||||
# Extract fields based on document type
|
||||
extracted_fields = self._extract_fields(raw_text, document_type)
|
||||
|
||||
processing_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
logger.info(
|
||||
f"OCR completed: {len(raw_text)} chars, "
|
||||
f"{confidence:.2%} confidence, {processing_time_ms}ms"
|
||||
)
|
||||
|
||||
return OcrResponse(
|
||||
success=True,
|
||||
documentType=document_type,
|
||||
rawText=raw_text,
|
||||
confidence=confidence,
|
||||
extractedFields=extracted_fields,
|
||||
processingTimeMs=processing_time_ms,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"OCR extraction failed: {e}", exc_info=True)
|
||||
return OcrResponse(
|
||||
success=False,
|
||||
documentType=DocumentType.UNKNOWN,
|
||||
rawText="",
|
||||
confidence=0.0,
|
||||
extractedFields={},
|
||||
processingTimeMs=int((time.time() - start_time) * 1000),
|
||||
)
|
||||
|
||||
def _detect_mime_type(self, file_bytes: bytes) -> str:
|
||||
"""Detect MIME type using python-magic."""
|
||||
mime = magic.Magic(mime=True)
|
||||
detected = mime.from_buffer(file_bytes)
|
||||
return detected or "application/octet-stream"
|
||||
|
||||
def _convert_heic(self, heic_bytes: bytes) -> bytes:
|
||||
"""Convert HEIC/HEIF to PNG format."""
|
||||
# pillow-heif registers itself with PIL, so we can open HEIC directly
|
||||
image = Image.open(io.BytesIO(heic_bytes))
|
||||
buffer = io.BytesIO()
|
||||
image.save(buffer, format="PNG")
|
||||
return buffer.getvalue()
|
||||
|
||||
def _extract_pdf_first_page(self, pdf_bytes: bytes) -> bytes:
|
||||
"""Extract first page of PDF as PNG image."""
|
||||
try:
|
||||
# Use pdf2image if available, otherwise return empty
|
||||
from pdf2image import convert_from_bytes
|
||||
|
||||
images = convert_from_bytes(pdf_bytes, first_page=1, last_page=1, dpi=300)
|
||||
if images:
|
||||
buffer = io.BytesIO()
|
||||
images[0].save(buffer, format="PNG")
|
||||
return buffer.getvalue()
|
||||
except ImportError:
|
||||
logger.warning("pdf2image not available, PDF support limited")
|
||||
except Exception as e:
|
||||
logger.error(f"PDF extraction failed: {e}")
|
||||
|
||||
return b""
|
||||
|
||||
def _process_ocr_data(
|
||||
self, ocr_data: dict
|
||||
) -> tuple[str, float]:
|
||||
"""Process Tesseract output to extract text and confidence."""
|
||||
words = []
|
||||
confidences = []
|
||||
|
||||
for i, text in enumerate(ocr_data["text"]):
|
||||
# Filter out empty strings and low-confidence results
|
||||
conf = int(ocr_data["conf"][i])
|
||||
if text.strip() and conf > 0:
|
||||
words.append(text)
|
||||
confidences.append(conf)
|
||||
|
||||
raw_text = " ".join(words)
|
||||
avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
|
||||
|
||||
# Normalize confidence to 0-1 range (Tesseract returns 0-100)
|
||||
return raw_text, avg_confidence / 100.0
|
||||
|
||||
def _detect_document_type(self, text: str) -> DocumentType:
|
||||
"""Detect document type from extracted text content."""
|
||||
text_lower = text.lower()
|
||||
|
||||
# VIN document indicators
|
||||
vin_indicators = [
|
||||
"vin",
|
||||
"vehicle identification",
|
||||
"title",
|
||||
"registration",
|
||||
"certificate of title",
|
||||
]
|
||||
if any(indicator in text_lower for indicator in vin_indicators):
|
||||
# Additional check: look for 17-character alphanumeric sequences
|
||||
import re
|
||||
|
||||
vin_pattern = r"\b[A-HJ-NPR-Z0-9]{17}\b"
|
||||
if re.search(vin_pattern, text.upper()):
|
||||
return DocumentType.VIN
|
||||
|
||||
# Receipt indicators
|
||||
receipt_indicators = [
|
||||
"receipt",
|
||||
"total",
|
||||
"subtotal",
|
||||
"tax",
|
||||
"payment",
|
||||
"invoice",
|
||||
"amount due",
|
||||
"gallons",
|
||||
"price/gallon",
|
||||
]
|
||||
if sum(1 for ind in receipt_indicators if ind in text_lower) >= 2:
|
||||
return DocumentType.RECEIPT
|
||||
|
||||
# Manual indicators
|
||||
manual_indicators = [
|
||||
"owner's manual",
|
||||
"maintenance schedule",
|
||||
"service interval",
|
||||
"chapter",
|
||||
"table of contents",
|
||||
"specifications",
|
||||
]
|
||||
if any(indicator in text_lower for indicator in manual_indicators):
|
||||
return DocumentType.MANUAL
|
||||
|
||||
return DocumentType.UNKNOWN
|
||||
|
||||
def _extract_fields(
|
||||
self, text: str, document_type: DocumentType
|
||||
) -> dict[str, ExtractedField]:
|
||||
"""Extract specific fields based on document type."""
|
||||
import re
|
||||
|
||||
fields: dict[str, ExtractedField] = {}
|
||||
|
||||
if document_type == DocumentType.VIN:
|
||||
# Extract VIN (17 alphanumeric characters, excluding I, O, Q)
|
||||
vin_pattern = r"\b([A-HJ-NPR-Z0-9]{17})\b"
|
||||
match = re.search(vin_pattern, text.upper())
|
||||
if match:
|
||||
fields["vin"] = ExtractedField(value=match.group(1), confidence=0.9)
|
||||
|
||||
elif document_type == DocumentType.RECEIPT:
|
||||
# Extract amounts (currency patterns)
|
||||
amount_pattern = r"\$\s*(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)"
|
||||
amounts = re.findall(amount_pattern, text)
|
||||
if amounts:
|
||||
# Last amount is often the total
|
||||
fields["total"] = ExtractedField(
|
||||
value=f"${amounts[-1]}", confidence=0.7
|
||||
)
|
||||
|
||||
# Extract date
|
||||
date_pattern = r"(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})"
|
||||
date_match = re.search(date_pattern, text)
|
||||
if date_match:
|
||||
fields["date"] = ExtractedField(value=date_match.group(1), confidence=0.8)
|
||||
|
||||
# Extract gallons (for fuel receipts)
|
||||
gallon_pattern = r"(\d+\.?\d*)\s*(?:gal|gallons)"
|
||||
gallon_match = re.search(gallon_pattern, text.lower())
|
||||
if gallon_match:
|
||||
fields["gallons"] = ExtractedField(
|
||||
value=gallon_match.group(1), confidence=0.85
|
||||
)
|
||||
|
||||
return fields
|
||||
|
||||
|
||||
# Singleton instance shared by the routers (configures tesseract on import).
ocr_service = OcrService()
|
||||
176
ocr/app/services/preprocessor.py
Normal file
176
ocr/app/services/preprocessor.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""Image preprocessing service for OCR accuracy improvement."""
|
||||
import io
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ImagePreprocessor:
    """Handles image preprocessing for improved OCR accuracy.

    Pipeline: decode -> grayscale -> [denoise] -> [deskew] -> [binarize]
    -> re-encode as PNG. Each step degrades gracefully: on failure the
    unmodified image is carried forward.
    """

    def preprocess(
        self,
        image_bytes: bytes,
        deskew: bool = True,
        denoise: bool = True,
        binarize: bool = False,
    ) -> bytes:
        """
        Apply preprocessing to an image for better OCR results.

        Args:
            image_bytes: Raw image bytes
            deskew: Whether to correct image rotation
            denoise: Whether to apply noise reduction
            binarize: Whether to convert to black and white

        Returns:
            Preprocessed (grayscale) image as PNG bytes
        """
        # Convert bytes to numpy array via PIL
        pil_image = Image.open(io.BytesIO(image_bytes))

        # Convert to RGB if necessary (handles RGBA, palette, CMYK, etc.);
        # "L" (grayscale) is kept as-is.
        if pil_image.mode not in ("RGB", "L"):
            pil_image = pil_image.convert("RGB")

        # Convert PIL to OpenCV format
        cv_image = np.array(pil_image)

        # Convert RGB to BGR for OpenCV (if color image)
        if len(cv_image.shape) == 3:
            cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)

        # Convert to grayscale for processing
        if len(cv_image.shape) == 3:
            gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
        else:
            gray = cv_image

        # Apply denoising
        if denoise:
            gray = self._denoise(gray)

        # Apply deskewing
        if deskew:
            gray = self._deskew(gray)

        # Apply binarization (optional - can help with some documents)
        if binarize:
            gray = self._binarize(gray)

        # Convert back to PIL and return as PNG bytes
        result_image = Image.fromarray(gray)
        buffer = io.BytesIO()
        result_image.save(buffer, format="PNG")
        return buffer.getvalue()

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """Apply noise reduction using non-local means denoising.

        Returns the input unchanged if OpenCV rejects it.
        """
        try:
            # fastNlMeansDenoising is effective for grayscale images;
            # h=10 controls filter strength.
            return cv2.fastNlMeansDenoising(image, h=10, templateWindowSize=7, searchWindowSize=21)
        except cv2.error as e:
            logger.warning(f"Denoising failed: {e}")
            return image

    def _deskew(self, image: np.ndarray) -> np.ndarray:
        """Correct image rotation using Hough transform.

        Returns the input unchanged when no usable lines are found, when
        the detected skew is negligible (<0.5 deg) or implausibly large
        (>15 deg), or on any failure.
        """
        try:
            # Detect edges as input for the line detector.
            edges = cv2.Canny(image, 50, 150, apertureSize=3)

            # Detect line segments using the probabilistic Hough transform.
            lines = cv2.HoughLinesP(
                edges,
                rho=1,
                theta=np.pi / 180,
                threshold=100,
                minLineLength=100,
                maxLineGap=10,
            )

            if lines is None:
                return image

            # Calculate the angle of each detected segment.
            angles = []
            for line in lines:
                x1, y1, x2, y2 = line[0]
                if x2 - x1 != 0:  # Avoid division by zero
                    angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
                    # Only consider nearly horizontal lines (within 45 degrees)
                    if -45 < angle < 45:
                        angles.append(angle)

            if not angles:
                return image

            # Use median angle to avoid outliers
            median_angle = np.median(angles)

            # Only correct if skew is significant but not too extreme
            if abs(median_angle) < 0.5 or abs(median_angle) > 15:
                return image

            # Rotate the image about its center to correct the skew.
            height, width = image.shape[:2]
            center = (width // 2, height // 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, median_angle, 1.0)

            # Expand the canvas so the rotation does not crop corners.
            cos_val = abs(rotation_matrix[0, 0])
            sin_val = abs(rotation_matrix[0, 1])
            new_width = int(height * sin_val + width * cos_val)
            new_height = int(height * cos_val + width * sin_val)

            # Shift so the rotated image stays centered in the new canvas.
            rotation_matrix[0, 2] += (new_width - width) / 2
            rotation_matrix[1, 2] += (new_height - height) / 2

            rotated = cv2.warpAffine(
                image,
                rotation_matrix,
                (new_width, new_height),
                borderMode=cv2.BORDER_REPLICATE,
            )

            logger.debug(f"Deskewed image by {median_angle:.2f} degrees")
            return rotated

        except Exception as e:
            logger.warning(f"Deskewing failed: {e}")
            return image

    def _binarize(self, image: np.ndarray) -> np.ndarray:
        """Convert to binary (black and white) using adaptive thresholding."""
        try:
            return cv2.adaptiveThreshold(
                image,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=11,
                C=2,
            )
        except cv2.error as e:
            logger.warning(f"Binarization failed: {e}")
            return image

    def get_image_info(self, image_bytes: bytes) -> dict:
        """Get basic information about an image (width/height/mode/format)."""
        pil_image = Image.open(io.BytesIO(image_bytes))
        return {
            "width": pil_image.width,
            "height": pil_image.height,
            "mode": pil_image.mode,
            "format": pil_image.format,
        }


# Singleton instance
preprocessor = ImagePreprocessor()
|
||||
@@ -2,6 +2,7 @@
|
||||
fastapi>=0.100.0
|
||||
uvicorn[standard]>=0.23.0
|
||||
python-multipart>=0.0.6
|
||||
pydantic>=2.0.0
|
||||
|
||||
# File Detection & Handling
|
||||
python-magic>=0.4.27
|
||||
@@ -15,6 +16,12 @@ numpy>=1.24.0
|
||||
# OCR Engines
|
||||
pytesseract>=0.3.10
|
||||
|
||||
# Redis for job queue
|
||||
redis>=5.0.0
|
||||
|
||||
# HTTP client for callbacks
|
||||
httpx>=0.24.0
|
||||
|
||||
# Testing
pytest>=7.4.0
pytest-asyncio>=0.21.0
|
||||
|
||||
Reference in New Issue
Block a user