"""OCR extraction endpoints."""

import logging
from typing import Optional

from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile

from app.extractors.vin_extractor import vin_extractor
from app.extractors.receipt_extractor import receipt_extractor
from app.models import (
    BoundingBox,
    OcrResponse,
    ReceiptExtractedField,
    ReceiptExtractionResponse,
    VinAlternative,
    VinExtractionResponse,
)
from app.services import ocr_service

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/extract", tags=["extract"])

# Maximum file size for synchronous processing (10MB)
MAX_SYNC_SIZE = 10 * 1024 * 1024

# NOTE(review): every endpoint below calls its extractor's ``extract()``
# without ``await`` from inside an ``async def`` handler. If those calls are
# long-running they hold the event loop for their duration. Moving them to a
# worker thread (e.g. ``fastapi.concurrency.run_in_threadpool``) is a possible
# follow-up -- TODO confirm the extractors are thread-safe first.


async def _read_validated_upload(file: UploadFile, oversize_detail: str) -> bytes:
    """Read an uploaded file and enforce the shared upload constraints.

    The checks run in a fixed order: filename present, size within
    ``MAX_SYNC_SIZE``, content non-empty.

    Args:
        file: The multipart upload received by the endpoint.
        oversize_detail: ``detail`` text for the 413 response; it differs
            between endpoints, so the caller supplies it.

    Returns:
        The raw file content (between 1 and ``MAX_SYNC_SIZE`` bytes).

    Raises:
        HTTPException: 400 if the upload has no filename, 413 if the content
            exceeds ``MAX_SYNC_SIZE``, 400 if the content is empty.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    # Read at most one byte past the limit: that is enough to detect an
    # oversized upload without pulling an arbitrarily large body into memory.
    content = await file.read(MAX_SYNC_SIZE + 1)

    if len(content) > MAX_SYNC_SIZE:
        raise HTTPException(status_code=413, detail=oversize_detail)
    if not content:
        raise HTTPException(status_code=400, detail="Empty file provided")

    return content


@router.post("", response_model=OcrResponse)
async def extract_text(
    file: UploadFile = File(..., description="Image file to process"),
    preprocess: bool = Query(True, description="Apply image preprocessing"),
) -> OcrResponse:
    """
    Extract text from an uploaded image using OCR.

    Supports HEIC, JPEG, PNG, and PDF (first page only) formats.
    Processing time target: <3 seconds for typical photos.

    - **file**: Image file (max 10MB for sync processing)
    - **preprocess**: Whether to apply deskew/denoise preprocessing (default: true)
    """
    # Error responses: 400 (missing filename / empty file), 413 (over
    # MAX_SYNC_SIZE), 422 (the OCR service reported success=False).
    content = await _read_validated_upload(
        file,
        oversize_detail=(
            f"File too large for sync processing. "
            f"Max: {MAX_SYNC_SIZE // (1024*1024)}MB. "
            f"Use /jobs for larger files."
        ),
    )

    logger.info(
        "Processing file: %s, size: %s bytes, content_type: %s",
        file.filename,
        len(content),
        file.content_type,
    )

    # Perform OCR extraction
    result = ocr_service.extract(
        file_bytes=content,
        content_type=file.content_type,
        preprocess=preprocess,
    )

    if not result.success:
        logger.warning("OCR extraction failed for %s", file.filename)
        raise HTTPException(
            status_code=422,
            detail="Failed to extract text from image. Ensure the file is a valid image format.",
        )

    return result


@router.post("/vin", response_model=VinExtractionResponse)
async def extract_vin(
    file: UploadFile = File(..., description="Image file containing VIN"),
) -> VinExtractionResponse:
    """
    Extract VIN (Vehicle Identification Number) from an uploaded image.

    Uses VIN-optimized preprocessing and pattern matching:
    - HEIC conversion (if needed)
    - Grayscale conversion
    - Deskew correction
    - CLAHE contrast enhancement
    - Noise reduction
    - Adaptive thresholding
    - VIN pattern matching (17 chars, excludes I/O/Q)
    - Check digit validation
    - Common OCR error correction (I->1, O->0, Q->0)

    Supports HEIC, JPEG, PNG formats.
    Processing time target: <3 seconds.

    - **file**: Image file (max 10MB)

    Returns:
    - **vin**: Extracted VIN (17 alphanumeric characters)
    - **confidence**: Confidence score (0.0-1.0)
    - **boundingBox**: Location of VIN in image (if detected)
    - **alternatives**: Other VIN candidates with confidence scores
    - **processingTimeMs**: Processing time in milliseconds
    """
    # Error responses: 400 (missing filename / empty file), 413 (over
    # MAX_SYNC_SIZE). Unlike the other endpoints, an unsuccessful extraction
    # is not turned into an HTTP error here: the extractor's ``success`` and
    # ``error`` values are passed through in the response body.
    content = await _read_validated_upload(
        file,
        oversize_detail=f"File too large. Max: {MAX_SYNC_SIZE // (1024*1024)}MB",
    )

    logger.info(
        "VIN extraction: %s, size: %s bytes, content_type: %s",
        file.filename,
        len(content),
        file.content_type,
    )

    # Perform VIN extraction
    result = vin_extractor.extract(
        image_bytes=content,
        content_type=file.content_type,
    )

    # Convert internal result to API response
    bounding_box = None
    if result.bounding_box:
        bounding_box = BoundingBox(
            x=result.bounding_box.x,
            y=result.bounding_box.y,
            width=result.bounding_box.width,
            height=result.bounding_box.height,
        )

    alternatives = [
        VinAlternative(vin=alt.vin, confidence=alt.confidence)
        for alt in result.alternatives
    ]

    return VinExtractionResponse(
        success=result.success,
        vin=result.vin,
        confidence=result.confidence,
        boundingBox=bounding_box,
        alternatives=alternatives,
        processingTimeMs=result.processing_time_ms,
        error=result.error,
    )


@router.post("/receipt", response_model=ReceiptExtractionResponse)
async def extract_receipt(
    file: UploadFile = File(..., description="Receipt image file"),
    receipt_type: Optional[str] = Form(
        default=None,
        description="Receipt type hint: 'fuel' for specialized extraction",
    ),
) -> ReceiptExtractionResponse:
    """
    Extract data from a receipt image using OCR.

    Optimized for fuel receipts with pattern-based field extraction:
    - HEIC conversion (if needed)
    - Grayscale conversion
    - High contrast enhancement (for thermal receipts)
    - Adaptive thresholding
    - Pattern matching for dates, amounts, fuel quantities

    Supports HEIC, JPEG, PNG formats.
    Processing time target: <3 seconds.

    - **file**: Receipt image file (max 10MB)
    - **receipt_type**: Optional hint ("fuel" for gas station receipts)

    Returns:
    - **receiptType**: Detected type ("fuel" or "unknown")
    - **extractedFields**: Dictionary of extracted fields with confidence scores
        - merchantName: Gas station or store name
        - transactionDate: Date in YYYY-MM-DD format
        - totalAmount: Total purchase amount
        - fuelQuantity: Gallons/liters purchased (fuel receipts)
        - pricePerUnit: Price per gallon/liter (fuel receipts)
        - fuelGrade: Octane rating or fuel type (fuel receipts)
    - **rawText**: Full OCR text
    - **processingTimeMs**: Processing time in milliseconds
    """
    # Error responses: 400 (missing filename / empty file), 413 (over
    # MAX_SYNC_SIZE), 422 (the receipt extractor reported success=False).
    content = await _read_validated_upload(
        file,
        oversize_detail=f"File too large. Max: {MAX_SYNC_SIZE // (1024*1024)}MB",
    )

    logger.info(
        "Receipt extraction: %s, size: %s bytes, content_type: %s, receipt_type: %s",
        file.filename,
        len(content),
        file.content_type,
        receipt_type,
    )

    # Perform receipt extraction
    result = receipt_extractor.extract(
        image_bytes=content,
        content_type=file.content_type,
        receipt_type=receipt_type,
    )

    if not result.success:
        logger.warning(
            "Receipt extraction failed for %s: %s", file.filename, result.error
        )
        raise HTTPException(
            status_code=422,
            # Prefer the extractor's own message; fall back to a generic one.
            detail=result.error or "Failed to extract data from receipt image",
        )

    # Convert internal fields to API response format
    extracted_fields = {
        name: ReceiptExtractedField(
            value=field.value,
            confidence=field.confidence,
        )
        for name, field in result.extracted_fields.items()
    }

    return ReceiptExtractionResponse(
        success=result.success,
        receiptType=result.receipt_type,
        extractedFields=extracted_fields,
        rawText=result.raw_text,
        processingTimeMs=result.processing_time_ms,
        error=result.error,
    )