"""Receipt-optimized image preprocessing pipeline.""" import io import logging from dataclasses import dataclass from typing import Optional import cv2 import numpy as np from PIL import Image from pillow_heif import register_heif_opener # Register HEIF/HEIC opener register_heif_opener() logger = logging.getLogger(__name__) @dataclass class ReceiptPreprocessingResult: """Result of receipt preprocessing.""" image_bytes: bytes preprocessing_applied: list[str] original_width: int original_height: int class ReceiptPreprocessor: """Receipt-optimized image preprocessing for improved OCR accuracy. Thermal receipts typically have: - Low contrast (faded ink) - Uneven illumination - Paper curl/skew - Variable font weights This preprocessor addresses these issues with targeted enhancements. """ # Optimal width for receipt OCR (narrow receipts work better) TARGET_WIDTH = 800 def preprocess( self, image_bytes: bytes, apply_contrast: bool = True, apply_deskew: bool = True, apply_denoise: bool = True, apply_threshold: bool = True, apply_sharpen: bool = True, ) -> ReceiptPreprocessingResult: """ Apply receipt-optimized preprocessing pipeline. Pipeline optimized for thermal receipts: 1. HEIC conversion (if needed) 2. Grayscale conversion 3. Resize to optimal width 4. Deskew (correct rotation) 5. High contrast enhancement (CLAHE + histogram stretch) 6. Adaptive sharpening 7. Noise reduction 8. Adaptive thresholding (receipt-optimized) Args: image_bytes: Raw image bytes (HEIC, JPEG, PNG) apply_contrast: Apply contrast enhancement apply_deskew: Apply deskew correction apply_denoise: Apply noise reduction apply_threshold: Apply adaptive thresholding apply_sharpen: Apply sharpening Returns: ReceiptPreprocessingResult with processed image bytes """ steps_applied = [] # Load image with PIL (handles HEIC via pillow-heif) pil_image = Image.open(io.BytesIO(image_bytes)) original_width, original_height = pil_image.size steps_applied.append("loaded") # Handle EXIF rotation pil_image = self._fix_orientation(pil_image) # Convert to RGB if needed if pil_image.mode not in ("RGB", "L"): pil_image = pil_image.convert("RGB") steps_applied.append("convert_rgb") # Convert to OpenCV format cv_image = np.array(pil_image) if len(cv_image.shape) == 3: cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) # Convert to grayscale if len(cv_image.shape) == 3: gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) else: gray = cv_image steps_applied.append("grayscale") # Resize to optimal width while maintaining aspect ratio gray = self._resize_optimal(gray) steps_applied.append("resize") # Apply deskew if apply_deskew: gray = self._deskew(gray) steps_applied.append("deskew") # Apply high contrast enhancement (critical for thermal receipts) if apply_contrast: gray = self._enhance_contrast(gray) steps_applied.append("contrast") # Apply sharpening if apply_sharpen: gray = self._sharpen(gray) steps_applied.append("sharpen") # Apply denoising if apply_denoise: gray = self._denoise(gray) steps_applied.append("denoise") # Apply adaptive thresholding (receipt-optimized parameters) if apply_threshold: gray = self._adaptive_threshold_receipt(gray) steps_applied.append("threshold") # Convert back to PNG bytes result_image = Image.fromarray(gray) buffer = io.BytesIO() result_image.save(buffer, format="PNG") logger.debug(f"Receipt preprocessing applied: {steps_applied}") return ReceiptPreprocessingResult( image_bytes=buffer.getvalue(), preprocessing_applied=steps_applied, original_width=original_width, original_height=original_height, ) def _fix_orientation(self, image: Image.Image) -> Image.Image: """Fix image orientation based on EXIF data.""" try: exif = image.getexif() if exif: orientation = exif.get(274) # Orientation tag if orientation: rotate_values = { 3: 180, 6: 270, 8: 90, } if orientation in rotate_values: return image.rotate( rotate_values[orientation], expand=True ) except Exception as e: logger.debug(f"Could not read EXIF orientation: {e}") return image def _resize_optimal(self, image: np.ndarray) -> np.ndarray: """Resize image to optimal width for OCR.""" height, width = image.shape[:2] if width <= self.TARGET_WIDTH: return image scale = self.TARGET_WIDTH / width new_height = int(height * scale) return cv2.resize( image, (self.TARGET_WIDTH, new_height), interpolation=cv2.INTER_AREA, ) def _deskew(self, image: np.ndarray) -> np.ndarray: """ Correct image rotation using projection profile. Receipts often have slight rotation from scanning/photography. Uses projection profile method optimized for text documents. """ try: # Create binary image for angle detection _, binary = cv2.threshold( image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU ) # Find all non-zero points coords = np.column_stack(np.where(binary > 0)) if len(coords) < 100: return image # Use minimum area rectangle to find angle rect = cv2.minAreaRect(coords) angle = rect[-1] # Normalize angle if angle < -45: angle = 90 + angle elif angle > 45: angle = angle - 90 # Only correct if angle is significant but not extreme if abs(angle) < 0.5 or abs(angle) > 15: return image # Rotate to correct skew height, width = image.shape[:2] center = (width // 2, height // 2) rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0) rotated = cv2.warpAffine( image, rotation_matrix, (width, height), borderMode=cv2.BORDER_REPLICATE, ) logger.debug(f"Receipt deskewed by {angle:.2f} degrees") return rotated except Exception as e: logger.warning(f"Deskew failed: {e}") return image def _enhance_contrast(self, image: np.ndarray) -> np.ndarray: """ Apply aggressive contrast enhancement for faded receipts. Combines: 1. Histogram stretching 2. CLAHE (Contrast Limited Adaptive Histogram Equalization) """ try: # First, stretch histogram to use full dynamic range p2, p98 = np.percentile(image, (2, 98)) stretched = np.clip( (image - p2) * 255.0 / (p98 - p2), 0, 255 ).astype(np.uint8) # Apply CLAHE with parameters optimized for receipts # Higher clipLimit for faded thermal receipts clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) enhanced = clahe.apply(stretched) return enhanced except Exception as e: logger.warning(f"Contrast enhancement failed: {e}") return image def _sharpen(self, image: np.ndarray) -> np.ndarray: """ Apply unsharp masking for clearer text edges. Light sharpening improves OCR on slightly blurry images. """ try: # Gaussian blur for unsharp mask blurred = cv2.GaussianBlur(image, (0, 0), 2.0) # Unsharp mask: original + alpha * (original - blurred) sharpened = cv2.addWeighted(image, 1.5, blurred, -0.5, 0) return sharpened except Exception as e: logger.warning(f"Sharpening failed: {e}") return image def _denoise(self, image: np.ndarray) -> np.ndarray: """ Apply light denoising optimized for text. Uses bilateral filter to preserve edges while reducing noise. """ try: # Bilateral filter preserves edges better than Gaussian # Light denoising - don't want to blur text return cv2.bilateralFilter(image, 5, 50, 50) except Exception as e: logger.warning(f"Denoising failed: {e}") return image def _adaptive_threshold_receipt(self, image: np.ndarray) -> np.ndarray: """ Apply adaptive thresholding optimized for receipt text. Uses parameters tuned for: - Variable font sizes (small print + headers) - Faded thermal printing - Uneven paper illumination """ try: # Use Gaussian adaptive threshold # Larger block size (31) handles uneven illumination # Moderate C value (8) for faded receipts binary = cv2.adaptiveThreshold( image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blockSize=31, C=8, ) return binary except Exception as e: logger.warning(f"Adaptive threshold failed: {e}") return image def preprocess_for_low_quality( self, image_bytes: bytes ) -> ReceiptPreprocessingResult: """ Apply aggressive preprocessing for very low quality receipts. Use this when standard preprocessing fails to produce readable text. """ return self.preprocess( image_bytes, apply_contrast=True, apply_deskew=True, apply_denoise=True, apply_threshold=True, apply_sharpen=True, ) # Singleton instance receipt_preprocessor = ReceiptPreprocessor()