Files
motovaultpro/ocr/app/preprocessors/receipt_preprocessor.py
Eric Gullickson 6319d50fb1
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 32s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m20s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
feat: add receipt OCR pipeline (refs #69)
Implement receipt-specific OCR extraction for fuel receipts:

- Pattern matching modules for date, currency, and fuel data extraction
- Receipt-optimized image preprocessing for thermal receipts
- POST /extract/receipt endpoint with field extraction
- Confidence scoring per extracted field
- Cross-validation of fuel receipt data
- Unit tests for all pattern matchers

Extracted fields: merchantName, transactionDate, totalAmount,
fuelQuantity, pricePerUnit, fuelGrade

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 20:43:30 -06:00

341 lines
10 KiB
Python

"""Receipt-optimized image preprocessing pipeline."""
import io
import logging
from dataclasses import dataclass
from typing import Optional

import cv2
import numpy as np
from PIL import Image, ImageOps
from pillow_heif import register_heif_opener
# Register HEIF/HEIC opener with Pillow once, at import time, so that the
# Image.open() call in ReceiptPreprocessor.preprocess can load HEIC input.
register_heif_opener()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@dataclass
class ReceiptPreprocessingResult:
    """Outcome of running an image through the receipt preprocessing pipeline.

    Attributes:
        image_bytes: The processed image, encoded as bytes.
        preprocessing_applied: Names of the pipeline stages that were run,
            in the order they were run.
        original_width: Width in pixels of the image as it was loaded.
        original_height: Height in pixels of the image as it was loaded.
    """

    image_bytes: bytes
    preprocessing_applied: list[str]
    original_width: int
    original_height: int
class ReceiptPreprocessor:
    """Receipt-optimized image preprocessing for improved OCR accuracy.

    Thermal receipts typically have:
    - Low contrast (faded ink)
    - Uneven illumination
    - Paper curl/skew
    - Variable font weights

    This preprocessor addresses these issues with targeted enhancements.
    Each enhancement stage is best-effort: if a stage raises, the failure is
    logged and that stage's input image is passed on unchanged.
    """

    # Optimal width for receipt OCR (narrow receipts work better)
    TARGET_WIDTH = 800

    def preprocess(
        self,
        image_bytes: bytes,
        apply_contrast: bool = True,
        apply_deskew: bool = True,
        apply_denoise: bool = True,
        apply_threshold: bool = True,
        apply_sharpen: bool = True,
    ) -> ReceiptPreprocessingResult:
        """
        Apply receipt-optimized preprocessing pipeline.

        Pipeline optimized for thermal receipts:
        1. HEIC conversion (if needed)
        2. Grayscale conversion
        3. Resize to optimal width
        4. Deskew (correct rotation)
        5. High contrast enhancement (histogram stretch + CLAHE)
        6. Sharpening (unsharp mask)
        7. Noise reduction
        8. Adaptive thresholding (receipt-optimized)

        Args:
            image_bytes: Raw image bytes (HEIC, JPEG, PNG)
            apply_contrast: Apply contrast enhancement
            apply_deskew: Apply deskew correction
            apply_denoise: Apply noise reduction
            apply_threshold: Apply adaptive thresholding
            apply_sharpen: Apply sharpening

        Returns:
            ReceiptPreprocessingResult holding the processed image encoded
            as PNG, the list of stages that were run, and the dimensions of
            the image as loaded (before any EXIF orientation correction).
            A stage name is recorded whenever the stage is invoked, even if
            the stage decided to leave the image unchanged.

        Raises:
            Any exception raised by ``Image.open`` for input that cannot be
            decoded propagates to the caller.
        """
        steps_applied: list[str] = []

        # Load image with PIL (handles HEIC via pillow-heif)
        pil_image = Image.open(io.BytesIO(image_bytes))
        original_width, original_height = pil_image.size
        steps_applied.append("loaded")

        # Handle EXIF rotation
        pil_image = self._fix_orientation(pil_image)

        # Convert to RGB if needed (palette, alpha, CMYK, ... modes)
        if pil_image.mode not in ("RGB", "L"):
            pil_image = pil_image.convert("RGB")
            steps_applied.append("convert_rgb")

        # Convert to grayscale. PIL hands over RGB channel order, so convert
        # straight from RGB instead of detouring through BGR; the channel
        # weights are the same either way.
        pixels = np.array(pil_image)
        if pixels.ndim == 3:
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
        else:
            gray = pixels
        steps_applied.append("grayscale")

        # Resize to optimal width while maintaining aspect ratio
        gray = self._resize_optimal(gray)
        steps_applied.append("resize")

        # Apply deskew
        if apply_deskew:
            gray = self._deskew(gray)
            steps_applied.append("deskew")

        # Apply high contrast enhancement (critical for thermal receipts)
        if apply_contrast:
            gray = self._enhance_contrast(gray)
            steps_applied.append("contrast")

        # Apply sharpening
        if apply_sharpen:
            gray = self._sharpen(gray)
            steps_applied.append("sharpen")

        # Apply denoising
        if apply_denoise:
            gray = self._denoise(gray)
            steps_applied.append("denoise")

        # Apply adaptive thresholding (receipt-optimized parameters)
        if apply_threshold:
            gray = self._adaptive_threshold_receipt(gray)
            steps_applied.append("threshold")

        # Convert back to PNG bytes
        result_image = Image.fromarray(gray)
        buffer = io.BytesIO()
        result_image.save(buffer, format="PNG")

        logger.debug(f"Receipt preprocessing applied: {steps_applied}")

        return ReceiptPreprocessingResult(
            image_bytes=buffer.getvalue(),
            preprocessing_applied=steps_applied,
            original_width=original_width,
            original_height=original_height,
        )

    def _fix_orientation(self, image: Image.Image) -> Image.Image:
        """Return the image rotated/flipped according to its EXIF orientation.

        All EXIF orientation values are honoured, including the mirrored
        ones, by delegating to ``ImageOps.exif_transpose``. The input image
        is returned untouched when there is no orientation tag, when the
        orientation is already normal, or when the EXIF data cannot be read.
        """
        try:
            orientation = image.getexif().get(274)  # 274 = Orientation tag
            if not orientation or orientation == 1:
                # Nothing to correct; avoid copying the image needlessly.
                return image
            transposed = ImageOps.exif_transpose(image)
            return image if transposed is None else transposed
        except Exception as e:
            logger.debug(f"Could not read EXIF orientation: {e}")
            return image

    def _resize_optimal(self, image: np.ndarray) -> np.ndarray:
        """Downscale the image to TARGET_WIDTH, keeping the aspect ratio.

        Images that are already TARGET_WIDTH wide or narrower are returned
        unchanged (no upscaling is performed).
        """
        height, width = image.shape[:2]
        if width <= self.TARGET_WIDTH:
            return image

        scale = self.TARGET_WIDTH / width
        # Clamp so that an extremely wide, short image never yields a
        # zero-pixel target height, which cv2.resize rejects.
        new_height = max(1, int(height * scale))

        return cv2.resize(
            image,
            (self.TARGET_WIDTH, new_height),
            interpolation=cv2.INTER_AREA,
        )

    def _deskew(self, image: np.ndarray) -> np.ndarray:
        """
        Correct slight image rotation.

        The skew angle is estimated from the minimum-area rectangle that
        encloses the foreground pixels of an Otsu-binarized copy of the
        image. Only angles with a magnitude between 0.5 and 15 degrees are
        corrected; anything else (or any failure) returns the input as is.
        """
        try:
            # Create binary image for angle detection (foreground = white)
            _, binary = cv2.threshold(
                image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
            )

            # Find all non-zero points
            # NOTE(review): np.where yields (row, col) pairs while
            # cv2.minAreaRect treats points as (x, y) - confirm the sign of
            # the resulting angle against the installed OpenCV version.
            coords = np.column_stack(np.where(binary > 0))
            if len(coords) < 100:
                # Too little foreground to estimate an angle reliably
                return image

            # Use minimum area rectangle to find angle
            rect = cv2.minAreaRect(coords)
            angle = rect[-1]

            # Normalize angle into the [-45, 45] range
            if angle < -45:
                angle = 90 + angle
            elif angle > 45:
                angle = angle - 90

            # Only correct if angle is significant but not extreme
            if abs(angle) < 0.5 or abs(angle) > 15:
                return image

            # Rotate about the image center to correct skew
            height, width = image.shape[:2]
            center = (width // 2, height // 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
            rotated = cv2.warpAffine(
                image,
                rotation_matrix,
                (width, height),
                borderMode=cv2.BORDER_REPLICATE,
            )

            logger.debug(f"Receipt deskewed by {angle:.2f} degrees")
            return rotated
        except Exception as e:
            logger.warning(f"Deskew failed: {e}")
            return image

    def _enhance_contrast(self, image: np.ndarray) -> np.ndarray:
        """
        Apply aggressive contrast enhancement for faded receipts.

        Combines:
        1. Histogram stretching between the 2nd and 98th percentiles
        2. CLAHE (Contrast Limited Adaptive Histogram Equalization)

        The stretch is skipped when the two percentiles coincide (a flat or
        nearly flat image), because the scale factor would otherwise involve
        a division by zero and fill the image with NaN/inf garbage.
        """
        try:
            # First, stretch histogram to use full dynamic range
            p2, p98 = np.percentile(image, (2, 98))
            if p98 > p2:
                stretched = np.clip(
                    (image - p2) * 255.0 / (p98 - p2), 0, 255
                ).astype(np.uint8)
            else:
                # Degenerate intensity range: nothing to stretch
                stretched = image

            # Apply CLAHE with parameters optimized for receipts
            # Higher clipLimit for faded thermal receipts
            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
            enhanced = clahe.apply(stretched)
            return enhanced
        except Exception as e:
            logger.warning(f"Contrast enhancement failed: {e}")
            return image

    def _sharpen(self, image: np.ndarray) -> np.ndarray:
        """
        Apply unsharp masking for clearer text edges.

        Light sharpening improves OCR on slightly blurry images.
        """
        try:
            # Gaussian blur (sigma 2.0) for unsharp mask
            blurred = cv2.GaussianBlur(image, (0, 0), 2.0)

            # Unsharp mask: original + 0.5 * (original - blurred)
            sharpened = cv2.addWeighted(image, 1.5, blurred, -0.5, 0)
            return sharpened
        except Exception as e:
            logger.warning(f"Sharpening failed: {e}")
            return image

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """
        Apply light denoising optimized for text.

        Uses bilateral filter to preserve edges while reducing noise.
        """
        try:
            # Bilateral filter preserves edges better than Gaussian
            # Light denoising - don't want to blur text
            return cv2.bilateralFilter(image, 5, 50, 50)
        except Exception as e:
            logger.warning(f"Denoising failed: {e}")
            return image

    def _adaptive_threshold_receipt(self, image: np.ndarray) -> np.ndarray:
        """
        Apply adaptive thresholding optimized for receipt text.

        Uses parameters tuned for:
        - Variable font sizes (small print + headers)
        - Faded thermal printing
        - Uneven paper illumination
        """
        try:
            # Use Gaussian adaptive threshold
            # Larger block size (31) handles uneven illumination
            # Moderate C value (8) for faded receipts
            binary = cv2.adaptiveThreshold(
                image,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=31,
                C=8,
            )
            return binary
        except Exception as e:
            logger.warning(f"Adaptive threshold failed: {e}")
            return image

    def preprocess_for_low_quality(
        self, image_bytes: bytes
    ) -> ReceiptPreprocessingResult:
        """
        Run the full pipeline with every enhancement stage enabled.

        Intended as a retry path when standard preprocessing fails to
        produce readable text. Note that every flag passed here matches the
        default of ``preprocess``, so the output is currently the same as a
        plain ``preprocess(image_bytes)`` call.

        Args:
            image_bytes: Raw image bytes (HEIC, JPEG, PNG)

        Returns:
            ReceiptPreprocessingResult from ``preprocess``.
        """
        return self.preprocess(
            image_bytes,
            apply_contrast=True,
            apply_deskew=True,
            apply_denoise=True,
            apply_threshold=True,
            apply_sharpen=True,
        )
# Singleton instance: a shared ReceiptPreprocessor created once when this
# module is imported.
receipt_preprocessor = ReceiptPreprocessor()