All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 32s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m20s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
Implement receipt-specific OCR extraction for fuel receipts: - Pattern matching modules for date, currency, and fuel data extraction - Receipt-optimized image preprocessing for thermal receipts - POST /extract/receipt endpoint with field extraction - Confidence scoring per extracted field - Cross-validation of fuel receipt data - Unit tests for all pattern matchers Extracted fields: merchantName, transactionDate, totalAmount, fuelQuantity, pricePerUnit, fuelGrade Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
341 lines
10 KiB
Python
341 lines
10 KiB
Python
"""Receipt-optimized image preprocessing pipeline."""
|
|
import io
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from typing import Optional
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
from pillow_heif import register_heif_opener
|
|
|
|
# Register the HEIF/HEIC opener with Pillow once, at import time, so that
# Image.open() can be handed HEIC bytes.
register_heif_opener()

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class ReceiptPreprocessingResult:
    """Outcome of one receipt preprocessing run.

    Field order is part of the constructor signature and must not change.
    """

    # Encoded bytes of the processed image.
    image_bytes: bytes
    # Names of the pipeline steps that were recorded, in execution order.
    preprocessing_applied: list[str]
    # Pixel width of the input image as reported when it was first opened.
    original_width: int
    # Pixel height of the input image as reported when it was first opened.
    original_height: int
|
|
|
|
|
|
class ReceiptPreprocessor:
    """Receipt-optimized image preprocessing for improved OCR accuracy.

    Thermal receipts typically have:
    - Low contrast (faded ink)
    - Uneven illumination
    - Paper curl/skew
    - Variable font weights

    This preprocessor addresses these issues with targeted enhancements.
    The deskew, contrast, sharpen, denoise and threshold helpers are
    best-effort: each one catches its own failures, logs them, and returns
    its input unchanged so the rest of the pipeline can still run.
    """

    # Optimal width for receipt OCR (narrow receipts work better)
    TARGET_WIDTH = 800

    def preprocess(
        self,
        image_bytes: bytes,
        apply_contrast: bool = True,
        apply_deskew: bool = True,
        apply_denoise: bool = True,
        apply_threshold: bool = True,
        apply_sharpen: bool = True,
    ) -> "ReceiptPreprocessingResult":
        """Apply the receipt-optimized preprocessing pipeline.

        Pipeline order:
            1. Decode with Pillow (HEIC is handled through pillow-heif)
            2. EXIF orientation fix and grayscale conversion
            3. Downscale to ``TARGET_WIDTH``
            4. Deskew (correct rotation)
            5. Contrast enhancement (histogram stretch + CLAHE)
            6. Sharpening
            7. Noise reduction
            8. Adaptive thresholding

        Args:
            image_bytes: Raw image bytes (HEIC, JPEG, PNG).
            apply_contrast: Apply contrast enhancement.
            apply_deskew: Apply deskew correction.
            apply_denoise: Apply noise reduction.
            apply_threshold: Apply adaptive thresholding.
            apply_sharpen: Apply sharpening.

        Returns:
            ReceiptPreprocessingResult holding the processed image encoded as
            PNG, the recorded step names, and the size of the image as it was
            opened (before any orientation fix or resize).

        Raises:
            Errors raised by Pillow while opening or decoding ``image_bytes``
            are not caught here and propagate to the caller.
        """
        steps_applied = []

        # Load image with PIL (handles HEIC via pillow-heif)
        pil_image = Image.open(io.BytesIO(image_bytes))
        original_width, original_height = pil_image.size
        steps_applied.append("loaded")

        # Handle EXIF rotation
        pil_image = self._fix_orientation(pil_image)

        # Anything that is not already RGB or single-channel goes to RGB first
        if pil_image.mode not in ("RGB", "L"):
            pil_image = pil_image.convert("RGB")
            steps_applied.append("convert_rgb")

        # Go straight from Pillow's RGB channel order to grayscale; an
        # intermediate RGB -> BGR swap would only be undone by the gray
        # conversion. "L" images are already single-channel.
        pixels = np.array(pil_image)
        if pixels.ndim == 3:
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
        else:
            gray = pixels
        steps_applied.append("grayscale")

        # "resize" and "deskew" are recorded whenever the step is attempted,
        # even if the helper decides that no change is needed.
        gray = self._resize_optimal(gray)
        steps_applied.append("resize")

        if apply_deskew:
            gray = self._deskew(gray)
            steps_applied.append("deskew")

        # High contrast enhancement (critical for thermal receipts)
        if apply_contrast:
            gray = self._enhance_contrast(gray)
            steps_applied.append("contrast")

        if apply_sharpen:
            gray = self._sharpen(gray)
            steps_applied.append("sharpen")

        if apply_denoise:
            gray = self._denoise(gray)
            steps_applied.append("denoise")

        # Adaptive thresholding (receipt-optimized parameters)
        if apply_threshold:
            gray = self._adaptive_threshold_receipt(gray)
            steps_applied.append("threshold")

        # Convert back to PNG bytes
        result_image = Image.fromarray(gray)
        buffer = io.BytesIO()
        result_image.save(buffer, format="PNG")

        logger.debug("Receipt preprocessing applied: %s", steps_applied)

        return ReceiptPreprocessingResult(
            image_bytes=buffer.getvalue(),
            preprocessing_applied=steps_applied,
            original_width=original_width,
            original_height=original_height,
        )

    def _fix_orientation(self, image: "Image.Image") -> "Image.Image":
        """Return ``image`` turned upright according to its EXIF orientation.

        All eight EXIF orientation values are honoured, including the
        mirrored ones (2, 4, 5, 7), by delegating to Pillow's
        ``ImageOps.exif_transpose``. Images with no orientation tag, with
        orientation 1 (already upright), or with an out-of-range value are
        returned unchanged. Any failure while reading EXIF data is logged at
        debug level and the original image is returned.
        """
        # Function-scope import: keeps this method self-contained without
        # touching the module's import block.
        from PIL import ImageOps

        try:
            orientation = image.getexif().get(274)  # 274 = EXIF Orientation tag
            if orientation in range(2, 9):
                return ImageOps.exif_transpose(image)
        except Exception as e:
            logger.debug("Could not read EXIF orientation: %s", e)
        return image

    def _resize_optimal(self, image: np.ndarray) -> np.ndarray:
        """Downscale ``image`` to ``TARGET_WIDTH``, keeping the aspect ratio.

        Images that are already ``TARGET_WIDTH`` wide or narrower are
        returned unchanged (the same array object); images are never
        upscaled.
        """
        height, width = image.shape[:2]

        if width <= self.TARGET_WIDTH:
            return image

        scale = self.TARGET_WIDTH / width
        # Clamp to one row so an extremely wide, short image cannot produce a
        # zero-height target size.
        new_height = max(1, int(height * scale))

        return cv2.resize(
            image,
            (self.TARGET_WIDTH, new_height),
            interpolation=cv2.INTER_AREA,
        )

    @staticmethod
    def _normalize_skew_angle(angle: float) -> float:
        """Fold a rectangle angle into the [-45, 45] degree range."""
        if angle < -45:
            return 90 + angle
        if angle > 45:
            return angle - 90
        return angle

    def _deskew(self, image: np.ndarray) -> np.ndarray:
        """Correct small rotations using a minimum-area bounding rectangle.

        The image is binarized with an inverted Otsu threshold, the
        minimum-area rectangle around all foreground pixels is computed, and
        its angle is used as the skew estimate. The image is rotated only
        when the estimate is between 0.5 and 15 degrees in magnitude; smaller
        angles are treated as noise and larger ones as unreliable. If fewer
        than 100 foreground pixels are found, or on any failure (logged as a
        warning), the input is returned unchanged.

        NOTE(review): ``np.where`` yields (row, col) pairs while OpenCV point
        functions conventionally take (x, y), and the angle range returned by
        ``cv2.minAreaRect`` differs between OpenCV releases. Confirm the
        rotation direction against sample receipts.
        """
        try:
            # Create binary image for angle detection
            _, binary = cv2.threshold(
                image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
            )

            # Find all non-zero points
            coords = np.column_stack(np.where(binary > 0))
            if len(coords) < 100:
                return image

            # Use minimum area rectangle to find angle
            rect = cv2.minAreaRect(coords)
            angle = self._normalize_skew_angle(rect[-1])

            # Only correct if angle is significant but not extreme
            if abs(angle) < 0.5 or abs(angle) > 15:
                return image

            # Rotate about the image centre; replicate border pixels so no
            # black wedges appear at the corners.
            height, width = image.shape[:2]
            center = (width // 2, height // 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)

            rotated = cv2.warpAffine(
                image,
                rotation_matrix,
                (width, height),
                borderMode=cv2.BORDER_REPLICATE,
            )

            logger.debug("Receipt deskewed by %.2f degrees", angle)
            return rotated

        except Exception as e:
            logger.warning("Deskew failed: %s", e)
            return image

    @staticmethod
    def _stretch_histogram(image: np.ndarray) -> np.ndarray:
        """Linearly stretch the 2nd..98th percentile range onto 0..255.

        When both percentiles coincide (a flat image, or one where almost
        every pixel has the same value) there is no range to stretch and the
        division would produce NaN/inf, so the input is returned unchanged.
        Otherwise the result is a new ``uint8`` array.
        """
        low, high = np.percentile(image, (2, 98))
        if high <= low:
            return image
        return np.clip((image - low) * 255.0 / (high - low), 0, 255).astype(
            np.uint8
        )

    def _enhance_contrast(self, image: np.ndarray) -> np.ndarray:
        """Apply aggressive contrast enhancement for faded receipts.

        Combines:
        1. Histogram stretching (skipped when the image has no usable range)
        2. CLAHE (Contrast Limited Adaptive Histogram Equalization)

        On any failure a warning is logged and the input is returned
        unchanged.
        """
        try:
            # First, stretch histogram to use full dynamic range
            stretched = self._stretch_histogram(image)

            # Apply CLAHE with parameters optimized for receipts
            # Higher clipLimit for faded thermal receipts
            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
            return clahe.apply(stretched)

        except Exception as e:
            logger.warning("Contrast enhancement failed: %s", e)
            return image

    def _sharpen(self, image: np.ndarray) -> np.ndarray:
        """Apply unsharp masking for clearer text edges.

        Light sharpening improves OCR on slightly blurry images. On any
        failure a warning is logged and the input is returned unchanged.
        """
        try:
            # Gaussian blur for unsharp mask; a (0, 0) kernel size lets
            # OpenCV derive the kernel from sigma=2.0.
            blurred = cv2.GaussianBlur(image, (0, 0), 2.0)

            # Unsharp mask: 1.5 * original - 0.5 * blurred, which equals
            # original + 0.5 * (original - blurred).
            return cv2.addWeighted(image, 1.5, blurred, -0.5, 0)

        except Exception as e:
            logger.warning("Sharpening failed: %s", e)
            return image

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """Apply light denoising optimized for text.

        Uses a bilateral filter to preserve edges while reducing noise. On
        any failure a warning is logged and the input is returned unchanged.
        """
        try:
            # Bilateral filter preserves edges better than Gaussian
            # Light denoising - don't want to blur text
            return cv2.bilateralFilter(image, 5, 50, 50)

        except Exception as e:
            logger.warning("Denoising failed: %s", e)
            return image

    def _adaptive_threshold_receipt(self, image: np.ndarray) -> np.ndarray:
        """Apply adaptive thresholding optimized for receipt text.

        Uses parameters tuned for:
        - Variable font sizes (small print + headers)
        - Faded thermal printing
        - Uneven paper illumination

        On any failure a warning is logged and the input is returned
        unchanged.
        """
        try:
            # Use Gaussian adaptive threshold
            # Larger block size (31) handles uneven illumination
            # Moderate C value (8) for faded receipts
            return cv2.adaptiveThreshold(
                image,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=31,
                C=8,
            )

        except Exception as e:
            logger.warning("Adaptive threshold failed: %s", e)
            return image

    def preprocess_for_low_quality(
        self, image_bytes: bytes
    ) -> "ReceiptPreprocessingResult":
        """Run the full pipeline with every enhancement step enabled.

        Intended for very low quality receipts, when standard preprocessing
        fails to produce readable text.

        NOTE(review): every flag passed here equals the default of
        ``preprocess``, so this currently produces the same result as
        ``preprocess(image_bytes)``. Confirm whether stronger parameters
        were intended.
        """
        return self.preprocess(
            image_bytes,
            apply_contrast=True,
            apply_deskew=True,
            apply_denoise=True,
            apply_threshold=True,
            apply_sharpen=True,
        )
|
|
|
|
|
|
# Module-level singleton: one shared ReceiptPreprocessor created at import time.
receipt_preprocessor = ReceiptPreprocessor()
|