Files
motovaultpro/ocr/app/services/preprocessor.py
Eric Gullickson 852c9013b5
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 5m59s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
feat: add core OCR API integration (refs #65)
OCR Service (Python/FastAPI):
- POST /extract for synchronous OCR extraction
- POST /jobs and GET /jobs/{job_id} for async processing
- Image preprocessing (deskew, denoise) for accuracy
- HEIC conversion via pillow-heif
- Redis job queue for async processing

Backend (Fastify):
- POST /api/ocr/extract - authenticated proxy to OCR
- POST /api/ocr/jobs - async job submission
- GET /api/ocr/jobs/:jobId - job polling
- Multipart file upload handling
- JWT authentication required

File size limits: 10MB sync, 200MB async
Processing time target: <3 seconds for typical photos

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 16:02:11 -06:00

177 lines
5.6 KiB
Python

"""Image preprocessing service for OCR accuracy improvement."""
import io
import logging
from typing import Optional
import cv2
import numpy as np
from PIL import Image
logger = logging.getLogger(__name__)
class ImagePreprocessor:
    """Handles image preprocessing for improved OCR accuracy.

    Stateless: all methods operate only on their arguments, so a single
    shared instance can serve concurrent requests.
    """

    def preprocess(
        self,
        image_bytes: bytes,
        deskew: bool = True,
        denoise: bool = True,
        binarize: bool = False,
    ) -> bytes:
        """
        Apply preprocessing to an image for better OCR results.

        Args:
            image_bytes: Raw image bytes in any format PIL can decode.
            deskew: Whether to correct image rotation.
            denoise: Whether to apply noise reduction.
            binarize: Whether to convert to black and white.

        Returns:
            Preprocessed grayscale image encoded as PNG bytes.

        Raises:
            PIL.UnidentifiedImageError: If the bytes are not a decodable image.
        """
        pil_image = Image.open(io.BytesIO(image_bytes))
        # NOTE(review): EXIF orientation is not applied here; rotated phone
        # photos may rely on the deskew step alone — confirm upstream handling.
        # Normalize exotic modes (RGBA, palette, CMYK, ...) to RGB; keep "L"
        # (already grayscale) as-is to avoid a pointless round trip.
        if pil_image.mode not in ("RGB", "L"):
            pil_image = pil_image.convert("RGB")

        cv_image = np.array(pil_image)
        # The array is RGB-ordered (straight from PIL), so collapse to
        # grayscale with RGB2GRAY directly — the original RGB->BGR->GRAY
        # two-step conversion produced identical values with an extra pass.
        if cv_image.ndim == 3:
            gray = cv2.cvtColor(cv_image, cv2.COLOR_RGB2GRAY)
        else:
            gray = cv_image

        # Denoise before deskew so edge/line detection sees cleaner input.
        if denoise:
            gray = self._denoise(gray)
        if deskew:
            gray = self._deskew(gray)
        # Optional: binarization helps some documents but can hurt photos,
        # hence off by default.
        if binarize:
            gray = self._binarize(gray)

        # Always emit PNG so callers get a lossless, predictable format.
        buffer = io.BytesIO()
        Image.fromarray(gray).save(buffer, format="PNG")
        return buffer.getvalue()

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """Apply noise reduction using non-local means denoising.

        Best-effort: on OpenCV failure, log a warning and return the input
        unchanged rather than failing the whole request.
        """
        try:
            # fastNlMeansDenoising is effective for grayscale images.
            return cv2.fastNlMeansDenoising(
                image, h=10, templateWindowSize=7, searchWindowSize=21
            )
        except cv2.error as e:
            # Lazy %-formatting: the message is only built if the record
            # is actually emitted.
            logger.warning("Denoising failed: %s", e)
            return image

    def _deskew(self, image: np.ndarray) -> np.ndarray:
        """Correct image rotation using the Hough transform.

        Best-effort: returns the input unchanged when no usable lines are
        found, when the skew is negligible (< 0.5 deg) or implausibly large
        (> 15 deg), or when any step raises.
        """
        try:
            # Detect edges, then candidate text lines.
            edges = cv2.Canny(image, 50, 150, apertureSize=3)
            lines = cv2.HoughLinesP(
                edges,
                rho=1,
                theta=np.pi / 180,
                threshold=100,
                minLineLength=100,
                maxLineGap=10,
            )
            if lines is None:
                return image

            # Collect angles of nearly horizontal segments (|angle| < 45 deg);
            # steeper lines are likely rules/borders, not text baselines.
            angles = []
            for line in lines:
                x1, y1, x2, y2 = line[0]
                if x2 - x1 != 0:  # Avoid division by zero
                    angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
                    if -45 < angle < 45:
                        angles.append(angle)
            if not angles:
                return image

            # Median resists outlier segments better than the mean.
            median_angle = np.median(angles)
            # Skip correction when skew is insignificant or too extreme to
            # be a plausible document skew (likely mis-detected lines).
            if abs(median_angle) < 0.5 or abs(median_angle) > 15:
                return image

            height, width = image.shape[:2]
            center = (width // 2, height // 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, median_angle, 1.0)

            # Expand the canvas so the rotated image is not cropped, and
            # shift the transform to keep the content centered.
            cos_val = abs(rotation_matrix[0, 0])
            sin_val = abs(rotation_matrix[0, 1])
            new_width = int(height * sin_val + width * cos_val)
            new_height = int(height * cos_val + width * sin_val)
            rotation_matrix[0, 2] += (new_width - width) / 2
            rotation_matrix[1, 2] += (new_height - height) / 2

            rotated = cv2.warpAffine(
                image,
                rotation_matrix,
                (new_width, new_height),
                # Replicate edge pixels instead of introducing black borders,
                # which would confuse downstream OCR/binarization.
                borderMode=cv2.BORDER_REPLICATE,
            )
            logger.debug("Deskewed image by %.2f degrees", median_angle)
            return rotated
        except Exception as e:
            # Deliberately broad: deskew is an optional enhancement and must
            # never fail the request.
            logger.warning("Deskewing failed: %s", e)
            return image

    def _binarize(self, image: np.ndarray) -> np.ndarray:
        """Convert to binary (black and white) using adaptive thresholding.

        Best-effort: on OpenCV failure, log a warning and return the input
        unchanged.
        """
        try:
            # Adaptive (per-neighborhood) thresholding handles uneven
            # lighting better than a single global threshold.
            return cv2.adaptiveThreshold(
                image,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=11,
                C=2,
            )
        except cv2.error as e:
            logger.warning("Binarization failed: %s", e)
            return image

    def get_image_info(self, image_bytes: bytes) -> dict:
        """Return width, height, mode, and format of the given image bytes.

        Raises:
            PIL.UnidentifiedImageError: If the bytes are not a decodable image.
        """
        pil_image = Image.open(io.BytesIO(image_bytes))
        return {
            "width": pil_image.width,
            "height": pil_image.height,
            "mode": pil_image.mode,
            "format": pil_image.format,
        }
# Module-level singleton shared by request handlers. ImagePreprocessor
# holds no instance state, so one shared instance is sufficient.
preprocessor = ImagePreprocessor()