2026-02-07 15:47:37 +00:00
1 changed files with 34 additions and 27 deletions
--- a/ocr/app/preprocessors/vin_preprocessor.py
+++ b/ocr/app/preprocessors/vin_preprocessor.py
@@ -115,6 +115,7 @@ class VinPreprocessor:
        # Apply adaptive thresholding
        if apply_threshold:
            gray = self._adaptive_threshold(gray)
            gray = self._morphological_cleanup(gray)
            steps_applied.append("threshold")
        # Convert back to PNG bytes
@@ -152,43 +153,34 @@ class VinPreprocessor:
    def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
        """
        Compute a grayscale image that maximizes text-to-background contrast.

        Uses the per-pixel minimum across the B, G and R channels.  White
        text has min(255,255,255) = 255 regardless of channel, while any
        colored background has a low value in at least one channel (e.g.
        green sticker: min(130,230,150) = 130).  This gives ~125 units of
        contrast vs ~60 from standard grayscale.

        Falls back to standard grayscale when the min-channel doesn't
        improve contrast (i.e. for already-neutral/gray images).

        Args:
            bgr_image: Image that ``cv2.split`` separates into exactly three
                planes, unpacked here as blue, green, red.

        Returns:
            Single-channel image: the per-pixel channel minimum when its
            standard deviation exceeds that of the standard grayscale
            conversion by more than 10%, otherwise the standard grayscale
            conversion.
        """
        b_channel, g_channel, r_channel = cv2.split(bgr_image)

        # Per-pixel minimum over the three color planes.
        min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
        min_std = float(np.std(min_channel))

        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
        gray_std = float(np.std(gray))

        # Standard deviation is used as a proxy for contrast.  Use the
        # min-channel only when it provides meaningfully more contrast
        # (>10% higher std); otherwise standard grayscale is kept.
        if min_std > gray_std * 1.1:
            logger.debug(
                "Using min-channel (std=%.1f) over grayscale (std=%.1f)",
                min_std, gray_std,
            )
            return min_channel

        return gray
    def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
        """
@@ -309,6 +301,20 @@ class VinPreprocessor:
            logger.warning(f"Adaptive threshold failed: {e}")
            return image
    def _morphological_cleanup(self, image: np.ndarray) -> np.ndarray:
        """
        Apply a morphological opening to a thresholded binary image.

        Opening is an erosion followed by a dilation; here it uses a 2x2
        rectangular structuring element, intended to drop isolated pixels
        and thin noise lines while keeping larger text strokes.

        Returns the opened image, or the input image unchanged if OpenCV
        raises ``cv2.error`` (the failure is logged as a warning).
        """
        try:
            opened = cv2.morphologyEx(
                image,
                cv2.MORPH_OPEN,
                cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)),
            )
        except cv2.error as e:
            # Best-effort step: keep the pipeline going with the
            # un-cleaned image rather than failing preprocessing.
            logger.warning(f"Morphological cleanup failed: {e}")
            return image
        return opened
    def _otsu_threshold(self, image: np.ndarray) -> np.ndarray:
        """
        Apply Otsu's thresholding for binarization.
@@ -361,6 +367,7 @@ class VinPreprocessor:
        steps_applied.append("denoise")
        gray = self._otsu_threshold(gray)
        gray = self._morphological_cleanup(gray)
        steps_applied.append("otsu_threshold")
        result_image = Image.fromarray(gray)