fix: invert min-channel so Tesseract gets dark-on-light text (refs #113)

The min-channel correctly extracts contrast (white text = 255 vs. green sticker background = 130), but Tesseract expects dark text on a light background. Without inversion, the grayscale-only path returned empty text for every PSM mode because Tesseract could not read the bright-on-dark text.

Invert via cv2.bitwise_not: text becomes 0 (black) and the sticker background becomes 125 (gray). This fixes all three OCR paths (adaptive, grayscale, Otsu).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 21:39:48 -06:00
parent 63c027a454
commit ae5221c759
1 changed file with 16 additions and 18 deletions
--- a/ocr/app/preprocessors/vin_preprocessor.py
+++ b/ocr/app/preprocessors/vin_preprocessor.py
@@ -153,34 +153,32 @@ class VinPreprocessor:

    def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
        """
-        Compute a grayscale image that maximizes text-to-background contrast.
+        Compute a grayscale image with dark text on light background.

-        Uses per-pixel minimum across B, G, R channels.  White text has
-        min(255,255,255) = 255 regardless of channel, while any colored
-        background has a low value in at least one channel (e.g. green
-        sticker: min(130,230,150) = 130).  This gives ~125 units of
-        contrast vs ~60 from standard grayscale.
+        Uses inverted per-pixel minimum across B, G, R channels.
+        White text has min(255,255,255) = 255 → inverted to 0 (black).
+        Colored backgrounds have a low min value (e.g. green sticker:
+        min(130,230,150) = 130) → inverted to 125 (medium gray).

-        Falls back to standard grayscale when the min-channel doesn't
-        improve contrast (i.e. for already-neutral/gray images).
+        The inversion ensures Tesseract always receives dark-text-on-
+        light-background, which is the polarity it expects.
        """
        b_channel, g_channel, r_channel = cv2.split(bgr_image)

        min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
-        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)

-        min_std = float(np.std(min_channel))
-        gray_std = float(np.std(gray))
+        # Invert so white text (min=255) becomes black (0) and colored
+        # backgrounds (min~130) become lighter gray (~125).  Tesseract
+        # expects dark text on light background.
+        inverted = cv2.bitwise_not(min_channel)
+
+        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
        logger.debug(
-            "Channel contrast: min-channel std=%.1f, grayscale std=%.1f",
-            min_std, gray_std,
+            "Channel contrast: inverted-min std=%.1f, grayscale std=%.1f",
+            float(np.std(inverted)), float(np.std(gray)),
        )

-        # Always use min-channel for VIN images.  White text keeps
-        # min(B,G,R)=255 while any colored background drops to its
-        # weakest channel.  For neutral images the result is equivalent
-        # to grayscale, so there is no downside.
-        return min_channel
+        return inverted

    def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
        """