fix: use best-contrast color channel for VIN preprocessing (refs #113)

White text on green VIN stickers has only ~12% contrast in standard grayscale conversion because the green channel dominates luminance. The new _best_contrast_channel method evaluates each RGB channel's standard deviation and selects the one with highest contrast, giving ~2x improvement for green-tinted VIN stickers. Falls back to standard grayscale for neutral-colored images. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 21:14:56 -06:00
parent ce2a8d88f9
commit 0de34983bb
1 changed files with 43 additions and 3 deletions
--- a/ocr/app/preprocessors/vin_preprocessor.py
+++ b/ocr/app/preprocessors/vin_preprocessor.py
@@ -86,9 +86,9 @@ class VinPreprocessor:
        if len(cv_image.shape) == 3:
            cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
-        # Convert to grayscale
+        # Convert to grayscale using best-contrast channel selection
        if len(cv_image.shape) == 3:
-            gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
+            gray = self._best_contrast_channel(cv_image)
        else:
            gray = cv_image
        steps_applied.append("grayscale")
@@ -150,6 +150,46 @@ class VinPreprocessor:
            logger.debug(f"Upscaled image from {width}x{height} to {new_width}x{new_height}")
        return image
    def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
        """
        Select the single color channel with the highest contrast.
        Standard grayscale conversion (0.299R + 0.587G + 0.114B) averages
        channels, which destroys contrast when text and background differ
        primarily in one channel.  For example, white text on a green VIN
        sticker has almost identical luminance, but the blue and red channels
        show strong contrast.
        This method evaluates each BGR channel by its standard deviation
        (a proxy for contrast) and returns the one with the highest value.
        Falls back to standard grayscale when all channels are similar.
        """
        b_channel, g_channel, r_channel = cv2.split(bgr_image)
        stds = [
            float(np.std(b_channel)),
            float(np.std(g_channel)),
            float(np.std(r_channel)),
        ]
        channels = [b_channel, g_channel, r_channel]
        channel_names = ["blue", "green", "red"]
        best_idx = int(np.argmax(stds))
        max_std = stds[best_idx]
        min_std = min(stds)
        # Only use single-channel extraction when one channel is notably
        # better (>20% higher std than the weakest).  Otherwise, standard
        # grayscale is fine and more robust for neutral-colored images.
        if max_std > 0 and (max_std - min_std) / max_std > 0.20:
            logger.debug(
                "Using %s channel (std=%.1f) over grayscale (stds: B=%.1f G=%.1f R=%.1f)",
                channel_names[best_idx], max_std, stds[0], stds[1], stds[2],
            )
            return channels[best_idx]
        return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
        """
        Apply CLAHE (Contrast Limited Adaptive Histogram Equalization).
@@ -306,7 +346,7 @@ class VinPreprocessor:
            cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
        if len(cv_image.shape) == 3:
-            gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
+            gray = self._best_contrast_channel(cv_image)
        else:
            gray = cv_image
        steps_applied.append("grayscale")