From 0de34983bbab3b57c3c0d05554ab46ec1368136d Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:14:56 -0600 Subject: [PATCH] fix: use best-contrast color channel for VIN preprocessing (refs #113) White text on green VIN stickers has only ~12% contrast in standard grayscale conversion because the green channel dominates luminance. The new _best_contrast_channel method evaluates each RGB channel's standard deviation and selects the one with highest contrast, giving ~2x improvement for green-tinted VIN stickers. Falls back to standard grayscale for neutral-colored images. Co-Authored-By: Claude Opus 4.6 --- ocr/app/preprocessors/vin_preprocessor.py | 46 +++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index 95ba4bc..aee5a86 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -86,9 +86,9 @@ class VinPreprocessor: if len(cv_image.shape) == 3: cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) - # Convert to grayscale + # Convert to grayscale using best-contrast channel selection if len(cv_image.shape) == 3: - gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) + gray = self._best_contrast_channel(cv_image) else: gray = cv_image steps_applied.append("grayscale") @@ -150,6 +150,46 @@ class VinPreprocessor: logger.debug(f"Upscaled image from {width}x{height} to {new_width}x{new_height}") return image + def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray: + """ + Select the single color channel with the highest contrast. + + Standard grayscale conversion (0.299R + 0.587G + 0.114B) averages + channels, which destroys contrast when text and background differ + primarily in one channel. For example, white text on a green VIN + sticker has almost identical luminance, but the blue and red channels + show strong contrast. + + This method evaluates each BGR channel by its standard deviation + (a proxy for contrast) and returns the one with the highest value. + Falls back to standard grayscale when all channels are similar. + """ + b_channel, g_channel, r_channel = cv2.split(bgr_image) + + stds = [ + float(np.std(b_channel)), + float(np.std(g_channel)), + float(np.std(r_channel)), + ] + channels = [b_channel, g_channel, r_channel] + channel_names = ["blue", "green", "red"] + + best_idx = int(np.argmax(stds)) + max_std = stds[best_idx] + min_std = min(stds) + + # Only use single-channel extraction when one channel is notably + # better (>20% higher std than the weakest). Otherwise, standard + # grayscale is fine and more robust for neutral-colored images. + if max_std > 0 and (max_std - min_std) / max_std > 0.20: + logger.debug( + "Using %s channel (std=%.1f) over grayscale (stds: B=%.1f G=%.1f R=%.1f)", + channel_names[best_idx], max_std, stds[0], stds[1], stds[2], + ) + return channels[best_idx] + + return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) + def _apply_clahe(self, image: np.ndarray) -> np.ndarray: """ Apply CLAHE (Contrast Limited Adaptive Histogram Equalization). @@ -306,7 +346,7 @@ class VinPreprocessor: cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) if len(cv_image.shape) == 3: - gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) + gray = self._best_contrast_channel(cv_image) else: gray = cv_image steps_applied.append("grayscale")