From ae5221c75953d602aea175d7f2ddbb5dbbda1843 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:39:48 -0600 Subject: [PATCH] fix: invert min-channel so Tesseract gets dark-on-light text (refs #113) The min-channel correctly extracts contrast (white text=255 vs green sticker bg=130), but Tesseract expects dark text on light background. Without inversion, the grayscale-only path returned empty text for every PSM mode because Tesseract couldn't see bright-on-dark text. Invert via bitwise_not: text becomes 0 (black), sticker bg becomes 125 (gray). Fixes all three OCR paths (adaptive, grayscale, Otsu). Co-Authored-By: Claude Opus 4.6 --- ocr/app/preprocessors/vin_preprocessor.py | 34 +++++++++++------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index 1362b12..290fb5b 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -153,34 +153,32 @@ class VinPreprocessor: def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray: """ - Compute a grayscale image that maximizes text-to-background contrast. + Compute a grayscale image with dark text on light background. - Uses per-pixel minimum across B, G, R channels. White text has - min(255,255,255) = 255 regardless of channel, while any colored - background has a low value in at least one channel (e.g. green - sticker: min(130,230,150) = 130). This gives ~125 units of - contrast vs ~60 from standard grayscale. + Uses inverted per-pixel minimum across B, G, R channels. + White text has min(255,255,255) = 255 → inverted to 0 (black). + Colored backgrounds have a low min value (e.g. green sticker: + min(130,230,150) = 130) → inverted to 125 (medium gray). - Falls back to standard grayscale when the min-channel doesn't - improve contrast (i.e. for already-neutral/gray images). + The inversion ensures Tesseract always receives dark-text-on- + light-background, which is the polarity it expects. """ b_channel, g_channel, r_channel = cv2.split(bgr_image) min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel) - gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) - min_std = float(np.std(min_channel)) - gray_std = float(np.std(gray)) + # Invert so white text (min=255) becomes black (0) and colored + # backgrounds (min~130) become lighter gray (~125). Tesseract + # expects dark text on light background. + inverted = cv2.bitwise_not(min_channel) + + gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) logger.debug( - "Channel contrast: min-channel std=%.1f, grayscale std=%.1f", - min_std, gray_std, + "Channel contrast: inverted-min std=%.1f, grayscale std=%.1f", + float(np.std(inverted)), float(np.std(gray)), ) - # Always use min-channel for VIN images. White text keeps - # min(B,G,R)=255 while any colored background drops to its - # weakest channel. For neutral images the result is equivalent - # to grayscale, so there is no downside. - return min_channel + return inverted def _apply_clahe(self, image: np.ndarray) -> np.ndarray: """