From 63c027a454e817942a84a40ca2662bc01dda01da Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Fri, 6 Feb 2026 21:32:52 -0600
Subject: [PATCH] fix: always use min-channel and add grayscale-only OCR path
 (refs #113)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two fixes:

1. Always use min-channel for color images instead of gated comparison
   that was falling back to standard grayscale (which has only 23%
   contrast for white-on-green VIN stickers).

2. Add grayscale-only OCR path (CLAHE + denoise, no thresholding)
   between adaptive and Otsu attempts. Tesseract's LSTM engine is
   designed to handle grayscale input directly and often outperforms
   binarized input where thresholding creates artifacts.

Pipeline order: adaptive threshold → grayscale-only → Otsu threshold

Co-Authored-By: Claude Opus 4.6
---
 ocr/app/extractors/vin_extractor.py       | 28 +++++++++++++++++++++++
 ocr/app/preprocessors/vin_preprocessor.py | 20 ++++++++--------
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py
index 9d58501..66a694b 100644
--- a/ocr/app/extractors/vin_extractor.py
+++ b/ocr/app/extractors/vin_extractor.py
@@ -146,6 +146,34 @@ class VinExtractor(BaseExtractor):
             # No VIN candidates found - try with different PSM modes
             candidates = self._try_alternate_ocr(preprocessed_bytes)
 
+        if not candidates:
+            # Try grayscale-only (no thresholding) — the Tesseract
+            # LSTM engine often performs better on non-binarized input
+            # because it does its own internal preprocessing.
+            gray_result = vin_preprocessor.preprocess(
+                image_bytes, apply_threshold=False
+            )
+            logger.debug(
+                "Grayscale preprocessing steps: %s",
+                gray_result.preprocessing_applied,
+            )
+            if debug_session:
+                self._save_debug_image(
+                    debug_session, "04_preprocessed_gray.png",
+                    gray_result.image_bytes,
+                )
+
+            raw_text, word_confidences = self._perform_ocr(
+                gray_result.image_bytes
+            )
+            logger.debug("Gray PSM 6 raw text: '%s'", raw_text)
+            candidates = vin_validator.extract_candidates(raw_text)
+            logger.debug("Gray PSM 6 candidates: %s", candidates)
+            if not candidates:
+                candidates = self._try_alternate_ocr(
+                    gray_result.image_bytes, prefix="Gray"
+                )
+
         if not candidates:
             # Try alternative preprocessing (Otsu's thresholding)
             otsu_result = vin_preprocessor.preprocess_otsu(image_bytes)

diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py
index cd9b388..1362b12 100644
--- a/ocr/app/preprocessors/vin_preprocessor.py
+++ b/ocr/app/preprocessors/vin_preprocessor.py
@@ -167,20 +167,20 @@ class VinPreprocessor:
         b_channel, g_channel, r_channel = cv2.split(bgr_image)
         min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
+        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
         min_std = float(np.std(min_channel))
-        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
         gray_std = float(np.std(gray))
+        logger.debug(
+            "Channel contrast: min-channel std=%.1f, grayscale std=%.1f",
+            min_std, gray_std,
+        )
 
-        # Use min-channel when it provides meaningfully more contrast
-        if min_std > gray_std * 1.1:
-            logger.debug(
-                "Using min-channel (std=%.1f) over grayscale (std=%.1f)",
-                min_std, gray_std,
-            )
-            return min_channel
-
-        return gray
+        # Always use min-channel for VIN images. White text keeps
+        # min(B,G,R)=255 while any colored background drops to its
+        # weakest channel. For neutral images the result is equivalent
+        # to grayscale, so there is no downside.
+        return min_channel
 
     def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
         """