fix: always use min-channel and add grayscale-only OCR path (refs #113)

Two fixes:

1. Always use min-channel for color images instead of the gated comparison, which was falling back to standard grayscale (which has only 23% contrast for white-on-green VIN stickers).
2. Add a grayscale-only OCR path (CLAHE + denoise, no thresholding) between the adaptive and Otsu attempts. Tesseract's LSTM engine is designed to handle grayscale input directly and often outperforms binarized input where thresholding creates artifacts.

Pipeline order: adaptive threshold → grayscale-only → Otsu threshold

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 21:32:52 -06:00
parent a07ec324fe
commit 63c027a454
2 changed files with 38 additions and 10 deletions
--- a/ocr/app/extractors/vin_extractor.py
+++ b/ocr/app/extractors/vin_extractor.py
@@ -146,6 +146,34 @@ class VinExtractor(BaseExtractor):
                # No VIN candidates found - try with different PSM modes
                candidates = self._try_alternate_ocr(preprocessed_bytes)
            if not candidates:
                # Try grayscale-only (no thresholding) — the Tesseract
                # LSTM engine often performs better on non-binarized input
                # because it does its own internal preprocessing.
                gray_result = vin_preprocessor.preprocess(
                    image_bytes, apply_threshold=False
                )
                logger.debug(
                    "Grayscale preprocessing steps: %s",
                    gray_result.preprocessing_applied,
                )
                if debug_session:
                    self._save_debug_image(
                        debug_session, "04_preprocessed_gray.png",
                        gray_result.image_bytes,
                    )
                raw_text, word_confidences = self._perform_ocr(
                    gray_result.image_bytes
                )
                logger.debug("Gray PSM 6 raw text: '%s'", raw_text)
                candidates = vin_validator.extract_candidates(raw_text)
                logger.debug("Gray PSM 6 candidates: %s", candidates)
                if not candidates:
                    candidates = self._try_alternate_ocr(
                        gray_result.image_bytes, prefix="Gray"
                    )
            if not candidates:
                # Try alternative preprocessing (Otsu's thresholding)
                otsu_result = vin_preprocessor.preprocess_otsu(image_bytes)
--- a/ocr/app/preprocessors/vin_preprocessor.py
+++ b/ocr/app/preprocessors/vin_preprocessor.py
@@ -167,20 +167,20 @@ class VinPreprocessor:
        b_channel, g_channel, r_channel = cv2.split(bgr_image)
        min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
        min_std = float(np.std(min_channel))
        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
        gray_std = float(np.std(gray))
        logger.debug(
            "Channel contrast: min-channel std=%.1f, grayscale std=%.1f",
            min_std, gray_std,
        )
-        # Use min-channel when it provides meaningfully more contrast
+        # Always use min-channel for VIN images.  White text keeps
-        if min_std > gray_std * 1.1:
+        # min(B,G,R)=255 while any colored background drops to its
-            logger.debug(
+        # weakest channel.  For neutral images the result is equivalent
-                "Using min-channel (std=%.1f) over grayscale (std=%.1f)",
+        # to grayscale, so there is no downside.
-                min_std, gray_std,
+        return min_channel
            )
            return min_channel
        return gray
    def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
        """