From 63c027a454e817942a84a40ca2662bc01dda01da Mon Sep 17 00:00:00 2001
From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com>
Date: Fri, 6 Feb 2026 21:32:52 -0600
Subject: [PATCH] fix: always use min-channel and add grayscale-only OCR path
 (refs #113)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two fixes:

1. Always use min-channel for color images instead of gated comparison
   that was falling back to standard grayscale (which has only 23%
   contrast for white-on-green VIN stickers).

2. Add grayscale-only OCR path (CLAHE + denoise, no thresholding)
   between adaptive and Otsu attempts. Tesseract's LSTM engine is
   designed to handle grayscale input directly and often outperforms
   binarized input where thresholding creates artifacts.

Pipeline order: adaptive threshold → grayscale-only → Otsu threshold

Co-Authored-By: Claude Opus 4.6
---
 ocr/app/extractors/vin_extractor.py       | 28 +++++++++++++++++++++++
 ocr/app/preprocessors/vin_preprocessor.py | 20 ++++++++--------
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py
index 9d58501..66a694b 100644
--- a/ocr/app/extractors/vin_extractor.py
+++ b/ocr/app/extractors/vin_extractor.py
@@ -146,6 +146,34 @@ class VinExtractor(BaseExtractor):
             # No VIN candidates found - try with different PSM modes
             candidates = self._try_alternate_ocr(preprocessed_bytes)
 
+        if not candidates:
+            # Try grayscale-only (no thresholding) — the Tesseract
+            # LSTM engine often performs better on non-binarized input
+            # because it does its own internal preprocessing.
+            gray_result = vin_preprocessor.preprocess(
+                image_bytes, apply_threshold=False
+            )
+            logger.debug(
+                "Grayscale preprocessing steps: %s",
+                gray_result.preprocessing_applied,
+            )
+            if debug_session:
+                self._save_debug_image(
+                    debug_session, "04_preprocessed_gray.png",
+                    gray_result.image_bytes,
+                )
+
+            raw_text, word_confidences = self._perform_ocr(
+                gray_result.image_bytes
+            )
+            logger.debug("Gray PSM 6 raw text: '%s'", raw_text)
+            candidates = vin_validator.extract_candidates(raw_text)
+            logger.debug("Gray PSM 6 candidates: %s", candidates)
+            if not candidates:
+                candidates = self._try_alternate_ocr(
+                    gray_result.image_bytes, prefix="Gray"
+                )
+
         if not candidates:
             # Try alternative preprocessing (Otsu's thresholding)
             otsu_result = vin_preprocessor.preprocess_otsu(image_bytes)

diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py
index cd9b388..1362b12 100644
--- a/ocr/app/preprocessors/vin_preprocessor.py
+++ b/ocr/app/preprocessors/vin_preprocessor.py
@@ -167,20 +167,20 @@ class VinPreprocessor:
         b_channel, g_channel, r_channel = cv2.split(bgr_image)
         min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
+        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
         min_std = float(np.std(min_channel))
-        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
         gray_std = float(np.std(gray))
+        logger.debug(
+            "Channel contrast: min-channel std=%.1f, grayscale std=%.1f",
+            min_std, gray_std,
+        )
 
-        # Use min-channel when it provides meaningfully more contrast
-        if min_std > gray_std * 1.1:
-            logger.debug(
-                "Using min-channel (std=%.1f) over grayscale (std=%.1f)",
-                min_std, gray_std,
-            )
-            return min_channel
-
-        return gray
+        # Always use min-channel for VIN images. White text keeps
+        # min(B,G,R)=255 while any colored background drops to its
+        # weakest channel. For neutral images the result is equivalent
+        # to grayscale, so there is no downside.
+        return min_channel
 
     def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
         """