diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index 9d58501..66a694b 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -146,6 +146,34 @@ class VinExtractor(BaseExtractor): # No VIN candidates found - try with different PSM modes candidates = self._try_alternate_ocr(preprocessed_bytes) + if not candidates: + # Try grayscale-only (no thresholding) — the Tesseract + # LSTM engine often performs better on non-binarized input + # because it does its own internal preprocessing. + gray_result = vin_preprocessor.preprocess( + image_bytes, apply_threshold=False + ) + logger.debug( + "Grayscale preprocessing steps: %s", + gray_result.preprocessing_applied, + ) + if debug_session: + self._save_debug_image( + debug_session, "04_preprocessed_gray.png", + gray_result.image_bytes, + ) + + raw_text, word_confidences = self._perform_ocr( + gray_result.image_bytes + ) + logger.debug("Gray PSM 6 raw text: '%s'", raw_text) + candidates = vin_validator.extract_candidates(raw_text) + logger.debug("Gray PSM 6 candidates: %s", candidates) + if not candidates: + candidates = self._try_alternate_ocr( + gray_result.image_bytes, prefix="Gray" + ) + if not candidates: # Try alternative preprocessing (Otsu's thresholding) otsu_result = vin_preprocessor.preprocess_otsu(image_bytes) diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index cd9b388..1362b12 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -167,20 +167,20 @@ class VinPreprocessor: b_channel, g_channel, r_channel = cv2.split(bgr_image) min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel) + gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) min_std = float(np.std(min_channel)) - gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) gray_std = float(np.std(gray)) + logger.debug( + "Channel contrast: min-channel std=%.1f, grayscale std=%.1f", + min_std, gray_std, + ) - # Use min-channel when it provides meaningfully more contrast - if min_std > gray_std * 1.1: - logger.debug( - "Using min-channel (std=%.1f) over grayscale (std=%.1f)", - min_std, gray_std, - ) - return min_channel - - return gray + # Always use min-channel for VIN images. White text keeps + # min(B,G,R)=255 while any colored background drops to its + # weakest channel. For neutral images the result is equivalent + # to grayscale, so there is no downside. + return min_channel def _apply_clahe(self, image: np.ndarray) -> np.ndarray: """