fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114

Merged
egullickson merged 15 commits from issue-113-fix-vin-ocr-scanning into main 2026-02-07 15:47:37 +00:00
2 changed files with 38 additions and 10 deletions
Showing only changes of commit 63c027a454 - Show all commits

View File

@@ -146,6 +146,34 @@ class VinExtractor(BaseExtractor):
# No VIN candidates found - try with different PSM modes
candidates = self._try_alternate_ocr(preprocessed_bytes)
if not candidates:
# Try grayscale-only (no thresholding) — the Tesseract
# LSTM engine often performs better on non-binarized input
# because it does its own internal preprocessing.
gray_result = vin_preprocessor.preprocess(
image_bytes, apply_threshold=False
)
logger.debug(
"Grayscale preprocessing steps: %s",
gray_result.preprocessing_applied,
)
if debug_session:
self._save_debug_image(
debug_session, "04_preprocessed_gray.png",
gray_result.image_bytes,
)
raw_text, word_confidences = self._perform_ocr(
gray_result.image_bytes
)
logger.debug("Gray PSM 6 raw text: '%s'", raw_text)
candidates = vin_validator.extract_candidates(raw_text)
logger.debug("Gray PSM 6 candidates: %s", candidates)
if not candidates:
candidates = self._try_alternate_ocr(
gray_result.image_bytes, prefix="Gray"
)
if not candidates:
# Try alternative preprocessing (Otsu's thresholding)
otsu_result = vin_preprocessor.preprocess_otsu(image_bytes)

View File

@@ -167,20 +167,20 @@ class VinPreprocessor:
b_channel, g_channel, r_channel = cv2.split(bgr_image)
min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
min_std = float(np.std(min_channel))
gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
gray_std = float(np.std(gray))
logger.debug(
"Channel contrast: min-channel std=%.1f, grayscale std=%.1f",
min_std, gray_std,
)
- # Use min-channel when it provides meaningfully more contrast
- if min_std > gray_std * 1.1:
-     logger.debug(
-         "Using min-channel (std=%.1f) over grayscale (std=%.1f)",
-         min_std, gray_std,
-     )
-     return min_channel
- return gray
+ # Always use min-channel for VIN images. White text keeps
+ # min(B,G,R)=255 while any colored background drops to its
+ # weakest channel. For neutral images the result is equivalent
+ # to grayscale, so there is no downside.
+ return min_channel
def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
"""