fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114

Merged
egullickson merged 15 commits from issue-113-fix-vin-ocr-scanning into main 2026-02-07 15:47:37 +00:00
Showing only changes of commit ae5221c759 - Show all commits

View File

@@ -153,34 +153,32 @@ class VinPreprocessor:
def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
    """
    Compute a grayscale image with dark text on light background.

    Takes the per-pixel minimum across the B, G and R channels and
    inverts it. White text has min(255,255,255) = 255 -> inverted to 0
    (black). Colored backgrounds have a low value in at least one
    channel (e.g. green sticker: min(130,230,150) = 130) -> inverted
    to 125 (medium gray).

    The inversion ensures Tesseract always receives dark-text-on-
    light-background, which is the polarity it expects.

    Args:
        bgr_image: Image in OpenCV B, G, R channel order. It must
            split into exactly three channels. Presumably 8-bit, since
            the worked numbers above assume a 0-255 range (confirm
            against the caller).

    Returns:
        Single-channel image: the bitwise inverse of the per-pixel
        channel minimum.
    """
    b_channel, g_channel, r_channel = cv2.split(bgr_image)
    min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)

    # Invert so white text (min=255) becomes black (0) and colored
    # backgrounds (min~130) become lighter gray (~125).
    inverted = cv2.bitwise_not(min_channel)

    # Standard grayscale is computed only so the debug log can compare
    # its contrast against the inverted min-channel; it is never returned.
    gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    logger.debug(
        "Channel contrast: inverted-min std=%.1f, grayscale std=%.1f",
        float(np.std(inverted)), float(np.std(gray)),
    )

    return inverted
def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
"""