fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114

Merged
egullickson merged 15 commits from issue-113-fix-vin-ocr-scanning into main 2026-02-07 15:47:37 +00:00
Showing only changes of commit a07ec324fe - Show all commits

View File

@@ -115,6 +115,7 @@ class VinPreprocessor:
# Apply adaptive thresholding # Apply adaptive thresholding
if apply_threshold: if apply_threshold:
gray = self._adaptive_threshold(gray) gray = self._adaptive_threshold(gray)
gray = self._morphological_cleanup(gray)
steps_applied.append("threshold") steps_applied.append("threshold")
# Convert back to PNG bytes # Convert back to PNG bytes
@@ -152,43 +153,34 @@ class VinPreprocessor:
def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray: def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
""" """
Select the single color channel with the highest contrast. Compute a grayscale image that maximizes text-to-background contrast.
Standard grayscale conversion (0.299R + 0.587G + 0.114B) averages Uses per-pixel minimum across B, G, R channels. White text has
channels, which destroys contrast when text and background differ min(255,255,255) = 255 regardless of channel, while any colored
primarily in one channel. For example, white text on a green VIN background has a low value in at least one channel (e.g. green
sticker has almost identical luminance, but the blue and red channels sticker: min(130,230,150) = 130). This gives ~125 units of
show strong contrast. contrast vs ~60 from standard grayscale.
This method evaluates each BGR channel by its standard deviation Falls back to standard grayscale when the min-channel doesn't
(a proxy for contrast) and returns the one with the highest value. improve contrast (i.e. for already-neutral/gray images).
Falls back to standard grayscale when all channels are similar.
""" """
b_channel, g_channel, r_channel = cv2.split(bgr_image) b_channel, g_channel, r_channel = cv2.split(bgr_image)
stds = [ min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
float(np.std(b_channel)),
float(np.std(g_channel)),
float(np.std(r_channel)),
]
channels = [b_channel, g_channel, r_channel]
channel_names = ["blue", "green", "red"]
best_idx = int(np.argmax(stds)) min_std = float(np.std(min_channel))
max_std = stds[best_idx] gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
min_std = min(stds) gray_std = float(np.std(gray))
# Only use single-channel extraction when one channel is notably # Use min-channel when it provides meaningfully more contrast
# better (>20% higher std than the weakest). Otherwise, standard if min_std > gray_std * 1.1:
# grayscale is fine and more robust for neutral-colored images.
if max_std > 0 and (max_std - min_std) / max_std > 0.20:
logger.debug( logger.debug(
"Using %s channel (std=%.1f) over grayscale (stds: B=%.1f G=%.1f R=%.1f)", "Using min-channel (std=%.1f) over grayscale (std=%.1f)",
channel_names[best_idx], max_std, stds[0], stds[1], stds[2], min_std, gray_std,
) )
return channels[best_idx] return min_channel
return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) return gray
def _apply_clahe(self, image: np.ndarray) -> np.ndarray: def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
""" """
@@ -309,6 +301,20 @@ class VinPreprocessor:
logger.warning(f"Adaptive threshold failed: {e}") logger.warning(f"Adaptive threshold failed: {e}")
return image return image
def _morphological_cleanup(self, image: np.ndarray) -> np.ndarray:
    """
    Apply a morphological opening to a thresholded binary image.

    Opening is an erosion followed by a dilation. It is used here to
    drop isolated pixels and thin noise lines while leaving the larger
    text strokes in place.

    Args:
        image: The thresholded image to clean up.

    Returns:
        The opened image, or ``image`` unchanged if OpenCV raises
        ``cv2.error`` (the failure is logged as a warning).
    """
    try:
        # A 2x2 rectangular structuring element keeps the opening mild.
        opened = cv2.morphologyEx(
            image,
            cv2.MORPH_OPEN,
            cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)),
        )
    except cv2.error as e:
        # Best-effort step: fall back to the input rather than failing.
        logger.warning(f"Morphological cleanup failed: {e}")
        return image
    return opened
def _otsu_threshold(self, image: np.ndarray) -> np.ndarray: def _otsu_threshold(self, image: np.ndarray) -> np.ndarray:
""" """
Apply Otsu's thresholding for binarization. Apply Otsu's thresholding for binarization.
@@ -361,6 +367,7 @@ class VinPreprocessor:
steps_applied.append("denoise") steps_applied.append("denoise")
gray = self._otsu_threshold(gray) gray = self._otsu_threshold(gray)
gray = self._morphological_cleanup(gray)
steps_applied.append("otsu_threshold") steps_applied.append("otsu_threshold")
result_image = Image.fromarray(gray) result_image = Image.fromarray(gray)