fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114

Merged
egullickson merged 15 commits from issue-113-fix-vin-ocr-scanning into main 2026-02-07 15:47:37 +00:00
Showing only changes of commit a07ec324fe - Show all commits

View File

@@ -115,6 +115,7 @@ class VinPreprocessor:
# Apply adaptive thresholding
if apply_threshold:
gray = self._adaptive_threshold(gray)
gray = self._morphological_cleanup(gray)
steps_applied.append("threshold")
# Convert back to PNG bytes
@@ -152,43 +153,34 @@ class VinPreprocessor:
def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
    """
    Compute a grayscale image that maximizes text-to-background contrast.

    Uses per-pixel minimum across B, G, R channels. White text has
    min(255,255,255) = 255 regardless of channel, while any colored
    background has a low value in at least one channel (e.g. green
    sticker: min(130,230,150) = 130). This gives ~125 units of
    contrast vs ~60 from standard grayscale.

    Falls back to standard grayscale when the min-channel doesn't
    improve contrast (i.e. for already-neutral/gray images).

    Args:
        bgr_image: 3-channel image in OpenCV BGR channel order.

    Returns:
        Single-channel uint8 image: either the per-pixel channel
        minimum or the standard grayscale conversion, whichever has
        the higher standard deviation (contrast proxy).
    """
    b_channel, g_channel, r_channel = cv2.split(bgr_image)

    # Per-pixel minimum across all three channels: dark wherever ANY
    # channel is dark, so colored backgrounds drop while white text stays.
    min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
    min_std = float(np.std(min_channel))

    gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    gray_std = float(np.std(gray))

    # Use min-channel when it provides meaningfully more contrast
    # (10% margin avoids flip-flopping on near-neutral images).
    if min_std > gray_std * 1.1:
        logger.debug(
            "Using min-channel (std=%.1f) over grayscale (std=%.1f)",
            min_std, gray_std,
        )
        return min_channel
    return gray
def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
"""
@@ -309,6 +301,20 @@ class VinPreprocessor:
logger.warning(f"Adaptive threshold failed: {e}")
return image
def _morphological_cleanup(self, image: np.ndarray) -> np.ndarray:
    """
    Strip small noise speckles from a thresholded binary image.

    A morphological opening (erosion followed by dilation) with a tiny
    2x2 rectangular kernel erases isolated pixels and hair-thin
    artifacts while leaving full-size text glyphs intact. On any
    OpenCV failure the input is returned unchanged (best effort).
    """
    try:
        open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        cleaned = cv2.morphologyEx(image, cv2.MORPH_OPEN, open_kernel)
    except cv2.error as e:
        logger.warning(f"Morphological cleanup failed: {e}")
        return image
    return cleaned
def _otsu_threshold(self, image: np.ndarray) -> np.ndarray:
"""
Apply Otsu's thresholding for binarization.
@@ -361,6 +367,7 @@ class VinPreprocessor:
steps_applied.append("denoise")
gray = self._otsu_threshold(gray)
gray = self._morphological_cleanup(gray)
steps_applied.append("otsu_threshold")
result_image = Image.fromarray(gray)