fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114

Merged
egullickson merged 15 commits from issue-113-fix-vin-ocr-scanning into main 2026-02-07 15:47:37 +00:00
Showing only changes of commit 0de34983bb - Show all commits

View File

@@ -86,9 +86,9 @@ class VinPreprocessor:
if len(cv_image.shape) == 3:
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
# Convert to grayscale
# Convert to grayscale using best-contrast channel selection
if len(cv_image.shape) == 3:
gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
gray = self._best_contrast_channel(cv_image)
else:
gray = cv_image
steps_applied.append("grayscale")
@@ -150,6 +150,46 @@ class VinPreprocessor:
logger.debug(f"Upscaled image from {width}x{height} to {new_width}x{new_height}")
return image
def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
    """
    Produce a single-channel image from a BGR image, favouring contrast.

    A luminance-weighted grayscale conversion (0.299R + 0.587G + 0.114B)
    blends the three channels together, so text that differs from its
    background mainly in one channel can end up nearly invisible. The
    motivating case is white text on a green VIN sticker: luminance is
    close between the two, while the blue and red channels separate them
    clearly.

    The standard deviation of each channel is used as a proxy for its
    contrast. If the weakest channel's standard deviation is more than
    20% below the strongest channel's, the strongest channel is returned
    as-is. Otherwise the channels are considered similar and the regular
    ``cv2.COLOR_BGR2GRAY`` conversion is returned instead.

    Args:
        bgr_image: Image that ``cv2.split`` separates into exactly three
            planes, interpreted here in blue, green, red order.

    Returns:
        Either one of the split colour planes or the standard grayscale
        conversion of ``bgr_image``.
    """
    blue, green, red = cv2.split(bgr_image)
    planes = (blue, green, red)
    labels = ("blue", "green", "red")

    # Per-plane spread, kept in the same B, G, R order as `planes`.
    spreads = [float(np.std(plane)) for plane in planes]

    winner = int(np.argmax(spreads))
    strongest = spreads[winner]
    weakest = min(spreads)

    # A flat image (all spreads zero) can never have a dominant channel;
    # checking `strongest > 0` first also avoids dividing by zero.
    has_dominant_channel = (
        strongest > 0 and (strongest - weakest) / strongest > 0.20
    )
    if not has_dominant_channel:
        # Channels are comparable: plain grayscale is the more robust
        # choice for neutral-coloured images.
        return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)

    logger.debug(
        "Using %s channel (std=%.1f) over grayscale (stds: B=%.1f G=%.1f R=%.1f)",
        labels[winner], strongest, *spreads,
    )
    return planes[winner]
def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
"""
Apply CLAHE (Contrast Limited Adaptive Histogram Equalization).
@@ -306,7 +346,7 @@ class VinPreprocessor:
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
if len(cv_image.shape) == 3:
gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
gray = self._best_contrast_channel(cv_image)
else:
gray = cv_image
steps_applied.append("grayscale")