fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114
@@ -115,6 +115,7 @@ class VinPreprocessor:
|
||||
# Apply adaptive thresholding
|
||||
if apply_threshold:
|
||||
gray = self._adaptive_threshold(gray)
|
||||
gray = self._morphological_cleanup(gray)
|
||||
steps_applied.append("threshold")
|
||||
|
||||
# Convert back to PNG bytes
|
||||
@@ -152,43 +153,34 @@ class VinPreprocessor:
|
||||
|
||||
def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
    """
    Compute a grayscale image that maximizes text-to-background contrast.

    Standard grayscale conversion (0.299R + 0.587G + 0.114B) averages
    channels, which destroys contrast when text and background differ
    primarily in one channel. For example, white text on a green VIN
    sticker has almost identical luminance, but the blue and red channels
    show strong contrast.

    Uses per-pixel minimum across B, G, R channels. White text has
    min(255,255,255) = 255 regardless of channel, while any colored
    background has a low value in at least one channel (e.g. green
    sticker: min(130,230,150) = 130). This gives ~125 units of
    contrast vs ~60 from standard grayscale.

    Falls back to standard grayscale when the min-channel doesn't
    improve contrast (i.e. for already-neutral/gray images).

    Args:
        bgr_image: 3-channel image in OpenCV BGR channel order.

    Returns:
        A single-channel (grayscale) image: either the per-pixel
        min-channel or the standard luminance conversion, whichever
        has the higher standard deviation (contrast proxy).
    """
    b_channel, g_channel, r_channel = cv2.split(bgr_image)

    # Per-pixel minimum across the three channels.
    min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)

    # Standard deviation serves as a cheap proxy for contrast.
    min_std = float(np.std(min_channel))
    gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    gray_std = float(np.std(gray))

    # Use min-channel only when it provides meaningfully (>10%) more
    # contrast; otherwise standard grayscale is more robust for
    # neutral-colored images.
    if min_std > gray_std * 1.1:
        logger.debug(
            "Using min-channel (std=%.1f) over grayscale (std=%.1f)",
            min_std, gray_std,
        )
        return min_channel

    return gray
|
||||
|
||||
def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
@@ -309,6 +301,20 @@ class VinPreprocessor:
|
||||
logger.warning(f"Adaptive threshold failed: {e}")
|
||||
return image
|
||||
|
||||
def _morphological_cleanup(self, image: np.ndarray) -> np.ndarray:
    """
    Remove small noise artifacts from a thresholded binary image.

    Morphological opening (erosion then dilation) removes isolated
    pixels and thin noise lines while preserving larger text characters.

    Args:
        image: Single-channel binary image (e.g. output of adaptive or
            Otsu thresholding).

    Returns:
        The opened image, or the input unchanged if OpenCV rejects it
        (cleanup is best-effort and must never break the pipeline).
    """
    try:
        # 2x2 rectangular kernel: small enough to leave VIN character
        # strokes intact while erasing single-pixel speckle.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
    except cv2.error as e:
        logger.warning(f"Morphological cleanup failed: {e}")
        return image
|
||||
|
||||
def _otsu_threshold(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Apply Otsu's thresholding for binarization.
|
||||
@@ -361,6 +367,7 @@ class VinPreprocessor:
|
||||
steps_applied.append("denoise")
|
||||
|
||||
gray = self._otsu_threshold(gray)
|
||||
gray = self._morphological_cleanup(gray)
|
||||
steps_applied.append("otsu_threshold")
|
||||
|
||||
result_image = Image.fromarray(gray)
|
||||
|
||||
Reference in New Issue
Block a user