fix: use min-channel grayscale and morphological cleanup for VIN OCR (refs #113)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 35s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 51s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped

Replace std-based channel selection (which incorrectly picked green for
green-tinted VIN stickers) with per-pixel min(B,G,R). White text stays
255 in all channels while colored backgrounds drop to their weakest
channel value, giving roughly a 2x contrast improvement. Add morphological
opening after thresholding to remove noise speckles from the car body
surface that were confusing Tesseract's page segmentation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-06 21:23:43 -06:00
parent 0de34983bb
commit a07ec324fe

View File

@@ -115,6 +115,7 @@ class VinPreprocessor:
# Apply adaptive thresholding # Apply adaptive thresholding
if apply_threshold: if apply_threshold:
gray = self._adaptive_threshold(gray) gray = self._adaptive_threshold(gray)
gray = self._morphological_cleanup(gray)
steps_applied.append("threshold") steps_applied.append("threshold")
# Convert back to PNG bytes # Convert back to PNG bytes
@@ -152,43 +153,34 @@ class VinPreprocessor:
def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
    """
    Compute a grayscale image that maximizes text-to-background contrast.

    Uses the per-pixel minimum across the B, G and R channels. White text
    has min(255, 255, 255) = 255 regardless of channel, while any colored
    background has a low value in at least one channel (e.g. green
    sticker: min(130, 230, 150) = 130). This gives ~125 units of
    contrast vs ~60 from standard grayscale.

    Falls back to standard grayscale when the min-channel doesn't
    improve contrast (i.e. for already-neutral/gray images).

    Args:
        bgr_image: Three-channel image in OpenCV B, G, R channel order.

    Returns:
        A single-channel image: the per-pixel channel minimum when its
        standard deviation exceeds the standard grayscale's by more than
        10%, otherwise the standard grayscale conversion.
    """
    b_channel, g_channel, r_channel = cv2.split(bgr_image)
    min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)

    # Standard deviation is used as a cheap proxy for global contrast.
    min_std = float(np.std(min_channel))
    gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    gray_std = float(np.std(gray))

    # Use min-channel when it provides meaningfully more contrast
    if min_std > gray_std * 1.1:
        logger.debug(
            "Using min-channel (std=%.1f) over grayscale (std=%.1f)",
            min_std, gray_std,
        )
        return min_channel

    return gray
def _apply_clahe(self, image: np.ndarray) -> np.ndarray: def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
""" """
@@ -309,6 +301,20 @@ class VinPreprocessor:
logger.warning(f"Adaptive threshold failed: {e}") logger.warning(f"Adaptive threshold failed: {e}")
return image return image
def _morphological_cleanup(self, image: np.ndarray) -> np.ndarray:
    """
    Clean small noise artifacts out of a thresholded binary image.

    Applies a morphological opening (erosion followed by dilation) with
    a 2x2 rectangular structuring element, which removes isolated pixels
    and thin noise lines while preserving larger text characters.

    NOTE(review): opening acts on the bright (non-zero) regions of the
    image, so which features count as "noise" depends on the polarity
    of the preceding threshold step -- confirm against the callers.

    Args:
        image: Thresholded single-channel image to clean up.

    Returns:
        The opened image, or the unmodified input if OpenCV raises an
        error (the failure is logged as a warning).
    """
    try:
        opening_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        cleaned = cv2.morphologyEx(image, cv2.MORPH_OPEN, opening_kernel)
    except cv2.error as e:
        # Best-effort step: fall back to the input rather than abort OCR.
        logger.warning(f"Morphological cleanup failed: {e}")
        return image
    return cleaned
def _otsu_threshold(self, image: np.ndarray) -> np.ndarray: def _otsu_threshold(self, image: np.ndarray) -> np.ndarray:
""" """
Apply Otsu's thresholding for binarization. Apply Otsu's thresholding for binarization.
@@ -361,6 +367,7 @@ class VinPreprocessor:
steps_applied.append("denoise") steps_applied.append("denoise")
gray = self._otsu_threshold(gray) gray = self._otsu_threshold(gray)
gray = self._morphological_cleanup(gray)
steps_applied.append("otsu_threshold") steps_applied.append("otsu_threshold")
result_image = Image.fromarray(gray) result_image = Image.fromarray(gray)