diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py
index 95ba4bc..aee5a86 100644
--- a/ocr/app/preprocessors/vin_preprocessor.py
+++ b/ocr/app/preprocessors/vin_preprocessor.py
@@ -86,9 +86,9 @@ class VinPreprocessor:
         if len(cv_image.shape) == 3:
             cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
 
-        # Convert to grayscale
+        # Convert to grayscale using best-contrast channel selection
         if len(cv_image.shape) == 3:
-            gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
+            gray = self._best_contrast_channel(cv_image)
         else:
             gray = cv_image
         steps_applied.append("grayscale")
@@ -150,6 +150,46 @@ class VinPreprocessor:
         logger.debug(f"Upscaled image from {width}x{height} to {new_width}x{new_height}")
         return image
 
+    def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
+        """
+        Select the single color channel with the highest contrast.
+
+        Standard grayscale conversion (0.299R + 0.587G + 0.114B) averages
+        channels, which destroys contrast when text and background differ
+        primarily in one channel. For example, white text on a green VIN
+        sticker has almost identical luminance, but the blue and red channels
+        show strong contrast.
+
+        This method evaluates each BGR channel by its standard deviation
+        (a proxy for contrast) and returns the one with the highest value.
+        Falls back to standard grayscale when all channels are similar.
+        """
+        b_channel, g_channel, r_channel = cv2.split(bgr_image)
+
+        stds = [
+            float(np.std(b_channel)),
+            float(np.std(g_channel)),
+            float(np.std(r_channel)),
+        ]
+        channels = [b_channel, g_channel, r_channel]
+        channel_names = ["blue", "green", "red"]
+
+        best_idx = int(np.argmax(stds))
+        max_std = stds[best_idx]
+        min_std = min(stds)
+
+        # Only use single-channel extraction when one channel is notably
+        # better (weakest std more than 20% below the strongest). Otherwise,
+        # standard grayscale is fine and more robust for neutral images.
+        if max_std > 0 and (max_std - min_std) / max_std > 0.20:
+            logger.debug(
+                "Using %s channel (std=%.1f) over grayscale (stds: B=%.1f G=%.1f R=%.1f)",
+                channel_names[best_idx], max_std, stds[0], stds[1], stds[2],
+            )
+            return channels[best_idx]
+
+        return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
+
     def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
         """
         Apply CLAHE (Contrast Limited Adaptive Histogram Equalization).
@@ -306,7 +346,7 @@ class VinPreprocessor:
             cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
 
         if len(cv_image.shape) == 3:
-            gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
+            gray = self._best_contrast_channel(cv_image)
         else:
             gray = cv_image
         steps_applied.append("grayscale")