All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 35s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 51s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m31s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
Root cause: Tesseract fragments VINs into multiple words but candidate extraction required continuous 17-char sequences, rejecting all results. Changes: - Fix candidate extraction to concatenate adjacent OCR fragments - Disable Tesseract dictionaries (VINs are not dictionary words) - Set OEM 1 (LSTM engine) for better accuracy - Add PSM 11 (sparse text) and PSM 13 (raw line) fallback modes - Add Otsu's thresholding as alternative preprocessing pipeline - Upscale small images to meet Tesseract's 300 DPI requirement - Remove incorrect B->8 and S->5 transliterations (valid VIN chars) - Fix pre-existing test bug in check digit expected value Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
400 lines
12 KiB
Python
400 lines
12 KiB
Python
"""VIN-optimized image preprocessing pipeline."""
|
|
import io
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from typing import Optional
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
from pillow_heif import register_heif_opener
|
|
|
|
# Register HEIF/HEIC opener
|
|
register_heif_opener()
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class BoundingBox:
    """Represents a rectangular region in an image.

    Coordinates follow image convention: (x, y) is the top-left corner,
    with x increasing rightward and y increasing downward, all in pixels.
    """

    # Left edge of the region, in pixels from the image's left border.
    x: int
    # Top edge of the region, in pixels from the image's top border.
    y: int
    # Region width in pixels.
    width: int
    # Region height in pixels.
    height: int
|
|
|
|
|
|
@dataclass
class PreprocessingResult:
    """Result of VIN preprocessing.

    Attributes:
        image_bytes: PNG-encoded bytes of the processed image.
        bounding_box: Detected VIN region, if region detection was run
            and succeeded; otherwise None.
        preprocessing_applied: Ordered names of the pipeline steps that
            were applied (e.g. "grayscale", "clahe"). Defaults to an
            empty list; passing None explicitly is normalized to [].
    """

    image_bytes: bytes
    bounding_box: Optional[BoundingBox] = None
    # Annotated Optional because None is accepted at construction time;
    # __post_init__ guarantees the attribute is always a list afterwards.
    # (A bare `list[str] = None` annotation was a type lie — and a mutable
    # default would be shared across instances, hence the None sentinel.)
    preprocessing_applied: Optional[list[str]] = None

    def __post_init__(self) -> None:
        # Normalize the None sentinel so callers can always iterate/append.
        if self.preprocessing_applied is None:
            self.preprocessing_applied = []
|
|
|
|
|
|
class VinPreprocessor:
    """VIN-optimized image preprocessing for improved OCR accuracy."""

    # Minimum width in pixels for reliable VIN OCR.
    # A 17-char VIN needs ~30px per character for Tesseract accuracy.
    MIN_WIDTH_FOR_VIN = 600

    def preprocess(
        self,
        image_bytes: bytes,
        apply_clahe: bool = True,
        apply_deskew: bool = True,
        apply_denoise: bool = True,
        apply_threshold: bool = True,
    ) -> PreprocessingResult:
        """
        Apply VIN-optimized preprocessing pipeline.

        Pipeline:
        1. HEIC conversion (if needed)
        2. Grayscale conversion (+ upscale to minimum OCR resolution)
        3. Deskew (correct rotation/tilt)
        4. Contrast enhancement (CLAHE)
        5. Noise reduction (fastNlMeansDenoising)
        6. Adaptive thresholding

        Args:
            image_bytes: Raw image bytes (HEIC, JPEG, PNG)
            apply_clahe: Apply CLAHE contrast enhancement
            apply_deskew: Apply deskew correction
            apply_denoise: Apply noise reduction
            apply_threshold: Apply adaptive thresholding

        Returns:
            PreprocessingResult with processed image bytes
        """
        steps_applied: list[str] = []

        # Shared load/convert/upscale stage (also used by preprocess_otsu).
        gray = self._load_grayscale(image_bytes, steps_applied)

        if apply_deskew:
            gray = self._deskew(gray)
            steps_applied.append("deskew")

        # CLAHE: Contrast Limited Adaptive Histogram Equalization.
        if apply_clahe:
            gray = self._apply_clahe(gray)
            steps_applied.append("clahe")

        if apply_denoise:
            gray = self._denoise(gray)
            steps_applied.append("denoise")

        if apply_threshold:
            gray = self._adaptive_threshold(gray)
            steps_applied.append("threshold")

        return PreprocessingResult(
            image_bytes=self._to_png_bytes(gray),
            preprocessing_applied=steps_applied,
        )

    def preprocess_otsu(self, image_bytes: bytes) -> PreprocessingResult:
        """
        Alternative preprocessing pipeline using Otsu's thresholding.

        Used as a fallback when adaptive thresholding doesn't produce
        good OCR results. Applies CLAHE and denoising (no deskew), then
        Otsu binarization.
        """
        steps_applied: list[str] = []

        gray = self._load_grayscale(image_bytes, steps_applied)

        gray = self._apply_clahe(gray)
        steps_applied.append("clahe")

        gray = self._denoise(gray)
        steps_applied.append("denoise")

        gray = self._otsu_threshold(gray)
        steps_applied.append("otsu_threshold")

        return PreprocessingResult(
            image_bytes=self._to_png_bytes(gray),
            preprocessing_applied=steps_applied,
        )

    def _load_grayscale(self, image_bytes: bytes, steps: list[str]) -> np.ndarray:
        """
        Decode raw bytes into a grayscale array at OCR-ready resolution.

        Shared front half of both preprocessing pipelines: loads via PIL
        (handles HEIC through pillow-heif), normalizes mode, converts to
        grayscale, and upscales small images. Appends the name of each
        applied step to *steps*.
        """
        pil_image = Image.open(io.BytesIO(image_bytes))
        steps.append("loaded")

        # Normalize exotic modes (RGBA, P, CMYK, ...) to RGB first.
        if pil_image.mode not in ("RGB", "L"):
            pil_image = pil_image.convert("RGB")
            steps.append("convert_rgb")

        # Convert to OpenCV format (BGR channel order for color images).
        cv_image = np.array(pil_image)
        if len(cv_image.shape) == 3:
            cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)

        # Convert to grayscale (already single-channel if mode was "L").
        if len(cv_image.shape) == 3:
            gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
        else:
            gray = cv_image
        steps.append("grayscale")

        # Upscale small images for better OCR (Tesseract needs ~300 DPI).
        gray = self._ensure_minimum_resolution(gray)
        steps.append("resolution_check")
        return gray

    @staticmethod
    def _to_png_bytes(image: np.ndarray) -> bytes:
        """Encode a grayscale/binary array as PNG bytes."""
        buffer = io.BytesIO()
        Image.fromarray(image).save(buffer, format="PNG")
        return buffer.getvalue()

    def _ensure_minimum_resolution(self, image: np.ndarray) -> np.ndarray:
        """
        Upscale image if too small for reliable OCR.

        Tesseract works best at ~300 DPI. Mobile photos of VINs may have
        the text occupy only a small portion of the frame, resulting in
        low effective resolution for the VIN characters.
        """
        height, width = image.shape[:2]
        if width < self.MIN_WIDTH_FOR_VIN:
            scale = self.MIN_WIDTH_FOR_VIN / width
            new_width = int(width * scale)
            new_height = int(height * scale)
            # INTER_CUBIC preserves glyph edges better than linear scaling.
            image = cv2.resize(
                image, (new_width, new_height), interpolation=cv2.INTER_CUBIC
            )
            logger.debug(
                "Upscaled image from %dx%d to %dx%d",
                width,
                height,
                new_width,
                new_height,
            )
        return image

    def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
        """
        Apply CLAHE (Contrast Limited Adaptive Histogram Equalization).

        CLAHE improves contrast in images with varying illumination,
        which is common in VIN photos taken in different lighting conditions.
        Returns the input unchanged if OpenCV raises.
        """
        try:
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            return clahe.apply(image)
        except cv2.error as e:
            logger.warning("CLAHE failed: %s", e)
            return image

    def _deskew(self, image: np.ndarray) -> np.ndarray:
        """
        Correct image rotation using Hough transform line detection.

        VIN plates/stickers are often photographed at slight angles.
        Best-effort: returns the input unchanged when no usable lines are
        found, the skew is negligible (<0.5 deg) or extreme (>20 deg, which
        likely indicates misdetection), or any step raises.
        """
        try:
            # Detect edges, then near-horizontal line segments.
            edges = cv2.Canny(image, 50, 150, apertureSize=3)

            lines = cv2.HoughLinesP(
                edges,
                rho=1,
                theta=np.pi / 180,
                threshold=100,
                minLineLength=100,
                maxLineGap=10,
            )

            if lines is None:
                return image

            # Calculate angles of detected lines.
            angles = []
            for line in lines:
                x1, y1, x2, y2 = line[0]
                if x2 - x1 != 0:
                    angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
                    # Only consider nearly horizontal lines.
                    if -45 < angle < 45:
                        angles.append(angle)

            if not angles:
                return image

            # Use median angle to avoid outliers.
            median_angle = np.median(angles)

            # Only correct if skew is significant but not extreme.
            if abs(median_angle) < 0.5 or abs(median_angle) > 20:
                return image

            # Rotate to correct skew.
            height, width = image.shape[:2]
            center = (width // 2, height // 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, median_angle, 1.0)

            # Expand the canvas so the rotated image is not clipped.
            cos_val = abs(rotation_matrix[0, 0])
            sin_val = abs(rotation_matrix[0, 1])
            new_width = int(height * sin_val + width * cos_val)
            new_height = int(height * cos_val + width * sin_val)

            rotation_matrix[0, 2] += (new_width - width) / 2
            rotation_matrix[1, 2] += (new_height - height) / 2

            rotated = cv2.warpAffine(
                image,
                rotation_matrix,
                (new_width, new_height),
                # Replicate edge pixels instead of filling with black, so
                # thresholding is not skewed by artificial dark borders.
                borderMode=cv2.BORDER_REPLICATE,
            )

            logger.debug("Deskewed by %.2f degrees", median_angle)
            return rotated

        except Exception as e:
            logger.warning("Deskew failed: %s", e)
            return image

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """
        Apply non-local means denoising.

        This helps remove noise while preserving VIN character edges.
        Returns the input unchanged if OpenCV raises.
        """
        try:
            return cv2.fastNlMeansDenoising(
                image, h=10, templateWindowSize=7, searchWindowSize=21
            )
        except cv2.error as e:
            logger.warning("Denoising failed: %s", e)
            return image

    def _adaptive_threshold(self, image: np.ndarray) -> np.ndarray:
        """
        Apply adaptive thresholding for binarization.

        Adaptive thresholding handles varying illumination across the image,
        which is common in VIN photos. Returns the input unchanged if
        OpenCV raises.
        """
        try:
            return cv2.adaptiveThreshold(
                image,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=11,
                C=2,
            )
        except cv2.error as e:
            logger.warning("Adaptive threshold failed: %s", e)
            return image

    def _otsu_threshold(self, image: np.ndarray) -> np.ndarray:
        """
        Apply Otsu's thresholding for binarization.

        Otsu's method auto-calculates the optimal threshold value,
        which can work better than adaptive thresholding on evenly-lit
        images. Returns the input unchanged if OpenCV raises.
        """
        try:
            _, result = cv2.threshold(
                image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )
            return result
        except cv2.error as e:
            logger.warning("Otsu threshold failed: %s", e)
            return image

    def detect_vin_region(self, image_bytes: bytes) -> Optional[BoundingBox]:
        """
        Attempt to detect the VIN region in an image.

        Uses contour detection to find rectangular regions that might
        contain VINs.

        Args:
            image_bytes: Raw image bytes

        Returns:
            BoundingBox of detected VIN region, or None if not found
        """
        try:
            pil_image = Image.open(io.BytesIO(image_bytes))
            if pil_image.mode != "L":
                pil_image = pil_image.convert("L")

            cv_image = np.array(pil_image)

            # Blur before edge detection to suppress texture noise.
            blurred = cv2.GaussianBlur(cv_image, (5, 5), 0)
            edges = cv2.Canny(blurred, 50, 150)

            contours, _ = cv2.findContours(
                edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )

            if not contours:
                return None

            # Find rectangular contours with appropriate aspect ratio for VIN.
            # VIN is typically 17 characters, roughly 5:1 to 10:1 aspect ratio.
            vin_candidates = []

            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)
                if h == 0:
                    continue

                aspect_ratio = w / h
                area = w * h

                # VIN regions typically have:
                # - Aspect ratio between 4:1 and 12:1
                # - Minimum area (to filter out noise)
                if 4 <= aspect_ratio <= 12 and area > 1000:
                    vin_candidates.append((x, y, w, h, area))

            if not vin_candidates:
                return None

            # Return the largest candidate.
            vin_candidates.sort(key=lambda c: c[4], reverse=True)
            x, y, w, h, _ = vin_candidates[0]

            return BoundingBox(x=x, y=y, width=w, height=h)

        except Exception as e:
            logger.warning("VIN region detection failed: %s", e)
            return None
|
|
|
|
|
|
# Singleton instance shared by importers of this module; VinPreprocessor
# holds no per-call state, so one instance is safe to reuse.
vin_preprocessor = VinPreprocessor()
|