feat: add VIN photo OCR pipeline (refs #67)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 31s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped

Implement VIN-specific OCR extraction with optimized preprocessing:

- Add POST /extract/vin endpoint for VIN extraction
- VIN preprocessor: CLAHE, deskew, denoise, adaptive threshold
- VIN validator: check digit validation, OCR error correction (I->1, O->0)
- VIN extractor: PSM modes 6/7/8, character whitelist, alternatives
- Response includes confidence, bounding box, and alternatives
- Unit tests for validator and preprocessor
- Integration tests for VIN extraction endpoint

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-01 19:31:36 -06:00
parent 004940b013
commit 54cbd49171
14 changed files with 1694 additions and 1 deletions

View File

@@ -0,0 +1,10 @@
"""Image preprocessors for OCR optimization."""
from app.services.preprocessor import ImagePreprocessor, preprocessor
from app.preprocessors.vin_preprocessor import VinPreprocessor, vin_preprocessor
# Public names of this package: exactly the four names brought in by the two
# imports above (the general ImagePreprocessor / preprocessor pair and the
# VIN-specific VinPreprocessor / vin_preprocessor pair).
__all__ = [
"ImagePreprocessor",
"preprocessor",
"VinPreprocessor",
"vin_preprocessor",
]

View File

@@ -0,0 +1,309 @@
"""VIN-optimized image preprocessing pipeline."""
import io
import logging
from dataclasses import dataclass, field
from typing import Optional

import cv2
import numpy as np
from PIL import Image
from pillow_heif import register_heif_opener
# Register the HEIF/HEIC opener with Pillow. This runs once, as an import-time
# side effect, so that the Image.open() calls in this module can decode HEIC
# input in addition to the formats Pillow handles natively.
register_heif_opener()
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@dataclass
class BoundingBox:
    """Axis-aligned rectangular region of an image.

    Within this module instances are built by
    ``VinPreprocessor.detect_vin_region`` from the ``(x, y, w, h)`` tuple
    returned by ``cv2.boundingRect``.

    Attributes:
        x: Horizontal position of the region's origin corner.
        y: Vertical position of the region's origin corner.
        width: Horizontal extent of the region.
        height: Vertical extent of the region.
    """

    x: int
    y: int
    width: int
    height: int
@dataclass
class PreprocessingResult:
    """Result of VIN preprocessing.

    Attributes:
        image_bytes: Encoded bytes of the processed image.
        bounding_box: Optional region associated with the result; ``None``
            when no region was supplied.
        preprocessing_applied: Names of the pipeline steps recorded for this
            result, in the order they were appended. Every instance gets its
            own list.
    """

    image_bytes: bytes
    bounding_box: Optional[BoundingBox] = None
    # default_factory keeps the annotation truthful (the attribute is always
    # a list after construction) and gives each instance a fresh list.
    preprocessing_applied: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Normalise an explicitly passed ``None`` to an empty list.

        Retained for backward compatibility: ``None`` used to be the default
        value, so existing callers may still pass it explicitly.
        """
        if self.preprocessing_applied is None:
            self.preprocessing_applied = []
class VinPreprocessor:
    """VIN-optimized image preprocessing for improved OCR accuracy.

    The class keeps no per-instance state: every method works only on its
    arguments. The private ``_*`` stage helpers are best-effort: when the
    underlying OpenCV call fails they log a warning and return their input
    unchanged instead of raising.
    """

    def preprocess(
        self,
        image_bytes: bytes,
        apply_clahe: bool = True,
        apply_deskew: bool = True,
        apply_denoise: bool = True,
        apply_threshold: bool = True,
    ) -> PreprocessingResult:
        """
        Apply VIN-optimized preprocessing pipeline.

        Pipeline:
        1. HEIC conversion (if needed)
        2. Grayscale conversion
        3. Deskew (correct rotation/tilt)
        4. Contrast enhancement (CLAHE)
        5. Noise reduction (fastNlMeansDenoising)
        6. Adaptive thresholding

        Args:
            image_bytes: Raw image bytes (HEIC, JPEG, PNG)
            apply_clahe: Apply CLAHE contrast enhancement
            apply_deskew: Apply deskew correction
            apply_denoise: Apply noise reduction
            apply_threshold: Apply adaptive thresholding

        Returns:
            PreprocessingResult whose ``image_bytes`` is the processed image
            encoded as PNG and whose ``preprocessing_applied`` lists the
            steps in execution order. An enabled stage is listed even when it
            internally fell back to returning its input unchanged.
            ``bounding_box`` is not populated by this method.

        Raises:
            Any exception raised by Pillow while opening, decoding or
            re-encoding the image is not caught here and propagates.

        NOTE(review): EXIF orientation is not applied while loading, and
        deskew only corrects tilts up to 20 degrees - confirm that callers
        normalise photo orientation upstream if sideways images are expected.
        """
        steps_applied: list[str] = []

        # Decode with Pillow (HEIC is handled through pillow-heif). The
        # context manager releases the decoder as soon as the pixels have
        # been copied into a NumPy array.
        with Image.open(io.BytesIO(image_bytes)) as source:
            steps_applied.append("loaded")
            if source.mode in ("RGB", "L"):
                pixels = np.array(source)
            else:
                # Palette, alpha, CMYK, ... are normalised to plain RGB.
                pixels = np.array(source.convert("RGB"))
                steps_applied.append("convert_rgb")

        # A 3-D array is RGB at this point; convert it to grayscale in a
        # single step (same result as going RGB -> BGR -> GRAY). A 2-D array
        # is already single-channel.
        if pixels.ndim == 3:
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
        else:
            gray = pixels
        steps_applied.append("grayscale")

        # Optional stages, in the order they must run.
        stages = (
            (apply_deskew, "deskew", self._deskew),
            (apply_clahe, "clahe", self._apply_clahe),
            (apply_denoise, "denoise", self._denoise),
            (apply_threshold, "threshold", self._adaptive_threshold),
        )
        for enabled, label, stage in stages:
            if enabled:
                gray = stage(gray)
                steps_applied.append(label)

        # Encode the final single-channel image back to PNG bytes.
        buffer = io.BytesIO()
        Image.fromarray(gray).save(buffer, format="PNG")
        return PreprocessingResult(
            image_bytes=buffer.getvalue(),
            preprocessing_applied=steps_applied,
        )

    def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
        """
        Apply CLAHE (Contrast Limited Adaptive Histogram Equalization).

        CLAHE improves contrast in images with varying illumination,
        which is common in VIN photos taken in different lighting conditions.

        Args:
            image: Single-channel image array.

        Returns:
            The contrast-enhanced image, or ``image`` unchanged if OpenCV
            raised an error (a warning is logged in that case).
        """
        try:
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            return clahe.apply(image)
        except cv2.error as e:
            logger.warning("CLAHE failed: %s", e)
            return image

    def _deskew(self, image: np.ndarray) -> np.ndarray:
        """
        Correct image rotation using Hough transform line detection.

        VIN plates/stickers are often photographed at slight angles.

        The skew estimate is the median angle of all detected line segments
        that lie within 45 degrees of horizontal. The image is rotated only
        when that estimate is between 0.5 and 20 degrees in magnitude; the
        output canvas is enlarged so the rotated content is not cropped.

        Args:
            image: Single-channel image array.

        Returns:
            The rotated image, or ``image`` unchanged when no usable lines
            are found, the skew is outside the correctable range, or any
            error occurs (a warning is logged in the error case).
        """
        try:
            # Detect edges, then straight line segments along them.
            edges = cv2.Canny(image, 50, 150, apertureSize=3)
            lines = cv2.HoughLinesP(
                edges,
                rho=1,
                theta=np.pi / 180,
                threshold=100,
                minLineLength=100,
                maxLineGap=10,
            )
            if lines is None:
                return image

            # One (x1, y1, x2, y2) row per detected segment.
            segments = lines.reshape(-1, 4)
            dx = segments[:, 2] - segments[:, 0]
            dy = segments[:, 3] - segments[:, 1]
            angles = np.arctan2(dy, dx) * 180 / np.pi

            # Keep only nearly horizontal segments (vertical ones excluded).
            near_horizontal = angles[(dx != 0) & (angles > -45) & (angles < 45)]
            if near_horizontal.size == 0:
                return image

            # Median rather than mean, so a few stray lines do not dominate.
            median_angle = float(np.median(near_horizontal))

            # Only correct if skew is significant but not extreme.
            if abs(median_angle) < 0.5 or abs(median_angle) > 20:
                return image

            height, width = image.shape[:2]
            center = (width // 2, height // 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, median_angle, 1.0)

            # Size of the canvas that fully contains the rotated image.
            cos_val = abs(rotation_matrix[0, 0])
            sin_val = abs(rotation_matrix[0, 1])
            new_width = int(height * sin_val + width * cos_val)
            new_height = int(height * cos_val + width * sin_val)

            # Shift the transform so the content stays centred on the
            # enlarged canvas.
            rotation_matrix[0, 2] += (new_width - width) / 2
            rotation_matrix[1, 2] += (new_height - height) / 2

            rotated = cv2.warpAffine(
                image,
                rotation_matrix,
                (new_width, new_height),
                borderMode=cv2.BORDER_REPLICATE,
            )
            logger.debug("Deskewed by %.2f degrees", median_angle)
            return rotated
        except Exception as e:
            # Deliberately broad: deskew is an optional enhancement and must
            # never abort the pipeline.
            logger.warning("Deskew failed: %s", e)
            return image

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """
        Apply non-local means denoising.

        This helps remove noise while preserving VIN character edges.

        Args:
            image: Single-channel image array.

        Returns:
            The denoised image, or ``image`` unchanged if OpenCV raised an
            error (a warning is logged in that case).
        """
        try:
            return cv2.fastNlMeansDenoising(
                image, h=10, templateWindowSize=7, searchWindowSize=21
            )
        except cv2.error as e:
            logger.warning("Denoising failed: %s", e)
            return image

    def _adaptive_threshold(self, image: np.ndarray) -> np.ndarray:
        """
        Apply adaptive thresholding for binarization.

        Adaptive thresholding handles varying illumination across the image,
        which is common in VIN photos.

        Args:
            image: Single-channel image array.

        Returns:
            The binarized image (pixel values 0 or 255), or ``image``
            unchanged if OpenCV raised an error (a warning is logged in that
            case).
        """
        try:
            return cv2.adaptiveThreshold(
                image,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=11,
                C=2,
            )
        except cv2.error as e:
            logger.warning("Adaptive threshold failed: %s", e)
            return image

    def detect_vin_region(self, image_bytes: bytes) -> Optional[BoundingBox]:
        """
        Attempt to detect the VIN region in an image.

        Uses contour detection to find rectangular regions that might contain
        VINs: the image is blurred, edge-detected, and the bounding rectangle
        of every external contour is tested against an aspect-ratio and
        minimum-area filter. The largest surviving rectangle wins.

        Args:
            image_bytes: Raw image bytes

        Returns:
            BoundingBox of detected VIN region, or None if not found. Any
            error during decoding or detection is logged as a warning and
            also yields None.
        """
        try:
            with Image.open(io.BytesIO(image_bytes)) as source:
                grayscale = source if source.mode == "L" else source.convert("L")
                cv_image = np.array(grayscale)

            # Smooth first so Canny reacts to real edges rather than noise.
            blurred = cv2.GaussianBlur(cv_image, (5, 5), 0)
            edges = cv2.Canny(blurred, 50, 150)

            contours, _ = cv2.findContours(
                edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )
            if not contours:
                return None

            # VIN is typically 17 characters on one line, so candidate
            # regions must be wide and flat:
            # - aspect ratio between 4:1 and 12:1
            # - area above 1000 (to filter out noise)
            rects = (cv2.boundingRect(contour) for contour in contours)
            vin_candidates = [
                (x, y, w, h)
                for x, y, w, h in rects
                if h != 0 and 4 <= w / h <= 12 and w * h > 1000
            ]
            if not vin_candidates:
                return None

            # Largest area wins; on ties the earliest contour is kept.
            x, y, w, h = max(vin_candidates, key=lambda rect: rect[2] * rect[3])
            return BoundingBox(x=x, y=y, width=w, height=h)
        except Exception as e:
            # Region detection is advisory, so failures degrade to
            # "not found".
            logger.warning("VIN region detection failed: %s", e)
            return None
# Singleton instance: a single module-level VinPreprocessor, created once when
# this module is imported. The class defines no __init__ and its methods do
# not assign to self, so the instance carries no state of its own.
vin_preprocessor = VinPreprocessor()