fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114
@@ -103,6 +103,14 @@ class VinExtractor(BaseExtractor):
|
||||
# No VIN candidates found - try with different PSM modes
|
||||
candidates = self._try_alternate_ocr(preprocessed_bytes)
|
||||
|
||||
if not candidates:
|
||||
# Try alternative preprocessing (Otsu's thresholding)
|
||||
otsu_result = vin_preprocessor.preprocess_otsu(image_bytes)
|
||||
raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes)
|
||||
candidates = vin_validator.extract_candidates(raw_text)
|
||||
if not candidates:
|
||||
candidates = self._try_alternate_ocr(otsu_result.image_bytes)
|
||||
|
||||
if not candidates:
|
||||
return VinExtractionResult(
|
||||
success=False,
|
||||
@@ -200,10 +208,14 @@ class VinExtractor(BaseExtractor):
|
||||
image = Image.open(io.BytesIO(image_bytes))
|
||||
|
||||
# Configure Tesseract for VIN extraction
|
||||
# Use character whitelist to exclude I, O, Q
|
||||
# OEM 1 = LSTM neural network engine (best accuracy)
|
||||
# Disable dictionaries since VINs are not dictionary words
|
||||
config = (
|
||||
f"--psm {psm} "
|
||||
f"-c tessedit_char_whitelist={self.VIN_WHITELIST}"
|
||||
f"--oem 1 "
|
||||
f"-c tessedit_char_whitelist={self.VIN_WHITELIST} "
|
||||
f"-c load_system_dawg=false "
|
||||
f"-c load_freq_dawg=false"
|
||||
)
|
||||
|
||||
# Get detailed OCR data
|
||||
@@ -228,20 +240,20 @@ class VinExtractor(BaseExtractor):
|
||||
"""
|
||||
Try alternate OCR configurations when initial extraction fails.
|
||||
|
||||
PSM modes tried in order:
|
||||
7 - Single text line
|
||||
8 - Single word
|
||||
11 - Sparse text (finds text in any order, good for angled photos)
|
||||
13 - Raw line (no Tesseract heuristics, good for clean VIN plates)
|
||||
|
||||
Returns:
|
||||
List of VIN candidates
|
||||
"""
|
||||
# Try PSM 7 (single text line)
|
||||
raw_text, _ = self._perform_ocr(image_bytes, psm=7)
|
||||
candidates = vin_validator.extract_candidates(raw_text)
|
||||
if candidates:
|
||||
return candidates
|
||||
|
||||
# Try PSM 8 (single word)
|
||||
raw_text, _ = self._perform_ocr(image_bytes, psm=8)
|
||||
candidates = vin_validator.extract_candidates(raw_text)
|
||||
if candidates:
|
||||
return candidates
|
||||
for psm in (7, 8, 11, 13):
|
||||
raw_text, _ = self._perform_ocr(image_bytes, psm=psm)
|
||||
candidates = vin_validator.extract_candidates(raw_text)
|
||||
if candidates:
|
||||
return candidates
|
||||
|
||||
return []
|
||||
|
||||
|
||||
@@ -93,6 +93,10 @@ class VinPreprocessor:
|
||||
gray = cv_image
|
||||
steps_applied.append("grayscale")
|
||||
|
||||
# Upscale small images for better OCR (Tesseract needs ~300 DPI)
|
||||
gray = self._ensure_minimum_resolution(gray)
|
||||
steps_applied.append("resolution_check")
|
||||
|
||||
# Apply deskew
|
||||
if apply_deskew:
|
||||
gray = self._deskew(gray)
|
||||
@@ -123,6 +127,29 @@ class VinPreprocessor:
|
||||
preprocessing_applied=steps_applied,
|
||||
)
|
||||
|
||||
# Minimum width in pixels for reliable VIN OCR.
|
||||
# A 17-char VIN needs ~30px per character for Tesseract accuracy.
|
||||
MIN_WIDTH_FOR_VIN = 600
|
||||
|
||||
def _ensure_minimum_resolution(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Upscale image if too small for reliable OCR.
|
||||
|
||||
Tesseract works best at ~300 DPI. Mobile photos of VINs may have
|
||||
the text occupy only a small portion of the frame, resulting in
|
||||
low effective resolution for the VIN characters.
|
||||
"""
|
||||
height, width = image.shape[:2]
|
||||
if width < self.MIN_WIDTH_FOR_VIN:
|
||||
scale = self.MIN_WIDTH_FOR_VIN / width
|
||||
new_width = int(width * scale)
|
||||
new_height = int(height * scale)
|
||||
image = cv2.resize(
|
||||
image, (new_width, new_height), interpolation=cv2.INTER_CUBIC
|
||||
)
|
||||
logger.debug(f"Upscaled image from {width}x{height} to {new_width}x{new_height}")
|
||||
return image
|
||||
|
||||
def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Apply CLAHE (Contrast Limited Adaptive Histogram Equalization).
|
||||
@@ -242,6 +269,69 @@ class VinPreprocessor:
|
||||
logger.warning(f"Adaptive threshold failed: {e}")
|
||||
return image
|
||||
|
||||
def _otsu_threshold(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Apply Otsu's thresholding for binarization.
|
||||
|
||||
Otsu's method auto-calculates the optimal threshold value,
|
||||
which can work better than adaptive thresholding on evenly-lit images.
|
||||
"""
|
||||
try:
|
||||
_, result = cv2.threshold(
|
||||
image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
|
||||
)
|
||||
return result
|
||||
except cv2.error as e:
|
||||
logger.warning(f"Otsu threshold failed: {e}")
|
||||
return image
|
||||
|
||||
def preprocess_otsu(self, image_bytes: bytes) -> PreprocessingResult:
|
||||
"""
|
||||
Alternative preprocessing pipeline using Otsu's thresholding.
|
||||
|
||||
Used as a fallback when adaptive thresholding doesn't produce
|
||||
good OCR results.
|
||||
"""
|
||||
steps_applied = []
|
||||
|
||||
pil_image = Image.open(io.BytesIO(image_bytes))
|
||||
steps_applied.append("loaded")
|
||||
|
||||
if pil_image.mode not in ("RGB", "L"):
|
||||
pil_image = pil_image.convert("RGB")
|
||||
steps_applied.append("convert_rgb")
|
||||
|
||||
cv_image = np.array(pil_image)
|
||||
if len(cv_image.shape) == 3:
|
||||
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
|
||||
|
||||
if len(cv_image.shape) == 3:
|
||||
gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
|
||||
else:
|
||||
gray = cv_image
|
||||
steps_applied.append("grayscale")
|
||||
|
||||
gray = self._ensure_minimum_resolution(gray)
|
||||
steps_applied.append("resolution_check")
|
||||
|
||||
gray = self._apply_clahe(gray)
|
||||
steps_applied.append("clahe")
|
||||
|
||||
gray = self._denoise(gray)
|
||||
steps_applied.append("denoise")
|
||||
|
||||
gray = self._otsu_threshold(gray)
|
||||
steps_applied.append("otsu_threshold")
|
||||
|
||||
result_image = Image.fromarray(gray)
|
||||
buffer = io.BytesIO()
|
||||
result_image.save(buffer, format="PNG")
|
||||
|
||||
return PreprocessingResult(
|
||||
image_bytes=buffer.getvalue(),
|
||||
preprocessing_applied=steps_applied,
|
||||
)
|
||||
|
||||
def detect_vin_region(self, image_bytes: bytes) -> Optional[BoundingBox]:
|
||||
"""
|
||||
Attempt to detect the VIN region in an image.
|
||||
|
||||
@@ -20,7 +20,9 @@ class VinValidator:
|
||||
# VIN character set (excludes I, O, Q)
|
||||
VALID_CHARS = set("ABCDEFGHJKLMNPRSTUVWXYZ0123456789")
|
||||
|
||||
# Common OCR misreads and their corrections
|
||||
# Common OCR misreads and their corrections.
|
||||
# Only map characters that are INVALID in VINs to their likely correct values.
|
||||
# B and S are valid VIN characters and must NOT be transliterated.
|
||||
TRANSLITERATION = {
|
||||
"I": "1",
|
||||
"O": "0",
|
||||
@@ -29,9 +31,6 @@ class VinValidator:
|
||||
"o": "0",
|
||||
"q": "0",
|
||||
"l": "1",
|
||||
"L": "1",
|
||||
"B": "8", # Sometimes confused
|
||||
"S": "5", # Sometimes confused
|
||||
}
|
||||
|
||||
# Weights for check digit calculation (positions 1-17)
|
||||
@@ -224,6 +223,11 @@ class VinValidator:
|
||||
"""
|
||||
Extract VIN candidates from raw OCR text.
|
||||
|
||||
Uses two strategies:
|
||||
1. Find continuous 11-20 char alphanumeric runs (handles intact VINs)
|
||||
2. Concatenate adjacent short fragments separated by spaces/dashes
|
||||
(handles Tesseract fragmenting VINs into multiple words)
|
||||
|
||||
Args:
|
||||
text: Raw OCR text
|
||||
max_candidates: Maximum number of candidates to return
|
||||
@@ -231,29 +235,70 @@ class VinValidator:
|
||||
Returns:
|
||||
List of (vin, start_pos, end_pos) tuples
|
||||
"""
|
||||
# Pattern to find potential VIN sequences
|
||||
# Allow some flexibility for OCR errors (include I, O, Q for correction later)
|
||||
potential_vin_pattern = re.compile(r"[A-Z0-9IOQ]{11,17}", re.IGNORECASE)
|
||||
|
||||
candidates = []
|
||||
for match in potential_vin_pattern.finditer(text.upper()):
|
||||
candidate = match.group()
|
||||
corrected = self.correct_ocr_errors(candidate)
|
||||
seen_vins: set[str] = set()
|
||||
|
||||
# Only include if it could be a valid VIN after correction
|
||||
if len(corrected) == 17 and self.MODERN_VIN_PATTERN.match(corrected):
|
||||
candidates.append((corrected, match.start(), match.end()))
|
||||
upper_text = text.upper()
|
||||
|
||||
# Sort by likelihood of being valid (check digit validation)
|
||||
# Strategy 1: Find continuous runs of VIN-like characters
|
||||
continuous_pattern = re.compile(r"[A-Z0-9IOQ]{11,20}", re.IGNORECASE)
|
||||
for match in continuous_pattern.finditer(upper_text):
|
||||
self._try_add_candidate(
|
||||
match.group(), match.start(), match.end(), candidates, seen_vins
|
||||
)
|
||||
|
||||
# Strategy 2: Concatenate adjacent alphanumeric fragments
|
||||
# This handles OCR fragmentation like "1HGBH 41JXMN 109186"
|
||||
# Only consider fragments >= 3 chars (filters out noise/short words)
|
||||
fragment_pattern = re.compile(r"[A-Z0-9IOQ]{3,}", re.IGNORECASE)
|
||||
fragments = [
|
||||
(m.group(), m.start(), m.end())
|
||||
for m in fragment_pattern.finditer(upper_text)
|
||||
]
|
||||
|
||||
# Try sliding windows of 2-4 adjacent fragments
|
||||
for window_size in range(2, min(5, len(fragments) + 1)):
|
||||
for i in range(len(fragments) - window_size + 1):
|
||||
window = fragments[i : i + window_size]
|
||||
combined = "".join(f[0] for f in window)
|
||||
# Combined length must be close to 17 (allow +/- 2 for OCR noise)
|
||||
# Must contain at least 2 digit characters (VINs always have digits;
|
||||
# pure-alphabetic text is almost certainly not a VIN)
|
||||
if 15 <= len(combined) <= 19 and sum(c.isdigit() for c in combined) >= 2:
|
||||
self._try_add_candidate(
|
||||
combined, window[0][1], window[-1][2], candidates, seen_vins
|
||||
)
|
||||
|
||||
# Sort by likelihood of being valid (check digit first, then position)
|
||||
def score_candidate(c: tuple[str, int, int]) -> int:
|
||||
vin = c[0]
|
||||
if self.validate_check_digit(vin):
|
||||
return 0 # Best score
|
||||
return 0
|
||||
return 1
|
||||
|
||||
candidates.sort(key=score_candidate)
|
||||
return candidates[:max_candidates]
|
||||
|
||||
def _try_add_candidate(
|
||||
self,
|
||||
raw: str,
|
||||
start: int,
|
||||
end: int,
|
||||
candidates: list[tuple[str, int, int]],
|
||||
seen_vins: set[str],
|
||||
) -> None:
|
||||
"""Try to add a corrected VIN candidate if it passes validation."""
|
||||
corrected = self.correct_ocr_errors(raw)
|
||||
|
||||
# Trim to 17 chars if OCR captured extra characters
|
||||
if len(corrected) > 17:
|
||||
corrected = corrected[:17]
|
||||
|
||||
if len(corrected) == 17 and self.MODERN_VIN_PATTERN.match(corrected):
|
||||
if corrected not in seen_vins:
|
||||
seen_vins.add(corrected)
|
||||
candidates.append((corrected, start, end))
|
||||
|
||||
|
||||
# Singleton instance
|
||||
vin_validator = VinValidator()
|
||||
|
||||
@@ -53,6 +53,7 @@ class TestVinPreprocessor:
|
||||
)
|
||||
|
||||
assert "grayscale" in result.preprocessing_applied
|
||||
assert "resolution_check" in result.preprocessing_applied
|
||||
assert "clahe" in result.preprocessing_applied
|
||||
assert "deskew" in result.preprocessing_applied
|
||||
assert "denoise" in result.preprocessing_applied
|
||||
@@ -185,6 +186,54 @@ class TestVinPreprocessorThreshold:
|
||||
assert len(unique_values) <= 2
|
||||
|
||||
|
||||
class TestVinPreprocessorOtsu:
    """Tests for Otsu's thresholding preprocessing."""

    def test_otsu_threshold_creates_binary_image(self) -> None:
        """Test Otsu's thresholding creates binary output."""
        gray = np.full((100, 400), 128, dtype=np.uint8)

        thresholded = VinPreprocessor()._otsu_threshold(gray)

        # A binarized image holds at most two distinct values (0 and 255).
        assert len(np.unique(thresholded)) <= 2

    def test_preprocess_otsu_returns_result(self) -> None:
        """Test Otsu preprocessing pipeline returns valid result."""
        result = VinPreprocessor().preprocess_otsu(create_test_image())

        assert result.image_bytes is not None
        assert len(result.image_bytes) > 0
        assert "otsu_threshold" in result.preprocessing_applied
        assert "grayscale" in result.preprocessing_applied
||||
|
||||
|
||||
class TestVinPreprocessorResolution:
    """Tests for resolution upscaling."""

    def test_upscale_small_image(self) -> None:
        """Test small images are upscaled."""
        pre = VinPreprocessor()
        tiny = np.full((50, 200), 128, dtype=np.uint8)

        upscaled = pre._ensure_minimum_resolution(tiny)

        # Width must be brought up to the OCR minimum.
        assert upscaled.shape[1] >= pre.MIN_WIDTH_FOR_VIN

    def test_no_upscale_large_image(self) -> None:
        """Test large images are not upscaled."""
        pre = VinPreprocessor()
        wide = np.full((200, 800), 128, dtype=np.uint8)

        untouched = pre._ensure_minimum_resolution(wide)

        # Already wide enough: dimensions stay exactly as supplied.
        assert untouched.shape == wide.shape
|
||||
|
||||
|
||||
class TestVinRegionDetection:
|
||||
"""Tests for VIN region detection."""
|
||||
|
||||
|
||||
@@ -43,9 +43,9 @@ class TestVinValidator:
|
||||
result = validator.calculate_check_digit("1HGBH41JXMN109186")
|
||||
assert result == "X"
|
||||
|
||||
# 5YJSA1E28HF123456 has check digit 2 at position 9
|
||||
# 5YJSA1E28HF123456 has check digit at position 9
|
||||
result = validator.calculate_check_digit("5YJSA1E28HF123456")
|
||||
assert result == "8" # Verify this is correct for this VIN
|
||||
assert result == "5"
|
||||
|
||||
def test_validate_check_digit_valid(self) -> None:
|
||||
"""Test check digit validation with valid VIN."""
|
||||
@@ -161,6 +161,27 @@ class TestVinValidator:
|
||||
assert len(candidates) >= 1
|
||||
assert candidates[0][0] == "1HGBH41JXMN109186"
|
||||
|
||||
def test_extract_candidates_fragmented_vin(self) -> None:
|
||||
"""Test candidate extraction handles space-fragmented VINs from OCR."""
|
||||
validator = VinValidator()
|
||||
|
||||
# Tesseract often fragments VINs into multiple words
|
||||
text = "1HGBH 41JXMN 109186"
|
||||
candidates = validator.extract_candidates(text)
|
||||
|
||||
assert len(candidates) >= 1
|
||||
assert candidates[0][0] == "1HGBH41JXMN109186"
|
||||
|
||||
def test_extract_candidates_dash_fragmented_vin(self) -> None:
|
||||
"""Test candidate extraction handles dash-separated VINs."""
|
||||
validator = VinValidator()
|
||||
|
||||
text = "1HGBH41J-XMN109186"
|
||||
candidates = validator.extract_candidates(text)
|
||||
|
||||
assert len(candidates) >= 1
|
||||
assert candidates[0][0] == "1HGBH41JXMN109186"
|
||||
|
||||
def test_extract_candidates_no_vin(self) -> None:
|
||||
"""Test candidate extraction with no VIN."""
|
||||
validator = VinValidator()
|
||||
|
||||
Reference in New Issue
Block a user