fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114

Merged
egullickson merged 15 commits from issue-113-fix-vin-ocr-scanning into main 2026-02-07 15:47:37 +00:00
Showing only changes of commit e4336ce9da - Show all commits

View File

@@ -290,14 +290,48 @@ class VinValidator:
"""Try to add a corrected VIN candidate if it passes validation.""" """Try to add a corrected VIN candidate if it passes validation."""
corrected = self.correct_ocr_errors(raw) corrected = self.correct_ocr_errors(raw)
# Trim to 17 chars if OCR captured extra characters if len(corrected) == 17:
if len(corrected) > 17: self._add_if_valid(corrected, start, end, candidates, seen_vins)
corrected = corrected[:17] return
if len(corrected) == 17 and self.MODERN_VIN_PATTERN.match(corrected): if len(corrected) > 17:
if corrected not in seen_vins: # Strategy A: try every 17-char sliding window
seen_vins.add(corrected) for i in range(len(corrected) - 16):
candidates.append((corrected, start, end)) window = corrected[i : i + 17]
self._add_if_valid(window, start, end, candidates, seen_vins)
# Strategy B: for 18-19 char strings, try deleting each
# character one at a time. OCR often inserts a spurious
# character (e.g. sticker border read as 'C') that breaks
# the VIN. Check-digit validation filters out false hits.
if len(corrected) <= 19:
for i in range(len(corrected)):
reduced = corrected[:i] + corrected[i + 1 :]
if len(reduced) == 17:
self._add_if_valid(
reduced, start, end, candidates, seen_vins
)
elif len(reduced) == 18:
# Two deletions needed — try removing one more
for j in range(len(reduced)):
reduced2 = reduced[:j] + reduced[j + 1 :]
self._add_if_valid(
reduced2, start, end, candidates, seen_vins
)
def _add_if_valid(
    self,
    vin: str,
    start: int,
    end: int,
    candidates: list[tuple[str, int, int]],
    seen_vins: set[str],
) -> None:
    """Record ``vin`` as a candidate when it is a new, well-formed VIN.

    The string is accepted only if it is exactly 17 characters long,
    ``self.MODERN_VIN_PATTERN`` matches it, and it is not already present
    in ``seen_vins``. On acceptance it is added to ``seen_vins`` and the
    tuple ``(vin, start, end)`` is appended to ``candidates``.

    Args:
        vin: Candidate string to check.
        start: Start offset stored alongside the candidate.
        end: End offset stored alongside the candidate.
        candidates: Accumulator list, mutated in place.
        seen_vins: Set of VINs already recorded, mutated in place.

    Returns:
        None. Results are communicated through the two containers.
    """
    # Length is checked before the regex so the pattern only ever runs
    # against 17-character strings.
    if len(vin) != 17:
        return
    if not self.MODERN_VIN_PATTERN.match(vin):
        return
    # The same VIN can be reached through several correction paths;
    # keep only its first occurrence.
    if vin in seen_vins:
        return
    seen_vins.add(vin)
    candidates.append((vin, start, end))
# Singleton instance # Singleton instance