fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114

Merged
egullickson merged 15 commits from issue-113-fix-vin-ocr-scanning into main 2026-02-07 15:47:37 +00:00
2 changed files with 41 additions and 0 deletions
Showing only changes of commit ff3858f750 - Show all commits

View File

@@ -191,6 +191,8 @@ services:
REDIS_HOST: mvp-redis REDIS_HOST: mvp-redis
REDIS_PORT: 6379 REDIS_PORT: 6379
REDIS_DB: 1 REDIS_DB: 1
volumes:
- vin-debug:/tmp/vin-debug
networks: networks:
- backend - backend
- database - database
@@ -396,3 +398,5 @@ volumes:
name: mvp_loki_data name: mvp_loki_data
mvp_grafana_data: mvp_grafana_data:
name: mvp_grafana_data name: mvp_grafana_data
vin-debug:
name: mvp_vin_debug

View File

@@ -1,8 +1,10 @@
"""VIN-specific OCR extractor with preprocessing and validation.""" """VIN-specific OCR extractor with preprocessing and validation."""
import io import io
import logging import logging
import os
import time import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional from typing import Optional
import magic import magic
@@ -57,9 +59,31 @@ class VinExtractor(BaseExtractor):
# VIN character whitelist for Tesseract # VIN character whitelist for Tesseract
VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789" VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
# Fixed debug output directory (inside container)
DEBUG_DIR = "/tmp/vin-debug"
def __init__(self) -> None: def __init__(self) -> None:
"""Initialize VIN extractor.""" """Initialize VIN extractor."""
pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
self._debug = settings.log_level.upper() == "DEBUG"
def _save_debug_image(self, session_dir: str, name: str, data: bytes) -> None:
"""Save image bytes to the debug session directory when LOG_LEVEL=debug."""
if not self._debug:
return
path = os.path.join(session_dir, name)
with open(path, "wb") as f:
f.write(data)
logger.debug("Saved debug image: %s (%d bytes)", name, len(data))
def _create_debug_session(self) -> Optional[str]:
"""Create a timestamped debug directory. Returns path or None."""
if not self._debug:
return None
ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
session_dir = os.path.join(self.DEBUG_DIR, ts)
os.makedirs(session_dir, exist_ok=True)
return session_dir
def extract( def extract(
self, image_bytes: bytes, content_type: Optional[str] = None self, image_bytes: bytes, content_type: Optional[str] = None
@@ -89,10 +113,14 @@ class VinExtractor(BaseExtractor):
) )
try: try:
debug_session = self._create_debug_session()
logger.debug( logger.debug(
"VIN extraction input: %d bytes, content_type=%s", "VIN extraction input: %d bytes, content_type=%s",
len(image_bytes), content_type, len(image_bytes), content_type,
) )
if debug_session:
self._save_debug_image(debug_session, "01_original.jpg", image_bytes)
# Apply VIN-optimized preprocessing # Apply VIN-optimized preprocessing
preprocessing_result = vin_preprocessor.preprocess(image_bytes) preprocessing_result = vin_preprocessor.preprocess(image_bytes)
@@ -100,6 +128,10 @@ class VinExtractor(BaseExtractor):
logger.debug( logger.debug(
"Preprocessing steps: %s", preprocessing_result.preprocessing_applied "Preprocessing steps: %s", preprocessing_result.preprocessing_applied
) )
if debug_session:
self._save_debug_image(
debug_session, "02_preprocessed_adaptive.png", preprocessed_bytes
)
# Perform OCR with VIN-optimized settings # Perform OCR with VIN-optimized settings
raw_text, word_confidences = self._perform_ocr(preprocessed_bytes) raw_text, word_confidences = self._perform_ocr(preprocessed_bytes)
@@ -121,6 +153,11 @@ class VinExtractor(BaseExtractor):
"Otsu preprocessing steps: %s", "Otsu preprocessing steps: %s",
otsu_result.preprocessing_applied, otsu_result.preprocessing_applied,
) )
if debug_session:
self._save_debug_image(
debug_session, "03_preprocessed_otsu.png",
otsu_result.image_bytes,
)
raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes) raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes)
logger.debug("Otsu PSM 6 raw text: '%s'", raw_text) logger.debug("Otsu PSM 6 raw text: '%s'", raw_text)