fix: add debug image saving gated on LOG_LEVEL=debug (refs #113)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 36s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 21s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped

Save original, adaptive, and Otsu preprocessed images to
/tmp/vin-debug/{timestamp}/ when LOG_LEVEL is set to debug.
No images are saved at info level. A named volume (vin-debug) is mounted at /tmp/vin-debug so the saved images can be retrieved.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-06 20:26:06 -06:00
parent 488a267fc7
commit ff3858f750
2 changed files with 41 additions and 0 deletions

View File

@@ -191,6 +191,8 @@ services:
REDIS_HOST: mvp-redis REDIS_HOST: mvp-redis
REDIS_PORT: 6379 REDIS_PORT: 6379
REDIS_DB: 1 REDIS_DB: 1
volumes:
- vin-debug:/tmp/vin-debug
networks: networks:
- backend - backend
- database - database
@@ -396,3 +398,5 @@ volumes:
name: mvp_loki_data name: mvp_loki_data
mvp_grafana_data: mvp_grafana_data:
name: mvp_grafana_data name: mvp_grafana_data
vin-debug:
name: mvp_vin_debug

View File

@@ -1,8 +1,10 @@
"""VIN-specific OCR extractor with preprocessing and validation.""" """VIN-specific OCR extractor with preprocessing and validation."""
import io import io
import logging import logging
import os
import time import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional from typing import Optional
import magic import magic
@@ -57,9 +59,31 @@ class VinExtractor(BaseExtractor):
# VIN character whitelist for Tesseract # VIN character whitelist for Tesseract
VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789" VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
# Fixed debug output directory (inside container)
DEBUG_DIR = "/tmp/vin-debug"
def __init__(self) -> None: def __init__(self) -> None:
"""Initialize VIN extractor.""" """Initialize VIN extractor."""
pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
self._debug = settings.log_level.upper() == "DEBUG"
def _save_debug_image(self, session_dir: str, name: str, data: bytes) -> None:
    """Save image bytes to the debug session directory when LOG_LEVEL=debug.

    Debug images are a diagnostic aid only, so a failed write is logged
    and swallowed rather than raised into the extraction that called this.

    Args:
        session_dir: Directory that receives this session's debug files.
        name: File name to create inside ``session_dir``.
        data: Raw image bytes, written unchanged.
    """
    if not self._debug:
        return
    path = os.path.join(session_dir, name)
    try:
        with open(path, "wb") as f:
            f.write(data)
    except OSError as exc:
        # A missing mount, full disk, or bad permissions must not fail OCR.
        logger.warning("Could not save debug image %s: %s", path, exc)
        return
    logger.debug("Saved debug image: %s (%d bytes)", name, len(data))
def _create_debug_session(self) -> Optional[str]:
    """Create a timestamped debug directory. Returns path or None.

    The directory is created under ``DEBUG_DIR`` and named from the
    current local time with microsecond resolution.

    Returns:
        The session directory path, or None when debug mode is off or the
        directory could not be created. Returning None on failure lets
        callers skip debug output instead of aborting the extraction.
    """
    if not self._debug:
        return None
    ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    session_dir = os.path.join(self.DEBUG_DIR, ts)
    try:
        os.makedirs(session_dir, exist_ok=True)
    except OSError as exc:
        # Debug output is optional; an unusable debug dir must not fail OCR.
        logger.warning(
            "Could not create debug directory %s: %s", session_dir, exc
        )
        return None
    return session_dir
def extract( def extract(
self, image_bytes: bytes, content_type: Optional[str] = None self, image_bytes: bytes, content_type: Optional[str] = None
@@ -89,10 +113,14 @@ class VinExtractor(BaseExtractor):
) )
try: try:
debug_session = self._create_debug_session()
logger.debug( logger.debug(
"VIN extraction input: %d bytes, content_type=%s", "VIN extraction input: %d bytes, content_type=%s",
len(image_bytes), content_type, len(image_bytes), content_type,
) )
if debug_session:
self._save_debug_image(debug_session, "01_original.jpg", image_bytes)
# Apply VIN-optimized preprocessing # Apply VIN-optimized preprocessing
preprocessing_result = vin_preprocessor.preprocess(image_bytes) preprocessing_result = vin_preprocessor.preprocess(image_bytes)
@@ -100,6 +128,10 @@ class VinExtractor(BaseExtractor):
logger.debug( logger.debug(
"Preprocessing steps: %s", preprocessing_result.preprocessing_applied "Preprocessing steps: %s", preprocessing_result.preprocessing_applied
) )
if debug_session:
self._save_debug_image(
debug_session, "02_preprocessed_adaptive.png", preprocessed_bytes
)
# Perform OCR with VIN-optimized settings # Perform OCR with VIN-optimized settings
raw_text, word_confidences = self._perform_ocr(preprocessed_bytes) raw_text, word_confidences = self._perform_ocr(preprocessed_bytes)
@@ -121,6 +153,11 @@ class VinExtractor(BaseExtractor):
"Otsu preprocessing steps: %s", "Otsu preprocessing steps: %s",
otsu_result.preprocessing_applied, otsu_result.preprocessing_applied,
) )
if debug_session:
self._save_debug_image(
debug_session, "03_preprocessed_otsu.png",
otsu_result.image_bytes,
)
raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes) raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes)
logger.debug("Otsu PSM 6 raw text: '%s'", raw_text) logger.debug("Otsu PSM 6 raw text: '%s'", raw_text)