fix: add debug image saving gated on LOG_LEVEL=debug (refs #113)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 36s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 21s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped

Save original, adaptive, and Otsu preprocessed images to
/tmp/vin-debug/{timestamp}/ when LOG_LEVEL is set to debug.
No images saved at info level. Volume mount added for access.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-06 20:26:06 -06:00
parent 488a267fc7
commit ff3858f750
2 changed files with 41 additions and 0 deletions

View File

@@ -1,8 +1,10 @@
"""VIN-specific OCR extractor with preprocessing and validation."""
import io
import logging
import os
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
import magic
@@ -57,9 +59,31 @@ class VinExtractor(BaseExtractor):
# VIN character whitelist for Tesseract
VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
# Fixed debug output directory (inside container)
DEBUG_DIR = "/tmp/vin-debug"
def __init__(self) -> None:
"""Initialize VIN extractor."""
pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
self._debug = settings.log_level.upper() == "DEBUG"
def _save_debug_image(self, session_dir: str, name: str, data: bytes) -> None:
"""Save image bytes to the debug session directory when LOG_LEVEL=debug."""
if not self._debug:
return
path = os.path.join(session_dir, name)
with open(path, "wb") as f:
f.write(data)
logger.debug("Saved debug image: %s (%d bytes)", name, len(data))
def _create_debug_session(self) -> Optional[str]:
"""Create a timestamped debug directory. Returns path or None."""
if not self._debug:
return None
ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
session_dir = os.path.join(self.DEBUG_DIR, ts)
os.makedirs(session_dir, exist_ok=True)
return session_dir
def extract(
self, image_bytes: bytes, content_type: Optional[str] = None
@@ -89,10 +113,14 @@ class VinExtractor(BaseExtractor):
)
try:
debug_session = self._create_debug_session()
logger.debug(
"VIN extraction input: %d bytes, content_type=%s",
len(image_bytes), content_type,
)
if debug_session:
self._save_debug_image(debug_session, "01_original.jpg", image_bytes)
# Apply VIN-optimized preprocessing
preprocessing_result = vin_preprocessor.preprocess(image_bytes)
@@ -100,6 +128,10 @@ class VinExtractor(BaseExtractor):
logger.debug(
"Preprocessing steps: %s", preprocessing_result.preprocessing_applied
)
if debug_session:
self._save_debug_image(
debug_session, "02_preprocessed_adaptive.png", preprocessed_bytes
)
# Perform OCR with VIN-optimized settings
raw_text, word_confidences = self._perform_ocr(preprocessed_bytes)
@@ -121,6 +153,11 @@ class VinExtractor(BaseExtractor):
"Otsu preprocessing steps: %s",
otsu_result.preprocessing_applied,
)
if debug_session:
self._save_debug_image(
debug_session, "03_preprocessed_otsu.png",
otsu_result.image_bytes,
)
raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes)
logger.debug("Otsu PSM 6 raw text: '%s'", raw_text)