fix: add debug image saving gated on LOG_LEVEL=debug (refs #113)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 36s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 21s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 36s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 21s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
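Save the original image and the adaptive- and Otsu-preprocessed images to
/tmp/vin-debug/{timestamp}/ when LOG_LEVEL is set to debug.
No images are saved at any other level (e.g. info). A named volume (mvp_vin_debug) is mounted at /tmp/vin-debug so the saved images can be accessed.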
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -191,6 +191,8 @@ services:
|
|||||||
REDIS_HOST: mvp-redis
|
REDIS_HOST: mvp-redis
|
||||||
REDIS_PORT: 6379
|
REDIS_PORT: 6379
|
||||||
REDIS_DB: 1
|
REDIS_DB: 1
|
||||||
|
volumes:
|
||||||
|
- vin-debug:/tmp/vin-debug
|
||||||
networks:
|
networks:
|
||||||
- backend
|
- backend
|
||||||
- database
|
- database
|
||||||
@@ -396,3 +398,5 @@ volumes:
|
|||||||
name: mvp_loki_data
|
name: mvp_loki_data
|
||||||
mvp_grafana_data:
|
mvp_grafana_data:
|
||||||
name: mvp_grafana_data
|
name: mvp_grafana_data
|
||||||
|
vin-debug:
|
||||||
|
name: mvp_vin_debug
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
"""VIN-specific OCR extractor with preprocessing and validation."""
|
"""VIN-specific OCR extractor with preprocessing and validation."""
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
@@ -57,9 +59,31 @@ class VinExtractor(BaseExtractor):
|
|||||||
# VIN character whitelist for Tesseract
|
# VIN character whitelist for Tesseract
|
||||||
VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
|
VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
|
||||||
|
|
||||||
|
# Fixed debug output directory (inside container)
|
||||||
|
DEBUG_DIR = "/tmp/vin-debug"
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
"""Initialize VIN extractor."""
|
"""Initialize VIN extractor."""
|
||||||
pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
|
pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
|
||||||
|
self._debug = settings.log_level.upper() == "DEBUG"
|
||||||
|
|
||||||
|
def _save_debug_image(self, session_dir: str, name: str, data: bytes) -> None:
    """Save image bytes to the debug session directory when LOG_LEVEL=debug.

    Does nothing unless debug mode is enabled (``self._debug``). Saving is
    best-effort: a failed write is logged as a warning and swallowed, so a
    problem with the debug directory does not raise into the caller.

    Args:
        session_dir: Directory to write into (as returned by
            ``_create_debug_session``).
        name: File name to create inside ``session_dir``.
        data: Image bytes, written to disk verbatim.
    """
    if not self._debug:
        return
    path = os.path.join(session_dir, name)
    try:
        with open(path, "wb") as f:
            f.write(data)
    except OSError as exc:
        # Debug output is a diagnostic aid only; never let it break the
        # operation that is being diagnosed.
        logger.warning("Failed to save debug image %s: %s", path, exc)
        return
    logger.debug("Saved debug image: %s (%d bytes)", name, len(data))
|
||||||
|
|
||||||
|
def _create_debug_session(self) -> Optional[str]:
    """Create a timestamped debug directory under ``DEBUG_DIR``.

    The directory name is the current local time formatted as
    ``%Y%m%d_%H%M%S_%f`` (microsecond resolution).

    Returns:
        The path of the session directory, or ``None`` when debug mode is
        disabled or the directory could not be created.
    """
    if not self._debug:
        return None
    ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    session_dir = os.path.join(self.DEBUG_DIR, ts)
    try:
        os.makedirs(session_dir, exist_ok=True)
    except OSError as exc:
        # Best-effort: without a session directory the caller simply skips
        # saving debug images instead of failing.
        logger.warning(
            "Failed to create debug directory %s: %s", session_dir, exc
        )
        return None
    return session_dir
|
||||||
|
|
||||||
def extract(
|
def extract(
|
||||||
self, image_bytes: bytes, content_type: Optional[str] = None
|
self, image_bytes: bytes, content_type: Optional[str] = None
|
||||||
@@ -89,10 +113,14 @@ class VinExtractor(BaseExtractor):
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
debug_session = self._create_debug_session()
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"VIN extraction input: %d bytes, content_type=%s",
|
"VIN extraction input: %d bytes, content_type=%s",
|
||||||
len(image_bytes), content_type,
|
len(image_bytes), content_type,
|
||||||
)
|
)
|
||||||
|
if debug_session:
|
||||||
|
self._save_debug_image(debug_session, "01_original.jpg", image_bytes)
|
||||||
|
|
||||||
# Apply VIN-optimized preprocessing
|
# Apply VIN-optimized preprocessing
|
||||||
preprocessing_result = vin_preprocessor.preprocess(image_bytes)
|
preprocessing_result = vin_preprocessor.preprocess(image_bytes)
|
||||||
@@ -100,6 +128,10 @@ class VinExtractor(BaseExtractor):
|
|||||||
logger.debug(
|
logger.debug(
|
||||||
"Preprocessing steps: %s", preprocessing_result.preprocessing_applied
|
"Preprocessing steps: %s", preprocessing_result.preprocessing_applied
|
||||||
)
|
)
|
||||||
|
if debug_session:
|
||||||
|
self._save_debug_image(
|
||||||
|
debug_session, "02_preprocessed_adaptive.png", preprocessed_bytes
|
||||||
|
)
|
||||||
|
|
||||||
# Perform OCR with VIN-optimized settings
|
# Perform OCR with VIN-optimized settings
|
||||||
raw_text, word_confidences = self._perform_ocr(preprocessed_bytes)
|
raw_text, word_confidences = self._perform_ocr(preprocessed_bytes)
|
||||||
@@ -121,6 +153,11 @@ class VinExtractor(BaseExtractor):
|
|||||||
"Otsu preprocessing steps: %s",
|
"Otsu preprocessing steps: %s",
|
||||||
otsu_result.preprocessing_applied,
|
otsu_result.preprocessing_applied,
|
||||||
)
|
)
|
||||||
|
if debug_session:
|
||||||
|
self._save_debug_image(
|
||||||
|
debug_session, "03_preprocessed_otsu.png",
|
||||||
|
otsu_result.image_bytes,
|
||||||
|
)
|
||||||
|
|
||||||
raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes)
|
raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes)
|
||||||
logger.debug("Otsu PSM 6 raw text: '%s'", raw_text)
|
logger.debug("Otsu PSM 6 raw text: '%s'", raw_text)
|
||||||
|
|||||||
Reference in New Issue
Block a user