From d5696320f14f69ff245b79a9e2070fe767d4b3a1 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 19:36:35 -0600 Subject: [PATCH] fix: align VIN OCR logging with unified logging design (refs #113) Replace filesystem-based debug system (VIN_DEBUG_DIR) with standard logger.debug() calls that flow through Loki when LOG_LEVEL=DEBUG. Use .env.logging variable for OCR LOG_LEVEL. Increase image capture quality to 0.95 for better OCR accuracy. Co-Authored-By: Claude Opus 4.6 --- docker-compose.yml | 2 +- .../components/CameraCapture/useImageCrop.ts | 2 +- ocr/app/extractors/vin_extractor.py | 42 ++++++++++++++++--- 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 0d83e30..a444bf9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -187,7 +187,7 @@ services: container_name: mvp-ocr restart: unless-stopped environment: - LOG_LEVEL: info + LOG_LEVEL: ${BACKEND_LOG_LEVEL:-info} REDIS_HOST: mvp-redis REDIS_PORT: 6379 REDIS_DB: 1 diff --git a/frontend/src/shared/components/CameraCapture/useImageCrop.ts b/frontend/src/shared/components/CameraCapture/useImageCrop.ts index 58d69d4..4b17108 100644 --- a/frontend/src/shared/components/CameraCapture/useImageCrop.ts +++ b/frontend/src/shared/components/CameraCapture/useImageCrop.ts @@ -304,7 +304,7 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet } }, mimeType, - 0.92 + 0.95 ); }; diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index ef1cb67..1b310f2 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -89,15 +89,26 @@ class VinExtractor(BaseExtractor): ) try: + logger.debug( + "VIN extraction input: %d bytes, content_type=%s", + len(image_bytes), content_type, + ) + # Apply VIN-optimized preprocessing preprocessing_result = vin_preprocessor.preprocess(image_bytes) preprocessed_bytes = preprocessing_result.image_bytes + logger.debug( + "Preprocessing steps: %s", preprocessing_result.preprocessing_applied + ) # Perform OCR with VIN-optimized settings raw_text, word_confidences = self._perform_ocr(preprocessed_bytes) + logger.debug("PSM 6 raw text: '%s'", raw_text) + logger.debug("PSM 6 word confidences: %s", word_confidences) # Extract VIN candidates from raw text candidates = vin_validator.extract_candidates(raw_text) + logger.debug("PSM 6 candidates: %s", candidates) if not candidates: # No VIN candidates found - try with different PSM modes @@ -106,12 +117,22 @@ class VinExtractor(BaseExtractor): if not candidates: # Try alternative preprocessing (Otsu's thresholding) otsu_result = vin_preprocessor.preprocess_otsu(image_bytes) + logger.debug( + "Otsu preprocessing steps: %s", + otsu_result.preprocessing_applied, + ) + raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes) + logger.debug("Otsu PSM 6 raw text: '%s'", raw_text) candidates = vin_validator.extract_candidates(raw_text) + logger.debug("Otsu PSM 6 candidates: %s", candidates) if not candidates: - candidates = self._try_alternate_ocr(otsu_result.image_bytes) + candidates = self._try_alternate_ocr( + otsu_result.image_bytes, prefix="Otsu" + ) if not candidates: + logger.debug("No VIN pattern found in any OCR attempt") return VinExtractionResult( success=False, error="No VIN pattern found in image", @@ -161,8 +182,12 @@ class VinExtractor(BaseExtractor): processing_time_ms = int((time.time() - start_time) * 1000) logger.info( - f"VIN extraction: {primary_vin}, confidence={primary_confidence:.2%}, " - f"time={processing_time_ms}ms" + "VIN extraction: %s, confidence=%.2f%%, time=%dms", + primary_vin, primary_confidence * 100, processing_time_ms, + ) + logger.debug( + "VIN alternatives: %s", + [(a.vin, a.confidence) for a in alternatives], ) return VinExtractionResult( @@ -176,7 +201,7 @@ class VinExtractor(BaseExtractor): ) except Exception as e: - logger.error(f"VIN extraction failed: {e}", exc_info=True) + logger.error("VIN extraction failed: %s", e, exc_info=True) return VinExtractionResult( success=False, error=str(e), @@ -236,7 +261,11 @@ class VinExtractor(BaseExtractor): raw_text = " ".join(words) return raw_text, confidences - def _try_alternate_ocr(self, image_bytes: bytes) -> list[tuple[str, int, int]]: + def _try_alternate_ocr( + self, + image_bytes: bytes, + prefix: str = "", + ) -> list[tuple[str, int, int]]: """ Try alternate OCR configurations when initial extraction fails. @@ -249,9 +278,12 @@ class VinExtractor(BaseExtractor): Returns: List of VIN candidates """ + tag = f"{prefix} " if prefix else "" for psm in (7, 8, 11, 13): raw_text, _ = self._perform_ocr(image_bytes, psm=psm) + logger.debug("%sPSM %d raw text: '%s'", tag, psm, raw_text) candidates = vin_validator.extract_candidates(raw_text) + logger.debug("%sPSM %d candidates: %s", tag, psm, candidates) if candidates: return candidates