From 45aaeab9739ac464d78d93f2f7133d3a9db9ec96 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 15:48:45 -0600 Subject: [PATCH 01/15] chore: update context.json --- .ai/context.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.ai/context.json b/.ai/context.json index 736a130..1e2c4bd 100644 --- a/.ai/context.json +++ b/.ai/context.json @@ -5,7 +5,8 @@ "host": "gitea", "owner": "egullickson", "repo": "motovaultpro", - "url": "https://git.motovaultpro.com" + "url": "https://git.motovaultpro.com", + "default_branch": "main" }, "ai_quick_start": { "load_order": [ -- 2.49.1 From 6a4c2137f7e7edf6a345abc55de5ec9be3df0bca Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 15:57:14 -0600 Subject: [PATCH 02/15] fix: resolve VIN OCR scanning failures on all images (refs #113) Root cause: Tesseract fragments VINs into multiple words but candidate extraction required continuous 17-char sequences, rejecting all results. Changes: - Fix candidate extraction to concatenate adjacent OCR fragments - Disable Tesseract dictionaries (VINs are not dictionary words) - Set OEM 1 (LSTM engine) for better accuracy - Add PSM 11 (sparse text) and PSM 13 (raw line) fallback modes - Add Otsu's thresholding as alternative preprocessing pipeline - Upscale small images to meet Tesseract's 300 DPI requirement - Remove incorrect B->8 and S->5 transliterations (valid VIN chars) - Fix pre-existing test bug in check digit expected value Co-Authored-By: Claude Opus 4.6 --- ocr/app/extractors/vin_extractor.py | 38 ++++++---- ocr/app/preprocessors/vin_preprocessor.py | 90 +++++++++++++++++++++++ ocr/app/validators/vin_validator.py | 77 +++++++++++++++---- ocr/tests/test_vin_preprocessor.py | 49 ++++++++++++ ocr/tests/test_vin_validator.py | 25 ++++++- 5 files changed, 248 insertions(+), 31 deletions(-) diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index 37fdad1..ef1cb67 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -103,6 +103,14 @@ class VinExtractor(BaseExtractor): # No VIN candidates found - try with different PSM modes candidates = self._try_alternate_ocr(preprocessed_bytes) + if not candidates: + # Try alternative preprocessing (Otsu's thresholding) + otsu_result = vin_preprocessor.preprocess_otsu(image_bytes) + raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes) + candidates = vin_validator.extract_candidates(raw_text) + if not candidates: + candidates = self._try_alternate_ocr(otsu_result.image_bytes) + if not candidates: return VinExtractionResult( success=False, @@ -200,10 +208,14 @@ class VinExtractor(BaseExtractor): image = Image.open(io.BytesIO(image_bytes)) # Configure Tesseract for VIN extraction - # Use character whitelist to exclude I, O, Q + # OEM 1 = LSTM neural network engine (best accuracy) + # Disable dictionaries since VINs are not dictionary words config = ( f"--psm {psm} " - f"-c tessedit_char_whitelist={self.VIN_WHITELIST}" + f"--oem 1 " + f"-c tessedit_char_whitelist={self.VIN_WHITELIST} " + f"-c load_system_dawg=false " + f"-c load_freq_dawg=false" ) # Get detailed OCR data @@ -228,20 +240,20 @@ class VinExtractor(BaseExtractor): """ Try alternate OCR configurations when initial extraction fails. + PSM modes tried in order: + 7 - Single text line + 8 - Single word + 11 - Sparse text (finds text in any order, good for angled photos) + 13 - Raw line (no Tesseract heuristics, good for clean VIN plates) + Returns: List of VIN candidates """ - # Try PSM 7 (single text line) - raw_text, _ = self._perform_ocr(image_bytes, psm=7) - candidates = vin_validator.extract_candidates(raw_text) - if candidates: - return candidates - - # Try PSM 8 (single word) - raw_text, _ = self._perform_ocr(image_bytes, psm=8) - candidates = vin_validator.extract_candidates(raw_text) - if candidates: - return candidates + for psm in (7, 8, 11, 13): + raw_text, _ = self._perform_ocr(image_bytes, psm=psm) + candidates = vin_validator.extract_candidates(raw_text) + if candidates: + return candidates return [] diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index e0ffbba..95ba4bc 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -93,6 +93,10 @@ class VinPreprocessor: gray = cv_image steps_applied.append("grayscale") + # Upscale small images for better OCR (Tesseract needs ~300 DPI) + gray = self._ensure_minimum_resolution(gray) + steps_applied.append("resolution_check") + # Apply deskew if apply_deskew: gray = self._deskew(gray) @@ -123,6 +127,29 @@ class VinPreprocessor: preprocessing_applied=steps_applied, ) + # Minimum width in pixels for reliable VIN OCR. + # A 17-char VIN needs ~30px per character for Tesseract accuracy. + MIN_WIDTH_FOR_VIN = 600 + + def _ensure_minimum_resolution(self, image: np.ndarray) -> np.ndarray: + """ + Upscale image if too small for reliable OCR. + + Tesseract works best at ~300 DPI. Mobile photos of VINs may have + the text occupy only a small portion of the frame, resulting in + low effective resolution for the VIN characters. + """ + height, width = image.shape[:2] + if width < self.MIN_WIDTH_FOR_VIN: + scale = self.MIN_WIDTH_FOR_VIN / width + new_width = int(width * scale) + new_height = int(height * scale) + image = cv2.resize( + image, (new_width, new_height), interpolation=cv2.INTER_CUBIC + ) + logger.debug(f"Upscaled image from {width}x{height} to {new_width}x{new_height}") + return image + def _apply_clahe(self, image: np.ndarray) -> np.ndarray: """ Apply CLAHE (Contrast Limited Adaptive Histogram Equalization). @@ -242,6 +269,69 @@ class VinPreprocessor: logger.warning(f"Adaptive threshold failed: {e}") return image + def _otsu_threshold(self, image: np.ndarray) -> np.ndarray: + """ + Apply Otsu's thresholding for binarization. + + Otsu's method auto-calculates the optimal threshold value, + which can work better than adaptive thresholding on evenly-lit images. + """ + try: + _, result = cv2.threshold( + image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU + ) + return result + except cv2.error as e: + logger.warning(f"Otsu threshold failed: {e}") + return image + + def preprocess_otsu(self, image_bytes: bytes) -> PreprocessingResult: + """ + Alternative preprocessing pipeline using Otsu's thresholding. + + Used as a fallback when adaptive thresholding doesn't produce + good OCR results. + """ + steps_applied = [] + + pil_image = Image.open(io.BytesIO(image_bytes)) + steps_applied.append("loaded") + + if pil_image.mode not in ("RGB", "L"): + pil_image = pil_image.convert("RGB") + steps_applied.append("convert_rgb") + + cv_image = np.array(pil_image) + if len(cv_image.shape) == 3: + cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) + + if len(cv_image.shape) == 3: + gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) + else: + gray = cv_image + steps_applied.append("grayscale") + + gray = self._ensure_minimum_resolution(gray) + steps_applied.append("resolution_check") + + gray = self._apply_clahe(gray) + steps_applied.append("clahe") + + gray = self._denoise(gray) + steps_applied.append("denoise") + + gray = self._otsu_threshold(gray) + steps_applied.append("otsu_threshold") + + result_image = Image.fromarray(gray) + buffer = io.BytesIO() + result_image.save(buffer, format="PNG") + + return PreprocessingResult( + image_bytes=buffer.getvalue(), + preprocessing_applied=steps_applied, + ) + def detect_vin_region(self, image_bytes: bytes) -> Optional[BoundingBox]: """ Attempt to detect the VIN region in an image. diff --git a/ocr/app/validators/vin_validator.py b/ocr/app/validators/vin_validator.py index 6a4b264..7c74ae9 100644 --- a/ocr/app/validators/vin_validator.py +++ b/ocr/app/validators/vin_validator.py @@ -20,7 +20,9 @@ class VinValidator: # VIN character set (excludes I, O, Q) VALID_CHARS = set("ABCDEFGHJKLMNPRSTUVWXYZ0123456789") - # Common OCR misreads and their corrections + # Common OCR misreads and their corrections. + # Only map characters that are INVALID in VINs to their likely correct values. + # B and S are valid VIN characters and must NOT be transliterated. TRANSLITERATION = { "I": "1", "O": "0", @@ -29,9 +31,6 @@ class VinValidator: "o": "0", "q": "0", "l": "1", - "L": "1", - "B": "8", # Sometimes confused - "S": "5", # Sometimes confused } # Weights for check digit calculation (positions 1-17) @@ -224,6 +223,11 @@ class VinValidator: """ Extract VIN candidates from raw OCR text. + Uses two strategies: + 1. Find continuous 11-20 char alphanumeric runs (handles intact VINs) + 2. Concatenate adjacent short fragments separated by spaces/dashes + (handles Tesseract fragmenting VINs into multiple words) + Args: text: Raw OCR text max_candidates: Maximum number of candidates to return @@ -231,29 +235,70 @@ class VinValidator: Returns: List of (vin, start_pos, end_pos) tuples """ - # Pattern to find potential VIN sequences - # Allow some flexibility for OCR errors (include I, O, Q for correction later) - potential_vin_pattern = re.compile(r"[A-Z0-9IOQ]{11,17}", re.IGNORECASE) - candidates = [] - for match in potential_vin_pattern.finditer(text.upper()): - candidate = match.group() - corrected = self.correct_ocr_errors(candidate) + seen_vins: set[str] = set() - # Only include if it could be a valid VIN after correction - if len(corrected) == 17 and self.MODERN_VIN_PATTERN.match(corrected): - candidates.append((corrected, match.start(), match.end())) + upper_text = text.upper() - # Sort by likelihood of being valid (check digit validation) + # Strategy 1: Find continuous runs of VIN-like characters + continuous_pattern = re.compile(r"[A-Z0-9IOQ]{11,20}", re.IGNORECASE) + for match in continuous_pattern.finditer(upper_text): + self._try_add_candidate( + match.group(), match.start(), match.end(), candidates, seen_vins + ) + + # Strategy 2: Concatenate adjacent alphanumeric fragments + # This handles OCR fragmentation like "1HGBH 41JXMN 109186" + # Only consider fragments >= 3 chars (filters out noise/short words) + fragment_pattern = re.compile(r"[A-Z0-9IOQ]{3,}", re.IGNORECASE) + fragments = [ + (m.group(), m.start(), m.end()) + for m in fragment_pattern.finditer(upper_text) + ] + + # Try sliding windows of 2-4 adjacent fragments + for window_size in range(2, min(5, len(fragments) + 1)): + for i in range(len(fragments) - window_size + 1): + window = fragments[i : i + window_size] + combined = "".join(f[0] for f in window) + # Combined length must be close to 17 (allow +/- 2 for OCR noise) + # Must contain at least 2 digit characters (VINs always have digits; + # pure-alphabetic text is almost certainly not a VIN) + if 15 <= len(combined) <= 19 and sum(c.isdigit() for c in combined) >= 2: + self._try_add_candidate( + combined, window[0][1], window[-1][2], candidates, seen_vins + ) + + # Sort by likelihood of being valid (check digit first, then position) def score_candidate(c: tuple[str, int, int]) -> int: vin = c[0] if self.validate_check_digit(vin): - return 0 # Best score + return 0 return 1 candidates.sort(key=score_candidate) return candidates[:max_candidates] + def _try_add_candidate( + self, + raw: str, + start: int, + end: int, + candidates: list[tuple[str, int, int]], + seen_vins: set[str], + ) -> None: + """Try to add a corrected VIN candidate if it passes validation.""" + corrected = self.correct_ocr_errors(raw) + + # Trim to 17 chars if OCR captured extra characters + if len(corrected) > 17: + corrected = corrected[:17] + + if len(corrected) == 17 and self.MODERN_VIN_PATTERN.match(corrected): + if corrected not in seen_vins: + seen_vins.add(corrected) + candidates.append((corrected, start, end)) + # Singleton instance vin_validator = VinValidator() diff --git a/ocr/tests/test_vin_preprocessor.py b/ocr/tests/test_vin_preprocessor.py index 8076294..2d81a7b 100644 --- a/ocr/tests/test_vin_preprocessor.py +++ b/ocr/tests/test_vin_preprocessor.py @@ -53,6 +53,7 @@ class TestVinPreprocessor: ) assert "grayscale" in result.preprocessing_applied + assert "resolution_check" in result.preprocessing_applied assert "clahe" in result.preprocessing_applied assert "deskew" in result.preprocessing_applied assert "denoise" in result.preprocessing_applied @@ -185,6 +186,54 @@ class TestVinPreprocessorThreshold: assert len(unique_values) <= 2 +class TestVinPreprocessorOtsu: + """Tests for Otsu's thresholding preprocessing.""" + + def test_otsu_threshold_creates_binary_image(self) -> None: + """Test Otsu's thresholding creates binary output.""" + preprocessor = VinPreprocessor() + image = np.full((100, 400), 128, dtype=np.uint8) + + result = preprocessor._otsu_threshold(image) + + unique_values = np.unique(result) + assert len(unique_values) <= 2 + + def test_preprocess_otsu_returns_result(self) -> None: + """Test Otsu preprocessing pipeline returns valid result.""" + preprocessor = VinPreprocessor() + image_bytes = create_test_image() + + result = preprocessor.preprocess_otsu(image_bytes) + + assert result.image_bytes is not None + assert len(result.image_bytes) > 0 + assert "otsu_threshold" in result.preprocessing_applied + assert "grayscale" in result.preprocessing_applied + + +class TestVinPreprocessorResolution: + """Tests for resolution upscaling.""" + + def test_upscale_small_image(self) -> None: + """Test small images are upscaled.""" + preprocessor = VinPreprocessor() + small_image = np.full((50, 200), 128, dtype=np.uint8) + + result = preprocessor._ensure_minimum_resolution(small_image) + + assert result.shape[1] >= preprocessor.MIN_WIDTH_FOR_VIN + + def test_no_upscale_large_image(self) -> None: + """Test large images are not upscaled.""" + preprocessor = VinPreprocessor() + large_image = np.full((200, 800), 128, dtype=np.uint8) + + result = preprocessor._ensure_minimum_resolution(large_image) + + assert result.shape == large_image.shape + + class TestVinRegionDetection: """Tests for VIN region detection.""" diff --git a/ocr/tests/test_vin_validator.py b/ocr/tests/test_vin_validator.py index 26f170b..241eabd 100644 --- a/ocr/tests/test_vin_validator.py +++ b/ocr/tests/test_vin_validator.py @@ -43,9 +43,9 @@ class TestVinValidator: result = validator.calculate_check_digit("1HGBH41JXMN109186") assert result == "X" - # 5YJSA1E28HF123456 has check digit 2 at position 9 + # 5YJSA1E28HF123456 has check digit at position 9 result = validator.calculate_check_digit("5YJSA1E28HF123456") - assert result == "8" # Verify this is correct for this VIN + assert result == "5" def test_validate_check_digit_valid(self) -> None: """Test check digit validation with valid VIN.""" @@ -161,6 +161,27 @@ class TestVinValidator: assert len(candidates) >= 1 assert candidates[0][0] == "1HGBH41JXMN109186" + def test_extract_candidates_fragmented_vin(self) -> None: + """Test candidate extraction handles space-fragmented VINs from OCR.""" + validator = VinValidator() + + # Tesseract often fragments VINs into multiple words + text = "1HGBH 41JXMN 109186" + candidates = validator.extract_candidates(text) + + assert len(candidates) >= 1 + assert candidates[0][0] == "1HGBH41JXMN109186" + + def test_extract_candidates_dash_fragmented_vin(self) -> None: + """Test candidate extraction handles dash-separated VINs.""" + validator = VinValidator() + + text = "1HGBH41J-XMN109186" + candidates = validator.extract_candidates(text) + + assert len(candidates) >= 1 + assert candidates[0][0] == "1HGBH41JXMN109186" + def test_extract_candidates_no_vin(self) -> None: """Test candidate extraction with no VIN.""" validator = VinValidator() -- 2.49.1 From d5696320f14f69ff245b79a9e2070fe767d4b3a1 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 19:36:35 -0600 Subject: [PATCH 03/15] fix: align VIN OCR logging with unified logging design (refs #113) Replace filesystem-based debug system (VIN_DEBUG_DIR) with standard logger.debug() calls that flow through Loki when LOG_LEVEL=DEBUG. Use .env.logging variable for OCR LOG_LEVEL. Increase image capture quality to 0.95 for better OCR accuracy. Co-Authored-By: Claude Opus 4.6 --- docker-compose.yml | 2 +- .../components/CameraCapture/useImageCrop.ts | 2 +- ocr/app/extractors/vin_extractor.py | 42 ++++++++++++++++--- 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 0d83e30..a444bf9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -187,7 +187,7 @@ services: container_name: mvp-ocr restart: unless-stopped environment: - LOG_LEVEL: info + LOG_LEVEL: ${BACKEND_LOG_LEVEL:-info} REDIS_HOST: mvp-redis REDIS_PORT: 6379 REDIS_DB: 1 diff --git a/frontend/src/shared/components/CameraCapture/useImageCrop.ts b/frontend/src/shared/components/CameraCapture/useImageCrop.ts index 58d69d4..4b17108 100644 --- a/frontend/src/shared/components/CameraCapture/useImageCrop.ts +++ b/frontend/src/shared/components/CameraCapture/useImageCrop.ts @@ -304,7 +304,7 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet } }, mimeType, - 0.92 + 0.95 ); }; diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index ef1cb67..1b310f2 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -89,15 +89,26 @@ class VinExtractor(BaseExtractor): ) try: + logger.debug( + "VIN extraction input: %d bytes, content_type=%s", + len(image_bytes), content_type, + ) + # Apply VIN-optimized preprocessing preprocessing_result = vin_preprocessor.preprocess(image_bytes) preprocessed_bytes = preprocessing_result.image_bytes + logger.debug( + "Preprocessing steps: %s", preprocessing_result.preprocessing_applied + ) # Perform OCR with VIN-optimized settings raw_text, word_confidences = self._perform_ocr(preprocessed_bytes) + logger.debug("PSM 6 raw text: '%s'", raw_text) + logger.debug("PSM 6 word confidences: %s", word_confidences) # Extract VIN candidates from raw text candidates = vin_validator.extract_candidates(raw_text) + logger.debug("PSM 6 candidates: %s", candidates) if not candidates: # No VIN candidates found - try with different PSM modes @@ -106,12 +117,22 @@ class VinExtractor(BaseExtractor): if not candidates: # Try alternative preprocessing (Otsu's thresholding) otsu_result = vin_preprocessor.preprocess_otsu(image_bytes) + logger.debug( + "Otsu preprocessing steps: %s", + otsu_result.preprocessing_applied, + ) + raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes) + logger.debug("Otsu PSM 6 raw text: '%s'", raw_text) candidates = vin_validator.extract_candidates(raw_text) + logger.debug("Otsu PSM 6 candidates: %s", candidates) if not candidates: - candidates = self._try_alternate_ocr(otsu_result.image_bytes) + candidates = self._try_alternate_ocr( + otsu_result.image_bytes, prefix="Otsu" + ) if not candidates: + logger.debug("No VIN pattern found in any OCR attempt") return VinExtractionResult( success=False, error="No VIN pattern found in image", @@ -161,8 +182,12 @@ class VinExtractor(BaseExtractor): processing_time_ms = int((time.time() - start_time) * 1000) logger.info( - f"VIN extraction: {primary_vin}, confidence={primary_confidence:.2%}, " - f"time={processing_time_ms}ms" + "VIN extraction: %s, confidence=%.2f%%, time=%dms", + primary_vin, primary_confidence * 100, processing_time_ms, + ) + logger.debug( + "VIN alternatives: %s", + [(a.vin, a.confidence) for a in alternatives], ) return VinExtractionResult( @@ -176,7 +201,7 @@ class VinExtractor(BaseExtractor): ) except Exception as e: - logger.error(f"VIN extraction failed: {e}", exc_info=True) + logger.error("VIN extraction failed: %s", e, exc_info=True) return VinExtractionResult( success=False, error=str(e), @@ -236,7 +261,11 @@ class VinExtractor(BaseExtractor): raw_text = " ".join(words) return raw_text, confidences - def _try_alternate_ocr(self, image_bytes: bytes) -> list[tuple[str, int, int]]: + def _try_alternate_ocr( + self, + image_bytes: bytes, + prefix: str = "", + ) -> list[tuple[str, int, int]]: """ Try alternate OCR configurations when initial extraction fails. @@ -249,9 +278,12 @@ class VinExtractor(BaseExtractor): Returns: List of VIN candidates """ + tag = f"{prefix} " if prefix else "" for psm in (7, 8, 11, 13): raw_text, _ = self._perform_ocr(image_bytes, psm=psm) + logger.debug("%sPSM %d raw text: '%s'", tag, psm, raw_text) candidates = vin_validator.extract_candidates(raw_text) + logger.debug("%sPSM %d candidates: %s", tag, psm, candidates) if candidates: return candidates -- 2.49.1 From 3f0e2430870fde62faad4212605225ea29095f75 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 19:53:37 -0600 Subject: [PATCH 04/15] fix: Postgres Data paths --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index a444bf9..e2f18c6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -220,6 +220,7 @@ services: POSTGRES_INITDB_ARGS: --encoding=UTF8 POSTGRES_LOG_STATEMENT: ${POSTGRES_LOG_STATEMENT:-ddl} POSTGRES_LOG_MIN_DURATION_STATEMENT: ${POSTGRES_LOG_MIN_DURATION:-500} + PGDATA: /var/lib/postgresql/data volumes: - mvp_postgres_data:/var/lib/postgresql/data # Secrets (K8s Secrets equivalent) -- 2.49.1 From 488a267fc7c08bfc32d66ac423a8d399389795ba Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 20:20:14 -0600 Subject: [PATCH 05/15] fix: Fixed debug env variable. --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index e2f18c6..c6648a5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -187,7 +187,7 @@ services: container_name: mvp-ocr restart: unless-stopped environment: - LOG_LEVEL: ${BACKEND_LOG_LEVEL:-info} + LOG_LEVEL: debug REDIS_HOST: mvp-redis REDIS_PORT: 6379 REDIS_DB: 1 -- 2.49.1 From ff3858f750f85f573f6449cf1af5479732471897 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 20:26:06 -0600 Subject: [PATCH 06/15] fix: add debug image saving gated on LOG_LEVEL=debug (refs #113) Save original, adaptive, and Otsu preprocessed images to /tmp/vin-debug/{timestamp}/ when LOG_LEVEL is set to debug. No images saved at info level. Volume mount added for access. Co-Authored-By: Claude Opus 4.6 --- docker-compose.yml | 4 ++++ ocr/app/extractors/vin_extractor.py | 37 +++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index c6648a5..544d973 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -191,6 +191,8 @@ services: REDIS_HOST: mvp-redis REDIS_PORT: 6379 REDIS_DB: 1 + volumes: + - vin-debug:/tmp/vin-debug networks: - backend - database @@ -396,3 +398,5 @@ volumes: name: mvp_loki_data mvp_grafana_data: name: mvp_grafana_data + vin-debug: + name: mvp_vin_debug diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index 1b310f2..9d58501 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -1,8 +1,10 @@ """VIN-specific OCR extractor with preprocessing and validation.""" import io import logging +import os import time from dataclasses import dataclass, field +from datetime import datetime from typing import Optional import magic @@ -57,9 +59,31 @@ class VinExtractor(BaseExtractor): # VIN character whitelist for Tesseract VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789" + # Fixed debug output directory (inside container) + DEBUG_DIR = "/tmp/vin-debug" + def __init__(self) -> None: """Initialize VIN extractor.""" pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd + self._debug = settings.log_level.upper() == "DEBUG" + + def _save_debug_image(self, session_dir: str, name: str, data: bytes) -> None: + """Save image bytes to the debug session directory when LOG_LEVEL=debug.""" + if not self._debug: + return + path = os.path.join(session_dir, name) + with open(path, "wb") as f: + f.write(data) + logger.debug("Saved debug image: %s (%d bytes)", name, len(data)) + + def _create_debug_session(self) -> Optional[str]: + """Create a timestamped debug directory. Returns path or None.""" + if not self._debug: + return None + ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f") + session_dir = os.path.join(self.DEBUG_DIR, ts) + os.makedirs(session_dir, exist_ok=True) + return session_dir def extract( self, image_bytes: bytes, content_type: Optional[str] = None @@ -89,10 +113,14 @@ class VinExtractor(BaseExtractor): ) try: + debug_session = self._create_debug_session() + logger.debug( "VIN extraction input: %d bytes, content_type=%s", len(image_bytes), content_type, ) + if debug_session: + self._save_debug_image(debug_session, "01_original.jpg", image_bytes) # Apply VIN-optimized preprocessing preprocessing_result = vin_preprocessor.preprocess(image_bytes) @@ -100,6 +128,10 @@ class VinExtractor(BaseExtractor): logger.debug( "Preprocessing steps: %s", preprocessing_result.preprocessing_applied ) + if debug_session: + self._save_debug_image( + debug_session, "02_preprocessed_adaptive.png", preprocessed_bytes + ) # Perform OCR with VIN-optimized settings raw_text, word_confidences = self._perform_ocr(preprocessed_bytes) @@ -121,6 +153,11 @@ class VinExtractor(BaseExtractor): "Otsu preprocessing steps: %s", otsu_result.preprocessing_applied, ) + if debug_session: + self._save_debug_image( + debug_session, "03_preprocessed_otsu.png", + otsu_result.image_bytes, + ) raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes) logger.debug("Otsu PSM 6 raw text: '%s'", raw_text) -- 2.49.1 From 9ce08cbb8917ca89fb67b421105105a5705aa97a Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 20:42:00 -0600 Subject: [PATCH 07/15] fix: Debug variables --- config/traefik/traefik.yml | 2 +- docker-compose.prod.yml | 57 ++++++++++++++++++++++++++++++++------ docker-compose.yml | 15 ++++++---- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/config/traefik/traefik.yml b/config/traefik/traefik.yml index 4f7f779..b9e3a7f 100755 --- a/config/traefik/traefik.yml +++ b/config/traefik/traefik.yml @@ -52,7 +52,7 @@ global: # Logging log: - level: INFO + level: DEBUG format: json # Access logs diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 65151a8..076aeaa 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -9,16 +9,13 @@ # - Development-specific settings services: - # PostgreSQL - Remove development port exposure - mvp-postgres: - ports: [] - - # Redis - Remove development port exposure - mvp-redis: - ports: [] - - # Traefik - Ensure dashboard authentication is enforced + # Traefik - Production log level and dashboard auth mvp-traefik: + environment: + LOG_LEVEL: error + command: + - --configFile=/etc/traefik/traefik.yml + - --log.level=ERROR labels: - "traefik.enable=true" - "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.motovaultpro.local`)" @@ -26,3 +23,45 @@ services: - "traefik.http.routers.traefik-dashboard.middlewares=dashboard-auth" - "traefik.http.services.traefik-dashboard.loadbalancer.server.port=8080" - "traefik.http.middlewares.dashboard-auth.basicauth.users=admin:$$2y$$10$$foobar" + + # Backend - Production log level + mvp-backend: + environment: + NODE_ENV: production + CONFIG_PATH: /app/config/production.yml + SECRETS_DIR: /run/secrets + LOG_LEVEL: error + DATABASE_HOST: mvp-postgres + REDIS_HOST: mvp-redis + STRIPE_PRO_MONTHLY_PRICE_ID: prod_Toj6BG9Z9JwREl + STRIPE_PRO_YEARLY_PRICE_ID: prod_Toj8oo0RpVBQmB + STRIPE_ENTERPRISE_MONTHLY_PRICE_ID: prod_Toj8xGEui9jl6j + STRIPE_ENTERPRISE_YEARLY_PRICE_ID: prod_Toj9A7A773xrdn + + # OCR - Production log level + mvp-ocr: + environment: + LOG_LEVEL: error + REDIS_HOST: mvp-redis + REDIS_PORT: 6379 + REDIS_DB: 1 + + # PostgreSQL - Remove dev ports, production log level + mvp-postgres: + ports: [] + environment: + POSTGRES_DB: motovaultpro + POSTGRES_USER: postgres + POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password + POSTGRES_INITDB_ARGS: --encoding=UTF8 + LOG_LEVEL: error + POSTGRES_LOG_STATEMENT: none + POSTGRES_LOG_MIN_DURATION_STATEMENT: -1 + PGDATA: /var/lib/postgresql/data + + # Redis - Remove dev ports, production log level + mvp-redis: + ports: [] + command: redis-server --appendonly yes --loglevel ${LOG_LEVEL:-info} + environment: + LOG_LEVEL: error diff --git a/docker-compose.yml b/docker-compose.yml index 544d973..b79e2c9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: command: - --configFile=/etc/traefik/traefik.yml environment: + LOG_LEVEL: debug CLOUDFLARE_DNS_API_TOKEN_FILE: /run/secrets/cloudflare-dns-token ports: - "80:80" @@ -112,6 +113,7 @@ services: NODE_ENV: production CONFIG_PATH: /app/config/production.yml SECRETS_DIR: /run/secrets + LOG_LEVEL: debug # Service references DATABASE_HOST: mvp-postgres REDIS_HOST: mvp-redis @@ -192,7 +194,7 @@ services: REDIS_PORT: 6379 REDIS_DB: 1 volumes: - - vin-debug:/tmp/vin-debug + - /tmp/vin-debug:/tmp/vin-debug networks: - backend - database @@ -220,8 +222,9 @@ services: POSTGRES_USER: postgres POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password POSTGRES_INITDB_ARGS: --encoding=UTF8 - POSTGRES_LOG_STATEMENT: ${POSTGRES_LOG_STATEMENT:-ddl} - POSTGRES_LOG_MIN_DURATION_STATEMENT: ${POSTGRES_LOG_MIN_DURATION:-500} + LOG_LEVEL: debug + POSTGRES_LOG_STATEMENT: all + POSTGRES_LOG_MIN_DURATION_STATEMENT: 0 PGDATA: /var/lib/postgresql/data volumes: - mvp_postgres_data:/var/lib/postgresql/data @@ -248,7 +251,9 @@ services: image: ${REGISTRY_MIRRORS:-git.motovaultpro.com/egullickson/mirrors}/redis:8.4-alpine container_name: mvp-redis restart: unless-stopped - command: redis-server --appendonly yes --loglevel ${REDIS_LOGLEVEL:-notice} + command: redis-server --appendonly yes --loglevel ${LOG_LEVEL:-info} + environment: + LOG_LEVEL: debug volumes: - mvp_redis_data:/data networks: @@ -398,5 +403,3 @@ volumes: name: mvp_loki_data mvp_grafana_data: name: mvp_grafana_data - vin-debug: - name: mvp_vin_debug -- 2.49.1 From ce2a8d88f9eec52b68ec43a5b19ca8ca7beaf8b7 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 20:55:08 -0600 Subject: [PATCH 08/15] fix: Mobile image crop fix --- .../components/CameraCapture/CropTool.tsx | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/frontend/src/shared/components/CameraCapture/CropTool.tsx b/frontend/src/shared/components/CameraCapture/CropTool.tsx index 1d3a750..09f8b54 100644 --- a/frontend/src/shared/components/CameraCapture/CropTool.tsx +++ b/frontend/src/shared/components/CameraCapture/CropTool.tsx @@ -3,7 +3,7 @@ * @ai-context Allows user to adjust crop area with touch/mouse, confirm or retake */ -import React, { useCallback, useState } from 'react'; +import React, { useCallback, useState, useRef, useEffect } from 'react'; import { Box, IconButton, Button, Typography, CircularProgress } from '@mui/material'; import CheckIcon from '@mui/icons-material/Check'; import RefreshIcon from '@mui/icons-material/Refresh'; @@ -22,12 +22,28 @@ export const CropTool: React.FC = ({ onSkip, }) => { const [isProcessing, setIsProcessing] = useState(false); + const imageAreaRef = useRef(null); + const [imageMaxHeight, setImageMaxHeight] = useState(0); const { cropArea, isDragging, resetCrop, executeCrop, handleDragStart } = useImageCrop({ aspectRatio: lockAspectRatio ? aspectRatio : undefined, }); + // Measure available height for the image so the crop container + // matches the rendered image exactly (fixes mobile crop offset) + useEffect(() => { + const updateMaxHeight = () => { + if (imageAreaRef.current) { + const rect = imageAreaRef.current.getBoundingClientRect(); + setImageMaxHeight(rect.height - 32); // subtract p:2 padding (16px * 2) + } + }; + updateMaxHeight(); + window.addEventListener('resize', updateMaxHeight); + return () => window.removeEventListener('resize', updateMaxHeight); + }, []); + const handleConfirm = useCallback(async () => { setIsProcessing(true); try { @@ -61,6 +77,7 @@ export const CropTool: React.FC = ({ > {/* Image with crop overlay */} = ({ data-crop-container sx={{ position: 'relative', - maxWidth: '100%', - maxHeight: '100%', userSelect: 'none', touchAction: isDragging ? 'none' : 'auto', }} @@ -87,7 +102,7 @@ export const CropTool: React.FC = ({ alt="Captured" style={{ maxWidth: '100%', - maxHeight: '100%', + maxHeight: imageMaxHeight > 0 ? `${imageMaxHeight}px` : '70vh', display: 'block', }} draggable={false} -- 2.49.1 From 0de34983bbab3b57c3c0d05554ab46ec1368136d Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:14:56 -0600 Subject: [PATCH 09/15] fix: use best-contrast color channel for VIN preprocessing (refs #113) White text on green VIN stickers has only ~12% contrast in standard grayscale conversion because the green channel dominates luminance. The new _best_contrast_channel method evaluates each RGB channel's standard deviation and selects the one with highest contrast, giving ~2x improvement for green-tinted VIN stickers. Falls back to standard grayscale for neutral-colored images. Co-Authored-By: Claude Opus 4.6 --- ocr/app/preprocessors/vin_preprocessor.py | 46 +++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index 95ba4bc..aee5a86 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -86,9 +86,9 @@ class VinPreprocessor: if len(cv_image.shape) == 3: cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) - # Convert to grayscale + # Convert to grayscale using best-contrast channel selection if len(cv_image.shape) == 3: - gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) + gray = self._best_contrast_channel(cv_image) else: gray = cv_image steps_applied.append("grayscale") @@ -150,6 +150,46 @@ class VinPreprocessor: logger.debug(f"Upscaled image from {width}x{height} to {new_width}x{new_height}") return image + def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray: + """ + Select the single color channel with the highest contrast. + + Standard grayscale conversion (0.299R + 0.587G + 0.114B) averages + channels, which destroys contrast when text and background differ + primarily in one channel. For example, white text on a green VIN + sticker has almost identical luminance, but the blue and red channels + show strong contrast. + + This method evaluates each BGR channel by its standard deviation + (a proxy for contrast) and returns the one with the highest value. + Falls back to standard grayscale when all channels are similar. + """ + b_channel, g_channel, r_channel = cv2.split(bgr_image) + + stds = [ + float(np.std(b_channel)), + float(np.std(g_channel)), + float(np.std(r_channel)), + ] + channels = [b_channel, g_channel, r_channel] + channel_names = ["blue", "green", "red"] + + best_idx = int(np.argmax(stds)) + max_std = stds[best_idx] + min_std = min(stds) + + # Only use single-channel extraction when one channel is notably + # better (>20% higher std than the weakest). Otherwise, standard + # grayscale is fine and more robust for neutral-colored images. + if max_std > 0 and (max_std - min_std) / max_std > 0.20: + logger.debug( + "Using %s channel (std=%.1f) over grayscale (stds: B=%.1f G=%.1f R=%.1f)", + channel_names[best_idx], max_std, stds[0], stds[1], stds[2], + ) + return channels[best_idx] + + return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) + def _apply_clahe(self, image: np.ndarray) -> np.ndarray: """ Apply CLAHE (Contrast Limited Adaptive Histogram Equalization). @@ -306,7 +346,7 @@ class VinPreprocessor: cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) if len(cv_image.shape) == 3: - gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) + gray = self._best_contrast_channel(cv_image) else: gray = cv_image steps_applied.append("grayscale") -- 2.49.1 From a07ec324fe01bcf93c00bc4463619d1500c3f466 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:23:43 -0600 Subject: [PATCH 10/15] fix: use min-channel grayscale and morphological cleanup for VIN OCR (refs #113) Replace std-based channel selection (which incorrectly picked green for green-tinted VIN stickers) with per-pixel min(B,G,R). White text stays 255 in all channels while colored backgrounds drop to their weakest channel value, giving 2x contrast improvement. Add morphological opening after thresholding to remove noise speckles from car body surface that were confusing Tesseract's page segmentation. Co-Authored-By: Claude Opus 4.6 --- ocr/app/preprocessors/vin_preprocessor.py | 61 +++++++++++++---------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index aee5a86..cd9b388 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -115,6 +115,7 @@ class VinPreprocessor: # Apply adaptive thresholding if apply_threshold: gray = self._adaptive_threshold(gray) + gray = self._morphological_cleanup(gray) steps_applied.append("threshold") # Convert back to PNG bytes @@ -152,43 +153,34 @@ class VinPreprocessor: def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray: """ - Select the single color channel with the highest contrast. + Compute a grayscale image that maximizes text-to-background contrast. - Standard grayscale conversion (0.299R + 0.587G + 0.114B) averages - channels, which destroys contrast when text and background differ - primarily in one channel. For example, white text on a green VIN - sticker has almost identical luminance, but the blue and red channels - show strong contrast. + Uses per-pixel minimum across B, G, R channels. White text has + min(255,255,255) = 255 regardless of channel, while any colored + background has a low value in at least one channel (e.g. green + sticker: min(130,230,150) = 130). This gives ~125 units of + contrast vs ~60 from standard grayscale. - This method evaluates each BGR channel by its standard deviation - (a proxy for contrast) and returns the one with the highest value. - Falls back to standard grayscale when all channels are similar. + Falls back to standard grayscale when the min-channel doesn't + improve contrast (i.e. for already-neutral/gray images). """ b_channel, g_channel, r_channel = cv2.split(bgr_image) - stds = [ - float(np.std(b_channel)), - float(np.std(g_channel)), - float(np.std(r_channel)), - ] - channels = [b_channel, g_channel, r_channel] - channel_names = ["blue", "green", "red"] + min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel) - best_idx = int(np.argmax(stds)) - max_std = stds[best_idx] - min_std = min(stds) + min_std = float(np.std(min_channel)) + gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) + gray_std = float(np.std(gray)) - # Only use single-channel extraction when one channel is notably - # better (>20% higher std than the weakest). Otherwise, standard - # grayscale is fine and more robust for neutral-colored images. - if max_std > 0 and (max_std - min_std) / max_std > 0.20: + # Use min-channel when it provides meaningfully more contrast + if min_std > gray_std * 1.1: logger.debug( - "Using %s channel (std=%.1f) over grayscale (stds: B=%.1f G=%.1f R=%.1f)", - channel_names[best_idx], max_std, stds[0], stds[1], stds[2], + "Using min-channel (std=%.1f) over grayscale (std=%.1f)", + min_std, gray_std, ) - return channels[best_idx] + return min_channel - return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) + return gray def _apply_clahe(self, image: np.ndarray) -> np.ndarray: """ @@ -309,6 +301,20 @@ class VinPreprocessor: logger.warning(f"Adaptive threshold failed: {e}") return image + def _morphological_cleanup(self, image: np.ndarray) -> np.ndarray: + """ + Remove small noise artifacts from a thresholded binary image. + + Morphological opening (erosion then dilation) removes isolated + pixels and thin noise lines while preserving larger text characters. + """ + try: + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)) + return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel) + except cv2.error as e: + logger.warning(f"Morphological cleanup failed: {e}") + return image + def _otsu_threshold(self, image: np.ndarray) -> np.ndarray: """ Apply Otsu's thresholding for binarization. @@ -361,6 +367,7 @@ class VinPreprocessor: steps_applied.append("denoise") gray = self._otsu_threshold(gray) + gray = self._morphological_cleanup(gray) steps_applied.append("otsu_threshold") result_image = Image.fromarray(gray) -- 2.49.1 From 63c027a454e817942a84a40ca2662bc01dda01da Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:32:52 -0600 Subject: [PATCH 11/15] fix: always use min-channel and add grayscale-only OCR path (refs #113) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes: 1. Always use min-channel for color images instead of gated comparison that was falling back to standard grayscale (which has only 23% contrast for white-on-green VIN stickers). 2. Add grayscale-only OCR path (CLAHE + denoise, no thresholding) between adaptive and Otsu attempts. Tesseract's LSTM engine is designed to handle grayscale input directly and often outperforms binarized input where thresholding creates artifacts. Pipeline order: adaptive threshold → grayscale-only → Otsu threshold Co-Authored-By: Claude Opus 4.6 --- ocr/app/extractors/vin_extractor.py | 28 +++++++++++++++++++++++ ocr/app/preprocessors/vin_preprocessor.py | 20 ++++++++-------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index 9d58501..66a694b 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -146,6 +146,34 @@ class VinExtractor(BaseExtractor): # No VIN candidates found - try with different PSM modes candidates = self._try_alternate_ocr(preprocessed_bytes) + if not candidates: + # Try grayscale-only (no thresholding) — the Tesseract + # LSTM engine often performs better on non-binarized input + # because it does its own internal preprocessing. + gray_result = vin_preprocessor.preprocess( + image_bytes, apply_threshold=False + ) + logger.debug( + "Grayscale preprocessing steps: %s", + gray_result.preprocessing_applied, + ) + if debug_session: + self._save_debug_image( + debug_session, "04_preprocessed_gray.png", + gray_result.image_bytes, + ) + + raw_text, word_confidences = self._perform_ocr( + gray_result.image_bytes + ) + logger.debug("Gray PSM 6 raw text: '%s'", raw_text) + candidates = vin_validator.extract_candidates(raw_text) + logger.debug("Gray PSM 6 candidates: %s", candidates) + if not candidates: + candidates = self._try_alternate_ocr( + gray_result.image_bytes, prefix="Gray" + ) + if not candidates: # Try alternative preprocessing (Otsu's thresholding) otsu_result = vin_preprocessor.preprocess_otsu(image_bytes) diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index cd9b388..1362b12 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -167,20 +167,20 @@ class VinPreprocessor: b_channel, g_channel, r_channel = cv2.split(bgr_image) min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel) + gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) min_std = float(np.std(min_channel)) - gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) gray_std = float(np.std(gray)) + logger.debug( + "Channel contrast: min-channel std=%.1f, grayscale std=%.1f", + min_std, gray_std, + ) - # Use min-channel when it provides meaningfully more contrast - if min_std > gray_std * 1.1: - logger.debug( - "Using min-channel (std=%.1f) over grayscale (std=%.1f)", - min_std, gray_std, - ) - return min_channel - - return gray + # Always use min-channel for VIN images. White text keeps + # min(B,G,R)=255 while any colored background drops to its + # weakest channel. For neutral images the result is equivalent + # to grayscale, so there is no downside. + return min_channel def _apply_clahe(self, image: np.ndarray) -> np.ndarray: """ -- 2.49.1 From ae5221c75953d602aea175d7f2ddbb5dbbda1843 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:39:48 -0600 Subject: [PATCH 12/15] fix: invert min-channel so Tesseract gets dark-on-light text (refs #113) The min-channel correctly extracts contrast (white text=255 vs green sticker bg=130), but Tesseract expects dark text on light background. Without inversion, the grayscale-only path returned empty text for every PSM mode because Tesseract couldn't see bright-on-dark text. Invert via bitwise_not: text becomes 0 (black), sticker bg becomes 125 (gray). Fixes all three OCR paths (adaptive, grayscale, Otsu). Co-Authored-By: Claude Opus 4.6 --- ocr/app/preprocessors/vin_preprocessor.py | 34 +++++++++++------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index 1362b12..290fb5b 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -153,34 +153,32 @@ class VinPreprocessor: def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray: """ - Compute a grayscale image that maximizes text-to-background contrast. + Compute a grayscale image with dark text on light background. - Uses per-pixel minimum across B, G, R channels. White text has - min(255,255,255) = 255 regardless of channel, while any colored - background has a low value in at least one channel (e.g. green - sticker: min(130,230,150) = 130). This gives ~125 units of - contrast vs ~60 from standard grayscale. + Uses inverted per-pixel minimum across B, G, R channels. + White text has min(255,255,255) = 255 → inverted to 0 (black). + Colored backgrounds have a low min value (e.g. green sticker: + min(130,230,150) = 130) → inverted to 125 (medium gray). - Falls back to standard grayscale when the min-channel doesn't - improve contrast (i.e. for already-neutral/gray images). + The inversion ensures Tesseract always receives dark-text-on- + light-background, which is the polarity it expects. """ b_channel, g_channel, r_channel = cv2.split(bgr_image) min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel) - gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) - min_std = float(np.std(min_channel)) - gray_std = float(np.std(gray)) + # Invert so white text (min=255) becomes black (0) and colored + # backgrounds (min~130) become lighter gray (~125). Tesseract + # expects dark text on light background. + inverted = cv2.bitwise_not(min_channel) + + gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) logger.debug( - "Channel contrast: min-channel std=%.1f, grayscale std=%.1f", - min_std, gray_std, + "Channel contrast: inverted-min std=%.1f, grayscale std=%.1f", + float(np.std(inverted)), float(np.std(gray)), ) - # Always use min-channel for VIN images. White text keeps - # min(B,G,R)=255 while any colored background drops to its - # weakest channel. For neutral images the result is equivalent - # to grayscale, so there is no downside. - return min_channel + return inverted def _apply_clahe(self, image: np.ndarray) -> np.ndarray: """ -- 2.49.1 From 432b3bda36e544f61586b5a792c349dd7d540e50 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:52:08 -0600 Subject: [PATCH 13/15] fix: remove char whitelist incompatible with Tesseract LSTM (refs #113) tessedit_char_whitelist does not work with OEM 1 (LSTM engine) and causes empty/erratic output. This was the root cause of Tesseract returning empty text despite clear, well-preprocessed images. Character filtering is already handled post-OCR by the VIN validator's correct_ocr_errors() method (I->1, O->0, Q->0, etc). Co-Authored-By: Claude Opus 4.6 --- ocr/app/extractors/vin_extractor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index 66a694b..1edca3f 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -299,11 +299,12 @@ class VinExtractor(BaseExtractor): # Configure Tesseract for VIN extraction # OEM 1 = LSTM neural network engine (best accuracy) - # Disable dictionaries since VINs are not dictionary words + # NOTE: tessedit_char_whitelist does NOT work with OEM 1 (LSTM). + # Using it causes empty/erratic output. Character filtering is + # handled post-OCR by vin_validator.correct_ocr_errors() instead. config = ( f"--psm {psm} " f"--oem 1 " - f"-c tessedit_char_whitelist={self.VIN_WHITELIST} " f"-c load_system_dawg=false " f"-c load_freq_dawg=false" ) -- 2.49.1 From e4336ce9da80b6c0bbf200d41f0fe9e240228a36 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 22:00:07 -0600 Subject: [PATCH 14/15] fix: extract VIN from noisy OCR via sliding window + char deletion (refs #113) When OCR reads extra characters (e.g. sticker border as 'C', spurious 'Z' insertion), the raw text exceeds 17 chars and the old first-17 trim produced wrong VINs. New strategy tries all 17-char sliding windows and single/double character deletions, validating each via check digit. For 'CWVGGNPE2Z4NP069500', this finds the correct VIN 'WVGGNPE24NP069500' (valid check digit) instead of 'CWVGGNPE2Z4NP0695' (invalid). Co-Authored-By: Claude Opus 4.6 --- ocr/app/validators/vin_validator.py | 48 ++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/ocr/app/validators/vin_validator.py b/ocr/app/validators/vin_validator.py index 7c74ae9..c9c60ef 100644 --- a/ocr/app/validators/vin_validator.py +++ b/ocr/app/validators/vin_validator.py @@ -290,14 +290,48 @@ class VinValidator: """Try to add a corrected VIN candidate if it passes validation.""" corrected = self.correct_ocr_errors(raw) - # Trim to 17 chars if OCR captured extra characters - if len(corrected) > 17: - corrected = corrected[:17] + if len(corrected) == 17: + self._add_if_valid(corrected, start, end, candidates, seen_vins) + return - if len(corrected) == 17 and self.MODERN_VIN_PATTERN.match(corrected): - if corrected not in seen_vins: - seen_vins.add(corrected) - candidates.append((corrected, start, end)) + if len(corrected) > 17: + # Strategy A: try every 17-char sliding window + for i in range(len(corrected) - 16): + window = corrected[i : i + 17] + self._add_if_valid(window, start, end, candidates, seen_vins) + + # Strategy B: for 18-19 char strings, try deleting each + # character one at a time. OCR often inserts a spurious + # character (e.g. sticker border read as 'C') that breaks + # the VIN. Check-digit validation filters out false hits. + if len(corrected) <= 19: + for i in range(len(corrected)): + reduced = corrected[:i] + corrected[i + 1 :] + if len(reduced) == 17: + self._add_if_valid( + reduced, start, end, candidates, seen_vins + ) + elif len(reduced) == 18: + # Two deletions needed — try removing one more + for j in range(len(reduced)): + reduced2 = reduced[:j] + reduced[j + 1 :] + self._add_if_valid( + reduced2, start, end, candidates, seen_vins + ) + + def _add_if_valid( + self, + vin: str, + start: int, + end: int, + candidates: list[tuple[str, int, int]], + seen_vins: set[str], + ) -> None: + """Add a 17-char VIN to candidates if it matches the pattern.""" + if len(vin) == 17 and self.MODERN_VIN_PATTERN.match(vin): + if vin not in seen_vins: + seen_vins.add(vin) + candidates.append((vin, start, end)) # Singleton instance -- 2.49.1 From 75ce316aa5f8ba91fd2cb5cbb3ee21ca2d5acd02 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Fri, 6 Feb 2026 22:15:39 -0600 Subject: [PATCH 15/15] chore: Change crop to remove locked aspect ratio --- .../components/CameraCapture/CropTool.tsx | 242 ++++++++++-------- .../components/CameraCapture/useImageCrop.ts | 96 ++++++- 2 files changed, 222 insertions(+), 116 deletions(-) diff --git a/frontend/src/shared/components/CameraCapture/CropTool.tsx b/frontend/src/shared/components/CameraCapture/CropTool.tsx index 09f8b54..b1fd655 100644 --- a/frontend/src/shared/components/CameraCapture/CropTool.tsx +++ b/frontend/src/shared/components/CameraCapture/CropTool.tsx @@ -25,11 +25,13 @@ export const CropTool: React.FC = ({ const imageAreaRef = useRef(null); const [imageMaxHeight, setImageMaxHeight] = useState(0); - const { cropArea, isDragging, resetCrop, executeCrop, handleDragStart } = + const { cropArea, cropDrawn, isDragging, resetCrop, executeCrop, handleDragStart, handleDrawStart } = useImageCrop({ aspectRatio: lockAspectRatio ? aspectRatio : undefined, }); + const showCropArea = cropDrawn || (isDragging && cropArea.width > 1 && cropArea.height > 1); + // Measure available height for the image so the crop container // matches the rendered image exactly (fixes mobile crop offset) useEffect(() => { @@ -108,126 +110,150 @@ export const CropTool: React.FC = ({ draggable={false} /> + {/* Draw surface for free-form rectangle drawing */} + {!cropDrawn && ( + + )} + {/* Dark overlay outside crop area */} - - {/* Top overlay */} - - {/* Bottom overlay */} - - {/* Left overlay */} - - {/* Right overlay */} - - - - {/* Crop area with handles */} - - {/* Move handle (center area) */} - - - {/* Corner handles */} - - - - - - {/* Edge handles */} - - - - - - {/* Grid lines for alignment */} + {showCropArea && ( - {Array.from({ length: 9 }).map((_, i) => ( - - ))} + {/* Top overlay */} + + {/* Bottom overlay */} + + {/* Left overlay */} + + {/* Right overlay */} + - + )} + + {/* Crop area border and handles */} + {showCropArea && ( + + {/* Handles only appear after drawing is complete */} + {cropDrawn && ( + <> + {/* Move handle (center area) */} + + + {/* Corner handles */} + + + + + + {/* Edge handles */} + + + + + + )} + + {/* Grid lines for alignment */} + + {Array.from({ length: 9 }).map((_, i) => ( + + ))} + + + )} {/* Instructions */} - Drag to adjust crop area + {cropDrawn ? 'Drag handles to adjust crop area' : 'Tap and drag to select crop area'} @@ -255,7 +281,7 @@ export const CropTool: React.FC = ({ onClick={handleReset} startIcon={} sx={{ color: 'white' }} - disabled={isProcessing} + disabled={isProcessing || !cropDrawn} > Reset @@ -271,7 +297,7 @@ export const CropTool: React.FC = ({ void; - /** Reset crop to initial/default */ + /** Reset crop to drawing mode */ resetCrop: () => void; /** Execute crop and return cropped blob */ executeCrop: (imageSrc: string, mimeType?: string) => Promise; /** Handle drag start for crop handles */ handleDragStart: (handle: CropHandle, event: React.MouseEvent | React.TouchEvent) => void; + /** Handle draw start for free-form rectangle drawing */ + handleDrawStart: (event: React.MouseEvent | React.TouchEvent) => void; /** Handle move during drag */ handleMove: (event: MouseEvent | TouchEvent) => void; /** Handle drag end */ @@ -78,12 +82,22 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet const [cropArea, setCropAreaState] = useState( getAspectRatioAdjustedCrop(initialCrop) ); + const [cropDrawn, setCropDrawn] = useState(false); const [isDragging, setIsDragging] = useState(false); const activeHandleRef = useRef(null); const startPosRef = useRef({ x: 0, y: 0 }); const startCropRef = useRef(cropArea); - const containerRef = useRef<{ width: number; height: number }>({ width: 100, height: 100 }); + const containerRef = useRef<{ width: number; height: number; left: number; top: number }>({ + width: 100, height: 100, left: 0, top: 0, + }); + const isDrawingRef = useRef(false); + const drawOriginRef = useRef({ x: 0, y: 0 }); + const cropAreaRef = useRef(cropArea); + + useEffect(() => { + cropAreaRef.current = cropArea; + }, [cropArea]); const setCropArea = useCallback( (area: CropArea) => { @@ -94,6 +108,7 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet const resetCrop = useCallback(() => { setCropAreaState(getAspectRatioAdjustedCrop(initialCrop)); + setCropDrawn(false); }, [initialCrop, getAspectRatioAdjustedCrop]); const constrainCrop = useCallback( @@ -136,19 +151,75 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet const container = target.closest('[data-crop-container]'); if (container) { const rect = container.getBoundingClientRect(); - containerRef.current = { width: rect.width, height: rect.height }; + containerRef.current = { width: rect.width, height: rect.height, left: rect.left, top: rect.top }; } }, [cropArea] ); - const handleMove = useCallback( - (event: MouseEvent | TouchEvent) => { - if (!activeHandleRef.current || !isDragging) return; + const handleDrawStart = useCallback( + (event: React.MouseEvent | React.TouchEvent) => { + event.preventDefault(); + + const target = event.currentTarget as HTMLElement; + const container = target.closest('[data-crop-container]'); + if (!container) return; + + const rect = container.getBoundingClientRect(); + containerRef.current = { width: rect.width, height: rect.height, left: rect.left, top: rect.top }; const clientX = 'touches' in event ? event.touches[0].clientX : event.clientX; const clientY = 'touches' in event ? event.touches[0].clientY : event.clientY; + const x = Math.max(0, Math.min(100, ((clientX - rect.left) / rect.width) * 100)); + const y = Math.max(0, Math.min(100, ((clientY - rect.top) / rect.height) * 100)); + + startPosRef.current = { x: clientX, y: clientY }; + drawOriginRef.current = { x, y }; + + setCropAreaState({ x, y, width: 0, height: 0 }); + + isDrawingRef.current = true; + activeHandleRef.current = null; + setIsDragging(true); + }, + [] + ); + + const handleMove = useCallback( + (event: MouseEvent | TouchEvent) => { + if (!isDragging) return; + + const clientX = 'touches' in event ? event.touches[0].clientX : event.clientX; + const clientY = 'touches' in event ? event.touches[0].clientY : event.clientY; + + // Free-form drawing mode: compute rectangle from origin to current pointer + if (isDrawingRef.current) { + const currentX = Math.max(0, Math.min(100, + ((clientX - containerRef.current.left) / containerRef.current.width) * 100)); + const currentY = Math.max(0, Math.min(100, + ((clientY - containerRef.current.top) / containerRef.current.height) * 100)); + + const originX = drawOriginRef.current.x; + const originY = drawOriginRef.current.y; + + let newCrop: CropArea = { + x: Math.min(originX, currentX), + y: Math.min(originY, currentY), + width: Math.abs(currentX - originX), + height: Math.abs(currentY - originY), + }; + + if (aspectRatio) { + newCrop.height = newCrop.width / aspectRatio; + } + + setCropAreaState(newCrop); + return; + } + + if (!activeHandleRef.current) return; + // Calculate delta as percentage of container const deltaX = ((clientX - startPosRef.current.x) / containerRef.current.width) * 100; const deltaY = ((clientY - startPosRef.current.y) / containerRef.current.height) * 100; @@ -234,13 +305,20 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet setCropAreaState(constrainCrop(newCrop)); }, - [isDragging, constrainCrop] + [isDragging, constrainCrop, aspectRatio] ); const handleDragEnd = useCallback(() => { + if (isDrawingRef.current) { + isDrawingRef.current = false; + const area = cropAreaRef.current; + if (area.width >= minSize && area.height >= minSize) { + setCropDrawn(true); + } + } activeHandleRef.current = null; setIsDragging(false); - }, []); + }, [minSize]); // Add global event listeners for drag useEffect(() => { @@ -320,11 +398,13 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet return { cropArea, + cropDrawn, isDragging, setCropArea, resetCrop, executeCrop, handleDragStart, + handleDrawStart, handleMove, handleDragEnd, }; -- 2.49.1