diff --git a/.ai/context.json b/.ai/context.json index 736a130..1e2c4bd 100644 --- a/.ai/context.json +++ b/.ai/context.json @@ -5,7 +5,8 @@ "host": "gitea", "owner": "egullickson", "repo": "motovaultpro", - "url": "https://git.motovaultpro.com" + "url": "https://git.motovaultpro.com", + "default_branch": "main" }, "ai_quick_start": { "load_order": [ diff --git a/config/traefik/traefik.yml b/config/traefik/traefik.yml index 4f7f779..b9e3a7f 100755 --- a/config/traefik/traefik.yml +++ b/config/traefik/traefik.yml @@ -52,7 +52,7 @@ global: # Logging log: - level: INFO + level: DEBUG format: json # Access logs diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 65151a8..076aeaa 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -9,16 +9,13 @@ # - Development-specific settings services: - # PostgreSQL - Remove development port exposure - mvp-postgres: - ports: [] - - # Redis - Remove development port exposure - mvp-redis: - ports: [] - - # Traefik - Ensure dashboard authentication is enforced + # Traefik - Production log level and dashboard auth mvp-traefik: + environment: + LOG_LEVEL: error + command: + - --configFile=/etc/traefik/traefik.yml + - --log.level=ERROR labels: - "traefik.enable=true" - "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.motovaultpro.local`)" @@ -26,3 +23,45 @@ services: - "traefik.http.routers.traefik-dashboard.middlewares=dashboard-auth" - "traefik.http.services.traefik-dashboard.loadbalancer.server.port=8080" - "traefik.http.middlewares.dashboard-auth.basicauth.users=admin:$$2y$$10$$foobar" + + # Backend - Production log level + mvp-backend: + environment: + NODE_ENV: production + CONFIG_PATH: /app/config/production.yml + SECRETS_DIR: /run/secrets + LOG_LEVEL: error + DATABASE_HOST: mvp-postgres + REDIS_HOST: mvp-redis + STRIPE_PRO_MONTHLY_PRICE_ID: prod_Toj6BG9Z9JwREl + STRIPE_PRO_YEARLY_PRICE_ID: prod_Toj8oo0RpVBQmB + STRIPE_ENTERPRISE_MONTHLY_PRICE_ID: prod_Toj8xGEui9jl6j + STRIPE_ENTERPRISE_YEARLY_PRICE_ID: prod_Toj9A7A773xrdn + + # OCR - Production log level + mvp-ocr: + environment: + LOG_LEVEL: error + REDIS_HOST: mvp-redis + REDIS_PORT: 6379 + REDIS_DB: 1 + + # PostgreSQL - Remove dev ports, production log level + mvp-postgres: + ports: [] + environment: + POSTGRES_DB: motovaultpro + POSTGRES_USER: postgres + POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password + POSTGRES_INITDB_ARGS: --encoding=UTF8 + LOG_LEVEL: error + POSTGRES_LOG_STATEMENT: none + POSTGRES_LOG_MIN_DURATION_STATEMENT: -1 + PGDATA: /var/lib/postgresql/data + + # Redis - Remove dev ports, production log level + mvp-redis: + ports: [] + command: redis-server --appendonly yes --loglevel ${LOG_LEVEL:-info} + environment: + LOG_LEVEL: error diff --git a/docker-compose.yml b/docker-compose.yml index 0d83e30..b79e2c9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: command: - --configFile=/etc/traefik/traefik.yml environment: + LOG_LEVEL: debug CLOUDFLARE_DNS_API_TOKEN_FILE: /run/secrets/cloudflare-dns-token ports: - "80:80" @@ -112,6 +113,7 @@ services: NODE_ENV: production CONFIG_PATH: /app/config/production.yml SECRETS_DIR: /run/secrets + LOG_LEVEL: debug # Service references DATABASE_HOST: mvp-postgres REDIS_HOST: mvp-redis @@ -187,10 +189,12 @@ services: container_name: mvp-ocr restart: unless-stopped environment: - LOG_LEVEL: info + LOG_LEVEL: debug REDIS_HOST: mvp-redis REDIS_PORT: 6379 REDIS_DB: 1 + volumes: + - /tmp/vin-debug:/tmp/vin-debug networks: - backend - database @@ -218,8 +222,10 @@ services: POSTGRES_USER: postgres POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password POSTGRES_INITDB_ARGS: --encoding=UTF8 - POSTGRES_LOG_STATEMENT: ${POSTGRES_LOG_STATEMENT:-ddl} - POSTGRES_LOG_MIN_DURATION_STATEMENT: ${POSTGRES_LOG_MIN_DURATION:-500} + LOG_LEVEL: debug + POSTGRES_LOG_STATEMENT: all + POSTGRES_LOG_MIN_DURATION_STATEMENT: 0 + PGDATA: /var/lib/postgresql/data volumes: - mvp_postgres_data:/var/lib/postgresql/data # Secrets (K8s Secrets equivalent) @@ -245,7 +251,9 @@ services: image: ${REGISTRY_MIRRORS:-git.motovaultpro.com/egullickson/mirrors}/redis:8.4-alpine container_name: mvp-redis restart: unless-stopped - command: redis-server --appendonly yes --loglevel ${REDIS_LOGLEVEL:-notice} + command: redis-server --appendonly yes --loglevel ${LOG_LEVEL:-info} + environment: + LOG_LEVEL: debug volumes: - mvp_redis_data:/data networks: diff --git a/frontend/src/shared/components/CameraCapture/CropTool.tsx b/frontend/src/shared/components/CameraCapture/CropTool.tsx index 1d3a750..b1fd655 100644 --- a/frontend/src/shared/components/CameraCapture/CropTool.tsx +++ b/frontend/src/shared/components/CameraCapture/CropTool.tsx @@ -3,7 +3,7 @@ * @ai-context Allows user to adjust crop area with touch/mouse, confirm or retake */ -import React, { useCallback, useState } from 'react'; +import React, { useCallback, useState, useRef, useEffect } from 'react'; import { Box, IconButton, Button, Typography, CircularProgress } from '@mui/material'; import CheckIcon from '@mui/icons-material/Check'; import RefreshIcon from '@mui/icons-material/Refresh'; @@ -22,12 +22,30 @@ export const CropTool: React.FC = ({ onSkip, }) => { const [isProcessing, setIsProcessing] = useState(false); + const imageAreaRef = useRef(null); + const [imageMaxHeight, setImageMaxHeight] = useState(0); - const { cropArea, isDragging, resetCrop, executeCrop, handleDragStart } = + const { cropArea, cropDrawn, isDragging, resetCrop, executeCrop, handleDragStart, handleDrawStart } = useImageCrop({ aspectRatio: lockAspectRatio ? aspectRatio : undefined, }); + const showCropArea = cropDrawn || (isDragging && cropArea.width > 1 && cropArea.height > 1); + + // Measure available height for the image so the crop container + // matches the rendered image exactly (fixes mobile crop offset) + useEffect(() => { + const updateMaxHeight = () => { + if (imageAreaRef.current) { + const rect = imageAreaRef.current.getBoundingClientRect(); + setImageMaxHeight(rect.height - 32); // subtract p:2 padding (16px * 2) + } + }; + updateMaxHeight(); + window.addEventListener('resize', updateMaxHeight); + return () => window.removeEventListener('resize', updateMaxHeight); + }, []); + const handleConfirm = useCallback(async () => { setIsProcessing(true); try { @@ -61,6 +79,7 @@ export const CropTool: React.FC = ({ > {/* Image with crop overlay */} = ({ data-crop-container sx={{ position: 'relative', - maxWidth: '100%', - maxHeight: '100%', userSelect: 'none', touchAction: isDragging ? 'none' : 'auto', }} @@ -87,132 +104,156 @@ export const CropTool: React.FC = ({ alt="Captured" style={{ maxWidth: '100%', - maxHeight: '100%', + maxHeight: imageMaxHeight > 0 ? `${imageMaxHeight}px` : '70vh', display: 'block', }} draggable={false} /> + {/* Draw surface for free-form rectangle drawing */} + {!cropDrawn && ( + + )} + {/* Dark overlay outside crop area */} - - {/* Top overlay */} - - {/* Bottom overlay */} - - {/* Left overlay */} - - {/* Right overlay */} - - - - {/* Crop area with handles */} - - {/* Move handle (center area) */} - - - {/* Corner handles */} - - - - - - {/* Edge handles */} - - - - - - {/* Grid lines for alignment */} + {showCropArea && ( - {Array.from({ length: 9 }).map((_, i) => ( - - ))} + {/* Top overlay */} + + {/* Bottom overlay */} + + {/* Left overlay */} + + {/* Right overlay */} + - + )} + + {/* Crop area border and handles */} + {showCropArea && ( + + {/* Handles only appear after drawing is complete */} + {cropDrawn && ( + <> + {/* Move handle (center area) */} + + + {/* Corner handles */} + + + + + + {/* Edge handles */} + + + + + + )} + + {/* Grid lines for alignment */} + + {Array.from({ length: 9 }).map((_, i) => ( + + ))} + + + )} {/* Instructions */} - Drag to adjust crop area + {cropDrawn ? 'Drag handles to adjust crop area' : 'Tap and drag to select crop area'} @@ -240,7 +281,7 @@ export const CropTool: React.FC = ({ onClick={handleReset} startIcon={} sx={{ color: 'white' }} - disabled={isProcessing} + disabled={isProcessing || !cropDrawn} > Reset @@ -256,7 +297,7 @@ export const CropTool: React.FC = ({ void; - /** Reset crop to initial/default */ + /** Reset crop to drawing mode */ resetCrop: () => void; /** Execute crop and return cropped blob */ executeCrop: (imageSrc: string, mimeType?: string) => Promise; /** Handle drag start for crop handles */ handleDragStart: (handle: CropHandle, event: React.MouseEvent | React.TouchEvent) => void; + /** Handle draw start for free-form rectangle drawing */ + handleDrawStart: (event: React.MouseEvent | React.TouchEvent) => void; /** Handle move during drag */ handleMove: (event: MouseEvent | TouchEvent) => void; /** Handle drag end */ @@ -78,12 +82,22 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet const [cropArea, setCropAreaState] = useState( getAspectRatioAdjustedCrop(initialCrop) ); + const [cropDrawn, setCropDrawn] = useState(false); const [isDragging, setIsDragging] = useState(false); const activeHandleRef = useRef(null); const startPosRef = useRef({ x: 0, y: 0 }); const startCropRef = useRef(cropArea); - const containerRef = useRef<{ width: number; height: number }>({ width: 100, height: 100 }); + const containerRef = useRef<{ width: number; height: number; left: number; top: number }>({ + width: 100, height: 100, left: 0, top: 0, + }); + const isDrawingRef = useRef(false); + const drawOriginRef = useRef({ x: 0, y: 0 }); + const cropAreaRef = useRef(cropArea); + + useEffect(() => { + cropAreaRef.current = cropArea; + }, [cropArea]); const setCropArea = useCallback( (area: CropArea) => { @@ -94,6 +108,7 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet const resetCrop = useCallback(() => { setCropAreaState(getAspectRatioAdjustedCrop(initialCrop)); + setCropDrawn(false); }, [initialCrop, getAspectRatioAdjustedCrop]); const constrainCrop = useCallback( @@ -136,19 +151,75 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet const container = target.closest('[data-crop-container]'); if (container) { const rect = container.getBoundingClientRect(); - containerRef.current = { width: rect.width, height: rect.height }; + containerRef.current = { width: rect.width, height: rect.height, left: rect.left, top: rect.top }; } }, [cropArea] ); - const handleMove = useCallback( - (event: MouseEvent | TouchEvent) => { - if (!activeHandleRef.current || !isDragging) return; + const handleDrawStart = useCallback( + (event: React.MouseEvent | React.TouchEvent) => { + event.preventDefault(); + + const target = event.currentTarget as HTMLElement; + const container = target.closest('[data-crop-container]'); + if (!container) return; + + const rect = container.getBoundingClientRect(); + containerRef.current = { width: rect.width, height: rect.height, left: rect.left, top: rect.top }; const clientX = 'touches' in event ? event.touches[0].clientX : event.clientX; const clientY = 'touches' in event ? event.touches[0].clientY : event.clientY; + const x = Math.max(0, Math.min(100, ((clientX - rect.left) / rect.width) * 100)); + const y = Math.max(0, Math.min(100, ((clientY - rect.top) / rect.height) * 100)); + + startPosRef.current = { x: clientX, y: clientY }; + drawOriginRef.current = { x, y }; + + setCropAreaState({ x, y, width: 0, height: 0 }); + + isDrawingRef.current = true; + activeHandleRef.current = null; + setIsDragging(true); + }, + [] + ); + + const handleMove = useCallback( + (event: MouseEvent | TouchEvent) => { + if (!isDragging) return; + + const clientX = 'touches' in event ? event.touches[0].clientX : event.clientX; + const clientY = 'touches' in event ? event.touches[0].clientY : event.clientY; + + // Free-form drawing mode: compute rectangle from origin to current pointer + if (isDrawingRef.current) { + const currentX = Math.max(0, Math.min(100, + ((clientX - containerRef.current.left) / containerRef.current.width) * 100)); + const currentY = Math.max(0, Math.min(100, + ((clientY - containerRef.current.top) / containerRef.current.height) * 100)); + + const originX = drawOriginRef.current.x; + const originY = drawOriginRef.current.y; + + let newCrop: CropArea = { + x: Math.min(originX, currentX), + y: Math.min(originY, currentY), + width: Math.abs(currentX - originX), + height: Math.abs(currentY - originY), + }; + + if (aspectRatio) { + newCrop.height = newCrop.width / aspectRatio; + } + + setCropAreaState(newCrop); + return; + } + + if (!activeHandleRef.current) return; + // Calculate delta as percentage of container const deltaX = ((clientX - startPosRef.current.x) / containerRef.current.width) * 100; const deltaY = ((clientY - startPosRef.current.y) / containerRef.current.height) * 100; @@ -234,13 +305,20 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet setCropAreaState(constrainCrop(newCrop)); }, - [isDragging, constrainCrop] + [isDragging, constrainCrop, aspectRatio] ); const handleDragEnd = useCallback(() => { + if (isDrawingRef.current) { + isDrawingRef.current = false; + const area = cropAreaRef.current; + if (area.width >= minSize && area.height >= minSize) { + setCropDrawn(true); + } + } activeHandleRef.current = null; setIsDragging(false); - }, []); + }, [minSize]); // Add global event listeners for drag useEffect(() => { @@ -304,7 +382,7 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet } }, mimeType, - 0.92 + 0.95 ); }; @@ -320,11 +398,13 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet return { cropArea, + cropDrawn, isDragging, setCropArea, resetCrop, executeCrop, handleDragStart, + handleDrawStart, handleMove, handleDragEnd, }; diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index 37fdad1..1edca3f 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -1,8 +1,10 @@ """VIN-specific OCR extractor with preprocessing and validation.""" import io import logging +import os import time from dataclasses import dataclass, field +from datetime import datetime from typing import Optional import magic @@ -57,9 +59,31 @@ class VinExtractor(BaseExtractor): # VIN character whitelist for Tesseract VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789" + # Fixed debug output directory (inside container) + DEBUG_DIR = "/tmp/vin-debug" + def __init__(self) -> None: """Initialize VIN extractor.""" pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd + self._debug = settings.log_level.upper() == "DEBUG" + + def _save_debug_image(self, session_dir: str, name: str, data: bytes) -> None: + """Save image bytes to the debug session directory when LOG_LEVEL=debug.""" + if not self._debug: + return + path = os.path.join(session_dir, name) + with open(path, "wb") as f: + f.write(data) + logger.debug("Saved debug image: %s (%d bytes)", name, len(data)) + + def _create_debug_session(self) -> Optional[str]: + """Create a timestamped debug directory. Returns path or None.""" + if not self._debug: + return None + ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f") + session_dir = os.path.join(self.DEBUG_DIR, ts) + os.makedirs(session_dir, exist_ok=True) + return session_dir def extract( self, image_bytes: bytes, content_type: Optional[str] = None @@ -89,21 +113,91 @@ class VinExtractor(BaseExtractor): ) try: + debug_session = self._create_debug_session() + + logger.debug( + "VIN extraction input: %d bytes, content_type=%s", + len(image_bytes), content_type, + ) + if debug_session: + self._save_debug_image(debug_session, "01_original.jpg", image_bytes) + # Apply VIN-optimized preprocessing preprocessing_result = vin_preprocessor.preprocess(image_bytes) preprocessed_bytes = preprocessing_result.image_bytes + logger.debug( + "Preprocessing steps: %s", preprocessing_result.preprocessing_applied + ) + if debug_session: + self._save_debug_image( + debug_session, "02_preprocessed_adaptive.png", preprocessed_bytes + ) # Perform OCR with VIN-optimized settings raw_text, word_confidences = self._perform_ocr(preprocessed_bytes) + logger.debug("PSM 6 raw text: '%s'", raw_text) + logger.debug("PSM 6 word confidences: %s", word_confidences) # Extract VIN candidates from raw text candidates = vin_validator.extract_candidates(raw_text) + logger.debug("PSM 6 candidates: %s", candidates) if not candidates: # No VIN candidates found - try with different PSM modes candidates = self._try_alternate_ocr(preprocessed_bytes) if not candidates: + # Try grayscale-only (no thresholding) — the Tesseract + # LSTM engine often performs better on non-binarized input + # because it does its own internal preprocessing. + gray_result = vin_preprocessor.preprocess( + image_bytes, apply_threshold=False + ) + logger.debug( + "Grayscale preprocessing steps: %s", + gray_result.preprocessing_applied, + ) + if debug_session: + self._save_debug_image( + debug_session, "04_preprocessed_gray.png", + gray_result.image_bytes, + ) + + raw_text, word_confidences = self._perform_ocr( + gray_result.image_bytes + ) + logger.debug("Gray PSM 6 raw text: '%s'", raw_text) + candidates = vin_validator.extract_candidates(raw_text) + logger.debug("Gray PSM 6 candidates: %s", candidates) + if not candidates: + candidates = self._try_alternate_ocr( + gray_result.image_bytes, prefix="Gray" + ) + + if not candidates: + # Try alternative preprocessing (Otsu's thresholding) + otsu_result = vin_preprocessor.preprocess_otsu(image_bytes) + logger.debug( + "Otsu preprocessing steps: %s", + otsu_result.preprocessing_applied, + ) + if debug_session: + self._save_debug_image( + debug_session, "03_preprocessed_otsu.png", + otsu_result.image_bytes, + ) + + raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes) + logger.debug("Otsu PSM 6 raw text: '%s'", raw_text) + candidates = vin_validator.extract_candidates(raw_text) + logger.debug("Otsu PSM 6 candidates: %s", candidates) + if not candidates: + candidates = self._try_alternate_ocr( + otsu_result.image_bytes, prefix="Otsu" + ) + + if not candidates: + logger.debug("No VIN pattern found in any OCR attempt") return VinExtractionResult( success=False, error="No VIN pattern found in image", @@ -153,8 +247,12 @@ class VinExtractor(BaseExtractor): processing_time_ms = int((time.time() - start_time) * 1000) logger.info( - f"VIN extraction: {primary_vin}, confidence={primary_confidence:.2%}, " - f"time={processing_time_ms}ms" + "VIN extraction: %s, confidence=%.2f%%, time=%dms", + primary_vin, primary_confidence * 100, processing_time_ms, + ) + logger.debug( + "VIN alternatives: %s", + [(a.vin, a.confidence) for a in alternatives], ) return VinExtractionResult( @@ -168,7 +266,7 @@ class VinExtractor(BaseExtractor): ) except Exception as e: - logger.error(f"VIN extraction failed: {e}", exc_info=True) + logger.error("VIN extraction failed: %s", e, exc_info=True) return VinExtractionResult( success=False, error=str(e), @@ -200,10 +298,15 @@ class VinExtractor(BaseExtractor): image = Image.open(io.BytesIO(image_bytes)) # Configure Tesseract for VIN extraction - # Use character whitelist to exclude I, O, Q + # OEM 1 = LSTM neural network engine (best accuracy) + # NOTE: tessedit_char_whitelist does NOT work with OEM 1 (LSTM). + # Using it causes empty/erratic output. Character filtering is + # handled post-OCR by vin_validator.correct_ocr_errors() instead. config = ( f"--psm {psm} " - f"-c tessedit_char_whitelist={self.VIN_WHITELIST}" + f"--oem 1 " + f"-c load_system_dawg=false " + f"-c load_freq_dawg=false" ) # Get detailed OCR data @@ -224,24 +327,31 @@ class VinExtractor(BaseExtractor): raw_text = " ".join(words) return raw_text, confidences - def _try_alternate_ocr(self, image_bytes: bytes) -> list[tuple[str, int, int]]: + def _try_alternate_ocr( + self, + image_bytes: bytes, + prefix: str = "", + ) -> list[tuple[str, int, int]]: """ Try alternate OCR configurations when initial extraction fails. + PSM modes tried in order: + 7 - Single text line + 8 - Single word + 11 - Sparse text (finds text in any order, good for angled photos) + 13 - Raw line (no Tesseract heuristics, good for clean VIN plates) + Returns: List of VIN candidates """ - # Try PSM 7 (single text line) - raw_text, _ = self._perform_ocr(image_bytes, psm=7) - candidates = vin_validator.extract_candidates(raw_text) - if candidates: - return candidates - - # Try PSM 8 (single word) - raw_text, _ = self._perform_ocr(image_bytes, psm=8) - candidates = vin_validator.extract_candidates(raw_text) - if candidates: - return candidates + tag = f"{prefix} " if prefix else "" + for psm in (7, 8, 11, 13): + raw_text, _ = self._perform_ocr(image_bytes, psm=psm) + logger.debug("%sPSM %d raw text: '%s'", tag, psm, raw_text) + candidates = vin_validator.extract_candidates(raw_text) + logger.debug("%sPSM %d candidates: %s", tag, psm, candidates) + if candidates: + return candidates return [] diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index e0ffbba..290fb5b 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -86,13 +86,17 @@ class VinPreprocessor: if len(cv_image.shape) == 3: cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) - # Convert to grayscale + # Convert to grayscale using best-contrast channel selection if len(cv_image.shape) == 3: - gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) + gray = self._best_contrast_channel(cv_image) else: gray = cv_image steps_applied.append("grayscale") + # Upscale small images for better OCR (Tesseract needs ~300 DPI) + gray = self._ensure_minimum_resolution(gray) + steps_applied.append("resolution_check") + # Apply deskew if apply_deskew: gray = self._deskew(gray) @@ -111,6 +115,7 @@ class VinPreprocessor: # Apply adaptive thresholding if apply_threshold: gray = self._adaptive_threshold(gray) + gray = self._morphological_cleanup(gray) steps_applied.append("threshold") # Convert back to PNG bytes @@ -123,6 +128,58 @@ class VinPreprocessor: preprocessing_applied=steps_applied, ) + # Minimum width in pixels for reliable VIN OCR. + # A 17-char VIN needs ~30px per character for Tesseract accuracy. + MIN_WIDTH_FOR_VIN = 600 + + def _ensure_minimum_resolution(self, image: np.ndarray) -> np.ndarray: + """ + Upscale image if too small for reliable OCR. + + Tesseract works best at ~300 DPI. Mobile photos of VINs may have + the text occupy only a small portion of the frame, resulting in + low effective resolution for the VIN characters. + """ + height, width = image.shape[:2] + if width < self.MIN_WIDTH_FOR_VIN: + scale = self.MIN_WIDTH_FOR_VIN / width + new_width = int(width * scale) + new_height = int(height * scale) + image = cv2.resize( + image, (new_width, new_height), interpolation=cv2.INTER_CUBIC + ) + logger.debug(f"Upscaled image from {width}x{height} to {new_width}x{new_height}") + return image + + def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray: + """ + Compute a grayscale image with dark text on light background. + + Uses inverted per-pixel minimum across B, G, R channels. + White text has min(255,255,255) = 255 → inverted to 0 (black). + Colored backgrounds have a low min value (e.g. green sticker: + min(130,230,150) = 130) → inverted to 125 (medium gray). + + The inversion ensures Tesseract always receives dark-text-on- + light-background, which is the polarity it expects. + """ + b_channel, g_channel, r_channel = cv2.split(bgr_image) + + min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel) + + # Invert so white text (min=255) becomes black (0) and colored + # backgrounds (min~130) become lighter gray (~125). Tesseract + # expects dark text on light background. + inverted = cv2.bitwise_not(min_channel) + + gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) + logger.debug( + "Channel contrast: inverted-min std=%.1f, grayscale std=%.1f", + float(np.std(inverted)), float(np.std(gray)), + ) + + return inverted + def _apply_clahe(self, image: np.ndarray) -> np.ndarray: """ Apply CLAHE (Contrast Limited Adaptive Histogram Equalization). @@ -242,6 +299,84 @@ class VinPreprocessor: logger.warning(f"Adaptive threshold failed: {e}") return image + def _morphological_cleanup(self, image: np.ndarray) -> np.ndarray: + """ + Remove small noise artifacts from a thresholded binary image. + + Morphological opening (erosion then dilation) removes isolated + pixels and thin noise lines while preserving larger text characters. + """ + try: + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)) + return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel) + except cv2.error as e: + logger.warning(f"Morphological cleanup failed: {e}") + return image + + def _otsu_threshold(self, image: np.ndarray) -> np.ndarray: + """ + Apply Otsu's thresholding for binarization. + + Otsu's method auto-calculates the optimal threshold value, + which can work better than adaptive thresholding on evenly-lit images. + """ + try: + _, result = cv2.threshold( + image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU + ) + return result + except cv2.error as e: + logger.warning(f"Otsu threshold failed: {e}") + return image + + def preprocess_otsu(self, image_bytes: bytes) -> PreprocessingResult: + """ + Alternative preprocessing pipeline using Otsu's thresholding. + + Used as a fallback when adaptive thresholding doesn't produce + good OCR results. + """ + steps_applied = [] + + pil_image = Image.open(io.BytesIO(image_bytes)) + steps_applied.append("loaded") + + if pil_image.mode not in ("RGB", "L"): + pil_image = pil_image.convert("RGB") + steps_applied.append("convert_rgb") + + cv_image = np.array(pil_image) + if len(cv_image.shape) == 3: + cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) + + if len(cv_image.shape) == 3: + gray = self._best_contrast_channel(cv_image) + else: + gray = cv_image + steps_applied.append("grayscale") + + gray = self._ensure_minimum_resolution(gray) + steps_applied.append("resolution_check") + + gray = self._apply_clahe(gray) + steps_applied.append("clahe") + + gray = self._denoise(gray) + steps_applied.append("denoise") + + gray = self._otsu_threshold(gray) + gray = self._morphological_cleanup(gray) + steps_applied.append("otsu_threshold") + + result_image = Image.fromarray(gray) + buffer = io.BytesIO() + result_image.save(buffer, format="PNG") + + return PreprocessingResult( + image_bytes=buffer.getvalue(), + preprocessing_applied=steps_applied, + ) + def detect_vin_region(self, image_bytes: bytes) -> Optional[BoundingBox]: """ Attempt to detect the VIN region in an image. diff --git a/ocr/app/validators/vin_validator.py b/ocr/app/validators/vin_validator.py index 6a4b264..c9c60ef 100644 --- a/ocr/app/validators/vin_validator.py +++ b/ocr/app/validators/vin_validator.py @@ -20,7 +20,9 @@ class VinValidator: # VIN character set (excludes I, O, Q) VALID_CHARS = set("ABCDEFGHJKLMNPRSTUVWXYZ0123456789") - # Common OCR misreads and their corrections + # Common OCR misreads and their corrections. + # Only map characters that are INVALID in VINs to their likely correct values. + # B and S are valid VIN characters and must NOT be transliterated. TRANSLITERATION = { "I": "1", "O": "0", @@ -29,9 +31,6 @@ class VinValidator: "o": "0", "q": "0", "l": "1", - "L": "1", - "B": "8", # Sometimes confused - "S": "5", # Sometimes confused } # Weights for check digit calculation (positions 1-17) @@ -224,6 +223,11 @@ class VinValidator: """ Extract VIN candidates from raw OCR text. + Uses two strategies: + 1. Find continuous 11-20 char alphanumeric runs (handles intact VINs) + 2. Concatenate adjacent short fragments separated by spaces/dashes + (handles Tesseract fragmenting VINs into multiple words) + Args: text: Raw OCR text max_candidates: Maximum number of candidates to return @@ -231,29 +235,104 @@ class VinValidator: Returns: List of (vin, start_pos, end_pos) tuples """ - # Pattern to find potential VIN sequences - # Allow some flexibility for OCR errors (include I, O, Q for correction later) - potential_vin_pattern = re.compile(r"[A-Z0-9IOQ]{11,17}", re.IGNORECASE) - candidates = [] - for match in potential_vin_pattern.finditer(text.upper()): - candidate = match.group() - corrected = self.correct_ocr_errors(candidate) + seen_vins: set[str] = set() - # Only include if it could be a valid VIN after correction - if len(corrected) == 17 and self.MODERN_VIN_PATTERN.match(corrected): - candidates.append((corrected, match.start(), match.end())) + upper_text = text.upper() - # Sort by likelihood of being valid (check digit validation) + # Strategy 1: Find continuous runs of VIN-like characters + continuous_pattern = re.compile(r"[A-Z0-9IOQ]{11,20}", re.IGNORECASE) + for match in continuous_pattern.finditer(upper_text): + self._try_add_candidate( + match.group(), match.start(), match.end(), candidates, seen_vins + ) + + # Strategy 2: Concatenate adjacent alphanumeric fragments + # This handles OCR fragmentation like "1HGBH 41JXMN 109186" + # Only consider fragments >= 3 chars (filters out noise/short words) + fragment_pattern = re.compile(r"[A-Z0-9IOQ]{3,}", re.IGNORECASE) + fragments = [ + (m.group(), m.start(), m.end()) + for m in fragment_pattern.finditer(upper_text) + ] + + # Try sliding windows of 2-4 adjacent fragments + for window_size in range(2, min(5, len(fragments) + 1)): + for i in range(len(fragments) - window_size + 1): + window = fragments[i : i + window_size] + combined = "".join(f[0] for f in window) + # Combined length must be close to 17 (allow +/- 2 for OCR noise) + # Must contain at least 2 digit characters (VINs always have digits; + # pure-alphabetic text is almost certainly not a VIN) + if 15 <= len(combined) <= 19 and sum(c.isdigit() for c in combined) >= 2: + self._try_add_candidate( + combined, window[0][1], window[-1][2], candidates, seen_vins + ) + + # Sort by likelihood of being valid (check digit first, then position) def score_candidate(c: tuple[str, int, int]) -> int: vin = c[0] if self.validate_check_digit(vin): - return 0 # Best score + return 0 return 1 candidates.sort(key=score_candidate) return candidates[:max_candidates] + def _try_add_candidate( + self, + raw: str, + start: int, + end: int, + candidates: list[tuple[str, int, int]], + seen_vins: set[str], + ) -> None: + """Try to add a corrected VIN candidate if it passes validation.""" + corrected = self.correct_ocr_errors(raw) + + if len(corrected) == 17: + self._add_if_valid(corrected, start, end, candidates, seen_vins) + return + + if len(corrected) > 17: + # Strategy A: try every 17-char sliding window + for i in range(len(corrected) - 16): + window = corrected[i : i + 17] + self._add_if_valid(window, start, end, candidates, seen_vins) + + # Strategy B: for 18-19 char strings, try deleting each + # character one at a time. OCR often inserts a spurious + # character (e.g. sticker border read as 'C') that breaks + # the VIN. Check-digit validation filters out false hits. + if len(corrected) <= 19: + for i in range(len(corrected)): + reduced = corrected[:i] + corrected[i + 1 :] + if len(reduced) == 17: + self._add_if_valid( + reduced, start, end, candidates, seen_vins + ) + elif len(reduced) == 18: + # Two deletions needed — try removing one more + for j in range(len(reduced)): + reduced2 = reduced[:j] + reduced[j + 1 :] + self._add_if_valid( + reduced2, start, end, candidates, seen_vins + ) + + def _add_if_valid( + self, + vin: str, + start: int, + end: int, + candidates: list[tuple[str, int, int]], + seen_vins: set[str], + ) -> None: + """Add a 17-char VIN to candidates if it matches the pattern.""" + if len(vin) == 17 and self.MODERN_VIN_PATTERN.match(vin): + if vin not in seen_vins: + seen_vins.add(vin) + candidates.append((vin, start, end)) + # Singleton instance vin_validator = VinValidator() diff --git a/ocr/tests/test_vin_preprocessor.py b/ocr/tests/test_vin_preprocessor.py index 8076294..2d81a7b 100644 --- a/ocr/tests/test_vin_preprocessor.py +++ b/ocr/tests/test_vin_preprocessor.py @@ -53,6 +53,7 @@ class TestVinPreprocessor: ) assert "grayscale" in result.preprocessing_applied + assert "resolution_check" in result.preprocessing_applied assert "clahe" in result.preprocessing_applied assert "deskew" in result.preprocessing_applied assert "denoise" in result.preprocessing_applied @@ -185,6 +186,54 @@ class TestVinPreprocessorThreshold: assert len(unique_values) <= 2 +class TestVinPreprocessorOtsu: + """Tests for Otsu's thresholding preprocessing.""" + + def test_otsu_threshold_creates_binary_image(self) -> None: + """Test Otsu's thresholding creates binary output.""" + preprocessor = VinPreprocessor() + image = np.full((100, 400), 128, dtype=np.uint8) + + result = preprocessor._otsu_threshold(image) + + unique_values = np.unique(result) + assert len(unique_values) <= 2 + + def test_preprocess_otsu_returns_result(self) -> None: + """Test Otsu preprocessing pipeline returns valid result.""" + preprocessor = VinPreprocessor() + image_bytes = create_test_image() + + result = preprocessor.preprocess_otsu(image_bytes) + + assert result.image_bytes is not None + assert len(result.image_bytes) > 0 + assert "otsu_threshold" in result.preprocessing_applied + assert "grayscale" in result.preprocessing_applied + + +class TestVinPreprocessorResolution: + """Tests for resolution upscaling.""" + + def test_upscale_small_image(self) -> None: + """Test small images are upscaled.""" + preprocessor = VinPreprocessor() + small_image = np.full((50, 200), 128, dtype=np.uint8) + + result = preprocessor._ensure_minimum_resolution(small_image) + + assert result.shape[1] >= preprocessor.MIN_WIDTH_FOR_VIN + + def test_no_upscale_large_image(self) -> None: + """Test large images are not upscaled.""" + preprocessor = VinPreprocessor() + large_image = np.full((200, 800), 128, dtype=np.uint8) + + result = preprocessor._ensure_minimum_resolution(large_image) + + assert result.shape == large_image.shape + + class TestVinRegionDetection: """Tests for VIN region detection.""" diff --git a/ocr/tests/test_vin_validator.py b/ocr/tests/test_vin_validator.py index 26f170b..241eabd 100644 --- a/ocr/tests/test_vin_validator.py +++ b/ocr/tests/test_vin_validator.py @@ -43,9 +43,9 @@ class TestVinValidator: result = validator.calculate_check_digit("1HGBH41JXMN109186") assert result == "X" - # 5YJSA1E28HF123456 has check digit 2 at position 9 + # 5YJSA1E28HF123456 has check digit at position 9 result = validator.calculate_check_digit("5YJSA1E28HF123456") - assert result == "8" # Verify this is correct for this VIN + assert result == "5" def test_validate_check_digit_valid(self) -> None: """Test check digit validation with valid VIN.""" @@ -161,6 +161,27 @@ class TestVinValidator: assert len(candidates) >= 1 assert candidates[0][0] == "1HGBH41JXMN109186" + def test_extract_candidates_fragmented_vin(self) -> None: + """Test candidate extraction handles space-fragmented VINs from OCR.""" + validator = VinValidator() + + # Tesseract often fragments VINs into multiple words + text = "1HGBH 41JXMN 109186" + candidates = validator.extract_candidates(text) + + assert len(candidates) >= 1 + assert candidates[0][0] == "1HGBH41JXMN109186" + + def test_extract_candidates_dash_fragmented_vin(self) -> None: + """Test candidate extraction handles dash-separated VINs.""" + validator = VinValidator() + + text = "1HGBH41J-XMN109186" + candidates = validator.extract_candidates(text) + + assert len(candidates) >= 1 + assert candidates[0][0] == "1HGBH41JXMN109186" + def test_extract_candidates_no_vin(self) -> None: """Test candidate extraction with no VIN.""" validator = VinValidator()