fix: VIN OCR scanning fails with "No VIN Pattern found" on all images (#113) #114

Merged
egullickson merged 15 commits from issue-113-fix-vin-ocr-scanning into main 2026-02-07 15:47:37 +00:00
11 changed files with 736 additions and 173 deletions

View File

@@ -5,7 +5,8 @@
"host": "gitea",
"owner": "egullickson",
"repo": "motovaultpro",
"url": "https://git.motovaultpro.com"
"url": "https://git.motovaultpro.com",
"default_branch": "main"
},
"ai_quick_start": {
"load_order": [

View File

@@ -52,7 +52,7 @@ global:
# Logging
log:
level: INFO
level: DEBUG
format: json
# Access logs

View File

@@ -9,16 +9,13 @@
# - Development-specific settings
services:
# PostgreSQL - Remove development port exposure
mvp-postgres:
ports: []
# Redis - Remove development port exposure
mvp-redis:
ports: []
# Traefik - Ensure dashboard authentication is enforced
# Traefik - Production log level and dashboard auth
mvp-traefik:
environment:
LOG_LEVEL: error
command:
- --configFile=/etc/traefik/traefik.yml
- --log.level=ERROR
labels:
- "traefik.enable=true"
- "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.motovaultpro.local`)"
@@ -26,3 +23,45 @@ services:
- "traefik.http.routers.traefik-dashboard.middlewares=dashboard-auth"
- "traefik.http.services.traefik-dashboard.loadbalancer.server.port=8080"
- "traefik.http.middlewares.dashboard-auth.basicauth.users=admin:$$2y$$10$$foobar"
# Backend - Production log level
mvp-backend:
environment:
NODE_ENV: production
CONFIG_PATH: /app/config/production.yml
SECRETS_DIR: /run/secrets
LOG_LEVEL: error
DATABASE_HOST: mvp-postgres
REDIS_HOST: mvp-redis
STRIPE_PRO_MONTHLY_PRICE_ID: prod_Toj6BG9Z9JwREl
STRIPE_PRO_YEARLY_PRICE_ID: prod_Toj8oo0RpVBQmB
STRIPE_ENTERPRISE_MONTHLY_PRICE_ID: prod_Toj8xGEui9jl6j
STRIPE_ENTERPRISE_YEARLY_PRICE_ID: prod_Toj9A7A773xrdn
# OCR - Production log level
mvp-ocr:
environment:
LOG_LEVEL: error
REDIS_HOST: mvp-redis
REDIS_PORT: 6379
REDIS_DB: 1
# PostgreSQL - Remove dev ports, production log level
mvp-postgres:
ports: []
environment:
POSTGRES_DB: motovaultpro
POSTGRES_USER: postgres
POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password
POSTGRES_INITDB_ARGS: --encoding=UTF8
LOG_LEVEL: error
POSTGRES_LOG_STATEMENT: none
POSTGRES_LOG_MIN_DURATION_STATEMENT: -1
PGDATA: /var/lib/postgresql/data
# Redis - Remove dev ports, production log level
mvp-redis:
ports: []
command: redis-server --appendonly yes --loglevel ${LOG_LEVEL:-info}
environment:
LOG_LEVEL: error

View File

@@ -11,6 +11,7 @@ services:
command:
- --configFile=/etc/traefik/traefik.yml
environment:
LOG_LEVEL: debug
CLOUDFLARE_DNS_API_TOKEN_FILE: /run/secrets/cloudflare-dns-token
ports:
- "80:80"
@@ -112,6 +113,7 @@ services:
NODE_ENV: production
CONFIG_PATH: /app/config/production.yml
SECRETS_DIR: /run/secrets
LOG_LEVEL: debug
# Service references
DATABASE_HOST: mvp-postgres
REDIS_HOST: mvp-redis
@@ -187,10 +189,12 @@ services:
container_name: mvp-ocr
restart: unless-stopped
environment:
LOG_LEVEL: info
LOG_LEVEL: debug
REDIS_HOST: mvp-redis
REDIS_PORT: 6379
REDIS_DB: 1
volumes:
- /tmp/vin-debug:/tmp/vin-debug
networks:
- backend
- database
@@ -218,8 +222,10 @@ services:
POSTGRES_USER: postgres
POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password
POSTGRES_INITDB_ARGS: --encoding=UTF8
POSTGRES_LOG_STATEMENT: ${POSTGRES_LOG_STATEMENT:-ddl}
POSTGRES_LOG_MIN_DURATION_STATEMENT: ${POSTGRES_LOG_MIN_DURATION:-500}
LOG_LEVEL: debug
POSTGRES_LOG_STATEMENT: all
POSTGRES_LOG_MIN_DURATION_STATEMENT: 0
PGDATA: /var/lib/postgresql/data
volumes:
- mvp_postgres_data:/var/lib/postgresql/data
# Secrets (K8s Secrets equivalent)
@@ -245,7 +251,9 @@ services:
image: ${REGISTRY_MIRRORS:-git.motovaultpro.com/egullickson/mirrors}/redis:8.4-alpine
container_name: mvp-redis
restart: unless-stopped
command: redis-server --appendonly yes --loglevel ${REDIS_LOGLEVEL:-notice}
command: redis-server --appendonly yes --loglevel ${LOG_LEVEL:-info}
environment:
LOG_LEVEL: debug
volumes:
- mvp_redis_data:/data
networks:

View File

@@ -3,7 +3,7 @@
* @ai-context Allows user to adjust crop area with touch/mouse, confirm or retake
*/
import React, { useCallback, useState } from 'react';
import React, { useCallback, useState, useRef, useEffect } from 'react';
import { Box, IconButton, Button, Typography, CircularProgress } from '@mui/material';
import CheckIcon from '@mui/icons-material/Check';
import RefreshIcon from '@mui/icons-material/Refresh';
@@ -22,12 +22,30 @@ export const CropTool: React.FC<CropToolProps> = ({
onSkip,
}) => {
const [isProcessing, setIsProcessing] = useState(false);
const imageAreaRef = useRef<HTMLDivElement>(null);
const [imageMaxHeight, setImageMaxHeight] = useState(0);
const { cropArea, isDragging, resetCrop, executeCrop, handleDragStart } =
const { cropArea, cropDrawn, isDragging, resetCrop, executeCrop, handleDragStart, handleDrawStart } =
useImageCrop({
aspectRatio: lockAspectRatio ? aspectRatio : undefined,
});
const showCropArea = cropDrawn || (isDragging && cropArea.width > 1 && cropArea.height > 1);
// Measure available height for the image so the crop container
// matches the rendered image exactly (fixes mobile crop offset)
useEffect(() => {
const updateMaxHeight = () => {
if (imageAreaRef.current) {
const rect = imageAreaRef.current.getBoundingClientRect();
setImageMaxHeight(rect.height - 32); // subtract p:2 padding (16px * 2)
}
};
updateMaxHeight();
window.addEventListener('resize', updateMaxHeight);
return () => window.removeEventListener('resize', updateMaxHeight);
}, []);
const handleConfirm = useCallback(async () => {
setIsProcessing(true);
try {
@@ -61,6 +79,7 @@ export const CropTool: React.FC<CropToolProps> = ({
>
{/* Image with crop overlay */}
<Box
ref={imageAreaRef}
sx={{
flex: 1,
position: 'relative',
@@ -75,8 +94,6 @@ export const CropTool: React.FC<CropToolProps> = ({
data-crop-container
sx={{
position: 'relative',
maxWidth: '100%',
maxHeight: '100%',
userSelect: 'none',
touchAction: isDragging ? 'none' : 'auto',
}}
@@ -87,132 +104,156 @@ export const CropTool: React.FC<CropToolProps> = ({
alt="Captured"
style={{
maxWidth: '100%',
maxHeight: '100%',
maxHeight: imageMaxHeight > 0 ? `${imageMaxHeight}px` : '70vh',
display: 'block',
}}
draggable={false}
/>
{/* Draw surface for free-form rectangle drawing */}
{!cropDrawn && (
<Box
onMouseDown={handleDrawStart}
onTouchStart={handleDrawStart}
sx={{
position: 'absolute',
inset: 0,
cursor: 'crosshair',
zIndex: 5,
touchAction: 'none',
}}
/>
)}
{/* Dark overlay outside crop area */}
<Box
sx={{
position: 'absolute',
inset: 0,
pointerEvents: 'none',
}}
>
{/* Top overlay */}
<Box
sx={{
position: 'absolute',
top: 0,
left: 0,
right: 0,
height: `${cropArea.y}%`,
backgroundColor: 'rgba(0, 0, 0, 0.6)',
}}
/>
{/* Bottom overlay */}
<Box
sx={{
position: 'absolute',
bottom: 0,
left: 0,
right: 0,
height: `${100 - cropArea.y - cropArea.height}%`,
backgroundColor: 'rgba(0, 0, 0, 0.6)',
}}
/>
{/* Left overlay */}
<Box
sx={{
position: 'absolute',
top: `${cropArea.y}%`,
left: 0,
width: `${cropArea.x}%`,
height: `${cropArea.height}%`,
backgroundColor: 'rgba(0, 0, 0, 0.6)',
}}
/>
{/* Right overlay */}
<Box
sx={{
position: 'absolute',
top: `${cropArea.y}%`,
right: 0,
width: `${100 - cropArea.x - cropArea.width}%`,
height: `${cropArea.height}%`,
backgroundColor: 'rgba(0, 0, 0, 0.6)',
}}
/>
</Box>
{/* Crop area with handles */}
<Box
sx={{
position: 'absolute',
top: `${cropArea.y}%`,
left: `${cropArea.x}%`,
width: `${cropArea.width}%`,
height: `${cropArea.height}%`,
border: '2px solid white',
boxSizing: 'border-box',
}}
>
{/* Move handle (center area) */}
<CropHandleArea
handle="move"
onDragStart={handleDragStart}
sx={{
position: 'absolute',
inset: 8,
cursor: 'move',
}}
/>
{/* Corner handles */}
<CropHandle handle="nw" onDragStart={handleDragStart} position="top-left" />
<CropHandle handle="ne" onDragStart={handleDragStart} position="top-right" />
<CropHandle handle="sw" onDragStart={handleDragStart} position="bottom-left" />
<CropHandle handle="se" onDragStart={handleDragStart} position="bottom-right" />
{/* Edge handles */}
<CropHandle handle="n" onDragStart={handleDragStart} position="top" />
<CropHandle handle="s" onDragStart={handleDragStart} position="bottom" />
<CropHandle handle="w" onDragStart={handleDragStart} position="left" />
<CropHandle handle="e" onDragStart={handleDragStart} position="right" />
{/* Grid lines for alignment */}
{showCropArea && (
<Box
sx={{
position: 'absolute',
inset: 0,
display: 'grid',
gridTemplateColumns: '1fr 1fr 1fr',
gridTemplateRows: '1fr 1fr 1fr',
pointerEvents: 'none',
opacity: isDragging ? 1 : 0.5,
transition: 'opacity 0.2s',
}}
>
{Array.from({ length: 9 }).map((_, i) => (
<Box
key={i}
sx={{
borderRight: i % 3 !== 2 ? '1px solid rgba(255,255,255,0.3)' : 'none',
borderBottom: i < 6 ? '1px solid rgba(255,255,255,0.3)' : 'none',
}}
/>
))}
{/* Top overlay */}
<Box
sx={{
position: 'absolute',
top: 0,
left: 0,
right: 0,
height: `${cropArea.y}%`,
backgroundColor: 'rgba(0, 0, 0, 0.6)',
}}
/>
{/* Bottom overlay */}
<Box
sx={{
position: 'absolute',
bottom: 0,
left: 0,
right: 0,
height: `${100 - cropArea.y - cropArea.height}%`,
backgroundColor: 'rgba(0, 0, 0, 0.6)',
}}
/>
{/* Left overlay */}
<Box
sx={{
position: 'absolute',
top: `${cropArea.y}%`,
left: 0,
width: `${cropArea.x}%`,
height: `${cropArea.height}%`,
backgroundColor: 'rgba(0, 0, 0, 0.6)',
}}
/>
{/* Right overlay */}
<Box
sx={{
position: 'absolute',
top: `${cropArea.y}%`,
right: 0,
width: `${100 - cropArea.x - cropArea.width}%`,
height: `${cropArea.height}%`,
backgroundColor: 'rgba(0, 0, 0, 0.6)',
}}
/>
</Box>
</Box>
)}
{/* Crop area border and handles */}
{showCropArea && (
<Box
sx={{
position: 'absolute',
top: `${cropArea.y}%`,
left: `${cropArea.x}%`,
width: `${cropArea.width}%`,
height: `${cropArea.height}%`,
border: '2px solid white',
boxSizing: 'border-box',
}}
>
{/* Handles only appear after drawing is complete */}
{cropDrawn && (
<>
{/* Move handle (center area) */}
<CropHandleArea
handle="move"
onDragStart={handleDragStart}
sx={{
position: 'absolute',
inset: 8,
cursor: 'move',
}}
/>
{/* Corner handles */}
<CropHandle handle="nw" onDragStart={handleDragStart} position="top-left" />
<CropHandle handle="ne" onDragStart={handleDragStart} position="top-right" />
<CropHandle handle="sw" onDragStart={handleDragStart} position="bottom-left" />
<CropHandle handle="se" onDragStart={handleDragStart} position="bottom-right" />
{/* Edge handles */}
<CropHandle handle="n" onDragStart={handleDragStart} position="top" />
<CropHandle handle="s" onDragStart={handleDragStart} position="bottom" />
<CropHandle handle="w" onDragStart={handleDragStart} position="left" />
<CropHandle handle="e" onDragStart={handleDragStart} position="right" />
</>
)}
{/* Grid lines for alignment */}
<Box
sx={{
position: 'absolute',
inset: 0,
display: 'grid',
gridTemplateColumns: '1fr 1fr 1fr',
gridTemplateRows: '1fr 1fr 1fr',
pointerEvents: 'none',
opacity: isDragging ? 1 : 0.5,
transition: 'opacity 0.2s',
}}
>
{Array.from({ length: 9 }).map((_, i) => (
<Box
key={i}
sx={{
borderRight: i % 3 !== 2 ? '1px solid rgba(255,255,255,0.3)' : 'none',
borderBottom: i < 6 ? '1px solid rgba(255,255,255,0.3)' : 'none',
}}
/>
))}
</Box>
</Box>
)}
</Box>
</Box>
{/* Instructions */}
<Box sx={{ px: 2, py: 1, textAlign: 'center' }}>
<Typography variant="body2" sx={{ color: 'rgba(255, 255, 255, 0.7)' }}>
Drag to adjust crop area
{cropDrawn ? 'Drag handles to adjust crop area' : 'Tap and drag to select crop area'}
</Typography>
</Box>
@@ -240,7 +281,7 @@ export const CropTool: React.FC<CropToolProps> = ({
onClick={handleReset}
startIcon={<RefreshIcon />}
sx={{ color: 'white' }}
disabled={isProcessing}
disabled={isProcessing || !cropDrawn}
>
Reset
</Button>
@@ -256,7 +297,7 @@ export const CropTool: React.FC<CropToolProps> = ({
<IconButton
onClick={handleConfirm}
disabled={isProcessing}
disabled={isProcessing || !cropDrawn}
aria-label="Confirm crop"
sx={{
width: 56,

View File

@@ -18,16 +18,20 @@ interface UseImageCropOptions {
interface UseImageCropReturn {
/** Current crop area */
cropArea: CropArea;
/** Whether user has drawn a crop rectangle */
cropDrawn: boolean;
/** Whether user is actively dragging */
isDragging: boolean;
/** Set crop area */
setCropArea: (area: CropArea) => void;
/** Reset crop to initial/default */
/** Reset crop to drawing mode */
resetCrop: () => void;
/** Execute crop and return cropped blob */
executeCrop: (imageSrc: string, mimeType?: string) => Promise<Blob>;
/** Handle drag start for crop handles */
handleDragStart: (handle: CropHandle, event: React.MouseEvent | React.TouchEvent) => void;
/** Handle draw start for free-form rectangle drawing */
handleDrawStart: (event: React.MouseEvent | React.TouchEvent) => void;
/** Handle move during drag */
handleMove: (event: MouseEvent | TouchEvent) => void;
/** Handle drag end */
@@ -78,12 +82,22 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
const [cropArea, setCropAreaState] = useState<CropArea>(
getAspectRatioAdjustedCrop(initialCrop)
);
const [cropDrawn, setCropDrawn] = useState(false);
const [isDragging, setIsDragging] = useState(false);
const activeHandleRef = useRef<CropHandle | null>(null);
const startPosRef = useRef({ x: 0, y: 0 });
const startCropRef = useRef<CropArea>(cropArea);
const containerRef = useRef<{ width: number; height: number }>({ width: 100, height: 100 });
const containerRef = useRef<{ width: number; height: number; left: number; top: number }>({
width: 100, height: 100, left: 0, top: 0,
});
const isDrawingRef = useRef(false);
const drawOriginRef = useRef({ x: 0, y: 0 });
const cropAreaRef = useRef(cropArea);
useEffect(() => {
cropAreaRef.current = cropArea;
}, [cropArea]);
const setCropArea = useCallback(
(area: CropArea) => {
@@ -94,6 +108,7 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
const resetCrop = useCallback(() => {
setCropAreaState(getAspectRatioAdjustedCrop(initialCrop));
setCropDrawn(false);
}, [initialCrop, getAspectRatioAdjustedCrop]);
const constrainCrop = useCallback(
@@ -136,19 +151,75 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
const container = target.closest('[data-crop-container]');
if (container) {
const rect = container.getBoundingClientRect();
containerRef.current = { width: rect.width, height: rect.height };
containerRef.current = { width: rect.width, height: rect.height, left: rect.left, top: rect.top };
}
},
[cropArea]
);
const handleMove = useCallback(
(event: MouseEvent | TouchEvent) => {
if (!activeHandleRef.current || !isDragging) return;
const handleDrawStart = useCallback(
(event: React.MouseEvent | React.TouchEvent) => {
event.preventDefault();
const target = event.currentTarget as HTMLElement;
const container = target.closest('[data-crop-container]');
if (!container) return;
const rect = container.getBoundingClientRect();
containerRef.current = { width: rect.width, height: rect.height, left: rect.left, top: rect.top };
const clientX = 'touches' in event ? event.touches[0].clientX : event.clientX;
const clientY = 'touches' in event ? event.touches[0].clientY : event.clientY;
const x = Math.max(0, Math.min(100, ((clientX - rect.left) / rect.width) * 100));
const y = Math.max(0, Math.min(100, ((clientY - rect.top) / rect.height) * 100));
startPosRef.current = { x: clientX, y: clientY };
drawOriginRef.current = { x, y };
setCropAreaState({ x, y, width: 0, height: 0 });
isDrawingRef.current = true;
activeHandleRef.current = null;
setIsDragging(true);
},
[]
);
const handleMove = useCallback(
(event: MouseEvent | TouchEvent) => {
if (!isDragging) return;
const clientX = 'touches' in event ? event.touches[0].clientX : event.clientX;
const clientY = 'touches' in event ? event.touches[0].clientY : event.clientY;
// Free-form drawing mode: compute rectangle from origin to current pointer
if (isDrawingRef.current) {
const currentX = Math.max(0, Math.min(100,
((clientX - containerRef.current.left) / containerRef.current.width) * 100));
const currentY = Math.max(0, Math.min(100,
((clientY - containerRef.current.top) / containerRef.current.height) * 100));
const originX = drawOriginRef.current.x;
const originY = drawOriginRef.current.y;
let newCrop: CropArea = {
x: Math.min(originX, currentX),
y: Math.min(originY, currentY),
width: Math.abs(currentX - originX),
height: Math.abs(currentY - originY),
};
if (aspectRatio) {
newCrop.height = newCrop.width / aspectRatio;
}
setCropAreaState(newCrop);
return;
}
if (!activeHandleRef.current) return;
// Calculate delta as percentage of container
const deltaX = ((clientX - startPosRef.current.x) / containerRef.current.width) * 100;
const deltaY = ((clientY - startPosRef.current.y) / containerRef.current.height) * 100;
@@ -234,13 +305,20 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
setCropAreaState(constrainCrop(newCrop));
},
[isDragging, constrainCrop]
[isDragging, constrainCrop, aspectRatio]
);
const handleDragEnd = useCallback(() => {
if (isDrawingRef.current) {
isDrawingRef.current = false;
const area = cropAreaRef.current;
if (area.width >= minSize && area.height >= minSize) {
setCropDrawn(true);
}
}
activeHandleRef.current = null;
setIsDragging(false);
}, []);
}, [minSize]);
// Add global event listeners for drag
useEffect(() => {
@@ -304,7 +382,7 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
}
},
mimeType,
0.92
0.95
);
};
@@ -320,11 +398,13 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
return {
cropArea,
cropDrawn,
isDragging,
setCropArea,
resetCrop,
executeCrop,
handleDragStart,
handleDrawStart,
handleMove,
handleDragEnd,
};

View File

@@ -1,8 +1,10 @@
"""VIN-specific OCR extractor with preprocessing and validation."""
import io
import logging
import os
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
import magic
@@ -57,9 +59,31 @@ class VinExtractor(BaseExtractor):
# VIN character whitelist for Tesseract
VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
# Fixed debug output directory (inside container)
DEBUG_DIR = "/tmp/vin-debug"
def __init__(self) -> None:
"""Initialize VIN extractor."""
pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
self._debug = settings.log_level.upper() == "DEBUG"
def _save_debug_image(self, session_dir: str, name: str, data: bytes) -> None:
"""Save image bytes to the debug session directory when LOG_LEVEL=debug."""
if not self._debug:
return
path = os.path.join(session_dir, name)
with open(path, "wb") as f:
f.write(data)
logger.debug("Saved debug image: %s (%d bytes)", name, len(data))
def _create_debug_session(self) -> Optional[str]:
"""Create a timestamped debug directory. Returns path or None."""
if not self._debug:
return None
ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
session_dir = os.path.join(self.DEBUG_DIR, ts)
os.makedirs(session_dir, exist_ok=True)
return session_dir
def extract(
self, image_bytes: bytes, content_type: Optional[str] = None
@@ -89,21 +113,91 @@ class VinExtractor(BaseExtractor):
)
try:
debug_session = self._create_debug_session()
logger.debug(
"VIN extraction input: %d bytes, content_type=%s",
len(image_bytes), content_type,
)
if debug_session:
self._save_debug_image(debug_session, "01_original.jpg", image_bytes)
# Apply VIN-optimized preprocessing
preprocessing_result = vin_preprocessor.preprocess(image_bytes)
preprocessed_bytes = preprocessing_result.image_bytes
logger.debug(
"Preprocessing steps: %s", preprocessing_result.preprocessing_applied
)
if debug_session:
self._save_debug_image(
debug_session, "02_preprocessed_adaptive.png", preprocessed_bytes
)
# Perform OCR with VIN-optimized settings
raw_text, word_confidences = self._perform_ocr(preprocessed_bytes)
logger.debug("PSM 6 raw text: '%s'", raw_text)
logger.debug("PSM 6 word confidences: %s", word_confidences)
# Extract VIN candidates from raw text
candidates = vin_validator.extract_candidates(raw_text)
logger.debug("PSM 6 candidates: %s", candidates)
if not candidates:
# No VIN candidates found - try with different PSM modes
candidates = self._try_alternate_ocr(preprocessed_bytes)
if not candidates:
# Try grayscale-only (no thresholding) — the Tesseract
# LSTM engine often performs better on non-binarized input
# because it does its own internal preprocessing.
gray_result = vin_preprocessor.preprocess(
image_bytes, apply_threshold=False
)
logger.debug(
"Grayscale preprocessing steps: %s",
gray_result.preprocessing_applied,
)
if debug_session:
self._save_debug_image(
debug_session, "04_preprocessed_gray.png",
gray_result.image_bytes,
)
raw_text, word_confidences = self._perform_ocr(
gray_result.image_bytes
)
logger.debug("Gray PSM 6 raw text: '%s'", raw_text)
candidates = vin_validator.extract_candidates(raw_text)
logger.debug("Gray PSM 6 candidates: %s", candidates)
if not candidates:
candidates = self._try_alternate_ocr(
gray_result.image_bytes, prefix="Gray"
)
if not candidates:
# Try alternative preprocessing (Otsu's thresholding)
otsu_result = vin_preprocessor.preprocess_otsu(image_bytes)
logger.debug(
"Otsu preprocessing steps: %s",
otsu_result.preprocessing_applied,
)
if debug_session:
self._save_debug_image(
debug_session, "03_preprocessed_otsu.png",
otsu_result.image_bytes,
)
raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes)
logger.debug("Otsu PSM 6 raw text: '%s'", raw_text)
candidates = vin_validator.extract_candidates(raw_text)
logger.debug("Otsu PSM 6 candidates: %s", candidates)
if not candidates:
candidates = self._try_alternate_ocr(
otsu_result.image_bytes, prefix="Otsu"
)
if not candidates:
logger.debug("No VIN pattern found in any OCR attempt")
return VinExtractionResult(
success=False,
error="No VIN pattern found in image",
@@ -153,8 +247,12 @@ class VinExtractor(BaseExtractor):
processing_time_ms = int((time.time() - start_time) * 1000)
logger.info(
f"VIN extraction: {primary_vin}, confidence={primary_confidence:.2%}, "
f"time={processing_time_ms}ms"
"VIN extraction: %s, confidence=%.2f%%, time=%dms",
primary_vin, primary_confidence * 100, processing_time_ms,
)
logger.debug(
"VIN alternatives: %s",
[(a.vin, a.confidence) for a in alternatives],
)
return VinExtractionResult(
@@ -168,7 +266,7 @@ class VinExtractor(BaseExtractor):
)
except Exception as e:
logger.error(f"VIN extraction failed: {e}", exc_info=True)
logger.error("VIN extraction failed: %s", e, exc_info=True)
return VinExtractionResult(
success=False,
error=str(e),
@@ -200,10 +298,15 @@ class VinExtractor(BaseExtractor):
image = Image.open(io.BytesIO(image_bytes))
# Configure Tesseract for VIN extraction
# Use character whitelist to exclude I, O, Q
# OEM 1 = LSTM neural network engine (best accuracy)
# NOTE: tessedit_char_whitelist does NOT work with OEM 1 (LSTM).
# Using it causes empty/erratic output. Character filtering is
# handled post-OCR by vin_validator.correct_ocr_errors() instead.
config = (
f"--psm {psm} "
f"-c tessedit_char_whitelist={self.VIN_WHITELIST}"
f"--oem 1 "
f"-c load_system_dawg=false "
f"-c load_freq_dawg=false"
)
# Get detailed OCR data
@@ -224,24 +327,31 @@ class VinExtractor(BaseExtractor):
raw_text = " ".join(words)
return raw_text, confidences
def _try_alternate_ocr(self, image_bytes: bytes) -> list[tuple[str, int, int]]:
def _try_alternate_ocr(
self,
image_bytes: bytes,
prefix: str = "",
) -> list[tuple[str, int, int]]:
"""
Try alternate OCR configurations when initial extraction fails.
PSM modes tried in order:
7 - Single text line
8 - Single word
11 - Sparse text (finds text in any order, good for angled photos)
13 - Raw line (no Tesseract heuristics, good for clean VIN plates)
Returns:
List of VIN candidates
"""
# Try PSM 7 (single text line)
raw_text, _ = self._perform_ocr(image_bytes, psm=7)
candidates = vin_validator.extract_candidates(raw_text)
if candidates:
return candidates
# Try PSM 8 (single word)
raw_text, _ = self._perform_ocr(image_bytes, psm=8)
candidates = vin_validator.extract_candidates(raw_text)
if candidates:
return candidates
tag = f"{prefix} " if prefix else ""
for psm in (7, 8, 11, 13):
raw_text, _ = self._perform_ocr(image_bytes, psm=psm)
logger.debug("%sPSM %d raw text: '%s'", tag, psm, raw_text)
candidates = vin_validator.extract_candidates(raw_text)
logger.debug("%sPSM %d candidates: %s", tag, psm, candidates)
if candidates:
return candidates
return []

View File

@@ -86,13 +86,17 @@ class VinPreprocessor:
if len(cv_image.shape) == 3:
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
# Convert to grayscale
# Convert to grayscale using best-contrast channel selection
if len(cv_image.shape) == 3:
gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
gray = self._best_contrast_channel(cv_image)
else:
gray = cv_image
steps_applied.append("grayscale")
# Upscale small images for better OCR (Tesseract needs ~300 DPI)
gray = self._ensure_minimum_resolution(gray)
steps_applied.append("resolution_check")
# Apply deskew
if apply_deskew:
gray = self._deskew(gray)
@@ -111,6 +115,7 @@ class VinPreprocessor:
# Apply adaptive thresholding
if apply_threshold:
gray = self._adaptive_threshold(gray)
gray = self._morphological_cleanup(gray)
steps_applied.append("threshold")
# Convert back to PNG bytes
@@ -123,6 +128,58 @@ class VinPreprocessor:
preprocessing_applied=steps_applied,
)
# Minimum width in pixels for reliable VIN OCR.
# A 17-char VIN needs ~30px per character for Tesseract accuracy.
MIN_WIDTH_FOR_VIN = 600
def _ensure_minimum_resolution(self, image: np.ndarray) -> np.ndarray:
"""
Upscale image if too small for reliable OCR.
Tesseract works best at ~300 DPI. Mobile photos of VINs may have
the text occupy only a small portion of the frame, resulting in
low effective resolution for the VIN characters.
"""
height, width = image.shape[:2]
if width < self.MIN_WIDTH_FOR_VIN:
scale = self.MIN_WIDTH_FOR_VIN / width
new_width = int(width * scale)
new_height = int(height * scale)
image = cv2.resize(
image, (new_width, new_height), interpolation=cv2.INTER_CUBIC
)
logger.debug(f"Upscaled image from {width}x{height} to {new_width}x{new_height}")
return image
def _best_contrast_channel(self, bgr_image: np.ndarray) -> np.ndarray:
"""
Compute a grayscale image with dark text on light background.
Uses inverted per-pixel minimum across B, G, R channels.
White text has min(255,255,255) = 255 → inverted to 0 (black).
Colored backgrounds have a low min value (e.g. green sticker:
min(130,230,150) = 130) → inverted to 125 (medium gray).
The inversion ensures Tesseract always receives dark-text-on-
light-background, which is the polarity it expects.
"""
b_channel, g_channel, r_channel = cv2.split(bgr_image)
min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
# Invert so white text (min=255) becomes black (0) and colored
# backgrounds (min~130) become lighter gray (~125). Tesseract
# expects dark text on light background.
inverted = cv2.bitwise_not(min_channel)
gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
logger.debug(
"Channel contrast: inverted-min std=%.1f, grayscale std=%.1f",
float(np.std(inverted)), float(np.std(gray)),
)
return inverted
def _apply_clahe(self, image: np.ndarray) -> np.ndarray:
"""
Apply CLAHE (Contrast Limited Adaptive Histogram Equalization).
@@ -242,6 +299,84 @@ class VinPreprocessor:
logger.warning(f"Adaptive threshold failed: {e}")
return image
def _morphological_cleanup(self, image: np.ndarray) -> np.ndarray:
"""
Remove small noise artifacts from a thresholded binary image.
Morphological opening (erosion then dilation) removes isolated
pixels and thin noise lines while preserving larger text characters.
"""
try:
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
except cv2.error as e:
logger.warning(f"Morphological cleanup failed: {e}")
return image
def _otsu_threshold(self, image: np.ndarray) -> np.ndarray:
"""
Apply Otsu's thresholding for binarization.
Otsu's method auto-calculates the optimal threshold value,
which can work better than adaptive thresholding on evenly-lit images.
"""
try:
_, result = cv2.threshold(
image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
)
return result
except cv2.error as e:
logger.warning(f"Otsu threshold failed: {e}")
return image
def preprocess_otsu(self, image_bytes: bytes) -> PreprocessingResult:
"""
Alternative preprocessing pipeline using Otsu's thresholding.
Used as a fallback when adaptive thresholding doesn't produce
good OCR results.
"""
steps_applied = []
pil_image = Image.open(io.BytesIO(image_bytes))
steps_applied.append("loaded")
if pil_image.mode not in ("RGB", "L"):
pil_image = pil_image.convert("RGB")
steps_applied.append("convert_rgb")
cv_image = np.array(pil_image)
if len(cv_image.shape) == 3:
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
if len(cv_image.shape) == 3:
gray = self._best_contrast_channel(cv_image)
else:
gray = cv_image
steps_applied.append("grayscale")
gray = self._ensure_minimum_resolution(gray)
steps_applied.append("resolution_check")
gray = self._apply_clahe(gray)
steps_applied.append("clahe")
gray = self._denoise(gray)
steps_applied.append("denoise")
gray = self._otsu_threshold(gray)
gray = self._morphological_cleanup(gray)
steps_applied.append("otsu_threshold")
result_image = Image.fromarray(gray)
buffer = io.BytesIO()
result_image.save(buffer, format="PNG")
return PreprocessingResult(
image_bytes=buffer.getvalue(),
preprocessing_applied=steps_applied,
)
def detect_vin_region(self, image_bytes: bytes) -> Optional[BoundingBox]:
"""
Attempt to detect the VIN region in an image.

View File

@@ -20,7 +20,9 @@ class VinValidator:
# VIN character set (excludes I, O, Q)
VALID_CHARS = set("ABCDEFGHJKLMNPRSTUVWXYZ0123456789")
# Common OCR misreads and their corrections
# Common OCR misreads and their corrections.
# Only map characters that are INVALID in VINs to their likely correct values.
# B and S are valid VIN characters and must NOT be transliterated.
TRANSLITERATION = {
"I": "1",
"O": "0",
@@ -29,9 +31,6 @@ class VinValidator:
"o": "0",
"q": "0",
"l": "1",
"L": "1",
"B": "8", # Sometimes confused
"S": "5", # Sometimes confused
}
# Weights for check digit calculation (positions 1-17)
@@ -224,6 +223,11 @@ class VinValidator:
"""
Extract VIN candidates from raw OCR text.
Uses two strategies:
1. Find continuous 11-20 char alphanumeric runs (handles intact VINs)
2. Concatenate adjacent short fragments separated by spaces/dashes
(handles Tesseract fragmenting VINs into multiple words)
Args:
text: Raw OCR text
max_candidates: Maximum number of candidates to return
@@ -231,29 +235,104 @@ class VinValidator:
Returns:
List of (vin, start_pos, end_pos) tuples
"""
# Pattern to find potential VIN sequences
# Allow some flexibility for OCR errors (include I, O, Q for correction later)
potential_vin_pattern = re.compile(r"[A-Z0-9IOQ]{11,17}", re.IGNORECASE)
candidates = []
for match in potential_vin_pattern.finditer(text.upper()):
candidate = match.group()
corrected = self.correct_ocr_errors(candidate)
seen_vins: set[str] = set()
# Only include if it could be a valid VIN after correction
if len(corrected) == 17 and self.MODERN_VIN_PATTERN.match(corrected):
candidates.append((corrected, match.start(), match.end()))
upper_text = text.upper()
# Sort by likelihood of being valid (check digit validation)
# Strategy 1: Find continuous runs of VIN-like characters
continuous_pattern = re.compile(r"[A-Z0-9IOQ]{11,20}", re.IGNORECASE)
for match in continuous_pattern.finditer(upper_text):
self._try_add_candidate(
match.group(), match.start(), match.end(), candidates, seen_vins
)
# Strategy 2: Concatenate adjacent alphanumeric fragments
# This handles OCR fragmentation like "1HGBH 41JXMN 109186"
# Only consider fragments >= 3 chars (filters out noise/short words)
fragment_pattern = re.compile(r"[A-Z0-9IOQ]{3,}", re.IGNORECASE)
fragments = [
(m.group(), m.start(), m.end())
for m in fragment_pattern.finditer(upper_text)
]
# Try sliding windows of 2-4 adjacent fragments
for window_size in range(2, min(5, len(fragments) + 1)):
for i in range(len(fragments) - window_size + 1):
window = fragments[i : i + window_size]
combined = "".join(f[0] for f in window)
# Combined length must be close to 17 (allow +/- 2 for OCR noise)
# Must contain at least 2 digit characters (VINs always have digits;
# pure-alphabetic text is almost certainly not a VIN)
if 15 <= len(combined) <= 19 and sum(c.isdigit() for c in combined) >= 2:
self._try_add_candidate(
combined, window[0][1], window[-1][2], candidates, seen_vins
)
# Sort by likelihood of being valid (check digit first, then position)
def score_candidate(c: tuple[str, int, int]) -> int:
vin = c[0]
if self.validate_check_digit(vin):
return 0 # Best score
return 0
return 1
candidates.sort(key=score_candidate)
return candidates[:max_candidates]
def _try_add_candidate(
    self,
    raw: str,
    start: int,
    end: int,
    candidates: list[tuple[str, int, int]],
    seen_vins: set[str],
) -> None:
    """Apply OCR corrections to ``raw`` and record any plausible VINs.

    An exact 17-character string is checked directly. Longer strings are
    probed two ways: every contiguous 17-character window, and (for 18-19
    character strings) single or double character deletions — OCR often
    inserts a spurious character (e.g. a sticker border read as a letter).
    Downstream check-digit validation filters out the false hits. Strings
    shorter than 17 characters after correction are ignored.
    """
    fixed = self.correct_ocr_errors(raw)
    size = len(fixed)
    if size == 17:
        self._add_if_valid(fixed, start, end, candidates, seen_vins)
        return
    if size < 17:
        return
    # Strategy A: slide a 17-character window across the string.
    for offset in range(size - 16):
        self._add_if_valid(
            fixed[offset : offset + 17], start, end, candidates, seen_vins
        )
    if size > 19:
        return
    # Strategy B: drop one character at a time; an 18-char string yields a
    # 17-char candidate directly, a 19-char string needs a second deletion.
    for drop in range(size):
        shorter = fixed[:drop] + fixed[drop + 1 :]
        if len(shorter) == 17:
            self._add_if_valid(shorter, start, end, candidates, seen_vins)
        else:
            for drop2 in range(len(shorter)):
                self._add_if_valid(
                    shorter[:drop2] + shorter[drop2 + 1 :],
                    start,
                    end,
                    candidates,
                    seen_vins,
                )
def _add_if_valid(
self,
vin: str,
start: int,
end: int,
candidates: list[tuple[str, int, int]],
seen_vins: set[str],
) -> None:
"""Add a 17-char VIN to candidates if it matches the pattern."""
if len(vin) == 17 and self.MODERN_VIN_PATTERN.match(vin):
if vin not in seen_vins:
seen_vins.add(vin)
candidates.append((vin, start, end))
# Singleton instance
# Shared module-level validator; importers should use this rather than
# constructing their own. NOTE(review): assumes VinValidator holds no
# per-call mutable state — confirm before sharing across threads.
vin_validator = VinValidator()

View File

@@ -53,6 +53,7 @@ class TestVinPreprocessor:
)
assert "grayscale" in result.preprocessing_applied
assert "resolution_check" in result.preprocessing_applied
assert "clahe" in result.preprocessing_applied
assert "deskew" in result.preprocessing_applied
assert "denoise" in result.preprocessing_applied
@@ -185,6 +186,54 @@ class TestVinPreprocessorThreshold:
assert len(unique_values) <= 2
class TestVinPreprocessorOtsu:
    """Tests for Otsu's thresholding preprocessing."""

    def test_otsu_threshold_creates_binary_image(self) -> None:
        """Otsu's method should leave at most two distinct gray levels."""
        gray = np.full((100, 400), 128, dtype=np.uint8)
        thresholded = VinPreprocessor()._otsu_threshold(gray)
        assert np.unique(thresholded).size <= 2

    def test_preprocess_otsu_returns_result(self) -> None:
        """The full Otsu pipeline should emit bytes and record its steps."""
        outcome = VinPreprocessor().preprocess_otsu(create_test_image())
        assert outcome.image_bytes is not None
        assert len(outcome.image_bytes) > 0
        applied = outcome.preprocessing_applied
        assert "otsu_threshold" in applied
        assert "grayscale" in applied
class TestVinPreprocessorResolution:
    """Tests for resolution upscaling."""

    def test_upscale_small_image(self) -> None:
        """Images narrower than the VIN minimum width should be upscaled."""
        proc = VinPreprocessor()
        tiny = np.full((50, 200), 128, dtype=np.uint8)
        upscaled = proc._ensure_minimum_resolution(tiny)
        assert upscaled.shape[1] >= proc.MIN_WIDTH_FOR_VIN

    def test_no_upscale_large_image(self) -> None:
        """Images already wide enough should keep their original shape."""
        proc = VinPreprocessor()
        big = np.full((200, 800), 128, dtype=np.uint8)
        assert proc._ensure_minimum_resolution(big).shape == big.shape
class TestVinRegionDetection:
"""Tests for VIN region detection."""

View File

@@ -43,9 +43,9 @@ class TestVinValidator:
result = validator.calculate_check_digit("1HGBH41JXMN109186")
assert result == "X"
# 5YJSA1E28HF123456 has check digit 2 at position 9
# 5YJSA1E28HF123456 has check digit 5 at position 9
result = validator.calculate_check_digit("5YJSA1E28HF123456")
assert result == "8" # Verify this is correct for this VIN
assert result == "5"
def test_validate_check_digit_valid(self) -> None:
"""Test check digit validation with valid VIN."""
@@ -161,6 +161,27 @@ class TestVinValidator:
assert len(candidates) >= 1
assert candidates[0][0] == "1HGBH41JXMN109186"
def test_extract_candidates_fragmented_vin(self) -> None:
    """Test candidate extraction handles space-fragmented VINs from OCR."""
    validator = VinValidator()
    # Tesseract often splits a single VIN into whitespace-separated tokens.
    candidates = validator.extract_candidates("1HGBH 41JXMN 109186")
    assert len(candidates) >= 1
    best_vin = candidates[0][0]
    assert best_vin == "1HGBH41JXMN109186"
def test_extract_candidates_dash_fragmented_vin(self) -> None:
    """Test candidate extraction handles dash-separated VINs."""
    validator = VinValidator()
    # A dash misread into the middle of the VIN must not block extraction.
    candidates = validator.extract_candidates("1HGBH41J-XMN109186")
    assert len(candidates) >= 1
    best_vin = candidates[0][0]
    assert best_vin == "1HGBH41JXMN109186"
def test_extract_candidates_no_vin(self) -> None:
"""Test candidate extraction with no VIN."""
validator = VinValidator()