From ebc633fb36a39d46237665038fd551197520ae3a Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 10:47:40 -0600 Subject: [PATCH 01/16] feat: add OCR engine abstraction layer (refs #116) Introduce pluggable OcrEngine ABC with PaddleOCR PP-OCRv4 as primary engine and Tesseract wrapper for backward compatibility. Engine factory reads OCR_PRIMARY_ENGINE config to instantiate the correct engine. Co-Authored-By: Claude Opus 4.6 --- ocr/app/config.py | 6 ++ ocr/app/engines/__init__.py | 27 ++++++ ocr/app/engines/base_engine.py | 88 ++++++++++++++++++ ocr/app/engines/engine_factory.py | 52 +++++++++++ ocr/app/engines/paddle_engine.py | 133 ++++++++++++++++++++++++++++ ocr/app/engines/tesseract_engine.py | 114 ++++++++++++++++++++++++ ocr/requirements.txt | 2 + 7 files changed, 422 insertions(+) create mode 100644 ocr/app/engines/__init__.py create mode 100644 ocr/app/engines/base_engine.py create mode 100644 ocr/app/engines/engine_factory.py create mode 100644 ocr/app/engines/paddle_engine.py create mode 100644 ocr/app/engines/tesseract_engine.py diff --git a/ocr/app/config.py b/ocr/app/config.py index 384ffdc..c222693 100644 --- a/ocr/app/config.py +++ b/ocr/app/config.py @@ -11,6 +11,12 @@ class Settings: self.port: int = int(os.getenv("PORT", "8000")) self.tesseract_cmd: str = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract") + # OCR engine configuration + self.ocr_primary_engine: str = os.getenv("OCR_PRIMARY_ENGINE", "paddleocr") + self.ocr_confidence_threshold: float = float( + os.getenv("OCR_CONFIDENCE_THRESHOLD", "0.6") + ) + # Redis configuration for job queue self.redis_host: str = os.getenv("REDIS_HOST", "mvp-redis") self.redis_port: int = int(os.getenv("REDIS_PORT", "6379")) diff --git a/ocr/app/engines/__init__.py b/ocr/app/engines/__init__.py new file mode 100644 index 0000000..dcc565b --- /dev/null +++ b/ocr/app/engines/__init__.py @@ -0,0 +1,27 @@ +"""OCR engine abstraction layer. + +Provides a pluggable engine interface for OCR processing, +decoupling extractors from specific OCR libraries. +""" + +from app.engines.base_engine import ( + EngineError, + EngineProcessingError, + EngineUnavailableError, + OcrConfig, + OcrEngine, + OcrEngineResult, + WordBox, +) +from app.engines.engine_factory import create_engine + +__all__ = [ + "OcrEngine", + "OcrConfig", + "OcrEngineResult", + "WordBox", + "EngineError", + "EngineUnavailableError", + "EngineProcessingError", + "create_engine", +] diff --git a/ocr/app/engines/base_engine.py b/ocr/app/engines/base_engine.py new file mode 100644 index 0000000..ddca084 --- /dev/null +++ b/ocr/app/engines/base_engine.py @@ -0,0 +1,88 @@ +"""OCR engine abstract base class and shared data types.""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any + + +# --- Exception hierarchy --- + + +class EngineError(Exception): + """Base exception for all OCR engine errors.""" + + +class EngineUnavailableError(EngineError): + """Raised when an engine cannot be initialized (missing binary, bad config).""" + + +class EngineProcessingError(EngineError): + """Raised when an engine fails to process an image.""" + + +# --- Data types --- + + +@dataclass +class WordBox: + """A single recognized word with position and confidence.""" + + text: str + confidence: float # 0.0-1.0 + x: int = 0 + y: int = 0 + width: int = 0 + height: int = 0 + + +@dataclass +class OcrConfig: + """Engine-agnostic OCR configuration. + + Common fields cover the most frequent needs. Engine-specific + parameters go into ``hints`` so the interface stays stable. + """ + + char_whitelist: str | None = None # e.g. VIN: "ABCDEFGHJKLMNPRSTUVWXYZ0123456789" + single_line: bool = False # Treat image as a single text line + single_word: bool = False # Treat image as a single word + use_angle_cls: bool = True # Enable angle classification (PaddleOCR) + hints: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class OcrEngineResult: + """Normalized result returned by every engine implementation.""" + + text: str + confidence: float # 0.0-1.0 + word_boxes: list[WordBox] + engine_name: str # "paddleocr", "tesseract", "google_vision" + + +# --- Abstract base --- + + +class OcrEngine(ABC): + """Abstract base class that all OCR engines must implement.""" + + @abstractmethod + def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: + """Run OCR on preprocessed image bytes. + + Args: + image_bytes: Raw image bytes (PNG/JPEG). + config: Engine-agnostic configuration. + + Returns: + Normalized OCR result. + + Raises: + EngineProcessingError: If recognition fails. + EngineUnavailableError: If the engine is not ready. + """ + + @property + @abstractmethod + def name(self) -> str: + """Short identifier used in OcrEngineResult.engine_name.""" diff --git a/ocr/app/engines/engine_factory.py b/ocr/app/engines/engine_factory.py new file mode 100644 index 0000000..dad2f16 --- /dev/null +++ b/ocr/app/engines/engine_factory.py @@ -0,0 +1,52 @@ +"""Factory function for creating OCR engine instances from configuration.""" + +import logging + +from app.config import settings +from app.engines.base_engine import EngineUnavailableError, OcrEngine + +logger = logging.getLogger(__name__) + +# Valid engine identifiers +_ENGINE_REGISTRY: dict[str, str] = { + "paddleocr": "app.engines.paddle_engine.PaddleOcrEngine", + "tesseract": "app.engines.tesseract_engine.TesseractEngine", +} + + +def create_engine(engine_name: str | None = None) -> OcrEngine: + """Instantiate an OCR engine by name (defaults to config value). + + Args: + engine_name: Engine identifier ("paddleocr", "tesseract"). + Falls back to ``settings.ocr_primary_engine``. + + Returns: + Initialized OcrEngine instance. + + Raises: + EngineUnavailableError: If the engine cannot be loaded or initialized. + """ + name = (engine_name or settings.ocr_primary_engine).lower().strip() + + if name not in _ENGINE_REGISTRY: + raise EngineUnavailableError( + f"Unknown engine '{name}'. Available: {list(_ENGINE_REGISTRY.keys())}" + ) + + module_path, class_name = _ENGINE_REGISTRY[name].rsplit(".", 1) + + try: + import importlib + + module = importlib.import_module(module_path) + engine_cls = getattr(module, class_name) + engine: OcrEngine = engine_cls() + logger.info("Created OCR engine: %s", name) + return engine + except EngineUnavailableError: + raise + except Exception as exc: + raise EngineUnavailableError( + f"Failed to create engine '{name}': {exc}" + ) from exc diff --git a/ocr/app/engines/paddle_engine.py b/ocr/app/engines/paddle_engine.py new file mode 100644 index 0000000..41433f1 --- /dev/null +++ b/ocr/app/engines/paddle_engine.py @@ -0,0 +1,133 @@ +"""PaddleOCR engine wrapper using PP-OCRv4 models.""" + +import io +import logging +from typing import Any + +from app.engines.base_engine import ( + EngineProcessingError, + EngineUnavailableError, + OcrConfig, + OcrEngine, + OcrEngineResult, + WordBox, +) + +logger = logging.getLogger(__name__) + + +class PaddleOcrEngine(OcrEngine): + """PaddleOCR PP-OCRv4 engine with angle classification, CPU-only.""" + + def __init__(self) -> None: + self._ocr: Any | None = None + + @property + def name(self) -> str: + return "paddleocr" + + def _get_ocr(self) -> Any: + """Lazy-initialize PaddleOCR instance on first use.""" + if self._ocr is not None: + return self._ocr + try: + from paddleocr import PaddleOCR # type: ignore[import-untyped] + + self._ocr = PaddleOCR( + use_angle_cls=True, + lang="en", + use_gpu=False, + show_log=False, + ) + logger.info("PaddleOCR PP-OCRv4 initialized (CPU, angle_cls=True)") + return self._ocr + except ImportError as exc: + raise EngineUnavailableError( + "paddleocr is not installed. " + "Install with: pip install paddlepaddle paddleocr" + ) from exc + except Exception as exc: + raise EngineUnavailableError( + f"Failed to initialize PaddleOCR: {exc}" + ) from exc + + def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: + """Run PaddleOCR on image bytes. + + PaddleOCR returns: list of pages, each page is a list of + ``[[box_coords], (text, confidence)]`` entries. + """ + ocr = self._get_ocr() + + try: + import numpy as np # type: ignore[import-untyped] + from PIL import Image + + image = Image.open(io.BytesIO(image_bytes)).convert("RGB") + img_array = np.array(image) + + # PaddleOCR accepts numpy arrays + results = ocr.ocr(img_array, cls=config.use_angle_cls) + + if not results or not results[0]: + return OcrEngineResult( + text="", + confidence=0.0, + word_boxes=[], + engine_name=self.name, + ) + + word_boxes: list[WordBox] = [] + texts: list[str] = [] + confidences: list[float] = [] + + for line in results[0]: + box_coords = line[0] # [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] + text = line[1][0] + conf = float(line[1][1]) + + # Apply character whitelist filter if configured + if config.char_whitelist: + allowed = set(config.char_whitelist) + text = "".join(ch for ch in text if ch in allowed) + + if not text.strip(): + continue + + # Convert quadrilateral to bounding box + xs = [pt[0] for pt in box_coords] + ys = [pt[1] for pt in box_coords] + x_min, y_min = int(min(xs)), int(min(ys)) + x_max, y_max = int(max(xs)), int(max(ys)) + + word_boxes.append( + WordBox( + text=text.strip(), + confidence=conf, + x=x_min, + y=y_min, + width=x_max - x_min, + height=y_max - y_min, + ) + ) + texts.append(text.strip()) + confidences.append(conf) + + combined_text = " ".join(texts) + avg_confidence = ( + sum(confidences) / len(confidences) if confidences else 0.0 + ) + + return OcrEngineResult( + text=combined_text, + confidence=avg_confidence, + word_boxes=word_boxes, + engine_name=self.name, + ) + + except (EngineUnavailableError, EngineProcessingError): + raise + except Exception as exc: + raise EngineProcessingError( + f"PaddleOCR recognition failed: {exc}" + ) from exc diff --git a/ocr/app/engines/tesseract_engine.py b/ocr/app/engines/tesseract_engine.py new file mode 100644 index 0000000..02108ec --- /dev/null +++ b/ocr/app/engines/tesseract_engine.py @@ -0,0 +1,114 @@ +"""Tesseract engine wrapper for backward compatibility.""" + +import io +import logging + +from app.config import settings +from app.engines.base_engine import ( + EngineProcessingError, + EngineUnavailableError, + OcrConfig, + OcrEngine, + OcrEngineResult, + WordBox, +) + +logger = logging.getLogger(__name__) + + +class TesseractEngine(OcrEngine): + """pytesseract wrapper conforming to the OcrEngine interface.""" + + def __init__(self) -> None: + try: + import pytesseract # type: ignore[import-untyped] + + pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd + self._pytesseract = pytesseract + logger.info("TesseractEngine initialized (cmd=%s)", settings.tesseract_cmd) + except ImportError as exc: + raise EngineUnavailableError( + "pytesseract is not installed. " + "Install with: pip install pytesseract" + ) from exc + + @property + def name(self) -> str: + return "tesseract" + + def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: + """Run Tesseract OCR on image bytes.""" + try: + from PIL import Image + + image = Image.open(io.BytesIO(image_bytes)) + + # Build Tesseract config string from OcrConfig + tess_config = self._build_config(config) + + # Get word-level data + ocr_data = self._pytesseract.image_to_data( + image, + config=tess_config, + output_type=self._pytesseract.Output.DICT, + ) + + word_boxes: list[WordBox] = [] + texts: list[str] = [] + confidences: list[float] = [] + + for i, text in enumerate(ocr_data["text"]): + conf = int(ocr_data["conf"][i]) + if text.strip() and conf > 0: + normalized_conf = conf / 100.0 + word_boxes.append( + WordBox( + text=text.strip(), + confidence=normalized_conf, + x=int(ocr_data["left"][i]), + y=int(ocr_data["top"][i]), + width=int(ocr_data["width"][i]), + height=int(ocr_data["height"][i]), + ) + ) + texts.append(text.strip()) + confidences.append(normalized_conf) + + combined_text = " ".join(texts) + avg_confidence = ( + sum(confidences) / len(confidences) if confidences else 0.0 + ) + + return OcrEngineResult( + text=combined_text, + confidence=avg_confidence, + word_boxes=word_boxes, + engine_name=self.name, + ) + + except (EngineUnavailableError, EngineProcessingError): + raise + except Exception as exc: + raise EngineProcessingError( + f"Tesseract recognition failed: {exc}" + ) from exc + + def _build_config(self, config: OcrConfig) -> str: + """Translate OcrConfig into a Tesseract CLI config string.""" + parts: list[str] = [] + + # Page segmentation mode + if config.single_word: + parts.append("--psm 8") + elif config.single_line: + parts.append("--psm 7") + else: + # Default: assume uniform block of text + psm = config.hints.get("psm", 6) + parts.append(f"--psm {psm}") + + # Character whitelist + if config.char_whitelist: + parts.append(f"-c tessedit_char_whitelist={config.char_whitelist}") + + return " ".join(parts) diff --git a/ocr/requirements.txt b/ocr/requirements.txt index 8138d85..fb3c268 100644 --- a/ocr/requirements.txt +++ b/ocr/requirements.txt @@ -15,6 +15,8 @@ numpy>=1.24.0 # OCR Engines pytesseract>=0.3.10 +paddlepaddle>=2.6.0 +paddleocr>=2.8.0 # PDF Processing PyMuPDF>=1.23.0 From 013fb0c67a2a2ed4ab25503e1bf8f50d784ecde2 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 10:56:27 -0600 Subject: [PATCH 02/16] feat: migrate VIN/receipt extractors and OCR service to engine abstraction (refs #117) Replace direct pytesseract calls with OcrEngine interface in vin_extractor.py, receipt_extractor.py, and ocr_service.py. PSM mode fallbacks replaced with engine-agnostic single-line/single-word configs. Dead _process_ocr_data removed. Co-Authored-By: Claude Opus 4.6 --- ocr/app/extractors/receipt_extractor.py | 26 ++---- ocr/app/extractors/vin_extractor.py | 105 ++++++++++-------------- ocr/app/services/ocr_service.py | 42 ++-------- 3 files changed, 60 insertions(+), 113 deletions(-) diff --git a/ocr/app/extractors/receipt_extractor.py b/ocr/app/extractors/receipt_extractor.py index 6134988..111cfb1 100644 --- a/ocr/app/extractors/receipt_extractor.py +++ b/ocr/app/extractors/receipt_extractor.py @@ -1,16 +1,13 @@ """Receipt-specific OCR extractor with field extraction.""" -import io import logging import time from dataclasses import dataclass, field from typing import Any, Optional import magic -import pytesseract -from PIL import Image from pillow_heif import register_heif_opener -from app.config import settings +from app.engines import OcrConfig, create_engine from app.extractors.base import BaseExtractor from app.preprocessors.receipt_preprocessor import receipt_preprocessor from app.patterns import currency_matcher, date_matcher, fuel_matcher @@ -53,8 +50,8 @@ class ReceiptExtractor(BaseExtractor): } def __init__(self) -> None: - """Initialize receipt extractor.""" - pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd + """Initialize receipt extractor with engine from factory.""" + self._engine = create_engine() def extract( self, @@ -150,26 +147,19 @@ class ReceiptExtractor(BaseExtractor): detected = mime.from_buffer(file_bytes) return detected or "application/octet-stream" - def _perform_ocr(self, image_bytes: bytes, psm: int = 6) -> str: + def _perform_ocr(self, image_bytes: bytes) -> str: """ - Perform OCR on preprocessed image. + Perform OCR on preprocessed image via engine abstraction. Args: image_bytes: Preprocessed image bytes - psm: Tesseract page segmentation mode - 4 = Assume single column of text - 6 = Uniform block of text Returns: Raw OCR text """ - image = Image.open(io.BytesIO(image_bytes)) - - # Configure Tesseract for receipt OCR - # PSM 4 works well for columnar receipt text - config = f"--psm {psm}" - - return pytesseract.image_to_string(image, config=config) + config = OcrConfig() + result = self._engine.recognize(image_bytes, config) + return result.text def _detect_receipt_type(self, text: str) -> str: """ diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index 1edca3f..cce88e9 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -1,5 +1,4 @@ """VIN-specific OCR extractor with preprocessing and validation.""" -import io import logging import os import time @@ -8,11 +7,10 @@ from datetime import datetime from typing import Optional import magic -import pytesseract -from PIL import Image from pillow_heif import register_heif_opener from app.config import settings +from app.engines import OcrConfig, create_engine from app.extractors.base import BaseExtractor from app.preprocessors.vin_preprocessor import vin_preprocessor, BoundingBox from app.validators.vin_validator import vin_validator @@ -56,15 +54,15 @@ class VinExtractor(BaseExtractor): "image/heif", } - # VIN character whitelist for Tesseract + # VIN character whitelist (passed to engine for post-OCR filtering) VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789" # Fixed debug output directory (inside container) DEBUG_DIR = "/tmp/vin-debug" def __init__(self) -> None: - """Initialize VIN extractor.""" - pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd + """Initialize VIN extractor with engine from factory.""" + self._engine = create_engine() self._debug = settings.log_level.upper() == "DEBUG" def _save_debug_image(self, session_dir: str, name: str, data: bytes) -> None: @@ -135,21 +133,21 @@ class VinExtractor(BaseExtractor): # Perform OCR with VIN-optimized settings raw_text, word_confidences = self._perform_ocr(preprocessed_bytes) - logger.debug("PSM 6 raw text: '%s'", raw_text) - logger.debug("PSM 6 word confidences: %s", word_confidences) + logger.debug("Primary OCR raw text: '%s'", raw_text) + logger.debug("Primary OCR word confidences: %s", word_confidences) # Extract VIN candidates from raw text candidates = vin_validator.extract_candidates(raw_text) - logger.debug("PSM 6 candidates: %s", candidates) + logger.debug("Primary OCR candidates: %s", candidates) if not candidates: - # No VIN candidates found - try with different PSM modes + # No VIN candidates found - try alternate OCR configurations candidates = self._try_alternate_ocr(preprocessed_bytes) if not candidates: - # Try grayscale-only (no thresholding) — the Tesseract - # LSTM engine often performs better on non-binarized input - # because it does its own internal preprocessing. + # Try grayscale-only (no thresholding) — OCR engines often + # perform better on non-binarized input because they do + # their own internal preprocessing. gray_result = vin_preprocessor.preprocess( image_bytes, apply_threshold=False ) @@ -166,9 +164,9 @@ class VinExtractor(BaseExtractor): raw_text, word_confidences = self._perform_ocr( gray_result.image_bytes ) - logger.debug("Gray PSM 6 raw text: '%s'", raw_text) + logger.debug("Gray primary raw text: '%s'", raw_text) candidates = vin_validator.extract_candidates(raw_text) - logger.debug("Gray PSM 6 candidates: %s", candidates) + logger.debug("Gray primary candidates: %s", candidates) if not candidates: candidates = self._try_alternate_ocr( gray_result.image_bytes, prefix="Gray" @@ -188,9 +186,9 @@ class VinExtractor(BaseExtractor): ) raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes) - logger.debug("Otsu PSM 6 raw text: '%s'", raw_text) + logger.debug("Otsu primary raw text: '%s'", raw_text) candidates = vin_validator.extract_candidates(raw_text) - logger.debug("Otsu PSM 6 candidates: %s", candidates) + logger.debug("Otsu primary candidates: %s", candidates) if not candidates: candidates = self._try_alternate_ocr( otsu_result.image_bytes, prefix="Otsu" @@ -280,52 +278,31 @@ class VinExtractor(BaseExtractor): return detected or "application/octet-stream" def _perform_ocr( - self, image_bytes: bytes, psm: int = 6 + self, + image_bytes: bytes, + single_line: bool = False, + single_word: bool = False, ) -> tuple[str, list[float]]: """ - Perform OCR with VIN-optimized settings. + Perform OCR with VIN-optimized settings via engine abstraction. Args: image_bytes: Preprocessed image bytes - psm: Tesseract page segmentation mode - 6 = Uniform block of text - 7 = Single text line - 8 = Single word + single_line: Treat image as a single text line + single_word: Treat image as a single word Returns: Tuple of (raw_text, word_confidences) """ - image = Image.open(io.BytesIO(image_bytes)) - - # Configure Tesseract for VIN extraction - # OEM 1 = LSTM neural network engine (best accuracy) - # NOTE: tessedit_char_whitelist does NOT work with OEM 1 (LSTM). - # Using it causes empty/erratic output. Character filtering is - # handled post-OCR by vin_validator.correct_ocr_errors() instead. - config = ( - f"--psm {psm} " - f"--oem 1 " - f"-c load_system_dawg=false " - f"-c load_freq_dawg=false" + config = OcrConfig( + char_whitelist=self.VIN_WHITELIST, + single_line=single_line, + single_word=single_word, + use_angle_cls=True, ) - - # Get detailed OCR data - ocr_data = pytesseract.image_to_data( - image, config=config, output_type=pytesseract.Output.DICT - ) - - # Extract words and confidences - words = [] - confidences = [] - - for i, text in enumerate(ocr_data["text"]): - conf = int(ocr_data["conf"][i]) - if text.strip() and conf > 0: - words.append(text.strip()) - confidences.append(conf / 100.0) - - raw_text = " ".join(words) - return raw_text, confidences + result = self._engine.recognize(image_bytes, config) + word_confidences = [wb.confidence for wb in result.word_boxes] + return result.text, word_confidences def _try_alternate_ocr( self, @@ -335,21 +312,25 @@ class VinExtractor(BaseExtractor): """ Try alternate OCR configurations when initial extraction fails. - PSM modes tried in order: - 7 - Single text line - 8 - Single word - 11 - Sparse text (finds text in any order, good for angled photos) - 13 - Raw line (no Tesseract heuristics, good for clean VIN plates) + Modes tried: + single-line - Treat as a single text line + single-word - Treat as a single word + + For PaddleOCR, angle classification handles rotated/angled text + inherently, replacing the need for Tesseract PSM mode fallbacks. Returns: List of VIN candidates """ tag = f"{prefix} " if prefix else "" - for psm in (7, 8, 11, 13): - raw_text, _ = self._perform_ocr(image_bytes, psm=psm) - logger.debug("%sPSM %d raw text: '%s'", tag, psm, raw_text) + for mode_name, kwargs in [ + ("single-line", {"single_line": True}), + ("single-word", {"single_word": True}), + ]: + raw_text, _ = self._perform_ocr(image_bytes, **kwargs) + logger.debug("%s%s raw text: '%s'", tag, mode_name, raw_text) candidates = vin_validator.extract_candidates(raw_text) - logger.debug("%sPSM %d candidates: %s", tag, psm, candidates) + logger.debug("%s%s candidates: %s", tag, mode_name, candidates) if candidates: return candidates diff --git a/ocr/app/services/ocr_service.py b/ocr/app/services/ocr_service.py index 4c317b3..4d06452 100644 --- a/ocr/app/services/ocr_service.py +++ b/ocr/app/services/ocr_service.py @@ -1,15 +1,14 @@ -"""Core OCR service using Tesseract with HEIC support.""" +"""Core OCR service with HEIC support, using pluggable engine abstraction.""" import io import logging import time from typing import Optional import magic -import pytesseract from PIL import Image from pillow_heif import register_heif_opener -from app.config import settings +from app.engines import OcrConfig, create_engine from app.models import DocumentType, ExtractedField, OcrResponse from app.services.preprocessor import preprocessor @@ -32,8 +31,8 @@ class OcrService: } def __init__(self) -> None: - """Initialize OCR service.""" - pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd + """Initialize OCR service with engine from factory.""" + self._engine = create_engine() def extract( self, @@ -86,14 +85,11 @@ class OcrService: file_bytes, deskew=True, denoise=True ) - # Perform OCR - image = Image.open(io.BytesIO(file_bytes)) - ocr_data = pytesseract.image_to_data( - image, output_type=pytesseract.Output.DICT - ) - - # Extract text and calculate confidence - raw_text, confidence = self._process_ocr_data(ocr_data) + # Perform OCR via engine abstraction + config = OcrConfig() + result = self._engine.recognize(file_bytes, config) + raw_text = result.text + confidence = result.confidence # Detect document type from content document_type = self._detect_document_type(raw_text) @@ -160,26 +156,6 @@ class OcrService: return b"" - def _process_ocr_data( - self, ocr_data: dict - ) -> tuple[str, float]: - """Process Tesseract output to extract text and confidence.""" - words = [] - confidences = [] - - for i, text in enumerate(ocr_data["text"]): - # Filter out empty strings and low-confidence results - conf = int(ocr_data["conf"][i]) - if text.strip() and conf > 0: - words.append(text) - confidences.append(conf) - - raw_text = " ".join(words) - avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0 - - # Normalize confidence to 0-1 range (Tesseract returns 0-100) - return raw_text, avg_confidence / 100.0 - def _detect_document_type(self, text: str) -> DocumentType: """Detect document type from extracted text content.""" text_lower = text.lower() From 4ef942cb9d7e645bf31e37a924149b40e942ddb3 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 11:12:08 -0600 Subject: [PATCH 03/16] feat: add optional Google Vision cloud fallback engine (refs #118) CloudEngine wraps Google Vision TEXT_DETECTION with lazy init. HybridEngine runs primary engine, falls back to cloud when confidence is below threshold. Disabled by default (OCR_FALLBACK_ENGINE=none). Co-Authored-By: Claude Opus 4.6 --- ocr/app/config.py | 9 ++ ocr/app/engines/__init__.py | 6 ++ ocr/app/engines/cloud_engine.py | 166 ++++++++++++++++++++++++++++++ ocr/app/engines/engine_factory.py | 71 +++++++++---- ocr/app/engines/hybrid_engine.py | 116 +++++++++++++++++++++ ocr/requirements.txt | 1 + 6 files changed, 351 insertions(+), 18 deletions(-) create mode 100644 ocr/app/engines/cloud_engine.py create mode 100644 ocr/app/engines/hybrid_engine.py diff --git a/ocr/app/config.py b/ocr/app/config.py index c222693..4a15906 100644 --- a/ocr/app/config.py +++ b/ocr/app/config.py @@ -17,6 +17,15 @@ class Settings: os.getenv("OCR_CONFIDENCE_THRESHOLD", "0.6") ) + # Cloud fallback configuration (disabled by default) + self.ocr_fallback_engine: str = os.getenv("OCR_FALLBACK_ENGINE", "none") + self.ocr_fallback_threshold: float = float( + os.getenv("OCR_FALLBACK_THRESHOLD", "0.6") + ) + self.google_vision_key_path: str = os.getenv( + "GOOGLE_VISION_KEY_PATH", "/run/secrets/google-vision-key.json" + ) + # Redis configuration for job queue self.redis_host: str = os.getenv("REDIS_HOST", "mvp-redis") self.redis_port: int = int(os.getenv("REDIS_PORT", "6379")) diff --git a/ocr/app/engines/__init__.py b/ocr/app/engines/__init__.py index dcc565b..abc8b05 100644 --- a/ocr/app/engines/__init__.py +++ b/ocr/app/engines/__init__.py @@ -2,6 +2,12 @@ Provides a pluggable engine interface for OCR processing, decoupling extractors from specific OCR libraries. + +Engines: + - PaddleOcrEngine: PaddleOCR PP-OCRv4 (primary, CPU-only) + - TesseractEngine: pytesseract wrapper (backward compatibility) + - CloudEngine: Google Vision TEXT_DETECTION (optional cloud fallback) + - HybridEngine: Primary + fallback with confidence threshold """ from app.engines.base_engine import ( diff --git a/ocr/app/engines/cloud_engine.py b/ocr/app/engines/cloud_engine.py new file mode 100644 index 0000000..c768bdf --- /dev/null +++ b/ocr/app/engines/cloud_engine.py @@ -0,0 +1,166 @@ +"""Google Vision cloud OCR engine with lazy initialization.""" + +import logging +import os +from typing import Any + +from app.engines.base_engine import ( + EngineProcessingError, + EngineUnavailableError, + OcrConfig, + OcrEngine, + OcrEngineResult, + WordBox, +) + +logger = logging.getLogger(__name__) + +# Default path for Google Vision service account key (Docker secret mount) +_DEFAULT_KEY_PATH = "/run/secrets/google-vision-key.json" + + +class CloudEngine(OcrEngine): + """Google Vision TEXT_DETECTION wrapper with lazy initialization. + + The client is not created until the first ``recognize()`` call, + so the container starts normally even when the secret file is + missing or the dependency is not installed. + """ + + def __init__(self, key_path: str | None = None) -> None: + self._key_path = key_path or os.getenv( + "GOOGLE_VISION_KEY_PATH", _DEFAULT_KEY_PATH + ) + self._client: Any | None = None + + @property + def name(self) -> str: + return "google_vision" + + # ------------------------------------------------------------------ + # Lazy init + # ------------------------------------------------------------------ + + def _get_client(self) -> Any: + """Create the Vision client on first use.""" + if self._client is not None: + return self._client + + # Verify credentials file exists + if not os.path.isfile(self._key_path): + raise EngineUnavailableError( + f"Google Vision key not found at {self._key_path}. " + "Set GOOGLE_VISION_KEY_PATH or mount the secret." + ) + + try: + from google.cloud import vision # type: ignore[import-untyped] + + # Point the SDK at the service account key + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self._key_path + self._client = vision.ImageAnnotatorClient() + logger.info( + "Google Vision client initialized (key: %s)", self._key_path + ) + return self._client + except ImportError as exc: + raise EngineUnavailableError( + "google-cloud-vision is not installed. " + "Install with: pip install google-cloud-vision" + ) from exc + except Exception as exc: + raise EngineUnavailableError( + f"Failed to initialize Google Vision client: {exc}" + ) from exc + + # ------------------------------------------------------------------ + # OCR + # ------------------------------------------------------------------ + + def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: + """Run Google Vision TEXT_DETECTION on image bytes.""" + client = self._get_client() + + try: + from google.cloud import vision # type: ignore[import-untyped] + + image = vision.Image(content=image_bytes) + response = client.text_detection(image=image) + + if response.error.message: + raise EngineProcessingError( + f"Google Vision API error: {response.error.message}" + ) + + annotations = response.text_annotations + if not annotations: + return OcrEngineResult( + text="", + confidence=0.0, + word_boxes=[], + engine_name=self.name, + ) + + # First annotation is the full-page text; the rest are words + full_text = annotations[0].description.strip() + word_boxes: list[WordBox] = [] + confidences: list[float] = [] + + for annotation in annotations[1:]: + text = annotation.description + vertices = annotation.bounding_poly.vertices + + # Apply character whitelist filter if configured + if config.char_whitelist: + allowed = set(config.char_whitelist) + text = "".join(ch for ch in text if ch in allowed) + + if not text.strip(): + continue + + xs = [v.x for v in vertices] + ys = [v.y for v in vertices] + x_min, y_min = min(xs), min(ys) + x_max, y_max = max(xs), max(ys) + + # Google Vision TEXT_DETECTION does not return per-word + # confidence in annotations. Use 0.95 as the documented + # typical accuracy for clear images so comparisons with + # PaddleOCR are meaningful. + word_conf = 0.95 + word_boxes.append( + WordBox( + text=text.strip(), + confidence=word_conf, + x=x_min, + y=y_min, + width=x_max - x_min, + height=y_max - y_min, + ) + ) + confidences.append(word_conf) + + # Apply whitelist to full text too + if config.char_whitelist: + allowed = set(config.char_whitelist) + full_text = "".join( + ch for ch in full_text if ch in allowed or ch in " \n" + ) + + avg_confidence = ( + sum(confidences) / len(confidences) if confidences else 0.0 + ) + + return OcrEngineResult( + text=full_text, + confidence=avg_confidence, + word_boxes=word_boxes, + engine_name=self.name, + ) + + except (EngineUnavailableError, EngineProcessingError): + raise + except Exception as exc: + raise EngineProcessingError( + f"Google Vision recognition failed: {exc}" + ) from exc diff --git a/ocr/app/engines/engine_factory.py b/ocr/app/engines/engine_factory.py index dad2f16..49464d2 100644 --- a/ocr/app/engines/engine_factory.py +++ b/ocr/app/engines/engine_factory.py @@ -1,5 +1,6 @@ """Factory function for creating OCR engine instances from configuration.""" +import importlib import logging from app.config import settings @@ -7,28 +8,16 @@ from app.engines.base_engine import EngineUnavailableError, OcrEngine logger = logging.getLogger(__name__) -# Valid engine identifiers +# Valid engine identifiers (primary engines only; hybrid is constructed separately) _ENGINE_REGISTRY: dict[str, str] = { "paddleocr": "app.engines.paddle_engine.PaddleOcrEngine", "tesseract": "app.engines.tesseract_engine.TesseractEngine", + "google_vision": "app.engines.cloud_engine.CloudEngine", } -def create_engine(engine_name: str | None = None) -> OcrEngine: - """Instantiate an OCR engine by name (defaults to config value). - - Args: - engine_name: Engine identifier ("paddleocr", "tesseract"). - Falls back to ``settings.ocr_primary_engine``. - - Returns: - Initialized OcrEngine instance. - - Raises: - EngineUnavailableError: If the engine cannot be loaded or initialized. - """ - name = (engine_name or settings.ocr_primary_engine).lower().strip() - +def _create_single_engine(name: str) -> OcrEngine: + """Instantiate a single engine by registry name.""" if name not in _ENGINE_REGISTRY: raise EngineUnavailableError( f"Unknown engine '{name}'. Available: {list(_ENGINE_REGISTRY.keys())}" @@ -37,8 +26,6 @@ def create_engine(engine_name: str | None = None) -> OcrEngine: module_path, class_name = _ENGINE_REGISTRY[name].rsplit(".", 1) try: - import importlib - module = importlib.import_module(module_path) engine_cls = getattr(module, class_name) engine: OcrEngine = engine_cls() @@ -50,3 +37,51 @@ def create_engine(engine_name: str | None = None) -> OcrEngine: raise EngineUnavailableError( f"Failed to create engine '{name}': {exc}" ) from exc + + +def create_engine(engine_name: str | None = None) -> OcrEngine: + """Instantiate an OCR engine by name (defaults to config value). + + When a fallback engine is configured (``OCR_FALLBACK_ENGINE != "none"``), + returns a ``HybridEngine`` that wraps the primary with the fallback. + + Args: + engine_name: Engine identifier ("paddleocr", "tesseract"). + Falls back to ``settings.ocr_primary_engine``. + + Returns: + Initialized OcrEngine instance (possibly a HybridEngine wrapper). + + Raises: + EngineUnavailableError: If the primary engine cannot be loaded. + """ + name = (engine_name or settings.ocr_primary_engine).lower().strip() + primary = _create_single_engine(name) + + # Check for cloud fallback configuration + fallback_name = settings.ocr_fallback_engine.lower().strip() + if fallback_name == "none" or not fallback_name: + return primary + + # Create fallback engine (failure is non-fatal -- log and return primary only) + try: + fallback = _create_single_engine(fallback_name) + except EngineUnavailableError as exc: + logger.warning( + "Fallback engine '%s' unavailable, proceeding without fallback: %s", + fallback_name, + exc, + ) + return primary + + from app.engines.hybrid_engine import HybridEngine + + threshold = settings.ocr_fallback_threshold + hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=threshold) + logger.info( + "Created hybrid engine: primary=%s, fallback=%s, threshold=%.2f", + name, + fallback_name, + threshold, + ) + return hybrid diff --git a/ocr/app/engines/hybrid_engine.py b/ocr/app/engines/hybrid_engine.py new file mode 100644 index 0000000..5923ae9 --- /dev/null +++ b/ocr/app/engines/hybrid_engine.py @@ -0,0 +1,116 @@ +"""Hybrid OCR engine: primary engine with optional cloud fallback.""" + +import logging +import time + +from app.engines.base_engine import ( + EngineError, + EngineProcessingError, + OcrConfig, + OcrEngine, + OcrEngineResult, +) + +logger = logging.getLogger(__name__) + +# Maximum time (seconds) to wait for the cloud fallback +_CLOUD_TIMEOUT_SECONDS = 5.0 + + +class HybridEngine(OcrEngine): + """Runs a primary engine and falls back to a cloud engine when + the primary result confidence is below the configured threshold. + + If the fallback is ``None`` (default), this engine behaves identically + to the primary engine. Cloud failures are handled gracefully -- the + primary result is returned whenever the fallback is unavailable, + times out, or errors. + """ + + def __init__( + self, + primary: OcrEngine, + fallback: OcrEngine | None = None, + threshold: float = 0.6, + ) -> None: + self._primary = primary + self._fallback = fallback + self._threshold = threshold + + @property + def name(self) -> str: + fallback_name = self._fallback.name if self._fallback else "none" + return f"hybrid({self._primary.name}+{fallback_name})" + + def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: + """Run primary OCR, optionally falling back to cloud engine.""" + primary_result = self._primary.recognize(image_bytes, config) + + # Happy path: primary confidence meets threshold + if primary_result.confidence >= self._threshold: + logger.debug( + "Primary engine confidence %.2f >= threshold %.2f, no fallback", + primary_result.confidence, + self._threshold, + ) + return primary_result + + # No fallback configured -- return primary result as-is + if self._fallback is None: + logger.debug( + "Primary confidence %.2f < threshold %.2f but no fallback configured", + primary_result.confidence, + self._threshold, + ) + return primary_result + + # Attempt cloud fallback with timeout guard + logger.info( + "Primary confidence %.2f < threshold %.2f, trying fallback (%s)", + primary_result.confidence, + self._threshold, + self._fallback.name, + ) + + try: + start = time.monotonic() + fallback_result = self._fallback.recognize(image_bytes, config) + elapsed = time.monotonic() - start + + if elapsed > _CLOUD_TIMEOUT_SECONDS: + logger.warning( + "Cloud fallback took %.1fs (> %.1fs limit), using primary result", + elapsed, + _CLOUD_TIMEOUT_SECONDS, + ) + return primary_result + + # Return whichever result has higher confidence + if fallback_result.confidence > primary_result.confidence: + logger.info( + "Fallback confidence %.2f > primary %.2f, using fallback result", + fallback_result.confidence, + primary_result.confidence, + ) + return fallback_result + + logger.info( + "Primary confidence %.2f >= fallback %.2f, keeping primary result", + primary_result.confidence, + fallback_result.confidence, + ) + return primary_result + + except EngineError as exc: + logger.warning( + "Cloud fallback failed (%s), returning primary result: %s", + self._fallback.name, + exc, + ) + return primary_result + except Exception as exc: + logger.warning( + "Unexpected cloud fallback error, returning primary result: %s", + exc, + ) + return primary_result diff --git a/ocr/requirements.txt b/ocr/requirements.txt index fb3c268..9ac83ad 100644 --- a/ocr/requirements.txt +++ b/ocr/requirements.txt @@ -17,6 +17,7 @@ numpy>=1.24.0 pytesseract>=0.3.10 paddlepaddle>=2.6.0 paddleocr>=2.8.0 +google-cloud-vision>=3.7.0 # PDF Processing PyMuPDF>=1.23.0 From 9b6417379beeadeabaf4f0ffbe965d6bd17c6684 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 11:17:44 -0600 Subject: [PATCH 04/16] chore: update Docker and compose files for PaddleOCR engine (refs #119) - Replace libtesseract-dev with libgomp1 (OpenMP for PaddlePaddle) - Pre-download PP-OCRv4 models during Docker build - Add OCR engine env vars to all compose files (base, staging, prod) - Add optional Google Vision secret mount (commented, enable on demand) - Create google-vision-key.json.example placeholder Co-Authored-By: Claude Opus 4.6 --- docker-compose.prod.yml | 6 +++++- docker-compose.staging.yml | 9 +++++++++ docker-compose.yml | 8 ++++++++ ocr/Dockerfile | 18 +++++++++++++++++- secrets/app/google-vision-key.json.example | 18 ++++++++++++++++++ 5 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 secrets/app/google-vision-key.json.example diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 076aeaa..fb64c09 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -38,13 +38,17 @@ services: STRIPE_ENTERPRISE_MONTHLY_PRICE_ID: prod_Toj8xGEui9jl6j STRIPE_ENTERPRISE_YEARLY_PRICE_ID: prod_Toj9A7A773xrdn - # OCR - Production log level + # OCR - Production log level + engine config mvp-ocr: environment: LOG_LEVEL: error REDIS_HOST: mvp-redis REDIS_PORT: 6379 REDIS_DB: 1 + OCR_PRIMARY_ENGINE: paddleocr + OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none} + OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6} + GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json # PostgreSQL - Remove dev ports, production log level mvp-postgres: diff --git a/docker-compose.staging.yml b/docker-compose.staging.yml index 5702e00..253b78b 100644 --- a/docker-compose.staging.yml +++ b/docker-compose.staging.yml @@ -63,6 +63,15 @@ services: mvp-ocr: image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest} container_name: mvp-ocr-staging + environment: + LOG_LEVEL: debug + REDIS_HOST: mvp-redis + REDIS_PORT: 6379 + REDIS_DB: 1 + OCR_PRIMARY_ENGINE: paddleocr + OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none} + OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6} + GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json # ======================================== # PostgreSQL (Staging - Separate Database) diff --git a/docker-compose.yml b/docker-compose.yml index b79e2c9..ae4b8c6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -193,8 +193,16 @@ services: REDIS_HOST: mvp-redis REDIS_PORT: 6379 REDIS_DB: 1 + # OCR engine configuration (PaddleOCR primary, cloud fallback optional) + OCR_PRIMARY_ENGINE: paddleocr + OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none} + OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6} + GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json volumes: - /tmp/vin-debug:/tmp/vin-debug + # Optional: Uncomment to enable Google Vision cloud fallback. + # Requires: secrets/app/google-vision-key.json and OCR_FALLBACK_ENGINE=google_vision + # - ./secrets/app/google-vision-key.json:/run/secrets/google-vision-key.json:ro networks: - backend - database diff --git a/ocr/Dockerfile b/ocr/Dockerfile index 90aa37f..c460f0b 100644 --- a/ocr/Dockerfile +++ b/ocr/Dockerfile @@ -1,5 +1,9 @@ # Production Dockerfile for MotoVaultPro OCR Service # Uses mirrored base images from Gitea Package Registry +# +# Primary engine: PaddleOCR PP-OCRv4 (models baked into image) +# Backward compat: Tesseract 5.x (optional, via TesseractEngine) +# Cloud fallback: Google Vision (optional, requires API key at runtime) # Build argument for registry (defaults to Gitea mirrors, falls back to Docker Hub) ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors @@ -7,10 +11,16 @@ ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors FROM ${REGISTRY_MIRRORS}/python:3.13-slim # System dependencies +# - tesseract-ocr/eng: Backward-compatible OCR engine (used by TesseractEngine) +# - libgomp1: OpenMP runtime required by PaddlePaddle +# - libheif1/libheif-dev: HEIF image support (iPhone photos) +# - libglib2.0-0: GLib shared library (OpenCV dependency) +# - libmagic1: File type detection +# - curl: Health check endpoint RUN apt-get update && apt-get install -y --no-install-recommends \ tesseract-ocr \ tesseract-ocr-eng \ - libtesseract-dev \ + libgomp1 \ libheif1 \ libheif-dev \ libglib2.0-0 \ @@ -23,6 +33,12 @@ WORKDIR /app COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime). +# Models are baked into the image so container starts are fast and +# no network access is needed at runtime for model download. +RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)" \ + && echo "PaddleOCR PP-OCRv4 models downloaded and verified" + COPY . . EXPOSE 8000 diff --git a/secrets/app/google-vision-key.json.example b/secrets/app/google-vision-key.json.example new file mode 100644 index 0000000..67ef039 --- /dev/null +++ b/secrets/app/google-vision-key.json.example @@ -0,0 +1,18 @@ +{ + "_comment": "Google Vision API service account key for OCR cloud fallback", + "_instructions": [ + "1. Create a Google Cloud service account with Vision API access", + "2. Download the JSON key file", + "3. Save it as secrets/app/google-vision-key.json (gitignored)", + "4. Uncomment the volume mount in docker-compose.yml", + "5. Set OCR_FALLBACK_ENGINE=google_vision" + ], + "type": "service_account", + "project_id": "your-project-id", + "private_key_id": "", + "private_key": "", + "client_email": "your-sa@your-project-id.iam.gserviceaccount.com", + "client_id": "", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token" +} From 3c1a090ae32352ee086024fa85cf8f181565d32e Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 11:29:16 -0600 Subject: [PATCH 05/16] fix: resolve crop tool regression with stale ref and aspect ratio minSize (refs #120) Three bugs fixed in the draw-first crop tool introduced by PR #114: 1. Stale cropAreaRef: replaced useEffect-based ref sync with direct synchronous updates in handleMove and handleDrawStart. The useEffect ran after browser paint, so handleDragEnd read stale values (often {width:0, height:0}), preventing cropDrawn from being set. 2. Aspect ratio minSize: when aspectRatio=6 (VIN mode), height=width/6 required width>=60% to pass the height>=10% check. Now only checks width>=minSize when aspect ratio constrains height. 3. Bounds clamping: aspect-ratio-forced height could push crop area past 100% of container. Now clamps y position to keep within bounds. Co-Authored-By: Claude Opus 4.6 --- .../components/CameraCapture/useImageCrop.ts | 42 ++++++++++++------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/frontend/src/shared/components/CameraCapture/useImageCrop.ts b/frontend/src/shared/components/CameraCapture/useImageCrop.ts index 562d1b3..5399273 100644 --- a/frontend/src/shared/components/CameraCapture/useImageCrop.ts +++ b/frontend/src/shared/components/CameraCapture/useImageCrop.ts @@ -95,10 +95,6 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet const drawOriginRef = useRef({ x: 0, y: 0 }); const cropAreaRef = useRef(cropArea); - useEffect(() => { - cropAreaRef.current = cropArea; - }, [cropArea]); - const setCropArea = useCallback( (area: CropArea) => { setCropAreaState(getAspectRatioAdjustedCrop(area)); @@ -177,7 +173,9 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet startPosRef.current = { x: clientX, y: clientY }; drawOriginRef.current = { x, y }; - setCropAreaState({ x, y, width: 0, height: 0 }); + const initial = { x, y, width: 0, height: 0 }; + setCropAreaState(initial); + cropAreaRef.current = initial; isDrawingRef.current = true; activeHandleRef.current = null; @@ -203,18 +201,24 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet const originX = drawOriginRef.current.x; const originY = drawOriginRef.current.y; - let newCrop: CropArea = { + const drawnWidth = Math.abs(currentX - originX); + const drawnHeight = aspectRatio + ? drawnWidth / aspectRatio + : Math.abs(currentY - originY); + let drawnY = Math.min(originY, currentY); + // Clamp so crop doesn't exceed container bounds when aspect ratio forces height + if (aspectRatio && drawnY + drawnHeight > 100) { + drawnY = Math.max(0, 100 - drawnHeight); + } + const newCrop: CropArea = { x: Math.min(originX, currentX), - y: Math.min(originY, currentY), - width: Math.abs(currentX - originX), - height: Math.abs(currentY - originY), + y: drawnY, + width: drawnWidth, + height: drawnHeight, }; - if (aspectRatio) { - newCrop.height = newCrop.width / aspectRatio; - } - setCropAreaState(newCrop); + cropAreaRef.current = newCrop; return; } @@ -303,7 +307,9 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet break; } - setCropAreaState(constrainCrop(newCrop)); + const constrained = constrainCrop(newCrop); + setCropAreaState(constrained); + cropAreaRef.current = constrained; }, [isDragging, constrainCrop, aspectRatio] ); @@ -312,13 +318,17 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet if (isDrawingRef.current) { isDrawingRef.current = false; const area = cropAreaRef.current; - if (area.width >= minSize && area.height >= minSize) { + // When aspect ratio constrains one dimension, only check the free dimension + const meetsMinSize = aspectRatio + ? area.width >= minSize + : area.width >= minSize && area.height >= minSize; + if (meetsMinSize) { setCropDrawn(true); } } activeHandleRef.current = null; setIsDragging(false); - }, [minSize]); + }, [minSize, aspectRatio]); // Add global event listeners for drag useEffect(() => { From 1e96baca6fcb41a02fc2400b6f1c331443911ce9 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 11:32:36 -0600 Subject: [PATCH 06/16] fix: workflow contract --- .ai/workflow-contract.json | 2 +- frontend/.claude/tdd-guard/data/test.json | 115 +++++++++++++++++++++- 2 files changed, 114 insertions(+), 3 deletions(-) diff --git a/.ai/workflow-contract.json b/.ai/workflow-contract.json index af6bceb..be948e2 100644 --- a/.ai/workflow-contract.json +++ b/.ai/workflow-contract.json @@ -45,7 +45,7 @@ "parent_issue": "The original feature issue. Tracks overall status. Only the parent gets status label transitions.", "sub_issue_title_format": "{type}: {summary} (#{parent_index})", "sub_issue_body": "First line must be 'Relates to #{parent_index}'. Each sub-issue is a self-contained unit of work.", - "sub_issue_labels": "status/backlog + same type/* as parent. Sub-issues stay in backlog; parent issue tracks status.", + "sub_issue_labels": "status/in-progress + same type/* as parent. Sub-issues move to in-progress as they are worked on.", "sub_issue_milestone": "Same sprint milestone as parent.", "rules": [ "ONE branch for the parent issue. Never create branches per sub-issue.", diff --git a/frontend/.claude/tdd-guard/data/test.json b/frontend/.claude/tdd-guard/data/test.json index 6d8590e..fd77c20 100644 --- a/frontend/.claude/tdd-guard/data/test.json +++ b/frontend/.claude/tdd-guard/data/test.json @@ -1,5 +1,116 @@ { - "testModules": [], + "testModules": [ + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/shared/components/CameraCapture/CameraCapture.test.tsx", + "tests": [ + { + "name": "shows loading state while requesting permission", + "fullName": "CameraCapture Permission handling shows loading state while requesting permission", + "state": "passed" + }, + { + "name": "shows error when permission denied", + "fullName": "CameraCapture Permission handling shows error when permission denied", + "state": "passed" + }, + { + "name": "shows error when camera unavailable", + "fullName": "CameraCapture Permission handling shows error when camera unavailable", + "state": "passed" + }, + { + "name": "shows viewfinder when camera access granted", + "fullName": "CameraCapture Viewfinder shows viewfinder when camera access granted", + "state": "passed" + }, + { + "name": "shows cancel button in viewfinder", + "fullName": "CameraCapture Viewfinder shows cancel button in viewfinder", + "state": "passed" + }, + { + "name": "calls onCancel when cancel button clicked", + "fullName": "CameraCapture Viewfinder calls onCancel when cancel button clicked", + "state": "passed" + }, + { + "name": "shows VIN guidance when guidanceType is vin", + "fullName": "CameraCapture Guidance overlay shows VIN guidance when guidanceType is vin", + "state": "passed" + }, + { + "name": "shows receipt guidance when guidanceType is receipt", + "fullName": "CameraCapture Guidance overlay shows receipt guidance when guidanceType is receipt", + "state": "passed" + }, + { + "name": "shows upload file button in viewfinder", + "fullName": "CameraCapture File fallback shows upload file button in viewfinder", + "state": "passed" + }, + { + "name": "switches to file fallback when upload file clicked", + "fullName": "CameraCapture File fallback switches to file fallback when upload file clicked", + "state": "passed" + }, + { + "name": "renders upload area", + "fullName": "FileInputFallback renders upload area", + "state": "passed" + }, + { + "name": "shows accepted formats", + "fullName": "FileInputFallback shows accepted formats", + "state": "passed" + }, + { + "name": "shows max file size", + "fullName": "FileInputFallback shows max file size", + "state": "passed" + }, + { + "name": "calls onCancel when cancel clicked", + "fullName": "FileInputFallback calls onCancel when cancel clicked", + "state": "passed" + }, + { + "name": "shows error for invalid file type", + "fullName": "FileInputFallback shows error for invalid file type", + "state": "passed" + }, + { + "name": "shows error for file too large", + "fullName": "FileInputFallback shows error for file too large", + "state": "passed" + }, + { + "name": "calls onFileSelect with valid file", + "fullName": "FileInputFallback calls onFileSelect with valid file", + "state": "passed" + }, + { + "name": "renders nothing when type is none", + "fullName": "GuidanceOverlay renders nothing when type is none", + "state": "passed" + }, + { + "name": "renders VIN guidance with correct description", + "fullName": "GuidanceOverlay renders VIN guidance with correct description", + "state": "passed" + }, + { + "name": "renders receipt guidance with correct description", + "fullName": "GuidanceOverlay renders receipt guidance with correct description", + "state": "passed" + }, + { + "name": "renders document guidance with correct description", + "fullName": "GuidanceOverlay renders document guidance with correct description", + "state": "passed" + } + ] + } + ], "unhandledErrors": [], - "reason": "failed" + "reason": "passed" } \ No newline at end of file From 47c56764983ec6b78abf2bbbf91eae7ca8db218e Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 11:42:51 -0600 Subject: [PATCH 07/16] chore: update OCR tests and documentation (refs #121) Add engine abstraction tests and update docs to reflect PaddleOCR primary architecture with optional Google Vision cloud fallback. Co-Authored-By: Claude Opus 4.6 --- .ai/context.json | 2 +- docs/CLAUDE.md | 2 +- docs/ocr-pipeline-tech-stack.md | 145 +++--- ocr/CLAUDE.md | 5 +- ocr/app/CLAUDE.md | 1 + ocr/tests/test_engine_abstraction.py | 675 +++++++++++++++++++++++++++ ocr/tests/test_vin_extraction.py | 108 ++++- 7 files changed, 870 insertions(+), 68 deletions(-) create mode 100644 ocr/tests/test_engine_abstraction.py diff --git a/.ai/context.json b/.ai/context.json index 1e2c4bd..06b6032 100644 --- a/.ai/context.json +++ b/.ai/context.json @@ -108,7 +108,7 @@ }, "mvp-ocr": { "type": "ocr_service", - "description": "Python-based OCR for document text extraction", + "description": "Python OCR service with pluggable engine abstraction (PaddleOCR PP-OCRv4 primary, optional Google Vision cloud fallback, Tesseract backward compat)", "port": 8000 }, "mvp-loki": { diff --git a/docs/CLAUDE.md b/docs/CLAUDE.md index e17808d..59ba6eb 100644 --- a/docs/CLAUDE.md +++ b/docs/CLAUDE.md @@ -18,5 +18,5 @@ | `AUDIT.md` | Audit documentation | Security audits, compliance | | `MVP-COLOR-SCHEME.md` | Color scheme reference | UI styling decisions | | `LOGGING.md` | Unified logging system | Log levels, correlation IDs, Grafana | -| `ocr-pipeline-tech-stack.md` | OCR pipeline technology decisions | OCR architecture, Tesseract setup | +| `ocr-pipeline-tech-stack.md` | OCR pipeline technology decisions | OCR architecture, PaddleOCR engine abstraction | | `TIER-GATING.md` | Subscription tier gating rules | Feature access by tier, vehicle limits | diff --git a/docs/ocr-pipeline-tech-stack.md b/docs/ocr-pipeline-tech-stack.md index 5160a9a..a80090c 100644 --- a/docs/ocr-pipeline-tech-stack.md +++ b/docs/ocr-pipeline-tech-stack.md @@ -118,35 +118,48 @@ │ ├─────────────────────────────────────────────────────────┤ │ │ │ │ │ ┌─────────────────────────────────────────────────┐ │ - │ │ │ 5a. Primary OCR: Tesseract 5.x │ │ - │ │ │ │ │ - │ │ │ • Engine: LSTM (--oem 1) │ │ - │ │ │ • Page segmentation: Auto (--psm 3) │ │ - │ │ │ • Output: hOCR with word confidence │ │ + │ │ │ 5a. Engine Abstraction Layer │ │ + │ │ │ │ │ + │ │ │ OcrEngine ABC -> PaddleOcrEngine (primary) │ │ + │ │ │ -> CloudEngine (optional fallback) │ │ + │ │ │ -> TesseractEngine (backward compat)│ │ + │ │ │ -> HybridEngine (primary+fallback) │ │ + │ │ └─────────────────────────────────────────────────┘ │ + │ │ │ │ + │ │ ▼ │ + │ │ ┌─────────────────────────────────────────────────┐ │ + │ │ │ 5b. Primary OCR: PaddleOCR PP-OCRv4 │ │ + │ │ │ │ │ + │ │ │ • Scene text detection + angle classification │ │ + │ │ │ • CPU-only, models baked into Docker image │ │ + │ │ │ • Normalized output: text, confidence, word boxes│ │ │ │ └─────────────────────────────────────────────────┘ │ │ │ │ │ │ │ ▼ │ │ │ ┌───────────────┐ │ │ │ │ Confidence │ │ - │ │ │ > 80% ? │ │ + │ │ │ >= 60% ? │ │ │ │ └───────────────┘ │ │ │ │ │ │ - │ │ YES ──┘ └── NO │ + │ │ YES ──┘ └── NO (and cloud enabled) │ │ │ │ │ │ │ │ │ ▼ │ │ │ │ ┌─────────────────────────────────┐ │ - │ │ │ │ 5b. Fallback: PaddleOCR │ │ - │ │ │ │ │ │ - │ │ │ │ • Better for degraded images │ │ - │ │ │ │ • Better table detection │ │ - │ │ │ │ • Slower but more accurate │ │ + │ │ │ │ 5c. Optional Cloud Fallback │ │ + │ │ │ │ (Google Vision API) │ │ + │ │ │ │ │ │ + │ │ │ │ • Disabled by default │ │ + │ │ │ │ • 5-second timeout guard │ │ + │ │ │ │ • Returns higher-confidence │ │ + │ │ │ │ result of primary vs fallback │ │ │ │ │ └─────────────────────────────────┘ │ │ │ │ │ │ │ │ ▼ ▼ │ │ │ ┌─────────────────────────────────┐ │ - │ │ │ 5c. Result Merging │ │ - │ │ │ • Merge by bounding box │ │ + │ │ │ 5d. HybridEngine Result │ │ + │ │ │ • Compare confidences │ │ │ │ │ • Keep highest confidence │ │ + │ │ │ • Graceful fallback on error │ │ │ │ └─────────────────────────────────┘ │ │ │ │ │ └─────────────────────────────────────────────────────────┘ @@ -257,10 +270,10 @@ | Component | Tool | Purpose | |------------------------|-----------------------|--------------------------------------| -| **Primary OCR** | Tesseract 5.x | Fast, reliable text extraction | -| **Python Binding** | pytesseract | Tesseract Python wrapper | -| **Fallback OCR** | PaddleOCR | Higher accuracy, better tables | -| **Layout Analysis** | PaddleOCR / LayoutParser | Document structure detection | +| **Primary OCR** | PaddleOCR PP-OCRv4 | Highest accuracy scene text, CPU-only | +| **Cloud Fallback** | Google Vision API | Optional cloud fallback (disabled by default) | +| **Backward Compat** | Tesseract 5.x / pytesseract | Legacy engine, configurable via env var | +| **Engine Abstraction** | `OcrEngine` ABC | Pluggable engine interface in `ocr/app/engines/` | ### Data Extraction @@ -291,85 +304,93 @@ fastapi>=0.100.0 uvicorn[standard]>=0.23.0 python-multipart>=0.0.6 - -# Task Queue -celery>=5.3.0 -redis>=4.6.0 +pydantic>=2.0.0 # File Detection & Handling python-magic>=0.4.27 pillow>=10.0.0 pillow-heif>=0.13.0 -# PDF Processing -pymupdf>=1.23.0 - # Image Preprocessing opencv-python-headless>=4.8.0 -deskew>=1.4.0 -scikit-image>=0.21.0 numpy>=1.24.0 # OCR Engines pytesseract>=0.3.10 -paddlepaddle>=2.5.0 -paddleocr>=2.7.0 +paddlepaddle>=2.6.0 +paddleocr>=2.8.0 +google-cloud-vision>=3.7.0 -# Table Extraction -img2table>=1.2.0 -camelot-py[cv]>=0.11.0 +# PDF Processing +PyMuPDF>=1.23.0 -# NLP & Data -spacy>=3.6.0 -pandas>=2.0.0 +# Redis for job queue +redis>=5.0.0 -# Storage & Database -boto3>=1.28.0 -psycopg2-binary>=2.9.0 -sqlalchemy>=2.0.0 +# HTTP client for callbacks +httpx>=0.24.0 + +# Testing +pytest>=7.4.0 +pytest-asyncio>=0.21.0 ``` ### System Package Requirements (Ubuntu/Debian) ```bash -# Tesseract OCR -apt-get install tesseract-ocr tesseract-ocr-eng libtesseract-dev +# Tesseract OCR (backward compatibility engine) +apt-get install tesseract-ocr tesseract-ocr-eng + +# PaddlePaddle OpenMP runtime +apt-get install libgomp1 # HEIC Support -apt-get install libheif-examples libheif-dev +apt-get install libheif1 libheif-dev -# OpenCV dependencies -apt-get install libgl1-mesa-glx libglib2.0-0 +# GLib (OpenCV dependency) +apt-get install libglib2.0-0 -# PDF rendering dependencies -apt-get install libmupdf-dev mupdf-tools - -# Image processing -apt-get install libmagic1 ghostscript - -# Camelot dependencies -apt-get install ghostscript python3-tk +# File type detection +apt-get install libmagic1 ``` +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `OCR_PRIMARY_ENGINE` | `paddleocr` | Primary OCR engine (`paddleocr`, `tesseract`) | +| `OCR_CONFIDENCE_THRESHOLD` | `0.6` | Minimum confidence threshold | +| `OCR_FALLBACK_ENGINE` | `none` | Fallback engine (`google_vision`, `none`) | +| `OCR_FALLBACK_THRESHOLD` | `0.6` | Confidence below this triggers fallback | +| `GOOGLE_VISION_KEY_PATH` | `/run/secrets/google-vision-key.json` | Path to Google Vision service account key | + --- ## DOCKERFILE ```dockerfile -FROM python:3.11-slim +# Primary engine: PaddleOCR PP-OCRv4 (models baked into image) +# Backward compat: Tesseract 5.x (optional, via TesseractEngine) +# Cloud fallback: Google Vision (optional, requires API key at runtime) + +FROM python:3.13-slim # System dependencies +# - tesseract-ocr/eng: Backward-compatible OCR engine +# - libgomp1: OpenMP runtime required by PaddlePaddle +# - libheif1/libheif-dev: HEIF image support (iPhone photos) +# - libglib2.0-0: GLib shared library (OpenCV dependency) +# - libmagic1: File type detection +# - curl: Health check endpoint RUN apt-get update && apt-get install -y --no-install-recommends \ tesseract-ocr \ tesseract-ocr-eng \ - libtesseract-dev \ - libheif-examples \ + libgomp1 \ + libheif1 \ libheif-dev \ - libgl1-mesa-glx \ libglib2.0-0 \ libmagic1 \ - ghostscript \ - poppler-utils \ + curl \ && rm -rf /var/lib/apt/lists/* # Python dependencies @@ -377,11 +398,9 @@ WORKDIR /app COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -# Download spaCy model -RUN python -m spacy download en_core_web_sm - -# Download PaddleOCR models (cached in image) -RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en')" +# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime) +RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)" \ + && echo "PaddleOCR PP-OCRv4 models downloaded and verified" COPY . . diff --git a/ocr/CLAUDE.md b/ocr/CLAUDE.md index 2020969..1f3988d 100644 --- a/ocr/CLAUDE.md +++ b/ocr/CLAUDE.md @@ -1,10 +1,12 @@ # ocr/ +Python OCR microservice. Primary engine: PaddleOCR PP-OCRv4 with optional Google Vision cloud fallback. Pluggable engine abstraction in `app/engines/`. + ## Files | File | What | When to read | | ---- | ---- | ------------ | -| `Dockerfile` | Container build definition | Docker builds, deployment | +| `Dockerfile` | Container build (PaddleOCR models baked in) | Docker builds, deployment | | `requirements.txt` | Python dependencies | Adding dependencies | ## Subdirectories @@ -12,4 +14,5 @@ | Directory | What | When to read | | --------- | ---- | ------------ | | `app/` | FastAPI application source | OCR endpoint development | +| `app/engines/` | Engine abstraction layer (OcrEngine ABC, factory, hybrid) | Adding or changing OCR engines | | `tests/` | Test suite | Adding or modifying tests | diff --git a/ocr/app/CLAUDE.md b/ocr/app/CLAUDE.md index 26c799a..8fbc7f1 100644 --- a/ocr/app/CLAUDE.md +++ b/ocr/app/CLAUDE.md @@ -12,6 +12,7 @@ | Directory | What | When to read | | --------- | ---- | ------------ | +| `engines/` | OCR engine abstraction (PaddleOCR primary, Google Vision fallback, Tesseract compat) | Engine changes, adding new engines | | `extractors/` | Data extraction logic | Adding new extraction types | | `models/` | Data models and schemas | Request/response types | | `patterns/` | Regex and parsing patterns | Pattern matching rules | diff --git a/ocr/tests/test_engine_abstraction.py b/ocr/tests/test_engine_abstraction.py new file mode 100644 index 0000000..2e8c150 --- /dev/null +++ b/ocr/tests/test_engine_abstraction.py @@ -0,0 +1,675 @@ +"""Tests for OCR engine abstraction layer. + +Covers: base types, exception hierarchy, PaddleOcrEngine, +TesseractEngine, CloudEngine, HybridEngine, and engine_factory. +""" + +import io +from unittest.mock import MagicMock, patch + +import pytest +from PIL import Image + +from app.engines.base_engine import ( + EngineError, + EngineProcessingError, + EngineUnavailableError, + OcrConfig, + OcrEngine, + OcrEngineResult, + WordBox, +) + + +# --- Helpers --- + + +def _create_test_image_bytes() -> bytes: + """Create minimal PNG image bytes for engine testing.""" + img = Image.new("RGB", (100, 50), (255, 255, 255)) + buf = io.BytesIO() + img.save(buf, format="PNG") + return buf.getvalue() + + +def _make_result( + text: str, confidence: float, engine_name: str +) -> OcrEngineResult: + """Create a minimal OcrEngineResult for testing.""" + return OcrEngineResult( + text=text, confidence=confidence, word_boxes=[], engine_name=engine_name + ) + + +# --------------------------------------------------------------------------- +# Exception hierarchy +# --------------------------------------------------------------------------- + + +class TestExceptionHierarchy: + """Engine errors form a proper hierarchy under EngineError.""" + + def test_unavailable_is_engine_error(self) -> None: + assert issubclass(EngineUnavailableError, EngineError) + + def test_processing_is_engine_error(self) -> None: + assert issubclass(EngineProcessingError, EngineError) + + def test_engine_error_is_exception(self) -> None: + assert issubclass(EngineError, Exception) + + def test_catch_base_catches_subtypes(self) -> None: + with pytest.raises(EngineError): + raise EngineUnavailableError("not installed") + with pytest.raises(EngineError): + raise EngineProcessingError("OCR failed") + + +# --------------------------------------------------------------------------- +# Data types +# --------------------------------------------------------------------------- + + +class TestWordBox: + def test_default_positions(self) -> None: + wb = WordBox(text="VIN", confidence=0.95) + assert wb.x == 0 + assert wb.y == 0 + assert wb.width == 0 + assert wb.height == 0 + + def test_all_fields(self) -> None: + wb = WordBox(text="ABC", confidence=0.88, x=10, y=20, width=100, height=30) + assert wb.text == "ABC" + assert wb.confidence == 0.88 + assert wb.x == 10 + assert wb.width == 100 + + +class TestOcrConfig: + def test_defaults(self) -> None: + config = OcrConfig() + assert config.char_whitelist is None + assert config.single_line is False + assert config.single_word is False + assert config.use_angle_cls is True + assert config.hints == {} + + def test_vin_whitelist_excludes_ioq(self) -> None: + whitelist = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789" + config = OcrConfig(char_whitelist=whitelist) + assert "I" not in config.char_whitelist + assert "O" not in config.char_whitelist + assert "Q" not in config.char_whitelist + + def test_hints_are_independent_across_instances(self) -> None: + c1 = OcrConfig() + c2 = OcrConfig() + c1.hints["psm"] = 7 + assert "psm" not in c2.hints + + +class TestOcrEngineResult: + def test_construction(self) -> None: + result = OcrEngineResult( + text="1HGBH41JXMN109186", + confidence=0.94, + word_boxes=[WordBox(text="1HGBH41JXMN109186", confidence=0.94)], + engine_name="paddleocr", + ) + assert result.text == "1HGBH41JXMN109186" + assert result.confidence == 0.94 + assert len(result.word_boxes) == 1 + assert result.engine_name == "paddleocr" + + def test_empty_result(self) -> None: + result = OcrEngineResult( + text="", confidence=0.0, word_boxes=[], engine_name="tesseract" + ) + assert result.text == "" + assert result.word_boxes == [] + + +# --------------------------------------------------------------------------- +# OcrEngine ABC +# --------------------------------------------------------------------------- + + +class TestOcrEngineABC: + def test_cannot_instantiate_directly(self) -> None: + with pytest.raises(TypeError): + OcrEngine() # type: ignore[abstract] + + def test_concrete_subclass_works(self) -> None: + class StubEngine(OcrEngine): + @property + def name(self) -> str: + return "stub" + + def recognize( + self, image_bytes: bytes, config: OcrConfig + ) -> OcrEngineResult: + return OcrEngineResult( + text="ok", confidence=1.0, word_boxes=[], engine_name="stub" + ) + + engine = StubEngine() + assert engine.name == "stub" + result = engine.recognize(b"", OcrConfig()) + assert result.text == "ok" + + +# --------------------------------------------------------------------------- +# PaddleOcrEngine +# --------------------------------------------------------------------------- + + +class TestPaddleOcrEngine: + def test_name(self) -> None: + from app.engines.paddle_engine import PaddleOcrEngine + + engine = PaddleOcrEngine() + assert engine.name == "paddleocr" + + def test_lazy_init_not_loaded_at_construction(self) -> None: + from app.engines.paddle_engine import PaddleOcrEngine + + engine = PaddleOcrEngine() + assert engine._ocr is None + + def test_recognize_empty_results(self) -> None: + from app.engines.paddle_engine import PaddleOcrEngine + + engine = PaddleOcrEngine() + mock_ocr = MagicMock() + mock_ocr.ocr.return_value = [None] + engine._ocr = mock_ocr + + result = engine.recognize(_create_test_image_bytes(), OcrConfig()) + assert result.text == "" + assert result.confidence == 0.0 + assert result.word_boxes == [] + assert result.engine_name == "paddleocr" + + def test_recognize_with_results(self) -> None: + from app.engines.paddle_engine import PaddleOcrEngine + + engine = PaddleOcrEngine() + mock_ocr = MagicMock() + mock_ocr.ocr.return_value = [ + [ + [[[10, 20], [110, 20], [110, 50], [10, 50]], ("HELLO", 0.95)], + [[[10, 60], [110, 60], [110, 90], [10, 90]], ("WORLD", 0.88)], + ] + ] + engine._ocr = mock_ocr + + result = engine.recognize(_create_test_image_bytes(), OcrConfig()) + assert result.text == "HELLO WORLD" + assert abs(result.confidence - 0.915) < 0.01 + assert len(result.word_boxes) == 2 + assert result.word_boxes[0].text == "HELLO" + assert result.word_boxes[0].confidence == 0.95 + assert result.word_boxes[1].text == "WORLD" + assert result.engine_name == "paddleocr" + + def test_recognize_whitelist_filters_characters(self) -> None: + from app.engines.paddle_engine import PaddleOcrEngine + + engine = PaddleOcrEngine() + mock_ocr = MagicMock() + mock_ocr.ocr.return_value = [ + [ + [[[0, 0], [100, 0], [100, 30], [0, 30]], ("1HG-BH4!", 0.9)], + ] + ] + engine._ocr = mock_ocr + + config = OcrConfig(char_whitelist="ABCDEFGHJKLMNPRSTUVWXYZ0123456789") + result = engine.recognize(_create_test_image_bytes(), config) + assert "-" not in result.text + assert "!" not in result.text + assert result.word_boxes[0].text == "1HGBH4" + + def test_recognize_quadrilateral_to_bounding_box(self) -> None: + from app.engines.paddle_engine import PaddleOcrEngine + + engine = PaddleOcrEngine() + mock_ocr = MagicMock() + # Slightly rotated quad: min x=8, min y=20, max x=110, max y=55 + mock_ocr.ocr.return_value = [ + [ + [[[10, 20], [110, 25], [108, 55], [8, 50]], ("TEXT", 0.9)], + ] + ] + engine._ocr = mock_ocr + + result = engine.recognize(_create_test_image_bytes(), OcrConfig()) + wb = result.word_boxes[0] + assert wb.x == 8 + assert wb.y == 20 + assert wb.width == 102 # 110 - 8 + assert wb.height == 35 # 55 - 20 + + def test_recognize_skips_empty_after_whitelist(self) -> None: + """Text consisting only of non-whitelisted characters is skipped.""" + from app.engines.paddle_engine import PaddleOcrEngine + + engine = PaddleOcrEngine() + mock_ocr = MagicMock() + mock_ocr.ocr.return_value = [ + [ + [[[0, 0], [50, 0], [50, 20], [0, 20]], ("---", 0.9)], + ] + ] + engine._ocr = mock_ocr + + config = OcrConfig(char_whitelist="ABC") + result = engine.recognize(_create_test_image_bytes(), config) + assert result.text == "" + assert result.word_boxes == [] + assert result.confidence == 0.0 + + def test_import_error_raises_unavailable(self) -> None: + from app.engines.paddle_engine import PaddleOcrEngine + + engine = PaddleOcrEngine() + engine._ocr = None + with patch.dict("sys.modules", {"paddleocr": None}): + with patch( + "app.engines.paddle_engine.importlib.import_module", + side_effect=ImportError("No module"), + ): + # Force re-import by removing cached paddleocr + original_import = __builtins__.__import__ if hasattr(__builtins__, '__import__') else __import__ + def mock_import(name, *args, **kwargs): + if name == "paddleocr": + raise ImportError("No module named 'paddleocr'") + return original_import(name, *args, **kwargs) + + with patch("builtins.__import__", side_effect=mock_import): + with pytest.raises(EngineUnavailableError, match="paddleocr"): + engine._get_ocr() + + def test_processing_error_on_exception(self) -> None: + from app.engines.paddle_engine import PaddleOcrEngine + + engine = PaddleOcrEngine() + mock_ocr = MagicMock() + mock_ocr.ocr.side_effect = RuntimeError("OCR crashed") + engine._ocr = mock_ocr + + with pytest.raises(EngineProcessingError, match="PaddleOCR recognition failed"): + engine.recognize(_create_test_image_bytes(), OcrConfig()) + + +# --------------------------------------------------------------------------- +# TesseractEngine +# --------------------------------------------------------------------------- + + +class TestTesseractEngine: + """Tests for TesseractEngine using mocked pytesseract.""" + + @pytest.fixture() + def engine(self) -> "TesseractEngine": # type: ignore[name-defined] + """Create a TesseractEngine with mocked pytesseract dependency.""" + mock_pytesseract = MagicMock() + mock_pytesseract.Output.DICT = "dict" + + with patch.dict("sys.modules", {"pytesseract": mock_pytesseract}): + with patch("app.engines.tesseract_engine.settings") as mock_settings: + mock_settings.tesseract_cmd = "/usr/bin/tesseract" + from app.engines.tesseract_engine import TesseractEngine + + eng = TesseractEngine() + eng._mock_pytesseract = mock_pytesseract # type: ignore[attr-defined] + return eng + + def test_name(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] + assert engine.name == "tesseract" + + def test_build_config_default_psm(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] + config_str = engine._build_config(OcrConfig()) + assert "--psm 6" in config_str + + def test_build_config_single_line(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] + config_str = engine._build_config(OcrConfig(single_line=True)) + assert "--psm 7" in config_str + + def test_build_config_single_word(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] + config_str = engine._build_config(OcrConfig(single_word=True)) + assert "--psm 8" in config_str + + def test_build_config_whitelist(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] + config_str = engine._build_config(OcrConfig(char_whitelist="ABC123")) + assert "-c tessedit_char_whitelist=ABC123" in config_str + + def test_build_config_psm_hint(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] + config_str = engine._build_config(OcrConfig(hints={"psm": 11})) + assert "--psm 11" in config_str + + def test_recognize_normalizes_confidence(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] + """Tesseract returns 0-100 confidence; engine normalizes to 0.0-1.0.""" + engine._pytesseract.image_to_data.return_value = { + "text": ["HELLO", ""], + "conf": [92, -1], + "left": [10], + "top": [20], + "width": [100], + "height": [30], + } + + result = engine.recognize(_create_test_image_bytes(), OcrConfig()) + assert result.text == "HELLO" + assert abs(result.confidence - 0.92) < 0.01 + assert result.engine_name == "tesseract" + + def test_import_error_raises_unavailable(self) -> None: + with patch.dict("sys.modules", {"pytesseract": None}): + with patch("app.engines.tesseract_engine.settings") as mock_settings: + mock_settings.tesseract_cmd = "/usr/bin/tesseract" + + def mock_import(name, *args, **kwargs): + if name == "pytesseract": + raise ImportError("No module named 'pytesseract'") + return __import__(name, *args, **kwargs) + + with patch("builtins.__import__", side_effect=mock_import): + from app.engines.tesseract_engine import TesseractEngine + + with pytest.raises(EngineUnavailableError, match="pytesseract"): + TesseractEngine() + + +# --------------------------------------------------------------------------- +# CloudEngine +# --------------------------------------------------------------------------- + + +class TestCloudEngine: + def test_name(self) -> None: + from app.engines.cloud_engine import CloudEngine + + engine = CloudEngine(key_path="/fake/path.json") + assert engine.name == "google_vision" + + def test_lazy_init_not_loaded_at_construction(self) -> None: + from app.engines.cloud_engine import CloudEngine + + engine = CloudEngine(key_path="/fake/path.json") + assert engine._client is None + + def test_missing_key_file_raises_unavailable(self) -> None: + from app.engines.cloud_engine import CloudEngine + + engine = CloudEngine(key_path="/nonexistent/key.json") + with pytest.raises(EngineUnavailableError, match="key not found"): + engine._get_client() + + @patch("os.path.isfile", return_value=True) + def test_missing_library_raises_unavailable(self, _mock_isfile: MagicMock) -> None: + from app.engines.cloud_engine import CloudEngine + + engine = CloudEngine(key_path="/fake/key.json") + + def mock_import(name, *args, **kwargs): + if "google.cloud" in name: + raise ImportError("No module named 'google.cloud'") + return __import__(name, *args, **kwargs) + + with patch("builtins.__import__", side_effect=mock_import): + with pytest.raises(EngineUnavailableError, match="google-cloud-vision"): + engine._get_client() + + def test_recognize_empty_annotations(self) -> None: + from app.engines.cloud_engine import CloudEngine + + engine = CloudEngine(key_path="/fake/key.json") + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.error.message = "" + mock_response.text_annotations = [] + mock_client.text_detection.return_value = mock_response + engine._client = mock_client + + # Mock the google.cloud.vision import inside recognize() + mock_vision = MagicMock() + with patch.dict("sys.modules", {"google.cloud.vision": mock_vision, "google.cloud": MagicMock(), "google": MagicMock()}): + result = engine.recognize(b"fake_image", OcrConfig()) + assert result.text == "" + assert result.confidence == 0.0 + assert result.engine_name == "google_vision" + + def test_recognize_api_error_raises_processing_error(self) -> None: + from app.engines.cloud_engine import CloudEngine + + engine = CloudEngine(key_path="/fake/key.json") + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.error.message = "API quota exceeded" + mock_client.text_detection.return_value = mock_response + engine._client = mock_client + + mock_vision = MagicMock() + with patch.dict("sys.modules", {"google.cloud.vision": mock_vision, "google.cloud": MagicMock(), "google": MagicMock()}): + with pytest.raises(EngineProcessingError, match="API quota exceeded"): + engine.recognize(b"fake_image", OcrConfig()) + + +# --------------------------------------------------------------------------- +# HybridEngine +# --------------------------------------------------------------------------- + + +class TestHybridEngine: + def test_name_with_fallback(self) -> None: + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + fallback = MagicMock(spec=OcrEngine) + fallback.name = "google_vision" + engine = HybridEngine(primary=primary, fallback=fallback) + assert engine.name == "hybrid(paddleocr+google_vision)" + + def test_name_without_fallback(self) -> None: + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + engine = HybridEngine(primary=primary) + assert engine.name == "hybrid(paddleocr+none)" + + def test_high_confidence_skips_fallback(self) -> None: + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + fallback = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + fallback.name = "cloud" + primary.recognize.return_value = _make_result("VIN123", 0.95, "paddleocr") + + engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) + result = engine.recognize(b"img", OcrConfig()) + assert result.text == "VIN123" + assert result.engine_name == "paddleocr" + fallback.recognize.assert_not_called() + + def test_low_confidence_triggers_fallback(self) -> None: + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + fallback = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + fallback.name = "google_vision" + primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr") + fallback.recognize.return_value = _make_result("VIN456", 0.92, "google_vision") + + engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) + result = engine.recognize(b"img", OcrConfig()) + assert result.text == "VIN456" + assert result.engine_name == "google_vision" + fallback.recognize.assert_called_once() + + def test_low_confidence_no_fallback_returns_primary(self) -> None: + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr") + + engine = HybridEngine(primary=primary, fallback=None, threshold=0.6) + result = engine.recognize(b"img", OcrConfig()) + assert result.text == "VIN123" + + def test_fallback_lower_confidence_returns_primary(self) -> None: + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + fallback = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + fallback.name = "google_vision" + primary.recognize.return_value = _make_result("VIN123", 0.4, "paddleocr") + fallback.recognize.return_value = _make_result("VIN456", 0.3, "google_vision") + + engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) + result = engine.recognize(b"img", OcrConfig()) + assert result.text == "VIN123" + + def test_fallback_engine_error_returns_primary(self) -> None: + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + fallback = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + fallback.name = "google_vision" + primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr") + fallback.recognize.side_effect = EngineUnavailableError("key missing") + + engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) + result = engine.recognize(b"img", OcrConfig()) + assert result.text == "VIN123" + + def test_fallback_unexpected_error_returns_primary(self) -> None: + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + fallback = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + fallback.name = "google_vision" + primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr") + fallback.recognize.side_effect = RuntimeError("network error") + + engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) + result = engine.recognize(b"img", OcrConfig()) + assert result.text == "VIN123" + + @patch("app.engines.hybrid_engine.time") + def test_fallback_timeout_returns_primary(self, mock_time: MagicMock) -> None: + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + fallback = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + fallback.name = "google_vision" + primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr") + fallback.recognize.return_value = _make_result("VIN456", 0.92, "google_vision") + # Simulate 6-second delay (exceeds 5s limit) + mock_time.monotonic.side_effect = [0.0, 6.0] + + engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) + result = engine.recognize(b"img", OcrConfig()) + assert result.text == "VIN123" # timeout -> use primary + + def test_exact_threshold_skips_fallback(self) -> None: + """When confidence == threshold, no fallback needed (>= check).""" + from app.engines.hybrid_engine import HybridEngine + + primary = MagicMock(spec=OcrEngine) + fallback = MagicMock(spec=OcrEngine) + primary.name = "paddleocr" + fallback.name = "cloud" + primary.recognize.return_value = _make_result("VIN", 0.6, "paddleocr") + + engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) + result = engine.recognize(b"img", OcrConfig()) + assert result.engine_name == "paddleocr" + fallback.recognize.assert_not_called() + + +# --------------------------------------------------------------------------- +# Engine factory +# --------------------------------------------------------------------------- + + +class TestEngineFactory: + def test_unknown_engine_raises(self) -> None: + from app.engines.engine_factory import _create_single_engine + + with pytest.raises(EngineUnavailableError, match="Unknown engine"): + _create_single_engine("nonexistent") + + @patch("app.engines.engine_factory.settings") + @patch("app.engines.engine_factory._create_single_engine") + def test_defaults_to_settings_primary( + self, mock_create: MagicMock, mock_settings: MagicMock + ) -> None: + mock_settings.ocr_primary_engine = "paddleocr" + mock_settings.ocr_fallback_engine = "none" + mock_engine = MagicMock(spec=OcrEngine) + mock_create.return_value = mock_engine + + from app.engines.engine_factory import create_engine + + result = create_engine() + mock_create.assert_called_once_with("paddleocr") + assert result == mock_engine + + @patch("app.engines.engine_factory.settings") + @patch("app.engines.engine_factory._create_single_engine") + def test_explicit_name_overrides_settings( + self, mock_create: MagicMock, mock_settings: MagicMock + ) -> None: + mock_settings.ocr_fallback_engine = "none" + mock_engine = MagicMock(spec=OcrEngine) + mock_create.return_value = mock_engine + + from app.engines.engine_factory import create_engine + + create_engine("tesseract") + mock_create.assert_called_once_with("tesseract") + + @patch("app.engines.engine_factory.settings") + @patch("app.engines.engine_factory._create_single_engine") + def test_creates_hybrid_when_fallback_configured( + self, mock_create: MagicMock, mock_settings: MagicMock + ) -> None: + mock_settings.ocr_primary_engine = "paddleocr" + mock_settings.ocr_fallback_engine = "google_vision" + mock_settings.ocr_fallback_threshold = 0.7 + mock_primary = MagicMock(spec=OcrEngine) + mock_fallback = MagicMock(spec=OcrEngine) + mock_create.side_effect = [mock_primary, mock_fallback] + + from app.engines.engine_factory import create_engine + from app.engines.hybrid_engine import HybridEngine + + result = create_engine() + assert isinstance(result, HybridEngine) + + @patch("app.engines.engine_factory.settings") + @patch("app.engines.engine_factory._create_single_engine") + def test_fallback_failure_returns_primary_only( + self, mock_create: MagicMock, mock_settings: MagicMock + ) -> None: + mock_settings.ocr_primary_engine = "paddleocr" + mock_settings.ocr_fallback_engine = "google_vision" + mock_settings.ocr_fallback_threshold = 0.6 + mock_primary = MagicMock(spec=OcrEngine) + mock_create.side_effect = [mock_primary, EngineUnavailableError("no key")] + + from app.engines.engine_factory import create_engine + + result = create_engine() + assert result == mock_primary diff --git a/ocr/tests/test_vin_extraction.py b/ocr/tests/test_vin_extraction.py index b2c8170..3a42c8b 100644 --- a/ocr/tests/test_vin_extraction.py +++ b/ocr/tests/test_vin_extraction.py @@ -1,11 +1,12 @@ -"""Integration tests for VIN extraction endpoint.""" +"""Integration tests for VIN extraction endpoint and engine integration.""" import io from unittest.mock import patch, MagicMock import pytest from fastapi.testclient import TestClient -from PIL import Image, ImageDraw, ImageFont +from PIL import Image, ImageDraw +from app.engines.base_engine import OcrConfig, OcrEngineResult, WordBox from app.main import app @@ -240,3 +241,106 @@ class TestVinExtractionContentTypes: ) assert response.status_code == 200 + + +# --------------------------------------------------------------------------- +# VIN extractor engine integration tests +# --------------------------------------------------------------------------- + + +class TestVinExtractorEngineIntegration: + """Tests verifying VinExtractor integrates correctly with engine abstraction.""" + + @patch("app.extractors.vin_extractor.create_engine") + def test_perform_ocr_calls_engine_with_vin_config( + self, mock_create_engine: MagicMock + ) -> None: + """_perform_ocr passes VIN whitelist and angle_cls to engine.""" + from app.extractors.vin_extractor import VinExtractor + + mock_engine = MagicMock() + mock_engine.recognize.return_value = OcrEngineResult( + text="1HGBH41JXMN109186", + confidence=0.94, + word_boxes=[WordBox(text="1HGBH41JXMN109186", confidence=0.94)], + engine_name="paddleocr", + ) + mock_create_engine.return_value = mock_engine + + extractor = VinExtractor() + text, confidences = extractor._perform_ocr(b"fake_image") + + mock_engine.recognize.assert_called_once() + call_config = mock_engine.recognize.call_args[0][1] + assert isinstance(call_config, OcrConfig) + assert call_config.char_whitelist == VinExtractor.VIN_WHITELIST + assert call_config.use_angle_cls is True + assert call_config.single_line is False + assert call_config.single_word is False + assert text == "1HGBH41JXMN109186" + assert confidences == [0.94] + + @patch("app.extractors.vin_extractor.create_engine") + def test_perform_ocr_single_line_mode( + self, mock_create_engine: MagicMock + ) -> None: + """_perform_ocr passes single_line flag to engine config.""" + from app.extractors.vin_extractor import VinExtractor + + mock_engine = MagicMock() + mock_engine.recognize.return_value = OcrEngineResult( + text="VIN123", confidence=0.9, word_boxes=[], engine_name="paddleocr" + ) + mock_create_engine.return_value = mock_engine + + extractor = VinExtractor() + extractor._perform_ocr(b"img", single_line=True) + + call_config = mock_engine.recognize.call_args[0][1] + assert call_config.single_line is True + assert call_config.single_word is False + + @patch("app.extractors.vin_extractor.create_engine") + def test_perform_ocr_single_word_mode( + self, mock_create_engine: MagicMock + ) -> None: + """_perform_ocr passes single_word flag to engine config.""" + from app.extractors.vin_extractor import VinExtractor + + mock_engine = MagicMock() + mock_engine.recognize.return_value = OcrEngineResult( + text="VIN123", confidence=0.9, word_boxes=[], engine_name="paddleocr" + ) + mock_create_engine.return_value = mock_engine + + extractor = VinExtractor() + extractor._perform_ocr(b"img", single_word=True) + + call_config = mock_engine.recognize.call_args[0][1] + assert call_config.single_word is True + assert call_config.single_line is False + + def test_calculate_base_confidence_empty_returns_default(self) -> None: + """Empty word confidences return 0.5 default.""" + from app.extractors.vin_extractor import VinExtractor + + extractor = VinExtractor.__new__(VinExtractor) + assert extractor._calculate_base_confidence([]) == 0.5 + + def test_calculate_base_confidence_weighted_blend(self) -> None: + """Confidence = 70% average + 30% minimum.""" + from app.extractors.vin_extractor import VinExtractor + + extractor = VinExtractor.__new__(VinExtractor) + # avg = (0.9 + 0.8) / 2 = 0.85, min = 0.8 + # result = 0.7 * 0.85 + 0.3 * 0.8 = 0.595 + 0.24 = 0.835 + result = extractor._calculate_base_confidence([0.9, 0.8]) + assert abs(result - 0.835) < 0.001 + + def test_calculate_base_confidence_single_value(self) -> None: + """Single confidence value: avg == min, so result equals that value.""" + from app.extractors.vin_extractor import VinExtractor + + extractor = VinExtractor.__new__(VinExtractor) + result = extractor._calculate_base_confidence([0.92]) + assert abs(result - 0.92) < 0.001 From cf114fad3ca7cd9ad3a8357b85d36b3dccf0fb19 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 11:58:00 -0600 Subject: [PATCH 08/16] fix: build errors for OpenCV --- frontend/.claude/tdd-guard/data/test.json | 798 +++++++++++++++++++++- ocr/Dockerfile | 2 + 2 files changed, 799 insertions(+), 1 deletion(-) diff --git a/frontend/.claude/tdd-guard/data/test.json b/frontend/.claude/tdd-guard/data/test.json index fd77c20..5648028 100644 --- a/frontend/.claude/tdd-guard/data/test.json +++ b/frontend/.claude/tdd-guard/data/test.json @@ -1,5 +1,759 @@ { "testModules": [ + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/documents/components/ExpirationBadge.test.tsx", + "tests": [ + { + "name": "renders nothing for null", + "fullName": "ExpirationBadge when no expiration date is provided renders nothing for null", + "state": "passed" + }, + { + "name": "renders nothing for undefined", + "fullName": "ExpirationBadge when no expiration date is provided renders nothing for undefined", + "state": "passed" + }, + { + "name": "renders nothing for empty string", + "fullName": "ExpirationBadge when no expiration date is provided renders nothing for empty string", + "state": "passed" + }, + { + "name": "shows \"Expired\" badge for past dates", + "fullName": "ExpirationBadge when document is expired shows \"Expired\" badge for past dates", + "state": "passed" + }, + { + "name": "shows \"Expired\" badge for dates far in the past", + "fullName": "ExpirationBadge when document is expired shows \"Expired\" badge for dates far in the past", + "state": "passed" + }, + { + "name": "has red styling for expired badge", + "fullName": "ExpirationBadge when document is expired has red styling for expired badge", + "state": "passed" + }, + { + "name": "shows \"Expires today\" badge", + "fullName": "ExpirationBadge when document expires today shows \"Expires today\" badge", + "state": "passed" + }, + { + "name": "has amber styling for expiring soon badge", + "fullName": "ExpirationBadge when document expires today has amber styling for expiring soon badge", + "state": "passed" + }, + { + "name": "shows \"Expires tomorrow\" badge", + "fullName": "ExpirationBadge when document expires tomorrow shows \"Expires tomorrow\" badge", + "state": "passed" + }, + { + "name": "shows \"Expires in X days\" badge for 15 days", + "fullName": "ExpirationBadge when document expires within 30 days shows \"Expires in X days\" badge for 15 days", + "state": "passed" + }, + { + "name": "shows \"Expires in X days\" badge for 30 days", + "fullName": "ExpirationBadge when document expires within 30 days shows \"Expires in X days\" badge for 30 days", + "state": "passed" + }, + { + "name": "shows \"Expires in X days\" badge for 2 days", + "fullName": "ExpirationBadge when document expires within 30 days shows \"Expires in X days\" badge for 2 days", + "state": "passed" + }, + { + "name": "renders nothing for 31 days out", + "fullName": "ExpirationBadge when document expires after 30 days renders nothing for 31 days out", + "state": "passed" + }, + { + "name": "renders nothing for dates far in the future", + "fullName": "ExpirationBadge when document expires after 30 days renders nothing for dates far in the future", + "state": "passed" + }, + { + "name": "applies custom className to the badge", + "fullName": "ExpirationBadge className prop applies custom className to the badge", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/api/community-stations.api.test.ts", + "tests": [ + { + "name": "Module failed to load (Error)", + "fullName": "Module failed to load (Error)", + "state": "failed", + "errors": [ + { + "message": "Cannot use 'import.meta' outside a module", + "name": "Error", + "stack": "Jest encountered an unexpected token\n\nJest failed to parse a file. This happens e.g. when your code or its dependencies use non-standard JavaScript syntax, or when Jest is not configured to support such syntax.\n\nOut of the box Jest supports Babel, which will be used to transform your files into valid JS based on your Babel configuration.\n\nBy default \"node_modules\" folder is ignored by transformers.\n\nHere's what you can do:\n • If you are trying to use ECMAScript Modules, see https://jestjs.io/docs/ecmascript-modules for how to enable it.\n • If you are trying to use TypeScript, see https://jestjs.io/docs/getting-started#using-typescript\n • To have some of your \"node_modules\" files transformed, you can specify a custom \"transformIgnorePatterns\" in your config.\n • If you need a custom transformation specify a \"transform\" option in your config.\n • If you simply want to mock your non-JS modules (e.g. binary assets) you can stub them out with the \"moduleNameMapper\" config option.\n\nYou'll find more details and examples of these config options in the docs:\nhttps://jestjs.io/docs/configuration\nFor information about custom transformations, see:\nhttps://jestjs.io/docs/code-transformation\n\nDetails:\n\n/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/core/api/client.ts:46\nconst API_BASE_URL = import.meta.env.VITE_API_BASE_URL || '/api';\n ^^^^\n\nSyntaxError: Cannot use 'import.meta' outside a module\n at new Script (node:vm:117:7)\n at Runtime.createScriptFromCode (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1505:14)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1399:25)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/api/community-stations.api.ts:5:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/api/community-stations.api.test.ts:6:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:77:13)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/api/stations.api.test.ts", + "tests": [ + { + "name": "Module failed to load (Error)", + "fullName": "Module failed to load (Error)", + "state": "failed", + "errors": [ + { + "message": "Cannot use 'import.meta' outside a module", + "name": "Error", + "stack": "Jest encountered an unexpected token\n\nJest failed to parse a file. This happens e.g. when your code or its dependencies use non-standard JavaScript syntax, or when Jest is not configured to support such syntax.\n\nOut of the box Jest supports Babel, which will be used to transform your files into valid JS based on your Babel configuration.\n\nBy default \"node_modules\" folder is ignored by transformers.\n\nHere's what you can do:\n • If you are trying to use ECMAScript Modules, see https://jestjs.io/docs/ecmascript-modules for how to enable it.\n • If you are trying to use TypeScript, see https://jestjs.io/docs/getting-started#using-typescript\n • To have some of your \"node_modules\" files transformed, you can specify a custom \"transformIgnorePatterns\" in your config.\n • If you need a custom transformation specify a \"transform\" option in your config.\n • If you simply want to mock your non-JS modules (e.g. binary assets) you can stub them out with the \"moduleNameMapper\" config option.\n\nYou'll find more details and examples of these config options in the docs:\nhttps://jestjs.io/docs/configuration\nFor information about custom transformations, see:\nhttps://jestjs.io/docs/code-transformation\n\nDetails:\n\n/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/core/api/client.ts:46\nconst API_BASE_URL = import.meta.env.VITE_API_BASE_URL || '/api';\n ^^^^\n\nSyntaxError: Cannot use 'import.meta' outside a module\n at new Script (node:vm:117:7)\n at Runtime.createScriptFromCode (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1505:14)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1399:25)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/api/stations.api.ts:5:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/api/stations.api.test.ts:6:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:77:13)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/hooks/useStationsSearch.test.ts", + "tests": [ + { + "name": "Module failed to load (Error)", + "fullName": "Module failed to load (Error)", + "state": "failed", + "errors": [ + { + "message": "Cannot use 'import.meta' outside a module", + "name": "Error", + "stack": "Jest encountered an unexpected token\n\nJest failed to parse a file. This happens e.g. when your code or its dependencies use non-standard JavaScript syntax, or when Jest is not configured to support such syntax.\n\nOut of the box Jest supports Babel, which will be used to transform your files into valid JS based on your Babel configuration.\n\nBy default \"node_modules\" folder is ignored by transformers.\n\nHere's what you can do:\n • If you are trying to use ECMAScript Modules, see https://jestjs.io/docs/ecmascript-modules for how to enable it.\n • If you are trying to use TypeScript, see https://jestjs.io/docs/getting-started#using-typescript\n • To have some of your \"node_modules\" files transformed, you can specify a custom \"transformIgnorePatterns\" in your config.\n • If you need a custom transformation specify a \"transform\" option in your config.\n • If you simply want to mock your non-JS modules (e.g. binary assets) you can stub them out with the \"moduleNameMapper\" config option.\n\nYou'll find more details and examples of these config options in the docs:\nhttps://jestjs.io/docs/configuration\nFor information about custom transformations, see:\nhttps://jestjs.io/docs/code-transformation\n\nDetails:\n\n/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/core/api/client.ts:46\nconst API_BASE_URL = import.meta.env.VITE_API_BASE_URL || '/api';\n ^^^^\n\nSyntaxError: Cannot use 'import.meta' outside a module\n at new Script (node:vm:117:7)\n at Runtime.createScriptFromCode (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1505:14)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1399:25)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/api/stations.api.ts:5:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime._generateMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1690:34)\n at Runtime.requireMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:996:39)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1046:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/hooks/useStationsSearch.ts:6:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/hooks/useStationsSearch.test.ts:8:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:77:13)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/documents/components/DocumentPreview.test.tsx", + "tests": [ + { + "name": "Module failed to load (Error)", + "fullName": "Module failed to load (Error)", + "state": "failed", + "errors": [ + { + "message": "Cannot use 'import.meta' outside a module", + "name": "Error", + "stack": "Jest encountered an unexpected token\n\nJest failed to parse a file. This happens e.g. when your code or its dependencies use non-standard JavaScript syntax, or when Jest is not configured to support such syntax.\n\nOut of the box Jest supports Babel, which will be used to transform your files into valid JS based on your Babel configuration.\n\nBy default \"node_modules\" folder is ignored by transformers.\n\nHere's what you can do:\n • If you are trying to use ECMAScript Modules, see https://jestjs.io/docs/ecmascript-modules for how to enable it.\n • If you are trying to use TypeScript, see https://jestjs.io/docs/getting-started#using-typescript\n • To have some of your \"node_modules\" files transformed, you can specify a custom \"transformIgnorePatterns\" in your config.\n • If you need a custom transformation specify a \"transform\" option in your config.\n • If you simply want to mock your non-JS modules (e.g. binary assets) you can stub them out with the \"moduleNameMapper\" config option.\n\nYou'll find more details and examples of these config options in the docs:\nhttps://jestjs.io/docs/configuration\nFor information about custom transformations, see:\nhttps://jestjs.io/docs/code-transformation\n\nDetails:\n\n/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/core/api/client.ts:46\nconst API_BASE_URL = import.meta.env.VITE_API_BASE_URL || '/api';\n ^^^^\n\nSyntaxError: Cannot use 'import.meta' outside a module\n at new Script (node:vm:117:7)\n at Runtime.createScriptFromCode (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1505:14)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1399:25)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/documents/api/documents.api.ts:1:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime._generateMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1690:34)\n at Runtime.requireMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:996:39)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1046:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/documents/components/DocumentPreview.tsx:3:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/documents/components/DocumentPreview.test.tsx:7:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:77:13)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/hooks/useCommunityStations.test.ts", + "tests": [ + { + "name": "Module failed to load (Error)", + "fullName": "Module failed to load (Error)", + "state": "failed", + "errors": [ + { + "message": "Cannot use 'import.meta' outside a module", + "name": "Error", + "stack": "Jest encountered an unexpected token\n\nJest failed to parse a file. This happens e.g. when your code or its dependencies use non-standard JavaScript syntax, or when Jest is not configured to support such syntax.\n\nOut of the box Jest supports Babel, which will be used to transform your files into valid JS based on your Babel configuration.\n\nBy default \"node_modules\" folder is ignored by transformers.\n\nHere's what you can do:\n • If you are trying to use ECMAScript Modules, see https://jestjs.io/docs/ecmascript-modules for how to enable it.\n • If you are trying to use TypeScript, see https://jestjs.io/docs/getting-started#using-typescript\n • To have some of your \"node_modules\" files transformed, you can specify a custom \"transformIgnorePatterns\" in your config.\n • If you need a custom transformation specify a \"transform\" option in your config.\n • If you simply want to mock your non-JS modules (e.g. binary assets) you can stub them out with the \"moduleNameMapper\" config option.\n\nYou'll find more details and examples of these config options in the docs:\nhttps://jestjs.io/docs/configuration\nFor information about custom transformations, see:\nhttps://jestjs.io/docs/code-transformation\n\nDetails:\n\n/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/core/api/client.ts:46\nconst API_BASE_URL = import.meta.env.VITE_API_BASE_URL || '/api';\n ^^^^\n\nSyntaxError: Cannot use 'import.meta' outside a module\n at new Script (node:vm:117:7)\n at Runtime.createScriptFromCode (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1505:14)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1399:25)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/api/community-stations.api.ts:5:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime._generateMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1690:34)\n at Runtime.requireMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:996:39)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1046:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/hooks/useCommunityStations.ts:6:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/hooks/useCommunityStations.test.ts:8:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:77:13)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/useAdmins.test.tsx", + "tests": [ + { + "name": "Module failed to load (Error)", + "fullName": "Module failed to load (Error)", + "state": "failed", + "errors": [ + { + "message": "TextEncoder is not defined", + "name": "Error", + "stack": "ReferenceError: TextEncoder is not defined\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@auth0/auth0-react/node_modules/@auth0/auth0-spa-js/dist/auth0-spa-js.production.esm.js:1:10973)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime._generateMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1690:34)\n at Runtime.requireMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:996:39)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1046:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/useAdmins.test.tsx:7:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:77:13)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/documents/components/DocumentCardMetadata.test.tsx", + "tests": [ + { + "name": "displays expiration date", + "fullName": "DocumentCardMetadata insurance documents displays expiration date", + "state": "passed" + }, + { + "name": "displays policy number", + "fullName": "DocumentCardMetadata insurance documents displays policy number", + "state": "passed" + }, + { + "name": "displays insurance company", + "fullName": "DocumentCardMetadata insurance documents displays insurance company", + "state": "passed" + }, + { + "name": "limits to 3 fields in card variant", + "fullName": "DocumentCardMetadata insurance documents limits to 3 fields in card variant", + "state": "passed" + }, + { + "name": "shows all fields in detail variant", + "fullName": "DocumentCardMetadata insurance documents shows all fields in detail variant", + "state": "passed" + }, + { + "name": "displays expiration date", + "fullName": "DocumentCardMetadata registration documents displays expiration date", + "state": "passed" + }, + { + "name": "displays license plate", + "fullName": "DocumentCardMetadata registration documents displays license plate", + "state": "passed" + }, + { + "name": "shows cost in detail variant only", + "fullName": "DocumentCardMetadata registration documents shows cost in detail variant only", + "state": "passed" + }, + { + "name": "displays issued date if set", + "fullName": "DocumentCardMetadata manual documents displays issued date if set", + "state": "passed" + }, + { + "name": "shows notes preview in detail variant only", + "fullName": "DocumentCardMetadata manual documents shows notes preview in detail variant only", + "state": "passed" + }, + { + "name": "truncates long notes in detail variant", + "fullName": "DocumentCardMetadata manual documents truncates long notes in detail variant", + "state": "passed" + }, + { + "name": "returns null when no metadata to display", + "fullName": "DocumentCardMetadata empty states returns null when no metadata to display", + "state": "passed" + }, + { + "name": "handles missing details gracefully", + "fullName": "DocumentCardMetadata empty states handles missing details gracefully", + "state": "passed" + }, + { + "name": "uses text-xs for mobile variant", + "fullName": "DocumentCardMetadata variant styling uses text-xs for mobile variant", + "state": "passed" + }, + { + "name": "uses text-sm for card variant", + "fullName": "DocumentCardMetadata variant styling uses text-sm for card variant", + "state": "passed" + }, + { + "name": "uses grid layout for detail variant", + "fullName": "DocumentCardMetadata variant styling uses grid layout for detail variant", + "state": "passed" + }, + { + "name": "formats premium correctly", + "fullName": "DocumentCardMetadata currency formatting formats premium correctly", + "state": "passed" + }, + { + "name": "handles string numbers", + "fullName": "DocumentCardMetadata currency formatting handles string numbers", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/documents/mobile/DocumentsMobileScreen.test.tsx", + "tests": [ + { + "name": "Module failed to load (Error)", + "fullName": "Module failed to load (Error)", + "state": "failed", + "errors": [ + { + "message": "TextEncoder is not defined", + "name": "Error", + "stack": "ReferenceError: TextEncoder is not defined\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@auth0/auth0-react/node_modules/@auth0/auth0-spa-js/dist/auth0-spa-js.production.esm.js:1:10973)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/documents/mobile/DocumentsMobileScreen.tsx:2:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/documents/mobile/DocumentsMobileScreen.test.tsx:8:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:77:13)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/hooks/useBulkSelection.test.ts", + "tests": [ + { + "name": "should initialize with empty selection", + "fullName": "useBulkSelection should initialize with empty selection", + "state": "passed" + }, + { + "name": "should toggle individual item selection", + "fullName": "useBulkSelection should toggle individual item selection", + "state": "passed" + }, + { + "name": "should toggle all items", + "fullName": "useBulkSelection should toggle all items", + "state": "passed" + }, + { + "name": "should reset all selections", + "fullName": "useBulkSelection should reset all selections", + "state": "passed" + }, + { + "name": "should return selected items", + "fullName": "useBulkSelection should return selected items", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/catalogShared.test.ts", + "tests": [ + { + "name": "describes dependent counts for makes", + "fullName": "getCascadeSummary describes dependent counts for makes", + "state": "passed" + }, + { + "name": "returns empty string when nothing selected", + "fullName": "getCascadeSummary returns empty string when nothing selected", + "state": "passed" + }, + { + "name": "prefills parent context for create operations", + "fullName": "buildDefaultValues prefills parent context for create operations", + "state": "passed" + }, + { + "name": "hydrates existing entity data for editing engines", + "fullName": "buildDefaultValues hydrates existing entity data for editing engines", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/useAdminAccess.test.tsx", + "tests": [ + { + "name": "Module failed to load (Error)", + "fullName": "Module failed to load (Error)", + "state": "failed", + "errors": [ + { + "message": "TextEncoder is not defined", + "name": "Error", + "stack": "ReferenceError: TextEncoder is not defined\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@auth0/auth0-react/node_modules/@auth0/auth0-spa-js/dist/auth0-spa-js.production.esm.js:1:10973)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime._generateMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1690:34)\n at Runtime.requireMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:996:39)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1046:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/useAdminAccess.test.tsx:7:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:77:13)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/utils/navigation-links.test.ts", + "tests": [ + { + "name": "uses coordinates when valid", + "fullName": "buildNavigationLinks uses coordinates when valid", + "state": "passed" + }, + { + "name": "falls back to query when coordinates are missing", + "fullName": "buildNavigationLinks falls back to query when coordinates are missing", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/components/SelectionToolbar.test.tsx", + "tests": [ + { + "name": "should not render when selectedCount is 0", + "fullName": "SelectionToolbar should not render when selectedCount is 0", + "state": "passed" + }, + { + "name": "should render when items are selected", + "fullName": "SelectionToolbar should render when items are selected", + "state": "passed" + }, + { + "name": "should call onClear when Clear button clicked", + "fullName": "SelectionToolbar should call onClear when Clear button clicked", + "state": "passed" + }, + { + "name": "should call onSelectAll when Select All button clicked", + "fullName": "SelectionToolbar should call onSelectAll when Select All button clicked", + "state": "passed" + }, + { + "name": "should render custom action buttons", + "fullName": "SelectionToolbar should render custom action buttons", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/components/EmptyState.test.tsx", + "tests": [ + { + "name": "should render with title and description", + "fullName": "EmptyState should render with title and description", + "state": "passed" + }, + { + "name": "should render with icon", + "fullName": "EmptyState should render with icon", + "state": "passed" + }, + { + "name": "should render action button when provided", + "fullName": "EmptyState should render action button when provided", + "state": "passed" + }, + { + "name": "should not render action button when not provided", + "fullName": "EmptyState should not render action button when not provided", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/shared-minimal/components/VehicleLimitDialog.test.tsx", + "tests": [ + { + "name": "renders when open", + "fullName": "VehicleLimitDialog Dialog rendering renders when open", + "state": "passed" + }, + { + "name": "does not render when closed", + "fullName": "VehicleLimitDialog Dialog rendering does not render when closed", + "state": "passed" + }, + { + "name": "displays current count and limit", + "fullName": "VehicleLimitDialog Props display displays current count and limit", + "state": "passed" + }, + { + "name": "displays free tier upgrade prompt", + "fullName": "VehicleLimitDialog Props display displays free tier upgrade prompt", + "state": "passed" + }, + { + "name": "displays pro tier upgrade prompt", + "fullName": "VehicleLimitDialog Props display displays pro tier upgrade prompt", + "state": "passed" + }, + { + "name": "shows tier chips for free user", + "fullName": "VehicleLimitDialog Props display shows tier chips for free user", + "state": "passed" + }, + { + "name": "shows tier chips for pro user", + "fullName": "VehicleLimitDialog Props display shows tier chips for pro user", + "state": "passed" + }, + { + "name": "calls onClose when \"Maybe Later\" is clicked", + "fullName": "VehicleLimitDialog User interactions calls onClose when \"Maybe Later\" is clicked", + "state": "passed" + }, + { + "name": "calls onClose when \"Upgrade (Coming Soon)\" is clicked", + "fullName": "VehicleLimitDialog User interactions calls onClose when \"Upgrade (Coming Soon)\" is clicked", + "state": "passed" + }, + { + "name": "renders fullscreen on mobile", + "fullName": "VehicleLimitDialog Mobile responsiveness renders fullscreen on mobile", + "state": "failed", + "errors": [ + { + "message": "Error: expect(received).toBeInTheDocument()\n\nreceived value must be an HTMLElement or an SVGElement.\nReceived has value: null\n at __EXTERNAL_MATCHER_TRAP__ (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/expect/build/index.js:325:30)\n at Object.throwingMatcher [as toBeInTheDocument] (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/expect/build/index.js:326:15)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/shared-minimal/components/VehicleLimitDialog.test.tsx:185:22)\n at Promise.then.completed (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:298:28)\n at new Promise ()\n at callAsyncCircusFn (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:231:10)\n at _callCircusTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:316:40)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at _runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:252:3)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:126:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at run (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:71:3)\n at runAndTransformResultsToJestFormat (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapterInit.js:122:21)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:19)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + }, + { + "name": "shows close button on mobile", + "fullName": "VehicleLimitDialog Mobile responsiveness shows close button on mobile", + "state": "passed" + }, + { + "name": "hides close button on desktop", + "fullName": "VehicleLimitDialog Mobile responsiveness hides close button on desktop", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/components/AdminSkeleton.test.tsx", + "tests": [ + { + "name": "should render default number of rows", + "fullName": "AdminSkeleton SkeletonRow should render default number of rows", + "state": "passed" + }, + { + "name": "should render specified number of rows", + "fullName": "AdminSkeleton SkeletonRow should render specified number of rows", + "state": "failed", + "errors": [ + { + "message": "Error: expect(received).toHaveLength(expected)\n\nExpected length: 15\nReceived length: 20\nReceived object: [, , , , , , , , , , …]\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/components/AdminSkeleton.test.tsx:20:63)\n at Promise.then.completed (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:298:28)\n at new Promise ()\n at callAsyncCircusFn (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:231:10)\n at _callCircusTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:316:40)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at _runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:252:3)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:126:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at run (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:71:3)\n at runAndTransformResultsToJestFormat (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapterInit.js:122:21)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:19)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + }, + { + "name": "should render default number of cards", + "fullName": "AdminSkeleton SkeletonCard should render default number of cards", + "state": "passed" + }, + { + "name": "should render specified number of cards", + "fullName": "AdminSkeleton SkeletonCard should render specified number of cards", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/components/AdminSectionHeader.test.tsx", + "tests": [ + { + "name": "should render with title and stats", + "fullName": "AdminSectionHeader should render with title and stats", + "state": "passed" + }, + { + "name": "should render with empty stats", + "fullName": "AdminSectionHeader should render with empty stats", + "state": "passed" + }, + { + "name": "should format large numbers with locale", + "fullName": "AdminSectionHeader should format large numbers with locale", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/components/CommunityStationCard.test.tsx", + "tests": [ + { + "name": "should render station details", + "fullName": "CommunityStationCard should render station details", + "state": "passed" + }, + { + "name": "should display 93 octane status", + "fullName": "CommunityStationCard should display 93 octane status", + "state": "passed" + }, + { + "name": "should display price when available", + "fullName": "CommunityStationCard should display price when available", + "state": "passed" + }, + { + "name": "should display status badge", + "fullName": "CommunityStationCard should display status badge", + "state": "passed" + }, + { + "name": "should show withdraw button for user view", + "fullName": "CommunityStationCard should show withdraw button for user view", + "state": "failed", + "errors": [ + { + "message": "TestingLibraryElementError: Found multiple elements with the role \"button\"\n\nHere are the matching elements:\n\nIgnored nodes: comments, script, style\n\u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n\u001b[36m\u001b[39m\n\nIgnored nodes: comments, script, style\n\u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n\u001b[36m\u001b[39m\n\nIgnored nodes: comments, script, style\n\u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n\u001b[36m\u001b[39m\n\n(If this is intentional, then use the `*AllBy*` variant of the query (like `queryAllByText`, `getAllByText`, or `findAllByText`)).\n\nIgnored nodes: comments, script, style\n\u001b[36m\u001b[39m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mShell Downtown\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mapproved\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0m123 Main St\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mDenver, CO, 80202\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mBrand: Shell\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0m93 Octane · w/ Ethanol\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0m$3.599/gal\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mNotes:\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mGood quality fuel\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mSubmitted by: \u001b[0m\n \u001b[0muser@example.com\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mNavigate\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mPremium 93\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mFavorite\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n\u001b[36m\u001b[39m\n at Object.getElementError (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/config.js:37:19)\n at getElementError (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:20:35)\n at getMultipleElementsFoundError (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:23:10)\n at /Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:55:13\n at /Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:95:19\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/components/CommunityStationCard.test.tsx:66:19)\n at Promise.then.completed (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:298:28)\n at new Promise ()\n at callAsyncCircusFn (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:231:10)\n at _callCircusTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:316:40)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at _runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:252:3)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:126:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at run (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:71:3)\n at runAndTransformResultsToJestFormat (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapterInit.js:122:21)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:19)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)", + "name": "TestingLibraryElementError" + } + ] + }, + { + "name": "should show approve and reject buttons for admin", + "fullName": "CommunityStationCard should show approve and reject buttons for admin", + "state": "passed" + }, + { + "name": "should call onWithdraw when withdraw button is clicked", + "fullName": "CommunityStationCard should call onWithdraw when withdraw button is clicked", + "state": "failed", + "errors": [ + { + "message": "TestingLibraryElementError: Found multiple elements with the role \"button\"\n\nHere are the matching elements:\n\nIgnored nodes: comments, script, style\n\u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n\u001b[36m\u001b[39m\n\nIgnored nodes: comments, script, style\n\u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n\u001b[36m\u001b[39m\n\nIgnored nodes: comments, script, style\n\u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n\u001b[36m\u001b[39m\n\n(If this is intentional, then use the `*AllBy*` variant of the query (like `queryAllByText`, `getAllByText`, or `findAllByText`)).\n\nIgnored nodes: comments, script, style\n\u001b[36m\u001b[39m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mShell Downtown\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mapproved\u001b[0m\n \u001b[36m
\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0m123 Main St\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mDenver, CO, 80202\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mBrand: Shell\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0m93 Octane · w/ Ethanol\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0m$3.599/gal\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mNotes:\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mGood quality fuel\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mSubmitted by: \u001b[0m\n \u001b[0muser@example.com\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mNavigate\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mPremium 93\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mFavorite\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n\u001b[36m\u001b[39m\n at Object.getElementError (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/config.js:37:19)\n at getElementError (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:20:35)\n at getMultipleElementsFoundError (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:23:10)\n at /Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:55:13\n at /Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:95:19\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/components/CommunityStationCard.test.tsx:94:35)\n at Promise.then.completed (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:298:28)\n at new Promise ()\n at callAsyncCircusFn (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:231:10)\n at _callCircusTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:316:40)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at _runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:252:3)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:126:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at run (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:71:3)\n at runAndTransformResultsToJestFormat (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapterInit.js:122:21)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:19)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)", + "name": "TestingLibraryElementError" + } + ] + }, + { + "name": "should handle rejection with reason", + "fullName": "CommunityStationCard should handle rejection with reason", + "state": "passed" + }, + { + "name": "should work on mobile viewport", + "fullName": "CommunityStationCard should work on mobile viewport", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/components/BulkActionDialog.test.tsx", + "tests": [ + { + "name": "should render dialog when open", + "fullName": "BulkActionDialog should render dialog when open", + "state": "passed" + }, + { + "name": "should display list of items", + "fullName": "BulkActionDialog should display list of items", + "state": "passed" + }, + { + "name": "should call onConfirm when confirm button clicked", + "fullName": "BulkActionDialog should call onConfirm when confirm button clicked", + "state": "passed" + }, + { + "name": "should call onCancel when cancel button clicked", + "fullName": "BulkActionDialog should call onCancel when cancel button clicked", + "state": "passed" + }, + { + "name": "should disable buttons when loading", + "fullName": "BulkActionDialog should disable buttons when loading", + "state": "failed", + "errors": [ + { + "message": "TestingLibraryElementError: Unable to find an accessible element with the role \"button\" and name `/confirm/i`\n\nHere are the accessible roles:\n\n presentation:\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n --------------------------------------------------\n dialog:\n\n Name \"Delete Items?\":\n \u001b[36m\u001b[39m\n\n --------------------------------------------------\n heading:\n\n Name \"Delete Items?\":\n \u001b[36m\u001b[39m\n\n --------------------------------------------------\n paragraph:\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n --------------------------------------------------\n list:\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n --------------------------------------------------\n listitem:\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n --------------------------------------------------\n button:\n\n Name \"Cancel\":\n \u001b[36m\u001b[39m\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n --------------------------------------------------\n progressbar:\n\n Name \"\":\n \u001b[36m\u001b[39m\n\n --------------------------------------------------\n\nIgnored nodes: comments, script, style\n\u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mDelete Items?\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mThis action cannot be undone.\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mItem 1\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mItem 2\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mItem 3\u001b[0m\n \u001b[36m

\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[0mCancel\u001b[0m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n \u001b[36m\u001b[39m\n\u001b[36m\u001b[39m\n at Object.getElementError (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/config.js:37:19)\n at /Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:76:38\n at /Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:52:17\n at /Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@testing-library/dom/dist/query-helpers.js:95:19\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/components/BulkActionDialog.test.tsx:55:34)\n at Promise.then.completed (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:298:28)\n at new Promise ()\n at callAsyncCircusFn (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:231:10)\n at _callCircusTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:316:40)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at _runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:252:3)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:126:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at run (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:71:3)\n at runAndTransformResultsToJestFormat (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapterInit.js:122:21)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:19)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)", + "name": "TestingLibraryElementError" + } + ] + }, + { + "name": "should show loading spinner when loading", + "fullName": "BulkActionDialog should show loading spinner when loading", + "state": "failed", + "errors": [ + { + "message": "Error: expect(received).toBeInTheDocument()\n\nreceived value must be an HTMLElement or an SVGElement.\nReceived has value: null\n at __EXTERNAL_MATCHER_TRAP__ (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/expect/build/index.js:325:30)\n at Object.throwingMatcher [as toBeInTheDocument] (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/expect/build/index.js:326:15)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/components/BulkActionDialog.test.tsx:67:66)\n at Promise.then.completed (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:298:28)\n at new Promise ()\n at callAsyncCircusFn (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:231:10)\n at _callCircusTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:316:40)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at _runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:252:3)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:126:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at run (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:71:3)\n at runAndTransformResultsToJestFormat (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapterInit.js:122:21)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:19)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + }, + { + "name": "should support custom button text", + "fullName": "BulkActionDialog should support custom button text", + "state": "passed" + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/components/StationCard.test.tsx", + "tests": [ + { + "name": "should render station name and address", + "fullName": "StationCard Rendering should render station name and address", + "state": "passed" + }, + { + "name": "should render station photo if available", + "fullName": "StationCard Rendering should render station photo if available", + "state": "passed" + }, + { + "name": "should render rating when available", + "fullName": "StationCard Rendering should render rating when available", + "state": "passed" + }, + { + "name": "should render distance chip", + "fullName": "StationCard Rendering should render distance chip", + "state": "passed" + }, + { + "name": "should not crash when photo is missing", + "fullName": "StationCard Rendering should not crash when photo is missing", + "state": "passed" + }, + { + "name": "should call onSave when bookmark button clicked (not saved)", + "fullName": "StationCard Save/Delete Actions should call onSave when bookmark button clicked (not saved)", + "state": "passed" + }, + { + "name": "should call onDelete when bookmark button clicked (saved)", + "fullName": "StationCard Save/Delete Actions should call onDelete when bookmark button clicked (saved)", + "state": "passed" + }, + { + "name": "should show filled bookmark icon when saved", + "fullName": "StationCard Save/Delete Actions should show filled bookmark icon when saved", + "state": "passed" + }, + { + "name": "should show outline bookmark icon when not saved", + "fullName": "StationCard Save/Delete Actions should show outline bookmark icon when not saved", + "state": "passed" + }, + { + "name": "should open Google Maps when directions button clicked", + "fullName": "StationCard Directions Link should open Google Maps when directions button clicked", + "state": "failed", + "errors": [ + { + "message": "Error: expect(jest.fn()).toHaveBeenCalledWith(...expected)\n\nExpected: StringContaining \"google.com/maps\", \"_blank\"\n\nNumber of calls: 0\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/components/StationCard.test.tsx:120:27)\n at Promise.then.completed (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:298:28)\n at new Promise ()\n at callAsyncCircusFn (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:231:10)\n at _callCircusTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:316:40)\n at _runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:252:3)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:126:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at run (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:71:3)\n at runAndTransformResultsToJestFormat (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapterInit.js:122:21)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:19)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + }, + { + "name": "should encode address in directions URL", + "fullName": "StationCard Directions Link should encode address in directions URL", + "state": "failed", + "errors": [ + { + "message": "Error: expect(jest.fn()).toHaveBeenCalledWith(...expected)\n\nExpected: StringContaining \"123%20Main%20St%2C%20San%20Francisco%2C%20CA%2094105\", \"_blank\"\n\nNumber of calls: 0\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/stations/__tests__/components/StationCard.test.tsx:132:27)\n at Promise.then.completed (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:298:28)\n at new Promise ()\n at callAsyncCircusFn (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/utils.js:231:10)\n at _callCircusTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:316:40)\n at _runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:252:3)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:126:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at _runTestsForDescribeBlock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:121:9)\n at run (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/run.js:71:3)\n at runAndTransformResultsToJestFormat (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapterInit.js:122:21)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:79:19)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + }, + { + "name": "should have minimum 44px button heights", + "fullName": "StationCard Touch Targets should have minimum 44px button heights", + "state": "passed" + }, + { + "name": "should call onSelect when card is clicked", + "fullName": "StationCard Card Selection should call onSelect when card is clicked", + "state": "passed" + }, + { + "name": "should not call onSelect when button is clicked", + "fullName": "StationCard Card Selection should not call onSelect when button is clicked", + "state": "passed" + } + ] + }, { "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/shared/components/CameraCapture/CameraCapture.test.tsx", "tests": [ @@ -109,8 +863,50 @@ "state": "passed" } ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/AdminUsersPage.test.tsx", + "tests": [ + { + "name": "Module failed to load (Error)", + "fullName": "Module failed to load (Error)", + "state": "failed", + "errors": [ + { + "message": "TextEncoder is not defined", + "name": "Error", + "stack": "ReferenceError: TextEncoder is not defined\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/@auth0/auth0-react/node_modules/@auth0/auth0-spa-js/dist/auth0-spa-js.production.esm.js:1:10973)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/core/auth/useAdminAccess.ts:7:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime._generateMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1690:34)\n at Runtime.requireMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:996:39)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1046:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/pages/admin/AdminUsersPage.tsx:52:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at Runtime.requireModuleOrMock (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1048:21)\n at Object. (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/AdminUsersPage.test.tsx:7:1)\n at Runtime._execModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1439:24)\n at Runtime._loadModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:1022:12)\n at Runtime.requireModule (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runtime/build/index.js:882:12)\n at jestAdapter (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-circus/build/legacy-code-todo-rewrite/jestAdapter.js:77:13)\n at processTicksAndRejections (node:internal/process/task_queues:103:5)\n at runTestInternal (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:367:16)\n at runTest (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/runTest.js:444:34)\n at Object.worker (/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/node_modules/jest-runner/build/testWorker.js:106:12)" + } + ] + } + ] + }, + { + "moduleId": "/Users/egullickson/Documents/Technology/coding/motovaultpro/frontend/src/features/admin/__tests__/components/ErrorState.test.tsx", + "tests": [ + { + "name": "should render error message", + "fullName": "ErrorState should render error message", + "state": "passed" + }, + { + "name": "should render retry button when onRetry provided", + "fullName": "ErrorState should render retry button when onRetry provided", + "state": "passed" + }, + { + "name": "should not render retry button when onRetry not provided", + "fullName": "ErrorState should not render retry button when onRetry not provided", + "state": "passed" + }, + { + "name": "should show default message when error has no message", + "fullName": "ErrorState should show default message when error has no message", + "state": "passed" + } + ] } ], "unhandledErrors": [], - "reason": "passed" + "reason": "failed" } \ No newline at end of file diff --git a/ocr/Dockerfile b/ocr/Dockerfile index c460f0b..d1c52e4 100644 --- a/ocr/Dockerfile +++ b/ocr/Dockerfile @@ -15,6 +15,7 @@ FROM ${REGISTRY_MIRRORS}/python:3.13-slim # - libgomp1: OpenMP runtime required by PaddlePaddle # - libheif1/libheif-dev: HEIF image support (iPhone photos) # - libglib2.0-0: GLib shared library (OpenCV dependency) +# - libgl1-mesa-glx: OpenGL runtime (OpenCV cv2 dependency, pulled by PaddleX) # - libmagic1: File type detection # - curl: Health check endpoint RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -24,6 +25,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libheif1 \ libheif-dev \ libglib2.0-0 \ + libgl1-mesa-glx \ libmagic1 \ curl \ && rm -rf /var/lib/apt/lists/* From b9fe222f125359f3b288ecdad0f10deba832b5e0 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 12:12:04 -0600 Subject: [PATCH 09/16] fix: Build errors and tesseract removal --- ocr/Dockerfile | 12 +-- ocr/app/CLAUDE.md | 2 +- ocr/app/config.py | 2 - ocr/app/engines/__init__.py | 1 - ocr/app/engines/base_engine.py | 2 +- ocr/app/engines/engine_factory.py | 3 +- ocr/app/engines/tesseract_engine.py | 114 ---------------------- ocr/app/extractors/manual_extractor.py | 12 ++- ocr/app/extractors/vin_extractor.py | 4 +- ocr/app/preprocessors/vin_preprocessor.py | 12 +-- ocr/app/table_extraction/detector.py | 2 +- ocr/app/validators/vin_validator.py | 2 +- ocr/requirements.txt | 1 - ocr/tests/test_engine_abstraction.py | 87 +---------------- ocr/tests/test_health.py | 15 +-- ocr/tests/test_vin_validator.py | 2 +- 16 files changed, 35 insertions(+), 238 deletions(-) delete mode 100644 ocr/app/engines/tesseract_engine.py diff --git a/ocr/Dockerfile b/ocr/Dockerfile index d1c52e4..8028575 100644 --- a/ocr/Dockerfile +++ b/ocr/Dockerfile @@ -2,7 +2,6 @@ # Uses mirrored base images from Gitea Package Registry # # Primary engine: PaddleOCR PP-OCRv4 (models baked into image) -# Backward compat: Tesseract 5.x (optional, via TesseractEngine) # Cloud fallback: Google Vision (optional, requires API key at runtime) # Build argument for registry (defaults to Gitea mirrors, falls back to Docker Hub) @@ -11,21 +10,16 @@ ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors FROM ${REGISTRY_MIRRORS}/python:3.13-slim # System dependencies -# - tesseract-ocr/eng: Backward-compatible OCR engine (used by TesseractEngine) # - libgomp1: OpenMP runtime required by PaddlePaddle # - libheif1/libheif-dev: HEIF image support (iPhone photos) # - libglib2.0-0: GLib shared library (OpenCV dependency) -# - libgl1-mesa-glx: OpenGL runtime (OpenCV cv2 dependency, pulled by PaddleX) # - libmagic1: File type detection # - curl: Health check endpoint RUN apt-get update && apt-get install -y --no-install-recommends \ - tesseract-ocr \ - tesseract-ocr-eng \ libgomp1 \ libheif1 \ libheif-dev \ libglib2.0-0 \ - libgl1-mesa-glx \ libmagic1 \ curl \ && rm -rf /var/lib/apt/lists/* @@ -33,7 +27,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Python dependencies WORKDIR /app COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt +# Install dependencies. PaddleX (transitive via paddleocr) pulls in the full +# opencv-python which requires libGL.so.1. Force-reinstall the headless +# variant afterwards so the container stays GUI-free. +RUN pip install --no-cache-dir -r requirements.txt \ + && pip install --no-cache-dir --force-reinstall opencv-python-headless # Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime). # Models are baked into the image so container starts are fast and diff --git a/ocr/app/CLAUDE.md b/ocr/app/CLAUDE.md index 8fbc7f1..7d0441b 100644 --- a/ocr/app/CLAUDE.md +++ b/ocr/app/CLAUDE.md @@ -12,7 +12,7 @@ | Directory | What | When to read | | --------- | ---- | ------------ | -| `engines/` | OCR engine abstraction (PaddleOCR primary, Google Vision fallback, Tesseract compat) | Engine changes, adding new engines | +| `engines/` | OCR engine abstraction (PaddleOCR primary, Google Vision fallback) | Engine changes, adding new engines | | `extractors/` | Data extraction logic | Adding new extraction types | | `models/` | Data models and schemas | Request/response types | | `patterns/` | Regex and parsing patterns | Pattern matching rules | diff --git a/ocr/app/config.py b/ocr/app/config.py index 4a15906..e933d4b 100644 --- a/ocr/app/config.py +++ b/ocr/app/config.py @@ -9,8 +9,6 @@ class Settings: self.log_level: str = os.getenv("LOG_LEVEL", "info") self.host: str = os.getenv("HOST", "0.0.0.0") self.port: int = int(os.getenv("PORT", "8000")) - self.tesseract_cmd: str = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract") - # OCR engine configuration self.ocr_primary_engine: str = os.getenv("OCR_PRIMARY_ENGINE", "paddleocr") self.ocr_confidence_threshold: float = float( diff --git a/ocr/app/engines/__init__.py b/ocr/app/engines/__init__.py index abc8b05..df38155 100644 --- a/ocr/app/engines/__init__.py +++ b/ocr/app/engines/__init__.py @@ -5,7 +5,6 @@ decoupling extractors from specific OCR libraries. Engines: - PaddleOcrEngine: PaddleOCR PP-OCRv4 (primary, CPU-only) - - TesseractEngine: pytesseract wrapper (backward compatibility) - CloudEngine: Google Vision TEXT_DETECTION (optional cloud fallback) - HybridEngine: Primary + fallback with confidence threshold """ diff --git a/ocr/app/engines/base_engine.py b/ocr/app/engines/base_engine.py index ddca084..d10ca26 100644 --- a/ocr/app/engines/base_engine.py +++ b/ocr/app/engines/base_engine.py @@ -57,7 +57,7 @@ class OcrEngineResult: text: str confidence: float # 0.0-1.0 word_boxes: list[WordBox] - engine_name: str # "paddleocr", "tesseract", "google_vision" + engine_name: str # "paddleocr", "google_vision" # --- Abstract base --- diff --git a/ocr/app/engines/engine_factory.py b/ocr/app/engines/engine_factory.py index 49464d2..f52926f 100644 --- a/ocr/app/engines/engine_factory.py +++ b/ocr/app/engines/engine_factory.py @@ -11,7 +11,6 @@ logger = logging.getLogger(__name__) # Valid engine identifiers (primary engines only; hybrid is constructed separately) _ENGINE_REGISTRY: dict[str, str] = { "paddleocr": "app.engines.paddle_engine.PaddleOcrEngine", - "tesseract": "app.engines.tesseract_engine.TesseractEngine", "google_vision": "app.engines.cloud_engine.CloudEngine", } @@ -46,7 +45,7 @@ def create_engine(engine_name: str | None = None) -> OcrEngine: returns a ``HybridEngine`` that wraps the primary with the fallback. Args: - engine_name: Engine identifier ("paddleocr", "tesseract"). + engine_name: Engine identifier ("paddleocr", "google_vision"). Falls back to ``settings.ocr_primary_engine``. Returns: diff --git a/ocr/app/engines/tesseract_engine.py b/ocr/app/engines/tesseract_engine.py deleted file mode 100644 index 02108ec..0000000 --- a/ocr/app/engines/tesseract_engine.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Tesseract engine wrapper for backward compatibility.""" - -import io -import logging - -from app.config import settings -from app.engines.base_engine import ( - EngineProcessingError, - EngineUnavailableError, - OcrConfig, - OcrEngine, - OcrEngineResult, - WordBox, -) - -logger = logging.getLogger(__name__) - - -class TesseractEngine(OcrEngine): - """pytesseract wrapper conforming to the OcrEngine interface.""" - - def __init__(self) -> None: - try: - import pytesseract # type: ignore[import-untyped] - - pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd - self._pytesseract = pytesseract - logger.info("TesseractEngine initialized (cmd=%s)", settings.tesseract_cmd) - except ImportError as exc: - raise EngineUnavailableError( - "pytesseract is not installed. " - "Install with: pip install pytesseract" - ) from exc - - @property - def name(self) -> str: - return "tesseract" - - def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: - """Run Tesseract OCR on image bytes.""" - try: - from PIL import Image - - image = Image.open(io.BytesIO(image_bytes)) - - # Build Tesseract config string from OcrConfig - tess_config = self._build_config(config) - - # Get word-level data - ocr_data = self._pytesseract.image_to_data( - image, - config=tess_config, - output_type=self._pytesseract.Output.DICT, - ) - - word_boxes: list[WordBox] = [] - texts: list[str] = [] - confidences: list[float] = [] - - for i, text in enumerate(ocr_data["text"]): - conf = int(ocr_data["conf"][i]) - if text.strip() and conf > 0: - normalized_conf = conf / 100.0 - word_boxes.append( - WordBox( - text=text.strip(), - confidence=normalized_conf, - x=int(ocr_data["left"][i]), - y=int(ocr_data["top"][i]), - width=int(ocr_data["width"][i]), - height=int(ocr_data["height"][i]), - ) - ) - texts.append(text.strip()) - confidences.append(normalized_conf) - - combined_text = " ".join(texts) - avg_confidence = ( - sum(confidences) / len(confidences) if confidences else 0.0 - ) - - return OcrEngineResult( - text=combined_text, - confidence=avg_confidence, - word_boxes=word_boxes, - engine_name=self.name, - ) - - except (EngineUnavailableError, EngineProcessingError): - raise - except Exception as exc: - raise EngineProcessingError( - f"Tesseract recognition failed: {exc}" - ) from exc - - def _build_config(self, config: OcrConfig) -> str: - """Translate OcrConfig into a Tesseract CLI config string.""" - parts: list[str] = [] - - # Page segmentation mode - if config.single_word: - parts.append("--psm 8") - elif config.single_line: - parts.append("--psm 7") - else: - # Default: assume uniform block of text - psm = config.hints.get("psm", 6) - parts.append(f"--psm {psm}") - - # Character whitelist - if config.char_whitelist: - parts.append(f"-c tessedit_char_whitelist={config.char_whitelist}") - - return " ".join(parts) diff --git a/ocr/app/extractors/manual_extractor.py b/ocr/app/extractors/manual_extractor.py index e447882..ad5f159 100644 --- a/ocr/app/extractors/manual_extractor.py +++ b/ocr/app/extractors/manual_extractor.py @@ -5,9 +5,9 @@ import time from dataclasses import dataclass, field from typing import Callable, Optional -import pytesseract from PIL import Image +from app.engines import create_engine, OcrConfig from app.preprocessors.pdf_preprocessor import pdf_preprocessor, PdfInfo from app.table_extraction.detector import table_detector, DetectedTable from app.table_extraction.parser import table_parser, ParsedScheduleRow @@ -243,8 +243,9 @@ class ManualExtractor: # OCR the full page try: - image = Image.open(io.BytesIO(image_bytes)) - ocr_text = pytesseract.image_to_string(image) + engine = create_engine() + ocr_result = engine.recognize(image_bytes, OcrConfig()) + ocr_text = ocr_result.text # Mark tables as maintenance if page contains maintenance keywords for table in detected_tables: @@ -358,8 +359,9 @@ class ManualExtractor: if not text and first_page.image_bytes: # OCR first page - image = Image.open(io.BytesIO(first_page.image_bytes)) - text = pytesseract.image_to_string(image) + engine = create_engine() + ocr_result = engine.recognize(first_page.image_bytes, OcrConfig()) + text = ocr_result.text if text: return self._parse_vehicle_from_text(text) diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index cce88e9..01a9343 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -316,8 +316,8 @@ class VinExtractor(BaseExtractor): single-line - Treat as a single text line single-word - Treat as a single word - For PaddleOCR, angle classification handles rotated/angled text - inherently, replacing the need for Tesseract PSM mode fallbacks. + PaddleOCR angle classification handles rotated/angled text + inherently, so no PSM mode fallbacks are needed. Returns: List of VIN candidates diff --git a/ocr/app/preprocessors/vin_preprocessor.py b/ocr/app/preprocessors/vin_preprocessor.py index 290fb5b..4128e68 100644 --- a/ocr/app/preprocessors/vin_preprocessor.py +++ b/ocr/app/preprocessors/vin_preprocessor.py @@ -93,7 +93,7 @@ class VinPreprocessor: gray = cv_image steps_applied.append("grayscale") - # Upscale small images for better OCR (Tesseract needs ~300 DPI) + # Upscale small images for better OCR (~300 DPI recommended) gray = self._ensure_minimum_resolution(gray) steps_applied.append("resolution_check") @@ -129,14 +129,14 @@ class VinPreprocessor: ) # Minimum width in pixels for reliable VIN OCR. - # A 17-char VIN needs ~30px per character for Tesseract accuracy. + # A 17-char VIN needs ~30px per character for reliable OCR accuracy. MIN_WIDTH_FOR_VIN = 600 def _ensure_minimum_resolution(self, image: np.ndarray) -> np.ndarray: """ Upscale image if too small for reliable OCR. - Tesseract works best at ~300 DPI. Mobile photos of VINs may have + OCR works best at ~300 DPI. Mobile photos of VINs may have the text occupy only a small portion of the frame, resulting in low effective resolution for the VIN characters. """ @@ -160,7 +160,7 @@ class VinPreprocessor: Colored backgrounds have a low min value (e.g. green sticker: min(130,230,150) = 130) → inverted to 125 (medium gray). - The inversion ensures Tesseract always receives dark-text-on- + The inversion ensures the OCR engine always receives dark-text-on- light-background, which is the polarity it expects. """ b_channel, g_channel, r_channel = cv2.split(bgr_image) @@ -168,8 +168,8 @@ class VinPreprocessor: min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel) # Invert so white text (min=255) becomes black (0) and colored - # backgrounds (min~130) become lighter gray (~125). Tesseract - # expects dark text on light background. + # backgrounds (min~130) become lighter gray (~125). OCR engines + # expect dark text on light background. inverted = cv2.bitwise_not(min_channel) gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) diff --git a/ocr/app/table_extraction/detector.py b/ocr/app/table_extraction/detector.py index 362990f..9c5af54 100644 --- a/ocr/app/table_extraction/detector.py +++ b/ocr/app/table_extraction/detector.py @@ -312,7 +312,7 @@ class TableDetector: Returns: 2D list of cell contents """ - # This would use Tesseract on the cropped region + # This would use OCR on the cropped region # For now, return empty - actual OCR will be done in manual_extractor logger.debug(f"Table region: ({table.x}, {table.y}) {table.width}x{table.height}") return [] diff --git a/ocr/app/validators/vin_validator.py b/ocr/app/validators/vin_validator.py index c9c60ef..79a2062 100644 --- a/ocr/app/validators/vin_validator.py +++ b/ocr/app/validators/vin_validator.py @@ -226,7 +226,7 @@ class VinValidator: Uses two strategies: 1. Find continuous 11-20 char alphanumeric runs (handles intact VINs) 2. Concatenate adjacent short fragments separated by spaces/dashes - (handles Tesseract fragmenting VINs into multiple words) + (handles OCR fragmenting VINs into multiple words) Args: text: Raw OCR text diff --git a/ocr/requirements.txt b/ocr/requirements.txt index 9ac83ad..946f645 100644 --- a/ocr/requirements.txt +++ b/ocr/requirements.txt @@ -14,7 +14,6 @@ opencv-python-headless>=4.8.0 numpy>=1.24.0 # OCR Engines -pytesseract>=0.3.10 paddlepaddle>=2.6.0 paddleocr>=2.8.0 google-cloud-vision>=3.7.0 diff --git a/ocr/tests/test_engine_abstraction.py b/ocr/tests/test_engine_abstraction.py index 2e8c150..44c314f 100644 --- a/ocr/tests/test_engine_abstraction.py +++ b/ocr/tests/test_engine_abstraction.py @@ -1,7 +1,7 @@ """Tests for OCR engine abstraction layer. Covers: base types, exception hierarchy, PaddleOcrEngine, -TesseractEngine, CloudEngine, HybridEngine, and engine_factory. +CloudEngine, HybridEngine, and engine_factory. """ import io @@ -124,7 +124,7 @@ class TestOcrEngineResult: def test_empty_result(self) -> None: result = OcrEngineResult( - text="", confidence=0.0, word_boxes=[], engine_name="tesseract" + text="", confidence=0.0, word_boxes=[], engine_name="paddleocr" ) assert result.text == "" assert result.word_boxes == [] @@ -303,85 +303,6 @@ class TestPaddleOcrEngine: engine.recognize(_create_test_image_bytes(), OcrConfig()) -# --------------------------------------------------------------------------- -# TesseractEngine -# --------------------------------------------------------------------------- - - -class TestTesseractEngine: - """Tests for TesseractEngine using mocked pytesseract.""" - - @pytest.fixture() - def engine(self) -> "TesseractEngine": # type: ignore[name-defined] - """Create a TesseractEngine with mocked pytesseract dependency.""" - mock_pytesseract = MagicMock() - mock_pytesseract.Output.DICT = "dict" - - with patch.dict("sys.modules", {"pytesseract": mock_pytesseract}): - with patch("app.engines.tesseract_engine.settings") as mock_settings: - mock_settings.tesseract_cmd = "/usr/bin/tesseract" - from app.engines.tesseract_engine import TesseractEngine - - eng = TesseractEngine() - eng._mock_pytesseract = mock_pytesseract # type: ignore[attr-defined] - return eng - - def test_name(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] - assert engine.name == "tesseract" - - def test_build_config_default_psm(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] - config_str = engine._build_config(OcrConfig()) - assert "--psm 6" in config_str - - def test_build_config_single_line(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] - config_str = engine._build_config(OcrConfig(single_line=True)) - assert "--psm 7" in config_str - - def test_build_config_single_word(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] - config_str = engine._build_config(OcrConfig(single_word=True)) - assert "--psm 8" in config_str - - def test_build_config_whitelist(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] - config_str = engine._build_config(OcrConfig(char_whitelist="ABC123")) - assert "-c tessedit_char_whitelist=ABC123" in config_str - - def test_build_config_psm_hint(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] - config_str = engine._build_config(OcrConfig(hints={"psm": 11})) - assert "--psm 11" in config_str - - def test_recognize_normalizes_confidence(self, engine: "TesseractEngine") -> None: # type: ignore[name-defined] - """Tesseract returns 0-100 confidence; engine normalizes to 0.0-1.0.""" - engine._pytesseract.image_to_data.return_value = { - "text": ["HELLO", ""], - "conf": [92, -1], - "left": [10], - "top": [20], - "width": [100], - "height": [30], - } - - result = engine.recognize(_create_test_image_bytes(), OcrConfig()) - assert result.text == "HELLO" - assert abs(result.confidence - 0.92) < 0.01 - assert result.engine_name == "tesseract" - - def test_import_error_raises_unavailable(self) -> None: - with patch.dict("sys.modules", {"pytesseract": None}): - with patch("app.engines.tesseract_engine.settings") as mock_settings: - mock_settings.tesseract_cmd = "/usr/bin/tesseract" - - def mock_import(name, *args, **kwargs): - if name == "pytesseract": - raise ImportError("No module named 'pytesseract'") - return __import__(name, *args, **kwargs) - - with patch("builtins.__import__", side_effect=mock_import): - from app.engines.tesseract_engine import TesseractEngine - - with pytest.raises(EngineUnavailableError, match="pytesseract"): - TesseractEngine() - - # --------------------------------------------------------------------------- # CloudEngine # --------------------------------------------------------------------------- @@ -637,8 +558,8 @@ class TestEngineFactory: from app.engines.engine_factory import create_engine - create_engine("tesseract") - mock_create.assert_called_once_with("tesseract") + create_engine("google_vision") + mock_create.assert_called_once_with("google_vision") @patch("app.engines.engine_factory.settings") @patch("app.engines.engine_factory._create_single_engine") diff --git a/ocr/tests/test_health.py b/ocr/tests/test_health.py index cd1e914..a127293 100644 --- a/ocr/tests/test_health.py +++ b/ocr/tests/test_health.py @@ -39,14 +39,9 @@ def test_pillow_heif_can_register(): assert "HEIF" in Image.registered_extensions().values() -def test_tesseract_available(): - """Tesseract OCR is available and can process images.""" - import pytesseract +def test_paddleocr_engine_available(): + """PaddleOCR engine can be created.""" + from app.engines.paddle_engine import PaddleOcrEngine - # Create a simple test image with text - img = Image.new("RGB", (200, 50), color="white") - - # Verify pytesseract can call tesseract (will return empty string for blank image) - result = pytesseract.image_to_string(img) - # Just verify it doesn't raise an exception - blank image returns empty/whitespace - assert isinstance(result, str) + engine = PaddleOcrEngine() + assert engine.name == "paddleocr" diff --git a/ocr/tests/test_vin_validator.py b/ocr/tests/test_vin_validator.py index 241eabd..e6c65e1 100644 --- a/ocr/tests/test_vin_validator.py +++ b/ocr/tests/test_vin_validator.py @@ -165,7 +165,7 @@ class TestVinValidator: """Test candidate extraction handles space-fragmented VINs from OCR.""" validator = VinValidator() - # Tesseract often fragments VINs into multiple words + # OCR engines sometimes fragment VINs into multiple words text = "1HGBH 41JXMN 109186" candidates = validator.extract_candidates(text) From 639ca117f16c5255e29e786bcc7d196f1aed24e5 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 14:44:06 -0600 Subject: [PATCH 10/16] fix: Update PaddleOCR API --- ocr/Dockerfile | 3 +- ocr/app/engines/paddle_engine.py | 56 ++++++++++++++-------- ocr/tests/test_engine_abstraction.py | 71 +++++++++++++++++++--------- 3 files changed, 86 insertions(+), 44 deletions(-) diff --git a/ocr/Dockerfile b/ocr/Dockerfile index 8028575..49a89bb 100644 --- a/ocr/Dockerfile +++ b/ocr/Dockerfile @@ -36,7 +36,8 @@ RUN pip install --no-cache-dir -r requirements.txt \ # Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime). # Models are baked into the image so container starts are fast and # no network access is needed at runtime for model download. -RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)" \ +ENV PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True +RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(ocr_version='PP-OCRv4', use_textline_orientation=True, lang='en', device='cpu')" \ && echo "PaddleOCR PP-OCRv4 models downloaded and verified" COPY . . diff --git a/ocr/app/engines/paddle_engine.py b/ocr/app/engines/paddle_engine.py index 41433f1..61fb0cf 100644 --- a/ocr/app/engines/paddle_engine.py +++ b/ocr/app/engines/paddle_engine.py @@ -34,12 +34,12 @@ class PaddleOcrEngine(OcrEngine): from paddleocr import PaddleOCR # type: ignore[import-untyped] self._ocr = PaddleOCR( - use_angle_cls=True, + ocr_version="PP-OCRv4", + use_textline_orientation=True, lang="en", - use_gpu=False, - show_log=False, + device="cpu", ) - logger.info("PaddleOCR PP-OCRv4 initialized (CPU, angle_cls=True)") + logger.info("PaddleOCR PP-OCRv4 initialized (CPU, textline_orientation=True)") return self._ocr except ImportError as exc: raise EngineUnavailableError( @@ -54,8 +54,9 @@ class PaddleOcrEngine(OcrEngine): def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: """Run PaddleOCR on image bytes. - PaddleOCR returns: list of pages, each page is a list of - ``[[box_coords], (text, confidence)]`` entries. + PaddleOCR v3.x ``predict()`` returns an iterator of result objects. + Each result has a ``res`` dict with ``dt_polys``, ``rec_texts``, + and ``rec_scores``. """ ocr = self._get_ocr() @@ -66,10 +67,22 @@ class PaddleOcrEngine(OcrEngine): image = Image.open(io.BytesIO(image_bytes)).convert("RGB") img_array = np.array(image) - # PaddleOCR accepts numpy arrays - results = ocr.ocr(img_array, cls=config.use_angle_cls) + results = list(ocr.predict(img_array)) - if not results or not results[0]: + if not results: + return OcrEngineResult( + text="", + confidence=0.0, + word_boxes=[], + engine_name=self.name, + ) + + res = results[0].res + dt_polys = res.get("dt_polys", []) + rec_texts = res.get("rec_texts", []) + rec_scores = res.get("rec_scores", []) + + if not rec_texts: return OcrEngineResult( text="", confidence=0.0, @@ -81,10 +94,8 @@ class PaddleOcrEngine(OcrEngine): texts: list[str] = [] confidences: list[float] = [] - for line in results[0]: - box_coords = line[0] # [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] - text = line[1][0] - conf = float(line[1][1]) + for i, text in enumerate(rec_texts): + conf = float(rec_scores[i]) if i < len(rec_scores) else 0.0 # Apply character whitelist filter if configured if config.char_whitelist: @@ -94,11 +105,16 @@ class PaddleOcrEngine(OcrEngine): if not text.strip(): continue - # Convert quadrilateral to bounding box - xs = [pt[0] for pt in box_coords] - ys = [pt[1] for pt in box_coords] - x_min, y_min = int(min(xs)), int(min(ys)) - x_max, y_max = int(max(xs)), int(max(ys)) + # Convert quadrilateral polygon to bounding box + x_min, y_min, width, height = 0, 0, 0, 0 + if i < len(dt_polys): + poly = dt_polys[i] + xs = [pt[0] for pt in poly] + ys = [pt[1] for pt in poly] + x_min, y_min = int(min(xs)), int(min(ys)) + x_max, y_max = int(max(xs)), int(max(ys)) + width = x_max - x_min + height = y_max - y_min word_boxes.append( WordBox( @@ -106,8 +122,8 @@ class PaddleOcrEngine(OcrEngine): confidence=conf, x=x_min, y=y_min, - width=x_max - x_min, - height=y_max - y_min, + width=width, + height=height, ) ) texts.append(text.strip()) diff --git a/ocr/tests/test_engine_abstraction.py b/ocr/tests/test_engine_abstraction.py index 44c314f..a85f0f5 100644 --- a/ocr/tests/test_engine_abstraction.py +++ b/ocr/tests/test_engine_abstraction.py @@ -41,6 +41,19 @@ def _make_result( ) +def _mock_paddle_result( + dt_polys: list, rec_texts: list[str], rec_scores: list[float] +) -> MagicMock: + """Create a mock PaddleOCR v3.x predict() result object.""" + result = MagicMock() + result.res = { + "dt_polys": dt_polys, + "rec_texts": rec_texts, + "rec_scores": rec_scores, + } + return result + + # --------------------------------------------------------------------------- # Exception hierarchy # --------------------------------------------------------------------------- @@ -182,7 +195,9 @@ class TestPaddleOcrEngine: engine = PaddleOcrEngine() mock_ocr = MagicMock() - mock_ocr.ocr.return_value = [None] + mock_ocr.predict.return_value = iter([ + _mock_paddle_result(dt_polys=[], rec_texts=[], rec_scores=[]) + ]) engine._ocr = mock_ocr result = engine.recognize(_create_test_image_bytes(), OcrConfig()) @@ -196,12 +211,16 @@ class TestPaddleOcrEngine: engine = PaddleOcrEngine() mock_ocr = MagicMock() - mock_ocr.ocr.return_value = [ - [ - [[[10, 20], [110, 20], [110, 50], [10, 50]], ("HELLO", 0.95)], - [[[10, 60], [110, 60], [110, 90], [10, 90]], ("WORLD", 0.88)], - ] - ] + mock_ocr.predict.return_value = iter([ + _mock_paddle_result( + dt_polys=[ + [[10, 20], [110, 20], [110, 50], [10, 50]], + [[10, 60], [110, 60], [110, 90], [10, 90]], + ], + rec_texts=["HELLO", "WORLD"], + rec_scores=[0.95, 0.88], + ) + ]) engine._ocr = mock_ocr result = engine.recognize(_create_test_image_bytes(), OcrConfig()) @@ -218,11 +237,13 @@ class TestPaddleOcrEngine: engine = PaddleOcrEngine() mock_ocr = MagicMock() - mock_ocr.ocr.return_value = [ - [ - [[[0, 0], [100, 0], [100, 30], [0, 30]], ("1HG-BH4!", 0.9)], - ] - ] + mock_ocr.predict.return_value = iter([ + _mock_paddle_result( + dt_polys=[[[0, 0], [100, 0], [100, 30], [0, 30]]], + rec_texts=["1HG-BH4!"], + rec_scores=[0.9], + ) + ]) engine._ocr = mock_ocr config = OcrConfig(char_whitelist="ABCDEFGHJKLMNPRSTUVWXYZ0123456789") @@ -237,11 +258,13 @@ class TestPaddleOcrEngine: engine = PaddleOcrEngine() mock_ocr = MagicMock() # Slightly rotated quad: min x=8, min y=20, max x=110, max y=55 - mock_ocr.ocr.return_value = [ - [ - [[[10, 20], [110, 25], [108, 55], [8, 50]], ("TEXT", 0.9)], - ] - ] + mock_ocr.predict.return_value = iter([ + _mock_paddle_result( + dt_polys=[[[10, 20], [110, 25], [108, 55], [8, 50]]], + rec_texts=["TEXT"], + rec_scores=[0.9], + ) + ]) engine._ocr = mock_ocr result = engine.recognize(_create_test_image_bytes(), OcrConfig()) @@ -257,11 +280,13 @@ class TestPaddleOcrEngine: engine = PaddleOcrEngine() mock_ocr = MagicMock() - mock_ocr.ocr.return_value = [ - [ - [[[0, 0], [50, 0], [50, 20], [0, 20]], ("---", 0.9)], - ] - ] + mock_ocr.predict.return_value = iter([ + _mock_paddle_result( + dt_polys=[[[0, 0], [50, 0], [50, 20], [0, 20]]], + rec_texts=["---"], + rec_scores=[0.9], + ) + ]) engine._ocr = mock_ocr config = OcrConfig(char_whitelist="ABC") @@ -296,7 +321,7 @@ class TestPaddleOcrEngine: engine = PaddleOcrEngine() mock_ocr = MagicMock() - mock_ocr.ocr.side_effect = RuntimeError("OCR crashed") + mock_ocr.predict.side_effect = RuntimeError("OCR crashed") engine._ocr = mock_ocr with pytest.raises(EngineProcessingError, match="PaddleOCR recognition failed"): From dab4a3bdf3831c4e54a5ecc78c69101fa42111f1 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 15:51:04 -0600 Subject: [PATCH 11/16] fix: PaddleOCR error --- frontend/src/shared/components/CameraCapture/CameraCapture.tsx | 2 +- ocr/Dockerfile | 2 +- ocr/app/engines/paddle_engine.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/frontend/src/shared/components/CameraCapture/CameraCapture.tsx b/frontend/src/shared/components/CameraCapture/CameraCapture.tsx index a876c46..2e17580 100644 --- a/frontend/src/shared/components/CameraCapture/CameraCapture.tsx +++ b/frontend/src/shared/components/CameraCapture/CameraCapture.tsx @@ -245,7 +245,7 @@ export const CameraCapture: React.FC = ({ return ( Date: Sat, 7 Feb 2026 16:00:23 -0600 Subject: [PATCH 12/16] fix: Crop box broken --- .../src/shared/components/CameraCapture/useImageCrop.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/frontend/src/shared/components/CameraCapture/useImageCrop.ts b/frontend/src/shared/components/CameraCapture/useImageCrop.ts index 5399273..c58cec5 100644 --- a/frontend/src/shared/components/CameraCapture/useImageCrop.ts +++ b/frontend/src/shared/components/CameraCapture/useImageCrop.ts @@ -318,17 +318,15 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet if (isDrawingRef.current) { isDrawingRef.current = false; const area = cropAreaRef.current; - // When aspect ratio constrains one dimension, only check the free dimension - const meetsMinSize = aspectRatio - ? area.width >= minSize - : area.width >= minSize && area.height >= minSize; + // Accept crop if at least one dimension is meaningful (allows thin strips like VINs) + const meetsMinSize = area.width >= minSize || area.height >= minSize; if (meetsMinSize) { setCropDrawn(true); } } activeHandleRef.current = null; setIsDragging(false); - }, [minSize, aspectRatio]); + }, [minSize]); // Add global event listeners for drag useEffect(() => { From 9d2d4e57b7b93527cd7ee3246169074e86890cda Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 16:12:07 -0600 Subject: [PATCH 13/16] fix: PaddleOCR error --- ocr/app/engines/paddle_engine.py | 2 +- ocr/tests/test_engine_abstraction.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ocr/app/engines/paddle_engine.py b/ocr/app/engines/paddle_engine.py index f4d8e3b..4965ebf 100644 --- a/ocr/app/engines/paddle_engine.py +++ b/ocr/app/engines/paddle_engine.py @@ -78,7 +78,7 @@ class PaddleOcrEngine(OcrEngine): engine_name=self.name, ) - res = results[0].res + res = results[0].json dt_polys = res.get("dt_polys", []) rec_texts = res.get("rec_texts", []) rec_scores = res.get("rec_scores", []) diff --git a/ocr/tests/test_engine_abstraction.py b/ocr/tests/test_engine_abstraction.py index a85f0f5..446fcbd 100644 --- a/ocr/tests/test_engine_abstraction.py +++ b/ocr/tests/test_engine_abstraction.py @@ -46,7 +46,7 @@ def _mock_paddle_result( ) -> MagicMock: """Create a mock PaddleOCR v3.x predict() result object.""" result = MagicMock() - result.res = { + result.json = { "dt_polys": dt_polys, "rec_texts": rec_texts, "rec_scores": rec_scores, From fcffb0bb4382ae72dc693655fb1211370bb6c934 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 16:18:14 -0600 Subject: [PATCH 14/16] fix: PaddleOCR timeout --- backend/src/features/ocr/external/ocr-client.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/features/ocr/external/ocr-client.ts b/backend/src/features/ocr/external/ocr-client.ts index 134b5f9..42a1711 100644 --- a/backend/src/features/ocr/external/ocr-client.ts +++ b/backend/src/features/ocr/external/ocr-client.ts @@ -6,7 +6,7 @@ import type { JobResponse, OcrResponse, VinExtractionResponse } from '../domain/ /** OCR service configuration */ const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000'; -const OCR_TIMEOUT_MS = 30000; // 30 seconds for sync operations +const OCR_TIMEOUT_MS = 120000; // 120 seconds for sync operations (PaddleOCR model loading on first call) /** * HTTP client for communicating with the OCR service. From 3adbb10ff6688b4374b8004a27ed982129aa8815 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 16:26:10 -0600 Subject: [PATCH 15/16] fix: OCR Timout still --- frontend/src/features/vehicles/hooks/useVinOcr.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/features/vehicles/hooks/useVinOcr.ts b/frontend/src/features/vehicles/hooks/useVinOcr.ts index 8845432..9554259 100644 --- a/frontend/src/features/vehicles/hooks/useVinOcr.ts +++ b/frontend/src/features/vehicles/hooks/useVinOcr.ts @@ -49,7 +49,7 @@ async function extractVinFromImage(file: File): Promise { const response = await apiClient.post('/ocr/extract/vin', formData, { headers: { 'Content-Type': 'multipart/form-data' }, - timeout: 30000, // 30 seconds for OCR processing + timeout: 120000, // 120 seconds for OCR processing }); const data = response.data; From 9a2b12c5dc8d16a97c840d2c075e59943e2faceb Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sat, 7 Feb 2026 16:35:28 -0600 Subject: [PATCH 16/16] fix: No matches --- ocr/app/engines/paddle_engine.py | 13 ++++++++++--- ocr/tests/test_engine_abstraction.py | 13 +++++++++---- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/ocr/app/engines/paddle_engine.py b/ocr/app/engines/paddle_engine.py index 4965ebf..81db632 100644 --- a/ocr/app/engines/paddle_engine.py +++ b/ocr/app/engines/paddle_engine.py @@ -56,8 +56,9 @@ class PaddleOcrEngine(OcrEngine): """Run PaddleOCR on image bytes. PaddleOCR v3.x ``predict()`` returns an iterator of result objects. - Each result has a ``res`` dict with ``dt_polys``, ``rec_texts``, - and ``rec_scores``. + Each result's ``.json`` property returns a dict. The OCR fields + (``dt_polys``, ``rec_texts``, ``rec_scores``) may be at the top + level or nested under a ``"res"`` key depending on the version. """ ocr = self._get_ocr() @@ -78,7 +79,13 @@ class PaddleOcrEngine(OcrEngine): engine_name=self.name, ) - res = results[0].json + raw = results[0].json + # Unwrap nested "res" key if present (save_to_json format) + res = raw.get("res", raw) if isinstance(raw, dict) else raw + logger.debug( + "PaddleOCR result keys: %s", + list(res.keys()) if isinstance(res, dict) else type(res).__name__, + ) dt_polys = res.get("dt_polys", []) rec_texts = res.get("rec_texts", []) rec_scores = res.get("rec_scores", []) diff --git a/ocr/tests/test_engine_abstraction.py b/ocr/tests/test_engine_abstraction.py index 446fcbd..9328aad 100644 --- a/ocr/tests/test_engine_abstraction.py +++ b/ocr/tests/test_engine_abstraction.py @@ -44,12 +44,17 @@ def _make_result( def _mock_paddle_result( dt_polys: list, rec_texts: list[str], rec_scores: list[float] ) -> MagicMock: - """Create a mock PaddleOCR v3.x predict() result object.""" + """Create a mock PaddleOCR v3.x predict() result object. + + Wraps data under ``"res"`` key to match save_to_json format. + """ result = MagicMock() result.json = { - "dt_polys": dt_polys, - "rec_texts": rec_texts, - "rec_scores": rec_scores, + "res": { + "dt_polys": dt_polys, + "rec_texts": rec_texts, + "rec_scores": rec_scores, + } } return result