Merge pull request 'feat: Improve OCR process - replace Tesseract with PaddleOCR (#115)' (#122) from issue-115-improve-ocr-paddleocr into main

Reviewed-on: #122
2026-02-08 01:13:33 +00:00
parent 6b0c18a41c 9a2b12c5dc
commit dd77cb3836
35 changed files with 2560 additions and 232 deletions
--- a/.ai/context.json
+++ b/.ai/context.json
@@ -108,7 +108,7 @@
    },
    "mvp-ocr": {
      "type": "ocr_service",
-      "description": "Python-based OCR for document text extraction",
+      "description": "Python OCR service with pluggable engine abstraction (PaddleOCR PP-OCRv4 primary, optional Google Vision cloud fallback, Tesseract backward compat)",
      "port": 8000
    },
    "mvp-loki": {
--- a/.ai/workflow-contract.json
+++ b/.ai/workflow-contract.json
@@ -45,7 +45,7 @@
    "parent_issue": "The original feature issue. Tracks overall status. Only the parent gets status label transitions.",
    "sub_issue_title_format": "{type}: {summary} (#{parent_index})",
    "sub_issue_body": "First line must be 'Relates to #{parent_index}'. Each sub-issue is a self-contained unit of work.",
-    "sub_issue_labels": "status/backlog + same type/* as parent. Sub-issues stay in backlog; parent issue tracks status.",
+    "sub_issue_labels": "status/in-progress + same type/* as parent. Sub-issues move to in-progress as they are worked on.",
    "sub_issue_milestone": "Same sprint milestone as parent.",
    "rules": [
      "ONE branch for the parent issue. Never create branches per sub-issue.",
--- a/backend/src/features/ocr/external/ocr-client.ts
+++ b/backend/src/features/ocr/external/ocr-client.ts
@@ -6,7 +6,7 @@ import type { JobResponse, OcrResponse, VinExtractionResponse } from '../domain/
 /** OCR service configuration */
 const OCR_SERVICE_URL = process.env.OCR_SERVICE_URL || 'http://mvp-ocr:8000';
-const OCR_TIMEOUT_MS = 30000; // 30 seconds for sync operations
+const OCR_TIMEOUT_MS = 120000; // 120 seconds for sync operations (PaddleOCR model loading on first call)
 /**
 * HTTP client for communicating with the OCR service.
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -38,13 +38,17 @@ services:
      STRIPE_ENTERPRISE_MONTHLY_PRICE_ID: prod_Toj8xGEui9jl6j
      STRIPE_ENTERPRISE_YEARLY_PRICE_ID: prod_Toj9A7A773xrdn
-  # OCR - Production log level
+  # OCR - Production log level + engine config
  mvp-ocr:
    environment:
      LOG_LEVEL: error
      REDIS_HOST: mvp-redis
      REDIS_PORT: 6379
      REDIS_DB: 1
      OCR_PRIMARY_ENGINE: paddleocr
      OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none}
      OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6}
      GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json
  # PostgreSQL - Remove dev ports, production log level
  mvp-postgres:
--- a/docker-compose.staging.yml
+++ b/docker-compose.staging.yml
@@ -63,6 +63,15 @@ services:
  mvp-ocr:
    image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest}
    container_name: mvp-ocr-staging
    environment:
      LOG_LEVEL: debug
      REDIS_HOST: mvp-redis
      REDIS_PORT: 6379
      REDIS_DB: 1
      OCR_PRIMARY_ENGINE: paddleocr
      OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none}
      OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6}
      GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json
  # ========================================
  # PostgreSQL (Staging - Separate Database)
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -193,8 +193,16 @@ services:
      REDIS_HOST: mvp-redis
      REDIS_PORT: 6379
      REDIS_DB: 1
      # OCR engine configuration (PaddleOCR primary, cloud fallback optional)
      OCR_PRIMARY_ENGINE: paddleocr
      OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none}
      OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6}
      GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json
    volumes:
      - /tmp/vin-debug:/tmp/vin-debug
      # Optional: Uncomment to enable Google Vision cloud fallback.
      # Requires: secrets/app/google-vision-key.json and OCR_FALLBACK_ENGINE=google_vision
      # - ./secrets/app/google-vision-key.json:/run/secrets/google-vision-key.json:ro
    networks:
      - backend
      - database
--- a/docs/CLAUDE.md
+++ b/docs/CLAUDE.md
@@ -18,5 +18,5 @@
 | `AUDIT.md` | Audit documentation | Security audits, compliance |
 | `MVP-COLOR-SCHEME.md` | Color scheme reference | UI styling decisions |
 | `LOGGING.md` | Unified logging system | Log levels, correlation IDs, Grafana |
-| `ocr-pipeline-tech-stack.md` | OCR pipeline technology decisions | OCR architecture, Tesseract setup |
+| `ocr-pipeline-tech-stack.md` | OCR pipeline technology decisions | OCR architecture, PaddleOCR engine abstraction |
 | `TIER-GATING.md` | Subscription tier gating rules | Feature access by tier, vehicle limits |
--- a/docs/ocr-pipeline-tech-stack.md
+++ b/docs/ocr-pipeline-tech-stack.md
@@ -118,35 +118,48 @@
        │       ├─────────────────────────────────────────────────────────┤
        │       │                                                         │
        │       │   ┌─────────────────────────────────────────────────┐   │
-        │       │   │  5a. Primary OCR: Tesseract 5.x                 │   │
+        │       │   │  5a. Engine Abstraction Layer                    │   │
        │       │   │                                                  │   │
-        │       │   │  • Engine: LSTM (--oem 1)                       │   │
+        │       │   │  OcrEngine ABC -> PaddleOcrEngine (primary)      │   │
-        │       │   │  • Page segmentation: Auto (--psm 3)            │   │
+        │       │   │                -> CloudEngine (optional fallback) │   │
-        │       │   │  • Output: hOCR with word confidence            │   │
+        │       │   │                -> TesseractEngine (backward compat)│  │
        │       │   │                -> HybridEngine (primary+fallback) │   │
        │       │   └─────────────────────────────────────────────────┘   │
        │       │                         │                               │
        │       │                         ▼                               │
        │       │   ┌─────────────────────────────────────────────────┐   │
        │       │   │  5b. Primary OCR: PaddleOCR PP-OCRv4             │   │
        │       │   │                                                  │   │
        │       │   │  • Scene text detection + angle classification   │   │
        │       │   │  • CPU-only, models baked into Docker image      │   │
        │       │   │  • Normalized output: text, confidence, word boxes│  │
        │       │   └─────────────────────────────────────────────────┘   │
        │       │                         │                               │
        │       │                         ▼                               │
        │       │                 ┌───────────────┐                       │
        │       │                 │  Confidence   │                       │
-        │       │                 │    > 80% ?    │                       │
+        │       │                 │   >= 60% ?    │                       │
        │       │                 └───────────────┘                       │
        │       │                    │         │                          │
-        │       │              YES ──┘         └── NO                     │
+        │       │              YES ──┘         └── NO (and cloud enabled) │
        │       │               │                   │                     │
        │       │               │                   ▼                     │
        │       │               │   ┌─────────────────────────────────┐   │
-        │       │               │   │  5b. Fallback: PaddleOCR        │   │
+        │       │               │   │  5c. Optional Cloud Fallback     │   │
        │       │               │   │      (Google Vision API)         │   │
        │       │               │   │                                  │   │
-        │       │               │   │  • Better for degraded images   │   │
+        │       │               │   │  • Disabled by default           │   │
-        │       │               │   │  • Better table detection       │   │
+        │       │               │   │  • 5-second timeout guard        │   │
-        │       │               │   │  • Slower but more accurate     │   │
+        │       │               │   │  • Returns higher-confidence     │   │
        │       │               │   │    result of primary vs fallback │   │
        │       │               │   └─────────────────────────────────┘   │
        │       │               │                   │                     │
        │       │               ▼                   ▼                     │
        │       │         ┌─────────────────────────────────┐             │
-        │       │         │  5c. Result Merging             │             │
+        │       │         │  5d. HybridEngine Result        │             │
-        │       │         │  • Merge by bounding box        │             │
+        │       │         │  • Compare confidences          │             │
        │       │         │  • Keep highest confidence      │             │
        │       │         │  • Graceful fallback on error   │             │
        │       │         └─────────────────────────────────┘             │
        │       │                                                         │
        │       └─────────────────────────────────────────────────────────┘
@@ -257,10 +270,10 @@
 | Component              | Tool                  | Purpose                              |
 |------------------------|-----------------------|--------------------------------------|
-| **Primary OCR**        | Tesseract 5.x         | Fast, reliable text extraction       |
+| **Primary OCR**        | PaddleOCR PP-OCRv4    | Highest accuracy scene text, CPU-only |
-| **Python Binding**     | pytesseract           | Tesseract Python wrapper             |
+| **Cloud Fallback**     | Google Vision API     | Optional cloud fallback (disabled by default) |
-| **Fallback OCR**       | PaddleOCR             | Higher accuracy, better tables       |
+| **Backward Compat**    | Tesseract 5.x / pytesseract | Legacy engine, configurable via env var |
-| **Layout Analysis**    | PaddleOCR / LayoutParser | Document structure detection      |
+| **Engine Abstraction** | `OcrEngine` ABC       | Pluggable engine interface in `ocr/app/engines/` |
 ### Data Extraction
@@ -291,85 +304,93 @@
 fastapi>=0.100.0
 uvicorn[standard]>=0.23.0
 python-multipart>=0.0.6
-
+pydantic>=2.0.0
 # Task Queue
 celery>=5.3.0
 redis>=4.6.0
 # File Detection & Handling
 python-magic>=0.4.27
 pillow>=10.0.0
 pillow-heif>=0.13.0
 # PDF Processing
 pymupdf>=1.23.0
 # Image Preprocessing
 opencv-python-headless>=4.8.0
 deskew>=1.4.0
 scikit-image>=0.21.0
 numpy>=1.24.0
 # OCR Engines
 pytesseract>=0.3.10
-paddlepaddle>=2.5.0
+paddlepaddle>=2.6.0
-paddleocr>=2.7.0
+paddleocr>=2.8.0
 google-cloud-vision>=3.7.0
-# Table Extraction
+# PDF Processing
-img2table>=1.2.0
+PyMuPDF>=1.23.0
 camelot-py[cv]>=0.11.0
-# NLP & Data
+# Redis for job queue
-spacy>=3.6.0
+redis>=5.0.0
 pandas>=2.0.0
-# Storage & Database
+# HTTP client for callbacks
-boto3>=1.28.0
+httpx>=0.24.0
-psycopg2-binary>=2.9.0
+
-sqlalchemy>=2.0.0
+# Testing
 pytest>=7.4.0
 pytest-asyncio>=0.21.0
 ```
 ### System Package Requirements (Ubuntu/Debian)
 ```bash
-# Tesseract OCR
+# Tesseract OCR (backward compatibility engine)
-apt-get install tesseract-ocr tesseract-ocr-eng libtesseract-dev
+apt-get install tesseract-ocr tesseract-ocr-eng
 # PaddlePaddle OpenMP runtime
 apt-get install libgomp1
 # HEIC Support
-apt-get install libheif-examples libheif-dev
+apt-get install libheif1 libheif-dev
-# OpenCV dependencies
+# GLib (OpenCV dependency)
-apt-get install libgl1-mesa-glx libglib2.0-0
+apt-get install libglib2.0-0
-# PDF rendering dependencies
+# File type detection
-apt-get install libmupdf-dev mupdf-tools
+apt-get install libmagic1
 # Image processing
 apt-get install libmagic1 ghostscript
 # Camelot dependencies
 apt-get install ghostscript python3-tk
 ```
 ### Environment Variables
 | Variable | Default | Description |
 |----------|---------|-------------|
 | `OCR_PRIMARY_ENGINE` | `paddleocr` | Primary OCR engine (`paddleocr`, `tesseract`) |
 | `OCR_CONFIDENCE_THRESHOLD` | `0.6` | Minimum confidence threshold |
 | `OCR_FALLBACK_ENGINE` | `none` | Fallback engine (`google_vision`, `none`) |
 | `OCR_FALLBACK_THRESHOLD` | `0.6` | Confidence below this triggers fallback |
 | `GOOGLE_VISION_KEY_PATH` | `/run/secrets/google-vision-key.json` | Path to Google Vision service account key |
 ---
 ## DOCKERFILE
 ```dockerfile
-FROM python:3.11-slim
+# Primary engine: PaddleOCR PP-OCRv4 (models baked into image)
 # Backward compat: Tesseract 5.x (optional, via TesseractEngine)
 # Cloud fallback: Google Vision (optional, requires API key at runtime)
 FROM python:3.13-slim
 # System dependencies
 # - tesseract-ocr/eng: Backward-compatible OCR engine
 # - libgomp1: OpenMP runtime required by PaddlePaddle
 # - libheif1/libheif-dev: HEIF image support (iPhone photos)
 # - libglib2.0-0: GLib shared library (OpenCV dependency)
 # - libmagic1: File type detection
 # - curl: Health check endpoint
 RUN apt-get update && apt-get install -y --no-install-recommends \
    tesseract-ocr \
    tesseract-ocr-eng \
-    libtesseract-dev \
+    libgomp1 \
-    libheif-examples \
+    libheif1 \
    libheif-dev \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libmagic1 \
-    ghostscript \
+    curl \
    poppler-utils \
    && rm -rf /var/lib/apt/lists/*
 # Python dependencies
@@ -377,11 +398,9 @@ WORKDIR /app
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Download spaCy model
+# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime)
-RUN python -m spacy download en_core_web_sm
+RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)" \
-
+    && echo "PaddleOCR PP-OCRv4 models downloaded and verified"
 # Download PaddleOCR models (cached in image)
 RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en')"
 COPY . .
--- a/frontend/.claude/tdd-guard/data/test.json
+++ b/frontend/.claude/tdd-guard/data/test.json
--- a/frontend/src/features/vehicles/hooks/useVinOcr.ts
+++ b/frontend/src/features/vehicles/hooks/useVinOcr.ts
@@ -49,7 +49,7 @@ async function extractVinFromImage(file: File): Promise<VinOcrResult> {
  const response = await apiClient.post('/ocr/extract/vin', formData, {
    headers: { 'Content-Type': 'multipart/form-data' },
-    timeout: 30000, // 30 seconds for OCR processing
+    timeout: 120000, // 120 seconds for OCR processing
  });
  const data = response.data;
--- a/frontend/src/shared/components/CameraCapture/CameraCapture.tsx
+++ b/frontend/src/shared/components/CameraCapture/CameraCapture.tsx
@@ -245,7 +245,7 @@ export const CameraCapture: React.FC<CameraCaptureProps> = ({
    return (
      <CropTool
        imageSrc={capturedImageSrc}
-        lockAspectRatio={guidanceType !== 'none'}
+        lockAspectRatio={guidanceType !== 'none' && guidanceType !== 'vin'}
        aspectRatio={cropAspectRatio}
        onConfirm={handleCropConfirm}
        onReset={handleCropReset}
--- a/frontend/src/shared/components/CameraCapture/useImageCrop.ts
+++ b/frontend/src/shared/components/CameraCapture/useImageCrop.ts
@@ -95,10 +95,6 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
  const drawOriginRef = useRef({ x: 0, y: 0 });
  const cropAreaRef = useRef(cropArea);
  useEffect(() => {
    cropAreaRef.current = cropArea;
  }, [cropArea]);
  const setCropArea = useCallback(
    (area: CropArea) => {
      setCropAreaState(getAspectRatioAdjustedCrop(area));
@@ -177,7 +173,9 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
      startPosRef.current = { x: clientX, y: clientY };
      drawOriginRef.current = { x, y };
-      setCropAreaState({ x, y, width: 0, height: 0 });
+      const initial = { x, y, width: 0, height: 0 };
      setCropAreaState(initial);
      cropAreaRef.current = initial;
      isDrawingRef.current = true;
      activeHandleRef.current = null;
@@ -203,18 +201,24 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
        const originX = drawOriginRef.current.x;
        const originY = drawOriginRef.current.y;
-        let newCrop: CropArea = {
+        const drawnWidth = Math.abs(currentX - originX);
        const drawnHeight = aspectRatio
          ? drawnWidth / aspectRatio
          : Math.abs(currentY - originY);
        let drawnY = Math.min(originY, currentY);
        // Clamp so crop doesn't exceed container bounds when aspect ratio forces height
        if (aspectRatio && drawnY + drawnHeight > 100) {
          drawnY = Math.max(0, 100 - drawnHeight);
        }
        const newCrop: CropArea = {
          x: Math.min(originX, currentX),
-          y: Math.min(originY, currentY),
+          y: drawnY,
-          width: Math.abs(currentX - originX),
+          width: drawnWidth,
-          height: Math.abs(currentY - originY),
+          height: drawnHeight,
        };
        if (aspectRatio) {
          newCrop.height = newCrop.width / aspectRatio;
        }
        setCropAreaState(newCrop);
        cropAreaRef.current = newCrop;
        return;
      }
@@ -303,7 +307,9 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
          break;
      }
-      setCropAreaState(constrainCrop(newCrop));
+      const constrained = constrainCrop(newCrop);
      setCropAreaState(constrained);
      cropAreaRef.current = constrained;
    },
    [isDragging, constrainCrop, aspectRatio]
  );
@@ -312,7 +318,9 @@ export function useImageCrop(options: UseImageCropOptions = {}): UseImageCropRet
    if (isDrawingRef.current) {
      isDrawingRef.current = false;
      const area = cropAreaRef.current;
-      if (area.width >= minSize && area.height >= minSize) {
+      // Accept crop if at least one dimension is meaningful (allows thin strips like VINs)
      const meetsMinSize = area.width >= minSize || area.height >= minSize;
      if (meetsMinSize) {
        setCropDrawn(true);
      }
    }
--- a/ocr/CLAUDE.md
+++ b/ocr/CLAUDE.md
@@ -1,10 +1,12 @@
 # ocr/
 Python OCR microservice. Primary engine: PaddleOCR PP-OCRv4 with optional Google Vision cloud fallback. Pluggable engine abstraction in `app/engines/`.
 ## Files
 | File | What | When to read |
 | ---- | ---- | ------------ |
-| `Dockerfile` | Container build definition | Docker builds, deployment |
+| `Dockerfile` | Container build (PaddleOCR models baked in) | Docker builds, deployment |
 | `requirements.txt` | Python dependencies | Adding dependencies |
 ## Subdirectories
@@ -12,4 +14,5 @@
 | Directory | What | When to read |
 | --------- | ---- | ------------ |
 | `app/` | FastAPI application source | OCR endpoint development |
 | `app/engines/` | Engine abstraction layer (OcrEngine ABC, factory, hybrid) | Adding or changing OCR engines |
 | `tests/` | Test suite | Adding or modifying tests |
--- a/ocr/Dockerfile
+++ b/ocr/Dockerfile
@@ -1,5 +1,8 @@
 # Production Dockerfile for MotoVaultPro OCR Service
 # Uses mirrored base images from Gitea Package Registry
 #
 # Primary engine: PaddleOCR PP-OCRv4 (models baked into image)
 # Cloud fallback: Google Vision (optional, requires API key at runtime)
 # Build argument for registry (defaults to Gitea mirrors, falls back to Docker Hub)
 ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
@@ -7,10 +10,13 @@ ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
 FROM ${REGISTRY_MIRRORS}/python:3.13-slim
 # System dependencies
 # - libgomp1: OpenMP runtime required by PaddlePaddle
 # - libheif1/libheif-dev: HEIF image support (iPhone photos)
 # - libglib2.0-0: GLib shared library (OpenCV dependency)
 # - libmagic1: File type detection
 # - curl: Health check endpoint
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    tesseract-ocr \
+    libgomp1 \
    tesseract-ocr-eng \
    libtesseract-dev \
    libheif1 \
    libheif-dev \
    libglib2.0-0 \
@@ -21,7 +27,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Python dependencies
 WORKDIR /app
 COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+# Install dependencies. PaddleX (transitive via paddleocr) pulls in the full
 # opencv-python which requires libGL.so.1.  Force-reinstall the headless
 # variant afterwards so the container stays GUI-free.
 RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir --force-reinstall opencv-python-headless
 # Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime).
 # Models are baked into the image so container starts are fast and
 # no network access is needed at runtime for model download.
 ENV PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True
 RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(ocr_version='PP-OCRv4', use_textline_orientation=True, lang='en', device='cpu', enable_mkldnn=False)" \
    && echo "PaddleOCR PP-OCRv4 models downloaded and verified"
 COPY . .
--- a/ocr/app/CLAUDE.md
+++ b/ocr/app/CLAUDE.md
@@ -12,6 +12,7 @@
 | Directory | What | When to read |
 | --------- | ---- | ------------ |
 | `engines/` | OCR engine abstraction (PaddleOCR primary, Google Vision fallback) | Engine changes, adding new engines |
 | `extractors/` | Data extraction logic | Adding new extraction types |
 | `models/` | Data models and schemas | Request/response types |
 | `patterns/` | Regex and parsing patterns | Pattern matching rules |
--- a/ocr/app/config.py
+++ b/ocr/app/config.py
@@ -9,7 +9,20 @@ class Settings:
        self.log_level: str = os.getenv("LOG_LEVEL", "info")
        self.host: str = os.getenv("HOST", "0.0.0.0")
        self.port: int = int(os.getenv("PORT", "8000"))
-        self.tesseract_cmd: str = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract")
+        # OCR engine configuration
        self.ocr_primary_engine: str = os.getenv("OCR_PRIMARY_ENGINE", "paddleocr")
        self.ocr_confidence_threshold: float = float(
            os.getenv("OCR_CONFIDENCE_THRESHOLD", "0.6")
        )
        # Cloud fallback configuration (disabled by default)
        self.ocr_fallback_engine: str = os.getenv("OCR_FALLBACK_ENGINE", "none")
        self.ocr_fallback_threshold: float = float(
            os.getenv("OCR_FALLBACK_THRESHOLD", "0.6")
        )
        self.google_vision_key_path: str = os.getenv(
            "GOOGLE_VISION_KEY_PATH", "/run/secrets/google-vision-key.json"
        )
        # Redis configuration for job queue
        self.redis_host: str = os.getenv("REDIS_HOST", "mvp-redis")
--- a/ocr/app/engines/init.py
+++ b/ocr/app/engines/init.py
@@ -0,0 +1,32 @@
 """OCR engine abstraction layer.
 Provides a pluggable engine interface for OCR processing,
 decoupling extractors from specific OCR libraries.
 Engines:
  - PaddleOcrEngine: PaddleOCR PP-OCRv4 (primary, CPU-only)
  - CloudEngine: Google Vision TEXT_DETECTION (optional cloud fallback)
  - HybridEngine: Primary + fallback with confidence threshold
 """
 from app.engines.base_engine import (
    EngineError,
    EngineProcessingError,
    EngineUnavailableError,
    OcrConfig,
    OcrEngine,
    OcrEngineResult,
    WordBox,
 )
 from app.engines.engine_factory import create_engine
 __all__ = [
    "OcrEngine",
    "OcrConfig",
    "OcrEngineResult",
    "WordBox",
    "EngineError",
    "EngineUnavailableError",
    "EngineProcessingError",
    "create_engine",
 ]
--- a/ocr/app/engines/base_engine.py
+++ b/ocr/app/engines/base_engine.py
@@ -0,0 +1,88 @@
 """OCR engine abstract base class and shared data types."""
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from typing import Any
 # --- Exception hierarchy ---
 class EngineError(Exception):
    """Base exception for all OCR engine errors."""
 class EngineUnavailableError(EngineError):
    """Raised when an engine cannot be initialized (missing binary, bad config)."""
 class EngineProcessingError(EngineError):
    """Raised when an engine fails to process an image."""
 # --- Data types ---
@dataclass
 class WordBox:
    """A single recognized word with position and confidence."""
    text: str
    confidence: float  # 0.0-1.0
    x: int = 0
    y: int = 0
    width: int = 0
    height: int = 0
@dataclass
 class OcrConfig:
    """Engine-agnostic OCR configuration.
    Common fields cover the most frequent needs. Engine-specific
    parameters go into ``hints`` so the interface stays stable.
    """
    char_whitelist: str | None = None  # e.g. VIN: "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
    single_line: bool = False  # Treat image as a single text line
    single_word: bool = False  # Treat image as a single word
    use_angle_cls: bool = True  # Enable angle classification (PaddleOCR)
    hints: dict[str, Any] = field(default_factory=dict)
@dataclass
 class OcrEngineResult:
    """Normalized result returned by every engine implementation."""
    text: str
    confidence: float  # 0.0-1.0
    word_boxes: list[WordBox]
    engine_name: str  # "paddleocr", "google_vision"
 # --- Abstract base ---
 class OcrEngine(ABC):
    """Abstract base class that all OCR engines must implement."""
    @abstractmethod
    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Run OCR on preprocessed image bytes.
        Args:
            image_bytes: Raw image bytes (PNG/JPEG).
            config: Engine-agnostic configuration.
        Returns:
            Normalized OCR result.
        Raises:
            EngineProcessingError: If recognition fails.
            EngineUnavailableError: If the engine is not ready.
        """
    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier used in OcrEngineResult.engine_name."""
--- a/ocr/app/engines/cloud_engine.py
+++ b/ocr/app/engines/cloud_engine.py
@@ -0,0 +1,166 @@
 """Google Vision cloud OCR engine with lazy initialization."""
 import logging
 import os
 from typing import Any
 from app.engines.base_engine import (
    EngineProcessingError,
    EngineUnavailableError,
    OcrConfig,
    OcrEngine,
    OcrEngineResult,
    WordBox,
 )
 logger = logging.getLogger(__name__)
 # Default path for Google Vision service account key (Docker secret mount)
 _DEFAULT_KEY_PATH = "/run/secrets/google-vision-key.json"
 class CloudEngine(OcrEngine):
    """Google Vision TEXT_DETECTION wrapper with lazy initialization.
    The client is not created until the first ``recognize()`` call,
    so the container starts normally even when the secret file is
    missing or the dependency is not installed.
    """
    def __init__(self, key_path: str | None = None) -> None:
        self._key_path = key_path or os.getenv(
            "GOOGLE_VISION_KEY_PATH", _DEFAULT_KEY_PATH
        )
        self._client: Any | None = None
    @property
    def name(self) -> str:
        return "google_vision"
    # ------------------------------------------------------------------
    # Lazy init
    # ------------------------------------------------------------------
    def _get_client(self) -> Any:
        """Create the Vision client on first use."""
        if self._client is not None:
            return self._client
        # Verify credentials file exists
        if not os.path.isfile(self._key_path):
            raise EngineUnavailableError(
                f"Google Vision key not found at {self._key_path}. "
                "Set GOOGLE_VISION_KEY_PATH or mount the secret."
            )
        try:
            from google.cloud import vision  # type: ignore[import-untyped]
            # Point the SDK at the service account key
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self._key_path
            self._client = vision.ImageAnnotatorClient()
            logger.info(
                "Google Vision client initialized (key: %s)", self._key_path
            )
            return self._client
        except ImportError as exc:
            raise EngineUnavailableError(
                "google-cloud-vision is not installed. "
                "Install with: pip install google-cloud-vision"
            ) from exc
        except Exception as exc:
            raise EngineUnavailableError(
                f"Failed to initialize Google Vision client: {exc}"
            ) from exc
    # ------------------------------------------------------------------
    # OCR
    # ------------------------------------------------------------------
    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Run Google Vision TEXT_DETECTION on image bytes."""
        client = self._get_client()
        try:
            from google.cloud import vision  # type: ignore[import-untyped]
            image = vision.Image(content=image_bytes)
            response = client.text_detection(image=image)
            if response.error.message:
                raise EngineProcessingError(
                    f"Google Vision API error: {response.error.message}"
                )
            annotations = response.text_annotations
            if not annotations:
                return OcrEngineResult(
                    text="",
                    confidence=0.0,
                    word_boxes=[],
                    engine_name=self.name,
                )
            # First annotation is the full-page text; the rest are words
            full_text = annotations[0].description.strip()
            word_boxes: list[WordBox] = []
            confidences: list[float] = []
            for annotation in annotations[1:]:
                text = annotation.description
                vertices = annotation.bounding_poly.vertices
                # Apply character whitelist filter if configured
                if config.char_whitelist:
                    allowed = set(config.char_whitelist)
                    text = "".join(ch for ch in text if ch in allowed)
                if not text.strip():
                    continue
                xs = [v.x for v in vertices]
                ys = [v.y for v in vertices]
                x_min, y_min = min(xs), min(ys)
                x_max, y_max = max(xs), max(ys)
                # Google Vision TEXT_DETECTION does not return per-word
                # confidence in annotations.  Use 0.95 as the documented
                # typical accuracy for clear images so comparisons with
                # PaddleOCR are meaningful.
                word_conf = 0.95
                word_boxes.append(
                    WordBox(
                        text=text.strip(),
                        confidence=word_conf,
                        x=x_min,
                        y=y_min,
                        width=x_max - x_min,
                        height=y_max - y_min,
                    )
                )
                confidences.append(word_conf)
            # Apply whitelist to full text too
            if config.char_whitelist:
                allowed = set(config.char_whitelist)
                full_text = "".join(
                    ch for ch in full_text if ch in allowed or ch in " \n"
                )
            avg_confidence = (
                sum(confidences) / len(confidences) if confidences else 0.0
            )
            return OcrEngineResult(
                text=full_text,
                confidence=avg_confidence,
                word_boxes=word_boxes,
                engine_name=self.name,
            )
        except (EngineUnavailableError, EngineProcessingError):
            raise
        except Exception as exc:
            raise EngineProcessingError(
                f"Google Vision recognition failed: {exc}"
            ) from exc
--- a/ocr/app/engines/engine_factory.py
+++ b/ocr/app/engines/engine_factory.py
@@ -0,0 +1,86 @@
 """Factory function for creating OCR engine instances from configuration."""
 import importlib
 import logging
 from app.config import settings
 from app.engines.base_engine import EngineUnavailableError, OcrEngine
 logger = logging.getLogger(__name__)
 # Valid engine identifiers (primary engines only; hybrid is constructed separately)
 _ENGINE_REGISTRY: dict[str, str] = {
    "paddleocr": "app.engines.paddle_engine.PaddleOcrEngine",
    "google_vision": "app.engines.cloud_engine.CloudEngine",
 }
 def _create_single_engine(name: str) -> OcrEngine:
    """Instantiate a single engine by registry name."""
    if name not in _ENGINE_REGISTRY:
        raise EngineUnavailableError(
            f"Unknown engine '{name}'. Available: {list(_ENGINE_REGISTRY.keys())}"
        )
    module_path, class_name = _ENGINE_REGISTRY[name].rsplit(".", 1)
    try:
        module = importlib.import_module(module_path)
        engine_cls = getattr(module, class_name)
        engine: OcrEngine = engine_cls()
        logger.info("Created OCR engine: %s", name)
        return engine
    except EngineUnavailableError:
        raise
    except Exception as exc:
        raise EngineUnavailableError(
            f"Failed to create engine '{name}': {exc}"
        ) from exc
 def create_engine(engine_name: str | None = None) -> OcrEngine:
    """Instantiate an OCR engine by name (defaults to config value).
    When a fallback engine is configured (``OCR_FALLBACK_ENGINE != "none"``),
    returns a ``HybridEngine`` that wraps the primary with the fallback.
    Args:
        engine_name: Engine identifier ("paddleocr", "google_vision").
                     Falls back to ``settings.ocr_primary_engine``.
    Returns:
        Initialized OcrEngine instance (possibly a HybridEngine wrapper).
    Raises:
        EngineUnavailableError: If the primary engine cannot be loaded.
    """
    name = (engine_name or settings.ocr_primary_engine).lower().strip()
    primary = _create_single_engine(name)
    # Check for cloud fallback configuration
    fallback_name = settings.ocr_fallback_engine.lower().strip()
    if fallback_name == "none" or not fallback_name:
        return primary
    # Create fallback engine (failure is non-fatal -- log and return primary only)
    try:
        fallback = _create_single_engine(fallback_name)
    except EngineUnavailableError as exc:
        logger.warning(
            "Fallback engine '%s' unavailable, proceeding without fallback: %s",
            fallback_name,
            exc,
        )
        return primary
    from app.engines.hybrid_engine import HybridEngine
    threshold = settings.ocr_fallback_threshold
    hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=threshold)
    logger.info(
        "Created hybrid engine: primary=%s, fallback=%s, threshold=%.2f",
        name,
        fallback_name,
        threshold,
    )
    return hybrid
--- a/ocr/app/engines/hybrid_engine.py
+++ b/ocr/app/engines/hybrid_engine.py
@@ -0,0 +1,116 @@
 """Hybrid OCR engine: primary engine with optional cloud fallback."""
 import logging
 import time
 from app.engines.base_engine import (
    EngineError,
    EngineProcessingError,
    OcrConfig,
    OcrEngine,
    OcrEngineResult,
 )
 logger = logging.getLogger(__name__)
 # Maximum time (seconds) to wait for the cloud fallback
 _CLOUD_TIMEOUT_SECONDS = 5.0
 class HybridEngine(OcrEngine):
    """Runs a primary engine and falls back to a cloud engine when
    the primary result confidence is below the configured threshold.
    If the fallback is ``None`` (default), this engine behaves identically
    to the primary engine.  Cloud failures are handled gracefully -- the
    primary result is returned whenever the fallback is unavailable,
    times out, or errors.
    """
    def __init__(
        self,
        primary: OcrEngine,
        fallback: OcrEngine | None = None,
        threshold: float = 0.6,
    ) -> None:
        self._primary = primary
        self._fallback = fallback
        self._threshold = threshold
    @property
    def name(self) -> str:
        fallback_name = self._fallback.name if self._fallback else "none"
        return f"hybrid({self._primary.name}+{fallback_name})"
    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Run primary OCR, optionally falling back to cloud engine."""
        primary_result = self._primary.recognize(image_bytes, config)
        # Happy path: primary confidence meets threshold
        if primary_result.confidence >= self._threshold:
            logger.debug(
                "Primary engine confidence %.2f >= threshold %.2f, no fallback",
                primary_result.confidence,
                self._threshold,
            )
            return primary_result
        # No fallback configured -- return primary result as-is
        if self._fallback is None:
            logger.debug(
                "Primary confidence %.2f < threshold %.2f but no fallback configured",
                primary_result.confidence,
                self._threshold,
            )
            return primary_result
        # Attempt cloud fallback with timeout guard
        logger.info(
            "Primary confidence %.2f < threshold %.2f, trying fallback (%s)",
            primary_result.confidence,
            self._threshold,
            self._fallback.name,
        )
        try:
            start = time.monotonic()
            fallback_result = self._fallback.recognize(image_bytes, config)
            elapsed = time.monotonic() - start
            if elapsed > _CLOUD_TIMEOUT_SECONDS:
                logger.warning(
                    "Cloud fallback took %.1fs (> %.1fs limit), using primary result",
                    elapsed,
                    _CLOUD_TIMEOUT_SECONDS,
                )
                return primary_result
            # Return whichever result has higher confidence
            if fallback_result.confidence > primary_result.confidence:
                logger.info(
                    "Fallback confidence %.2f > primary %.2f, using fallback result",
                    fallback_result.confidence,
                    primary_result.confidence,
                )
                return fallback_result
            logger.info(
                "Primary confidence %.2f >= fallback %.2f, keeping primary result",
                primary_result.confidence,
                fallback_result.confidence,
            )
            return primary_result
        except EngineError as exc:
            logger.warning(
                "Cloud fallback failed (%s), returning primary result: %s",
                self._fallback.name,
                exc,
            )
            return primary_result
        except Exception as exc:
            logger.warning(
                "Unexpected cloud fallback error, returning primary result: %s",
                exc,
            )
            return primary_result
--- a/ocr/app/engines/paddle_engine.py
+++ b/ocr/app/engines/paddle_engine.py
@@ -0,0 +1,157 @@
 """PaddleOCR engine wrapper using PP-OCRv4 models."""
 import io
 import logging
 from typing import Any
 from app.engines.base_engine import (
    EngineProcessingError,
    EngineUnavailableError,
    OcrConfig,
    OcrEngine,
    OcrEngineResult,
    WordBox,
 )
 logger = logging.getLogger(__name__)
 class PaddleOcrEngine(OcrEngine):
    """PaddleOCR PP-OCRv4 engine with angle classification, CPU-only."""
    def __init__(self) -> None:
        self._ocr: Any | None = None
    @property
    def name(self) -> str:
        return "paddleocr"
    def _get_ocr(self) -> Any:
        """Lazy-initialize PaddleOCR instance on first use."""
        if self._ocr is not None:
            return self._ocr
        try:
            from paddleocr import PaddleOCR  # type: ignore[import-untyped]
            self._ocr = PaddleOCR(
                ocr_version="PP-OCRv4",
                use_textline_orientation=True,
                lang="en",
                device="cpu",
                enable_mkldnn=False,
            )
            logger.info("PaddleOCR PP-OCRv4 initialized (CPU, textline_orientation=True)")
            return self._ocr
        except ImportError as exc:
            raise EngineUnavailableError(
                "paddleocr is not installed. "
                "Install with: pip install paddlepaddle paddleocr"
            ) from exc
        except Exception as exc:
            raise EngineUnavailableError(
                f"Failed to initialize PaddleOCR: {exc}"
            ) from exc
    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Run PaddleOCR on image bytes.
        PaddleOCR v3.x ``predict()`` returns an iterator of result objects.
        Each result's ``.json`` property returns a dict.  The OCR fields
        (``dt_polys``, ``rec_texts``, ``rec_scores``) may be at the top
        level or nested under a ``"res"`` key depending on the version.
        """
        ocr = self._get_ocr()
        try:
            import numpy as np  # type: ignore[import-untyped]
            from PIL import Image
            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            img_array = np.array(image)
            results = list(ocr.predict(img_array))
            if not results:
                return OcrEngineResult(
                    text="",
                    confidence=0.0,
                    word_boxes=[],
                    engine_name=self.name,
                )
            raw = results[0].json
            # Unwrap nested "res" key if present (save_to_json format)
            res = raw.get("res", raw) if isinstance(raw, dict) else raw
            logger.debug(
                "PaddleOCR result keys: %s",
                list(res.keys()) if isinstance(res, dict) else type(res).__name__,
            )
            dt_polys = res.get("dt_polys", [])
            rec_texts = res.get("rec_texts", [])
            rec_scores = res.get("rec_scores", [])
            if not rec_texts:
                return OcrEngineResult(
                    text="",
                    confidence=0.0,
                    word_boxes=[],
                    engine_name=self.name,
                )
            word_boxes: list[WordBox] = []
            texts: list[str] = []
            confidences: list[float] = []
            for i, text in enumerate(rec_texts):
                conf = float(rec_scores[i]) if i < len(rec_scores) else 0.0
                # Apply character whitelist filter if configured
                if config.char_whitelist:
                    allowed = set(config.char_whitelist)
                    text = "".join(ch for ch in text if ch in allowed)
                if not text.strip():
                    continue
                # Convert quadrilateral polygon to bounding box
                x_min, y_min, width, height = 0, 0, 0, 0
                if i < len(dt_polys):
                    poly = dt_polys[i]
                    xs = [pt[0] for pt in poly]
                    ys = [pt[1] for pt in poly]
                    x_min, y_min = int(min(xs)), int(min(ys))
                    x_max, y_max = int(max(xs)), int(max(ys))
                    width = x_max - x_min
                    height = y_max - y_min
                word_boxes.append(
                    WordBox(
                        text=text.strip(),
                        confidence=conf,
                        x=x_min,
                        y=y_min,
                        width=width,
                        height=height,
                    )
                )
                texts.append(text.strip())
                confidences.append(conf)
            combined_text = " ".join(texts)
            avg_confidence = (
                sum(confidences) / len(confidences) if confidences else 0.0
            )
            return OcrEngineResult(
                text=combined_text,
                confidence=avg_confidence,
                word_boxes=word_boxes,
                engine_name=self.name,
            )
        except (EngineUnavailableError, EngineProcessingError):
            raise
        except Exception as exc:
            raise EngineProcessingError(
                f"PaddleOCR recognition failed: {exc}"
            ) from exc
--- a/ocr/app/extractors/manual_extractor.py
+++ b/ocr/app/extractors/manual_extractor.py
@@ -5,9 +5,9 @@ import time
 from dataclasses import dataclass, field
 from typing import Callable, Optional
 import pytesseract
 from PIL import Image
 from app.engines import create_engine, OcrConfig
 from app.preprocessors.pdf_preprocessor import pdf_preprocessor, PdfInfo
 from app.table_extraction.detector import table_detector, DetectedTable
 from app.table_extraction.parser import table_parser, ParsedScheduleRow
@@ -243,8 +243,9 @@ class ManualExtractor:
        # OCR the full page
        try:
-            image = Image.open(io.BytesIO(image_bytes))
+            engine = create_engine()
-            ocr_text = pytesseract.image_to_string(image)
+            ocr_result = engine.recognize(image_bytes, OcrConfig())
            ocr_text = ocr_result.text
            # Mark tables as maintenance if page contains maintenance keywords
            for table in detected_tables:
@@ -358,8 +359,9 @@ class ManualExtractor:
            if not text and first_page.image_bytes:
                # OCR first page
-                image = Image.open(io.BytesIO(first_page.image_bytes))
+                engine = create_engine()
-                text = pytesseract.image_to_string(image)
+                ocr_result = engine.recognize(first_page.image_bytes, OcrConfig())
                text = ocr_result.text
            if text:
                return self._parse_vehicle_from_text(text)
--- a/ocr/app/extractors/receipt_extractor.py
+++ b/ocr/app/extractors/receipt_extractor.py
@@ -1,16 +1,13 @@
 """Receipt-specific OCR extractor with field extraction."""
 import io
 import logging
 import time
 from dataclasses import dataclass, field
 from typing import Any, Optional
 import magic
 import pytesseract
 from PIL import Image
 from pillow_heif import register_heif_opener
-from app.config import settings
+from app.engines import OcrConfig, create_engine
 from app.extractors.base import BaseExtractor
 from app.preprocessors.receipt_preprocessor import receipt_preprocessor
 from app.patterns import currency_matcher, date_matcher, fuel_matcher
@@ -53,8 +50,8 @@ class ReceiptExtractor(BaseExtractor):
    }
    def __init__(self) -> None:
-        """Initialize receipt extractor."""
+        """Initialize receipt extractor with engine from factory."""
-        pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
+        self._engine = create_engine()
    def extract(
        self,
@@ -150,26 +147,19 @@ class ReceiptExtractor(BaseExtractor):
        detected = mime.from_buffer(file_bytes)
        return detected or "application/octet-stream"
-    def _perform_ocr(self, image_bytes: bytes, psm: int = 6) -> str:
+    def _perform_ocr(self, image_bytes: bytes) -> str:
        """
-        Perform OCR on preprocessed image.
+        Perform OCR on preprocessed image via engine abstraction.
        Args:
            image_bytes: Preprocessed image bytes
            psm: Tesseract page segmentation mode
                 4 = Assume single column of text
                 6 = Uniform block of text
        Returns:
            Raw OCR text
        """
-        image = Image.open(io.BytesIO(image_bytes))
+        config = OcrConfig()
-
+        result = self._engine.recognize(image_bytes, config)
-        # Configure Tesseract for receipt OCR
+        return result.text
        # PSM 4 works well for columnar receipt text
        config = f"--psm {psm}"
        return pytesseract.image_to_string(image, config=config)
    def _detect_receipt_type(self, text: str) -> str:
        """
--- a/ocr/app/extractors/vin_extractor.py
+++ b/ocr/app/extractors/vin_extractor.py
@@ -1,5 +1,4 @@
 """VIN-specific OCR extractor with preprocessing and validation."""
 import io
 import logging
 import os
 import time
@@ -8,11 +7,10 @@ from datetime import datetime
 from typing import Optional
 import magic
 import pytesseract
 from PIL import Image
 from pillow_heif import register_heif_opener
 from app.config import settings
 from app.engines import OcrConfig, create_engine
 from app.extractors.base import BaseExtractor
 from app.preprocessors.vin_preprocessor import vin_preprocessor, BoundingBox
 from app.validators.vin_validator import vin_validator
@@ -56,15 +54,15 @@ class VinExtractor(BaseExtractor):
        "image/heif",
    }
-    # VIN character whitelist for Tesseract
+    # VIN character whitelist (passed to engine for post-OCR filtering)
    VIN_WHITELIST = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
    # Fixed debug output directory (inside container)
    DEBUG_DIR = "/tmp/vin-debug"
    def __init__(self) -> None:
-        """Initialize VIN extractor."""
+        """Initialize VIN extractor with engine from factory."""
-        pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
+        self._engine = create_engine()
        self._debug = settings.log_level.upper() == "DEBUG"
    def _save_debug_image(self, session_dir: str, name: str, data: bytes) -> None:
@@ -135,21 +133,21 @@ class VinExtractor(BaseExtractor):
            # Perform OCR with VIN-optimized settings
            raw_text, word_confidences = self._perform_ocr(preprocessed_bytes)
-            logger.debug("PSM 6 raw text: '%s'", raw_text)
+            logger.debug("Primary OCR raw text: '%s'", raw_text)
-            logger.debug("PSM 6 word confidences: %s", word_confidences)
+            logger.debug("Primary OCR word confidences: %s", word_confidences)
            # Extract VIN candidates from raw text
            candidates = vin_validator.extract_candidates(raw_text)
-            logger.debug("PSM 6 candidates: %s", candidates)
+            logger.debug("Primary OCR candidates: %s", candidates)
            if not candidates:
-                # No VIN candidates found - try with different PSM modes
+                # No VIN candidates found - try alternate OCR configurations
                candidates = self._try_alternate_ocr(preprocessed_bytes)
            if not candidates:
-                # Try grayscale-only (no thresholding) — the Tesseract
+                # Try grayscale-only (no thresholding) — OCR engines often
-                # LSTM engine often performs better on non-binarized input
+                # perform better on non-binarized input because they do
-                # because it does its own internal preprocessing.
+                # their own internal preprocessing.
                gray_result = vin_preprocessor.preprocess(
                    image_bytes, apply_threshold=False
                )
@@ -166,9 +164,9 @@ class VinExtractor(BaseExtractor):
                raw_text, word_confidences = self._perform_ocr(
                    gray_result.image_bytes
                )
-                logger.debug("Gray PSM 6 raw text: '%s'", raw_text)
+                logger.debug("Gray primary raw text: '%s'", raw_text)
                candidates = vin_validator.extract_candidates(raw_text)
-                logger.debug("Gray PSM 6 candidates: %s", candidates)
+                logger.debug("Gray primary candidates: %s", candidates)
                if not candidates:
                    candidates = self._try_alternate_ocr(
                        gray_result.image_bytes, prefix="Gray"
@@ -188,9 +186,9 @@ class VinExtractor(BaseExtractor):
                    )
                raw_text, word_confidences = self._perform_ocr(otsu_result.image_bytes)
-                logger.debug("Otsu PSM 6 raw text: '%s'", raw_text)
+                logger.debug("Otsu primary raw text: '%s'", raw_text)
                candidates = vin_validator.extract_candidates(raw_text)
-                logger.debug("Otsu PSM 6 candidates: %s", candidates)
+                logger.debug("Otsu primary candidates: %s", candidates)
                if not candidates:
                    candidates = self._try_alternate_ocr(
                        otsu_result.image_bytes, prefix="Otsu"
@@ -280,52 +278,31 @@ class VinExtractor(BaseExtractor):
        return detected or "application/octet-stream"
    def _perform_ocr(
-        self, image_bytes: bytes, psm: int = 6
+        self,
        image_bytes: bytes,
        single_line: bool = False,
        single_word: bool = False,
    ) -> tuple[str, list[float]]:
        """
-        Perform OCR with VIN-optimized settings.
+        Perform OCR with VIN-optimized settings via engine abstraction.
        Args:
            image_bytes: Preprocessed image bytes
-            psm: Tesseract page segmentation mode
+            single_line: Treat image as a single text line
-                 6 = Uniform block of text
+            single_word: Treat image as a single word
                 7 = Single text line
                 8 = Single word
        Returns:
            Tuple of (raw_text, word_confidences)
        """
-        image = Image.open(io.BytesIO(image_bytes))
+        config = OcrConfig(
-
+            char_whitelist=self.VIN_WHITELIST,
-        # Configure Tesseract for VIN extraction
+            single_line=single_line,
-        # OEM 1 = LSTM neural network engine (best accuracy)
+            single_word=single_word,
-        # NOTE: tessedit_char_whitelist does NOT work with OEM 1 (LSTM).
+            use_angle_cls=True,
        # Using it causes empty/erratic output.  Character filtering is
        # handled post-OCR by vin_validator.correct_ocr_errors() instead.
        config = (
            f"--psm {psm} "
            f"--oem 1 "
            f"-c load_system_dawg=false "
            f"-c load_freq_dawg=false"
        )
-
+        result = self._engine.recognize(image_bytes, config)
-        # Get detailed OCR data
+        word_confidences = [wb.confidence for wb in result.word_boxes]
-        ocr_data = pytesseract.image_to_data(
+        return result.text, word_confidences
            image, config=config, output_type=pytesseract.Output.DICT
        )
        # Extract words and confidences
        words = []
        confidences = []
        for i, text in enumerate(ocr_data["text"]):
            conf = int(ocr_data["conf"][i])
            if text.strip() and conf > 0:
                words.append(text.strip())
                confidences.append(conf / 100.0)
        raw_text = " ".join(words)
        return raw_text, confidences
    def _try_alternate_ocr(
        self,
@@ -335,21 +312,25 @@ class VinExtractor(BaseExtractor):
        """
        Try alternate OCR configurations when initial extraction fails.
-        PSM modes tried in order:
+        Modes tried:
-            7  - Single text line
+            single-line - Treat as a single text line
-            8  - Single word
+            single-word - Treat as a single word
-            11 - Sparse text (finds text in any order, good for angled photos)
+
-            13 - Raw line (no Tesseract heuristics, good for clean VIN plates)
+        PaddleOCR angle classification handles rotated/angled text
        inherently, so no PSM mode fallbacks are needed.
        Returns:
            List of VIN candidates
        """
        tag = f"{prefix} " if prefix else ""
-        for psm in (7, 8, 11, 13):
+        for mode_name, kwargs in [
-            raw_text, _ = self._perform_ocr(image_bytes, psm=psm)
+            ("single-line", {"single_line": True}),
-            logger.debug("%sPSM %d raw text: '%s'", tag, psm, raw_text)
+            ("single-word", {"single_word": True}),
        ]:
            raw_text, _ = self._perform_ocr(image_bytes, **kwargs)
            logger.debug("%s%s raw text: '%s'", tag, mode_name, raw_text)
            candidates = vin_validator.extract_candidates(raw_text)
-            logger.debug("%sPSM %d candidates: %s", tag, psm, candidates)
+            logger.debug("%s%s candidates: %s", tag, mode_name, candidates)
            if candidates:
                return candidates
--- a/ocr/app/preprocessors/vin_preprocessor.py
+++ b/ocr/app/preprocessors/vin_preprocessor.py
@@ -93,7 +93,7 @@ class VinPreprocessor:
            gray = cv_image
        steps_applied.append("grayscale")
-        # Upscale small images for better OCR (Tesseract needs ~300 DPI)
+        # Upscale small images for better OCR (~300 DPI recommended)
        gray = self._ensure_minimum_resolution(gray)
        steps_applied.append("resolution_check")
@@ -129,14 +129,14 @@ class VinPreprocessor:
        )
    # Minimum width in pixels for reliable VIN OCR.
-    # A 17-char VIN needs ~30px per character for Tesseract accuracy.
+    # A 17-char VIN needs ~30px per character for reliable OCR accuracy.
    MIN_WIDTH_FOR_VIN = 600
    def _ensure_minimum_resolution(self, image: np.ndarray) -> np.ndarray:
        """
        Upscale image if too small for reliable OCR.
-        Tesseract works best at ~300 DPI. Mobile photos of VINs may have
+        OCR works best at ~300 DPI. Mobile photos of VINs may have
        the text occupy only a small portion of the frame, resulting in
        low effective resolution for the VIN characters.
        """
@@ -160,7 +160,7 @@ class VinPreprocessor:
        Colored backgrounds have a low min value (e.g. green sticker:
        min(130,230,150) = 130) → inverted to 125 (medium gray).
-        The inversion ensures Tesseract always receives dark-text-on-
+        The inversion ensures the OCR engine always receives dark-text-on-
        light-background, which is the polarity it expects.
        """
        b_channel, g_channel, r_channel = cv2.split(bgr_image)
@@ -168,8 +168,8 @@ class VinPreprocessor:
        min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)
        # Invert so white text (min=255) becomes black (0) and colored
-        # backgrounds (min~130) become lighter gray (~125).  Tesseract
+        # backgrounds (min~130) become lighter gray (~125).  OCR engines
-        # expects dark text on light background.
+        # expect dark text on light background.
        inverted = cv2.bitwise_not(min_channel)
        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
--- a/ocr/app/services/ocr_service.py
+++ b/ocr/app/services/ocr_service.py
@@ -1,15 +1,14 @@
-"""Core OCR service using Tesseract with HEIC support."""
+"""Core OCR service with HEIC support, using pluggable engine abstraction."""
 import io
 import logging
 import time
 from typing import Optional
 import magic
 import pytesseract
 from PIL import Image
 from pillow_heif import register_heif_opener
-from app.config import settings
+from app.engines import OcrConfig, create_engine
 from app.models import DocumentType, ExtractedField, OcrResponse
 from app.services.preprocessor import preprocessor
@@ -32,8 +31,8 @@ class OcrService:
    }
    def __init__(self) -> None:
-        """Initialize OCR service."""
+        """Initialize OCR service with engine from factory."""
-        pytesseract.pytesseract.tesseract_cmd = settings.tesseract_cmd
+        self._engine = create_engine()
    def extract(
        self,
@@ -86,14 +85,11 @@ class OcrService:
                    file_bytes, deskew=True, denoise=True
                )
-            # Perform OCR
+            # Perform OCR via engine abstraction
-            image = Image.open(io.BytesIO(file_bytes))
+            config = OcrConfig()
-            ocr_data = pytesseract.image_to_data(
+            result = self._engine.recognize(file_bytes, config)
-                image, output_type=pytesseract.Output.DICT
+            raw_text = result.text
-            )
+            confidence = result.confidence
            # Extract text and calculate confidence
            raw_text, confidence = self._process_ocr_data(ocr_data)
            # Detect document type from content
            document_type = self._detect_document_type(raw_text)
@@ -160,26 +156,6 @@ class OcrService:
        return b""
    def _process_ocr_data(
        self, ocr_data: dict
    ) -> tuple[str, float]:
        """Process Tesseract output to extract text and confidence."""
        words = []
        confidences = []
        for i, text in enumerate(ocr_data["text"]):
            # Filter out empty strings and low-confidence results
            conf = int(ocr_data["conf"][i])
            if text.strip() and conf > 0:
                words.append(text)
                confidences.append(conf)
        raw_text = " ".join(words)
        avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
        # Normalize confidence to 0-1 range (Tesseract returns 0-100)
        return raw_text, avg_confidence / 100.0
    def _detect_document_type(self, text: str) -> DocumentType:
        """Detect document type from extracted text content."""
        text_lower = text.lower()
--- a/ocr/app/table_extraction/detector.py
+++ b/ocr/app/table_extraction/detector.py
@@ -312,7 +312,7 @@ class TableDetector:
        Returns:
            2D list of cell contents
        """
-        # This would use Tesseract on the cropped region
+        # This would use OCR on the cropped region
        # For now, return empty - actual OCR will be done in manual_extractor
        logger.debug(f"Table region: ({table.x}, {table.y}) {table.width}x{table.height}")
        return []
--- a/ocr/app/validators/vin_validator.py
+++ b/ocr/app/validators/vin_validator.py
@@ -226,7 +226,7 @@ class VinValidator:
        Uses two strategies:
        1. Find continuous 11-20 char alphanumeric runs (handles intact VINs)
        2. Concatenate adjacent short fragments separated by spaces/dashes
-           (handles Tesseract fragmenting VINs into multiple words)
+           (handles OCR fragmenting VINs into multiple words)
        Args:
            text: Raw OCR text
--- a/ocr/requirements.txt
+++ b/ocr/requirements.txt
@@ -14,7 +14,9 @@ opencv-python-headless>=4.8.0
 numpy>=1.24.0
 # OCR Engines
-pytesseract>=0.3.10
+paddlepaddle>=2.6.0
 paddleocr>=2.8.0
 google-cloud-vision>=3.7.0
 # PDF Processing
 PyMuPDF>=1.23.0
--- a/ocr/tests/test_engine_abstraction.py
+++ b/ocr/tests/test_engine_abstraction.py
@@ -0,0 +1,626 @@
 """Tests for OCR engine abstraction layer.
 Covers: base types, exception hierarchy, PaddleOcrEngine,
 CloudEngine, HybridEngine, and engine_factory.
 """
 import io
 from unittest.mock import MagicMock, patch
 import pytest
 from PIL import Image
 from app.engines.base_engine import (
    EngineError,
    EngineProcessingError,
    EngineUnavailableError,
    OcrConfig,
    OcrEngine,
    OcrEngineResult,
    WordBox,
 )
 # --- Helpers ---
 def _create_test_image_bytes() -> bytes:
    """Create minimal PNG image bytes for engine testing."""
    img = Image.new("RGB", (100, 50), (255, 255, 255))
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return buf.getvalue()
 def _make_result(
    text: str, confidence: float, engine_name: str
 ) -> OcrEngineResult:
    """Create a minimal OcrEngineResult for testing."""
    return OcrEngineResult(
        text=text, confidence=confidence, word_boxes=[], engine_name=engine_name
    )
 def _mock_paddle_result(
    dt_polys: list, rec_texts: list[str], rec_scores: list[float]
 ) -> MagicMock:
    """Create a mock PaddleOCR v3.x predict() result object.
    Wraps data under ``"res"`` key to match save_to_json format.
    """
    result = MagicMock()
    result.json = {
        "res": {
            "dt_polys": dt_polys,
            "rec_texts": rec_texts,
            "rec_scores": rec_scores,
        }
    }
    return result
 # ---------------------------------------------------------------------------
 # Exception hierarchy
 # ---------------------------------------------------------------------------
 class TestExceptionHierarchy:
    """Engine errors form a proper hierarchy under EngineError."""
    def test_unavailable_is_engine_error(self) -> None:
        assert issubclass(EngineUnavailableError, EngineError)
    def test_processing_is_engine_error(self) -> None:
        assert issubclass(EngineProcessingError, EngineError)
    def test_engine_error_is_exception(self) -> None:
        assert issubclass(EngineError, Exception)
    def test_catch_base_catches_subtypes(self) -> None:
        with pytest.raises(EngineError):
            raise EngineUnavailableError("not installed")
        with pytest.raises(EngineError):
            raise EngineProcessingError("OCR failed")
 # ---------------------------------------------------------------------------
 # Data types
 # ---------------------------------------------------------------------------
 class TestWordBox:
    def test_default_positions(self) -> None:
        wb = WordBox(text="VIN", confidence=0.95)
        assert wb.x == 0
        assert wb.y == 0
        assert wb.width == 0
        assert wb.height == 0
    def test_all_fields(self) -> None:
        wb = WordBox(text="ABC", confidence=0.88, x=10, y=20, width=100, height=30)
        assert wb.text == "ABC"
        assert wb.confidence == 0.88
        assert wb.x == 10
        assert wb.width == 100
 class TestOcrConfig:
    def test_defaults(self) -> None:
        config = OcrConfig()
        assert config.char_whitelist is None
        assert config.single_line is False
        assert config.single_word is False
        assert config.use_angle_cls is True
        assert config.hints == {}
    def test_vin_whitelist_excludes_ioq(self) -> None:
        whitelist = "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
        config = OcrConfig(char_whitelist=whitelist)
        assert "I" not in config.char_whitelist
        assert "O" not in config.char_whitelist
        assert "Q" not in config.char_whitelist
    def test_hints_are_independent_across_instances(self) -> None:
        c1 = OcrConfig()
        c2 = OcrConfig()
        c1.hints["psm"] = 7
        assert "psm" not in c2.hints
 class TestOcrEngineResult:
    def test_construction(self) -> None:
        result = OcrEngineResult(
            text="1HGBH41JXMN109186",
            confidence=0.94,
            word_boxes=[WordBox(text="1HGBH41JXMN109186", confidence=0.94)],
            engine_name="paddleocr",
        )
        assert result.text == "1HGBH41JXMN109186"
        assert result.confidence == 0.94
        assert len(result.word_boxes) == 1
        assert result.engine_name == "paddleocr"
    def test_empty_result(self) -> None:
        result = OcrEngineResult(
            text="", confidence=0.0, word_boxes=[], engine_name="paddleocr"
        )
        assert result.text == ""
        assert result.word_boxes == []
 # ---------------------------------------------------------------------------
 # OcrEngine ABC
 # ---------------------------------------------------------------------------
 class TestOcrEngineABC:
    def test_cannot_instantiate_directly(self) -> None:
        with pytest.raises(TypeError):
            OcrEngine()  # type: ignore[abstract]
    def test_concrete_subclass_works(self) -> None:
        class StubEngine(OcrEngine):
            @property
            def name(self) -> str:
                return "stub"
            def recognize(
                self, image_bytes: bytes, config: OcrConfig
            ) -> OcrEngineResult:
                return OcrEngineResult(
                    text="ok", confidence=1.0, word_boxes=[], engine_name="stub"
                )
        engine = StubEngine()
        assert engine.name == "stub"
        result = engine.recognize(b"", OcrConfig())
        assert result.text == "ok"
 # ---------------------------------------------------------------------------
 # PaddleOcrEngine
 # ---------------------------------------------------------------------------
 class TestPaddleOcrEngine:
    def test_name(self) -> None:
        from app.engines.paddle_engine import PaddleOcrEngine
        engine = PaddleOcrEngine()
        assert engine.name == "paddleocr"
    def test_lazy_init_not_loaded_at_construction(self) -> None:
        from app.engines.paddle_engine import PaddleOcrEngine
        engine = PaddleOcrEngine()
        assert engine._ocr is None
    def test_recognize_empty_results(self) -> None:
        from app.engines.paddle_engine import PaddleOcrEngine
        engine = PaddleOcrEngine()
        mock_ocr = MagicMock()
        mock_ocr.predict.return_value = iter([
            _mock_paddle_result(dt_polys=[], rec_texts=[], rec_scores=[])
        ])
        engine._ocr = mock_ocr
        result = engine.recognize(_create_test_image_bytes(), OcrConfig())
        assert result.text == ""
        assert result.confidence == 0.0
        assert result.word_boxes == []
        assert result.engine_name == "paddleocr"
    def test_recognize_with_results(self) -> None:
        from app.engines.paddle_engine import PaddleOcrEngine
        engine = PaddleOcrEngine()
        mock_ocr = MagicMock()
        mock_ocr.predict.return_value = iter([
            _mock_paddle_result(
                dt_polys=[
                    [[10, 20], [110, 20], [110, 50], [10, 50]],
                    [[10, 60], [110, 60], [110, 90], [10, 90]],
                ],
                rec_texts=["HELLO", "WORLD"],
                rec_scores=[0.95, 0.88],
            )
        ])
        engine._ocr = mock_ocr
        result = engine.recognize(_create_test_image_bytes(), OcrConfig())
        assert result.text == "HELLO WORLD"
        assert abs(result.confidence - 0.915) < 0.01
        assert len(result.word_boxes) == 2
        assert result.word_boxes[0].text == "HELLO"
        assert result.word_boxes[0].confidence == 0.95
        assert result.word_boxes[1].text == "WORLD"
        assert result.engine_name == "paddleocr"
    def test_recognize_whitelist_filters_characters(self) -> None:
        from app.engines.paddle_engine import PaddleOcrEngine
        engine = PaddleOcrEngine()
        mock_ocr = MagicMock()
        mock_ocr.predict.return_value = iter([
            _mock_paddle_result(
                dt_polys=[[[0, 0], [100, 0], [100, 30], [0, 30]]],
                rec_texts=["1HG-BH4!"],
                rec_scores=[0.9],
            )
        ])
        engine._ocr = mock_ocr
        config = OcrConfig(char_whitelist="ABCDEFGHJKLMNPRSTUVWXYZ0123456789")
        result = engine.recognize(_create_test_image_bytes(), config)
        assert "-" not in result.text
        assert "!" not in result.text
        assert result.word_boxes[0].text == "1HGBH4"
    def test_recognize_quadrilateral_to_bounding_box(self) -> None:
        from app.engines.paddle_engine import PaddleOcrEngine
        engine = PaddleOcrEngine()
        mock_ocr = MagicMock()
        # Slightly rotated quad: min x=8, min y=20, max x=110, max y=55
        mock_ocr.predict.return_value = iter([
            _mock_paddle_result(
                dt_polys=[[[10, 20], [110, 25], [108, 55], [8, 50]]],
                rec_texts=["TEXT"],
                rec_scores=[0.9],
            )
        ])
        engine._ocr = mock_ocr
        result = engine.recognize(_create_test_image_bytes(), OcrConfig())
        wb = result.word_boxes[0]
        assert wb.x == 8
        assert wb.y == 20
        assert wb.width == 102  # 110 - 8
        assert wb.height == 35  # 55 - 20
    def test_recognize_skips_empty_after_whitelist(self) -> None:
        """Text consisting only of non-whitelisted characters is skipped."""
        from app.engines.paddle_engine import PaddleOcrEngine
        engine = PaddleOcrEngine()
        mock_ocr = MagicMock()
        mock_ocr.predict.return_value = iter([
            _mock_paddle_result(
                dt_polys=[[[0, 0], [50, 0], [50, 20], [0, 20]]],
                rec_texts=["---"],
                rec_scores=[0.9],
            )
        ])
        engine._ocr = mock_ocr
        config = OcrConfig(char_whitelist="ABC")
        result = engine.recognize(_create_test_image_bytes(), config)
        assert result.text == ""
        assert result.word_boxes == []
        assert result.confidence == 0.0
    def test_import_error_raises_unavailable(self) -> None:
        from app.engines.paddle_engine import PaddleOcrEngine
        engine = PaddleOcrEngine()
        engine._ocr = None
        with patch.dict("sys.modules", {"paddleocr": None}):
            with patch(
                "app.engines.paddle_engine.importlib.import_module",
                side_effect=ImportError("No module"),
            ):
                # Force re-import by removing cached paddleocr
                original_import = __builtins__.__import__ if hasattr(__builtins__, '__import__') else __import__
                def mock_import(name, *args, **kwargs):
                    if name == "paddleocr":
                        raise ImportError("No module named 'paddleocr'")
                    return original_import(name, *args, **kwargs)
                with patch("builtins.__import__", side_effect=mock_import):
                    with pytest.raises(EngineUnavailableError, match="paddleocr"):
                        engine._get_ocr()
    def test_processing_error_on_exception(self) -> None:
        from app.engines.paddle_engine import PaddleOcrEngine
        engine = PaddleOcrEngine()
        mock_ocr = MagicMock()
        mock_ocr.predict.side_effect = RuntimeError("OCR crashed")
        engine._ocr = mock_ocr
        with pytest.raises(EngineProcessingError, match="PaddleOCR recognition failed"):
            engine.recognize(_create_test_image_bytes(), OcrConfig())
 # ---------------------------------------------------------------------------
 # CloudEngine
 # ---------------------------------------------------------------------------
 class TestCloudEngine:
    def test_name(self) -> None:
        from app.engines.cloud_engine import CloudEngine
        engine = CloudEngine(key_path="/fake/path.json")
        assert engine.name == "google_vision"
    def test_lazy_init_not_loaded_at_construction(self) -> None:
        from app.engines.cloud_engine import CloudEngine
        engine = CloudEngine(key_path="/fake/path.json")
        assert engine._client is None
    def test_missing_key_file_raises_unavailable(self) -> None:
        from app.engines.cloud_engine import CloudEngine
        engine = CloudEngine(key_path="/nonexistent/key.json")
        with pytest.raises(EngineUnavailableError, match="key not found"):
            engine._get_client()
    @patch("os.path.isfile", return_value=True)
    def test_missing_library_raises_unavailable(self, _mock_isfile: MagicMock) -> None:
        from app.engines.cloud_engine import CloudEngine
        engine = CloudEngine(key_path="/fake/key.json")
        def mock_import(name, *args, **kwargs):
            if "google.cloud" in name:
                raise ImportError("No module named 'google.cloud'")
            return __import__(name, *args, **kwargs)
        with patch("builtins.__import__", side_effect=mock_import):
            with pytest.raises(EngineUnavailableError, match="google-cloud-vision"):
                engine._get_client()
    def test_recognize_empty_annotations(self) -> None:
        from app.engines.cloud_engine import CloudEngine
        engine = CloudEngine(key_path="/fake/key.json")
        mock_client = MagicMock()
        mock_response = MagicMock()
        mock_response.error.message = ""
        mock_response.text_annotations = []
        mock_client.text_detection.return_value = mock_response
        engine._client = mock_client
        # Mock the google.cloud.vision import inside recognize()
        mock_vision = MagicMock()
        with patch.dict("sys.modules", {"google.cloud.vision": mock_vision, "google.cloud": MagicMock(), "google": MagicMock()}):
            result = engine.recognize(b"fake_image", OcrConfig())
        assert result.text == ""
        assert result.confidence == 0.0
        assert result.engine_name == "google_vision"
    def test_recognize_api_error_raises_processing_error(self) -> None:
        from app.engines.cloud_engine import CloudEngine
        engine = CloudEngine(key_path="/fake/key.json")
        mock_client = MagicMock()
        mock_response = MagicMock()
        mock_response.error.message = "API quota exceeded"
        mock_client.text_detection.return_value = mock_response
        engine._client = mock_client
        mock_vision = MagicMock()
        with patch.dict("sys.modules", {"google.cloud.vision": mock_vision, "google.cloud": MagicMock(), "google": MagicMock()}):
            with pytest.raises(EngineProcessingError, match="API quota exceeded"):
                engine.recognize(b"fake_image", OcrConfig())
 # ---------------------------------------------------------------------------
 # HybridEngine
 # ---------------------------------------------------------------------------
 class TestHybridEngine:
    def test_name_with_fallback(self) -> None:
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        fallback = MagicMock(spec=OcrEngine)
        fallback.name = "google_vision"
        engine = HybridEngine(primary=primary, fallback=fallback)
        assert engine.name == "hybrid(paddleocr+google_vision)"
    def test_name_without_fallback(self) -> None:
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        engine = HybridEngine(primary=primary)
        assert engine.name == "hybrid(paddleocr+none)"
    def test_high_confidence_skips_fallback(self) -> None:
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        fallback = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        fallback.name = "cloud"
        primary.recognize.return_value = _make_result("VIN123", 0.95, "paddleocr")
        engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        result = engine.recognize(b"img", OcrConfig())
        assert result.text == "VIN123"
        assert result.engine_name == "paddleocr"
        fallback.recognize.assert_not_called()
    def test_low_confidence_triggers_fallback(self) -> None:
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        fallback = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        fallback.name = "google_vision"
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.return_value = _make_result("VIN456", 0.92, "google_vision")
        engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        result = engine.recognize(b"img", OcrConfig())
        assert result.text == "VIN456"
        assert result.engine_name == "google_vision"
        fallback.recognize.assert_called_once()
    def test_low_confidence_no_fallback_returns_primary(self) -> None:
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        engine = HybridEngine(primary=primary, fallback=None, threshold=0.6)
        result = engine.recognize(b"img", OcrConfig())
        assert result.text == "VIN123"
    def test_fallback_lower_confidence_returns_primary(self) -> None:
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        fallback = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        fallback.name = "google_vision"
        primary.recognize.return_value = _make_result("VIN123", 0.4, "paddleocr")
        fallback.recognize.return_value = _make_result("VIN456", 0.3, "google_vision")
        engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        result = engine.recognize(b"img", OcrConfig())
        assert result.text == "VIN123"
    def test_fallback_engine_error_returns_primary(self) -> None:
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        fallback = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        fallback.name = "google_vision"
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.side_effect = EngineUnavailableError("key missing")
        engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        result = engine.recognize(b"img", OcrConfig())
        assert result.text == "VIN123"
    def test_fallback_unexpected_error_returns_primary(self) -> None:
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        fallback = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        fallback.name = "google_vision"
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.side_effect = RuntimeError("network error")
        engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        result = engine.recognize(b"img", OcrConfig())
        assert result.text == "VIN123"
    @patch("app.engines.hybrid_engine.time")
    def test_fallback_timeout_returns_primary(self, mock_time: MagicMock) -> None:
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        fallback = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        fallback.name = "google_vision"
        primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
        fallback.recognize.return_value = _make_result("VIN456", 0.92, "google_vision")
        # Simulate 6-second delay (exceeds 5s limit)
        mock_time.monotonic.side_effect = [0.0, 6.0]
        engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        result = engine.recognize(b"img", OcrConfig())
        assert result.text == "VIN123"  # timeout -> use primary
    def test_exact_threshold_skips_fallback(self) -> None:
        """When confidence == threshold, no fallback needed (>= check)."""
        from app.engines.hybrid_engine import HybridEngine
        primary = MagicMock(spec=OcrEngine)
        fallback = MagicMock(spec=OcrEngine)
        primary.name = "paddleocr"
        fallback.name = "cloud"
        primary.recognize.return_value = _make_result("VIN", 0.6, "paddleocr")
        engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
        result = engine.recognize(b"img", OcrConfig())
        assert result.engine_name == "paddleocr"
        fallback.recognize.assert_not_called()
 # ---------------------------------------------------------------------------
 # Engine factory
 # ---------------------------------------------------------------------------
 class TestEngineFactory:
    def test_unknown_engine_raises(self) -> None:
        from app.engines.engine_factory import _create_single_engine
        with pytest.raises(EngineUnavailableError, match="Unknown engine"):
            _create_single_engine("nonexistent")
    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_defaults_to_settings_primary(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        mock_settings.ocr_primary_engine = "paddleocr"
        mock_settings.ocr_fallback_engine = "none"
        mock_engine = MagicMock(spec=OcrEngine)
        mock_create.return_value = mock_engine
        from app.engines.engine_factory import create_engine
        result = create_engine()
        mock_create.assert_called_once_with("paddleocr")
        assert result == mock_engine
    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_explicit_name_overrides_settings(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        mock_settings.ocr_fallback_engine = "none"
        mock_engine = MagicMock(spec=OcrEngine)
        mock_create.return_value = mock_engine
        from app.engines.engine_factory import create_engine
        create_engine("google_vision")
        mock_create.assert_called_once_with("google_vision")
    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_creates_hybrid_when_fallback_configured(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        mock_settings.ocr_primary_engine = "paddleocr"
        mock_settings.ocr_fallback_engine = "google_vision"
        mock_settings.ocr_fallback_threshold = 0.7
        mock_primary = MagicMock(spec=OcrEngine)
        mock_fallback = MagicMock(spec=OcrEngine)
        mock_create.side_effect = [mock_primary, mock_fallback]
        from app.engines.engine_factory import create_engine
        from app.engines.hybrid_engine import HybridEngine
        result = create_engine()
        assert isinstance(result, HybridEngine)
    @patch("app.engines.engine_factory.settings")
    @patch("app.engines.engine_factory._create_single_engine")
    def test_fallback_failure_returns_primary_only(
        self, mock_create: MagicMock, mock_settings: MagicMock
    ) -> None:
        mock_settings.ocr_primary_engine = "paddleocr"
        mock_settings.ocr_fallback_engine = "google_vision"
        mock_settings.ocr_fallback_threshold = 0.6
        mock_primary = MagicMock(spec=OcrEngine)
        mock_create.side_effect = [mock_primary, EngineUnavailableError("no key")]
        from app.engines.engine_factory import create_engine
        result = create_engine()
        assert result == mock_primary
--- a/ocr/tests/test_health.py
+++ b/ocr/tests/test_health.py
@@ -39,14 +39,9 @@ def test_pillow_heif_can_register():
    assert "HEIF" in Image.registered_extensions().values()
-def test_tesseract_available():
+def test_paddleocr_engine_available():
-    """Tesseract OCR is available and can process images."""
+    """PaddleOCR engine can be created."""
-    import pytesseract
+    from app.engines.paddle_engine import PaddleOcrEngine
-    # Create a simple test image with text
+    engine = PaddleOcrEngine()
-    img = Image.new("RGB", (200, 50), color="white")
+    assert engine.name == "paddleocr"
    # Verify pytesseract can call tesseract (will return empty string for blank image)
    result = pytesseract.image_to_string(img)
    # Just verify it doesn't raise an exception - blank image returns empty/whitespace
    assert isinstance(result, str)
--- a/ocr/tests/test_vin_extraction.py
+++ b/ocr/tests/test_vin_extraction.py
@@ -1,11 +1,12 @@
-"""Integration tests for VIN extraction endpoint."""
+"""Integration tests for VIN extraction endpoint and engine integration."""
 import io
 from unittest.mock import patch, MagicMock
 import pytest
 from fastapi.testclient import TestClient
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image, ImageDraw
 from app.engines.base_engine import OcrConfig, OcrEngineResult, WordBox
 from app.main import app
@@ -240,3 +241,106 @@ class TestVinExtractionContentTypes:
        )
        assert response.status_code == 200
 # ---------------------------------------------------------------------------
 # VIN extractor engine integration tests
 # ---------------------------------------------------------------------------
 class TestVinExtractorEngineIntegration:
    """Tests verifying VinExtractor integrates correctly with engine abstraction."""
    @patch("app.extractors.vin_extractor.create_engine")
    def test_perform_ocr_calls_engine_with_vin_config(
        self, mock_create_engine: MagicMock
    ) -> None:
        """_perform_ocr passes VIN whitelist and angle_cls to engine."""
        from app.extractors.vin_extractor import VinExtractor
        mock_engine = MagicMock()
        mock_engine.recognize.return_value = OcrEngineResult(
            text="1HGBH41JXMN109186",
            confidence=0.94,
            word_boxes=[WordBox(text="1HGBH41JXMN109186", confidence=0.94)],
            engine_name="paddleocr",
        )
        mock_create_engine.return_value = mock_engine
        extractor = VinExtractor()
        text, confidences = extractor._perform_ocr(b"fake_image")
        mock_engine.recognize.assert_called_once()
        call_config = mock_engine.recognize.call_args[0][1]
        assert isinstance(call_config, OcrConfig)
        assert call_config.char_whitelist == VinExtractor.VIN_WHITELIST
        assert call_config.use_angle_cls is True
        assert call_config.single_line is False
        assert call_config.single_word is False
        assert text == "1HGBH41JXMN109186"
        assert confidences == [0.94]
    @patch("app.extractors.vin_extractor.create_engine")
    def test_perform_ocr_single_line_mode(
        self, mock_create_engine: MagicMock
    ) -> None:
        """_perform_ocr passes single_line flag to engine config."""
        from app.extractors.vin_extractor import VinExtractor
        mock_engine = MagicMock()
        mock_engine.recognize.return_value = OcrEngineResult(
            text="VIN123", confidence=0.9, word_boxes=[], engine_name="paddleocr"
        )
        mock_create_engine.return_value = mock_engine
        extractor = VinExtractor()
        extractor._perform_ocr(b"img", single_line=True)
        call_config = mock_engine.recognize.call_args[0][1]
        assert call_config.single_line is True
        assert call_config.single_word is False
    @patch("app.extractors.vin_extractor.create_engine")
    def test_perform_ocr_single_word_mode(
        self, mock_create_engine: MagicMock
    ) -> None:
        """_perform_ocr passes single_word flag to engine config."""
        from app.extractors.vin_extractor import VinExtractor
        mock_engine = MagicMock()
        mock_engine.recognize.return_value = OcrEngineResult(
            text="VIN123", confidence=0.9, word_boxes=[], engine_name="paddleocr"
        )
        mock_create_engine.return_value = mock_engine
        extractor = VinExtractor()
        extractor._perform_ocr(b"img", single_word=True)
        call_config = mock_engine.recognize.call_args[0][1]
        assert call_config.single_word is True
        assert call_config.single_line is False
    def test_calculate_base_confidence_empty_returns_default(self) -> None:
        """Empty word confidences return 0.5 default."""
        from app.extractors.vin_extractor import VinExtractor
        extractor = VinExtractor.__new__(VinExtractor)
        assert extractor._calculate_base_confidence([]) == 0.5
    def test_calculate_base_confidence_weighted_blend(self) -> None:
        """Confidence = 70% average + 30% minimum."""
        from app.extractors.vin_extractor import VinExtractor
        extractor = VinExtractor.__new__(VinExtractor)
        # avg = (0.9 + 0.8) / 2 = 0.85, min = 0.8
        # result = 0.7 * 0.85 + 0.3 * 0.8 = 0.595 + 0.24 = 0.835
        result = extractor._calculate_base_confidence([0.9, 0.8])
        assert abs(result - 0.835) < 0.001
    def test_calculate_base_confidence_single_value(self) -> None:
        """Single confidence value: avg == min, so result equals that value."""
        from app.extractors.vin_extractor import VinExtractor
        extractor = VinExtractor.__new__(VinExtractor)
        result = extractor._calculate_base_confidence([0.92])
        assert abs(result - 0.92) < 0.001
--- a/ocr/tests/test_vin_validator.py
+++ b/ocr/tests/test_vin_validator.py
@@ -165,7 +165,7 @@ class TestVinValidator:
        """Test candidate extraction handles space-fragmented VINs from OCR."""
        validator = VinValidator()
-        # Tesseract often fragments VINs into multiple words
+        # OCR engines sometimes fragment VINs into multiple words
        text = "1HGBH 41JXMN 109186"
        candidates = validator.extract_candidates(text)
--- a/secrets/app/google-vision-key.json.example
+++ b/secrets/app/google-vision-key.json.example
@@ -0,0 +1,18 @@
 {
  "_comment": "Google Vision API service account key for OCR cloud fallback",
  "_instructions": [
    "1. Create a Google Cloud service account with Vision API access",
    "2. Download the JSON key file",
    "3. Save it as secrets/app/google-vision-key.json (gitignored)",
    "4. Uncomment the volume mount in docker-compose.yml",
    "5. Set OCR_FALLBACK_ENGINE=google_vision"
  ],
  "type": "service_account",
  "project_id": "your-project-id",
  "private_key_id": "",
  "private_key": "",
  "client_email": "your-sa@your-project-id.iam.gserviceaccount.com",
  "client_id": "",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token"
 }