feat: add OCR engine abstraction layer (refs #116)

Introduce pluggable OcrEngine ABC with PaddleOCR PP-OCRv4 as primary
engine and Tesseract wrapper for backward compatibility. Engine factory
reads OCR_PRIMARY_ENGINE config to instantiate the correct engine.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-07 10:47:40 -06:00
parent 6b0c18a41c
commit ebc633fb36
7 changed files with 422 additions and 0 deletions

View File

@@ -0,0 +1,88 @@
"""OCR engine abstract base class and shared data types."""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any
# --- Exception hierarchy ---
class EngineError(Exception):
    """Base exception for all OCR engine errors.

    Catch this type to handle any engine-related failure without
    depending on a more specific subclass.
    """
class EngineUnavailableError(EngineError):
    """Raised when an engine cannot be initialized.

    Typical causes are a missing binary or a bad configuration.
    """
class EngineProcessingError(EngineError):
    """Raised when an engine fails to process an image."""
# --- Data types ---
@dataclass
class WordBox:
    """A single recognized word with position and confidence.

    Only ``text`` and ``confidence`` are required; every box field
    defaults to 0, so a word can be built without position data.
    """

    text: str  # The recognized word
    confidence: float  # 0.0-1.0
    # Box position and size. Units and origin are not defined here;
    # NOTE(review): presumably image pixels from the top-left -- confirm.
    x: int = 0
    y: int = 0
    width: int = 0
    height: int = 0
@dataclass
class OcrConfig:
"""Engine-agnostic OCR configuration.
Common fields cover the most frequent needs. Engine-specific
parameters go into ``hints`` so the interface stays stable.
"""
char_whitelist: str | None = None # e.g. VIN: "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
single_line: bool = False # Treat image as a single text line
single_word: bool = False # Treat image as a single word
use_angle_cls: bool = True # Enable angle classification (PaddleOCR)
hints: dict[str, Any] = field(default_factory=dict)
@dataclass
class OcrEngineResult:
    """Normalized result returned by every engine implementation.

    All four fields are required; none has a default.
    """

    text: str  # Recognized text
    confidence: float  # 0.0-1.0
    word_boxes: list[WordBox]  # Per-word results
    engine_name: str  # "paddleocr", "tesseract", "google_vision"
# --- Abstract base ---
class OcrEngine(ABC):
    """Abstract base class that all OCR engines must implement.

    Concrete engines must provide both ``recognize`` and the ``name``
    property; a subclass missing either cannot be instantiated.
    """

    @abstractmethod
    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Run OCR on preprocessed image bytes.

        Args:
            image_bytes: Raw image bytes (PNG/JPEG).
            config: Engine-agnostic configuration.

        Returns:
            Normalized OCR result.

        Raises:
            EngineProcessingError: If recognition fails.
            EngineUnavailableError: If the engine is not ready.
        """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier used in OcrEngineResult.engine_name."""