Introduce pluggable OcrEngine ABC with PaddleOCR PP-OCRv4 as primary engine and Tesseract wrapper for backward compatibility. Engine factory reads OCR_PRIMARY_ENGINE config to instantiate the correct engine. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
89 lines
2.2 KiB
Python
89 lines
2.2 KiB
Python
"""OCR engine abstract base class and shared data types."""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
|
|
# --- Exception hierarchy ---
|
|
|
|
|
|
class EngineError(Exception):
    """Root of the OCR engine exception hierarchy.

    Serves as the common base for all OCR engine errors, so handling
    this type covers every engine-specific failure derived from it.
    """
|
|
|
|
|
|
class EngineUnavailableError(EngineError):
    """Signals that an engine could not be initialized.

    Typical causes are a missing binary or a bad configuration.
    """
|
|
|
|
|
|
class EngineProcessingError(EngineError):
    """Signals that an engine failed while processing an image."""
|
|
|
|
|
|
# --- Data types ---
|
|
|
|
|
|
@dataclass
class WordBox:
    """One recognized word together with its position and confidence.

    Attributes:
        text: The recognized word.
        confidence: Recognition confidence, expressed in the range 0.0-1.0.
        x: Horizontal position of the word; defaults to 0.
        y: Vertical position of the word; defaults to 0.
        width: Width of the word's box; defaults to 0.
        height: Height of the word's box; defaults to 0.
    """

    # Required fields come first so positional construction stays
    # ``WordBox(text, confidence, x, y, width, height)``.
    text: str
    confidence: float

    # Geometry is optional and falls back to an all-zero box.
    x: int = 0
    y: int = 0
    width: int = 0
    height: int = 0
|
|
|
|
|
|
@dataclass
|
|
class OcrConfig:
|
|
"""Engine-agnostic OCR configuration.
|
|
|
|
Common fields cover the most frequent needs. Engine-specific
|
|
parameters go into ``hints`` so the interface stays stable.
|
|
"""
|
|
|
|
char_whitelist: str | None = None # e.g. VIN: "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
|
|
single_line: bool = False # Treat image as a single text line
|
|
single_word: bool = False # Treat image as a single word
|
|
use_angle_cls: bool = True # Enable angle classification (PaddleOCR)
|
|
hints: dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
@dataclass
class OcrEngineResult:
    """Normalized OCR output that every engine implementation returns.

    Attributes:
        text: The recognized text.
        confidence: Confidence score, expressed in the range 0.0-1.0.
        word_boxes: The recognized words as ``WordBox`` entries.
        engine_name: Identifier of the engine, e.g. ``"paddleocr"``,
            ``"tesseract"`` or ``"google_vision"``.
    """

    # All four fields are required; none has a default.
    text: str
    confidence: float
    word_boxes: list[WordBox]
    engine_name: str
|
|
|
|
|
|
# --- Abstract base ---
|
|
|
|
|
|
class OcrEngine(ABC):
    """Interface that every OCR engine has to implement.

    Concrete engines must provide both ``recognize`` and the ``name``
    property; both are declared abstract here.
    """

    @abstractmethod
    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Perform OCR on preprocessed image bytes.

        Args:
            image_bytes: Raw bytes of the image (PNG/JPEG).
            config: Engine-agnostic configuration to apply.

        Returns:
            The OCR result in normalized form.

        Raises:
            EngineProcessingError: If recognition fails.
            EngineUnavailableError: If the engine is not ready.
        """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short engine identifier, used as ``OcrEngineResult.engine_name``."""
|