"""OCR engine abstract base class and shared data types.""" from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import Any # --- Exception hierarchy --- class EngineError(Exception): """Base exception for all OCR engine errors.""" class EngineUnavailableError(EngineError): """Raised when an engine cannot be initialized (missing binary, bad config).""" class EngineProcessingError(EngineError): """Raised when an engine fails to process an image.""" # --- Data types --- @dataclass class WordBox: """A single recognized word with position and confidence.""" text: str confidence: float # 0.0-1.0 x: int = 0 y: int = 0 width: int = 0 height: int = 0 @dataclass class OcrConfig: """Engine-agnostic OCR configuration. Common fields cover the most frequent needs. Engine-specific parameters go into ``hints`` so the interface stays stable. """ char_whitelist: str | None = None # e.g. VIN: "ABCDEFGHJKLMNPRSTUVWXYZ0123456789" single_line: bool = False # Treat image as a single text line single_word: bool = False # Treat image as a single word use_angle_cls: bool = True # Enable angle classification (PaddleOCR) hints: dict[str, Any] = field(default_factory=dict) @dataclass class OcrEngineResult: """Normalized result returned by every engine implementation.""" text: str confidence: float # 0.0-1.0 word_boxes: list[WordBox] engine_name: str # "paddleocr", "tesseract", "google_vision" # --- Abstract base --- class OcrEngine(ABC): """Abstract base class that all OCR engines must implement.""" @abstractmethod def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: """Run OCR on preprocessed image bytes. Args: image_bytes: Raw image bytes (PNG/JPEG). config: Engine-agnostic configuration. Returns: Normalized OCR result. Raises: EngineProcessingError: If recognition fails. EngineUnavailableError: If the engine is not ready. """ @property @abstractmethod def name(self) -> str: """Short identifier used in OcrEngineResult.engine_name."""