feat: add OCR engine abstraction layer (refs #116)
Introduce a pluggable OcrEngine ABC, with PaddleOCR PP-OCRv4 as the primary engine and a Tesseract wrapper for backward compatibility. The engine factory reads the OCR_PRIMARY_ENGINE config setting to instantiate the correct engine.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
88
ocr/app/engines/base_engine.py
Normal file
88
ocr/app/engines/base_engine.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""OCR engine abstract base class and shared data types."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
# --- Exception hierarchy ---
|
||||
|
||||
|
||||
class EngineError(Exception):
    """Root of the OCR engine exception hierarchy.

    Catching this type also catches every engine error derived from it.
    """
|
||||
|
||||
|
||||
class EngineUnavailableError(EngineError):
    """Signal that an engine could not be initialized.

    Typical causes are a missing binary or a bad configuration.
    """
|
||||
|
||||
|
||||
class EngineProcessingError(EngineError):
    """Signal that an engine failed while processing an image."""
|
||||
|
||||
|
||||
# --- Data types ---
|
||||
|
||||
|
||||
@dataclass
class WordBox:
    """One recognized word with its confidence, position and size.

    Only ``text`` and ``confidence`` are required; the four geometry
    fields default to 0.
    """

    # Recognized text of the word.
    text: str
    # Recognition confidence, intended range 0.0-1.0 (not validated here).
    confidence: float
    # Position of the box.
    x: int = 0
    y: int = 0
    # Size of the box.
    width: int = 0
    height: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class OcrConfig:
|
||||
"""Engine-agnostic OCR configuration.
|
||||
|
||||
Common fields cover the most frequent needs. Engine-specific
|
||||
parameters go into ``hints`` so the interface stays stable.
|
||||
"""
|
||||
|
||||
char_whitelist: str | None = None # e.g. VIN: "ABCDEFGHJKLMNPRSTUVWXYZ0123456789"
|
||||
single_line: bool = False # Treat image as a single text line
|
||||
single_word: bool = False # Treat image as a single word
|
||||
use_angle_cls: bool = True # Enable angle classification (PaddleOCR)
|
||||
hints: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class OcrEngineResult:
    """Normalized result that every engine implementation returns."""

    # Recognized text.
    text: str
    # Confidence, intended range 0.0-1.0 (not validated here).
    confidence: float
    # Recognized words as WordBox entries.
    word_boxes: list[WordBox]
    # Identifier of the producing engine: "paddleocr", "tesseract", "google_vision".
    engine_name: str
|
||||
|
||||
|
||||
# --- Abstract base ---
|
||||
|
||||
|
||||
class OcrEngine(ABC):
    """Interface that every OCR engine implementation has to provide."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Short engine identifier used in ``OcrEngineResult.engine_name``."""

    @abstractmethod
    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Perform OCR on already-preprocessed image bytes.

        Args:
            image_bytes: Raw bytes of the image (PNG/JPEG).
            config: Engine-agnostic settings for this call.

        Returns:
            The recognition outcome in normalized form.

        Raises:
            EngineProcessingError: Recognition failed.
            EngineUnavailableError: The engine is not ready.
        """
|
||||
Reference in New Issue
Block a user