Files
motovaultpro/ocr/app/engines/engine_factory.py
Eric Gullickson ebc633fb36 feat: add OCR engine abstraction layer (refs #116)
Introduce pluggable OcrEngine ABC with PaddleOCR PP-OCRv4 as primary
engine and Tesseract wrapper for backward compatibility. Engine factory
reads OCR_PRIMARY_ENGINE config to instantiate the correct engine.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-07 10:47:40 -06:00

53 lines
1.6 KiB
Python

"""Factory function for creating OCR engine instances from configuration."""
import logging
from app.config import settings
from app.engines.base_engine import EngineUnavailableError, OcrEngine
logger = logging.getLogger(__name__)

# Supported engine identifiers, each mapped to the dotted import path of the
# class that implements it ("package.module.ClassName"). The final dotted
# component is the class name; everything before it is the module to import.
_ENGINE_REGISTRY: dict[str, str] = dict(
    paddleocr="app.engines.paddle_engine.PaddleOcrEngine",
    tesseract="app.engines.tesseract_engine.TesseractEngine",
)
def create_engine(engine_name: str | None = None) -> OcrEngine:
    """Instantiate an OCR engine by name (defaults to config value).

    The name is lower-cased and stripped of surrounding whitespace before
    it is looked up in ``_ENGINE_REGISTRY``. The registry entry is a dotted
    path whose module is imported lazily, so an engine's dependencies are
    only loaded when that engine is actually requested.

    Args:
        engine_name: Engine identifier ("paddleocr", "tesseract").
            Falls back to ``settings.ocr_primary_engine`` when ``None``
            or empty.

    Returns:
        Initialized OcrEngine instance.

    Raises:
        EngineUnavailableError: If no usable engine name is available, the
            name is not registered, or the engine cannot be loaded or
            initialized.
    """
    import importlib

    requested = engine_name or settings.ocr_primary_engine
    # A missing or non-string config value would otherwise surface as a raw
    # AttributeError from .lower(), bypassing the documented error type.
    if not isinstance(requested, str):
        raise EngineUnavailableError(
            f"No valid OCR engine name configured (got {requested!r}). "
            f"Available: {list(_ENGINE_REGISTRY.keys())}"
        )

    name = requested.lower().strip()
    if name not in _ENGINE_REGISTRY:
        raise EngineUnavailableError(
            f"Unknown engine '{name}'. Available: {list(_ENGINE_REGISTRY.keys())}"
        )

    module_path, class_name = _ENGINE_REGISTRY[name].rsplit(".", 1)
    try:
        module = importlib.import_module(module_path)
        engine_cls = getattr(module, class_name)
        engine: OcrEngine = engine_cls()
    except EngineUnavailableError:
        # Raised by the engine itself; pass through without re-wrapping.
        raise
    except Exception as exc:
        # Import failures, a missing class attribute, and constructor errors
        # are all reported uniformly, with the original exception chained.
        raise EngineUnavailableError(
            f"Failed to create engine '{name}': {exc}"
        ) from exc

    logger.info("Created OCR engine: %s", name)
    return engine