"""Factory function for creating OCR engine instances from configuration.""" import logging from app.config import settings from app.engines.base_engine import EngineUnavailableError, OcrEngine logger = logging.getLogger(__name__) # Valid engine identifiers _ENGINE_REGISTRY: dict[str, str] = { "paddleocr": "app.engines.paddle_engine.PaddleOcrEngine", "tesseract": "app.engines.tesseract_engine.TesseractEngine", } def create_engine(engine_name: str | None = None) -> OcrEngine: """Instantiate an OCR engine by name (defaults to config value). Args: engine_name: Engine identifier ("paddleocr", "tesseract"). Falls back to ``settings.ocr_primary_engine``. Returns: Initialized OcrEngine instance. Raises: EngineUnavailableError: If the engine cannot be loaded or initialized. """ name = (engine_name or settings.ocr_primary_engine).lower().strip() if name not in _ENGINE_REGISTRY: raise EngineUnavailableError( f"Unknown engine '{name}'. Available: {list(_ENGINE_REGISTRY.keys())}" ) module_path, class_name = _ENGINE_REGISTRY[name].rsplit(".", 1) try: import importlib module = importlib.import_module(module_path) engine_cls = getattr(module, class_name) engine: OcrEngine = engine_cls() logger.info("Created OCR engine: %s", name) return engine except EngineUnavailableError: raise except Exception as exc: raise EngineUnavailableError( f"Failed to create engine '{name}': {exc}" ) from exc