"""Factory function for creating OCR engine instances from configuration.""" import importlib import logging from app.config import settings from app.engines.base_engine import EngineUnavailableError, OcrEngine logger = logging.getLogger(__name__) # Valid engine identifiers (primary engines only; hybrid is constructed separately) _ENGINE_REGISTRY: dict[str, str] = { "paddleocr": "app.engines.paddle_engine.PaddleOcrEngine", "google_vision": "app.engines.cloud_engine.CloudEngine", } def _create_single_engine(name: str) -> OcrEngine: """Instantiate a single engine by registry name.""" if name not in _ENGINE_REGISTRY: raise EngineUnavailableError( f"Unknown engine '{name}'. Available: {list(_ENGINE_REGISTRY.keys())}" ) module_path, class_name = _ENGINE_REGISTRY[name].rsplit(".", 1) try: module = importlib.import_module(module_path) engine_cls = getattr(module, class_name) engine: OcrEngine = engine_cls() logger.info("Created OCR engine: %s", name) return engine except EngineUnavailableError: raise except Exception as exc: raise EngineUnavailableError( f"Failed to create engine '{name}': {exc}" ) from exc def create_engine(engine_name: str | None = None) -> OcrEngine: """Instantiate an OCR engine by name (defaults to config value). When a fallback engine is configured (``OCR_FALLBACK_ENGINE != "none"``), returns a ``HybridEngine`` that wraps the primary with the fallback. Args: engine_name: Engine identifier ("paddleocr", "google_vision"). Falls back to ``settings.ocr_primary_engine``. Returns: Initialized OcrEngine instance (possibly a HybridEngine wrapper). Raises: EngineUnavailableError: If the primary engine cannot be loaded. """ name = (engine_name or settings.ocr_primary_engine).lower().strip() primary = _create_single_engine(name) # Check for cloud fallback configuration fallback_name = settings.ocr_fallback_engine.lower().strip() if fallback_name == "none" or not fallback_name: return primary # Create fallback engine (failure is non-fatal -- log and return primary only) try: fallback = _create_single_engine(fallback_name) except EngineUnavailableError as exc: logger.warning( "Fallback engine '%s' unavailable, proceeding without fallback: %s", fallback_name, exc, ) return primary from app.engines.hybrid_engine import HybridEngine threshold = settings.ocr_fallback_threshold hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=threshold) logger.info( "Created hybrid engine: primary=%s, fallback=%s, threshold=%.2f", name, fallback_name, threshold, ) return hybrid