Files
motovaultpro/ocr/app/engines/engine_factory.py
Eric Gullickson b9fe222f12
Some checks failed
Deploy to Staging / Build Images (pull_request) Failing after 4m14s
Deploy to Staging / Deploy to Staging (pull_request) Has been skipped
Deploy to Staging / Verify Staging (pull_request) Has been skipped
Deploy to Staging / Notify Staging Ready (pull_request) Has been skipped
Deploy to Staging / Notify Staging Failure (pull_request) Successful in 8s
fix: Build errors and tesseract removal
2026-02-07 12:12:04 -06:00

87 lines
2.8 KiB
Python

"""Factory function for creating OCR engine instances from configuration."""
import importlib
import logging
from app.config import settings
from app.engines.base_engine import EngineUnavailableError, OcrEngine
logger = logging.getLogger(__name__)
# Lookup table: primary engine identifier -> dotted "module.ClassName" path of
# the class implementing it. The hybrid engine has no entry here because it is
# assembled from a primary and a fallback engine instead of being looked up.
_ENGINE_REGISTRY: dict[str, str] = {
    "paddleocr": "app.engines.paddle_engine.PaddleOcrEngine",
    "google_vision": "app.engines.cloud_engine.CloudEngine",
}
def _create_single_engine(name: str) -> OcrEngine:
    """Build one OCR engine from its registry identifier.

    The identifier is resolved through ``_ENGINE_REGISTRY`` to a dotted
    ``module.ClassName`` path. The module is imported on demand and the class
    is instantiated with no arguments.

    Args:
        name: Registry key of the engine to build.

    Returns:
        The newly constructed engine instance.

    Raises:
        EngineUnavailableError: If ``name`` is not a registry key, or if
            importing the module, resolving the class, or constructing the
            engine raises any exception.
    """
    dotted_path = _ENGINE_REGISTRY.get(name)
    if dotted_path is None:
        known_engines = list(_ENGINE_REGISTRY.keys())
        raise EngineUnavailableError(
            f"Unknown engine '{name}'. Available: {known_engines}"
        )

    module_path, class_name = dotted_path.rsplit(".", 1)
    try:
        engine_module = importlib.import_module(module_path)
        engine_cls = getattr(engine_module, class_name)
        engine: OcrEngine = engine_cls()
        logger.info("Created OCR engine: %s", name)
    except EngineUnavailableError:
        # The engine already reported itself unavailable; pass that through
        # untouched rather than wrapping it a second time.
        raise
    except Exception as exc:
        # Any other failure (import error, missing attribute, constructor
        # error) is normalized to the one exception type callers handle.
        raise EngineUnavailableError(
            f"Failed to create engine '{name}': {exc}"
        ) from exc
    return engine
def create_engine(engine_name: str | None = None) -> OcrEngine:
    """Build the OCR engine to use, optionally wrapped with a fallback.

    The primary engine is created first. If ``settings.ocr_fallback_engine``
    names an engine (anything other than an empty value or ``"none"``), that
    engine is created too and both are wrapped in a ``HybridEngine``. A
    fallback that cannot be created is logged as a warning and the primary
    engine is returned on its own.

    Args:
        engine_name: Engine identifier ("paddleocr", "google_vision"). When
            omitted or empty, ``settings.ocr_primary_engine`` is used. The
            value is lower-cased and stripped of surrounding whitespace.

    Returns:
        The primary engine, or a ``HybridEngine`` wrapping the primary and
        the fallback when a fallback is configured and available.

    Raises:
        EngineUnavailableError: If the primary engine cannot be created.
    """
    primary_name = (engine_name or settings.ocr_primary_engine).lower().strip()
    primary_engine = _create_single_engine(primary_name)

    # An empty value or the literal "none" both mean "no fallback configured".
    fallback_name = settings.ocr_fallback_engine.lower().strip()
    if fallback_name in ("", "none"):
        return primary_engine

    # A missing fallback must not take the service down: warn and carry on
    # with the primary engine alone.
    try:
        fallback_engine = _create_single_engine(fallback_name)
    except EngineUnavailableError as exc:
        logger.warning(
            "Fallback engine '%s' unavailable, proceeding without fallback: %s",
            fallback_name,
            exc,
        )
        return primary_engine

    # NOTE(review): imported locally rather than at module top -- presumably
    # to defer loading or avoid a circular import; confirm before moving it.
    from app.engines.hybrid_engine import HybridEngine

    fallback_threshold = settings.ocr_fallback_threshold
    hybrid_engine = HybridEngine(
        primary=primary_engine,
        fallback=fallback_engine,
        threshold=fallback_threshold,
    )
    logger.info(
        "Created hybrid engine: primary=%s, fallback=%s, threshold=%.2f",
        primary_name,
        fallback_name,
        fallback_threshold,
    )
    return hybrid_engine