"""Hybrid OCR engine: primary engine with optional cloud fallback.""" import logging import time from app.engines.base_engine import ( EngineError, EngineProcessingError, OcrConfig, OcrEngine, OcrEngineResult, ) logger = logging.getLogger(__name__) # Maximum time (seconds) to wait for the cloud fallback _CLOUD_TIMEOUT_SECONDS = 5.0 class HybridEngine(OcrEngine): """Runs a primary engine and falls back to a cloud engine when the primary result confidence is below the configured threshold. If the fallback is ``None`` (default), this engine behaves identically to the primary engine. Cloud failures are handled gracefully -- the primary result is returned whenever the fallback is unavailable, times out, or errors. """ def __init__( self, primary: OcrEngine, fallback: OcrEngine | None = None, threshold: float = 0.6, ) -> None: self._primary = primary self._fallback = fallback self._threshold = threshold @property def name(self) -> str: fallback_name = self._fallback.name if self._fallback else "none" return f"hybrid({self._primary.name}+{fallback_name})" def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: """Run primary OCR, optionally falling back to cloud engine.""" primary_result = self._primary.recognize(image_bytes, config) # Happy path: primary confidence meets threshold if primary_result.confidence >= self._threshold: logger.debug( "Primary engine confidence %.2f >= threshold %.2f, no fallback", primary_result.confidence, self._threshold, ) return primary_result # No fallback configured -- return primary result as-is if self._fallback is None: logger.debug( "Primary confidence %.2f < threshold %.2f but no fallback configured", primary_result.confidence, self._threshold, ) return primary_result # Attempt cloud fallback with timeout guard logger.info( "Primary confidence %.2f < threshold %.2f, trying fallback (%s)", primary_result.confidence, self._threshold, self._fallback.name, ) try: start = time.monotonic() fallback_result = self._fallback.recognize(image_bytes, config) elapsed = time.monotonic() - start if elapsed > _CLOUD_TIMEOUT_SECONDS: logger.warning( "Cloud fallback took %.1fs (> %.1fs limit), using primary result", elapsed, _CLOUD_TIMEOUT_SECONDS, ) return primary_result # Return whichever result has higher confidence if fallback_result.confidence > primary_result.confidence: logger.info( "Fallback confidence %.2f > primary %.2f, using fallback result", fallback_result.confidence, primary_result.confidence, ) return fallback_result logger.info( "Primary confidence %.2f >= fallback %.2f, keeping primary result", primary_result.confidence, fallback_result.confidence, ) return primary_result except EngineError as exc: logger.warning( "Cloud fallback failed (%s), returning primary result: %s", self._fallback.name, exc, ) return primary_result except Exception as exc: logger.warning( "Unexpected cloud fallback error, returning primary result: %s", exc, ) return primary_result