feat: add optional Google Vision cloud fallback engine (refs #118)
CloudEngine wraps Google Vision TEXT_DETECTION with lazy init. HybridEngine runs primary engine, falls back to cloud when confidence is below threshold. Disabled by default (OCR_FALLBACK_ENGINE=none). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
116
ocr/app/engines/hybrid_engine.py
Normal file
116
ocr/app/engines/hybrid_engine.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""Hybrid OCR engine: primary engine with optional cloud fallback."""
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
from app.engines.base_engine import (
|
||||
EngineError,
|
||||
EngineProcessingError,
|
||||
OcrConfig,
|
||||
OcrEngine,
|
||||
OcrEngineResult,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Time budget (seconds) for the cloud fallback call; a fallback that exceeds it is ignored in favour of the primary result
|
||||
_CLOUD_TIMEOUT_SECONDS = 5.0
|
||||
|
||||
|
||||
class HybridEngine(OcrEngine):
    """Run a primary engine and fall back to a second engine on low confidence.

    The fallback is consulted only when the primary result's confidence is
    below ``threshold``. If the fallback is ``None`` (default), this engine
    behaves identically to the primary engine. Fallback failures are handled
    gracefully: the primary result is returned whenever the fallback is
    unavailable, exceeds ``_CLOUD_TIMEOUT_SECONDS``, or raises.

    Exceptions raised by the primary engine are not caught here and
    propagate to the caller.
    """

    def __init__(
        self,
        primary: OcrEngine,
        fallback: OcrEngine | None = None,
        threshold: float = 0.6,
    ) -> None:
        """Store the engines and the confidence threshold.

        Args:
            primary: Engine that is always run first.
            fallback: Engine tried when the primary confidence is below
                ``threshold``; ``None`` disables the fallback.
            threshold: Confidence at or above which the primary result is
                accepted without consulting the fallback.
        """
        self._primary = primary
        self._fallback = fallback
        self._threshold = threshold

    @property
    def name(self) -> str:
        """Return ``hybrid(<primary>+<fallback>)``, with ``none`` for no fallback."""
        # Use the same ``None`` test as ``recognize`` so both agree on
        # whether a fallback is configured.
        fallback_name = self._fallback.name if self._fallback is not None else "none"
        return f"hybrid({self._primary.name}+{fallback_name})"

    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Run primary OCR, optionally falling back to the second engine.

        Args:
            image_bytes: Image payload, passed unchanged to each engine.
            config: OCR configuration, passed unchanged to each engine.

        Returns:
            The primary result, unless the fallback ran successfully within
            the time budget and reported a strictly higher confidence.
        """
        primary_result = self._primary.recognize(image_bytes, config)

        # Happy path: primary confidence meets threshold
        if primary_result.confidence >= self._threshold:
            logger.debug(
                "Primary engine confidence %.2f >= threshold %.2f, no fallback",
                primary_result.confidence,
                self._threshold,
            )
            return primary_result

        # No fallback configured -- return primary result as-is
        if self._fallback is None:
            logger.debug(
                "Primary confidence %.2f < threshold %.2f but no fallback configured",
                primary_result.confidence,
                self._threshold,
            )
            return primary_result

        logger.info(
            "Primary confidence %.2f < threshold %.2f, trying fallback (%s)",
            primary_result.confidence,
            self._threshold,
            self._fallback.name,
        )

        fallback_result = self._run_fallback(image_bytes, config)
        if fallback_result is None:
            # Timeout or failure; already logged by _run_fallback.
            return primary_result

        # Return whichever result has higher confidence (ties keep primary)
        if fallback_result.confidence > primary_result.confidence:
            logger.info(
                "Fallback confidence %.2f > primary %.2f, using fallback result",
                fallback_result.confidence,
                primary_result.confidence,
            )
            return fallback_result

        logger.info(
            "Primary confidence %.2f >= fallback %.2f, keeping primary result",
            primary_result.confidence,
            fallback_result.confidence,
        )
        return primary_result

    def _run_fallback(
        self, image_bytes: bytes, config: OcrConfig
    ) -> OcrEngineResult | None:
        """Run the fallback engine, waiting at most ``_CLOUD_TIMEOUT_SECONDS``.

        Returns:
            The fallback result, or ``None`` if the call timed out or raised
            (the reason is logged at warning level).
        """
        from concurrent.futures import ThreadPoolExecutor
        from concurrent.futures import TimeoutError as FutureTimeoutError

        # The call runs on a worker thread so the wait can actually be
        # bounded; measuring elapsed time after a synchronous call would
        # still block for as long as the fallback takes.
        executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ocr-fallback")
        try:
            future = executor.submit(self._fallback.recognize, image_bytes, config)
            return future.result(timeout=_CLOUD_TIMEOUT_SECONDS)
        except FutureTimeoutError:
            logger.warning(
                "Cloud fallback did not finish within %.1fs, using primary result",
                _CLOUD_TIMEOUT_SECONDS,
            )
            return None
        except EngineError as exc:
            logger.warning(
                "Cloud fallback failed (%s), returning primary result: %s",
                self._fallback.name,
                exc,
            )
            return None
        except Exception as exc:
            # Best-effort boundary: the fallback must never break the primary
            # path, but keep the traceback so unexpected errors stay debuggable.
            logger.warning(
                "Unexpected cloud fallback error, returning primary result: %s",
                exc,
                exc_info=True,
            )
            return None
        finally:
            # Do not block on a still-running call. A running thread cannot
            # be interrupted, so a timed-out request finishes in the
            # background and its result is discarded.
            executor.shutdown(wait=False)
|
||||
Reference in New Issue
Block a user