- Add VISION_MONTHLY_LIMIT config setting (default 1000) - Update CloudEngine to use WIF credential config via ADC - Rewrite HybridEngine to support cloud-primary with Redis counter - Pass monthly_limit through engine factory Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
175 lines
6.0 KiB
Python
175 lines
6.0 KiB
Python
"""Google Vision cloud OCR engine with lazy initialization."""
|
|
|
|
import logging
|
|
import os
|
|
from typing import Any
|
|
|
|
from app.engines.base_engine import (
|
|
EngineProcessingError,
|
|
EngineUnavailableError,
|
|
OcrConfig,
|
|
OcrEngine,
|
|
OcrEngineResult,
|
|
WordBox,
|
|
)
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
# Default path for the Google Workload Identity Federation (WIF) credential
# config (Docker secret mount). CloudEngine falls back to this path when no
# ``key_path`` argument is given and GOOGLE_VISION_KEY_PATH is not set.
|
|
_DEFAULT_KEY_PATH = "/run/secrets/google-wif-config.json"
|
|
|
|
|
|
class CloudEngine(OcrEngine):
    """Google Vision TEXT_DETECTION wrapper with lazy initialization.

    The client is not created until the first ``recognize()`` call,
    so the container starts normally even when the secret file is
    missing or the dependency is not installed.
    """

    # Google Vision TEXT_DETECTION does not return per-word confidence in
    # its annotations. 0.95 is used as the typical accuracy for clear
    # images so that comparisons with PaddleOCR are meaningful.
    _ASSUMED_WORD_CONFIDENCE = 0.95

    def __init__(self, key_path: str | None = None) -> None:
        """Store the credential config path; no client is created yet.

        Args:
            key_path: Path to the WIF credential config file. When falsy,
                the ``GOOGLE_VISION_KEY_PATH`` environment variable is
                used, and failing that the module default path.
        """
        self._key_path = key_path or os.getenv(
            "GOOGLE_VISION_KEY_PATH", _DEFAULT_KEY_PATH
        )
        # Populated by _get_client() on first use.
        self._client: Any | None = None

    @property
    def name(self) -> str:
        """Return the engine identifier reported in OCR results."""
        return "google_vision"

    # ------------------------------------------------------------------
    # Lazy init
    # ------------------------------------------------------------------

    def _get_client(self) -> Any:
        """Create the Vision client on first use and cache it.

        Uses Application Default Credentials (ADC) pointed at a WIF
        credential config file. The WIF config references an executable
        that fetches an Auth0 M2M JWT.

        Side effects:
            Sets ``GOOGLE_APPLICATION_CREDENTIALS`` and
            ``GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES`` in ``os.environ``.
            These are process-wide and remain set even if client creation
            subsequently fails.

        Returns:
            The cached ``vision.ImageAnnotatorClient`` instance.

        Raises:
            EngineUnavailableError: If the credential config file does not
                exist, ``google-cloud-vision`` is not installed, or client
                construction fails for any other reason.
        """
        if self._client is not None:
            return self._client

        # Verify credentials config exists
        if not os.path.isfile(self._key_path):
            raise EngineUnavailableError(
                f"Google Vision credential config not found at {self._key_path}. "
                "Set GOOGLE_VISION_KEY_PATH or mount the secret."
            )

        try:
            # Imported lazily so a missing dependency only fails at first use.
            from google.cloud import vision  # type: ignore[import-untyped]

            # Point ADC at the WIF credential config
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self._key_path
            # Required for executable-sourced credentials
            os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
            self._client = vision.ImageAnnotatorClient()
            logger.info(
                "Google Vision client initialized via WIF (config: %s)",
                self._key_path,
            )
            return self._client
        except ImportError as exc:
            raise EngineUnavailableError(
                "google-cloud-vision is not installed. "
                "Install with: pip install google-cloud-vision"
            ) from exc
        except Exception as exc:
            raise EngineUnavailableError(
                f"Failed to initialize Google Vision client: {exc}"
            ) from exc

    # ------------------------------------------------------------------
    # OCR
    # ------------------------------------------------------------------

    def _build_word_box(self, annotation: Any, allowed: Any) -> Any:
        """Convert one word annotation into a ``WordBox``.

        Args:
            annotation: A word-level entry from ``response.text_annotations``.
            allowed: Set of permitted characters, or ``None`` when no
                whitelist is configured.

        Returns:
            A ``WordBox`` holding the axis-aligned bounding box of the
            annotation's polygon, or ``None`` when nothing but whitespace
            is left after whitelist filtering.
        """
        text = annotation.description
        if allowed is not None:
            text = "".join(ch for ch in text if ch in allowed)

        text = text.strip()
        if not text:
            return None

        # Collapse the bounding polygon to its axis-aligned extent.
        vertices = annotation.bounding_poly.vertices
        xs = [v.x for v in vertices]
        ys = [v.y for v in vertices]
        x_min, y_min = min(xs), min(ys)
        x_max, y_max = max(xs), max(ys)

        return WordBox(
            text=text,
            confidence=self._ASSUMED_WORD_CONFIDENCE,
            x=x_min,
            y=y_min,
            width=x_max - x_min,
            height=y_max - y_min,
        )

    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Run Google Vision TEXT_DETECTION on image bytes.

        Args:
            image_bytes: Encoded image content sent to the Vision API.
            config: OCR settings; only ``char_whitelist`` is read here.
                When set, characters outside it are dropped from every
                word and from the full text (spaces and newlines are
                always kept in the full text).

        Returns:
            The full-page text, one ``WordBox`` per surviving word, and an
            overall confidence: the assumed per-word confidence when at
            least one word survives, otherwise ``0.0``.

        Raises:
            EngineUnavailableError: If the Vision client cannot be created.
            EngineProcessingError: If the API reports an error or any
                other failure occurs while calling it or parsing its
                response.
        """
        client = self._get_client()

        try:
            from google.cloud import vision  # type: ignore[import-untyped]

            image = vision.Image(content=image_bytes)
            response = client.text_detection(image=image)

            if response.error.message:
                raise EngineProcessingError(
                    f"Google Vision API error: {response.error.message}"
                )

            annotations = response.text_annotations
            if not annotations:
                return OcrEngineResult(
                    text="",
                    confidence=0.0,
                    word_boxes=[],
                    engine_name=self.name,
                )

            # Build the whitelist lookup once; it is invariant across words.
            whitelist = config.char_whitelist
            allowed = frozenset(whitelist) if whitelist else None

            # First annotation is the full-page text; the rest are words
            full_text = annotations[0].description.strip()

            word_boxes: list[WordBox] = []
            for annotation in annotations[1:]:
                box = self._build_word_box(annotation, allowed)
                if box is not None:
                    word_boxes.append(box)

            # Apply whitelist to full text too
            if allowed is not None:
                full_text = "".join(
                    ch for ch in full_text if ch in allowed or ch in " \n"
                )

            # Every word carries the same assumed confidence, so the mean is
            # that constant. Returning it directly avoids the floating-point
            # drift of sum()/len() (e.g. 0.9499999999999998 for three words).
            avg_confidence = self._ASSUMED_WORD_CONFIDENCE if word_boxes else 0.0

            return OcrEngineResult(
                text=full_text,
                confidence=avg_confidence,
                word_boxes=word_boxes,
                engine_name=self.name,
            )

        except (EngineUnavailableError, EngineProcessingError):
            raise
        except Exception as exc:
            raise EngineProcessingError(
                f"Google Vision recognition failed: {exc}"
            ) from exc
|