Merge pull request 'feat: Google Vision primary OCR with Auth0 WIF and monthly usage cap (#127)' (#128) from issue-127-google-vision-primary-ocr into main
All checks were successful
Deploy to Staging / Build Images (push) Successful in 34s
Deploy to Staging / Deploy to Staging (push) Successful in 51s
Deploy to Staging / Verify Staging (push) Successful in 8s
Deploy to Staging / Notify Staging Ready (push) Successful in 7s
Deploy to Staging / Notify Staging Failure (push) Has been skipped

Reviewed-on: #128
This commit was merged in pull request #128.
This commit is contained in:
2026-02-11 01:46:20 +00:00
19 changed files with 647 additions and 109 deletions

View File

@@ -95,6 +95,7 @@ jobs:
sparse-checkout: | sparse-checkout: |
scripts/ scripts/
config/ config/
secrets/app/google-wif-config.json
docker-compose.yml docker-compose.yml
docker-compose.blue-green.yml docker-compose.blue-green.yml
docker-compose.prod.yml docker-compose.prod.yml
@@ -108,6 +109,11 @@ jobs:
cp "$GITHUB_WORKSPACE/docker-compose.yml" "$DEPLOY_PATH/" cp "$GITHUB_WORKSPACE/docker-compose.yml" "$DEPLOY_PATH/"
cp "$GITHUB_WORKSPACE/docker-compose.blue-green.yml" "$DEPLOY_PATH/" cp "$GITHUB_WORKSPACE/docker-compose.blue-green.yml" "$DEPLOY_PATH/"
cp "$GITHUB_WORKSPACE/docker-compose.prod.yml" "$DEPLOY_PATH/" cp "$GITHUB_WORKSPACE/docker-compose.prod.yml" "$DEPLOY_PATH/"
# WIF credential config (not a secret -- references Auth0 token script path)
# Remove any Docker-created directory artifact from failed bind mounts
rm -rf "$DEPLOY_PATH/secrets/app/google-wif-config.json"
mkdir -p "$DEPLOY_PATH/secrets/app"
cp "$GITHUB_WORKSPACE/secrets/app/google-wif-config.json" "$DEPLOY_PATH/secrets/app/"
- name: Generate logging configuration - name: Generate logging configuration
run: | run: |
@@ -129,6 +135,8 @@ jobs:
AUTH0_CLIENT_SECRET: ${{ secrets.AUTH0_CLIENT_SECRET }} AUTH0_CLIENT_SECRET: ${{ secrets.AUTH0_CLIENT_SECRET }}
AUTH0_MANAGEMENT_CLIENT_ID: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_ID }} AUTH0_MANAGEMENT_CLIENT_ID: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_ID }}
AUTH0_MANAGEMENT_CLIENT_SECRET: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_SECRET }} AUTH0_MANAGEMENT_CLIENT_SECRET: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_SECRET }}
AUTH0_OCR_CLIENT_ID: ${{ secrets.AUTH0_OCR_CLIENT_ID }}
AUTH0_OCR_CLIENT_SECRET: ${{ secrets.AUTH0_OCR_CLIENT_SECRET }}
GOOGLE_MAPS_API_KEY: ${{ secrets.GOOGLE_MAPS_API_KEY }} GOOGLE_MAPS_API_KEY: ${{ secrets.GOOGLE_MAPS_API_KEY }}
GOOGLE_MAPS_MAP_ID: ${{ secrets.GOOGLE_MAPS_MAP_ID }} GOOGLE_MAPS_MAP_ID: ${{ secrets.GOOGLE_MAPS_MAP_ID }}
CF_DNS_API_TOKEN: ${{ secrets.CF_DNS_API_TOKEN }} CF_DNS_API_TOKEN: ${{ secrets.CF_DNS_API_TOKEN }}

View File

@@ -118,6 +118,11 @@ jobs:
rsync -av --delete "$GITHUB_WORKSPACE/scripts/" "$DEPLOY_PATH/scripts/" rsync -av --delete "$GITHUB_WORKSPACE/scripts/" "$DEPLOY_PATH/scripts/"
cp "$GITHUB_WORKSPACE/docker-compose.yml" "$DEPLOY_PATH/" cp "$GITHUB_WORKSPACE/docker-compose.yml" "$DEPLOY_PATH/"
cp "$GITHUB_WORKSPACE/docker-compose.staging.yml" "$DEPLOY_PATH/" cp "$GITHUB_WORKSPACE/docker-compose.staging.yml" "$DEPLOY_PATH/"
# WIF credential config (not a secret -- references Auth0 token script path)
# Remove any Docker-created directory artifact from failed bind mounts
rm -rf "$DEPLOY_PATH/secrets/app/google-wif-config.json"
mkdir -p "$DEPLOY_PATH/secrets/app"
cp "$GITHUB_WORKSPACE/secrets/app/google-wif-config.json" "$DEPLOY_PATH/secrets/app/"
- name: Generate logging configuration - name: Generate logging configuration
run: | run: |
@@ -139,6 +144,8 @@ jobs:
AUTH0_CLIENT_SECRET: ${{ secrets.AUTH0_CLIENT_SECRET }} AUTH0_CLIENT_SECRET: ${{ secrets.AUTH0_CLIENT_SECRET }}
AUTH0_MANAGEMENT_CLIENT_ID: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_ID }} AUTH0_MANAGEMENT_CLIENT_ID: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_ID }}
AUTH0_MANAGEMENT_CLIENT_SECRET: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_SECRET }} AUTH0_MANAGEMENT_CLIENT_SECRET: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_SECRET }}
AUTH0_OCR_CLIENT_ID: ${{ secrets.AUTH0_OCR_CLIENT_ID }}
AUTH0_OCR_CLIENT_SECRET: ${{ secrets.AUTH0_OCR_CLIENT_SECRET }}
GOOGLE_MAPS_API_KEY: ${{ secrets.GOOGLE_MAPS_API_KEY }} GOOGLE_MAPS_API_KEY: ${{ secrets.GOOGLE_MAPS_API_KEY }}
GOOGLE_MAPS_MAP_ID: ${{ secrets.GOOGLE_MAPS_MAP_ID }} GOOGLE_MAPS_MAP_ID: ${{ secrets.GOOGLE_MAPS_MAP_ID }}
CF_DNS_API_TOKEN: ${{ secrets.CF_DNS_API_TOKEN }} CF_DNS_API_TOKEN: ${{ secrets.CF_DNS_API_TOKEN }}

1
.gitignore vendored
View File

@@ -22,6 +22,7 @@ secrets/**
!secrets/ !secrets/
!secrets/**/ !secrets/**/
!secrets/**/*.example !secrets/**/*.example
!secrets/app/google-wif-config.json
# Traefik ACME certificates (contains private keys) # Traefik ACME certificates (contains private keys)
data/traefik/acme.json data/traefik/acme.json

View File

@@ -199,6 +199,10 @@ services:
# ======================================== # ========================================
mvp-ocr: mvp-ocr:
image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest} image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest}
volumes:
- ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro
- ./secrets/app/auth0-ocr-client-secret.txt:/run/secrets/auth0-ocr-client-secret:ro
- ./secrets/app/google-wif-config.json:/run/secrets/google-wif-config.json:ro
# ======================================== # ========================================
# Override Traefik to add dynamic config # Override Traefik to add dynamic config

View File

@@ -49,10 +49,13 @@ services:
REDIS_HOST: mvp-redis REDIS_HOST: mvp-redis
REDIS_PORT: 6379 REDIS_PORT: 6379
REDIS_DB: 1 REDIS_DB: 1
OCR_PRIMARY_ENGINE: paddleocr # OCR engine configuration (Google Vision primary, PaddleOCR fallback)
OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none} OCR_PRIMARY_ENGINE: google_vision
OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6} OCR_FALLBACK_ENGINE: paddleocr
GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json OCR_CONFIDENCE_THRESHOLD: "0.6"
OCR_FALLBACK_THRESHOLD: "0.6"
GOOGLE_VISION_KEY_PATH: /run/secrets/google-wif-config.json
VISION_MONTHLY_LIMIT: "1000"
# PostgreSQL - Remove dev ports, production log level # PostgreSQL - Remove dev ports, production log level
mvp-postgres: mvp-postgres:

View File

@@ -69,10 +69,17 @@ services:
REDIS_HOST: mvp-redis REDIS_HOST: mvp-redis
REDIS_PORT: 6379 REDIS_PORT: 6379
REDIS_DB: 1 REDIS_DB: 1
OCR_PRIMARY_ENGINE: paddleocr # OCR engine configuration (Google Vision primary, PaddleOCR fallback)
OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none} OCR_PRIMARY_ENGINE: google_vision
OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6} OCR_FALLBACK_ENGINE: paddleocr
GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json OCR_CONFIDENCE_THRESHOLD: "0.6"
OCR_FALLBACK_THRESHOLD: "0.6"
GOOGLE_VISION_KEY_PATH: /run/secrets/google-wif-config.json
VISION_MONTHLY_LIMIT: "1000"
volumes:
- ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro
- ./secrets/app/auth0-ocr-client-secret.txt:/run/secrets/auth0-ocr-client-secret:ro
- ./secrets/app/google-wif-config.json:/run/secrets/google-wif-config.json:ro
# ======================================== # ========================================
# PostgreSQL (Staging - Separate Database) # PostgreSQL (Staging - Separate Database)

View File

@@ -196,16 +196,18 @@ services:
REDIS_HOST: mvp-redis REDIS_HOST: mvp-redis
REDIS_PORT: 6379 REDIS_PORT: 6379
REDIS_DB: 1 REDIS_DB: 1
# OCR engine configuration (PaddleOCR primary, cloud fallback optional) # OCR engine configuration (Google Vision primary, PaddleOCR fallback)
OCR_PRIMARY_ENGINE: paddleocr OCR_PRIMARY_ENGINE: google_vision
OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none} OCR_FALLBACK_ENGINE: paddleocr
OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6} OCR_CONFIDENCE_THRESHOLD: "0.6"
GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json OCR_FALLBACK_THRESHOLD: "0.6"
GOOGLE_VISION_KEY_PATH: /run/secrets/google-wif-config.json
VISION_MONTHLY_LIMIT: "1000"
volumes: volumes:
- /tmp/vin-debug:/tmp/vin-debug - /tmp/vin-debug:/tmp/vin-debug
# Optional: Uncomment to enable Google Vision cloud fallback. - ./secrets/app/auth0-ocr-client-id.txt:/run/secrets/auth0-ocr-client-id:ro
# Requires: secrets/app/google-vision-key.json and OCR_FALLBACK_ENGINE=google_vision - ./secrets/app/auth0-ocr-client-secret.txt:/run/secrets/auth0-ocr-client-secret:ro
# - ./secrets/app/google-vision-key.json:/run/secrets/google-vision-key.json:ro - ./secrets/app/google-wif-config.json:/run/secrets/google-wif-config.json:ro
networks: networks:
- backend - backend
- database - database

View File

@@ -1,8 +1,8 @@
# Production Dockerfile for MotoVaultPro OCR Service # Production Dockerfile for MotoVaultPro OCR Service
# Uses mirrored base images from Gitea Package Registry # Uses mirrored base images from Gitea Package Registry
# #
# Primary engine: PaddleOCR PP-OCRv4 (models baked into image) # Primary engine: Google Vision via Auth0 WIF (monthly-capped)
# Cloud fallback: Google Vision (optional, requires API key at runtime) # Fallback engine: PaddleOCR PP-OCRv4 (models baked into image)
# Build argument for registry (defaults to Gitea mirrors, falls back to Docker Hub) # Build argument for registry (defaults to Gitea mirrors, falls back to Docker Hub)
ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
@@ -14,7 +14,8 @@ FROM ${REGISTRY_MIRRORS}/python:3.13-slim
# - libheif1/libheif-dev: HEIF image support (iPhone photos) # - libheif1/libheif-dev: HEIF image support (iPhone photos)
# - libglib2.0-0: GLib shared library (OpenCV dependency) # - libglib2.0-0: GLib shared library (OpenCV dependency)
# - libmagic1: File type detection # - libmagic1: File type detection
# - curl: Health check endpoint # - curl: Health check endpoint + Auth0 token fetch
# - jq: JSON parsing for Auth0 token script
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
libgomp1 \ libgomp1 \
libheif1 \ libheif1 \
@@ -22,6 +23,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libglib2.0-0 \ libglib2.0-0 \
libmagic1 \ libmagic1 \
curl \ curl \
jq \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Python dependencies # Python dependencies
@@ -42,5 +44,8 @@ RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(ocr_version='PP-OCRv4'
COPY . . COPY . .
# Ensure Auth0 WIF token script is executable
RUN chmod +x /app/scripts/fetch-auth0-token.sh
EXPOSE 8000 EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -21,7 +21,12 @@ class Settings:
os.getenv("OCR_FALLBACK_THRESHOLD", "0.6") os.getenv("OCR_FALLBACK_THRESHOLD", "0.6")
) )
self.google_vision_key_path: str = os.getenv( self.google_vision_key_path: str = os.getenv(
"GOOGLE_VISION_KEY_PATH", "/run/secrets/google-vision-key.json" "GOOGLE_VISION_KEY_PATH", "/run/secrets/google-wif-config.json"
)
# Google Vision monthly usage cap (requests per calendar month)
self.vision_monthly_limit: int = int(
os.getenv("VISION_MONTHLY_LIMIT", "1000")
) )
# Redis configuration for job queue # Redis configuration for job queue

View File

@@ -15,8 +15,8 @@ from app.engines.base_engine import (
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Default path for Google Vision service account key (Docker secret mount) # Default path for Google WIF credential config (Docker secret mount)
_DEFAULT_KEY_PATH = "/run/secrets/google-vision-key.json" _DEFAULT_KEY_PATH = "/run/secrets/google-wif-config.json"
class CloudEngine(OcrEngine): class CloudEngine(OcrEngine):
@@ -42,25 +42,33 @@ class CloudEngine(OcrEngine):
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def _get_client(self) -> Any: def _get_client(self) -> Any:
"""Create the Vision client on first use.""" """Create the Vision client on first use.
Uses Application Default Credentials (ADC) pointed at a WIF
credential config file. The WIF config references an executable
that fetches an Auth0 M2M JWT.
"""
if self._client is not None: if self._client is not None:
return self._client return self._client
# Verify credentials file exists # Verify credentials config exists
if not os.path.isfile(self._key_path): if not os.path.isfile(self._key_path):
raise EngineUnavailableError( raise EngineUnavailableError(
f"Google Vision key not found at {self._key_path}. " f"Google Vision credential config not found at {self._key_path}. "
"Set GOOGLE_VISION_KEY_PATH or mount the secret." "Set GOOGLE_VISION_KEY_PATH or mount the secret."
) )
try: try:
from google.cloud import vision # type: ignore[import-untyped] from google.cloud import vision # type: ignore[import-untyped]
# Point the SDK at the service account key # Point ADC at the WIF credential config
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self._key_path os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self._key_path
# Required for executable-sourced credentials
os.environ["GOOGLE_EXTERNAL_ACCOUNT_ALLOW_EXECUTABLES"] = "1"
self._client = vision.ImageAnnotatorClient() self._client = vision.ImageAnnotatorClient()
logger.info( logger.info(
"Google Vision client initialized (key: %s)", self._key_path "Google Vision client initialized via WIF (config: %s)",
self._key_path,
) )
return self._client return self._client
except ImportError as exc: except ImportError as exc:

View File

@@ -76,11 +76,18 @@ def create_engine(engine_name: str | None = None) -> OcrEngine:
from app.engines.hybrid_engine import HybridEngine from app.engines.hybrid_engine import HybridEngine
threshold = settings.ocr_fallback_threshold threshold = settings.ocr_fallback_threshold
hybrid = HybridEngine(primary=primary, fallback=fallback, threshold=threshold) monthly_limit = settings.vision_monthly_limit
hybrid = HybridEngine(
primary=primary,
fallback=fallback,
threshold=threshold,
monthly_limit=monthly_limit,
)
logger.info( logger.info(
"Created hybrid engine: primary=%s, fallback=%s, threshold=%.2f", "Created hybrid engine: primary=%s, fallback=%s, threshold=%.2f, vision_limit=%d",
name, name,
fallback_name, fallback_name,
threshold, threshold,
monthly_limit,
) )
return hybrid return hybrid

View File

@@ -1,8 +1,13 @@
"""Hybrid OCR engine: primary engine with optional cloud fallback.""" """Hybrid OCR engine: primary with fallback and monthly usage cap."""
import calendar
import datetime
import logging import logging
import time import time
import redis
from app.config import settings
from app.engines.base_engine import ( from app.engines.base_engine import (
EngineError, EngineError,
EngineProcessingError, EngineProcessingError,
@@ -16,15 +21,42 @@ logger = logging.getLogger(__name__)
# Maximum time (seconds) to wait for the cloud fallback # Maximum time (seconds) to wait for the cloud fallback
_CLOUD_TIMEOUT_SECONDS = 5.0 _CLOUD_TIMEOUT_SECONDS = 5.0
# Redis key prefix for monthly Vision API request counter
_VISION_COUNTER_PREFIX = "ocr:vision_requests"
def _vision_counter_key() -> str:
"""Return the Redis key for the current calendar month counter."""
now = datetime.datetime.now(datetime.timezone.utc)
return f"{_VISION_COUNTER_PREFIX}:{now.strftime('%Y-%m')}"
def _seconds_until_month_end() -> int:
"""Seconds from now until midnight UTC on the 1st of next month."""
now = datetime.datetime.now(datetime.timezone.utc)
_, days_in_month = calendar.monthrange(now.year, now.month)
first_of_next = now.replace(
day=1, hour=0, minute=0, second=0, microsecond=0
) + datetime.timedelta(days=days_in_month)
return max(int((first_of_next - now).total_seconds()), 1)
class HybridEngine(OcrEngine): class HybridEngine(OcrEngine):
"""Runs a primary engine and falls back to a cloud engine when """Runs a primary engine with an optional fallback engine and a
the primary result confidence is below the configured threshold. configurable monthly usage cap on cloud API requests.
If the fallback is ``None`` (default), this engine behaves identically **When the primary engine is a cloud engine** (e.g. ``google_vision``),
to the primary engine. Cloud failures are handled gracefully -- the the monthly cap is checked *before* calling the primary. Once the
primary result is returned whenever the fallback is unavailable, limit is reached the fallback becomes the sole engine for the rest
times out, or errors. of the calendar month.
**When the primary engine is local** (e.g. ``paddleocr``), the
original confidence-based fallback logic applies: if confidence is
below the threshold, the cloud fallback is tried (subject to the
same monthly cap).
Cloud failures are handled gracefully -- the local result is always
returned when the cloud engine is unavailable, times out, or errors.
""" """
def __init__( def __init__(
@@ -32,21 +64,143 @@ class HybridEngine(OcrEngine):
primary: OcrEngine, primary: OcrEngine,
fallback: OcrEngine | None = None, fallback: OcrEngine | None = None,
threshold: float = 0.6, threshold: float = 0.6,
monthly_limit: int = 1000,
) -> None: ) -> None:
self._primary = primary self._primary = primary
self._fallback = fallback self._fallback = fallback
self._threshold = threshold self._threshold = threshold
self._monthly_limit = monthly_limit
self._redis: redis.Redis | None = None
@property @property
def name(self) -> str: def name(self) -> str:
fallback_name = self._fallback.name if self._fallback else "none" fallback_name = self._fallback.name if self._fallback else "none"
return f"hybrid({self._primary.name}+{fallback_name})" return f"hybrid({self._primary.name}+{fallback_name})"
# ------------------------------------------------------------------
# Redis helpers
# ------------------------------------------------------------------
def _get_redis(self) -> redis.Redis:
    """Return the engine's synchronous Redis client, creating it on first use.

    The client is built from the host, port and db in ``settings`` with
    ``decode_responses=True`` and cached on the instance for later calls.
    """
    if self._redis is None:
        self._redis = redis.Redis(
            host=settings.redis_host,
            port=settings.redis_port,
            db=settings.redis_db,
            decode_responses=True,
        )
    return self._redis
def _vision_limit_reached(self) -> bool:
    """Report whether this month's Vision request count has hit the cap.

    Returns:
        True when the stored counter is greater than or equal to the
        configured monthly limit. False otherwise, including when the
        counter key is absent or when the lookup fails (the failure is
        logged and the limit is treated as not reached).
    """
    try:
        stored = self._get_redis().get(_vision_counter_key())
        used = 0 if not stored else int(stored)
        if used < self._monthly_limit:
            return False
        logger.info(
            "Vision monthly limit reached (%d/%d)",
            used,
            self._monthly_limit,
        )
        return True
    except Exception as exc:
        # Deliberately fail open: a Redis problem must not disable cloud OCR.
        logger.warning(
            "Redis counter check failed, assuming limit NOT reached: %s",
            exc,
        )
        return False
def _increment_vision_counter(self) -> None:
    """Bump this month's Vision request counter and refresh its expiry.

    The INCR and EXPIRE commands are queued on a single pipeline and sent
    together. The expiry is set to the time remaining in the current
    month. Any failure is logged and swallowed so OCR processing continues.
    """
    try:
        conn = self._get_redis()
        counter_key = _vision_counter_key()
        ttl_seconds = _seconds_until_month_end()
        batch = conn.pipeline()
        batch.incr(counter_key)
        batch.expire(counter_key, ttl_seconds)
        batch.execute()
    except Exception as exc:
        logger.warning("Failed to increment Vision counter: %s", exc)
# ------------------------------------------------------------------
# Engine selection helpers
# ------------------------------------------------------------------
def _is_cloud_engine(self, engine: OcrEngine) -> bool:
    """Tell whether ``engine`` is the cloud-backed engine.

    An engine counts as cloud-backed exactly when its ``name`` is
    ``"google_vision"``.
    """
    engine_name = engine.name
    return engine_name == "google_vision"
def _run_cloud_with_cap(
    self, cloud: OcrEngine, image_bytes: bytes, config: OcrConfig
) -> OcrEngineResult | None:
    """Run a cloud engine if the monthly cap allows, else return None.

    Args:
        cloud: The cloud engine to invoke.
        image_bytes: Raw image data passed through to the engine.
        config: OCR configuration passed through to the engine.

    Returns:
        The engine result, or ``None`` when the monthly limit has been
        reached, the engine raised, or the call took longer than
        ``_CLOUD_TIMEOUT_SECONDS`` (the late result is discarded).

    Every call that completes is charged against the monthly counter,
    including one whose result is discarded for being too slow: the
    request was already served by the cloud API, so leaving it uncounted
    would let real usage drift above the configured cap.
    """
    if self._vision_limit_reached():
        return None

    try:
        start = time.monotonic()
        result = cloud.recognize(image_bytes, config)
        elapsed = time.monotonic() - start
    except EngineError as exc:
        logger.warning("Cloud engine failed: %s", exc)
        return None
    except Exception as exc:
        logger.warning("Unexpected cloud engine error: %s", exc)
        return None

    # The request completed, so it consumed quota regardless of what we do
    # with the result below. Count it before applying the latency check.
    self._increment_vision_counter()

    # NOTE: this is a post-hoc latency budget, not a true timeout -- the
    # call above is not interrupted, only its result is thrown away.
    if elapsed > _CLOUD_TIMEOUT_SECONDS:
        logger.warning(
            "Cloud engine took %.1fs (> %.1fs limit), discarding result",
            elapsed,
            _CLOUD_TIMEOUT_SECONDS,
        )
        return None

    return result
# ------------------------------------------------------------------
# Main recognize
# ------------------------------------------------------------------
def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult: def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
"""Run primary OCR, optionally falling back to cloud engine.""" """Run OCR with monthly-capped cloud usage.
When primary is cloud: check cap -> run cloud or fall back.
When primary is local: run local -> if low confidence, try cloud
fallback (also subject to cap).
"""
# --- Cloud-primary path ---
if self._is_cloud_engine(self._primary):
cloud_result = self._run_cloud_with_cap(
self._primary, image_bytes, config
)
if cloud_result is not None:
logger.debug(
"Cloud primary returned confidence %.2f",
cloud_result.confidence,
)
return cloud_result
# Limit reached or cloud failed -- use fallback
if self._fallback is not None:
logger.info(
"Cloud primary unavailable/capped, using fallback (%s)",
self._fallback.name,
)
return self._fallback.recognize(image_bytes, config)
raise EngineProcessingError(
"Cloud primary unavailable and no fallback configured"
)
# --- Local-primary path (original confidence-based fallback) ---
primary_result = self._primary.recognize(image_bytes, config) primary_result = self._primary.recognize(image_bytes, config)
# Happy path: primary confidence meets threshold
if primary_result.confidence >= self._threshold: if primary_result.confidence >= self._threshold:
logger.debug( logger.debug(
"Primary engine confidence %.2f >= threshold %.2f, no fallback", "Primary engine confidence %.2f >= threshold %.2f, no fallback",
@@ -55,7 +209,6 @@ class HybridEngine(OcrEngine):
) )
return primary_result return primary_result
# No fallback configured -- return primary result as-is
if self._fallback is None: if self._fallback is None:
logger.debug( logger.debug(
"Primary confidence %.2f < threshold %.2f but no fallback configured", "Primary confidence %.2f < threshold %.2f but no fallback configured",
@@ -64,14 +217,39 @@ class HybridEngine(OcrEngine):
) )
return primary_result return primary_result
# Attempt cloud fallback with timeout guard # Only try cloud fallback if it is the fallback engine
if self._is_cloud_engine(self._fallback):
logger.info(
"Primary confidence %.2f < threshold %.2f, trying cloud fallback (%s)",
primary_result.confidence,
self._threshold,
self._fallback.name,
)
fallback_result = self._run_cloud_with_cap(
self._fallback, image_bytes, config
)
if fallback_result is not None:
if fallback_result.confidence > primary_result.confidence:
logger.info(
"Fallback confidence %.2f > primary %.2f, using fallback",
fallback_result.confidence,
primary_result.confidence,
)
return fallback_result
logger.info(
"Primary confidence %.2f >= fallback %.2f, keeping primary",
primary_result.confidence,
fallback_result.confidence,
)
return primary_result
# Non-cloud fallback (no cap needed)
logger.info( logger.info(
"Primary confidence %.2f < threshold %.2f, trying fallback (%s)", "Primary confidence %.2f < threshold %.2f, trying fallback (%s)",
primary_result.confidence, primary_result.confidence,
self._threshold, self._threshold,
self._fallback.name, self._fallback.name,
) )
try: try:
start = time.monotonic() start = time.monotonic()
fallback_result = self._fallback.recognize(image_bytes, config) fallback_result = self._fallback.recognize(image_bytes, config)
@@ -79,23 +257,22 @@ class HybridEngine(OcrEngine):
if elapsed > _CLOUD_TIMEOUT_SECONDS: if elapsed > _CLOUD_TIMEOUT_SECONDS:
logger.warning( logger.warning(
"Cloud fallback took %.1fs (> %.1fs limit), using primary result", "Fallback took %.1fs (> %.1fs limit), using primary result",
elapsed, elapsed,
_CLOUD_TIMEOUT_SECONDS, _CLOUD_TIMEOUT_SECONDS,
) )
return primary_result return primary_result
# Return whichever result has higher confidence
if fallback_result.confidence > primary_result.confidence: if fallback_result.confidence > primary_result.confidence:
logger.info( logger.info(
"Fallback confidence %.2f > primary %.2f, using fallback result", "Fallback confidence %.2f > primary %.2f, using fallback",
fallback_result.confidence, fallback_result.confidence,
primary_result.confidence, primary_result.confidence,
) )
return fallback_result return fallback_result
logger.info( logger.info(
"Primary confidence %.2f >= fallback %.2f, keeping primary result", "Primary confidence %.2f >= fallback %.2f, keeping primary",
primary_result.confidence, primary_result.confidence,
fallback_result.confidence, fallback_result.confidence,
) )
@@ -103,14 +280,13 @@ class HybridEngine(OcrEngine):
except EngineError as exc: except EngineError as exc:
logger.warning( logger.warning(
"Cloud fallback failed (%s), returning primary result: %s", "Fallback failed (%s), returning primary: %s",
self._fallback.name, self._fallback.name,
exc, exc,
) )
return primary_result return primary_result
except Exception as exc: except Exception as exc:
logger.warning( logger.warning(
"Unexpected cloud fallback error, returning primary result: %s", "Unexpected fallback error, returning primary: %s", exc
exc,
) )
return primary_result return primary_result

View File

@@ -0,0 +1,80 @@
#!/bin/sh
# fetch-auth0-token.sh -- Auth0 M2M token fetcher for Google WIF
#
# Called by the Google Auth library when using executable-sourced
# credentials (see google-wif-config.json). Reads Auth0 client
# credentials from Docker secrets and returns the JWT in the format
# expected by Google's credential helpers.
#
# Requires: curl, jq
#
# Exit codes:
#   0 -- success (JSON with token on stdout)
#   non-zero -- missing secrets or curl/jq failure (details on stderr)
set -e

CLIENT_ID_FILE="/run/secrets/auth0-ocr-client-id"
CLIENT_SECRET_FILE="/run/secrets/auth0-ocr-client-secret"
AUTH0_DOMAIN="motovaultpro.us.auth0.com"
AUDIENCE="https://iam.googleapis.com/projects/487954699429/locations/global/workloadIdentityPools/motovaultpro-pool/providers/auth0-provider"
# Token lifetime (seconds) assumed when Auth0 does not return a usable expires_in
DEFAULT_EXPIRY_SECONDS=3600
# Upper bound (seconds) for the whole Auth0 request so a hung connection
# cannot stall the caller indefinitely
CURL_MAX_TIME=20

# Read credentials from Docker secrets
if [ ! -f "$CLIENT_ID_FILE" ]; then
  echo "Error: $CLIENT_ID_FILE not found" >&2
  exit 1
fi
if [ ! -f "$CLIENT_SECRET_FILE" ]; then
  echo "Error: $CLIENT_SECRET_FILE not found" >&2
  exit 1
fi

# Strip all whitespace (including the trailing newline most editors add)
CLIENT_ID=$(tr -d '[:space:]' < "$CLIENT_ID_FILE")
CLIENT_SECRET=$(tr -d '[:space:]' < "$CLIENT_SECRET_FILE")

# Build the request body with jq so every value is JSON-escaped; a quote or
# backslash in a credential can no longer corrupt the payload.
REQUEST_BODY=$(jq -n \
  --arg client_id "$CLIENT_ID" \
  --arg client_secret "$CLIENT_SECRET" \
  --arg audience "$AUDIENCE" \
  '{client_id: $client_id, client_secret: $client_secret, audience: $audience, grant_type: "client_credentials"}')

# Request M2M token from Auth0
# Write body to temp file, capture HTTP status code separately.
# Avoids --fail-with-body + set -e which swallows errors inside $().
BODY_FILE=$(mktemp)
# The body may contain a bearer token: remove it on every exit path.
trap 'rm -f "$BODY_FILE"' EXIT

HTTP_CODE=$(curl -s -w '%{http_code}' -o "$BODY_FILE" \
  --max-time "$CURL_MAX_TIME" \
  --request POST \
  --url "https://${AUTH0_DOMAIN}/oauth/token" \
  --header 'Content-Type: application/json' \
  --data "$REQUEST_BODY") || true
RESPONSE=$(cat "$BODY_FILE")

if [ "$HTTP_CODE" != "200" ]; then
  echo "Error: Auth0 token request failed (HTTP $HTTP_CODE)" >&2
  echo "Response: $RESPONSE" >&2
  exit 1
fi

# Extract the access token.
# printf is used instead of echo: some /bin/sh implementations (e.g. dash)
# expand backslash escapes in echo, which would corrupt JSON string content.
TOKEN=$(printf '%s\n' "$RESPONSE" | jq -r '.access_token')
if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then
  echo "Error: No access_token in Auth0 response" >&2
  printf '%s\n' "$RESPONSE" >&2
  exit 1
fi

# jq prints nothing when expires_in is missing or null; anything that is not
# a plain non-negative integer falls back to the default lifetime.
EXPIRY=$(printf '%s\n' "$RESPONSE" | jq -r '.expires_in // empty')
case "$EXPIRY" in
  '' | *[!0-9]*) EXPIRY=$DEFAULT_EXPIRY_SECONDS ;;
esac

# Calculate expiration timestamp (seconds since epoch)
EXPIRATION_TIME=$(($(date +%s) + EXPIRY))

# Output in Google executable-sourced credential format
# https://cloud.google.com/iam/docs/workload-identity-federation-with-other-providers#create_a_credential_configuration
jq -n \
  --arg token "$TOKEN" \
  --argjson expiration "$EXPIRATION_TIME" \
  '{version: 1, success: true, token_type: "urn:ietf:params:oauth:token-type:jwt", id_token: $token, expiration_time: $expiration}'

View File

@@ -355,7 +355,7 @@ class TestCloudEngine:
from app.engines.cloud_engine import CloudEngine from app.engines.cloud_engine import CloudEngine
engine = CloudEngine(key_path="/nonexistent/key.json") engine = CloudEngine(key_path="/nonexistent/key.json")
with pytest.raises(EngineUnavailableError, match="key not found"): with pytest.raises(EngineUnavailableError, match="credential config not found"):
engine._get_client() engine._get_client()
@patch("os.path.isfile", return_value=True) @patch("os.path.isfile", return_value=True)
@@ -414,6 +414,16 @@ class TestCloudEngine:
class TestHybridEngine: class TestHybridEngine:
"""Tests for HybridEngine with monthly Vision API cap."""
def _mock_redis(self, current_count: int = 0) -> MagicMock:
"""Create a mock Redis instance with a configurable counter value."""
mock_r = MagicMock()
mock_r.get.return_value = str(current_count) if current_count else None
mock_pipe = MagicMock()
mock_r.pipeline.return_value = mock_pipe
return mock_r
def test_name_with_fallback(self) -> None: def test_name_with_fallback(self) -> None:
from app.engines.hybrid_engine import HybridEngine from app.engines.hybrid_engine import HybridEngine
@@ -432,13 +442,15 @@ class TestHybridEngine:
engine = HybridEngine(primary=primary) engine = HybridEngine(primary=primary)
assert engine.name == "hybrid(paddleocr+none)" assert engine.name == "hybrid(paddleocr+none)"
# --- Local-primary path (original confidence-based fallback) ---
def test_high_confidence_skips_fallback(self) -> None: def test_high_confidence_skips_fallback(self) -> None:
from app.engines.hybrid_engine import HybridEngine from app.engines.hybrid_engine import HybridEngine
primary = MagicMock(spec=OcrEngine) primary = MagicMock(spec=OcrEngine)
fallback = MagicMock(spec=OcrEngine) fallback = MagicMock(spec=OcrEngine)
primary.name = "paddleocr" primary.name = "paddleocr"
fallback.name = "cloud" fallback.name = "tesseract"
primary.recognize.return_value = _make_result("VIN123", 0.95, "paddleocr") primary.recognize.return_value = _make_result("VIN123", 0.95, "paddleocr")
engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
@@ -447,22 +459,6 @@ class TestHybridEngine:
assert result.engine_name == "paddleocr" assert result.engine_name == "paddleocr"
fallback.recognize.assert_not_called() fallback.recognize.assert_not_called()
def test_low_confidence_triggers_fallback(self) -> None:
from app.engines.hybrid_engine import HybridEngine
primary = MagicMock(spec=OcrEngine)
fallback = MagicMock(spec=OcrEngine)
primary.name = "paddleocr"
fallback.name = "google_vision"
primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
fallback.recognize.return_value = _make_result("VIN456", 0.92, "google_vision")
engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
result = engine.recognize(b"img", OcrConfig())
assert result.text == "VIN456"
assert result.engine_name == "google_vision"
fallback.recognize.assert_called_once()
def test_low_confidence_no_fallback_returns_primary(self) -> None: def test_low_confidence_no_fallback_returns_primary(self) -> None:
from app.engines.hybrid_engine import HybridEngine from app.engines.hybrid_engine import HybridEngine
@@ -474,6 +470,57 @@ class TestHybridEngine:
result = engine.recognize(b"img", OcrConfig()) result = engine.recognize(b"img", OcrConfig())
assert result.text == "VIN123" assert result.text == "VIN123"
def test_exact_threshold_skips_fallback(self) -> None:
    """A primary confidence equal to the threshold is accepted as-is (>= check)."""
    from app.engines.hybrid_engine import HybridEngine

    local_engine = MagicMock(spec=OcrEngine)
    local_engine.name = "paddleocr"
    local_engine.recognize.return_value = _make_result("VIN", 0.6, "paddleocr")
    secondary = MagicMock(spec=OcrEngine)
    secondary.name = "tesseract"

    hybrid = HybridEngine(primary=local_engine, fallback=secondary, threshold=0.6)
    outcome = hybrid.recognize(b"img", OcrConfig())

    secondary.recognize.assert_not_called()
    assert outcome.engine_name == "paddleocr"
# --- Local-primary with cloud fallback (subject to monthly cap) ---
def test_low_confidence_triggers_cloud_fallback(self) -> None:
    """Low local confidence under the cap calls the cloud fallback exactly once,
    returns its higher-confidence result, and charges the call to the counter."""
    from app.engines.hybrid_engine import HybridEngine

    primary = MagicMock(spec=OcrEngine)
    fallback = MagicMock(spec=OcrEngine)
    primary.name = "paddleocr"
    fallback.name = "google_vision"
    primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
    fallback.recognize.return_value = _make_result("VIN456", 0.92, "google_vision")

    engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
    engine._redis = self._mock_redis(current_count=0)
    result = engine.recognize(b"img", OcrConfig())

    assert result.text == "VIN456"
    assert result.engine_name == "google_vision"
    # The cloud engine must have been consulted exactly once ...
    fallback.recognize.assert_called_once()
    # ... and that successful request must count against the monthly cap.
    engine._redis.pipeline.return_value.incr.assert_called_once()
def test_cloud_fallback_skipped_when_limit_reached(self) -> None:
    """With usage at the monthly limit (100/100) the cloud fallback is not called.

    The low-confidence (0.3) primary result is returned unchanged.
    """
    from app.engines.hybrid_engine import HybridEngine

    local_ocr = MagicMock(spec=OcrEngine)
    local_ocr.name = "paddleocr"
    local_ocr.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")

    cloud_ocr = MagicMock(spec=OcrEngine)
    cloud_ocr.name = "google_vision"

    hybrid = HybridEngine(
        primary=local_ocr,
        fallback=cloud_ocr,
        threshold=0.6,
        monthly_limit=100,
    )
    # Usage counter already equals monthly_limit.
    hybrid._redis = self._mock_redis(current_count=100)

    outcome = hybrid.recognize(b"img", OcrConfig())

    assert outcome.text == "VIN123"
    assert outcome.engine_name == "paddleocr"
    cloud_ocr.recognize.assert_not_called()
def test_fallback_lower_confidence_returns_primary(self) -> None: def test_fallback_lower_confidence_returns_primary(self) -> None:
from app.engines.hybrid_engine import HybridEngine from app.engines.hybrid_engine import HybridEngine
@@ -485,10 +532,11 @@ class TestHybridEngine:
fallback.recognize.return_value = _make_result("VIN456", 0.3, "google_vision") fallback.recognize.return_value = _make_result("VIN456", 0.3, "google_vision")
engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
engine._redis = self._mock_redis(current_count=0)
result = engine.recognize(b"img", OcrConfig()) result = engine.recognize(b"img", OcrConfig())
assert result.text == "VIN123" assert result.text == "VIN123"
def test_fallback_engine_error_returns_primary(self) -> None: def test_cloud_fallback_error_returns_primary(self) -> None:
from app.engines.hybrid_engine import HybridEngine from app.engines.hybrid_engine import HybridEngine
primary = MagicMock(spec=OcrEngine) primary = MagicMock(spec=OcrEngine)
@@ -499,25 +547,14 @@ class TestHybridEngine:
fallback.recognize.side_effect = EngineUnavailableError("key missing") fallback.recognize.side_effect = EngineUnavailableError("key missing")
engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
result = engine.recognize(b"img", OcrConfig()) engine._redis = self._mock_redis(current_count=0)
assert result.text == "VIN123"
def test_fallback_unexpected_error_returns_primary(self) -> None:
from app.engines.hybrid_engine import HybridEngine
primary = MagicMock(spec=OcrEngine)
fallback = MagicMock(spec=OcrEngine)
primary.name = "paddleocr"
fallback.name = "google_vision"
primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
fallback.recognize.side_effect = RuntimeError("network error")
engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
result = engine.recognize(b"img", OcrConfig()) result = engine.recognize(b"img", OcrConfig())
assert result.text == "VIN123" assert result.text == "VIN123"
@patch("app.engines.hybrid_engine.time") @patch("app.engines.hybrid_engine.time")
def test_fallback_timeout_returns_primary(self, mock_time: MagicMock) -> None: def test_cloud_fallback_timeout_returns_primary(
self, mock_time: MagicMock
) -> None:
from app.engines.hybrid_engine import HybridEngine from app.engines.hybrid_engine import HybridEngine
primary = MagicMock(spec=OcrEngine) primary = MagicMock(spec=OcrEngine)
@@ -525,28 +562,211 @@ class TestHybridEngine:
primary.name = "paddleocr" primary.name = "paddleocr"
fallback.name = "google_vision" fallback.name = "google_vision"
primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr") primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
fallback.recognize.return_value = _make_result("VIN456", 0.92, "google_vision") fallback.recognize.return_value = _make_result(
# Simulate 6-second delay (exceeds 5s limit) "VIN456", 0.92, "google_vision"
)
mock_time.monotonic.side_effect = [0.0, 6.0] mock_time.monotonic.side_effect = [0.0, 6.0]
engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
engine._redis = self._mock_redis(current_count=0)
result = engine.recognize(b"img", OcrConfig()) result = engine.recognize(b"img", OcrConfig())
assert result.text == "VIN123" # timeout -> use primary assert result.text == "VIN123"
def test_exact_threshold_skips_fallback(self) -> None: # --- Cloud-primary path ---
"""When confidence == threshold, no fallback needed (>= check)."""
def test_cloud_primary_returns_result_when_under_limit(self) -> None:
    """Cloud primary below the monthly limit (500/1000) answers directly.

    Its 0.95-confidence result is returned and the local fallback stays idle.
    """
    from app.engines.hybrid_engine import HybridEngine

    cloud_ocr = MagicMock(spec=OcrEngine)
    cloud_ocr.name = "google_vision"
    cloud_ocr.recognize.return_value = _make_result("VIN789", 0.95, "google_vision")

    local_ocr = MagicMock(spec=OcrEngine)
    local_ocr.name = "paddleocr"

    hybrid = HybridEngine(
        primary=cloud_ocr,
        fallback=local_ocr,
        threshold=0.6,
        monthly_limit=1000,
    )
    hybrid._redis = self._mock_redis(current_count=500)

    outcome = hybrid.recognize(b"img", OcrConfig())

    assert outcome.text == "VIN789"
    assert outcome.engine_name == "google_vision"
    local_ocr.recognize.assert_not_called()
def test_cloud_primary_falls_back_when_limit_reached(self) -> None:
    """At the monthly limit (1000/1000) the cloud primary is bypassed.

    The local fallback's result is returned and the primary is never called.
    """
    from app.engines.hybrid_engine import HybridEngine

    cloud_ocr = MagicMock(spec=OcrEngine)
    cloud_ocr.name = "google_vision"

    local_ocr = MagicMock(spec=OcrEngine)
    local_ocr.name = "paddleocr"
    local_ocr.recognize.return_value = _make_result("VIN_LOCAL", 0.75, "paddleocr")

    hybrid = HybridEngine(
        primary=cloud_ocr,
        fallback=local_ocr,
        threshold=0.6,
        monthly_limit=1000,
    )
    hybrid._redis = self._mock_redis(current_count=1000)

    outcome = hybrid.recognize(b"img", OcrConfig())

    assert outcome.text == "VIN_LOCAL"
    assert outcome.engine_name == "paddleocr"
    cloud_ocr.recognize.assert_not_called()
def test_cloud_primary_no_fallback_raises_when_limit_reached(self) -> None:
    """Limit reached and no fallback configured: recognize() must raise.

    Expects ``EngineProcessingError`` whose message matches "no fallback".
    """
    from app.engines.hybrid_engine import HybridEngine

    cloud_ocr = MagicMock(spec=OcrEngine)
    cloud_ocr.name = "google_vision"

    hybrid = HybridEngine(
        primary=cloud_ocr,
        fallback=None,
        threshold=0.6,
        monthly_limit=1000,
    )
    hybrid._redis = self._mock_redis(current_count=1000)

    with pytest.raises(EngineProcessingError, match="no fallback"):
        hybrid.recognize(b"img", OcrConfig())
def test_cloud_primary_error_falls_back(self) -> None:
    """An ``EngineUnavailableError`` from the cloud primary yields the fallback result.

    Usage is under the limit (500/1000), so the failure comes from the primary call.
    """
    from app.engines.hybrid_engine import HybridEngine

    cloud_ocr = MagicMock(spec=OcrEngine)
    cloud_ocr.name = "google_vision"
    cloud_ocr.recognize.side_effect = EngineUnavailableError("API down")

    local_ocr = MagicMock(spec=OcrEngine)
    local_ocr.name = "paddleocr"
    local_ocr.recognize.return_value = _make_result("VIN_LOCAL", 0.75, "paddleocr")

    hybrid = HybridEngine(
        primary=cloud_ocr,
        fallback=local_ocr,
        threshold=0.6,
        monthly_limit=1000,
    )
    hybrid._redis = self._mock_redis(current_count=500)

    outcome = hybrid.recognize(b"img", OcrConfig())

    assert outcome.text == "VIN_LOCAL"
    assert outcome.engine_name == "paddleocr"
# --- Redis counter behavior ---
def test_counter_increments_on_successful_cloud_call(self) -> None:
    """A successful cloud call issues one pipelined incr/expire/execute on Redis."""
    from app.engines.hybrid_engine import HybridEngine

    cloud_ocr = MagicMock(spec=OcrEngine)
    cloud_ocr.name = "google_vision"
    cloud_ocr.recognize.return_value = _make_result("VIN789", 0.95, "google_vision")

    local_ocr = MagicMock(spec=OcrEngine)
    local_ocr.name = "paddleocr"

    redis_double = self._mock_redis(current_count=10)
    hybrid = HybridEngine(
        primary=cloud_ocr,
        fallback=local_ocr,
        threshold=0.6,
        monthly_limit=1000,
    )
    hybrid._redis = redis_double

    hybrid.recognize(b"img", OcrConfig())

    # Exactly one pipeline, with each of its three operations used once.
    redis_double.pipeline.assert_called_once()
    pipeline = redis_double.pipeline.return_value
    pipeline.incr.assert_called_once()
    pipeline.expire.assert_called_once()
    pipeline.execute.assert_called_once()
def test_counter_not_incremented_when_limit_reached(self) -> None:
    """At the monthly limit (1000/1000) no Redis pipeline is opened."""
    from app.engines.hybrid_engine import HybridEngine

    cloud_ocr = MagicMock(spec=OcrEngine)
    cloud_ocr.name = "google_vision"

    local_ocr = MagicMock(spec=OcrEngine)
    local_ocr.name = "paddleocr"
    local_ocr.recognize.return_value = _make_result("VIN_LOCAL", 0.75, "paddleocr")

    redis_double = self._mock_redis(current_count=1000)
    hybrid = HybridEngine(
        primary=cloud_ocr,
        fallback=local_ocr,
        threshold=0.6,
        monthly_limit=1000,
    )
    hybrid._redis = redis_double

    hybrid.recognize(b"img", OcrConfig())

    redis_double.pipeline.assert_not_called()
def test_redis_failure_assumes_limit_not_reached(self) -> None:
    """A failing Redis read must not block the cloud primary.

    ``get`` on the Redis double raises, yet the primary's 0.95-confidence
    result is still expected back. The assertions mirror the under-limit
    happy-path test: same text, same engine name, fallback untouched.
    """
    from app.engines.hybrid_engine import HybridEngine

    primary = MagicMock(spec=OcrEngine)
    fallback = MagicMock(spec=OcrEngine)
    primary.name = "google_vision"
    fallback.name = "paddleocr"
    primary.recognize.return_value = _make_result(
        "VIN789", 0.95, "google_vision"
    )

    # Reading the usage counter fails; the pipeline is still available.
    mock_r = MagicMock()
    mock_r.get.side_effect = Exception("Redis connection refused")
    mock_pipe = MagicMock()
    mock_r.pipeline.return_value = mock_pipe

    engine = HybridEngine(
        primary=primary, fallback=fallback, threshold=0.6, monthly_limit=1000
    )
    engine._redis = mock_r
    result = engine.recognize(b"img", OcrConfig())

    assert result.text == "VIN789"
    # Also pin which engine answered and that the local fallback stayed idle.
    assert result.engine_name == "google_vision"
    fallback.recognize.assert_not_called()
# --- Non-cloud fallback path (no cap needed) ---
def test_non_cloud_fallback_not_subject_to_cap(self) -> None:
from app.engines.hybrid_engine import HybridEngine from app.engines.hybrid_engine import HybridEngine
primary = MagicMock(spec=OcrEngine) primary = MagicMock(spec=OcrEngine)
fallback = MagicMock(spec=OcrEngine) fallback = MagicMock(spec=OcrEngine)
primary.name = "paddleocr" primary.name = "paddleocr"
fallback.name = "cloud" fallback.name = "tesseract"
primary.recognize.return_value = _make_result("VIN", 0.6, "paddleocr") primary.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")
fallback.recognize.return_value = _make_result(
"VIN456", 0.92, "tesseract"
)
engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6) engine = HybridEngine(primary=primary, fallback=fallback, threshold=0.6)
result = engine.recognize(b"img", OcrConfig()) result = engine.recognize(b"img", OcrConfig())
assert result.engine_name == "paddleocr" assert result.text == "VIN456"
fallback.recognize.assert_not_called() assert result.engine_name == "tesseract"
@patch("app.engines.hybrid_engine.time")
def test_non_cloud_fallback_timeout_returns_primary(
    self, mock_time: MagicMock
) -> None:
    """A slow non-cloud fallback is discarded in favour of the primary result.

    ``time.monotonic`` is patched to return 0.0 then 6.0 -- presumably
    exceeding the engine's fallback time budget; confirm against HybridEngine.
    """
    from app.engines.hybrid_engine import HybridEngine

    # Two successive clock readings, 6 seconds apart.
    mock_time.monotonic.side_effect = [0.0, 6.0]

    main_ocr = MagicMock(spec=OcrEngine)
    main_ocr.name = "paddleocr"
    main_ocr.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")

    backup_ocr = MagicMock(spec=OcrEngine)
    backup_ocr.name = "tesseract"
    backup_ocr.recognize.return_value = _make_result("VIN456", 0.92, "tesseract")

    hybrid = HybridEngine(primary=main_ocr, fallback=backup_ocr, threshold=0.6)
    outcome = hybrid.recognize(b"img", OcrConfig())

    assert outcome.text == "VIN123"
def test_non_cloud_fallback_error_returns_primary(self) -> None:
    """A ``RuntimeError`` from a non-cloud fallback leaves the primary result in place."""
    from app.engines.hybrid_engine import HybridEngine

    main_ocr = MagicMock(spec=OcrEngine)
    main_ocr.name = "paddleocr"
    main_ocr.recognize.return_value = _make_result("VIN123", 0.3, "paddleocr")

    backup_ocr = MagicMock(spec=OcrEngine)
    backup_ocr.name = "tesseract"
    backup_ocr.recognize.side_effect = RuntimeError("crash")

    hybrid = HybridEngine(primary=main_ocr, fallback=backup_ocr, threshold=0.6)
    outcome = hybrid.recognize(b"img", OcrConfig())

    assert outcome.text == "VIN123"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -599,6 +819,7 @@ class TestEngineFactory:
mock_settings.ocr_primary_engine = "paddleocr" mock_settings.ocr_primary_engine = "paddleocr"
mock_settings.ocr_fallback_engine = "google_vision" mock_settings.ocr_fallback_engine = "google_vision"
mock_settings.ocr_fallback_threshold = 0.7 mock_settings.ocr_fallback_threshold = 0.7
mock_settings.vision_monthly_limit = 1000
mock_primary = MagicMock(spec=OcrEngine) mock_primary = MagicMock(spec=OcrEngine)
mock_fallback = MagicMock(spec=OcrEngine) mock_fallback = MagicMock(spec=OcrEngine)
mock_create.side_effect = [mock_primary, mock_fallback] mock_create.side_effect = [mock_primary, mock_fallback]

View File

@@ -11,6 +11,8 @@
# - AUTH0_CLIENT_SECRET # - AUTH0_CLIENT_SECRET
# - AUTH0_MANAGEMENT_CLIENT_ID # - AUTH0_MANAGEMENT_CLIENT_ID
# - AUTH0_MANAGEMENT_CLIENT_SECRET # - AUTH0_MANAGEMENT_CLIENT_SECRET
# - AUTH0_OCR_CLIENT_ID
# - AUTH0_OCR_CLIENT_SECRET
# - GOOGLE_MAPS_API_KEY # - GOOGLE_MAPS_API_KEY
# - GOOGLE_MAPS_MAP_ID # - GOOGLE_MAPS_MAP_ID
# - CF_DNS_API_TOKEN # - CF_DNS_API_TOKEN
@@ -30,6 +32,8 @@ SECRET_FILES=(
"auth0-client-secret.txt" "auth0-client-secret.txt"
"auth0-management-client-id.txt" "auth0-management-client-id.txt"
"auth0-management-client-secret.txt" "auth0-management-client-secret.txt"
"auth0-ocr-client-id.txt"
"auth0-ocr-client-secret.txt"
"google-maps-api-key.txt" "google-maps-api-key.txt"
"google-maps-map-id.txt" "google-maps-map-id.txt"
"cloudflare-dns-token.txt" "cloudflare-dns-token.txt"
@@ -99,6 +103,8 @@ inject_secret "POSTGRES_PASSWORD" "postgres-password.txt" || FAILED=1
inject_secret "AUTH0_CLIENT_SECRET" "auth0-client-secret.txt" || FAILED=1 inject_secret "AUTH0_CLIENT_SECRET" "auth0-client-secret.txt" || FAILED=1
inject_secret "AUTH0_MANAGEMENT_CLIENT_ID" "auth0-management-client-id.txt" || FAILED=1 inject_secret "AUTH0_MANAGEMENT_CLIENT_ID" "auth0-management-client-id.txt" || FAILED=1
inject_secret "AUTH0_MANAGEMENT_CLIENT_SECRET" "auth0-management-client-secret.txt" || FAILED=1 inject_secret "AUTH0_MANAGEMENT_CLIENT_SECRET" "auth0-management-client-secret.txt" || FAILED=1
inject_secret "AUTH0_OCR_CLIENT_ID" "auth0-ocr-client-id.txt" || FAILED=1
inject_secret "AUTH0_OCR_CLIENT_SECRET" "auth0-ocr-client-secret.txt" || FAILED=1
inject_secret "GOOGLE_MAPS_API_KEY" "google-maps-api-key.txt" || FAILED=1 inject_secret "GOOGLE_MAPS_API_KEY" "google-maps-api-key.txt" || FAILED=1
inject_secret "GOOGLE_MAPS_MAP_ID" "google-maps-map-id.txt" || FAILED=1 inject_secret "GOOGLE_MAPS_MAP_ID" "google-maps-map-id.txt" || FAILED=1
inject_secret "CF_DNS_API_TOKEN" "cloudflare-dns-token.txt" || FAILED=1 inject_secret "CF_DNS_API_TOKEN" "cloudflare-dns-token.txt" || FAILED=1

View File

@@ -0,0 +1 @@
your-auth0-m2m-client-id

View File

@@ -0,0 +1 @@
your-auth0-m2m-client-secret

View File

@@ -1,18 +0,0 @@
{
"_comment": "Google Vision API service account key for OCR cloud fallback",
"_instructions": [
"1. Create a Google Cloud service account with Vision API access",
"2. Download the JSON key file",
"3. Save it as secrets/app/google-vision-key.json (gitignored)",
"4. Uncomment the volume mount in docker-compose.yml",
"5. Set OCR_FALLBACK_ENGINE=google_vision"
],
"type": "service_account",
"project_id": "your-project-id",
"private_key_id": "",
"private_key": "",
"client_email": "your-sa@your-project-id.iam.gserviceaccount.com",
"client_id": "",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token"
}

View File

@@ -0,0 +1,14 @@
{
"universe_domain": "googleapis.com",
"type": "external_account",
"audience": "//iam.googleapis.com/projects/487954699429/locations/global/workloadIdentityPools/motovaultpro-pool/providers/auth0-provider",
"subject_token_type": "urn:ietf:params:oauth:token-type:jwt",
"token_url": "https://sts.googleapis.com/v1/token",
"credential_source": {
"executable": {
"command": "/app/scripts/fetch-auth0-token.sh",
"timeout_millis": 30000
}
},
"service_account_impersonation_url": "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/mvp-svc-account@motovaultpro.iam.gserviceaccount.com:generateAccessToken"
}