Introduce pluggable OcrEngine ABC with PaddleOCR PP-OCRv4 as primary engine and Tesseract wrapper for backward compatibility. Engine factory reads OCR_PRIMARY_ENGINE config to instantiate the correct engine. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
27 lines
921 B
Python
27 lines
921 B
Python
"""OCR Service Configuration."""
|
|
import os
|
|
|
|
|
|
class Settings:
    """Runtime configuration for the OCR service, sourced from the environment.

    Every attribute is read once, when the instance is constructed. A
    variable that is not set falls back to the default given in
    ``__init__``. Numeric settings are converted with ``int``/``float``,
    so a malformed value raises ``ValueError`` during construction.
    """

    def __init__(self) -> None:
        """Read all settings from the current process environment."""
        env = os.environ

        # Server / process settings
        self.log_level: str = env.get("LOG_LEVEL", "info")
        self.host: str = env.get("HOST", "0.0.0.0")
        self.port: int = int(env.get("PORT", "8000"))
        self.tesseract_cmd: str = env.get("TESSERACT_CMD", "/usr/bin/tesseract")

        # OCR engine configuration
        self.ocr_primary_engine: str = env.get("OCR_PRIMARY_ENGINE", "paddleocr")
        raw_threshold = env.get("OCR_CONFIDENCE_THRESHOLD", "0.6")
        self.ocr_confidence_threshold: float = float(raw_threshold)

        # Redis configuration for job queue
        self.redis_host: str = env.get("REDIS_HOST", "mvp-redis")
        self.redis_port: int = int(env.get("REDIS_PORT", "6379"))
        self.redis_db: int = int(env.get("REDIS_DB", "1"))
|
|
|
|
|
|
# Module-level Settings instance, built once when this module is imported;
# the environment variables are read at that moment.
settings = Settings()
|