fix: Build errors and tesseract removal
Some checks failed
Deploy to Staging / Build Images (pull_request) Failing after 4m14s
Deploy to Staging / Deploy to Staging (pull_request) Has been skipped
Deploy to Staging / Verify Staging (pull_request) Has been skipped
Deploy to Staging / Notify Staging Ready (pull_request) Has been skipped
Deploy to Staging / Notify Staging Failure (pull_request) Successful in 8s

This commit is contained in:
Eric Gullickson
2026-02-07 12:12:04 -06:00
parent cf114fad3c
commit b9fe222f12
16 changed files with 35 additions and 238 deletions

View File

@@ -2,7 +2,6 @@
# Uses mirrored base images from Gitea Package Registry
#
# Primary engine: PaddleOCR PP-OCRv4 (models baked into image)
# Backward compat: Tesseract 5.x (optional, via TesseractEngine)
# Cloud fallback: Google Vision (optional, requires API key at runtime)
# Build argument for registry (defaults to Gitea mirrors, falls back to Docker Hub)
@@ -11,21 +10,16 @@ ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
# Base image: python:3.13-slim, pulled through the registry prefix given by REGISTRY_MIRRORS.
FROM ${REGISTRY_MIRRORS}/python:3.13-slim
# System dependencies (kept in alphabetical order so diffs stay small)
# - curl: Health check endpoint
# - libglib2.0-0: GLib shared library (OpenCV dependency)
# - libgomp1: OpenMP runtime required by PaddlePaddle
# - libheif1/libheif-dev: HEIF image support (iPhone photos)
# - libmagic1: File type detection
# - tesseract-ocr/eng: Backward-compatible OCR engine (used by TesseractEngine)
#
# libGL is intentionally NOT installed: the libgl1-mesa-glx package was a
# transitional package that was dropped from Debian after bullseye, so asking
# apt for it fails the build ("Unable to locate package"). It is not needed
# either, because the pip step below replaces the full OpenCV wheel with
# opencv-python-headless.
#
# NOTE(review): if TesseractEngine is no longer shipped, the two tesseract
# packages can be dropped as well - confirm against the application code.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    libglib2.0-0 \
    libgomp1 \
    libheif-dev \
    libheif1 \
    libmagic1 \
    tesseract-ocr \
    tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
@@ -33,7 +27,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# Python dependencies
WORKDIR /app
# Copy only the manifest first so this layer (and the install below) stays
# cached until requirements.txt itself changes.
COPY requirements.txt .
# Install dependencies exactly once. PaddleX (transitive via paddleocr) pulls
# in the full opencv-python which requires libGL.so.1. Force-reinstall the
# headless variant afterwards, in the same layer, so the container stays
# GUI-free.
# NOTE(review): --force-reinstall also reinstalls opencv's own dependencies
# (e.g. numpy) from the index; consider adding --no-deps if the versions
# resolved from requirements.txt must be preserved - confirm before changing.
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir --force-reinstall opencv-python-headless
# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime).
# Models are baked into the image so container starts are fast and