# ETL scheduler image: Python 3.11 with the Microsoft ODBC Driver 17 for SQL
# Server and the mssql-tools command-line utilities.
#
# The base is pinned to the bookworm variant because the Microsoft apt source
# configured below is hard-coded to debian/12 (bookworm); a floating "slim" tag
# can move to a newer Debian release and no longer match that repository.
FROM python:3.11-slim-bookworm

# Set working directory
WORKDIR /app

# Install system dependencies and ODBC drivers.
# - --no-install-recommends keeps the layer minimal; ca-certificates is listed
#   explicitly so the HTTPS downloads do not rely on a recommended package.
# - The signing key is downloaded to a file (curl -f fails on HTTP errors) and
#   then de-armored, instead of piping curl into gpg: under /bin/sh without
#   pipefail a failed download in a pipe would go unnoticed.
# - apt on bookworm handles https sources natively, so apt-transport-https is
#   not installed.
# - apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg \
        unixodbc \
        unixodbc-dev \
    && curl -fsSL https://packages.microsoft.com/keys/microsoft.asc -o /tmp/microsoft.asc \
    && gpg --dearmor -o /usr/share/keyrings/microsoft-prod.gpg /tmp/microsoft.asc \
    && rm -f /tmp/microsoft.asc \
    && echo "deb [arch=amd64,arm64,armhf signed-by=/usr/share/keyrings/microsoft-prod.gpg] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/mssql-release.list \
    && apt-get update \
    && ACCEPT_EULA=Y apt-get install -y --no-install-recommends msodbcsql17 mssql-tools \
    && rm -rf /var/lib/apt/lists/*

# Add SQL Server tools to PATH
ENV PATH="${PATH}:/opt/mssql-tools/bin"

# Copy requirements and install Python dependencies before the application
# code, so this layer stays cached until requirements-etl.txt changes.
COPY requirements-etl.txt .
RUN pip install --no-cache-dir -r requirements-etl.txt

# Copy ETL code
COPY etl/ ./etl/

# Copy make configuration for filtering
COPY makes.json /app/makes.json

# Create logs and data directories
RUN mkdir -p /app/logs /app/data

# Make /app importable so "etl" resolves as a top-level package
ENV PYTHONPATH=/app

# Port documented as used for health checks. Note that the HEALTHCHECK below
# runs an in-process Python probe and does not connect to this port.
EXPOSE 8001

# NOTE(review): no USER instruction is set, so the container runs as root.
# Switching to a non-root user is recommended, but /app/logs and /app/data may
# be mounted from outside the image -- confirm ownership expectations first.

# Health check: healthy when etl.connections.test_connections() returns a
# truthy value. "|| exit 1" maps any other non-zero status to 1 (unhealthy).
HEALTHCHECK --interval=60s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import sys; sys.path.append('/app'); from etl.connections import test_connections; sys.exit(0 if test_connections() else 1)" || exit 1

# Run ETL scheduler
CMD ["python", "-m", "etl.main"]