Update Dockerfile with curl

This commit is contained in:
Eric Gullickson
2025-12-20 09:50:00 -06:00
parent 2174a1b1e3
commit b9801d5f3b
7 changed files with 148 additions and 16 deletions

View File

@@ -98,7 +98,8 @@
"mcp__brave-search__brave_web_search",
"mcp__firecrawl__firecrawl_search",
"Bash(ssh:*)",
"Bash(git checkout:*)"
"Bash(git checkout:*)",
"Bash(git stash:*)"
],
"deny": []
}

View File

@@ -4,7 +4,7 @@
FROM node:20-alpine AS builder
# Install build dependencies
RUN apk add --no-cache dumb-init git
RUN apk add --no-cache dumb-init git curl
# Set working directory
WORKDIR /app

View File

@@ -34,8 +34,8 @@ Step 1: Fetch Data from VehAPI
python3 vehapi_fetch_snapshot.py --min-year 2020 --max-year 2020
# Full ETL workflow
./reset_database.sh # Clear old data
python3 vehapi_fetch_snapshot.py # Fetch from API
python3 etl_generate_sql.py --snapshot-path snapshots/<date> # Generate SQL
./import_data.sh # Import to Postgres
docker compose exec mvp-redis redis-cli FLUSHALL # Flush Redis Cache for front end
./reset_database.sh # Clear old data
python3 vehapi_fetch_snapshot.py # Fetch from API
python3 etl_generate_sql.py --snapshot-path snapshots/*.sqlite # Generate SQL
./import_data.sh # Import to Postgres
docker compose exec mvp-redis redis-cli FLUSHALL # Flush Redis Cache for front end

View File

@@ -0,0 +1,131 @@
#!/usr/bin/env python3
"""
Merges two VehAPI snapshot databases into a single consolidated database.
Handles deduplication via PRIMARY KEY constraint.
"""
from __future__ import annotations
import sqlite3
import sys
from pathlib import Path
def merge_databases(db1_path: Path, db2_path: Path, output_path: Path) -> dict:
    """Merge two snapshot databases into one, deduplicating by PRIMARY KEY.

    Rows from ``db1_path`` are inserted first; rows from ``db2_path`` whose
    PRIMARY KEY (year, make, model, trim, engine_canon, trans_canon) already
    exists are silently skipped via ``INSERT OR IGNORE``.

    Args:
        db1_path: First source snapshot database (must contain a ``pairs`` table).
        db2_path: Second source snapshot database (same schema).
        output_path: Destination database; removed first if it already exists.

    Returns:
        dict with keys ``db1_inserted``, ``db2_inserted``, ``total_records``,
        ``min_year``, ``max_year``, and ``output_path``.
    """
    if output_path.exists():
        output_path.unlink()
        print(f"[info] Removed existing output file: {output_path}")
    conn = sqlite3.connect(output_path)
    # try/finally guarantees the connection (and the attached sources) are
    # released even if a statement fails partway through the merge.
    try:
        conn.execute("PRAGMA journal_mode=WAL;")
        conn.execute("PRAGMA synchronous=NORMAL;")
        # Create target schema (pairs table only)
        conn.execute("""
            CREATE TABLE pairs(
                year INT,
                make TEXT,
                model TEXT,
                trim TEXT,
                engine_display TEXT,
                engine_canon TEXT,
                engine_bucket TEXT,
                trans_display TEXT,
                trans_canon TEXT,
                trans_bucket TEXT,
                PRIMARY KEY(year, make, model, trim, engine_canon, trans_canon)
            )
        """)
        conn.commit()
        # Attach source databases. Bind the paths as parameters instead of
        # interpolating them into the SQL, so a path containing a quote
        # character cannot break (or inject into) the statement.
        conn.execute("ATTACH DATABASE ? AS db1", (str(db1_path),))
        conn.execute("ATTACH DATABASE ? AS db2", (str(db2_path),))
        # Insert from first database
        print(f"[info] Inserting records from {db1_path.name}...")
        cursor = conn.execute("""
            INSERT OR IGNORE INTO pairs
            SELECT year, make, model, trim, engine_display, engine_canon, engine_bucket,
                   trans_display, trans_canon, trans_bucket
            FROM db1.pairs
        """)
        db1_inserted = cursor.rowcount
        conn.commit()
        print(f"[info] Inserted {db1_inserted:,} records from {db1_path.name}")
        # Insert from second database (duplicates ignored)
        print(f"[info] Inserting records from {db2_path.name}...")
        cursor = conn.execute("""
            INSERT OR IGNORE INTO pairs
            SELECT year, make, model, trim, engine_display, engine_canon, engine_bucket,
                   trans_display, trans_canon, trans_bucket
            FROM db2.pairs
        """)
        db2_inserted = cursor.rowcount
        conn.commit()
        print(f"[info] Inserted {db2_inserted:,} new records from {db2_path.name}")
        # Detach source databases before VACUUM (VACUUM must run on the
        # main database with no open transaction).
        conn.execute("DETACH DATABASE db1")
        conn.execute("DETACH DATABASE db2")
        # Get final stats
        total_count = conn.execute("SELECT COUNT(*) FROM pairs").fetchone()[0]
        min_year = conn.execute("SELECT MIN(year) FROM pairs").fetchone()[0]
        max_year = conn.execute("SELECT MAX(year) FROM pairs").fetchone()[0]
        # Optimize the database
        print("[info] Running VACUUM to optimize database...")
        conn.execute("VACUUM")
    finally:
        conn.close()
    stats = {
        "db1_inserted": db1_inserted,
        "db2_inserted": db2_inserted,
        "total_records": total_count,
        "min_year": min_year,
        "max_year": max_year,
        "output_path": str(output_path),
    }
    return stats
def main() -> int:
    """CLI entry point: merge the two fixed-name snapshots under ./snapshots.

    Expects ``snapshots/1980-2007.sqlite`` and ``snapshots/2007-2022.sqlite``
    next to this script; writes ``snapshots/1980-2022-vehicles.sqlite``.

    Returns:
        0 on success, 1 if either source database is missing.
    """
    base_dir = Path(__file__).resolve().parent
    snapshots_dir = base_dir / "snapshots"
    db1_path = snapshots_dir / "1980-2007.sqlite"
    db2_path = snapshots_dir / "2007-2022.sqlite"
    output_path = snapshots_dir / "1980-2022-vehicles.sqlite"
    # Validate source files exist before doing any work.
    for src in (db1_path, db2_path):
        if not src.exists():
            print(f"[error] Source database not found: {src}", file=sys.stderr)
            return 1
    # Plain strings here: no placeholders, so f-prefixes were dead weight.
    print("[info] Merging databases...")
    print(f"  Source 1: {db1_path}")
    print(f"  Source 2: {db2_path}")
    print(f"  Output:   {output_path}")
    print()
    stats = merge_databases(db1_path, db2_path, output_path)
    print()
    print("[done] Merge complete!")
    print(f"  Total records: {stats['total_records']:,}")
    print(f"  Year range:    {stats['min_year']} - {stats['max_year']}")
    print(f"  Output:        {stats['output_path']}")
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -143,7 +143,7 @@ services:
# Database Services - Application PostgreSQL
mvp-postgres:
image: postgres:15-alpine
image: postgres:18-alpine
container_name: mvp-postgres
restart: unless-stopped
environment:
@@ -168,7 +168,7 @@ services:
# Database Services - Application Redis
mvp-redis:
image: redis:7-alpine
image: redis:8.4-alpine
container_name: mvp-redis
restart: unless-stopped
command: redis-server --appendonly yes

View File

@@ -20,16 +20,16 @@ Your task is to create a plan that can be dispatched to a separate set of AI agen
*** ROLE ***
You are a senior web developer specializing in nodejs, typescript along with CSS and HTML.
You are a senior DevOps SRE in charge of improving the deployment of this app.
*** ACTION ***
- You need to create a plan to upgrade the node packages to the latest versions that are compatible with each other. Some of the packages are very old and while we might not get to the very latest we need to get as close as possible for security reasons.
- Use context7 mcp to find the latest versions and compatibilities.
- You need to add a deployment step where the vehicle-etl process that's currently configured to be manual, is executed during the gitlab deployment.
- Research the directory @data/vehicle-etl/ to understand the process.
- The source database will be @data/vehicle-etl/snapshots/vehicle-drop-down.sqlite
- Read README.md CLAUDE.md and AI-INDEX.md to understand this code repository in the context of this change.
*** CONTEXT ***
- This is a modern web app for managing a vehicle fleet. It has both a desktop and mobile versions of the site that both need to maintain feature parity. The UX will be different between mobile and desktop due to resolution differences but that's it.
- This is a modern web app for managing a vehicle fleet. It has both a desktop and mobile versions of the site that both need to maintain feature parity. It's currently deployed via docker compose but in the future will be deployed via k8s.
*** EXECUTE ***
Create a plan that can be tasked to sub agents to explore and find dependencies between all the packages. Make sure nothing breaks. Use context7, brave search and firecrawl MCPs extensively. Make no assumptions.
- Update the CI plan to run the import process on deployment.

View File

@@ -2,7 +2,7 @@
# Stage 1: Base with dependencies
FROM node:20-alpine AS base
RUN apk add --no-cache dumb-init
RUN apk add --no-cache dumb-init curl
WORKDIR /app
COPY package*.json ./