Update Dockerfile with curl

This commit is contained in:
Eric Gullickson
2025-12-20 09:50:00 -06:00
parent 2174a1b1e3
commit b9801d5f3b
7 changed files with 148 additions and 16 deletions

View File

@@ -98,7 +98,8 @@
"mcp__brave-search__brave_web_search",
"mcp__firecrawl__firecrawl_search",
"Bash(ssh:*)",
"Bash(git checkout:*)"
"Bash(git checkout:*)",
"Bash(git stash:*)"
],
"deny": []
}

View File

@@ -4,7 +4,7 @@
FROM node:20-alpine AS builder
# Install build dependencies
RUN apk add --no-cache dumb-init git
RUN apk add --no-cache dumb-init git curl
# Set working directory
WORKDIR /app

View File

@@ -34,8 +34,8 @@ Step 1: Fetch Data from VehAPI
python3 vehapi_fetch_snapshot.py --min-year 2020 --max-year 2020
# Full ETL workflow
./reset_database.sh # Clear old data
python3 vehapi_fetch_snapshot.py # Fetch from API
python3 etl_generate_sql.py --snapshot-path snapshots/<date> # Generate SQL
./import_data.sh # Import to Postgres
docker compose exec mvp-redis redis-cli FLUSHALL # Flush Redis Cache for front end
./reset_database.sh # Clear old data
python3 vehapi_fetch_snapshot.py # Fetch from API
python3 etl_generate_sql.py --snapshot-path snapshots/*.sqlite # Generate SQL
./import_data.sh # Import to Postgres
docker compose exec mvp-redis redis-cli FLUSHALL # Flush Redis Cache for front end

View File

@@ -0,0 +1,131 @@
#!/usr/bin/env python3
"""
Merges two VehAPI snapshot databases into a single consolidated database.
Handles deduplication via PRIMARY KEY constraint.
"""
from __future__ import annotations
import sqlite3
import sys
from pathlib import Path
def merge_databases(db1_path: Path, db2_path: Path, output_path: Path) -> dict:
    """Merge two snapshot databases into one, deduplicating by PRIMARY KEY.

    Rows from ``db1_path`` are inserted first; rows from ``db2_path`` whose
    PRIMARY KEY (year, make, model, trim, engine_canon, trans_canon) already
    exists are silently skipped via ``INSERT OR IGNORE``.

    Args:
        db1_path: First source snapshot database (must contain a ``pairs`` table).
        db2_path: Second source snapshot database (same schema).
        output_path: Destination database; removed first if it already exists.

    Returns:
        dict with keys ``db1_inserted``, ``db2_inserted``, ``total_records``,
        ``min_year``, ``max_year``, and ``output_path``.
    """
    if output_path.exists():
        output_path.unlink()
        print(f"[info] Removed existing output file: {output_path}")
    conn = sqlite3.connect(output_path)
    # try/finally guarantees the connection (and the attached sources) are
    # released even if a statement fails partway through the merge.
    try:
        conn.execute("PRAGMA journal_mode=WAL;")
        conn.execute("PRAGMA synchronous=NORMAL;")
        # Create target schema (pairs table only)
        conn.execute("""
            CREATE TABLE pairs(
                year INT,
                make TEXT,
                model TEXT,
                trim TEXT,
                engine_display TEXT,
                engine_canon TEXT,
                engine_bucket TEXT,
                trans_display TEXT,
                trans_canon TEXT,
                trans_bucket TEXT,
                PRIMARY KEY(year, make, model, trim, engine_canon, trans_canon)
            )
        """)
        conn.commit()
        # Attach source databases. Bind the paths as parameters instead of
        # interpolating them into the SQL, so a path containing a quote
        # character cannot break (or inject into) the statement.
        conn.execute("ATTACH DATABASE ? AS db1", (str(db1_path),))
        conn.execute("ATTACH DATABASE ? AS db2", (str(db2_path),))
        # Insert from first database
        print(f"[info] Inserting records from {db1_path.name}...")
        cursor = conn.execute("""
            INSERT OR IGNORE INTO pairs
            SELECT year, make, model, trim, engine_display, engine_canon, engine_bucket,
                   trans_display, trans_canon, trans_bucket
            FROM db1.pairs
        """)
        db1_inserted = cursor.rowcount
        conn.commit()
        print(f"[info] Inserted {db1_inserted:,} records from {db1_path.name}")
        # Insert from second database (duplicates ignored)
        print(f"[info] Inserting records from {db2_path.name}...")
        cursor = conn.execute("""
            INSERT OR IGNORE INTO pairs
            SELECT year, make, model, trim, engine_display, engine_canon, engine_bucket,
                   trans_display, trans_canon, trans_bucket
            FROM db2.pairs
        """)
        db2_inserted = cursor.rowcount
        conn.commit()
        print(f"[info] Inserted {db2_inserted:,} new records from {db2_path.name}")
        # Detach source databases before VACUUM (VACUUM must run on the
        # main database with no open transaction).
        conn.execute("DETACH DATABASE db1")
        conn.execute("DETACH DATABASE db2")
        # Get final stats
        total_count = conn.execute("SELECT COUNT(*) FROM pairs").fetchone()[0]
        min_year = conn.execute("SELECT MIN(year) FROM pairs").fetchone()[0]
        max_year = conn.execute("SELECT MAX(year) FROM pairs").fetchone()[0]
        # Optimize the database
        print("[info] Running VACUUM to optimize database...")
        conn.execute("VACUUM")
    finally:
        conn.close()
    stats = {
        "db1_inserted": db1_inserted,
        "db2_inserted": db2_inserted,
        "total_records": total_count,
        "min_year": min_year,
        "max_year": max_year,
        "output_path": str(output_path),
    }
    return stats
def main() -> int:
    """CLI entry point: merge the two fixed-name snapshots under ./snapshots.

    Expects ``snapshots/1980-2007.sqlite`` and ``snapshots/2007-2022.sqlite``
    next to this script; writes ``snapshots/1980-2022-vehicles.sqlite``.

    Returns:
        0 on success, 1 if either source database is missing.
    """
    base_dir = Path(__file__).resolve().parent
    snapshots_dir = base_dir / "snapshots"
    db1_path = snapshots_dir / "1980-2007.sqlite"
    db2_path = snapshots_dir / "2007-2022.sqlite"
    output_path = snapshots_dir / "1980-2022-vehicles.sqlite"
    # Validate source files exist before doing any work.
    for src in (db1_path, db2_path):
        if not src.exists():
            print(f"[error] Source database not found: {src}", file=sys.stderr)
            return 1
    # Plain strings here: no placeholders, so f-prefixes were dead weight.
    print("[info] Merging databases...")
    print(f"  Source 1: {db1_path}")
    print(f"  Source 2: {db2_path}")
    print(f"  Output:   {output_path}")
    print()
    stats = merge_databases(db1_path, db2_path, output_path)
    print()
    print("[done] Merge complete!")
    print(f"  Total records: {stats['total_records']:,}")
    print(f"  Year range:    {stats['min_year']} - {stats['max_year']}")
    print(f"  Output:        {stats['output_path']}")
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -143,7 +143,7 @@ services:
# Database Services - Application PostgreSQL
mvp-postgres:
image: postgres:15-alpine
image: postgres:18-alpine
container_name: mvp-postgres
restart: unless-stopped
environment:
@@ -168,7 +168,7 @@ services:
# Database Services - Application Redis
mvp-redis:
image: redis:7-alpine
image: redis:8.4-alpine
container_name: mvp-redis
restart: unless-stopped
command: redis-server --appendonly yes

View File

@@ -20,16 +20,16 @@ Your task is to create a plan that can be dispatched to a separate set of AI agen
*** ROLE ***
You are a senior web developer specializing in nodejs, typescript along with CSS and HTML.
You are a senior DevOps SRE in charge of improving the deployment of this app.
*** ACTION ***
- You need to create a plan to upgrade the node packages to the latest versions that are compatible with each other. Some of the packages are very old and while we might not get to the very latest we need to get as close as possible for security reasons.
- Use context7 mcp to find the latest versions and compatibilities.
- You need to add a deployment step where the vehicle-etl process that's currently configured to be manual, is executed during the gitlab deployment.
- Research the directory @data/vehicle-etl/ to understand the process.
- The source database will be @data/vehicle-etl/snapshots/vehicle-drop-down.sqlite
- Read README.md CLAUDE.md and AI-INDEX.md to understand this code repository in the context of this change.
*** CONTEXT ***
- This is a modern web app for managing a vehicle fleet. It has both a desktop and mobile versions of the site that both need to maintain feature parity. The UX will be different between mobile and desktop due to resolution differences but that's it.
- This is a modern web app for managing a vehicle fleet. It has both a desktop and mobile versions of the site that both need to maintain feature parity. It's currently deployed via docker compose but in the future will be deployed via k8s.
*** EXECUTE ***
Create a plan that can be tasked to sub agents to explore and find dependencies between all the packages. Make sure nothing breaks. Use context7, brave search and firecrawl MCPs extensively. Make no assumptions.
- Update the CI plan to run the import process on deployment.

View File

@@ -2,7 +2,7 @@
# Stage 1: Base with dependencies
FROM node:20-alpine AS base
RUN apk add --no-cache dumb-init
RUN apk add --no-cache dumb-init curl
WORKDIR /app
COPY package*.json ./