diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 02e1479..692b255 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -98,7 +98,8 @@ "mcp__brave-search__brave_web_search", "mcp__firecrawl__firecrawl_search", "Bash(ssh:*)", - "Bash(git checkout:*)" + "Bash(git checkout:*)", + "Bash(git stash:*)" ], "deny": [] } diff --git a/backend/Dockerfile b/backend/Dockerfile index 701ac0a..94f93a3 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -4,7 +4,7 @@ FROM node:20-alpine AS builder # Install build dependencies -RUN apk add --no-cache dumb-init git +RUN apk add --no-cache dumb-init git curl # Set working directory WORKDIR /app diff --git a/data/vehicle-etl/README.md b/data/vehicle-etl/README.md index edaabb3..d784997 100644 --- a/data/vehicle-etl/README.md +++ b/data/vehicle-etl/README.md @@ -34,8 +34,8 @@ Step 1: Fetch Data from VehAPI python3 vehapi_fetch_snapshot.py --min-year 2020 --max-year 2020 # Full ETL workflow - ./reset_database.sh # Clear old data - python3 vehapi_fetch_snapshot.py # Fetch from API - python3 etl_generate_sql.py --snapshot-path snapshots/ # Generate SQL - ./import_data.sh # Import to Postgres - docker compose exec mvp-redis redis-cli FLUSHALL # Flush Redis Cache for front end + ./reset_database.sh # Clear old data + python3 vehapi_fetch_snapshot.py # Fetch from API + python3 etl_generate_sql.py --snapshot-path snapshots/*.sqlite # Generate SQL + ./import_data.sh # Import to Postgres + docker compose exec mvp-redis redis-cli FLUSHALL # Flush Redis Cache for front end diff --git a/data/vehicle-etl/merge_snapshots.py b/data/vehicle-etl/merge_snapshots.py new file mode 100644 index 0000000..1e14d9e --- /dev/null +++ b/data/vehicle-etl/merge_snapshots.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Merges two VehAPI snapshot databases into a single consolidated database. +Handles deduplication via PRIMARY KEY constraint. 
+""" + +from __future__ import annotations + +import sqlite3 +import sys +from pathlib import Path + + +def merge_databases(db1_path: Path, db2_path: Path, output_path: Path) -> dict: + """Merge two snapshot databases into one, deduplicating by PRIMARY KEY.""" + + if output_path.exists(): + output_path.unlink() + print(f"[info] Removed existing output file: {output_path}") + + conn = sqlite3.connect(output_path) + conn.execute("PRAGMA journal_mode=WAL;") + conn.execute("PRAGMA synchronous=NORMAL;") + + # Create target schema (pairs table only) + conn.execute(""" + CREATE TABLE pairs( + year INT, + make TEXT, + model TEXT, + trim TEXT, + engine_display TEXT, + engine_canon TEXT, + engine_bucket TEXT, + trans_display TEXT, + trans_canon TEXT, + trans_bucket TEXT, + PRIMARY KEY(year, make, model, trim, engine_canon, trans_canon) + ) + """) + conn.commit() + + # Attach source databases + conn.execute(f"ATTACH DATABASE '{db1_path}' AS db1") + conn.execute(f"ATTACH DATABASE '{db2_path}' AS db2") + + # Insert from first database + print(f"[info] Inserting records from {db1_path.name}...") + cursor = conn.execute(""" + INSERT OR IGNORE INTO pairs + SELECT year, make, model, trim, engine_display, engine_canon, engine_bucket, + trans_display, trans_canon, trans_bucket + FROM db1.pairs + """) + db1_inserted = cursor.rowcount + conn.commit() + print(f"[info] Inserted {db1_inserted:,} records from {db1_path.name}") + + # Insert from second database (duplicates ignored) + print(f"[info] Inserting records from {db2_path.name}...") + cursor = conn.execute(""" + INSERT OR IGNORE INTO pairs + SELECT year, make, model, trim, engine_display, engine_canon, engine_bucket, + trans_display, trans_canon, trans_bucket + FROM db2.pairs + """) + db2_inserted = cursor.rowcount + conn.commit() + print(f"[info] Inserted {db2_inserted:,} new records from {db2_path.name}") + + # Detach source databases + conn.execute("DETACH DATABASE db1") + conn.execute("DETACH DATABASE db2") + + # Get final 
stats + total_count = conn.execute("SELECT COUNT(*) FROM pairs").fetchone()[0] + min_year = conn.execute("SELECT MIN(year) FROM pairs").fetchone()[0] + max_year = conn.execute("SELECT MAX(year) FROM pairs").fetchone()[0] + + # Optimize the database + print("[info] Running VACUUM to optimize database...") + conn.execute("VACUUM") + conn.close() + + stats = { + "db1_inserted": db1_inserted, + "db2_inserted": db2_inserted, + "total_records": total_count, + "min_year": min_year, + "max_year": max_year, + "output_path": str(output_path), + } + + return stats + + +def main() -> int: + base_dir = Path(__file__).resolve().parent + snapshots_dir = base_dir / "snapshots" + + db1_path = snapshots_dir / "1980-2007.sqlite" + db2_path = snapshots_dir / "2007-2022.sqlite" + output_path = snapshots_dir / "1980-2022-vehicles.sqlite" + + # Validate source files exist + if not db1_path.exists(): + print(f"[error] Source database not found: {db1_path}", file=sys.stderr) + return 1 + if not db2_path.exists(): + print(f"[error] Source database not found: {db2_path}", file=sys.stderr) + return 1 + + print(f"[info] Merging databases...") + print(f" Source 1: {db1_path}") + print(f" Source 2: {db2_path}") + print(f" Output: {output_path}") + print() + + stats = merge_databases(db1_path, db2_path, output_path) + + print() + print(f"[done] Merge complete!") + print(f" Total records: {stats['total_records']:,}") + print(f" Year range: {stats['min_year']} - {stats['max_year']}") + print(f" Output: {stats['output_path']}") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docker-compose.yml b/docker-compose.yml index 6347551..0e46b9a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -143,7 +143,7 @@ services: # Database Services - Application PostgreSQL mvp-postgres: - image: postgres:15-alpine + image: postgres:18-alpine container_name: mvp-postgres restart: unless-stopped environment: @@ -168,7 +168,7 @@ services: # Database Services - Application Redis 
 mvp-redis: - image: redis:7-alpine + image: redis:8.4-alpine container_name: mvp-redis restart: unless-stopped command: redis-server --appendonly yes diff --git a/docs/PROMPTS.md b/docs/PROMPTS.md index 6a88b1c..7fa3e50 100644 --- a/docs/PROMPTS.md +++ b/docs/PROMPTS.md @@ -20,16 +20,16 @@ Your task is to create a plan that can be dispatched to a seprate set of AI agen *** ROLE *** -You are a senior web deveoper specializing in nodejs, typescript along with CSS and HTML. - +You are a senior DevOps SRE in charge of improving the deployment of this app. *** ACTION *** -- You need to create a plan to upgrade the node packages to the latest versions that are compatible with each other. Some of the packages are very old and while we might not get to the very latest we need to get as close as possible for security reasons. -- Use context7 mcp to find the latest versions and compatibilities. +- You need to add a deployment step where the vehicle-etl process that's currently configured to be manual, is executed during the gitlab deployment. +- Research the directory @data/vehicle-etl/ to understand the process. +- The source database will be @data/vehicle-etl/snapshots/vehicle-drop-down.sqlite - Read README.md CLAUDE.md and AI-INDEX.md to understand this code repository in the context of this change. *** CONTEXT *** -- This is a modern web app for managing a vehicle fleet. It has both a desktop and mobile versions of the site that both need to maintain feature parity. The UX will be different between mobile and desktop due to resolution differences but that's it. +- This is a modern web app for managing a vehicle fleet. It has both a desktop and mobile versions of the site that both need to maintain feature parity. It's currently deployed via docker compose but in the future will be deployed via k8s. *** EXECUTE *** - Create a plan that can be tasked to sub agents to explore and find dependancies between all the packages. Make sure nothing breaks. 
Use context7, brave search and firecrawl MCP's extensively. Make no assumptions. \ No newline at end of file +- Update the CI plan to run the import process on deployment. \ No newline at end of file diff --git a/frontend/Dockerfile b/frontend/Dockerfile index dd0c2bb..9a041d6 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -2,7 +2,7 @@ # Stage 1: Base with dependencies FROM node:20-alpine AS base -RUN apk add --no-cache dumb-init +RUN apk add --no-cache dumb-init curl WORKDIR /app COPY package*.json ./