From 3852ec9f7158604601d9a02eff3f4fe80be67a73 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sun, 14 Dec 2025 12:00:42 -0600 Subject: [PATCH] Documentation Updates --- .claude/settings.local.json | 4 +- STATION-CHANGES.md | 195 ++++++++++ data/make-model-import/etl_generate_sql.py | 2 +- AUDIT.md => docs/AUDIT.md | 0 docs/PROMPTS.md | 22 +- .../changes/BULK-DELETE-ENDPOINT-DOCS.md | 0 .../changes/SECURITY-FIXES.md | 0 scripts/README.md | 240 ------------ scripts/generate-feature-capsule.sh | 95 ----- scripts/load_vehicle_data.py | 342 ------------------ scripts/run-data-load.sh | 30 -- 11 files changed, 217 insertions(+), 713 deletions(-) create mode 100644 STATION-CHANGES.md rename AUDIT.md => docs/AUDIT.md (100%) rename BULK-DELETE-ENDPOINT-DOCS.md => docs/changes/BULK-DELETE-ENDPOINT-DOCS.md (100%) rename SECURITY-FIXES.md => docs/changes/SECURITY-FIXES.md (100%) delete mode 100644 scripts/README.md delete mode 100755 scripts/generate-feature-capsule.sh delete mode 100755 scripts/load_vehicle_data.py delete mode 100644 scripts/run-data-load.sh diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 5575cec..a0e7116 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -93,7 +93,9 @@ "Bash(for f in frontend/src/features/stations/types/stations.types.ts frontend/src/features/stations/api/stations.api.ts frontend/src/features/stations/hooks/useStationsSearch.ts)", "Bash(head:*)", "Bash(tail:*)", - "mcp__playwright__browser_close" + "mcp__playwright__browser_close", + "Bash(wc:*)", + "mcp__brave-search__brave_web_search" ], "deny": [] } diff --git a/STATION-CHANGES.md b/STATION-CHANGES.md new file mode 100644 index 0000000..a2996a8 --- /dev/null +++ b/STATION-CHANGES.md @@ -0,0 +1,195 @@ +# Stations (Gas/Fuel) Feature — Dispatchable Change Plan + +This document is written as an execution plan that can be handed to multiple AI agents to implement in parallel. 
+ +## Repo Constraints (Must Follow) + +- Docker-first workflow (production builds): validate changes via `make rebuild` and container logs. +- Mobile + Desktop requirement: every UI change must be validated on both `frontend/src/features/stations/pages/StationsPage.tsx` (desktop) and `frontend/src/features/stations/mobile/StationsMobileScreen.tsx` (mobile). +- Never expose the Google Maps API key to the browser or logs. + +## Scope + +1. Fix broken station photo rendering on stations UI after the “hide Google API key” change. +2. Add navigation links for saved/favorite stations: + - “Navigate in Google” (Google Maps) + - “Navigate in Apple Maps” (Apple Maps) + - “Navigate in Waze” (Waze) + +## Bug: Station Photos Not Displaying + +### Current Implementation (What Exists Today) + +- Frontend cards render an `<img>` via MUI `CardMedia` when `station.photoReference` is present: + - `frontend/src/features/stations/components/StationCard.tsx` + - URL generation: `frontend/src/features/stations/utils/photo-utils.ts` → `/api/stations/photo/:reference` +- Backend exposes a proxy endpoint that fetches the Google Places photo (server-side, using the secret key): + - Route: `GET /api/stations/photo/:reference` + - `backend/src/features/stations/api/stations.routes.ts` + - `backend/src/features/stations/api/stations.controller.ts` + - Google client: `backend/src/features/stations/external/google-maps/google-maps.client.ts` (`fetchPhoto`) + +### Likely Root Cause (Agents Must Confirm) + +The photo endpoint is protected by `fastify.authenticate`, but `<img>` requests do not include the Authorization header. This results in `401 Unauthorized` responses and broken images. + +Second thing to confirm while debugging: +- Verify what `Station.photoReference` contains at runtime: + - expected: Google `photo_reference` token + - risk: code/docs mismatch where `photoReference` became a URL like `/api/stations/photo/{reference}`, causing double-encoding by `getStationPhotoUrl()`. 
+ +### Repro Checklist (Fast Confirmation) + +- Open stations page and observe broken images in browser devtools Network: + - `GET /api/stations/photo/...` should show `401` if auth-header issue is the cause. +- Confirm backend logs show JWT auth failure for photo requests. + +## Decision: Image Strategy (Selected) + +Selected: **Option A1** (keep images; authenticated blob fetch in frontend; photo endpoint remains JWT-protected). + +### Option A (Keep Images): Fix Auth Mismatch Without Exposing API Key + +#### Option A1 (Recommended): Fetch Photo as Blob via Authenticated XHR + +Why: Keeps `/api/stations/photo/:reference` protected (prevents public key abuse), avoids putting JWT in query params, and avoids exposing the Google API key. + +Implementation outline: +- Frontend: replace direct `<img>` usage with an authenticated fetch that includes the JWT (via existing Axios `apiClient` interceptors), then render via `blob:` object URL. + - Add a small component like `StationPhoto` used by `StationCard`: + - `apiClient.get('/stations/photo/:reference', { responseType: 'blob' })` + - `URL.createObjectURL(blob)` for display + - `URL.revokeObjectURL` cleanup on unmount / reference change + - graceful fallback (hide image) on 401/500 +- Backend: no route auth changes required. + +Tradeoffs: +- Slightly more frontend code, but minimal security risk. +- Must ensure caching behavior is acceptable (browser cache won’t cache `blob:` URLs; rely on backend caching headers + client-side memoization). + +#### Option A2 (Simplest Code, Higher Risk): Make Photo Endpoint Public + +Why: Restores `<img>` behavior with minimal frontend work. + +Implementation outline: +- Backend: remove `preHandler: [fastify.authenticate]` from `/stations/photo/:reference`. 
+- Add lightweight protections to reduce abuse (choose as many as feasible without adding heavy deps): + - strict input validation (length/charset) for `reference` + - low maxWidth clamp and no arbitrary URL fetching + - maintain `Cache-Control` header (already present) + - optionally add server-side rate limit (only if repo already uses a rate-limit plugin; avoid introducing new infra unless necessary) + +Tradeoffs: +- Anyone can hit `/api/stations/photo/:reference` and spend your Google quota. + +### Option B (Remove Images): Simplify Cards + +Why: If image delivery adds too much complexity or risk, remove images from station cards. + +Implementation outline: +- Frontend: remove `CardMedia` photo block from `StationCard` and any other station photo rendering. +- Leave `photoReference` in API/types untouched for now (or remove later as a cleanup task, separate PR). +- Update any tests that assert on image presence. + +Tradeoffs: +- Reduced UX polish, but simplest and most robust. + +## Feature: Navigation Links on Saved/Favorite Stations + +### UX Requirements + +- On saved station UI (desktop + mobile), provide 3 explicit navigation options: + - Google Maps + - Apple Maps + - Waze +- “Saved/favorite” is interpreted as “stations in the Saved list”; favorites are a subset. + +### URL Construction (Preferred) + +Use coordinates if available; fall back to address query if not. + +- Google Maps: + - Preferred: `https://www.google.com/maps/dir/?api=1&destination=LAT,LNG&destination_place_id=PLACE_ID` + - Fallback: `https://www.google.com/maps/search/?api=1&query=ENCODED_QUERY` +- Apple Maps: + - Preferred: `https://maps.apple.com/?daddr=LAT,LNG` + - Fallback: `https://maps.apple.com/?q=ENCODED_QUERY` +- Waze: + - Preferred: `https://waze.com/ul?ll=LAT,LNG&navigate=yes` + - Fallback: `https://waze.com/ul?q=ENCODED_QUERY&navigate=yes` + +Important: some saved stations may have `latitude/longitude = 0` if cache miss; treat `(0,0)` as “no coordinates”. 
+ +### UI Placement Recommendation + +- Desktop saved list: add a “Navigate” icon button that opens a small menu with the 3 links (cleaner than inline links inside `ListItemText`). + - File: `frontend/src/features/stations/components/SavedStationsList.tsx` +- Mobile bottom sheet (station details): add a “Navigate” section with the same 3 links (buttons or list items). + - File: `frontend/src/features/stations/mobile/StationsMobileScreen.tsx` + +## Work Breakdown for Multiple Agents + +### Agent 1 — Confirm Root Cause + Backend Adjustments (If Needed) + +Deliverables: +- Confirm whether photo requests return `401` due to missing Authorization. +- Confirm whether `photoReference` is a raw reference token vs a URL string. +- Implement backend changes only if Option A2 is chosen. + +Files likely touched (Option A2 only): +- `backend/src/features/stations/api/stations.routes.ts` (remove auth preHandler on photo route) +- `backend/src/features/stations/api/stations.controller.ts` (add stricter validation; keep cache headers) +- `backend/src/features/stations/docs/API.md` (update auth expectations for photo endpoint) + +### Agent 2 — Frontend Photo Fix (Option A1) OR Photo Removal (Option B) + +Deliverables: +- Option A1: implement authenticated blob photo loading for station cards. +- Option B: remove station photos from cards cleanly (no layout regressions). + +Files likely touched: +- `frontend/src/features/stations/components/StationCard.tsx` +- Option A1: + - Add `frontend/src/features/stations/components/StationPhoto.tsx` (or similar) + - Potentially update `frontend/src/features/stations/utils/photo-utils.ts` + - Add unit tests under `frontend/src/features/stations/__tests__/` + +### Agent 3 — Navigation Links for Saved Stations (Desktop + Mobile) + +Deliverables: +- Create a single URL-builder utility with tests. +- Add a “Navigate” menu/section in saved stations UI (desktop + mobile). 
+ +Files likely touched: +- `frontend/src/features/stations/utils/` (new `navigation-links.ts`) +- `frontend/src/features/stations/components/SavedStationsList.tsx` +- `frontend/src/features/stations/mobile/StationsMobileScreen.tsx` +- Optional: reuse in `frontend/src/features/stations/components/StationCard.tsx` (only if product wants it outside Saved) + +### Agent 4 — Tests + QA Pass (Update What Breaks) + +Deliverables: +- Update/extend tests to cover: + - navigation menu/links present for saved stations + - photo rendering behavior per chosen option +- Ensure both desktop and mobile flows still pass basic E2E checks. + +Files likely touched: +- `frontend/cypress/e2e/stations.cy.ts` +- `frontend/src/features/stations/__tests__/components/StationCard.test.tsx` +- New tests for `navigation-links.ts` + +## Acceptance Criteria + +- Station photos render on station cards via Option A1 without exposing Google API key (no `401` responses for photo requests in Network). +- Saved stations show 3 navigation options (Google, Apple, Waze) on both desktop and mobile. +- No lint/test regressions; container build succeeds. 
+ +## Validation (Container-First) + +- Rebuild and watch logs: `make rebuild` then `make logs` +- Optional focused logs: `make logs-frontend` and `make logs-backend` +- Run feature tests where available (prefer container exec): + - Backend: `docker compose exec mvp-backend npm test -- features/stations` + - Frontend: `docker compose exec mvp-frontend npm test -- stations` + - E2E: `docker compose exec mvp-frontend npm run e2e` diff --git a/data/make-model-import/etl_generate_sql.py b/data/make-model-import/etl_generate_sql.py index d4d76cd..f911145 100755 --- a/data/make-model-import/etl_generate_sql.py +++ b/data/make-model-import/etl_generate_sql.py @@ -676,7 +676,7 @@ class VehicleSQLGenerator: print(f" - {self.vehicles_sql_file}") print(f" - output/stats.txt") print("\nNext step: Import SQL files into database") - print(" cat output/*.sql | docker exec -i mvp-postgres psql -U postgres -d motovaultpro") + print(" ./import_data.sh") except Exception as e: print(f"\n❌ ETL Pipeline Failed: {e}") diff --git a/AUDIT.md b/docs/AUDIT.md similarity index 100% rename from AUDIT.md rename to docs/AUDIT.md diff --git a/docs/PROMPTS.md b/docs/PROMPTS.md index aca8b23..b617ff8 100644 --- a/docs/PROMPTS.md +++ b/docs/PROMPTS.md @@ -1,6 +1,20 @@ *** CRITICAL ***: Never read this file. Stop and move on. -There are some bugs in the work that was done. -- When you set the drop down for 93 ethonal and 93 ethonal-free, it doesn't persist. -- The search result card for saved gas stations doesn't show if it has 93 -- The delete button doesn't immedately delete the save station. You have to navigate off and back for it to remove. \ No newline at end of file +Read README.md CLAUDE.md and AI-INDEX.md to understand this code repository. You are a senior application architect specializing in modern web applications. +Your task is to create a plan that can be dispatched to a separate set of AI agents to execute. 
Write this plan out in STATION-CHANGES.md + +*** FEATURE TO FOCUS ON *** +The gas / fuel stations functionality + +*** BUGS TO FIX *** +- There was a change done to hide the Google API key in displaying the images for fuel stations. +- This broke the display of images on the gas/fuel station screen. Plan the fix for this. +- If this adds too much complexity, plan to remove the image from the gas station cards. +- Prompt the user for which plan to implement. + +*** CHANGES TO IMPLEMENT *** +- Requirement: Add links on saved/favorite stations +- Links should be +- - "Navigate in Google" with a link to Google Maps +- - "Navigate in Apple Maps" with a link to Apple Maps +- - "Navigate in Waze" with a link to Waze \ No newline at end of file diff --git a/BULK-DELETE-ENDPOINT-DOCS.md b/docs/changes/BULK-DELETE-ENDPOINT-DOCS.md similarity index 100% rename from BULK-DELETE-ENDPOINT-DOCS.md rename to docs/changes/BULK-DELETE-ENDPOINT-DOCS.md diff --git a/SECURITY-FIXES.md b/docs/changes/SECURITY-FIXES.md similarity index 100% rename from SECURITY-FIXES.md rename to docs/changes/SECURITY-FIXES.md diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index 1c7d47f..0000000 --- a/scripts/README.md +++ /dev/null @@ -1,240 +0,0 @@ -# MotoVaultPro Scripts - -Utility scripts for database management and operations. - -## Database Export/Import Scripts - -### Quick Start - -```bash -# Export full database -make db-export - -# Export schema only -make db-export-schema - -# Create timestamped backup -make db-backup - -# Import from file -make db-import-file FILE=database-exports/backup.sql.gz -``` - -### Available Scripts - -#### `export-database.sh` -Exports PostgreSQL database in multiple formats with metadata and instructions. 
- -**Features:** -- Multiple export formats (SQL, custom, directory) -- Automatic compression -- Schema-only or data-only exports -- Table filtering -- Generates import instructions - -**Usage:** -```bash -# Full export -./scripts/export-database.sh - -# Schema only -./scripts/export-database.sh --schema-only - -# Custom format (faster for large databases) -./scripts/export-database.sh --format custom - -# Specific tables -./scripts/export-database.sh --include-table vehicles --include-table fuel_logs - -# Exclude tables -./scripts/export-database.sh --exclude-table audit_logs -``` - -#### `import-database.sh` -Imports PostgreSQL database with safety features and validation. - -**Features:** -- Auto-detects export format -- Automatic backup before import -- Safety confirmations -- Database creation -- Import verification - -**Usage:** -```bash -# Basic import -./scripts/import-database.sh database-exports/backup.sql.gz - -# Create new database -./scripts/import-database.sh --create-db backup.sql.gz - -# Replace existing (with confirmation) -./scripts/import-database.sh --drop-existing backup.sql.gz - -# Automated import (no prompts) -./scripts/import-database.sh --drop-existing --force backup.sql.gz -``` - -### Makefile Shortcuts - -```bash -# Export Commands -make db-export # Full database export -make db-export-schema # Schema only with date -make db-export-custom # Custom format with date -make db-backup # Timestamped backup - -# Import Commands -make db-import # Show import help -make db-import-file FILE=path/to/backup.sql.gz -``` - -### Output Files - -Each export creates three files: - -1. **Export file** (`.sql.gz`, `.dump`, or directory) - - The actual database dump - -2. **Metadata file** (`*_metadata.json`) - - Export timestamp, format, size - - PostgreSQL version - - Export options used - -3. 
**Import instructions** (`*_import_instructions.txt`) - - Step-by-step import guide - - Format-specific commands - - Database preparation steps - -### Common Use Cases - -#### Production Backup -```bash -# Daily backup with timestamp -make db-backup - -# Result: database-exports/backup_20251102_143000.sql.gz -``` - -#### Development Setup -```bash -# Get schema from production -ssh prod "cd /app && ./scripts/export-database.sh --schema-only --output dev_schema" -scp prod:/app/database-exports/dev_schema.sql.gz ./database-exports/ - -# Import to dev -./scripts/import-database.sh --create-db database-exports/dev_schema.sql.gz -``` - -#### Migration Between Servers -```bash -# On source server -./scripts/export-database.sh --format custom --output migration_$(date +%Y%m%d) - -# Transfer to target -scp database-exports/migration_* target:/app/database-exports/ - -# On target server -./scripts/import-database.sh --drop-existing database-exports/migration_20251102.dump -``` - -#### Selective Data Export -```bash -# Export only vehicle-related data -./scripts/export-database.sh \ - --include-table vehicles \ - --include-table fuel_logs \ - --include-table maintenance_records \ - --output vehicle_data -``` - -### Safety Features - -#### Automatic Backups -Before destructive operations, the import script automatically creates a backup: -``` -[INFO] Creating backup: database-exports/motovaultpro_backup_20251102_143000.sql.gz -[INFO] Backup created successfully -``` - -#### Confirmation Prompts -Dangerous operations require explicit confirmation: -``` -WARNING: This will DROP the existing database 'motovaultpro' -All data will be permanently deleted! - -Are you sure you want to continue? 
(type 'yes' to confirm): -``` - -#### Format Validation -Scripts detect and validate file formats automatically: -``` -[INFO] Auto-detected format: sql-compressed -``` - -### Troubleshooting - -#### Container Not Running -```bash -Error: PostgreSQL container 'mvp-postgres' is not running - -Solution: -docker compose up -d mvp-postgres -``` - -#### Permission Issues -```bash -chmod +x scripts/export-database.sh -chmod +x scripts/import-database.sh -``` - -#### Large Database Exports -For databases >1GB, use custom format: -```bash -./scripts/export-database.sh --format custom --no-compress -``` - -### Advanced Usage - -#### Automated Backups with Cron -```bash -# Daily at 2 AM -0 2 * * * cd /app && ./scripts/export-database.sh --output daily_$(date +%Y%m%d) >> /var/log/db-backup.log 2>&1 - -# Weekly on Sunday at 3 AM -0 3 * * 0 cd /app && ./scripts/export-database.sh --format custom --output weekly_$(date +%Y%m%d) >> /var/log/db-backup.log 2>&1 -``` - -#### Cleanup Old Backups -```bash -# Keep last 7 days of daily backups -find database-exports/ -name "daily_*.sql.gz" -mtime +7 -delete - -# Keep last 4 weeks of weekly backups -find database-exports/ -name "weekly_*.dump" -mtime +28 -delete -``` - -#### Export for Analysis -```bash -# Export specific tables for data analysis -./scripts/export-database.sh \ - --data-only \ - --include-table fuel_logs \ - --include-table maintenance_records \ - --output analytics_data -``` - -## Documentation - -For detailed information, see: -- [Database Migration Guide](../docs/DATABASE-MIGRATION.md) - Comprehensive migration documentation -- [Architecture](../docs/ARCHITECTURE.md) - System architecture -- [Platform Services](../docs/PLATFORM-SERVICES.md) - Service architecture - -## Support - -For issues: -1. Check the import instructions file (`*_import_instructions.txt`) -2. Review `docker logs mvp-postgres` -3. See troubleshooting in [DATABASE-MIGRATION.md](../docs/DATABASE-MIGRATION.md) -4. 
Create an issue in the repository diff --git a/scripts/generate-feature-capsule.sh b/scripts/generate-feature-capsule.sh deleted file mode 100755 index d9b6dc1..0000000 --- a/scripts/generate-feature-capsule.sh +++ /dev/null @@ -1,95 +0,0 @@ -#!/bin/bash -set -e - -# Color codes for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -FEATURE_NAME=$1 -if [ -z "$FEATURE_NAME" ]; then - echo -e "${RED}Error: Feature name is required${NC}" - echo "Usage: $0 " - echo "Example: $0 user-settings" - exit 1 -fi - -# Convert kebab-case to PascalCase and camelCase -FEATURE_PASCAL=$(echo $FEATURE_NAME | sed -r 's/(^|-)([a-z])/\U\2/g') -FEATURE_CAMEL=$(echo $FEATURE_PASCAL | sed 's/^./\l&/') - -echo -e "${GREEN}Creating Modified Feature Capsule: $FEATURE_NAME${NC}" - -# Backend Feature Capsule -BACKEND_DIR="backend/src/features/$FEATURE_NAME" -mkdir -p "$BACKEND_DIR"/{api,domain,data,migrations,external,events,tests/{unit,integration,fixtures},docs} - -# Create Feature README -cat > "$BACKEND_DIR/README.md" << EOF -# $FEATURE_PASCAL Feature Capsule - -## Quick Summary (50 tokens) -[AI: Complete feature description, main operations, dependencies, caching strategy] - -## API Endpoints -- GET /api/$FEATURE_NAME - List all $FEATURE_NAME -- GET /api/$FEATURE_NAME/:id - Get specific $FEATURE_CAMEL -- POST /api/$FEATURE_NAME - Create new $FEATURE_CAMEL -- PUT /api/$FEATURE_NAME/:id - Update $FEATURE_CAMEL -- DELETE /api/$FEATURE_NAME/:id - Delete $FEATURE_CAMEL - -## Structure -- **api/** - HTTP endpoints, routes, validators -- **domain/** - Business logic, types, rules -- **data/** - Repository, database queries -- **migrations/** - Feature-specific schema -- **external/** - External API integrations -- **events/** - Event handlers -- **tests/** - All feature tests -- **docs/** - Detailed documentation - -## Dependencies -- Internal: core/auth, core/cache -- External: [List any external APIs] -- Database: $FEATURE_NAME table - -## Quick 
Commands -\`\`\`bash -# Run feature tests -npm test -- features/$FEATURE_NAME - -# Run migrations (all features) -npm run migrate:all -\`\`\` -EOF - -# Create index.ts (Public API) -cat > "$BACKEND_DIR/index.ts" << EOF -/** - * @ai-summary Public API for $FEATURE_NAME feature capsule - * @ai-note This is the ONLY file other features should import from - */ - -// Export service for use by other features -export { ${FEATURE_PASCAL}Service } from './domain/${FEATURE_CAMEL}.service'; - -// Export types needed by other features -export type { - ${FEATURE_PASCAL}, - Create${FEATURE_PASCAL}Request, - Update${FEATURE_PASCAL}Request, - ${FEATURE_PASCAL}Response -} from './domain/${FEATURE_CAMEL}.types'; - -// Internal: Register routes with Fastify app (plugin) -export { ${FEATURE_PASCAL}Routes } from './api/${FEATURE_CAMEL}.routes'; -EOF - -echo -e "${GREEN}✅ Feature capsule created: $FEATURE_NAME${NC}" -echo -e "${YELLOW}Next steps:${NC}" -echo "1. Implement business logic in domain/${FEATURE_CAMEL}.service.ts" -echo "2. Add database columns to migrations/" -echo "3. Implement API validation" -echo "4. Add tests" -echo "5. Register Fastify plugin in backend/src/app.ts" diff --git a/scripts/load_vehicle_data.py b/scripts/load_vehicle_data.py deleted file mode 100755 index 7ef274d..0000000 --- a/scripts/load_vehicle_data.py +++ /dev/null @@ -1,342 +0,0 @@ -#!/usr/bin/env python3 -""" -Populate the normalized vehicles schema (make/model/model_year/trim/engine) -from the JSON sources in data/make-models. 
- -Example: - PGPASSWORD=$(cat secrets/app/postgres-password.txt) \\ - python3 scripts/load_vehicle_data.py \\ - --db-user postgres --db-name motovaultpro --db-host 127.0.0.1 -""" - -from __future__ import annotations - -import argparse -import json -import os -import re -import sys -from collections import Counter -from pathlib import Path -from typing import Dict, Iterable, List, Tuple - -try: - import psycopg -except ImportError as exc: # pragma: no cover - ease troubleshooting - sys.stderr.write( - "Error: psycopg is required. Install with `pip install psycopg[binary]`.\n", - ) - raise - - -DEFAULT_DATA_DIR = Path(__file__).resolve().parents[1] / "data" / "make-models" -DEFAULT_TRANSMISSIONS = ("Automatic", "Manual") - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Load vehicle dropdown data into Postgres.") - parser.add_argument( - "--data-dir", - default=str(DEFAULT_DATA_DIR), - help=f"Directory with make JSON files (default: {DEFAULT_DATA_DIR})", - ) - parser.add_argument( - "--database-url", - help="Full postgres URL. 
Falls back to PG* environment variables if omitted.", - ) - parser.add_argument("--db-host", default=os.environ.get("PGHOST", "127.0.0.1")) - parser.add_argument("--db-port", type=int, default=int(os.environ.get("PGPORT", 5432))) - parser.add_argument("--db-name", default=os.environ.get("PGDATABASE", "motovaultpro")) - parser.add_argument("--db-user", default=os.environ.get("PGUSER", "postgres")) - parser.add_argument("--db-password", default=os.environ.get("PGPASSWORD")) - parser.add_argument( - "--transmissions", - default=",".join(DEFAULT_TRANSMISSIONS), - help="Comma-separated list of transmission labels (default: Automatic,Manual)", - ) - parser.add_argument( - "--skip-truncate", - action="store_true", - help="Do not truncate lookup tables before loading (useful for incremental testing).", - ) - return parser.parse_args() - - -def build_conninfo(args: argparse.Namespace) -> str: - if args.database_url: - return args.database_url - - parts = [ - f"host={args.db_host}", - f"port={args.db_port}", - f"dbname={args.db_name}", - f"user={args.db_user}", - ] - if args.db_password: - parts.append(f"password={args.db_password}") - return " ".join(parts) - - -def load_json_documents(data_dir: Path) -> List[Tuple[str, dict]]: - if not data_dir.exists() or not data_dir.is_dir(): - raise FileNotFoundError(f"Data directory not found: {data_dir}") - - documents: List[Tuple[str, dict]] = [] - for file_path in sorted(data_dir.glob("*.json")): - with file_path.open("r", encoding="utf-8") as handle: - try: - documents.append((file_path.name, json.load(handle))) - except json.JSONDecodeError as exc: - raise ValueError(f"Invalid JSON in {file_path}: {exc}") from exc - if not documents: - raise RuntimeError(f"No JSON files found under {data_dir}") - return documents - - -def clean_label(value: str) -> str: - text = str(value or "").replace("_", " ").strip() - text = re.sub(r"\s+", " ", text) - return text - - -def normalize_key(value: str) -> str: - text = clean_label(value).lower() 
- return text - - -def unique_labels(values: Iterable[str]) -> List[str]: - seen = set() - result: List[str] = [] - for value in values: - label = clean_label(value) - if not label: - continue - key = normalize_key(label) - if key in seen: - continue - seen.add(key) - result.append(label) - return result - - -class LoaderCaches: - def __init__(self) -> None: - self.makes: Dict[str, int] = {} - self.models: Dict[Tuple[int, str], int] = {} - self.model_years: Dict[Tuple[int, int], int] = {} - self.trims: Dict[Tuple[int, str], int] = {} - self.engines: Dict[str, int] = {} - - -class LoaderStats: - def __init__(self) -> None: - self.counter = Counter() - - def as_dict(self) -> Dict[str, int]: - return dict(self.counter) - - def bump(self, key: str, amount: int = 1) -> None: - self.counter[key] += amount - - -def truncate_lookup_tables(cur: psycopg.Cursor) -> None: - cur.execute("TRUNCATE vehicles.trim_engine, vehicles.trim_transmission RESTART IDENTITY CASCADE") - cur.execute("TRUNCATE vehicles.trim, vehicles.model_year, vehicles.model, vehicles.make RESTART IDENTITY CASCADE") - cur.execute("TRUNCATE vehicles.engine, vehicles.transmission RESTART IDENTITY CASCADE") - - -def ensure_transmissions(cur: psycopg.Cursor, names: Iterable[str]) -> None: - for name in unique_labels(names): - cur.execute( - """ - INSERT INTO vehicles.transmission (name) - VALUES (%s) - ON CONFLICT (name) DO NOTHING - """, - (name,), - ) - - -def upsert_make(cur: psycopg.Cursor, caches: LoaderCaches, name: str) -> int: - key = normalize_key(name) - if key in caches.makes: - return caches.makes[key] - cur.execute( - """ - INSERT INTO vehicles.make (name) - VALUES (%s) - ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name - RETURNING id - """, - (name,), - ) - make_id = cur.fetchone()[0] - caches.makes[key] = make_id - return make_id - - -def upsert_model(cur: psycopg.Cursor, caches: LoaderCaches, make_id: int, name: str) -> int: - key = (make_id, normalize_key(name)) - if key in caches.models: - 
return caches.models[key] - cur.execute( - """ - INSERT INTO vehicles.model (make_id, name) - VALUES (%s, %s) - ON CONFLICT (make_id, name) DO UPDATE SET name = EXCLUDED.name - RETURNING id - """, - (make_id, name), - ) - model_id = cur.fetchone()[0] - caches.models[key] = model_id - return model_id - - -def upsert_model_year(cur: psycopg.Cursor, caches: LoaderCaches, model_id: int, year: int) -> int: - key = (model_id, year) - if key in caches.model_years: - return caches.model_years[key] - cur.execute( - """ - INSERT INTO vehicles.model_year (model_id, year) - VALUES (%s, %s) - ON CONFLICT (model_id, year) DO UPDATE SET year = EXCLUDED.year - RETURNING id - """, - (model_id, year), - ) - model_year_id = cur.fetchone()[0] - caches.model_years[key] = model_year_id - return model_year_id - - -def upsert_trim(cur: psycopg.Cursor, caches: LoaderCaches, model_year_id: int, name: str) -> int: - key = (model_year_id, normalize_key(name)) - if key in caches.trims: - return caches.trims[key] - cur.execute( - """ - INSERT INTO vehicles.trim (model_year_id, name) - VALUES (%s, %s) - ON CONFLICT (model_year_id, name) DO UPDATE SET name = EXCLUDED.name - RETURNING id - """, - (model_year_id, name), - ) - trim_id = cur.fetchone()[0] - caches.trims[key] = trim_id - return trim_id - - -def upsert_engine(cur: psycopg.Cursor, caches: LoaderCaches, name: str) -> int: - key = normalize_key(name) - if key in caches.engines: - return caches.engines[key] - cur.execute( - """ - INSERT INTO vehicles.engine (name) - VALUES (%s) - ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name - RETURNING id - """, - (name,), - ) - engine_id = cur.fetchone()[0] - caches.engines[key] = engine_id - return engine_id - - -def link_trim_engine(cur: psycopg.Cursor, trim_id: int, engine_id: int) -> None: - cur.execute( - """ - INSERT INTO vehicles.trim_engine (trim_id, engine_id) - VALUES (%s, %s) - ON CONFLICT (trim_id, engine_id) DO NOTHING - """, - (trim_id, engine_id), - ) - - -def 
process_documents(cur: psycopg.Cursor, documents: List[Tuple[str, dict]], stats: LoaderStats) -> None: - caches = LoaderCaches() - - for filename, payload in documents: - if not isinstance(payload, dict): - stats.bump("skipped_files_invalid_root") - print(f"[WARN] Skipping {filename}: root is not an object") - continue - - for make_key, year_entries in payload.items(): - make_name = clean_label(make_key) - if not make_name: - stats.bump("skipped_makes_invalid_name") - continue - make_id = upsert_make(cur, caches, make_name) - stats.bump("makes") - - for year_entry in year_entries or []: - year_raw = year_entry.get("year") - try: - year = int(year_raw) - except (TypeError, ValueError): - stats.bump("skipped_years_invalid") - continue - - models = year_entry.get("models") or [] - for model in models: - model_name = clean_label(model.get("name", "")) - if not model_name: - stats.bump("skipped_models_invalid_name") - continue - - engine_names = unique_labels(model.get("engines") or []) - if not engine_names: - stats.bump("skipped_models_missing_engines") - continue - - trim_names = unique_labels(model.get("submodels") or []) - if not trim_names: - trim_names = [model_name] - - model_id = upsert_model(cur, caches, make_id, model_name) - model_year_id = upsert_model_year(cur, caches, model_id, year) - stats.bump("model_years") - - trim_ids: List[int] = [] - for trim_name in trim_names: - trim_id = upsert_trim(cur, caches, model_year_id, trim_name) - trim_ids.append(trim_id) - stats.bump("trims") - - for engine_name in engine_names: - engine_id = upsert_engine(cur, caches, engine_name) - stats.bump("engines") - for trim_id in trim_ids: - link_trim_engine(cur, trim_id, engine_id) - stats.bump("trim_engine_links") - - -def main() -> None: - args = parse_args() - data_dir = Path(args.data_dir).expanduser().resolve() - documents = load_json_documents(data_dir) - conninfo = build_conninfo(args) - transmissions = unique_labels(args.transmissions.split(",")) - - with 
psycopg.connect(conninfo) as conn: - with conn.cursor() as cur: - if not args.skip_truncate: - truncate_lookup_tables(cur) - ensure_transmissions(cur, transmissions or DEFAULT_TRANSMISSIONS) - stats = LoaderStats() - process_documents(cur, documents, stats) - - print("\nVehicle lookup data load completed.") - for key, value in sorted(stats.as_dict().items()): - print(f" {key}: {value}") - print(f"\nProcessed directory: {data_dir}") - - -if __name__ == "__main__": - main() diff --git a/scripts/run-data-load.sh b/scripts/run-data-load.sh deleted file mode 100644 index 4aec3a5..0000000 --- a/scripts/run-data-load.sh +++ /dev/null @@ -1,30 +0,0 @@ -container=vehicle-loader-$(date +%s) - - # Start on backend network for outbound internet access - docker run -d --rm \ - --name "$container" \ - --network motovaultpro_backend \ - -v "$PWD":/workspace \ - -w /workspace \ - -e PGPASSWORD="$(cat secrets/app/postgres-password.txt)" \ - python:3.12-slim \ - sleep infinity - - # Install psycopg before touching the DB network - docker exec "$container" bash -lc "pip install psycopg[binary] >/tmp/pip.log" - - # Now attach to the database network so Postgres is reachable - docker network connect motovaultpro_database "$container" - - # Run the loader - docker exec "$container" bash -lc " - python scripts/load_vehicle_data.py \ - --db-host mvp-postgres \ - --db-port 5432 \ - --db-user postgres \ - --db-name motovaultpro \ - --data-dir data/make-models - " - - # Clean up - docker rm -f "$container" \ No newline at end of file