Initial Commit
This commit is contained in:
328
docs/changes/vehicles-dropdown-v2/06-cli-commands.md
Normal file
328
docs/changes/vehicles-dropdown-v2/06-cli-commands.md
Normal file
@@ -0,0 +1,328 @@
|
||||
# CLI Commands - Manual JSON ETL
|
||||
|
||||
## Overview
|
||||
New CLI commands for processing JSON vehicle data into the PostgreSQL database.
|
||||
|
||||
## Primary Command: `load-manual`
|
||||
|
||||
### Basic Syntax
|
||||
```bash
|
||||
python -m etl load-manual [OPTIONS]
|
||||
```
|
||||
|
||||
### Command Options
|
||||
|
||||
#### Load Mode (`--mode`)
|
||||
Controls how data is handled in the database:
|
||||
|
||||
```bash
|
||||
# Append mode (safe, default)
|
||||
python -m etl load-manual --mode=append
|
||||
|
||||
# Clear mode (destructive - removes existing data first)
|
||||
python -m etl load-manual --mode=clear
|
||||
```
|
||||
|
||||
**Mode Details:**
|
||||
- **`append`** (default): Uses `ON CONFLICT DO NOTHING` - safe for existing data
|
||||
- **`clear`**: Uses `TRUNCATE CASCADE`, then inserts - completely replaces existing data
|
||||
|
||||
#### Specific Make Processing (`--make`)
|
||||
Process only a specific make instead of all 55 files:
|
||||
|
||||
```bash
|
||||
# Process only Toyota
|
||||
python -m etl load-manual --make=toyota
|
||||
|
||||
# Process only BMW (uses filename format)
|
||||
python -m etl load-manual --make=bmw
|
||||
|
||||
# Process Alfa Romeo (underscore format from filename)
|
||||
python -m etl load-manual --make=alfa_romeo
|
||||
```
|
||||
|
||||
#### Validation Only (`--validate-only`)
|
||||
Validate JSON files without loading to database:
|
||||
|
||||
```bash
|
||||
# Validate all JSON files
|
||||
python -m etl load-manual --validate-only
|
||||
|
||||
# Validate specific make
|
||||
python -m etl load-manual --make=tesla --validate-only
|
||||
```
|
||||
|
||||
#### Output Verbosity (`--verbose` / `--quiet`)
|
||||
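Control how much output is shown: `--verbose` enables detailed progress output, while `--quiet` limits output to errors (`--quiet` takes precedence if both are given):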
|
||||
|
||||
```bash
|
||||
# Verbose processing
|
||||
python -m etl load-manual --verbose
|
||||
|
||||
# Quiet processing (errors only)
|
||||
python -m etl load-manual --quiet
|
||||
```
|
||||
|
||||
### Complete Command Examples
|
||||
|
||||
```bash
|
||||
# Standard usage - process all makes safely
|
||||
python -m etl load-manual
|
||||
|
||||
# Full reload - clear and rebuild entire database
|
||||
python -m etl load-manual --mode=clear --verbose
|
||||
|
||||
# Process specific make in append mode with verbose output
|
||||
python -m etl load-manual --make=honda --mode=append --verbose
|
||||
|
||||
# Validate before processing
|
||||
python -m etl load-manual --validate-only
|
||||
python -m etl load-manual --mode=clear # If validation passes
|
||||
```
|
||||
|
||||
## Secondary Command: `validate-json`
|
||||
|
||||
### Purpose
|
||||
Standalone validation of JSON files without database operations.
|
||||
|
||||
### Syntax
|
||||
```bash
|
||||
python -m etl validate-json [OPTIONS]
|
||||
```
|
||||
|
||||
### Options
|
||||
|
||||
```bash
|
||||
# Validate all JSON files
|
||||
python -m etl validate-json
|
||||
|
||||
# Validate specific make
|
||||
python -m etl validate-json --make=toyota
|
||||
|
||||
# Generate detailed report
|
||||
python -m etl validate-json --detailed-report
|
||||
|
||||
# Export validation results to file
|
||||
python -m etl validate-json --export-report=/tmp/validation.json
|
||||
```
|
||||
|
||||
### Validation Checks
|
||||
1. **JSON structure** validation
|
||||
2. **Engine parsing** validation
|
||||
3. **Make name mapping** validation
|
||||
4. **Data completeness** checks
|
||||
5. **Cross-reference** with authoritative makes list
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### CLI Command Structure
|
||||
Add to `etl/main.py`:
|
||||
|
||||
```python
|
||||
@cli.command()
@click.option('--mode', type=click.Choice(['clear', 'append']),
              default='append', help='Database load mode')
@click.option('--make', help='Process specific make only (use filename format)')
@click.option('--validate-only', is_flag=True,
              help='Validate JSON files without loading to database')
@click.option('--verbose', is_flag=True, help='Enable verbose output')
@click.option('--quiet', is_flag=True, help='Suppress non-error output')
def load_manual(mode, make, validate_only, verbose, quiet):
    """Load vehicle data from JSON files"""
    # The docstring above doubles as the click help text, so it is kept short;
    # details live in comments instead.
    #
    # mode:          'append' or 'clear'; upper-cased and passed to LoadMode.
    # make:          optional make name in filename format (e.g. 'alfa_romeo').
    # validate_only: validate and report, never touching the database.
    # verbose/quiet: root-logger level; --quiet wins when both are given.
    #
    # Exits with status 1 when the pipeline reports failure or raises.

    if quiet:
        logging.getLogger().setLevel(logging.ERROR)
    elif verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    try:
        pipeline = ManualJsonPipeline(
            sources_dir=config.JSON_SOURCES_DIR,
            load_mode=LoadMode(mode.upper())
        )

        if validate_only:
            if make:
                # Honor --make in validation-only mode (documented usage:
                # `load-manual --make=tesla --validate-only`). Previously the
                # flag was ignored and every file was validated.
                # NOTE(review): uses the same validator/display helpers as the
                # `validate-json` command - confirm they are imported here.
                validator = JsonValidator(sources_dir=config.JSON_SOURCES_DIR)
                result = validator.validate_make(make)
                display_validation_summary(result)
            else:
                result = pipeline.validate_all_json()
                display_validation_report(result)
            return

        result = pipeline.run_manual_pipeline(specific_make=make)
        display_pipeline_result(result)

        # sys.exit raises SystemExit, which is not an Exception subclass, so
        # the handler below does not swallow it.
        if not result.success:
            sys.exit(1)

    except Exception as e:
        logger.error(f"Manual load failed: {e}")
        sys.exit(1)
|
||||
|
||||
@cli.command()
@click.option('--make', help='Validate specific make only')
@click.option('--detailed-report', is_flag=True,
              help='Generate detailed validation report')
@click.option('--export-report', help='Export validation report to file')
def validate_json(make, detailed_report, export_report):
    """Validate JSON files structure and data quality"""
    # The docstring above doubles as the click help text, so it is kept short;
    # details live in comments instead.
    #
    # make:            optional single make to validate; all makes otherwise.
    # detailed_report: display the detailed report instead of the summary.
    # export_report:   optional path; the detailed report is written there
    #                  as JSON.
    #
    # Exits with status 1 if validation or export raises.

    try:
        validator = JsonValidator(sources_dir=config.JSON_SOURCES_DIR)

        if make:
            result = validator.validate_make(make)
        else:
            result = validator.validate_all_makes()

        # Neither report flag given: show the short summary only.
        if not (detailed_report or export_report):
            display_validation_summary(result)
            return

        report = validator.generate_detailed_report(result)

        if export_report:
            # Explicit encoding so the file does not depend on the platform's
            # default locale.
            with open(export_report, 'w', encoding='utf-8') as f:
                json.dump(report, f, indent=2)
            logger.info(f"Validation report exported to {export_report}")

        # Export and display are independent: previously --detailed-report was
        # silently ignored whenever --export-report was also supplied.
        if detailed_report:
            display_detailed_report(report)

    except Exception as e:
        logger.error(f"JSON validation failed: {e}")
        sys.exit(1)
|
||||
```
|
||||
|
||||
## Output Examples
|
||||
|
||||
### Successful Load Output
|
||||
```
|
||||
$ python -m etl load-manual --mode=append --verbose
|
||||
|
||||
🚀 Starting manual JSON ETL pipeline...
|
||||
📁 Processing 55 JSON files from sources/makes/
|
||||
|
||||
✅ Make normalization validation passed (55/55)
|
||||
✅ Engine parsing validation passed (1,247 engines)
|
||||
|
||||
📊 Processing makes:
|
||||
├── toyota.json → Toyota (47 models, 203 engines, 312 trims)
|
||||
├── ford.json → Ford (52 models, 189 engines, 298 trims)
|
||||
├── chevrolet.json → Chevrolet (48 models, 167 engines, 287 trims)
|
||||
└── ... (52 more makes)
|
||||
|
||||
💾 Database loading:
|
||||
├── Makes: 55 loaded (0 duplicates)
|
||||
├── Models: 2,847 loaded (23 duplicates)
|
||||
├── Model Years: 18,392 loaded (105 duplicates)
|
||||
├── Engines: 1,247 loaded (45 duplicates)
|
||||
└── Trims: 12,058 loaded (234 duplicates)
|
||||
|
||||
✅ Manual JSON ETL completed successfully in 2m 34s
|
||||
```
|
||||
|
||||
### Validation Output
|
||||
```
|
||||
$ python -m etl validate-json
|
||||
|
||||
📋 JSON Validation Report
|
||||
|
||||
✅ File Structure: 55/55 files valid
|
||||
✅ Make Name Mapping: 55/55 mappings valid
|
||||
⚠️ Engine Parsing: 1,201/1,247 engines parsed (46 unparseable)
|
||||
✅ Data Completeness: All required fields present
|
||||
|
||||
🔍 Issues Found:
|
||||
├── Unparseable engines:
|
||||
│ ├── toyota.json: "Custom Hybrid System" (1 occurrence)
|
||||
│ ├── ferrari.json: "V12 Twin-Turbo Custom" (2 occurrences)
|
||||
│ └── lamborghini.json: "V10 Plus" (43 occurrences)
|
||||
└── Empty engine arrays:
|
||||
├── tesla.json: 24 models with empty engines
|
||||
└── lucid.json: 3 models with empty engines
|
||||
|
||||
💡 Recommendations:
|
||||
• Review unparseable engine formats
|
||||
• Electric vehicle handling will create default "Electric Motor" entries
|
||||
|
||||
Overall Status: ✅ READY FOR PROCESSING
|
||||
```
|
||||
|
||||
### Error Handling Output
|
||||
```
|
||||
$ python -m etl load-manual --make=invalid_make
|
||||
|
||||
❌ Error: Make 'invalid_make' not found
|
||||
|
||||
Available makes:
|
||||
acura, alfa_romeo, aston_martin, audi, bentley, bmw,
|
||||
buick, cadillac, chevrolet, chrysler, dodge, ferrari,
|
||||
... (showing first 20)
|
||||
|
||||
💡 Tip: Use 'python -m etl validate-json' to see all available makes
|
||||
```
|
||||
|
||||
## Integration with Existing Commands
|
||||
|
||||
### Command Compatibility
|
||||
The new commands integrate seamlessly with existing ETL commands:
|
||||
|
||||
```bash
|
||||
# Existing MSSQL pipeline (unchanged)
|
||||
python -m etl build-catalog
|
||||
|
||||
# New manual JSON pipeline
|
||||
python -m etl load-manual
|
||||
|
||||
# Test connections (works for both)
|
||||
python -m etl test
|
||||
|
||||
# Scheduling (MSSQL only currently)
|
||||
python -m etl schedule
|
||||
```
|
||||
|
||||
### Configuration Integration
|
||||
Uses existing config structure with new JSON-specific settings:
|
||||
|
||||
```python
|
||||
# In config.py
|
||||
JSON_SOURCES_DIR: str = "sources/makes"
|
||||
MANUAL_LOAD_DEFAULT_MODE: str = "append"
|
||||
MANUAL_LOAD_BATCH_SIZE: int = 1000
|
||||
JSON_VALIDATION_STRICT: bool = False
|
||||
```
|
||||
|
||||
## Help and Documentation
|
||||
|
||||
### Built-in Help
|
||||
```bash
|
||||
# Main command help
|
||||
python -m etl load-manual --help
|
||||
|
||||
# All commands help
|
||||
python -m etl --help
|
||||
```
|
||||
|
||||
### Command Discovery
|
||||
```bash
|
||||
# List all available commands
|
||||
python -m etl
|
||||
|
||||
# Shows:
|
||||
# Commands:
|
||||
# build-catalog Build vehicle catalog from MSSQL database
|
||||
# load-manual Load vehicle data from JSON files
|
||||
# validate-json Validate JSON files structure and data quality
|
||||
# schedule Start ETL scheduler (default mode)
|
||||
# test Test database connections
|
||||
# update Run ETL update
|
||||
```
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
### Planned Command Options
|
||||
- `--dry-run`: Show what would be processed without making changes
|
||||
- `--since`: Process only files modified since timestamp
|
||||
- `--parallel`: Enable parallel processing of makes
|
||||
- `--rollback`: Roll back the previous manual load operation
|
||||
|
||||
### Advanced Validation Options
|
||||
- `--strict-parsing`: Fail on any engine parsing errors
|
||||
- `--cross-validate`: Compare JSON data against MSSQL data where available
|
||||
- `--performance-test`: Benchmark processing performance
|
||||
Reference in New Issue
Block a user