#!/usr/bin/env python3
"""
Post-import QA validation for vehicle dropdown data.

Runs basic duplicate and range checks against the motovaultpro Postgres
container. Exits with status 1 when Docker or the container is unavailable,
when a query fails, or when any QA check reports a problem, so the script can
gate an import pipeline.
"""

import os
import subprocess
import sys

# Name of the Docker container that hosts the Postgres instance under test.
CONTAINER_NAME = "mvp-postgres"

# Known-impossible (year, make, model, trim) rows that must never be imported.
INVALID_COMBOS = [
    (1992, "Chevrolet", "Corvette", "Z06"),  # Z06 started 2001
    (2000, "Chevrolet", "Corvette", "35th Anniversary Edition"),  # Was 1988
    (2000, "Chevrolet", "Corvette", "Stingray"),  # Stingray started 2014
    (1995, "Ford", "Mustang", "Mach-E"),  # Mach-E is 2021+
    (2020, "Tesla", "Cybertruck", "Base"),  # Not in production until later
]

# Result keys in main() whose query must return "0" for the QA run to pass.
ZERO_EXPECTED_KEYS = (
    "engine_duplicate_names",
    "transmission_duplicate_types",
    "vehicle_option_duplicates",
    "year_range_valid",
    "cross_join_gaps",
)


def sql_quote(value: str) -> str:
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that values such as ``O'Brien``
    cannot terminate the literal early. ``psql -c`` performs no variable
    interpolation, so the literal has to be rendered client-side.
    """
    return "'" + str(value).replace("'", "''") + "'"


def run_psql(query: str) -> str:
    """Run *query* through psql inside the Postgres container.

    Uses ``-At`` (unaligned, tuples only), so the output is the bare rows with
    columns separated by ``|``.

    Raises:
        subprocess.CalledProcessError: if ``docker exec`` / psql exits non-zero.
        OSError: if the ``docker`` executable cannot be started.
    """
    cmd = [
        "docker",
        "exec",
        CONTAINER_NAME,
        "psql",
        "-U",
        "postgres",
        "-d",
        "motovaultpro",
        "-At",
        "-c",
        query,
    ]
    return subprocess.check_output(cmd, text=True)


def check_container() -> None:
    """Exit with status 1 unless Docker works and the container is running."""
    try:
        subprocess.check_output(["docker", "ps"], text=True)
    except (OSError, subprocess.SubprocessError):
        print("❌ Docker not available.")
        sys.exit(1)

    try:
        listing = subprocess.check_output(
            [
                "docker",
                "ps",
                "--filter",
                f"name={CONTAINER_NAME}",
                "--format",
                "{{.Names}}",
            ],
            text=True,
        )
    except (OSError, subprocess.SubprocessError) as exc:
        print(f"❌ Failed to check containers: {exc}")
        sys.exit(1)

    # The name filter matches substrings (e.g. "mvp-postgres-backup"), while
    # `docker exec` needs the exact name, so require an exact match here.
    if CONTAINER_NAME not in listing.split():
        print(f"❌ {CONTAINER_NAME} container not running.")
        sys.exit(1)


def check_invalid_combinations() -> list:
    """Verify known invalid combinations do not exist.

    Returns:
        One human-readable message per entry of ``INVALID_COMBOS`` that was
        found in ``vehicle_options``; an empty list when none are present.

    Raises:
        subprocess.CalledProcessError: if a query fails.
        ValueError: if psql output is not an integer count.
    """
    issues = []
    for year, make, model, trim in INVALID_COMBOS:
        query = f"""
            SELECT COUNT(*)
            FROM vehicle_options
            WHERE year = {int(year)}
              AND make = {sql_quote(make)}
              AND model = {sql_quote(model)}
              AND trim = {sql_quote(trim)}
        """
        count = int(run_psql(query).strip())
        if count > 0:
            issues.append(f"Invalid combo found: {year} {make} {model} {trim}")
    return issues


def check_trim_coverage() -> None:
    """Report on trim coverage statistics.

    Informational only; the result does not affect the pass/fail outcome.

    NOTE(review): the figure labelled ``base_only`` counts models that have a
    'Base' row, not models whose only trim is 'Base'; a model with both kinds
    of trim is counted in both filtered columns. Confirm whether that is the
    intended statistic.

    Raises:
        subprocess.CalledProcessError: if the query fails.
    """
    query = """
        SELECT
            COUNT(DISTINCT (year, make, model)) as total_models,
            COUNT(DISTINCT (year, make, model))
                FILTER (WHERE trim = 'Base') as base_only,
            COUNT(DISTINCT (year, make, model))
                FILTER (WHERE trim != 'Base') as has_specific_trims
        FROM vehicle_options
    """
    result = run_psql(query).strip()
    print(f"Trim coverage (total/base_only/has_specific_trims): {result}")


def _fail_query(key: str, exc: Exception) -> None:
    """Report a failed QA query and terminate with status 1."""
    print(f"❌ Query failed ({key}): {exc}")
    sys.exit(1)


def main() -> None:
    """Run every QA check, print a report, and exit 1 if any check fails."""
    check_container()

    print("🔍 Running QA checks...\n")
    queries = {
        "engine_duplicate_names": """
            SELECT COUNT(*) FROM (
                SELECT LOWER(name) as n, COUNT(*) c
                FROM engines
                GROUP BY 1 HAVING COUNT(*) > 1
            ) t;
        """,
        "transmission_duplicate_types": """
            SELECT COUNT(*) FROM (
                SELECT LOWER(type) as t, COUNT(*) c
                FROM transmissions
                GROUP BY 1 HAVING COUNT(*) > 1
            ) t;
        """,
        "vehicle_option_duplicates": """
            SELECT COUNT(*) FROM (
                SELECT year, make, model, trim, engine_id, transmission_id,
                       COUNT(*) c
                FROM vehicle_options
                GROUP BY 1,2,3,4,5,6 HAVING COUNT(*) > 1
            ) t;
        """,
        "year_range": """
            SELECT MIN(year) || ' - ' || MAX(year) FROM vehicle_options;
        """,
        "year_range_valid": """
            SELECT COUNT(*) FROM (
                SELECT 1 FROM vehicle_options
                WHERE year < 2015 OR year > 2022
                LIMIT 1
            ) t;
        """,
        "counts": """
            SELECT
                (SELECT COUNT(*) FROM engines) AS engines,
                (SELECT COUNT(*) FROM transmissions) AS transmissions,
                (SELECT COUNT(*) FROM vehicle_options) AS vehicle_options;
        """,
        # Every engine/transmission seen for a (year, make, model, trim) must
        # also exist as an explicit pair; otherwise independent dropdowns
        # could offer a combination that was never imported.
        "cross_join_gaps": """
            SELECT COUNT(*) FROM (
                SELECT base.year, base.make, base.model, base.trim,
                       e.engine_id, t.transmission_id
                FROM (
                    SELECT DISTINCT year, make, model, trim
                    FROM vehicle_options
                ) base
                JOIN (
                    SELECT DISTINCT year, make, model, trim, engine_id
                    FROM vehicle_options
                ) e
                  ON base.year = e.year AND base.make = e.make
                 AND base.model = e.model AND base.trim = e.trim
                JOIN (
                    SELECT DISTINCT year, make, model, trim, transmission_id
                    FROM vehicle_options
                ) t
                  ON base.year = t.year AND base.make = t.make
                 AND base.model = t.model AND base.trim = t.trim
                EXCEPT
                SELECT year, make, model, trim, engine_id, transmission_id
                FROM vehicle_options
            ) gap;
        """,
    }

    results = {}
    for key, query in queries.items():
        try:
            results[key] = run_psql(query).strip()
        except subprocess.CalledProcessError as exc:
            _fail_query(key, exc)

    print(f"Engine duplicate names: {results['engine_duplicate_names']}")
    print(f"Transmission duplicate types: {results['transmission_duplicate_types']}")
    print(f"Vehicle option duplicates: {results['vehicle_option_duplicates']}")
    print(f"Year range: {results['year_range']}")
    print(f"Out-of-range years (should be 0): {results['year_range_valid']}")
    print(f"Counts (engines, transmissions, vehicle_options): {results['counts']}")
    print(
        "Cross-join gaps (should be 0 to avoid impossible pairs): "
        f"{results['cross_join_gaps']}"
    )

    issues_found = any(results[key] != "0" for key in ZERO_EXPECTED_KEYS)

    try:
        invalids = check_invalid_combinations()
    except (subprocess.CalledProcessError, ValueError) as exc:
        _fail_query("invalid_combinations", exc)

    if invalids:
        issues_found = True
        print("\n❌ Invalid combinations detected:")
        for issue in invalids:
            print(f" - {issue}")
    else:
        print("\n✅ No known invalid year/make/model/trim combos found.")

    try:
        check_trim_coverage()
    except subprocess.CalledProcessError as exc:
        _fail_query("trim_coverage", exc)

    if issues_found:
        print("\n❌ QA checks found issues.")
        # Non-zero status lets callers (CI, import scripts) detect the failure.
        sys.exit(1)

    print("\n✅ QA checks passed.")


if __name__ == "__main__":
    main()