Files
motovaultpro/data/make-model-import/qa_validate.py
2025-12-14 14:53:45 -06:00

113 lines
3.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Post-import QA validation for vehicle dropdown data.
Runs basic duplicate and range checks against the motovaultpro Postgres container.
"""
import os
import subprocess
import sys
def run_psql(query: str) -> str:
cmd = [
"docker",
"exec",
"mvp-postgres",
"psql",
"-U",
"postgres",
"-d",
"motovaultpro",
"-At",
"-c",
query,
]
return subprocess.check_output(cmd, text=True)
def check_container():
try:
subprocess.check_output(["docker", "ps"], text=True)
except Exception:
print("❌ Docker not available.")
sys.exit(1)
try:
containers = subprocess.check_output(
["docker", "ps", "--filter", "name=mvp-postgres", "--format", "{{.Names}}"],
text=True,
).strip()
if not containers:
print("❌ mvp-postgres container not running.")
sys.exit(1)
except Exception as exc:
print(f"❌ Failed to check containers: {exc}")
sys.exit(1)
def main():
check_container()
print("🔍 Running QA checks...\n")
queries = {
"engine_duplicate_names": """
SELECT COUNT(*) FROM (
SELECT LOWER(name) as n, COUNT(*) c
FROM engines
GROUP BY 1 HAVING COUNT(*) > 1
) t;
""",
"transmission_duplicate_types": """
SELECT COUNT(*) FROM (
SELECT LOWER(type) as t, COUNT(*) c
FROM transmissions
GROUP BY 1 HAVING COUNT(*) > 1
) t;
""",
"vehicle_option_duplicates": """
SELECT COUNT(*) FROM (
SELECT year, make, model, trim, engine_id, transmission_id, COUNT(*) c
FROM vehicle_options
GROUP BY 1,2,3,4,5,6 HAVING COUNT(*) > 1
) t;
""",
"year_range": """
SELECT MIN(year) || ' - ' || MAX(year) FROM vehicle_options;
""",
"counts": """
SELECT
(SELECT COUNT(*) FROM engines) AS engines,
(SELECT COUNT(*) FROM transmissions) AS transmissions,
(SELECT COUNT(*) FROM vehicle_options) AS vehicle_options;
""",
}
results = {}
for key, query in queries.items():
try:
results[key] = run_psql(query).strip()
except subprocess.CalledProcessError as exc:
print(f"❌ Query failed ({key}): {exc}")
sys.exit(1)
print(f"Engine duplicate names: {results['engine_duplicate_names']}")
print(f"Transmission duplicate types: {results['transmission_duplicate_types']}")
print(f"Vehicle option duplicates: {results['vehicle_option_duplicates']}")
print(f"Year range: {results['year_range']}")
print(f"Counts (engines, transmissions, vehicle_options): {results['counts']}")
if (
results["engine_duplicate_names"] == "0"
and results["transmission_duplicate_types"] == "0"
and results["vehicle_option_duplicates"] == "0"
):
print("\n✅ QA checks passed.")
else:
print("\n❌ QA checks found issues.")
if __name__ == "__main__":
main()