Before updates to NHTSA
This commit is contained in:
112
data/make-model-import/qa_validate.py
Executable file
112
data/make-model-import/qa_validate.py
Executable file
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Post-import QA validation for vehicle dropdown data.
|
||||
Runs basic duplicate and range checks against the motovaultpro Postgres container.
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def run_psql(query: str) -> str:
    """Execute one SQL statement with psql inside the Postgres container.

    The statement is run through ``docker exec`` in the ``mvp-postgres``
    container, connecting as user ``postgres`` to database ``motovaultpro``.
    psql is invoked with ``-At`` (unaligned, tuples-only output) so the
    result is easy to parse.

    Args:
        query: SQL text handed to psql's ``-c`` option.

    Returns:
        Everything psql wrote to stdout, as text (not stripped).

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    docker_prefix = ["docker", "exec", "mvp-postgres"]
    psql_invocation = [
        "psql",
        "-U", "postgres",
        "-d", "motovaultpro",
        "-At",
        "-c", query,
    ]
    completed = subprocess.run(
        docker_prefix + psql_invocation,
        stdout=subprocess.PIPE,
        check=True,
        text=True,
    )
    return completed.stdout
|
||||
|
||||
|
||||
def check_container():
    """Exit with status 1 unless Docker works and ``mvp-postgres`` is running.

    Two ``docker ps`` probes are made: the first confirms the Docker CLI can
    be invoked at all, the second lists running containers whose name matches
    ``mvp-postgres``. A message is printed before every exit.

    Raises:
        SystemExit: With code 1 when Docker cannot be invoked, the container
            listing fails, or no running container is named exactly
            ``mvp-postgres``.
    """
    container_name = "mvp-postgres"

    try:
        subprocess.check_output(["docker", "ps"], text=True)
    except Exception:
        print("❌ Docker not available.")
        sys.exit(1)

    try:
        listing = subprocess.check_output(
            [
                "docker",
                "ps",
                "--filter",
                f"name={container_name}",
                "--format",
                "{{.Names}}",
            ],
            text=True,
        )
    except Exception as exc:
        print(f"❌ Failed to check containers: {exc}")
        sys.exit(1)

    # Docker's name filter also matches partial names (for example
    # "mvp-postgres-backup"), so require an exact match: `docker exec` needs
    # a container with precisely this name.
    running_names = {line.strip() for line in listing.splitlines()}
    if container_name not in running_names:
        print("❌ mvp-postgres container not running.")
        sys.exit(1)
|
||||
|
||||
|
||||
def main():
    """Run the post-import QA queries, print a report, and signal the outcome.

    After confirming the Postgres container is reachable, five queries are
    executed: three duplicate counts (engine names, transmission types, and
    vehicle option rows) plus two informational ones (year range and table
    row counts). Only the three duplicate counts decide pass/fail; each must
    be exactly ``"0"``.

    Raises:
        SystemExit: With code 1 when the container check fails, any query
            fails, or any duplicate check reports a non-zero count, so the
            script can gate an import pipeline.
    """
    check_container()

    print("🔍 Running QA checks...\n")

    queries = {
        "engine_duplicate_names": """
            SELECT COUNT(*) FROM (
                SELECT LOWER(name) as n, COUNT(*) c
                FROM engines
                GROUP BY 1 HAVING COUNT(*) > 1
            ) t;
        """,
        "transmission_duplicate_types": """
            SELECT COUNT(*) FROM (
                SELECT LOWER(type) as t, COUNT(*) c
                FROM transmissions
                GROUP BY 1 HAVING COUNT(*) > 1
            ) t;
        """,
        "vehicle_option_duplicates": """
            SELECT COUNT(*) FROM (
                SELECT year, make, model, trim, engine_id, transmission_id, COUNT(*) c
                FROM vehicle_options
                GROUP BY 1,2,3,4,5,6 HAVING COUNT(*) > 1
            ) t;
        """,
        "year_range": """
            SELECT MIN(year) || ' - ' || MAX(year) FROM vehicle_options;
        """,
        "counts": """
            SELECT
                (SELECT COUNT(*) FROM engines) AS engines,
                (SELECT COUNT(*) FROM transmissions) AS transmissions,
                (SELECT COUNT(*) FROM vehicle_options) AS vehicle_options;
        """,
    }

    results = {}
    for key, query in queries.items():
        try:
            results[key] = run_psql(query).strip()
        except subprocess.CalledProcessError as exc:
            print(f"❌ Query failed ({key}): {exc}")
            sys.exit(1)

    print(f"Engine duplicate names: {results['engine_duplicate_names']}")
    print(f"Transmission duplicate types: {results['transmission_duplicate_types']}")
    print(f"Vehicle option duplicates: {results['vehicle_option_duplicates']}")
    print(f"Year range: {results['year_range']}")
    print(f"Counts (engines, transmissions, vehicle_options): {results['counts']}")

    # Only the duplicate counters gate the result; year range and row counts
    # are reported for information.
    duplicate_checks = (
        "engine_duplicate_names",
        "transmission_duplicate_types",
        "vehicle_option_duplicates",
    )
    if all(results[name] == "0" for name in duplicate_checks):
        print("\n✅ QA checks passed.")
    else:
        print("\n❌ QA checks found issues.")
        # Exit non-zero so callers (shell scripts, CI) can detect the failure,
        # matching the other failure paths in this script.
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the QA checks only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user