Files
motovaultpro/mvp-platform-services/vehicles/etl/tests/test_json_extractor.py
Eric Gullickson a052040e3a Initial Commit
2025-09-17 16:09:15 -05:00

427 lines
15 KiB
Python

"""
Unit Tests for JsonExtractor
Tests the JSON extraction functionality including:
- JSON structure validation
- Make/model/year/trim/engine extraction
- Electric vehicle handling (empty engines arrays)
- Data normalization and quality assurance
- Error handling and reporting
- Integration with MakeNameMapper and EngineSpecParser
"""
import unittest
import tempfile
import json
import os
from unittest.mock import patch, MagicMock
# Import the classes we're testing
from ..extractors.json_extractor import (
JsonExtractor, MakeData, ModelData, ExtractionResult, ValidationResult
)
from ..utils.make_name_mapper import MakeNameMapper
from ..utils.engine_spec_parser import EngineSpecParser, EngineSpec
class TestJsonExtractor(unittest.TestCase):
    """Test cases for JsonExtractor functionality.

    Every fixture is written into a per-test temporary directory that is
    registered with ``addCleanup``, so the directory (and anything inside it)
    is removed after the test whether it passes or fails.
    """

    def setUp(self):
        """Set up test environment before each test"""
        self.make_mapper = MakeNameMapper()
        self.engine_parser = EngineSpecParser()
        self.extractor = JsonExtractor(self.make_mapper, self.engine_parser)

    def _make_temp_dir(self) -> str:
        """Create a temporary directory that is removed when the test ends.

        Returns:
            Path of the newly created, empty directory.
        """
        temp_dir = tempfile.TemporaryDirectory()
        # Registering the bound method keeps the TemporaryDirectory object
        # alive until cleanup runs, so it is not finalized mid-test.
        self.addCleanup(temp_dir.cleanup)
        return temp_dir.name

    @staticmethod
    def _write_json_file(directory: str, filename: str, content) -> str:
        """Serialize ``content`` as JSON to ``directory/filename``.

        Args:
            directory: Existing directory to write into.
            filename: Name of the file to create inside ``directory``.
            content: JSON-serializable object to dump.

        Returns:
            Full path of the written file.
        """
        file_path = os.path.join(directory, filename)
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(content, f)
        return file_path

    def create_test_json_file(self, filename: str, content: dict) -> str:
        """Create a temporary JSON file for testing.

        The file lives in its own temporary directory, which is deleted
        automatically after the current test.

        Args:
            filename: Name to give the file (the tests rely on it, e.g.
                ``"toyota.json"``).
            content: JSON-serializable object to write.

        Returns:
            Full path of the written file.
        """
        return self._write_json_file(self._make_temp_dir(), filename, content)

    def test_validate_json_structure_valid(self):
        """Test JSON structure validation with valid data"""
        valid_json = {
            "toyota": [
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "camry",
                            "engines": ["2.5L I4", "3.5L V6"],
                            "submodels": ["LE", "XLE", "XSE"],
                        }
                    ],
                }
            ]
        }

        result = self.extractor.validate_json_structure(valid_json, "toyota.json")

        self.assertTrue(result.is_valid)
        self.assertEqual(len(result.errors), 0)

    def test_validate_json_structure_invalid_top_level(self):
        """Test JSON validation with invalid top-level structure"""
        invalid_json = ["not", "a", "dict"]

        result = self.extractor.validate_json_structure(invalid_json, "test.json")

        self.assertFalse(result.is_valid)
        self.assertGreater(len(result.errors), 0)
        self.assertIn("must be a dictionary", result.errors[0])

    def test_validate_json_structure_multiple_keys(self):
        """Test JSON validation with multiple top-level keys"""
        invalid_json = {
            "toyota": [],
            "honda": [],
        }

        result = self.extractor.validate_json_structure(invalid_json, "test.json")

        self.assertFalse(result.is_valid)
        # Guard the index below so a regression fails as an assertion, not
        # as an IndexError.
        self.assertGreater(len(result.errors), 0)
        self.assertIn("exactly one top-level key", result.errors[0])

    def test_validate_json_structure_missing_required_fields(self):
        """Test JSON validation with missing required fields"""
        invalid_json = {
            "toyota": [
                {
                    # Missing 'year' field
                    "models": [
                        {
                            # Missing 'name' field
                            "engines": ["2.5L I4"],
                        }
                    ],
                }
            ]
        }

        result = self.extractor.validate_json_structure(invalid_json, "test.json")

        self.assertFalse(result.is_valid)
        self.assertTrue(any("missing 'year' field" in error for error in result.errors))
        self.assertTrue(any("missing 'name' field" in error for error in result.errors))

    def test_extract_make_data_simple(self):
        """Test extraction of simple make data"""
        test_json = {
            "toyota": [
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "camry",
                            "engines": ["2.5L I4", "3.5L V6"],
                            "submodels": ["LE", "XLE"],
                        }
                    ],
                }
            ]
        }
        json_file = self.create_test_json_file("toyota.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        self.assertEqual(make_data.name, "Toyota")
        self.assertEqual(make_data.filename, "toyota.json")
        self.assertEqual(len(make_data.models), 1)
        self.assertEqual(len(make_data.processing_errors), 0)

        # Check model data
        model = make_data.models[0]
        self.assertEqual(model.name, "camry")
        self.assertEqual(model.years, [2024])
        self.assertEqual(len(model.engines), 2)
        self.assertEqual(len(model.trims), 2)
        self.assertFalse(model.is_electric)

    def test_extract_make_data_electric_vehicle(self):
        """Test extraction with electric vehicle (empty engines array)"""
        test_json = {
            "tesla": [
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "model s",
                            "engines": [],  # Empty engines - electric vehicle
                            "submodels": ["Base", "Plaid"],
                        }
                    ],
                }
            ]
        }
        json_file = self.create_test_json_file("tesla.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        self.assertEqual(make_data.name, "Tesla")
        self.assertEqual(len(make_data.models), 1)

        model = make_data.models[0]
        self.assertTrue(model.is_electric)
        self.assertEqual(len(model.engines), 1)  # Should get default electric motor
        self.assertEqual(model.engines[0].fuel_type, "Electric")
        self.assertEqual(model.engines[0].configuration, "Electric")

    def test_extract_make_data_multiple_years(self):
        """Test extraction with model appearing across multiple years"""
        test_json = {
            "honda": [
                {
                    "year": "2023",
                    "models": [
                        {
                            "name": "civic",
                            "engines": ["1.5L I4"],
                            "submodels": ["LX", "EX"],
                        }
                    ],
                },
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "civic",
                            "engines": ["1.5L I4", "2.0L I4"],
                            "submodels": ["LX", "EX", "Type R"],
                        }
                    ],
                },
            ]
        }
        json_file = self.create_test_json_file("honda.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        self.assertEqual(len(make_data.models), 1)  # Should merge into one model

        model = make_data.models[0]
        self.assertEqual(model.name, "civic")
        self.assertEqual(sorted(model.years), [2023, 2024])
        self.assertEqual(len(model.engines), 2)  # Should have both engines
        self.assertEqual(len(model.trims), 3)  # Should have unique trims

    def test_extract_make_data_l_to_i_normalization(self):
        """Test that L→I normalization is applied during extraction"""
        test_json = {
            "geo": [
                {
                    "year": "1995",
                    "models": [
                        {
                            "name": "metro",
                            "engines": ["1.0L L3", "1.3L I4"],  # L3 should become I3
                            "submodels": ["Base", "LSi"],
                        }
                    ],
                }
            ]
        }
        json_file = self.create_test_json_file("geo.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)
        model = make_data.models[0]

        # Find the L3 engine (should be normalized to I3)
        l3_engine = next(
            (
                engine
                for engine in model.engines
                if engine.displacement_l == 1.0 and engine.cylinders == 3
            ),
            None,
        )

        self.assertIsNotNone(l3_engine)
        self.assertEqual(l3_engine.configuration, "I")  # Should be normalized from L

    def test_extract_make_data_invalid_json(self):
        """Test extraction with invalid JSON file"""
        json_file = self.create_test_json_file("invalid.json", {"invalid": "structure"})

        make_data = self.extractor.extract_make_data(json_file)

        # Should return make data with errors
        self.assertEqual(make_data.name, "Invalid")
        self.assertEqual(len(make_data.models), 0)
        self.assertGreater(len(make_data.processing_errors), 0)

    def test_extract_all_makes_multiple_files(self):
        """Test extraction of multiple make files"""
        # Create temporary directory with multiple JSON files
        temp_dir = self._make_temp_dir()
        toyota_json = {"toyota": [{"year": "2024", "models": [{"name": "camry", "engines": ["2.5L I4"], "submodels": ["LE"]}]}]}
        tesla_json = {"tesla": [{"year": "2024", "models": [{"name": "model s", "engines": [], "submodels": ["Base"]}]}]}
        self._write_json_file(temp_dir, "toyota.json", toyota_json)
        self._write_json_file(temp_dir, "tesla.json", tesla_json)

        # Extract all makes
        result = self.extractor.extract_all_makes(temp_dir)

        self.assertEqual(result.total_files_processed, 2)
        self.assertEqual(result.successful_extractions, 2)
        self.assertEqual(result.failed_extractions, 0)
        self.assertEqual(len(result.makes), 2)
        self.assertEqual(result.total_models, 2)
        self.assertEqual(result.total_engines, 2)  # Toyota: 1, Tesla: 1 (electric)
        self.assertEqual(result.total_electric_models, 1)  # Tesla

        # Check make names
        make_names = [make.name for make in result.makes]
        self.assertIn("Toyota", make_names)
        self.assertIn("Tesla", make_names)

    def test_extract_all_makes_empty_directory(self):
        """Test extraction from empty directory"""
        temp_dir = self._make_temp_dir()

        result = self.extractor.extract_all_makes(temp_dir)

        self.assertEqual(result.total_files_processed, 0)
        self.assertEqual(result.successful_extractions, 0)
        self.assertEqual(result.failed_extractions, 0)
        self.assertEqual(len(result.makes), 0)

    def test_get_extraction_statistics(self):
        """Test extraction statistics generation"""
        # Create mock extraction result
        make1 = MakeData("Toyota", "toyota.json", [], [], [])
        make1.models = [ModelData("camry", [2024], [], [], False)]
        make2 = MakeData("Tesla", "tesla.json", [], [], [])
        make2.models = [ModelData("model s", [2024], [], [], True)]
        result = ExtractionResult(
            makes=[make1, make2],
            total_files_processed=2,
            successful_extractions=2,
            failed_extractions=0,
            total_models=2,
            total_engines=2,
            total_electric_models=1,
        )

        stats = self.extractor.get_extraction_statistics(result)

        self.assertEqual(stats['files']['total_processed'], 2)
        self.assertEqual(stats['files']['successful'], 2)
        # success_rate is a computed ratio; compare with float tolerance.
        self.assertAlmostEqual(stats['files']['success_rate'], 1.0)
        self.assertEqual(stats['data']['total_makes'], 2)
        self.assertEqual(stats['data']['total_models'], 2)
        self.assertEqual(stats['data']['electric_models'], 1)
        self.assertEqual(len(stats['makes']), 2)
class TestDataStructures(unittest.TestCase):
    """Test cases for data structure classes.

    Exercises the derived properties exposed by ValidationResult, ModelData,
    MakeData and ExtractionResult using hand-built instances.
    """

    def test_validation_result(self):
        """Test ValidationResult properties"""
        result = ValidationResult(True, [], ["warning"])

        self.assertTrue(result.is_valid)
        self.assertFalse(result.has_errors)
        self.assertTrue(result.has_warnings)

    def test_model_data_properties(self):
        """Test ModelData calculated properties"""
        # Create mock engine specs
        engines = [
            EngineSpec(2.5, "I", 4, "Gasoline", "Natural", "2.5L I4"),
            EngineSpec(3.5, "V", 6, "Gasoline", "Natural", "3.5L V6"),
        ]
        model = ModelData(
            name="camry",
            years=[2023, 2024],
            engines=engines,
            trims=["LE", "XLE", "XSE"],
            is_electric=False,
        )

        self.assertEqual(model.total_trims, 3)
        self.assertEqual(model.total_engines, 2)
        self.assertEqual(model.year_range, "2023-2024")

    def test_model_data_single_year(self):
        """Test ModelData with single year"""
        model = ModelData("camry", [2024], [], ["LE"])

        self.assertEqual(model.year_range, "2024")

    def test_make_data_properties(self):
        """Test MakeData calculated properties"""
        model1 = ModelData("camry", [2024], [], ["LE", "XLE"], False)
        model2 = ModelData("prius", [2024], [], ["L", "LE"], True)  # Electric
        make = MakeData("Toyota", "toyota.json", [model1, model2], [], [])

        self.assertEqual(make.total_models, 2)
        self.assertEqual(make.total_trims, 4)
        self.assertEqual(make.electric_models_count, 1)

    def test_extraction_result_properties(self):
        """Test ExtractionResult calculated properties"""
        result = ExtractionResult(
            makes=[],
            total_files_processed=10,
            successful_extractions=8,
            failed_extractions=2,
            total_models=100,
            total_engines=500,
            total_electric_models=25,
        )

        # success_rate is a computed ratio; compare with float tolerance
        # rather than exact equality.
        self.assertAlmostEqual(result.success_rate, 0.8)
# Run the suite with verbose output only when this file is executed as a
# script; importing the module (e.g. via a test runner) must not trigger it.
if __name__ == '__main__':
    unittest.main(verbosity=2)