Initial Commit
This commit is contained in:
427
mvp-platform-services/vehicles/etl/tests/test_json_extractor.py
Normal file
427
mvp-platform-services/vehicles/etl/tests/test_json_extractor.py
Normal file
@@ -0,0 +1,427 @@
|
||||
"""
|
||||
Unit Tests for JsonExtractor

Tests the JSON extraction functionality including:
- JSON structure validation
- Make/model/year/trim/engine extraction
- Electric vehicle handling (empty engines arrays)
- Data normalization and quality assurance
- Error handling and reporting
- Integration with MakeNameMapper and EngineSpecParser
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import tempfile
|
||||
import json
|
||||
import os
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
# Import the classes we're testing
|
||||
from ..extractors.json_extractor import (
|
||||
JsonExtractor, MakeData, ModelData, ExtractionResult, ValidationResult
|
||||
)
|
||||
from ..utils.make_name_mapper import MakeNameMapper
|
||||
from ..utils.engine_spec_parser import EngineSpecParser, EngineSpec
|
||||
|
||||
|
||||
class TestJsonExtractor(unittest.TestCase):
    """Test cases for JsonExtractor functionality.

    Each test gets a fresh extractor from ``setUp``. Fixture files are written
    into temporary directories that are removed automatically, so neither a
    passing nor a failing test leaves files or directories behind.
    """

    def setUp(self):
        """Set up test environment before each test"""
        self.make_mapper = MakeNameMapper()
        self.engine_parser = EngineSpecParser()
        self.extractor = JsonExtractor(self.make_mapper, self.engine_parser)

    def create_test_json_file(self, filename: str, content: dict) -> str:
        """Create a temporary JSON file for testing.

        The file is placed in its own temporary directory. Removal of that
        directory (and the file inside it) is registered with ``addCleanup``,
        so callers do not have to delete anything themselves.

        Args:
            filename: Base name of the file to create (e.g. ``"toyota.json"``).
            content: Object serialised into the file with ``json.dump``.

        Returns:
            Full path of the file that was written.
        """
        temp_dir = tempfile.TemporaryDirectory()
        # Register cleanup right away so the directory is removed even if
        # writing the file below raises.
        self.addCleanup(temp_dir.cleanup)

        file_path = os.path.join(temp_dir.name, filename)
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(content, f)

        return file_path

    def test_validate_json_structure_valid(self):
        """Test JSON structure validation with valid data"""
        valid_json = {
            "toyota": [
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "camry",
                            "engines": ["2.5L I4", "3.5L V6"],
                            "submodels": ["LE", "XLE", "XSE"]
                        }
                    ]
                }
            ]
        }

        result = self.extractor.validate_json_structure(valid_json, "toyota.json")

        self.assertTrue(result.is_valid)
        self.assertEqual(len(result.errors), 0)

    def test_validate_json_structure_invalid_top_level(self):
        """Test JSON validation with invalid top-level structure"""
        invalid_json = ["not", "a", "dict"]

        result = self.extractor.validate_json_structure(invalid_json, "test.json")

        self.assertFalse(result.is_valid)
        self.assertGreater(len(result.errors), 0)
        self.assertIn("must be a dictionary", result.errors[0])

    def test_validate_json_structure_multiple_keys(self):
        """Test JSON validation with multiple top-level keys"""
        invalid_json = {
            "toyota": [],
            "honda": []
        }

        result = self.extractor.validate_json_structure(invalid_json, "test.json")

        self.assertFalse(result.is_valid)
        # Guard the index below: an empty error list should be reported as an
        # assertion failure, not as an IndexError.
        self.assertGreater(len(result.errors), 0)
        self.assertIn("exactly one top-level key", result.errors[0])

    def test_validate_json_structure_missing_required_fields(self):
        """Test JSON validation with missing required fields"""
        invalid_json = {
            "toyota": [
                {
                    # Missing 'year' field
                    "models": [
                        {
                            # Missing 'name' field
                            "engines": ["2.5L I4"]
                        }
                    ]
                }
            ]
        }

        result = self.extractor.validate_json_structure(invalid_json, "test.json")

        self.assertFalse(result.is_valid)
        self.assertTrue(any("missing 'year' field" in error for error in result.errors))
        self.assertTrue(any("missing 'name' field" in error for error in result.errors))

    def test_extract_make_data_simple(self):
        """Test extraction of simple make data"""
        test_json = {
            "toyota": [
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "camry",
                            "engines": ["2.5L I4", "3.5L V6"],
                            "submodels": ["LE", "XLE"]
                        }
                    ]
                }
            ]
        }

        json_file = self.create_test_json_file("toyota.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        self.assertEqual(make_data.name, "Toyota")
        self.assertEqual(make_data.filename, "toyota.json")
        self.assertEqual(len(make_data.models), 1)
        self.assertEqual(len(make_data.processing_errors), 0)

        # Check model data
        model = make_data.models[0]
        self.assertEqual(model.name, "camry")
        self.assertEqual(model.years, [2024])
        self.assertEqual(len(model.engines), 2)
        self.assertEqual(len(model.trims), 2)
        self.assertFalse(model.is_electric)

    def test_extract_make_data_electric_vehicle(self):
        """Test extraction with electric vehicle (empty engines array)"""
        test_json = {
            "tesla": [
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "model s",
                            "engines": [],  # Empty engines - electric vehicle
                            "submodels": ["Base", "Plaid"]
                        }
                    ]
                }
            ]
        }

        json_file = self.create_test_json_file("tesla.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        self.assertEqual(make_data.name, "Tesla")
        self.assertEqual(len(make_data.models), 1)

        model = make_data.models[0]
        self.assertTrue(model.is_electric)
        self.assertEqual(len(model.engines), 1)  # Should get default electric motor
        self.assertEqual(model.engines[0].fuel_type, "Electric")
        self.assertEqual(model.engines[0].configuration, "Electric")

    def test_extract_make_data_multiple_years(self):
        """Test extraction with model appearing across multiple years"""
        test_json = {
            "honda": [
                {
                    "year": "2023",
                    "models": [
                        {
                            "name": "civic",
                            "engines": ["1.5L I4"],
                            "submodels": ["LX", "EX"]
                        }
                    ]
                },
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "civic",
                            "engines": ["1.5L I4", "2.0L I4"],
                            "submodels": ["LX", "EX", "Type R"]
                        }
                    ]
                }
            ]
        }

        json_file = self.create_test_json_file("honda.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        self.assertEqual(len(make_data.models), 1)  # Should merge into one model

        model = make_data.models[0]
        self.assertEqual(model.name, "civic")
        self.assertEqual(sorted(model.years), [2023, 2024])
        self.assertEqual(len(model.engines), 2)  # Should have both engines
        self.assertEqual(len(model.trims), 3)  # Should have unique trims

    def test_extract_make_data_l_to_i_normalization(self):
        """Test that L→I normalization is applied during extraction"""
        test_json = {
            "geo": [
                {
                    "year": "1995",
                    "models": [
                        {
                            "name": "metro",
                            "engines": ["1.0L L3", "1.3L I4"],  # L3 should become I3
                            "submodels": ["Base", "LSi"]
                        }
                    ]
                }
            ]
        }

        json_file = self.create_test_json_file("geo.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        model = make_data.models[0]

        # Find the L3 engine (should be normalized to I3)
        l3_engine = next(
            (
                engine for engine in model.engines
                if engine.displacement_l == 1.0 and engine.cylinders == 3
            ),
            None,
        )

        self.assertIsNotNone(l3_engine)
        self.assertEqual(l3_engine.configuration, "I")  # Should be normalized from L

    def test_extract_make_data_invalid_json(self):
        """Test extraction with invalid JSON file"""
        json_file = self.create_test_json_file("invalid.json", {"invalid": "structure"})

        make_data = self.extractor.extract_make_data(json_file)

        # Should return make data with errors
        self.assertEqual(make_data.name, "Invalid")
        self.assertEqual(len(make_data.models), 0)
        self.assertGreater(len(make_data.processing_errors), 0)

    def test_extract_all_makes_multiple_files(self):
        """Test extraction of multiple make files"""
        toyota_json = {"toyota": [{"year": "2024", "models": [{"name": "camry", "engines": ["2.5L I4"], "submodels": ["LE"]}]}]}
        tesla_json = {"tesla": [{"year": "2024", "models": [{"name": "model s", "engines": [], "submodels": ["Base"]}]}]}

        # Both files must share one directory, so the single-file helper is
        # not used here. The context manager removes the directory and
        # everything in it, even when an assertion fails.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create test files
            fixtures = (("toyota.json", toyota_json), ("tesla.json", tesla_json))
            for filename, content in fixtures:
                with open(os.path.join(temp_dir, filename), 'w', encoding='utf-8') as f:
                    json.dump(content, f)

            # Extract all makes
            result = self.extractor.extract_all_makes(temp_dir)

            self.assertEqual(result.total_files_processed, 2)
            self.assertEqual(result.successful_extractions, 2)
            self.assertEqual(result.failed_extractions, 0)
            self.assertEqual(len(result.makes), 2)
            self.assertEqual(result.total_models, 2)
            self.assertEqual(result.total_engines, 2)  # Toyota: 1, Tesla: 1 (electric)
            self.assertEqual(result.total_electric_models, 1)  # Tesla

            # Check make names
            make_names = [make.name for make in result.makes]
            self.assertIn("Toyota", make_names)
            self.assertIn("Tesla", make_names)

    def test_extract_all_makes_empty_directory(self):
        """Test extraction from empty directory"""
        with tempfile.TemporaryDirectory() as temp_dir:
            result = self.extractor.extract_all_makes(temp_dir)

            self.assertEqual(result.total_files_processed, 0)
            self.assertEqual(result.successful_extractions, 0)
            self.assertEqual(result.failed_extractions, 0)
            self.assertEqual(len(result.makes), 0)

    def test_get_extraction_statistics(self):
        """Test extraction statistics generation"""
        # Create mock extraction result
        make1 = MakeData("Toyota", "toyota.json", [], [], [])
        make1.models = [ModelData("camry", [2024], [], [], False)]

        make2 = MakeData("Tesla", "tesla.json", [], [], [])
        make2.models = [ModelData("model s", [2024], [], [], True)]

        result = ExtractionResult(
            makes=[make1, make2],
            total_files_processed=2,
            successful_extractions=2,
            failed_extractions=0,
            total_models=2,
            total_engines=2,
            total_electric_models=1
        )

        stats = self.extractor.get_extraction_statistics(result)

        self.assertEqual(stats['files']['total_processed'], 2)
        self.assertEqual(stats['files']['successful'], 2)
        self.assertEqual(stats['files']['success_rate'], 1.0)
        self.assertEqual(stats['data']['total_makes'], 2)
        self.assertEqual(stats['data']['total_models'], 2)
        self.assertEqual(stats['data']['electric_models'], 1)
        self.assertEqual(len(stats['makes']), 2)
|
||||
|
||||
|
||||
class TestDataStructures(unittest.TestCase):
    """Checks for the derived properties of the extractor's data classes."""

    def test_validation_result(self):
        """A result built as (True, [], ["warning"]) flags warnings only."""
        outcome = ValidationResult(True, [], ["warning"])

        self.assertTrue(outcome.is_valid)
        self.assertFalse(outcome.has_errors)
        self.assertTrue(outcome.has_warnings)

    def test_model_data_properties(self):
        """ModelData reports trim count, engine count and a year span."""
        # Two engine specs used purely as list members for the count checks
        four_cylinder = EngineSpec(2.5, "I", 4, "Gasoline", "Natural", "2.5L I4")
        six_cylinder = EngineSpec(3.5, "V", 6, "Gasoline", "Natural", "3.5L V6")

        camry = ModelData(
            name="camry",
            years=[2023, 2024],
            engines=[four_cylinder, six_cylinder],
            trims=["LE", "XLE", "XSE"],
            is_electric=False
        )

        self.assertEqual(camry.total_trims, 3)
        self.assertEqual(camry.total_engines, 2)
        self.assertEqual(camry.year_range, "2023-2024")

    def test_model_data_single_year(self):
        """A model with one year renders year_range as that year alone."""
        camry = ModelData("camry", [2024], [], ["LE"])

        self.assertEqual(camry.year_range, "2024")

    def test_make_data_properties(self):
        """MakeData aggregates model, trim and electric-model counts."""
        gasoline_model = ModelData("camry", [2024], [], ["LE", "XLE"], False)
        electric_model = ModelData("prius", [2024], [], ["L", "LE"], True)  # Electric

        toyota = MakeData("Toyota", "toyota.json", [gasoline_model, electric_model], [], [])

        self.assertEqual(toyota.total_models, 2)
        self.assertEqual(toyota.total_trims, 4)
        self.assertEqual(toyota.electric_models_count, 1)

    def test_extraction_result_properties(self):
        """ExtractionResult.success_rate is 0.8 for 8 successes out of 10 files."""
        summary = ExtractionResult(
            makes=[],
            total_files_processed=10,
            successful_extractions=8,
            failed_extractions=2,
            total_models=100,
            total_engines=500,
            total_electric_models=25
        )

        self.assertEqual(summary.success_rate, 0.8)
|
||||
|
||||
|
||||
# Run the suite with verbose output when this module is executed as a program.
# The module uses package-relative imports, so it has to be started with
# ``python -m <package path>.tests.test_json_extractor`` rather than by file path.
if __name__ == "__main__":
    unittest.main(verbosity=2)
|
||||
Reference in New Issue
Block a user