427 lines
15 KiB
Python
427 lines
15 KiB
Python
"""
Unit Tests for JsonExtractor

Tests the JSON extraction functionality including:
- JSON structure validation
- Make/model/year/trim/engine extraction
- Electric vehicle handling (empty engines arrays)
- Data normalization and quality assurance
- Error handling and reporting
- Integration with MakeNameMapper and EngineSpecParser
"""
|
|
|
|
import unittest
|
|
import tempfile
|
|
import json
|
|
import os
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
# Import the classes we're testing
|
|
from ..extractors.json_extractor import (
|
|
JsonExtractor, MakeData, ModelData, ExtractionResult, ValidationResult
|
|
)
|
|
from ..utils.make_name_mapper import MakeNameMapper
|
|
from ..utils.engine_spec_parser import EngineSpecParser, EngineSpec
|
|
|
|
|
|
class TestJsonExtractor(unittest.TestCase):
    """Test cases for JsonExtractor functionality.

    JSON fixtures are written into temporary directories that are removed
    automatically once each test finishes, whether it passes or fails.
    """

    def setUp(self):
        """Set up a fresh extractor, mapper and parser before each test."""
        self.make_mapper = MakeNameMapper()
        self.engine_parser = EngineSpecParser()
        self.extractor = JsonExtractor(self.make_mapper, self.engine_parser)

    def create_test_json_file(self, filename: str, content: dict) -> str:
        """Create a temporary JSON file for testing.

        The file is placed in its own temporary directory. That directory is
        registered with ``addCleanup`` so that both the file and the directory
        are removed at the end of the test; previously only the file was
        deleted and the directory was leaked. Callers that still delete the
        file themselves remain safe, because the cleanup tolerates an already
        missing file.

        Args:
            filename: Base name of the file to create, e.g. ``"toyota.json"``.
            content: JSON-serialisable object written to the file.

        Returns:
            Path of the created file.
        """
        temp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(temp_dir.cleanup)

        file_path = os.path.join(temp_dir.name, filename)
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(content, f)

        return file_path

    def test_validate_json_structure_valid(self):
        """Test JSON structure validation with valid data."""
        valid_json = {
            "toyota": [
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "camry",
                            "engines": ["2.5L I4", "3.5L V6"],
                            "submodels": ["LE", "XLE", "XSE"],
                        }
                    ],
                }
            ]
        }

        result = self.extractor.validate_json_structure(valid_json, "toyota.json")

        self.assertTrue(result.is_valid)
        self.assertEqual(len(result.errors), 0)

    def test_validate_json_structure_invalid_top_level(self):
        """Test JSON validation with invalid top-level structure."""
        invalid_json = ["not", "a", "dict"]

        result = self.extractor.validate_json_structure(invalid_json, "test.json")

        self.assertFalse(result.is_valid)
        self.assertGreater(len(result.errors), 0)
        self.assertIn("must be a dictionary", result.errors[0])

    def test_validate_json_structure_multiple_keys(self):
        """Test JSON validation with multiple top-level keys."""
        invalid_json = {
            "toyota": [],
            "honda": [],
        }

        result = self.extractor.validate_json_structure(invalid_json, "test.json")

        self.assertFalse(result.is_valid)
        self.assertIn("exactly one top-level key", result.errors[0])

    def test_validate_json_structure_missing_required_fields(self):
        """Test JSON validation with missing required fields."""
        invalid_json = {
            "toyota": [
                {
                    # Missing 'year' field
                    "models": [
                        {
                            # Missing 'name' field
                            "engines": ["2.5L I4"],
                        }
                    ],
                }
            ]
        }

        result = self.extractor.validate_json_structure(invalid_json, "test.json")

        self.assertFalse(result.is_valid)
        self.assertTrue(any("missing 'year' field" in error for error in result.errors))
        self.assertTrue(any("missing 'name' field" in error for error in result.errors))

    def test_extract_make_data_simple(self):
        """Test extraction of simple make data."""
        test_json = {
            "toyota": [
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "camry",
                            "engines": ["2.5L I4", "3.5L V6"],
                            "submodels": ["LE", "XLE"],
                        }
                    ],
                }
            ]
        }

        # The helper registers cleanup, so no try/finally is needed here.
        json_file = self.create_test_json_file("toyota.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        self.assertEqual(make_data.name, "Toyota")
        self.assertEqual(make_data.filename, "toyota.json")
        self.assertEqual(len(make_data.models), 1)
        self.assertEqual(len(make_data.processing_errors), 0)

        # Check model data
        model = make_data.models[0]
        self.assertEqual(model.name, "camry")
        self.assertEqual(model.years, [2024])
        self.assertEqual(len(model.engines), 2)
        self.assertEqual(len(model.trims), 2)
        self.assertFalse(model.is_electric)

    def test_extract_make_data_electric_vehicle(self):
        """Test extraction with electric vehicle (empty engines array)."""
        test_json = {
            "tesla": [
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "model s",
                            "engines": [],  # Empty engines - electric vehicle
                            "submodels": ["Base", "Plaid"],
                        }
                    ],
                }
            ]
        }

        json_file = self.create_test_json_file("tesla.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        self.assertEqual(make_data.name, "Tesla")
        self.assertEqual(len(make_data.models), 1)

        model = make_data.models[0]
        self.assertTrue(model.is_electric)
        self.assertEqual(len(model.engines), 1)  # Should get default electric motor
        self.assertEqual(model.engines[0].fuel_type, "Electric")
        self.assertEqual(model.engines[0].configuration, "Electric")

    def test_extract_make_data_multiple_years(self):
        """Test extraction with model appearing across multiple years."""
        test_json = {
            "honda": [
                {
                    "year": "2023",
                    "models": [
                        {
                            "name": "civic",
                            "engines": ["1.5L I4"],
                            "submodels": ["LX", "EX"],
                        }
                    ],
                },
                {
                    "year": "2024",
                    "models": [
                        {
                            "name": "civic",
                            "engines": ["1.5L I4", "2.0L I4"],
                            "submodels": ["LX", "EX", "Type R"],
                        }
                    ],
                },
            ]
        }

        json_file = self.create_test_json_file("honda.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)

        self.assertEqual(len(make_data.models), 1)  # Should merge into one model

        model = make_data.models[0]
        self.assertEqual(model.name, "civic")
        self.assertEqual(sorted(model.years), [2023, 2024])
        self.assertEqual(len(model.engines), 2)  # Should have both engines
        self.assertEqual(len(model.trims), 3)  # Should have unique trims

    def test_extract_make_data_l_to_i_normalization(self):
        """Test that L→I normalization is applied during extraction."""
        test_json = {
            "geo": [
                {
                    "year": "1995",
                    "models": [
                        {
                            "name": "metro",
                            "engines": ["1.0L L3", "1.3L I4"],  # L3 should become I3
                            "submodels": ["Base", "LSi"],
                        }
                    ],
                }
            ]
        }

        json_file = self.create_test_json_file("geo.json", test_json)

        make_data = self.extractor.extract_make_data(json_file)
        model = make_data.models[0]

        # Find the L3 engine (should be normalized to I3)
        l3_engine = next(
            (
                engine
                for engine in model.engines
                if engine.displacement_l == 1.0 and engine.cylinders == 3
            ),
            None,
        )

        self.assertIsNotNone(l3_engine)
        self.assertEqual(l3_engine.configuration, "I")  # Should be normalized from L

    def test_extract_make_data_invalid_json(self):
        """Test extraction with invalid JSON file."""
        json_file = self.create_test_json_file("invalid.json", {"invalid": "structure"})

        make_data = self.extractor.extract_make_data(json_file)

        # Should return make data with errors
        self.assertEqual(make_data.name, "Invalid")
        self.assertEqual(len(make_data.models), 0)
        self.assertGreater(len(make_data.processing_errors), 0)

    def test_extract_all_makes_multiple_files(self):
        """Test extraction of multiple make files."""
        fixtures = {
            "toyota.json": {
                "toyota": [
                    {
                        "year": "2024",
                        "models": [
                            {"name": "camry", "engines": ["2.5L I4"], "submodels": ["LE"]}
                        ],
                    }
                ]
            },
            "tesla.json": {
                "tesla": [
                    {
                        "year": "2024",
                        "models": [
                            {"name": "model s", "engines": [], "submodels": ["Base"]}
                        ],
                    }
                ]
            },
        }

        # The context manager removes the whole tree on exit, including
        # anything the extractor may have written next to the fixtures.
        with tempfile.TemporaryDirectory() as temp_dir:
            for filename, content in fixtures.items():
                file_path = os.path.join(temp_dir, filename)
                with open(file_path, 'w', encoding='utf-8') as f:
                    json.dump(content, f)

            # Extract all makes
            result = self.extractor.extract_all_makes(temp_dir)

            self.assertEqual(result.total_files_processed, 2)
            self.assertEqual(result.successful_extractions, 2)
            self.assertEqual(result.failed_extractions, 0)
            self.assertEqual(len(result.makes), 2)
            self.assertEqual(result.total_models, 2)
            self.assertEqual(result.total_engines, 2)  # Toyota: 1, Tesla: 1 (electric)
            self.assertEqual(result.total_electric_models, 1)  # Tesla

            # Check make names
            make_names = [make.name for make in result.makes]
            self.assertIn("Toyota", make_names)
            self.assertIn("Tesla", make_names)

    def test_extract_all_makes_empty_directory(self):
        """Test extraction from empty directory."""
        with tempfile.TemporaryDirectory() as temp_dir:
            result = self.extractor.extract_all_makes(temp_dir)

            self.assertEqual(result.total_files_processed, 0)
            self.assertEqual(result.successful_extractions, 0)
            self.assertEqual(result.failed_extractions, 0)
            self.assertEqual(len(result.makes), 0)

    def test_get_extraction_statistics(self):
        """Test extraction statistics generation."""
        # Create mock extraction result
        make1 = MakeData("Toyota", "toyota.json", [], [], [])
        make1.models = [ModelData("camry", [2024], [], [], False)]

        make2 = MakeData("Tesla", "tesla.json", [], [], [])
        make2.models = [ModelData("model s", [2024], [], [], True)]

        result = ExtractionResult(
            makes=[make1, make2],
            total_files_processed=2,
            successful_extractions=2,
            failed_extractions=0,
            total_models=2,
            total_engines=2,
            total_electric_models=1,
        )

        stats = self.extractor.get_extraction_statistics(result)

        self.assertEqual(stats['files']['total_processed'], 2)
        self.assertEqual(stats['files']['successful'], 2)
        self.assertEqual(stats['files']['success_rate'], 1.0)
        self.assertEqual(stats['data']['total_makes'], 2)
        self.assertEqual(stats['data']['total_models'], 2)
        self.assertEqual(stats['data']['electric_models'], 1)
        self.assertEqual(len(stats['makes']), 2)
|
|
|
|
|
|
class TestDataStructures(unittest.TestCase):
    """Checks for the derived properties of the extraction data classes."""

    def test_validation_result(self):
        """ValidationResult exposes is_valid, has_errors and has_warnings."""
        outcome = ValidationResult(True, [], ["warning"])

        self.assertTrue(outcome.is_valid)
        self.assertFalse(outcome.has_errors)
        self.assertTrue(outcome.has_warnings)

    def test_model_data_properties(self):
        """ModelData reports trim count, engine count and a year range."""
        # Two hand-built engine specs stand in for parsed engines.
        four_cylinder = EngineSpec(2.5, "I", 4, "Gasoline", "Natural", "2.5L I4")
        six_cylinder = EngineSpec(3.5, "V", 6, "Gasoline", "Natural", "3.5L V6")

        camry = ModelData(
            name="camry",
            years=[2023, 2024],
            engines=[four_cylinder, six_cylinder],
            trims=["LE", "XLE", "XSE"],
            is_electric=False,
        )

        self.assertEqual(camry.total_trims, 3)
        self.assertEqual(camry.total_engines, 2)
        self.assertEqual(camry.year_range, "2023-2024")

    def test_model_data_single_year(self):
        """A model with one year renders that year alone as its range."""
        camry = ModelData("camry", [2024], [], ["LE"])

        self.assertEqual(camry.year_range, "2024")

    def test_make_data_properties(self):
        """MakeData aggregates model, trim and electric-model counts."""
        models = [
            ModelData("camry", [2024], [], ["LE", "XLE"], False),
            ModelData("prius", [2024], [], ["L", "LE"], True),  # Electric
        ]

        toyota = MakeData("Toyota", "toyota.json", models, [], [])

        self.assertEqual(toyota.total_models, 2)
        self.assertEqual(toyota.total_trims, 4)
        self.assertEqual(toyota.electric_models_count, 1)

    def test_extraction_result_properties(self):
        """ExtractionResult derives success_rate from its file counters."""
        summary = ExtractionResult(
            makes=[],
            total_files_processed=10,
            successful_extractions=8,
            failed_extractions=2,
            total_models=100,
            total_engines=500,
            total_electric_models=25,
        )

        self.assertEqual(summary.success_rate, 0.8)
|
|
|
|
|
|
# Run the suite with verbose output when this file is executed directly.
if __name__ == '__main__':
    unittest.main(verbosity=2)