Files
markitect-main/tests/test_datamodel_optimizer.py
tegwick a98e2fa329 feat: create Datamodel Optimization Specialist Agent - Issue #127
Based on the successful IssueActivity optimization (Issue #126), created a
comprehensive Claude Code subagent specialized in datamodel enhancement:

Agent Documentation (docs/sub_agents/datamodel_optimizer.md):
- 4-phase optimization methodology (Discovery, Analysis, Enhancement, Validation)
- Core patterns: property-based formatting, serialization consolidation
- Integration framework with Claude Code ecosystem
- Success metrics and implementation roadmap

Practical Implementation Tool (tools/datamodel_optimizer.py):
- AST-based datamodel discovery engine
- Usage pattern analysis with impact scoring
- Multi-format reporting (summary, detailed, JSON)
- CLI interface for interactive and batch processing

Real Codebase Validation:
- Analyzed 97 datamodels in current codebase
- Identified 350 usage patterns and 119 optimization opportunities
- Potential reduction of 518 lines of code
- Correctly recognized IssueActivity optimizations from Issue #126

Core Capabilities:
- Property-based formatting consolidation
- Verbose serialization → single method calls
- Test data consistency (dict mocks → proper objects)
- Business logic encapsulation

The agent provides a systematic, reusable framework for datamodel optimization
across any codebase while preserving interface compatibility.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-05 14:05:48 +02:00

258 lines
9.5 KiB
Python

"""
Tests for the Datamodel Optimizer Agent
Validates that the datamodel optimization tool correctly identifies
optimization opportunities and provides accurate assessments.
"""
import pytest
import tempfile
from pathlib import Path
from tools.datamodel_optimizer import (
DatamodelDiscovery,
UsageAnalyzer,
OptimizationAnalyzer,
OptimizationReporter
)
class TestDatamodelOptimizer:
    """Test the datamodel optimizer functionality.

    Most tests drive the pipeline
    ``DatamodelDiscovery`` -> ``UsageAnalyzer`` -> ``OptimizationAnalyzer``
    -> ``OptimizationReporter`` against a small throw-away project created by
    the ``temp_project`` fixture; one test runs discovery against the real
    repository.
    """

    @staticmethod
    def _analyze_usage(project_path):
        """Run discovery and usage analysis for *project_path*.

        Returns:
            A ``(datamodels, patterns)`` tuple as produced by
            ``DatamodelDiscovery.discover_datamodels`` and
            ``UsageAnalyzer.analyze_usage_patterns``.
        """
        datamodels = DatamodelDiscovery(project_path).discover_datamodels()
        patterns = UsageAnalyzer(project_path, datamodels).analyze_usage_patterns()
        return datamodels, patterns

    @staticmethod
    def _find_opportunities(project_path):
        """Run the pipeline up to and including opportunity analysis.

        Returns:
            A ``(datamodels, patterns, opportunities)`` tuple.
        """
        datamodels, patterns = TestDatamodelOptimizer._analyze_usage(project_path)
        opportunities = OptimizationAnalyzer(datamodels, patterns).analyze_opportunities()
        return datamodels, patterns, opportunities

    @pytest.fixture
    def temp_project(self):
        """Create a temporary project with sample datamodels.

        Yields:
            ``Path`` of a temporary directory containing ``models.py``,
            ``usage.py`` and ``test_models.py``. The directory is removed
            when the test finishes.
        """
        # Local import, in line with the file's existing local-import style.
        import textwrap

        with tempfile.TemporaryDirectory() as tmpdir:
            project_path = Path(tmpdir)

            # The sample sources are dedented so that every file written to
            # disk is syntactically valid Python; class and function bodies
            # must be indented or the sources cannot be parsed.

            # Create sample datamodel with optimization opportunities
            sample_model = textwrap.dedent("""
                from dataclasses import dataclass
                from datetime import datetime
                from enum import Enum
                class Status(Enum):
                    ACTIVE = "active"
                    INACTIVE = "inactive"
                @dataclass
                class SampleModel:
                    id: int
                    name: str
                    status: Status
                    created_at: datetime
                    description: str = ""
            """)

            # Create sample usage with verbose patterns
            sample_usage = textwrap.dedent("""
                from models import SampleModel, Status
                def format_models(models):
                    # Verbose serialization pattern
                    data = []
                    for model in models:
                        item = {
                            'id': model.id,
                            'name': model.name,
                            'status': model.status.value,
                            'created_at': model.created_at.strftime('%Y-%m-%d'),
                            'description': model.description[:50] + '...' if len(model.description) > 50 else model.description
                        }
                        data.append(item)
                    return data
                def display_model(model):
                    # Repetitive formatting
                    status_display = model.status.value.title()
                    formatted_date = model.created_at.strftime('%Y-%m-%d') if model.created_at else 'N/A'
                    short_desc = model.description[:40] + '...' if len(model.description) > 40 else model.description
                    return f"{model.name} ({status_display}) - {formatted_date} - {short_desc}"
            """)

            # Create sample test with dict mocks
            sample_test = textwrap.dedent("""
                from unittest.mock import Mock
                import pytest
                def test_model_processing():
                    # Dictionary mock instead of real object
                    mock_model = {
                        'id': 1,
                        'name': 'Test',
                        'status': 'active', # String instead of enum!
                        'created_at': '2023-01-01',
                        'description': 'Test description'
                    }
                    result = process_model(mock_model)
                    assert result is not None
            """)

            # Write files (explicit encoding keeps the fixture independent of
            # the platform's default locale).
            (project_path / "models.py").write_text(sample_model, encoding="utf-8")
            (project_path / "usage.py").write_text(sample_usage, encoding="utf-8")
            (project_path / "test_models.py").write_text(sample_test, encoding="utf-8")

            # Yield inside the ``with`` so the directory outlives the test.
            yield project_path

    def test_datamodel_discovery(self, temp_project):
        """Test that datamodel discovery works correctly."""
        datamodels = DatamodelDiscovery(temp_project).discover_datamodels()

        assert "SampleModel" in datamodels
        model = datamodels["SampleModel"]
        assert model.name == "SampleModel"
        assert model.is_dataclass is True
        assert model.is_pydantic is False
        assert len(model.fields) == 5
        for field_name in ("id", "name", "status"):
            assert field_name in model.fields

    def test_usage_pattern_analysis(self, temp_project):
        """Test that usage pattern analysis identifies optimization opportunities."""
        _, patterns = self._analyze_usage(temp_project)

        # Should find formatting patterns
        formatting_patterns = [
            p for p in patterns
            if p.pattern_type in ['date_formatting', 'enum_formatting', 'truncation']
        ]
        assert formatting_patterns, "no formatting patterns found"

        # Should find serialization patterns
        serialization_patterns = [
            p for p in patterns
            if p.pattern_type in ['verbose_serialization', 'dict_building']
        ]
        assert serialization_patterns, "no serialization patterns found"

        # Should find test patterns
        test_patterns = [p for p in patterns if p.pattern_type == 'dict_test_data']
        assert test_patterns, "no dict-based test data patterns found"

    def test_optimization_opportunities(self, temp_project):
        """Test that optimization opportunities are correctly identified."""
        _, _, opportunities = self._find_opportunities(temp_project)

        # Should identify property, serialization and test alignment
        # opportunities.
        for expected_type in ('property', 'serialization', 'test_alignment'):
            matching = [op for op in opportunities if op.opportunity_type == expected_type]
            assert matching, f"no {expected_type!r} opportunities found"

    def test_optimization_reporter(self, temp_project):
        """Test that optimization reports are generated correctly."""
        datamodels, patterns, opportunities = self._find_opportunities(temp_project)
        reporter = OptimizationReporter(datamodels, patterns, opportunities)

        # Test summary report
        summary = reporter.generate_summary_report()
        assert "Total Datamodels Found" in summary
        assert "Optimization Opportunities" in summary
        assert "SampleModel" in summary

        # Test detailed report
        detailed = reporter.generate_detailed_report("SampleModel")
        assert "Detailed Analysis: SampleModel" in detailed
        assert "Model Information" in detailed
        assert "Optimization Opportunities" in detailed

        # Test JSON report
        json_report = reporter.generate_json_report()
        assert '"total_datamodels"' in json_report
        assert '"total_opportunities"' in json_report

    def test_real_codebase_issueactivity(self):
        """Test against real IssueActivity to verify it recognizes our optimizations."""
        # The project root is taken to be the parent of this tests directory.
        project_root = Path(__file__).parent.parent
        datamodels = DatamodelDiscovery(project_root).discover_datamodels()

        # Should find IssueActivity
        assert "IssueActivity" in datamodels
        model = datamodels["IssueActivity"]
        assert model.is_dataclass is True
        assert len(model.properties) >= 5  # Should have the properties we added
        assert len(model.methods) >= 3  # Should have the methods we added

        # Should have the optimization methods we added
        for method_name in ("to_dict", "has_implementation_activity", "contains_keyword"):
            assert method_name in model.methods

        # Should have the properties we added
        for property_name in ("activity_type_value", "formatted_date", "truncated_details"):
            assert property_name in model.properties

    def test_impact_scoring(self, temp_project):
        """Test that impact scoring works correctly."""
        _, _, opportunities = self._find_opportunities(temp_project)

        # All opportunities should have reasonable impact scores
        for opportunity in opportunities:
            assert 1 <= opportunity.impact_score <= 10
            assert opportunity.loc_reduction_estimate >= 0

        # High complexity patterns should have higher impact scores
        high_impact = [op for op in opportunities if op.impact_score >= 7]
        assert high_impact, "expected at least one opportunity with impact_score >= 7"
class TestDatamodelOptimizerCLI:
    """Test the CLI interface of the datamodel optimizer."""

    @staticmethod
    def _run_cli(*cli_args):
        """Run ``tools/datamodel_optimizer.py`` with *cli_args*.

        The script is started with the interpreter that is running the tests
        (``sys.executable``) rather than whatever ``python`` resolves to on
        PATH, and always from the project root, so the outcome does not
        depend on the directory pytest was launched from.

        Returns:
            The ``subprocess.CompletedProcess`` with captured text output.
        """
        import subprocess
        import sys

        # The project root is taken to be the parent of this tests directory.
        project_root = Path(__file__).resolve().parent.parent
        script = project_root / "tools" / "datamodel_optimizer.py"
        return subprocess.run(
            [sys.executable, str(script), *cli_args],
            capture_output=True,
            text=True,
            cwd=project_root,
        )

    def test_cli_help(self):
        """Test that CLI help works."""
        result = self._run_cli('--help')
        # Surface stderr in the failure message to make CLI crashes debuggable.
        assert result.returncode == 0, result.stderr
        assert 'Datamodel Optimization Analysis Tool' in result.stdout

    def test_cli_summary_format(self):
        """Test that CLI summary format works."""
        result = self._run_cli('--format', 'summary')
        assert result.returncode == 0, result.stderr
        assert 'Total Datamodels Found' in result.stdout
        assert 'Optimization Opportunities' in result.stdout
if __name__ == '__main__':
    # Propagate pytest's exit code so that running this file directly
    # reports test failures to the calling shell or CI step instead of
    # always exiting with status 0.
    raise SystemExit(pytest.main([__file__]))