Added 33 unit tests covering:

Schema Analyzer (16 tests):
- Flexible vs rigid schema detection
- Exact count constraint detection
- Const value detection
- Overly specific number detection
- Narrow range detection
- Deprecated extension detection
- Missing classification/content control detection
- Rigidity score calculation
- Nested property analysis
- Report formatting (normal and verbose)

Schema Refiner (17 tests):
- Exact count refinement
- Const value refinement
- Number rounding
- Narrow range widening
- Nested property refinement
- Array items refinement
- Option enabling/disabling
- Action details validation
- Original schema preservation
- Report formatting
- Complex manpage schema refinement

All tests passing (33/33).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
382 lines
12 KiB
Python
382 lines
12 KiB
Python
"""
|
|
Unit tests for schema_analyzer module (Phase 2 schema refinement).
|
|
"""
|
|
|
|
import pytest
|
|
import json
|
|
from markitect.schema_analyzer import (
|
|
SchemaAnalyzer,
|
|
IssueType,
|
|
IssueSeverity,
|
|
SchemaAnalysisResult
|
|
)
|
|
|
|
|
|
class TestSchemaAnalyzer:
    """Exercise ``SchemaAnalyzer.analyze_schema`` and its report formatter.

    Each test builds a small schema dict, analyzes it with a fresh
    ``SchemaAnalyzer`` and checks flags, scores, issues or report text on
    the returned result.
    """

    @staticmethod
    def _object_schema(properties):
        """Return a top-level ``object`` schema holding *properties*."""
        return {"type": "object", "properties": properties}

    @staticmethod
    def _run(schema):
        """Analyze *schema* with a new ``SchemaAnalyzer`` and return the result."""
        return SchemaAnalyzer().analyze_schema(schema)

    @staticmethod
    def _issues_of(outcome, issue_type):
        """Return the issues of *outcome* matching *issue_type*, in reported order."""
        return [issue for issue in outcome.issues if issue.issue_type == issue_type]

    def test_analyze_flexible_schema(self):
        """A schema with sections, content control and wide ranges is not rigid."""
        flexible = {
            "type": "object",
            "x-markitect-sections": {
                "INTRO": {"classification": "required", "heading_level": 2},
            },
            "x-markitect-content-control": {
                "intro": {"content_quality": {"min_words": 50, "max_words": 500}},
            },
            "properties": {
                "headings": {
                    "type": "object",
                    "properties": {
                        "level_2": {"type": "array", "minItems": 2, "maxItems": 10},
                    },
                },
            },
        }

        outcome = self._run(flexible)

        assert isinstance(outcome, SchemaAnalysisResult)
        assert outcome.has_classifications
        assert outcome.has_content_control
        assert outcome.rigidity_score < 50
        assert not outcome.is_rigid

    def test_analyze_rigid_schema_exact_counts(self):
        """An array with ``minItems == maxItems`` yields an EXACT_COUNT warning."""
        pinned = self._object_schema(
            # minItems and maxItems agree, i.e. an exact count
            {"paragraphs": {"type": "array", "minItems": 5, "maxItems": 5}}
        )

        outcome = self._run(pinned)

        assert outcome.rigidity_score > 0
        exact = self._issues_of(outcome, IssueType.EXACT_COUNT)
        assert exact
        assert exact[0].severity == IssueSeverity.WARNING

    def test_analyze_const_values(self):
        """A ``const`` constraint is reported with its current value."""
        pinned = self._object_schema({"level": {"type": "integer", "const": 1}})

        outcome = self._run(pinned)

        # The const finding is looked up under the EXACT_COUNT issue type.
        found = self._issues_of(outcome, IssueType.EXACT_COUNT)
        assert found
        assert found[0].current_value == 1

    def test_analyze_overly_specific_numbers(self):
        """A ``minItems`` of 73 is flagged with a suggested value of 70."""
        fussy = self._object_schema(
            # 73 is the overly specific bound under test
            {"items": {"type": "array", "minItems": 73}}
        )

        outcome = self._run(fussy)

        flagged = self._issues_of(outcome, IssueType.OVERLY_SPECIFIC)
        assert flagged
        assert flagged[0].current_value == 73
        assert flagged[0].suggested_value == 70  # the rounded suggestion

    def test_analyze_narrow_range(self):
        """An integer limited to 5..6 produces a NO_FLEXIBILITY issue."""
        tight = self._object_schema(
            # minimum and maximum are only one apart
            {"score": {"type": "integer", "minimum": 5, "maximum": 6}}
        )

        outcome = self._run(tight)

        assert self._issues_of(outcome, IssueType.NO_FLEXIBILITY)

    def test_analyze_deprecated_extensions(self):
        """``x-markitect-required-sections`` is reported as deprecated (warning)."""
        legacy = {
            "type": "object",
            "x-markitect-required-sections": ["INTRO", "CONCLUSION"],
        }

        outcome = self._run(legacy)

        assert outcome.uses_deprecated_extensions
        flagged = self._issues_of(outcome, IssueType.DEPRECATED_EXTENSIONS)
        assert flagged
        assert flagged[0].severity == IssueSeverity.WARNING

    def test_analyze_missing_classifications(self):
        """A schema without section classifications gets an INFO-level issue."""
        bare = self._object_schema({"headings": {"type": "object"}})

        outcome = self._run(bare)

        assert not outcome.has_classifications
        flagged = self._issues_of(outcome, IssueType.MISSING_CLASSIFICATIONS)
        assert flagged
        assert flagged[0].severity == IssueSeverity.INFO

    def test_analyze_missing_content_control(self):
        """Sections without content control raise a MISSING_CONTENT_INSTRUCTIONS issue."""
        sections_only = {
            "type": "object",
            "x-markitect-sections": {"INTRO": {"classification": "required"}},
        }

        outcome = self._run(sections_only)

        assert outcome.has_classifications
        assert not outcome.has_content_control
        assert self._issues_of(outcome, IssueType.MISSING_CONTENT_INSTRUCTIONS)

    def test_rigidity_score_calculation(self):
        """Several rigid constraints together land the score between 30 and 60."""
        mixed = self._object_schema(
            {
                "array1": {"type": "array", "minItems": 5, "maxItems": 5},
                "array2": {"type": "array", "minItems": 73},
                "number": {"type": "integer", "const": 42},
            }
        )

        outcome = self._run(mixed)

        # Expected to sit in the moderate band: above 30, below 60.
        assert outcome.rigidity_score > 30
        assert outcome.rigidity_score < 60

    def test_issue_count_by_severity(self):
        """The per-severity counts contain WARNING, ERROR and INFO entries."""
        single = self._object_schema(
            {"items": {"type": "array", "minItems": 1, "maxItems": 1}}
        )

        tally = self._run(single).issue_count_by_severity

        for severity in (
            IssueSeverity.WARNING,
            IssueSeverity.ERROR,
            IssueSeverity.INFO,
        ):
            assert severity in tally

    def test_nested_properties_analysis(self):
        """An exact count inside a nested object is found and its path recorded."""
        nested = self._object_schema(
            {
                "outer": {
                    "type": "object",
                    "properties": {
                        "inner": {"type": "array", "minItems": 3, "maxItems": 3},
                    },
                },
            }
        )

        outcome = self._run(nested)

        # The issue path should name the nested property.
        exact = self._issues_of(outcome, IssueType.EXACT_COUNT)
        assert exact
        assert "properties.outer.inner" in exact[0].path

    def test_format_analysis_report(self):
        """The non-verbose report carries its three headline labels."""
        single = self._object_schema(
            {"items": {"type": "array", "minItems": 1, "maxItems": 1}}
        )

        inspector = SchemaAnalyzer()
        outcome = inspector.analyze_schema(single)
        text = inspector.format_analysis_report(outcome, verbose=False)

        for label in ("Schema Analysis Report", "Rigidity Score", "Issues Found"):
            assert label in text

    def test_format_analysis_report_verbose(self):
        """The verbose report includes current and suggested value labels."""
        pinned = self._object_schema(
            {"items": {"type": "array", "minItems": 5, "maxItems": 5}}
        )

        inspector = SchemaAnalyzer()
        outcome = inspector.analyze_schema(pinned)
        text = inspector.format_analysis_report(outcome, verbose=True)

        for label in ("Current:", "Suggested:"):
            assert label in text

    def test_analyze_array_items_with_properties(self):
        """A ``const`` inside an array's ``items`` object is detected."""
        listing = self._object_schema(
            {
                "headings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "level": {"type": "integer", "const": 1},
                        },
                    },
                },
            }
        )

        outcome = self._run(listing)

        # The reported path should pass through the array's "items".
        found = self._issues_of(outcome, IssueType.EXACT_COUNT)
        assert found
        assert "items" in found[0].path

    def test_empty_schema(self):
        """A bare ``object`` schema is analyzed without error and scores below 50."""
        minimal = {"type": "object"}

        outcome = self._run(minimal)

        # Missing features are expected; a high rigidity score is not.
        assert not outcome.has_classifications
        assert not outcome.has_content_control
        assert outcome.rigidity_score < 50

    def test_no_issues_schema(self):
        """A well-formed schema scores under 20 and reports no warnings."""
        tidy = {
            "type": "object",
            "x-markitect-sections": {
                "INTRO": {
                    "classification": "required",
                    "heading_level": 2,
                    "content_instruction": "Introduction section",
                },
            },
            "x-markitect-content-control": {
                "intro": {"content_quality": {"min_words": 50, "max_words": 500}},
            },
            "properties": {
                # 5..50 is a comfortably wide range
                "paragraphs": {"type": "array", "minItems": 5, "maxItems": 50},
            },
        }

        inspector = SchemaAnalyzer()
        outcome = inspector.analyze_schema(tidy)
        text = inspector.format_analysis_report(outcome)

        assert outcome.rigidity_score < 20
        assert not outcome.is_rigid
        assert (
            "No issues found" in text
            or outcome.issue_count_by_severity[IssueSeverity.WARNING] == 0
        )
|