Implements semantic validation to complement existing structural validation: Phase 1 & 2 Complete: - SemanticValidator: Main validator orchestrating sub-validators - SectionValidator: Enforces section classifications (required, recommended, optional, discouraged, improper) from x-markitect-sections - ContentValidator: Validates content patterns, forbidden patterns, and quality metrics (word counts, sentence counts) from x-markitect-content-control Features: - Pattern matching with regex for required/forbidden/discouraged patterns - Word count and sentence count validation - Detailed error reporting with severity levels (ERROR, WARNING) - Support for section alternatives (e.g., FLAGS vs OPTIONS) - Comprehensive test coverage (16 tests, 100% passing) Architecture: - Complements existing SchemaValidator (structural AST validation) - Clean separation: validators/ package for modular validators - Semantic validation focuses on x-markitect-* extensions - LinkValidator planned for Phase 3 (optional --check-links) Next: Phase 4 - CLI integration to enhance 'markitect validate' command Workplan: roadmap/20260106-semantic-document-validation/WORKPLAN.md 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
507 lines
15 KiB
Python
507 lines
15 KiB
Python
"""
|
|
Tests for SemanticValidator.
|
|
|
|
Tests semantic validation of markdown documents against x-markitect extensions.
|
|
"""
|
|
|
|
import pytest
|
|
from pathlib import Path
|
|
import tempfile
|
|
import json
|
|
|
|
from markitect.semantic_validator import (
|
|
SemanticValidator,
|
|
SemanticValidationReport,
|
|
load_schema_from_path
|
|
)
|
|
from markitect.validators.section_validator import (
|
|
SectionValidator,
|
|
SectionMissing,
|
|
SectionImproper
|
|
)
|
|
from markitect.validators.content_validator import (
|
|
ContentValidator,
|
|
PatternMissing,
|
|
ForbiddenPattern,
|
|
DiscouragedPattern,
|
|
ContentTooShort,
|
|
ContentTooLong
|
|
)
|
|
|
|
|
|
class TestSectionValidator:
    """Exercise SectionValidator against x-markitect-sections rules."""

    @staticmethod
    def _document_with_headings(headings):
        """Return a stub document that reports *headings* for any level.

        The stub exposes only ``get_headings_by_level``; the requested level
        is ignored and a new list is handed out on every call.
        """

        class MockDocument:
            def get_headings_by_level(self, level):
                return list(headings)

        return MockDocument()

    def test_required_section_missing(self):
        """A required section absent from the document yields one ERROR."""
        schema = {
            'x-markitect-sections': {
                'SYNOPSIS': {
                    'classification': 'required',
                    'heading_level': 2,
                    'error_message': 'SYNOPSIS section is mandatory'
                }
            }
        }
        checker = SectionValidator(schema)

        # Headings are given as plain strings here; SYNOPSIS is left out.
        document = self._document_with_headings(['DESCRIPTION', 'EXAMPLES'])
        outcome = checker.check(document)

        # Exactly one error is expected, and it fails the validation.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        errors = outcome.get_errors()
        assert len(errors) == 1

        missing = errors[0]
        assert isinstance(missing, SectionMissing)
        assert missing.section_name == 'SYNOPSIS'
        assert missing.severity == 'ERROR'
        # The custom error_message from the schema must reach the issue.
        assert 'mandatory' in missing.message

    def test_improper_section_present(self):
        """A section classified as improper yields one ERROR when present."""
        schema = {
            'x-markitect-sections': {
                'INTERNAL_NOTES': {
                    'classification': 'improper',
                    'heading_level': 2,
                    'error_message': 'Internal notes must not appear in published docs'
                }
            }
        }
        checker = SectionValidator(schema)

        # Heading given as a dict so that a line number is available.
        offending_heading = {
            'content': 'INTERNAL_NOTES',
            'level': 2,
            'line_number': 25
        }
        document = self._document_with_headings([offending_heading])
        outcome = checker.check(document)

        # Exactly one error is expected, and it fails the validation.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        errors = outcome.get_errors()
        assert len(errors) == 1

        improper = errors[0]
        assert isinstance(improper, SectionImproper)
        assert improper.section_name == 'INTERNAL_NOTES'
        assert improper.severity == 'ERROR'
        assert improper.line_number == 25

    def test_recommended_section_missing(self):
        """A missing recommended section yields a WARNING, not a failure."""
        schema = {
            'x-markitect-sections': {
                'EXAMPLES': {
                    'classification': 'recommended',
                    'heading_level': 2,
                    'warning_if_missing': 'Examples improve documentation quality'
                }
            }
        }
        checker = SectionValidator(schema)

        # Headings are given as plain strings here; EXAMPLES is left out.
        document = self._document_with_headings(['SYNOPSIS', 'DESCRIPTION'])
        outcome = checker.check(document)

        # Warnings alone must not fail the validation.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert outcome.has_warnings()
        warnings = outcome.get_warnings()
        assert len(warnings) == 1

        notice = warnings[0]
        assert notice.section_name == 'EXAMPLES'
        assert notice.severity == 'WARNING'

    def test_all_required_sections_present(self):
        """No issues are reported when every required section is present."""
        schema = {
            'x-markitect-sections': {
                'SYNOPSIS': {
                    'classification': 'required',
                    'heading_level': 2
                },
                'DESCRIPTION': {
                    'classification': 'required',
                    'heading_level': 2
                }
            }
        }
        checker = SectionValidator(schema)

        # Both required sections plus one section the schema does not mention.
        document = self._document_with_headings([
            {'content': 'SYNOPSIS', 'level': 2},
            {'content': 'DESCRIPTION', 'level': 2},
            {'content': 'EXAMPLES', 'level': 2}
        ])
        outcome = checker.check(document)

        # A clean pass: no errors, no warnings, no issues at all.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert not outcome.has_warnings()
        assert len(outcome.issues) == 0

    def test_section_alternatives(self):
        """A listed alternative heading satisfies a required section."""
        schema = {
            'x-markitect-sections': {
                'OPTIONS': {
                    'classification': 'required',
                    'heading_level': 2,
                    'alternatives': ['FLAGS', 'COMMAND OPTIONS']
                }
            }
        }
        checker = SectionValidator(schema)

        # The document uses the alternative name 'FLAGS' instead of 'OPTIONS'.
        document = self._document_with_headings(
            [{'content': 'FLAGS', 'level': 2}]
        )
        outcome = checker.check(document)

        # The alternative is accepted, so validation passes.
        assert outcome.is_valid()
        assert not outcome.has_errors()
|
|
|
|
|
|
class TestSemanticValidator:
    """Test complete semantic validation."""

    def test_validator_initialization(self):
        """Test that the validator stores its schema and has a section validator."""
        schema = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            'x-markitect-sections': {
                'SYNOPSIS': {'classification': 'required', 'heading_level': 2}
            }
        }

        validator = SemanticValidator(schema)

        assert validator.schema == schema
        assert validator.section_validator is not None

    def test_validation_report_formatting(self):
        """Test that a report with one section error formats as a failure."""
        # SectionMissing comes from the module-level imports; only the result
        # container needs to be imported here.
        from markitect.validators.section_validator import (
            SectionValidationResult
        )

        section_result = SectionValidationResult(
            issues=[
                SectionMissing(
                    section_name='SYNOPSIS',
                    severity='ERROR',
                    message='SYNOPSIS is required',
                    classification='required'
                )
            ],
            sections_checked=2,
            sections_found=1
        )

        report = SemanticValidationReport(section_result=section_result)

        # Check report properties
        assert report.has_errors()
        assert not report.is_valid()

        # Check text formatting
        text = report.format_text()
        assert 'Section Validation:' in text
        assert 'SYNOPSIS' in text
        assert 'Errors: 1' in text
        assert 'FAILED' in text

    def test_load_json_schema(self, tmp_path):
        """Test that a JSON schema file round-trips through the loader."""
        schema_file = tmp_path / "test-schema.json"
        schema_data = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            'title': 'Test Schema',
            'x-markitect-sections': {
                'SYNOPSIS': {'classification': 'required', 'heading_level': 2}
            }
        }

        # Explicit encoding keeps the fixture independent of the host locale.
        schema_file.write_text(
            json.dumps(schema_data, indent=2), encoding="utf-8"
        )

        loaded_schema = load_schema_from_path(schema_file)

        assert loaded_schema == schema_data
        assert 'x-markitect-sections' in loaded_schema

    def test_schema_not_found(self, tmp_path):
        """Test that a missing schema file raises FileNotFoundError."""
        # A path below the per-test temporary directory is guaranteed not to
        # exist, unlike a hard-coded absolute path on the host filesystem.
        # It is passed as a string so the loader still sees a str argument.
        missing_schema = tmp_path / "nonexistent" / "schema.json"

        with pytest.raises(FileNotFoundError):
            load_schema_from_path(str(missing_schema))

    def test_unsupported_schema_format(self, tmp_path):
        """Test that an unsupported file format raises ValueError."""
        schema_file = tmp_path / "schema.xml"
        schema_file.write_text('<schema></schema>', encoding="utf-8")

        with pytest.raises(ValueError, match="Unsupported schema format"):
            load_schema_from_path(schema_file)
|
|
|
|
|
|
class TestContentValidator:
    """Exercise ContentValidator against x-markitect-content-control rules."""

    @staticmethod
    def _document_with_section(section_name, content):
        """Return a stub document holding exactly one named section.

        ``get_section`` returns a new ``{'name', 'content'}`` dict for
        *section_name* and ``None`` for every other name.
        """

        class MockDocument:
            def get_section(self, name):
                if name != section_name:
                    return None
                return {
                    'name': section_name,
                    'content': content
                }

        return MockDocument()

    def test_required_pattern_missing(self):
        """A required pattern that never matches yields one ERROR."""
        schema = {
            'x-markitect-content-control': {
                'synopsis': {
                    'required_patterns': [
                        r'\*\*[a-z][a-z0-9-]*\*\*'  # Bold command name
                    ]
                }
            }
        }
        checker = ContentValidator(schema)

        # The synopsis text contains no bold command name.
        document = self._document_with_section(
            'SYNOPSIS', 'command [options] arguments'
        )
        outcome = checker.check(document)

        # Exactly one error is expected, and it fails the validation.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        errors = outcome.get_errors()
        assert len(errors) == 1

        missing = errors[0]
        assert isinstance(missing, PatternMissing)
        assert missing.section_name == 'SYNOPSIS'
        assert missing.severity == 'ERROR'

    def test_forbidden_pattern_found(self):
        """A forbidden pattern that matches yields one ERROR."""
        schema = {
            'x-markitect-content-control': {
                'description': {
                    'forbidden_patterns': [
                        r'\bTODO\b',
                        r'\bFIXME\b'
                    ]
                }
            }
        }
        checker = ContentValidator(schema)

        # The description contains TODO but not FIXME.
        document = self._document_with_section(
            'DESCRIPTION', 'This is a description. TODO: Add more details.'
        )
        outcome = checker.check(document)

        # Exactly one error is expected, and it fails the validation.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        errors = outcome.get_errors()
        assert len(errors) == 1

        forbidden = errors[0]
        assert isinstance(forbidden, ForbiddenPattern)
        assert forbidden.section_name == 'DESCRIPTION'
        assert 'TODO' in forbidden.matched_text

    def test_discouraged_pattern_warning(self):
        """A discouraged pattern that matches yields a WARNING only."""
        schema = {
            'x-markitect-content-control': {
                'description': {
                    'discouraged_patterns': [
                        r'\bWIP\b'
                    ]
                }
            }
        }
        checker = ContentValidator(schema)

        # The description contains the discouraged marker WIP.
        document = self._document_with_section(
            'DESCRIPTION', 'This is WIP content.'
        )
        outcome = checker.check(document)

        # Warnings alone must not fail the validation.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert outcome.has_warnings()

        notice = outcome.get_warnings()[0]
        assert isinstance(notice, DiscouragedPattern)
        assert notice.severity == 'WARNING'

    def test_content_too_short(self):
        """Content below min_words yields a ContentTooShort warning."""
        schema = {
            'x-markitect-content-control': {
                'description': {
                    'content_quality': {
                        'min_words': 50,
                        'max_words': 1000
                    }
                }
            }
        }
        checker = ContentValidator(schema)

        # Two words, far below the minimum of 50.
        document = self._document_with_section(
            'DESCRIPTION', 'Short description.'
        )
        outcome = checker.check(document)

        # Warnings alone must not fail the validation.
        assert outcome.is_valid()
        assert outcome.has_warnings()

        notice = outcome.get_warnings()[0]
        assert isinstance(notice, ContentTooShort)
        assert notice.actual == 2
        assert notice.required == 50

    def test_content_too_long(self):
        """Content above max_words yields a ContentTooLong warning."""
        schema = {
            'x-markitect-content-control': {
                'synopsis': {
                    'content_quality': {
                        'min_words': 5,
                        'max_words': 20
                    }
                }
            }
        }
        checker = ContentValidator(schema)

        # Fifty words, well above the maximum of 20.
        fifty_words = ' '.join(['word'] * 50)
        document = self._document_with_section('SYNOPSIS', fifty_words)
        outcome = checker.check(document)

        # Warnings alone must not fail the validation.
        assert outcome.is_valid()
        assert outcome.has_warnings()

        notice = outcome.get_warnings()[0]
        assert isinstance(notice, ContentTooLong)
        assert notice.actual == 50
        assert notice.limit == 20

    def test_all_content_requirements_met(self):
        """No issues are reported when pattern and word limits are satisfied."""
        schema = {
            'x-markitect-content-control': {
                'synopsis': {
                    'required_patterns': [
                        r'\*\*[a-z]+\*\*'
                    ],
                    'content_quality': {
                        'min_words': 5,
                        'max_words': 50
                    }
                }
            }
        }
        checker = ContentValidator(schema)

        # Bold command name present and the word count is inside 5..50.
        document = self._document_with_section(
            'SYNOPSIS', '**command** [options] arguments and more words here'
        )
        outcome = checker.check(document)

        # A clean pass: no errors, no warnings, no issues at all.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert not outcome.has_warnings()
        assert len(outcome.issues) == 0
|