Files
markitect-main/tests/test_semantic_validator.py
tegwick a969c5de47 feat: add semantic document validator for x-markitect extensions
Implements semantic validation to complement existing structural validation:

Phase 1 & 2 Complete:
- SemanticValidator: Main validator orchestrating sub-validators
- SectionValidator: Enforces section classifications (required, recommended,
  optional, discouraged, improper) from x-markitect-sections
- ContentValidator: Validates content patterns, forbidden patterns, and
  quality metrics (word counts, sentence counts) from x-markitect-content-control

Features:
- Pattern matching with regex for required/forbidden/discouraged patterns
- Word count and sentence count validation
- Detailed error reporting with severity levels (ERROR, WARNING)
- Support for section alternatives (e.g., FLAGS vs OPTIONS)
- Comprehensive test coverage (16 tests, 100% passing)

Architecture:
- Complements existing SchemaValidator (structural AST validation)
- Clean separation: validators/ package for modular validators
- Semantic validation focuses on x-markitect-* extensions
- LinkValidator planned for Phase 3 (optional --check-links)

Next: Phase 4 - CLI integration to enhance 'markitect validate' command

Workplan: roadmap/20260106-semantic-document-validation/WORKPLAN.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-06 03:24:32 +01:00

507 lines
15 KiB
Python

"""
Tests for SemanticValidator.
Tests semantic validation of markdown documents against x-markitect extensions.
"""
import pytest
from pathlib import Path
import tempfile
import json
from markitect.semantic_validator import (
SemanticValidator,
SemanticValidationReport,
load_schema_from_path
)
from markitect.validators.section_validator import (
SectionValidator,
SectionMissing,
SectionImproper
)
from markitect.validators.content_validator import (
ContentValidator,
PatternMissing,
ForbiddenPattern,
DiscouragedPattern,
ContentTooShort,
ContentTooLong
)
class TestSectionValidator:
    """Checks SectionValidator against ``x-markitect-sections`` rules.

    Every test builds a small schema, hands the validator a stub document
    exposing ``get_headings_by_level`` and inspects the returned result.
    """

    def test_required_section_missing(self):
        """A 'required' section absent from the headings yields one error."""
        sections_schema = {
            'x-markitect-sections': {
                'SYNOPSIS': {
                    'classification': 'required',
                    'heading_level': 2,
                    'error_message': 'SYNOPSIS section is mandatory'
                }
            }
        }

        class MockDocument:
            # Stub document: headings are plain strings, none is SYNOPSIS.
            def get_headings_by_level(self, level):
                return ['DESCRIPTION', 'EXAMPLES']

        section_validator = SectionValidator(sections_schema)
        outcome = section_validator.check(MockDocument())

        # Exactly one error is expected and it fails the validation.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        errors = outcome.get_errors()
        assert len(errors) == 1

        missing = errors[0]
        assert isinstance(missing, SectionMissing)
        assert missing.section_name == 'SYNOPSIS'
        assert missing.severity == 'ERROR'
        # The schema-provided error_message must surface in the issue.
        assert 'mandatory' in missing.message

    def test_improper_section_present(self):
        """An 'improper' section found in the document yields one error."""
        sections_schema = {
            'x-markitect-sections': {
                'INTERNAL_NOTES': {
                    'classification': 'improper',
                    'heading_level': 2,
                    'error_message': 'Internal notes must not appear in published docs'
                }
            }
        }

        class MockDocument:
            # Stub document containing the improper heading on line 25.
            def get_headings_by_level(self, level):
                heading = {
                    'content': 'INTERNAL_NOTES',
                    'level': 2,
                    'line_number': 25
                }
                return [heading]

        section_validator = SectionValidator(sections_schema)
        outcome = section_validator.check(MockDocument())

        # Exactly one error is expected and it fails the validation.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        errors = outcome.get_errors()
        assert len(errors) == 1

        improper = errors[0]
        assert isinstance(improper, SectionImproper)
        assert improper.section_name == 'INTERNAL_NOTES'
        assert improper.severity == 'ERROR'
        # The issue carries the heading's line number from the document.
        assert improper.line_number == 25

    def test_recommended_section_missing(self):
        """A missing 'recommended' section only produces a warning."""
        sections_schema = {
            'x-markitect-sections': {
                'EXAMPLES': {
                    'classification': 'recommended',
                    'heading_level': 2,
                    'warning_if_missing': 'Examples improve documentation quality'
                }
            }
        }

        class MockDocument:
            # Stub document: headings are plain strings, none is EXAMPLES.
            def get_headings_by_level(self, level):
                return ['SYNOPSIS', 'DESCRIPTION']

        section_validator = SectionValidator(sections_schema)
        outcome = section_validator.check(MockDocument())

        # Warnings alone must not fail the validation.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert outcome.has_warnings()
        warnings = outcome.get_warnings()
        assert len(warnings) == 1

        advisory = warnings[0]
        assert advisory.section_name == 'EXAMPLES'
        assert advisory.severity == 'WARNING'

    def test_all_required_sections_present(self):
        """No issues are raised when every required section is present."""
        sections_schema = {
            'x-markitect-sections': {
                'SYNOPSIS': {
                    'classification': 'required',
                    'heading_level': 2
                },
                'DESCRIPTION': {
                    'classification': 'required',
                    'heading_level': 2
                }
            }
        }

        class MockDocument:
            # Stub document with both required headings plus an extra one.
            def get_headings_by_level(self, level):
                return [
                    {'content': 'SYNOPSIS', 'level': 2},
                    {'content': 'DESCRIPTION', 'level': 2},
                    {'content': 'EXAMPLES', 'level': 2}
                ]

        section_validator = SectionValidator(sections_schema)
        outcome = section_validator.check(MockDocument())

        # Clean pass: no errors, no warnings, empty issue list.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert not outcome.has_warnings()
        assert len(outcome.issues) == 0

    def test_section_alternatives(self):
        """A heading listed under 'alternatives' satisfies a required section."""
        sections_schema = {
            'x-markitect-sections': {
                'OPTIONS': {
                    'classification': 'required',
                    'heading_level': 2,
                    'alternatives': ['FLAGS', 'COMMAND OPTIONS']
                }
            }
        }

        class MockDocument:
            # Stub document that uses the alternative name FLAGS.
            def get_headings_by_level(self, level):
                return [{'content': 'FLAGS', 'level': 2}]

        section_validator = SectionValidator(sections_schema)
        outcome = section_validator.check(MockDocument())

        # The alternative heading is accepted in place of OPTIONS.
        assert outcome.is_valid()
        assert not outcome.has_errors()
class TestSemanticValidator:
    """Checks SemanticValidator, its report object and schema loading."""

    def test_validator_initialization(self):
        """The validator keeps the schema and builds a section validator."""
        full_schema = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            'x-markitect-sections': {
                'SYNOPSIS': {'classification': 'required', 'heading_level': 2}
            }
        }

        semantic = SemanticValidator(full_schema)

        assert semantic.schema == full_schema
        assert semantic.section_validator is not None

    def test_validation_report_formatting(self):
        """A report with one section error fails and renders the expected text."""
        from markitect.validators.section_validator import (
            SectionValidationResult,
            SectionMissing
        )

        missing_synopsis = SectionMissing(
            section_name='SYNOPSIS',
            severity='ERROR',
            message='SYNOPSIS is required',
            classification='required'
        )
        section_outcome = SectionValidationResult(
            issues=[missing_synopsis],
            sections_checked=2,
            sections_found=1
        )
        report = SemanticValidationReport(section_result=section_outcome)

        # Report-level status mirrors the single section error.
        assert report.has_errors()
        assert not report.is_valid()

        # The plain-text rendering must mention each of these fragments.
        rendered = report.format_text()
        assert 'Section Validation:' in rendered
        assert 'SYNOPSIS' in rendered
        assert 'Errors: 1' in rendered
        assert 'FAILED' in rendered

    def test_load_json_schema(self, tmp_path):
        """A schema written as JSON is loaded back unchanged."""
        schema_data = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            'title': 'Test Schema',
            'x-markitect-sections': {
                'SYNOPSIS': {'classification': 'required', 'heading_level': 2}
            }
        }
        schema_file = tmp_path / "test-schema.json"
        serialized = json.dumps(schema_data, indent=2)
        schema_file.write_text(serialized)

        loaded_schema = load_schema_from_path(schema_file)

        assert loaded_schema == schema_data
        assert 'x-markitect-sections' in loaded_schema

    def test_schema_not_found(self):
        """Loading a path that does not exist raises FileNotFoundError."""
        with pytest.raises(FileNotFoundError):
            load_schema_from_path('/nonexistent/schema.json')

    def test_unsupported_schema_format(self, tmp_path):
        """Loading a file with an .xml suffix raises ValueError."""
        schema_file = tmp_path / "schema.xml"
        schema_file.write_text('<schema></schema>')

        with pytest.raises(ValueError, match="Unsupported schema format"):
            load_schema_from_path(schema_file)
class TestContentValidator:
    """Checks ContentValidator against ``x-markitect-content-control`` rules.

    Every test hands the validator a stub document exposing ``get_section``,
    which returns a dict for one section name and ``None`` for any other.
    """

    def test_required_pattern_missing(self):
        """A required pattern absent from the section content is an error."""
        content_schema = {
            'x-markitect-content-control': {
                'synopsis': {
                    'required_patterns': [
                        # Command name wrapped in markdown bold markers.
                        r'\*\*[a-z][a-z0-9-]*\*\*'
                    ]
                }
            }
        }

        class MockDocument:
            # SYNOPSIS content carries no bold command name.
            def get_section(self, name):
                if name != 'SYNOPSIS':
                    return None
                return {
                    'name': 'SYNOPSIS',
                    'content': 'command [options] arguments'
                }

        content_validator = ContentValidator(content_schema)
        outcome = content_validator.check(MockDocument())

        # Exactly one error is expected and it fails the validation.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        errors = outcome.get_errors()
        assert len(errors) == 1

        missing = errors[0]
        assert isinstance(missing, PatternMissing)
        assert missing.section_name == 'SYNOPSIS'
        assert missing.severity == 'ERROR'

    def test_forbidden_pattern_found(self):
        """A forbidden pattern present in the section content is an error."""
        content_schema = {
            'x-markitect-content-control': {
                'description': {
                    'forbidden_patterns': [
                        r'\bTODO\b',
                        r'\bFIXME\b'
                    ]
                }
            }
        }

        class MockDocument:
            # DESCRIPTION content contains TODO but not FIXME.
            def get_section(self, name):
                if name != 'DESCRIPTION':
                    return None
                return {
                    'name': 'DESCRIPTION',
                    'content': 'This is a description. TODO: Add more details.'
                }

        content_validator = ContentValidator(content_schema)
        outcome = content_validator.check(MockDocument())

        # Exactly one error is expected and it fails the validation.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        errors = outcome.get_errors()
        assert len(errors) == 1

        forbidden = errors[0]
        assert isinstance(forbidden, ForbiddenPattern)
        assert forbidden.section_name == 'DESCRIPTION'
        assert 'TODO' in forbidden.matched_text

    def test_discouraged_pattern_warning(self):
        """A discouraged pattern produces a warning and validation still passes."""
        content_schema = {
            'x-markitect-content-control': {
                'description': {
                    'discouraged_patterns': [
                        r'\bWIP\b'
                    ]
                }
            }
        }

        class MockDocument:
            # DESCRIPTION content contains the discouraged marker WIP.
            def get_section(self, name):
                if name != 'DESCRIPTION':
                    return None
                return {
                    'name': 'DESCRIPTION',
                    'content': 'This is WIP content.'
                }

        content_validator = ContentValidator(content_schema)
        outcome = content_validator.check(MockDocument())

        # Warnings alone must not fail the validation.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert outcome.has_warnings()

        advisory = outcome.get_warnings()[0]
        assert isinstance(advisory, DiscouragedPattern)
        assert advisory.severity == 'WARNING'

    def test_content_too_short(self):
        """Content below ``min_words`` is reported as a ContentTooShort warning."""
        content_schema = {
            'x-markitect-content-control': {
                'description': {
                    'content_quality': {
                        'min_words': 50,
                        'max_words': 1000
                    }
                }
            }
        }

        class MockDocument:
            # DESCRIPTION content is two words, well under min_words.
            def get_section(self, name):
                if name != 'DESCRIPTION':
                    return None
                return {
                    'name': 'DESCRIPTION',
                    'content': 'Short description.'
                }

        content_validator = ContentValidator(content_schema)
        outcome = content_validator.check(MockDocument())

        # Word-count issues are warnings, so validation still passes.
        assert outcome.is_valid()
        assert outcome.has_warnings()

        too_short = outcome.get_warnings()[0]
        assert isinstance(too_short, ContentTooShort)
        assert too_short.actual == 2
        assert too_short.required == 50

    def test_content_too_long(self):
        """Content above ``max_words`` is reported as a ContentTooLong warning."""
        content_schema = {
            'x-markitect-content-control': {
                'synopsis': {
                    'content_quality': {
                        'min_words': 5,
                        'max_words': 20
                    }
                }
            }
        }

        class MockDocument:
            # SYNOPSIS content is 50 words, above the max_words of 20.
            def get_section(self, name):
                if name != 'SYNOPSIS':
                    return None
                return {
                    'name': 'SYNOPSIS',
                    'content': ' '.join(['word'] * 50)
                }

        content_validator = ContentValidator(content_schema)
        outcome = content_validator.check(MockDocument())

        # Word-count issues are warnings, so validation still passes.
        assert outcome.is_valid()
        assert outcome.has_warnings()

        too_long = outcome.get_warnings()[0]
        assert isinstance(too_long, ContentTooLong)
        assert too_long.actual == 50
        assert too_long.limit == 20

    def test_all_content_requirements_met(self):
        """No issues are raised when pattern and word-count rules are satisfied."""
        content_schema = {
            'x-markitect-content-control': {
                'synopsis': {
                    'required_patterns': [
                        r'\*\*[a-z]+\*\*'
                    ],
                    'content_quality': {
                        'min_words': 5,
                        'max_words': 50
                    }
                }
            }
        }

        class MockDocument:
            # SYNOPSIS content has a bold command and sits inside the limits.
            def get_section(self, name):
                if name != 'SYNOPSIS':
                    return None
                return {
                    'name': 'SYNOPSIS',
                    'content': '**command** [options] arguments and more words here'
                }

        content_validator = ContentValidator(content_schema)
        outcome = content_validator.check(MockDocument())

        # Clean pass: no errors, no warnings, empty issue list.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert not outcome.has_warnings()
        assert len(outcome.issues) == 0