""" Tests for SemanticValidator. Tests semantic validation of markdown documents against x-markitect extensions. """ import pytest from pathlib import Path import tempfile import json from markitect.semantic_validator import ( SemanticValidator, SemanticValidationReport, load_schema_from_path ) from markitect.validators.section_validator import ( SectionValidator, SectionMissing, SectionImproper ) from markitect.validators.content_validator import ( ContentValidator, PatternMissing, ForbiddenPattern, DiscouragedPattern, ContentTooShort, ContentTooLong ) class TestSectionValidator: """Test section validation functionality.""" def test_required_section_missing(self): """Test that missing required sections are detected as errors.""" schema = { 'x-markitect-sections': { 'SYNOPSIS': { 'classification': 'required', 'heading_level': 2, 'error_message': 'SYNOPSIS section is mandatory' } } } validator = SectionValidator(schema) # Create a mock document without SYNOPSIS class MockDocument: def get_headings_by_level(self, level): return ['DESCRIPTION', 'EXAMPLES'] doc = MockDocument() result = validator.check(doc) # Should have one error assert not result.is_valid() assert result.has_errors() assert len(result.get_errors()) == 1 error = result.get_errors()[0] assert isinstance(error, SectionMissing) assert error.section_name == 'SYNOPSIS' assert error.severity == 'ERROR' assert 'mandatory' in error.message def test_improper_section_present(self): """Test that improper sections are detected as errors.""" schema = { 'x-markitect-sections': { 'INTERNAL_NOTES': { 'classification': 'improper', 'heading_level': 2, 'error_message': 'Internal notes must not appear in published docs' } } } validator = SectionValidator(schema) # Create a mock document with INTERNAL_NOTES class MockDocument: def get_headings_by_level(self, level): return [ { 'content': 'INTERNAL_NOTES', 'level': 2, 'line_number': 25 } ] doc = MockDocument() result = validator.check(doc) # Should have one error assert not result.is_valid() assert result.has_errors() assert len(result.get_errors()) == 1 error = result.get_errors()[0] assert isinstance(error, SectionImproper) assert error.section_name == 'INTERNAL_NOTES' assert error.severity == 'ERROR' assert error.line_number == 25 def test_recommended_section_missing(self): """Test that missing recommended sections generate warnings.""" schema = { 'x-markitect-sections': { 'EXAMPLES': { 'classification': 'recommended', 'heading_level': 2, 'warning_if_missing': 'Examples improve documentation quality' } } } validator = SectionValidator(schema) # Create a mock document without EXAMPLES class MockDocument: def get_headings_by_level(self, level): return ['SYNOPSIS', 'DESCRIPTION'] doc = MockDocument() result = validator.check(doc) # Should pass validation (warnings don't fail) assert result.is_valid() assert not result.has_errors() assert result.has_warnings() assert len(result.get_warnings()) == 1 warning = result.get_warnings()[0] assert warning.section_name == 'EXAMPLES' assert warning.severity == 'WARNING' def test_all_required_sections_present(self): """Test that validation passes when all required sections present.""" schema = { 'x-markitect-sections': { 'SYNOPSIS': { 'classification': 'required', 'heading_level': 2 }, 'DESCRIPTION': { 'classification': 'required', 'heading_level': 2 } } } validator = SectionValidator(schema) # Create a mock document with all required sections class MockDocument: def get_headings_by_level(self, level): return [ {'content': 'SYNOPSIS', 'level': 2}, {'content': 'DESCRIPTION', 'level': 2}, {'content': 'EXAMPLES', 'level': 2} ] doc = MockDocument() result = validator.check(doc) # Should pass assert result.is_valid() assert not result.has_errors() assert not result.has_warnings() assert len(result.issues) == 0 def test_section_alternatives(self): """Test that alternative section names are recognized.""" schema = { 'x-markitect-sections': { 'OPTIONS': { 'classification': 'required', 'heading_level': 2, 'alternatives': ['FLAGS', 'COMMAND OPTIONS'] } } } validator = SectionValidator(schema) # Document uses alternative name 'FLAGS' class MockDocument: def get_headings_by_level(self, level): return [{'content': 'FLAGS', 'level': 2}] doc = MockDocument() result = validator.check(doc) # Should pass (alternative is accepted) assert result.is_valid() assert not result.has_errors() class TestSemanticValidator: """Test complete semantic validation.""" def test_validator_initialization(self): """Test that validator initializes correctly.""" schema = { '$schema': 'http://json-schema.org/draft-07/schema#', 'x-markitect-sections': { 'SYNOPSIS': {'classification': 'required', 'heading_level': 2} } } validator = SemanticValidator(schema) assert validator.schema == schema assert validator.section_validator is not None def test_validation_report_formatting(self): """Test that validation reports format correctly.""" from markitect.validators.section_validator import ( SectionValidationResult, SectionMissing ) section_result = SectionValidationResult( issues=[ SectionMissing( section_name='SYNOPSIS', severity='ERROR', message='SYNOPSIS is required', classification='required' ) ], sections_checked=2, sections_found=1 ) report = SemanticValidationReport(section_result=section_result) # Check report properties assert report.has_errors() assert not report.is_valid() # Check text formatting text = report.format_text() assert 'Section Validation:' in text assert 'SYNOPSIS' in text assert 'Errors: 1' in text assert 'FAILED' in text def test_load_json_schema(self, tmp_path): """Test loading a JSON schema file.""" schema_file = tmp_path / "test-schema.json" schema_data = { '$schema': 'http://json-schema.org/draft-07/schema#', 'title': 'Test Schema', 'x-markitect-sections': { 'SYNOPSIS': {'classification': 'required', 'heading_level': 2} } } schema_file.write_text(json.dumps(schema_data, indent=2)) loaded_schema = load_schema_from_path(schema_file) assert loaded_schema == schema_data assert 'x-markitect-sections' in loaded_schema def test_schema_not_found(self): """Test that missing schema file raises error.""" with pytest.raises(FileNotFoundError): load_schema_from_path('/nonexistent/schema.json') def test_unsupported_schema_format(self, tmp_path): """Test that unsupported format raises error.""" schema_file = tmp_path / "schema.xml" schema_file.write_text('') with pytest.raises(ValueError, match="Unsupported schema format"): load_schema_from_path(schema_file) class TestContentValidator: """Test content validation functionality.""" def test_required_pattern_missing(self): """Test that missing required patterns are detected.""" schema = { 'x-markitect-content-control': { 'synopsis': { 'required_patterns': [ r'\*\*[a-z][a-z0-9-]*\*\*' # Bold command name ] } } } validator = ContentValidator(schema) # Create mock document without bold command class MockDocument: def get_section(self, name): if name == 'SYNOPSIS': return { 'name': 'SYNOPSIS', 'content': 'command [options] arguments' # No bold } return None doc = MockDocument() result = validator.check(doc) # Should have one error assert not result.is_valid() assert result.has_errors() assert len(result.get_errors()) == 1 error = result.get_errors()[0] assert isinstance(error, PatternMissing) assert error.section_name == 'SYNOPSIS' assert error.severity == 'ERROR' def test_forbidden_pattern_found(self): """Test that forbidden patterns are detected.""" schema = { 'x-markitect-content-control': { 'description': { 'forbidden_patterns': [ r'\bTODO\b', r'\bFIXME\b' ] } } } validator = ContentValidator(schema) # Create mock document with forbidden pattern class MockDocument: def get_section(self, name): if name == 'DESCRIPTION': return { 'name': 'DESCRIPTION', 'content': 'This is a description. TODO: Add more details.' } return None doc = MockDocument() result = validator.check(doc) # Should have one error assert not result.is_valid() assert result.has_errors() assert len(result.get_errors()) == 1 error = result.get_errors()[0] assert isinstance(error, ForbiddenPattern) assert error.section_name == 'DESCRIPTION' assert 'TODO' in error.matched_text def test_discouraged_pattern_warning(self): """Test that discouraged patterns generate warnings.""" schema = { 'x-markitect-content-control': { 'description': { 'discouraged_patterns': [ r'\bWIP\b' ] } } } validator = ContentValidator(schema) # Create mock document with discouraged pattern class MockDocument: def get_section(self, name): if name == 'DESCRIPTION': return { 'name': 'DESCRIPTION', 'content': 'This is WIP content.' } return None doc = MockDocument() result = validator.check(doc) # Should pass (warnings don't fail) assert result.is_valid() assert not result.has_errors() assert result.has_warnings() warning = result.get_warnings()[0] assert isinstance(warning, DiscouragedPattern) assert warning.severity == 'WARNING' def test_content_too_short(self): """Test word count validation - too short.""" schema = { 'x-markitect-content-control': { 'description': { 'content_quality': { 'min_words': 50, 'max_words': 1000 } } } } validator = ContentValidator(schema) # Create mock document with short content class MockDocument: def get_section(self, name): if name == 'DESCRIPTION': return { 'name': 'DESCRIPTION', 'content': 'Short description.' # Only 2 words } return None doc = MockDocument() result = validator.check(doc) # Should have warning assert result.is_valid() # Warnings don't fail assert result.has_warnings() warning = result.get_warnings()[0] assert isinstance(warning, ContentTooShort) assert warning.actual == 2 assert warning.required == 50 def test_content_too_long(self): """Test word count validation - too long.""" schema = { 'x-markitect-content-control': { 'synopsis': { 'content_quality': { 'min_words': 5, 'max_words': 20 } } } } validator = ContentValidator(schema) # Create mock document with long content class MockDocument: def get_section(self, name): if name == 'SYNOPSIS': return { 'name': 'SYNOPSIS', 'content': ' '.join(['word'] * 50) # 50 words } return None doc = MockDocument() result = validator.check(doc) # Should have warning assert result.is_valid() assert result.has_warnings() warning = result.get_warnings()[0] assert isinstance(warning, ContentTooLong) assert warning.actual == 50 assert warning.limit == 20 def test_all_content_requirements_met(self): """Test that validation passes when all requirements met.""" schema = { 'x-markitect-content-control': { 'synopsis': { 'required_patterns': [ r'\*\*[a-z]+\*\*' ], 'content_quality': { 'min_words': 5, 'max_words': 50 } } } } validator = ContentValidator(schema) # Create valid document class MockDocument: def get_section(self, name): if name == 'SYNOPSIS': return { 'name': 'SYNOPSIS', 'content': '**command** [options] arguments and more words here' } return None doc = MockDocument() result = validator.check(doc) # Should pass assert result.is_valid() assert not result.has_errors() assert not result.has_warnings() assert len(result.issues) == 0