"""Tests for the semantic validation layer.

Exercises the section, content and link validators together with the
``SemanticValidator`` front end, all of which check markdown documents
against the ``x-markitect-*`` schema extensions.
"""

import json
import tempfile
from pathlib import Path

import pytest

from markitect.semantic_validator import (
    SemanticValidationReport,
    SemanticValidator,
    load_schema_from_path,
)
from markitect.validators.section_validator import (
    SectionImproper,
    SectionMissing,
    SectionValidator,
)
from markitect.validators.content_validator import (
    ContentTooLong,
    ContentTooShort,
    ContentValidator,
    DiscouragedPattern,
    ForbiddenPattern,
    PatternMissing,
)
from markitect.validators.link_validator import (
    BrokenExternalLink,
    BrokenInternalLink,
    FragmentNotAllowed,
    InvalidEmail,
    LinkValidator,
)


class TestSectionValidator:
    """Checks driven by the ``x-markitect-sections`` schema extension."""

    def test_required_section_missing(self):
        """A required section that is absent is reported as an error."""
        synopsis_rule = {
            'classification': 'required',
            'heading_level': 2,
            'error_message': 'SYNOPSIS section is mandatory',
        }
        validator = SectionValidator(
            {'x-markitect-sections': {'SYNOPSIS': synopsis_rule}}
        )

        class DocumentWithoutSynopsis:
            """Stand-in document whose headings omit SYNOPSIS."""

            def get_headings_by_level(self, level):
                return ['DESCRIPTION', 'EXAMPLES']

        outcome = validator.check(DocumentWithoutSynopsis())

        # Exactly one error is expected.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        assert len(outcome.get_errors()) == 1

        issue = outcome.get_errors()[0]
        assert isinstance(issue, SectionMissing)
        assert issue.section_name == 'SYNOPSIS'
        assert issue.severity == 'ERROR'
        assert 'mandatory' in issue.message

    def test_improper_section_present(self):
        """A section classified as improper is an error when it appears."""
        notes_rule = {
            'classification': 'improper',
            'heading_level': 2,
            'error_message': 'Internal notes must not appear in published docs',
        }
        validator = SectionValidator(
            {'x-markitect-sections': {'INTERNAL_NOTES': notes_rule}}
        )

        class DocumentWithInternalNotes:
            """Stand-in document containing the improper heading."""

            def get_headings_by_level(self, level):
                heading = {
                    'content': 'INTERNAL_NOTES',
                    'level': 2,
                    'line_number': 25,
                }
                return [heading]

        outcome = validator.check(DocumentWithInternalNotes())

        # Exactly one error is expected.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        assert len(outcome.get_errors()) == 1

        issue = outcome.get_errors()[0]
        assert isinstance(issue, SectionImproper)
        assert issue.section_name == 'INTERNAL_NOTES'
        assert issue.severity == 'ERROR'
        assert issue.line_number == 25

    def test_recommended_section_missing(self):
        """A missing recommended section only produces a warning."""
        examples_rule = {
            'classification': 'recommended',
            'heading_level': 2,
            'warning_if_missing': 'Examples improve documentation quality',
        }
        validator = SectionValidator(
            {'x-markitect-sections': {'EXAMPLES': examples_rule}}
        )

        class DocumentWithoutExamples:
            """Stand-in document whose headings omit EXAMPLES."""

            def get_headings_by_level(self, level):
                return ['SYNOPSIS', 'DESCRIPTION']

        outcome = validator.check(DocumentWithoutExamples())

        # Warnings alone must not fail validation.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert outcome.has_warnings()
        assert len(outcome.get_warnings()) == 1

        notice = outcome.get_warnings()[0]
        assert notice.section_name == 'EXAMPLES'
        assert notice.severity == 'WARNING'

    def test_all_required_sections_present(self):
        """No issues are raised when every required section exists."""
        required_level_two = {'classification': 'required', 'heading_level': 2}
        validator = SectionValidator({
            'x-markitect-sections': {
                'SYNOPSIS': dict(required_level_two),
                'DESCRIPTION': dict(required_level_two),
            }
        })

        class CompleteDocument:
            """Stand-in document carrying all required headings and one extra."""

            def get_headings_by_level(self, level):
                return [
                    {'content': 'SYNOPSIS', 'level': 2},
                    {'content': 'DESCRIPTION', 'level': 2},
                    {'content': 'EXAMPLES', 'level': 2},
                ]

        outcome = validator.check(CompleteDocument())

        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert not outcome.has_warnings()
        assert len(outcome.issues) == 0

    def test_section_alternatives(self):
        """An alternative heading name satisfies a required section."""
        options_rule = {
            'classification': 'required',
            'heading_level': 2,
            'alternatives': ['FLAGS', 'COMMAND OPTIONS'],
        }
        validator = SectionValidator(
            {'x-markitect-sections': {'OPTIONS': options_rule}}
        )

        class DocumentUsingFlags:
            """Stand-in document that uses the 'FLAGS' alternative."""

            def get_headings_by_level(self, level):
                return [{'content': 'FLAGS', 'level': 2}]

        outcome = validator.check(DocumentUsingFlags())

        # The alternative name is accepted in place of OPTIONS.
        assert outcome.is_valid()
        assert not outcome.has_errors()


class TestSemanticValidator:
    """End-to-end behaviour of the validator front end and schema loader."""

    def test_validator_initialization(self):
        """The validator keeps the schema and builds a section validator."""
        schema = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            'x-markitect-sections': {
                'SYNOPSIS': {'classification': 'required', 'heading_level': 2},
            },
        }

        validator = SemanticValidator(schema)

        assert validator.schema == schema
        assert validator.section_validator is not None

    def test_validation_report_formatting(self):
        """The text report names the section and summarises the failure."""
        from markitect.validators.section_validator import (
            SectionValidationResult,
            SectionMissing,
        )

        missing_synopsis = SectionMissing(
            section_name='SYNOPSIS',
            severity='ERROR',
            message='SYNOPSIS is required',
            classification='required',
        )
        section_result = SectionValidationResult(
            issues=[missing_synopsis],
            sections_checked=2,
            sections_found=1,
        )

        report = SemanticValidationReport(section_result=section_result)

        # Report-level status.
        assert report.has_errors()
        assert not report.is_valid()

        # Rendered text.
        rendered = report.format_text()
        assert 'Section Validation:' in rendered
        assert 'SYNOPSIS' in rendered
        assert 'Errors: 1' in rendered
        assert 'FAILED' in rendered

    def test_load_json_schema(self, tmp_path):
        """A JSON schema written to disk is loaded back unchanged."""
        schema_data = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            'title': 'Test Schema',
            'x-markitect-sections': {
                'SYNOPSIS': {'classification': 'required', 'heading_level': 2},
            },
        }
        schema_file = tmp_path / "test-schema.json"
        schema_file.write_text(json.dumps(schema_data, indent=2))

        loaded_schema = load_schema_from_path(schema_file)

        assert loaded_schema == schema_data
        assert 'x-markitect-sections' in loaded_schema

    def test_schema_not_found(self):
        """Loading from a path that does not exist raises FileNotFoundError."""
        with pytest.raises(FileNotFoundError):
            load_schema_from_path('/nonexistent/schema.json')

    def test_unsupported_schema_format(self, tmp_path):
        """A schema file with an unsupported extension raises ValueError."""
        schema_file = tmp_path / "schema.xml"
        schema_file.write_text('')

        with pytest.raises(ValueError, match="Unsupported schema format"):
            load_schema_from_path(schema_file)


class TestContentValidator:
    """Checks driven by the ``x-markitect-content-control`` extension."""

    def test_required_pattern_missing(self):
        """A section lacking a required pattern is reported as an error."""
        synopsis_rules = {
            'required_patterns': [
                r'\*\*[a-z][a-z0-9-]*\*\*',  # Bold command name
            ],
        }
        validator = ContentValidator(
            {'x-markitect-content-control': {'synopsis': synopsis_rules}}
        )

        class SynopsisWithoutBold:
            """Stand-in document whose SYNOPSIS has no bold command name."""

            def get_section(self, name):
                if name != 'SYNOPSIS':
                    return None
                return {
                    'name': 'SYNOPSIS',
                    'content': 'command [options] arguments',  # No bold
                }

        outcome = validator.check(SynopsisWithoutBold())

        # Exactly one error is expected.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        assert len(outcome.get_errors()) == 1

        issue = outcome.get_errors()[0]
        assert isinstance(issue, PatternMissing)
        assert issue.section_name == 'SYNOPSIS'
        assert issue.severity == 'ERROR'

    def test_forbidden_pattern_found(self):
        """A forbidden pattern found in a section is reported as an error."""
        description_rules = {
            'forbidden_patterns': [
                r'\bTODO\b',
                r'\bFIXME\b',
            ],
        }
        validator = ContentValidator(
            {'x-markitect-content-control': {'description': description_rules}}
        )

        class DescriptionWithTodo:
            """Stand-in document whose DESCRIPTION contains a TODO marker."""

            def get_section(self, name):
                if name != 'DESCRIPTION':
                    return None
                return {
                    'name': 'DESCRIPTION',
                    'content': 'This is a description. TODO: Add more details.',
                }

        outcome = validator.check(DescriptionWithTodo())

        # Exactly one error is expected.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        assert len(outcome.get_errors()) == 1

        issue = outcome.get_errors()[0]
        assert isinstance(issue, ForbiddenPattern)
        assert issue.section_name == 'DESCRIPTION'
        assert 'TODO' in issue.matched_text

    def test_discouraged_pattern_warning(self):
        """A discouraged pattern yields a warning, not an error."""
        description_rules = {
            'discouraged_patterns': [
                r'\bWIP\b',
            ],
        }
        validator = ContentValidator(
            {'x-markitect-content-control': {'description': description_rules}}
        )

        class DescriptionWithWip:
            """Stand-in document whose DESCRIPTION mentions WIP."""

            def get_section(self, name):
                if name != 'DESCRIPTION':
                    return None
                return {
                    'name': 'DESCRIPTION',
                    'content': 'This is WIP content.',
                }

        outcome = validator.check(DescriptionWithWip())

        # Warnings alone must not fail validation.
        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert outcome.has_warnings()

        notice = outcome.get_warnings()[0]
        assert isinstance(notice, DiscouragedPattern)
        assert notice.severity == 'WARNING'

    def test_content_too_short(self):
        """Content below ``min_words`` produces a ContentTooShort warning."""
        description_rules = {
            'content_quality': {
                'min_words': 50,
                'max_words': 1000,
            },
        }
        validator = ContentValidator(
            {'x-markitect-content-control': {'description': description_rules}}
        )

        class TwoWordDescription:
            """Stand-in document whose DESCRIPTION is only two words long."""

            def get_section(self, name):
                if name != 'DESCRIPTION':
                    return None
                return {
                    'name': 'DESCRIPTION',
                    'content': 'Short description.',  # Only 2 words
                }

        outcome = validator.check(TwoWordDescription())

        # Length problems are warnings, so validation still passes.
        assert outcome.is_valid()
        assert outcome.has_warnings()

        notice = outcome.get_warnings()[0]
        assert isinstance(notice, ContentTooShort)
        assert notice.actual == 2
        assert notice.required == 50

    def test_content_too_long(self):
        """Content above ``max_words`` produces a ContentTooLong warning."""
        synopsis_rules = {
            'content_quality': {
                'min_words': 5,
                'max_words': 20,
            },
        }
        validator = ContentValidator(
            {'x-markitect-content-control': {'synopsis': synopsis_rules}}
        )

        class FiftyWordSynopsis:
            """Stand-in document whose SYNOPSIS is fifty words long."""

            def get_section(self, name):
                if name != 'SYNOPSIS':
                    return None
                return {
                    'name': 'SYNOPSIS',
                    'content': ' '.join(['word'] * 50),  # 50 words
                }

        outcome = validator.check(FiftyWordSynopsis())

        # Length problems are warnings, so validation still passes.
        assert outcome.is_valid()
        assert outcome.has_warnings()

        notice = outcome.get_warnings()[0]
        assert isinstance(notice, ContentTooLong)
        assert notice.actual == 50
        assert notice.limit == 20

    def test_all_content_requirements_met(self):
        """No issues are raised when pattern and length rules are satisfied."""
        synopsis_rules = {
            'required_patterns': [
                r'\*\*[a-z]+\*\*',
            ],
            'content_quality': {
                'min_words': 5,
                'max_words': 50,
            },
        }
        validator = ContentValidator(
            {'x-markitect-content-control': {'synopsis': synopsis_rules}}
        )

        class ConformingSynopsis:
            """Stand-in document whose SYNOPSIS meets every rule."""

            def get_section(self, name):
                if name != 'SYNOPSIS':
                    return None
                return {
                    'name': 'SYNOPSIS',
                    'content': '**command** [options] arguments and more words here',
                }

        outcome = validator.check(ConformingSynopsis())

        assert outcome.is_valid()
        assert not outcome.has_errors()
        assert not outcome.has_warnings()
        assert len(outcome.issues) == 0


class TestLinkValidator:
    """Checks driven by the ``link_validation`` content-control settings."""

    def test_link_classification(self):
        """Each URL form maps to the expected link type."""
        validator = LinkValidator({'x-markitect-content-control': {}})

        expected_types = [
            ('http://example.com', 'external'),
            ('https://example.com', 'external'),
            ('//example.com', 'external'),
            ('mailto:test@example.com', 'email'),
            ('#section-name', 'fragment'),
            ('../other-doc.md', 'internal'),
            ('/absolute/path.md', 'internal'),
        ]
        for url, link_type in expected_types:
            assert validator._classify_link(url) == link_type

    def test_broken_internal_link_fragment(self):
        """A fragment with no matching heading is reported as an error."""
        validator = LinkValidator({
            'x-markitect-content-control': {
                'link_validation': {'check_internal': True},
            }
        })

        class DocumentWithFragments:
            """Stand-in document with two headings and two fragment links."""

            def get_headings_by_level(self, level):
                if level != 2:
                    return []
                return [
                    {'content': 'Introduction', 'level': 2},
                    {'content': 'Getting Started', 'level': 2},
                ]

            def extract_links(self):
                return [
                    {'url': '#introduction', 'line_number': 10},
                    {'url': '#nonexistent-section', 'line_number': 15},
                ]

        outcome = validator.check(DocumentWithFragments())

        # Only the fragment without a heading is flagged.
        assert not outcome.is_valid()
        assert outcome.has_errors()
        assert len(outcome.get_errors()) == 1

        issue = outcome.get_errors()[0]
        assert isinstance(issue, BrokenInternalLink)
        assert 'nonexistent-section' in issue.link
        assert issue.line_number == 15

    def test_fragment_not_allowed(self):
        """Fragment links produce a warning when fragments are disallowed."""
        validator = LinkValidator({
            'x-markitect-content-control': {
                'link_validation': {'allow_fragments': False},
            }
        })

        class DocumentWithOneFragment:
            """Stand-in document containing a single fragment link."""

            def extract_links(self):
                return [{'url': '#section', 'line_number': 5}]

        outcome = validator.check(DocumentWithOneFragment())

        # A warning is raised, which does not fail validation.
        assert outcome.is_valid()
        assert outcome.has_warnings()

        notice = outcome.get_warnings()[0]
        assert isinstance(notice, FragmentNotAllowed)

    def test_invalid_email(self):
        """A malformed mailto target produces exactly one warning."""
        validator = LinkValidator({
            'x-markitect-content-control': {
                'link_validation': {'check_email': True},
            }
        })

        class DocumentWithEmails:
            """Stand-in document with one valid and one invalid mailto link."""

            def extract_links(self):
                return [
                    {'url': 'mailto:valid@example.com', 'line_number': 5},
                    {'url': 'mailto:invalid-email', 'line_number': 10},
                ]

        outcome = validator.check(DocumentWithEmails())

        # Email problems are warnings, so validation still passes.
        assert outcome.is_valid()
        assert outcome.has_warnings()
        assert len(outcome.get_warnings()) == 1

        notice = outcome.get_warnings()[0]
        assert isinstance(notice, InvalidEmail)
        assert 'invalid-email' in notice.link

    def test_link_extraction_from_content(self):
        """Inline and reference-style links are pulled from raw content."""
        validator = LinkValidator({'x-markitect-content-control': {}})

        class RawMarkdownDocument:
            """Stand-in document exposing only a raw ``content`` string."""

            # NOTE(review): everything before the reference definition sits on
            # a single line of this fixture - confirm that is the intended
            # markdown layout.
            content = (
                "# Test Document This is a [link](http://example.com) in text. "
                "Another [internal link](../docs/other.md). "
                "Reference style [link][ref]. \n"
                "[ref]: https://example.org "
            )

        links = validator._extract_links(RawMarkdownDocument())

        # All three targets must be found.
        assert len(links) == 3
        found_urls = [entry['url'] for entry in links]
        assert 'http://example.com' in found_urls
        assert '../docs/other.md' in found_urls
        assert 'https://example.org' in found_urls

    def test_heading_to_fragment_conversion(self):
        """Heading text is converted to the expected fragment identifier."""
        validator = LinkValidator({'x-markitect-content-control': {}})

        expected_ids = [
            ('Getting Started', 'getting-started'),
            ('API Reference', 'api-reference'),
            ('FAQ (Frequently Asked)', 'faq-frequently-asked'),
            (' Spaces Around ', 'spaces-around'),
        ]
        for heading, fragment_id in expected_ids:
            assert validator._heading_to_fragment_id(heading) == fragment_id

    def test_no_link_validation_when_disabled(self):
        """With every check switched off, no link is examined."""
        validator = LinkValidator({
            'x-markitect-content-control': {
                'link_validation': {
                    'check_internal': False,
                    'check_external': False,
                    'allow_fragments': True,
                    'check_email': False,
                },
            }
        })

        class DocumentWithBrokenLinks:
            """Stand-in document whose links would fail if they were checked."""

            def extract_links(self):
                return [
                    {'url': '#broken-fragment'},
                    {'url': 'http://broken-link.invalid'},
                ]

        outcome = validator.check(DocumentWithBrokenLinks())

        # Nothing is validated, so nothing is counted or reported.
        assert outcome.is_valid()
        assert len(outcome.issues) == 0
        assert outcome.links_checked == 0

    def test_external_link_validation_opt_in(self):
        """External links are only checked when explicitly requested."""
        validator = LinkValidator({
            'x-markitect-content-control': {
                'link_validation': {
                    'check_external': False,  # Disabled by default
                },
            }
        })

        class DocumentWithDeadExternalLink:
            """Stand-in document with one unreachable external link."""

            def extract_links(self):
                return [{'url': 'http://definitely-broken-12345.invalid'}]

        document = DocumentWithDeadExternalLink()

        # Without the override the external link is left alone.
        outcome = validator.check(document)
        assert outcome.is_valid()
        assert len(outcome.issues) == 0

        # With the override the call is only exercised: its outcome depends on
        # the network (it may fail or time out), so nothing is asserted.
        validator.check(document, check_external=True)

    def test_link_validation_statistics(self):
        """The result counts links overall and per link type."""
        validator = LinkValidator({
            'x-markitect-content-control': {
                'link_validation': {'check_internal': True},
            }
        })

        class DocumentWithOneLinkPerType:
            """Stand-in document with one link of each type and no headings."""

            def get_headings_by_level(self, level):
                return []

            def extract_links(self):
                return [
                    {'url': '#fragment'},
                    {'url': 'http://example.com'},
                    {'url': '../internal.md'},
                    {'url': 'mailto:test@example.com'},
                ]

        outcome = validator.check(DocumentWithOneLinkPerType())

        assert outcome.links_checked == 4
        assert outcome.fragment_links == 1
        assert outcome.external_links == 1
        assert outcome.internal_links == 1
        assert outcome.email_links == 1