feat: add LinkValidator for semantic link validation (Phase 3)
Implement comprehensive link validation as part of semantic validation:

Core Features:
- Link classification: internal, external, fragment, email
- Internal link validation: fragment anchors and file paths
- External link validation: HTTP/HTTPS with configurable timeout
- Email validation: mailto: link format checking
- Fragment policy enforcement: allow/disallow fragment identifiers

Link Validator:
- markitect/validators/link_validator.py - Full link validation implementation
- Supports x-markitect-content-control.link_validation configuration
- Default: check internal links, skip external (fast)
- Opt-in external checking with --check-links flag

Integration:
- Updated SemanticValidator to include link_result in reports
- CLI already supports --check-links flag (line 1629 in cli.py)
- Link validation runs by default for internal links (fast)
- External link checking requires explicit --check-links flag

Test Coverage:
- Added 9 comprehensive tests for LinkValidator
- Tests cover: classification, broken links, fragments, email, statistics
- All 25 semantic validator tests passing (100%)

Documentation:
- Updated SCHEMA_MANAGEMENT_GUIDE.md with link validation section
- Added examples for broken links and external link checking
- Documented link types, validation rules, and configuration

Statistics Tracking:
- Links checked, internal/external/fragment/email counts
- Detailed error/warning reporting with line numbers
- Integration with existing semantic validation reporting

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -27,6 +27,13 @@ from markitect.validators.content_validator import (
|
||||
ContentTooShort,
|
||||
ContentTooLong
|
||||
)
|
||||
from markitect.validators.link_validator import (
|
||||
LinkValidator,
|
||||
BrokenInternalLink,
|
||||
BrokenExternalLink,
|
||||
FragmentNotAllowed,
|
||||
InvalidEmail
|
||||
)
|
||||
|
||||
|
||||
class TestSectionValidator:
|
||||
@@ -504,3 +511,251 @@ class TestContentValidator:
|
||||
assert not result.has_errors()
|
||||
assert not result.has_warnings()
|
||||
assert len(result.issues) == 0
|
||||
|
||||
|
||||
class TestLinkValidator:
    """Exercise LinkValidator: classification, per-type checks, statistics."""

    @staticmethod
    def _validator_for(link_validation=None):
        """Build a LinkValidator from an optional ``link_validation`` config.

        When no config is given the content-control section is left empty,
        so the validator falls back to whatever defaults it applies itself.
        """
        control = {}
        if link_validation is not None:
            control['link_validation'] = link_validation
        return LinkValidator({'x-markitect-content-control': control})

    def test_link_classification(self):
        """Each URL form is mapped to the expected link category."""
        validator = self._validator_for()

        expected_kinds = [
            ('http://example.com', 'external'),
            ('https://example.com', 'external'),
            ('//example.com', 'external'),
            ('mailto:test@example.com', 'email'),
            ('#section-name', 'fragment'),
            ('../other-doc.md', 'internal'),
            ('/absolute/path.md', 'internal'),
        ]
        for url, kind in expected_kinds:
            assert validator._classify_link(url) == kind

    def test_broken_internal_link_fragment(self):
        """A fragment without a matching heading is a broken internal link."""
        validator = self._validator_for({'check_internal': True})

        class HeadedDoc:
            """Stub document exposing level-2 headings and two fragment links."""

            def get_headings_by_level(self, level):
                if level != 2:
                    return []
                return [
                    {'content': 'Introduction', 'level': 2},
                    {'content': 'Getting Started', 'level': 2},
                ]

            def extract_links(self):
                return [
                    {'url': '#introduction', 'line_number': 10},
                    {'url': '#nonexistent-section', 'line_number': 15},
                ]

        result = validator.check(HeadedDoc())

        # Exactly one error is expected: the fragment with no heading.
        assert not result.is_valid()
        assert result.has_errors()
        errors = result.get_errors()
        assert len(errors) == 1

        broken = errors[0]
        assert isinstance(broken, BrokenInternalLink)
        assert 'nonexistent-section' in broken.link
        assert broken.line_number == 15

    def test_fragment_not_allowed(self):
        """Fragment links produce a warning when fragments are disallowed."""
        validator = self._validator_for({'allow_fragments': False})

        class FragmentOnlyDoc:
            """Stub document containing a single fragment link."""

            def extract_links(self):
                return [{'url': '#section', 'line_number': 5}]

        result = validator.check(FragmentOnlyDoc())

        # A warning alone must not make the result invalid.
        assert result.is_valid()
        assert result.has_warnings()

        first_warning = result.get_warnings()[0]
        assert isinstance(first_warning, FragmentNotAllowed)

    def test_invalid_email(self):
        """A malformed mailto: target is reported as an InvalidEmail warning."""
        validator = self._validator_for({'check_email': True})

        class MailDoc:
            """Stub document with one well-formed and one malformed mailto link."""

            def extract_links(self):
                return [
                    {'url': 'mailto:valid@example.com', 'line_number': 5},
                    {'url': 'mailto:invalid-email', 'line_number': 10},
                ]

        result = validator.check(MailDoc())

        # Email problems are warnings, so the result is still valid and
        # only the malformed address is flagged.
        assert result.is_valid()
        assert result.has_warnings()
        warnings = result.get_warnings()
        assert len(warnings) == 1

        flagged = warnings[0]
        assert isinstance(flagged, InvalidEmail)
        assert 'invalid-email' in flagged.link

    def test_link_extraction_from_content(self):
        """Inline and reference-style links are pulled from raw markdown."""
        validator = self._validator_for()

        class RawContentDoc:
            """Stub document that only carries raw markdown text."""

            content = (
                "# Test Document\n"
                "\n"
                "This is a [link](http://example.com) in text.\n"
                "\n"
                "Another [internal link](../docs/other.md).\n"
                "\n"
                "Reference style [link][ref].\n"
                "\n"
                "[ref]: https://example.org\n"
            )

        links = validator._extract_links(RawContentDoc())

        # All three link targets should be found.
        assert len(links) == 3
        urls = [entry['url'] for entry in links]
        for expected_url in (
            'http://example.com',
            '../docs/other.md',
            'https://example.org',
        ):
            assert expected_url in urls

    def test_heading_to_fragment_conversion(self):
        """Heading text is converted to the expected fragment identifier."""
        validator = self._validator_for()

        heading_cases = [
            ('Getting Started', 'getting-started'),
            ('API Reference', 'api-reference'),
            ('FAQ (Frequently Asked)', 'faq-frequently-asked'),
            (' Spaces Around ', 'spaces-around'),
        ]
        for heading, fragment_id in heading_cases:
            assert validator._heading_to_fragment_id(heading) == fragment_id

    def test_no_link_validation_when_disabled(self):
        """With every check switched off, no links are checked or reported."""
        validator = self._validator_for({
            'check_internal': False,
            'check_external': False,
            'allow_fragments': True,
            'check_email': False,
        })

        class BrokenLinksDoc:
            """Stub document whose links would fail if they were checked."""

            def extract_links(self):
                return [
                    {'url': '#broken-fragment'},
                    {'url': 'http://broken-link.invalid'},
                ]

        result = validator.check(BrokenLinksDoc())

        # Nothing should have been validated at all.
        assert result.is_valid()
        assert len(result.issues) == 0
        assert result.links_checked == 0

    def test_external_link_validation_opt_in(self):
        """External links are only checked when explicitly requested."""
        validator = self._validator_for({'check_external': False})  # Disabled by default

        class ExternalOnlyDoc:
            """Stub document with a single unreachable external link."""

            def extract_links(self):
                return [{'url': 'http://definitely-broken-12345.invalid'}]

        doc = ExternalOnlyDoc()

        # Without the override the external link is not reported.
        default_result = validator.check(doc)
        assert default_result.is_valid()
        assert len(default_result.issues) == 0

        # With the override the external link would be checked (it may fail
        # or time out). The outcome depends on the network, so nothing is
        # asserted about what the call returns.
        validator.check(doc, check_external=True)

    def test_link_validation_statistics(self):
        """Per-type link counters are populated on the result."""
        validator = self._validator_for({'check_internal': True})

        class MixedLinksDoc:
            """Stub document with one link of each category and no headings."""

            def get_headings_by_level(self, level):
                return []

            def extract_links(self):
                return [
                    {'url': '#fragment'},
                    {'url': 'http://example.com'},
                    {'url': '../internal.md'},
                    {'url': 'mailto:test@example.com'},
                ]

        result = validator.check(MixedLinksDoc())

        # One link per category, four in total.
        assert result.links_checked == 4
        assert result.fragment_links == 1
        assert result.external_links == 1
        assert result.internal_links == 1
        assert result.email_links == 1
|
||||
|
||||
Reference in New Issue
Block a user