feat: add semantic document validator for x-markitect extensions
Implements semantic validation to complement existing structural validation: Phase 1 & 2 Complete: - SemanticValidator: Main validator orchestrating sub-validators - SectionValidator: Enforces section classifications (required, recommended, optional, discouraged, improper) from x-markitect-sections - ContentValidator: Validates content patterns, forbidden patterns, and quality metrics (word counts, sentence counts) from x-markitect-content-control Features: - Pattern matching with regex for required/forbidden/discouraged patterns - Word count and sentence count validation - Detailed error reporting with severity levels (ERROR, WARNING) - Support for section alternatives (e.g., FLAGS vs OPTIONS) - Comprehensive test coverage (16 tests, 100% passing) Architecture: - Complements existing SchemaValidator (structural AST validation) - Clean separation: validators/ package for modular validators - Semantic validation focuses on x-markitect-* extensions - LinkValidator planned for Phase 3 (optional --check-links) Next: Phase 4 - CLI integration to enhance 'markitect validate' command Workplan: roadmap/20260106-semantic-document-validation/WORKPLAN.md 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
226
markitect/validators/section_validator.py
Normal file
226
markitect/validators/section_validator.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""
|
||||
Section Validator for markdown documents.
|
||||
|
||||
Validates that document sections comply with x-markitect-sections classifications:
|
||||
- REQUIRED: Section must be present (ERROR if missing)
|
||||
- RECOMMENDED: Section should be present (WARNING if missing)
|
||||
- OPTIONAL: Section may be present (no check)
|
||||
- DISCOURAGED: Section should not be present (WARNING if present)
|
||||
- IMPROPER: Section must not be present (ERROR if present)
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass
class SectionIssue:
    """Base class for section validation issues.

    One instance describes a single finding about a section. Subclasses
    only name the kind of finding; they add no fields.
    """

    section_name: str  # name of the section the finding refers to
    severity: str  # 'ERROR', 'WARNING', 'INFO'
    message: str  # human-readable description of the finding
    classification: str  # 'required', 'recommended', etc.
    line_number: Optional[int] = None  # line of the offending heading, if known

    def __str__(self) -> str:
        """Render as ``[SEVERITY] (line N) NAME: message``.

        The ``(line N)`` part is omitted only when no line number is known.
        """
        # Compare against None explicitly: a plain truthiness test would
        # also hide a legitimate line number of 0.
        if self.line_number is None:
            location = ""
        else:
            location = f" (line {self.line_number})"
        return f"[{self.severity}]{location} {self.section_name}: {self.message}"
|
||||
|
||||
|
||||
@dataclass
class SectionMissing(SectionIssue):
    """Issue reported when an expected section is absent from the document."""
|
||||
|
||||
|
||||
@dataclass
class SectionImproper(SectionIssue):
    """Issue reported when a section classified as improper is present."""
|
||||
|
||||
|
||||
@dataclass
class SectionDiscouraged(SectionIssue):
    """Issue reported when a section classified as discouraged is present."""
|
||||
|
||||
|
||||
@dataclass
class SectionValidationResult:
    """Outcome of one section validation run."""

    issues: List[SectionIssue]
    sections_checked: int
    sections_found: int

    def has_errors(self) -> bool:
        """Return True when at least one issue has severity 'ERROR'."""
        return bool(self.get_errors())

    def has_warnings(self) -> bool:
        """Return True when at least one issue has severity 'WARNING'."""
        return bool(self.get_warnings())

    def is_valid(self) -> bool:
        """Return True when no ERROR-level issue was recorded."""
        if self.has_errors():
            return False
        return True

    def get_errors(self) -> List[SectionIssue]:
        """Return the ERROR-level issues, in their original order."""
        return list(filter(lambda entry: entry.severity == 'ERROR', self.issues))

    def get_warnings(self) -> List[SectionIssue]:
        """Return the WARNING-level issues, in their original order."""
        return list(filter(lambda entry: entry.severity == 'WARNING', self.issues))
|
||||
|
||||
|
||||
class SectionValidator:
    """
    Validates section presence and classification compliance.

    Checks that markdown documents have the correct sections based on
    x-markitect-sections classifications in the schema. Section names and
    classification values are both matched case-insensitively.
    """

    def __init__(self, schema: Dict[str, Any]):
        """
        Initialize validator with a schema.

        Args:
            schema: JSON schema with x-markitect-sections extension
        """
        self.schema = schema
        # `or {}` covers both a missing key and an explicit null value,
        # so check() can always iterate over a mapping.
        self.sections_spec = schema.get('x-markitect-sections') or {}

    def check(self, document: 'MarkdownDocument') -> 'SectionValidationResult':
        """
        Validate section presence against schema classifications.

        Args:
            document: Parsed markdown document

        Returns:
            SectionValidationResult with any issues found
        """
        issues = []

        # Get level-2 headings (main sections) from document
        doc_sections = self._get_document_sections(document)

        # Check each specification
        for section_name, spec in self.sections_spec.items():
            classification = self._classification_of(spec)
            section_in_doc = self._find_section(section_name, doc_sections, spec)

            if classification == 'required':
                if not section_in_doc:
                    issues.append(SectionMissing(
                        section_name=section_name,
                        severity='ERROR',
                        message=spec.get('error_message', f'{section_name} section is required'),
                        classification='required'
                    ))

            elif classification == 'improper':
                if section_in_doc:
                    issues.append(SectionImproper(
                        section_name=section_name,
                        severity='ERROR',
                        message=spec.get('error_message', f'{section_name} section must not appear'),
                        classification='improper',
                        line_number=section_in_doc.get('line_number')
                    ))

            elif classification == 'recommended':
                if not section_in_doc:
                    issues.append(SectionMissing(
                        section_name=section_name,
                        severity='WARNING',
                        message=spec.get('warning_if_missing', f'{section_name} section is recommended'),
                        classification='recommended'
                    ))

            elif classification == 'discouraged':
                if section_in_doc:
                    issues.append(SectionDiscouraged(
                        section_name=section_name,
                        severity='WARNING',
                        message=spec.get('warning_if_present', f'{section_name} section is discouraged'),
                        classification='discouraged',
                        line_number=section_in_doc.get('line_number')
                    ))

            # Any other classification (e.g. 'optional') produces no issue.

        return SectionValidationResult(
            issues=issues,
            sections_checked=len(self.sections_spec),
            sections_found=len(doc_sections)
        )

    @staticmethod
    def _classification_of(spec: Dict[str, Any]) -> Optional[str]:
        """Return the spec's classification, lower-cased when it is a string.

        Normalizing here means a schema that writes 'REQUIRED' or
        ' Required ' is enforced instead of being silently skipped.
        Non-string values are returned unchanged and match no rule.
        """
        value = spec.get('classification')
        if isinstance(value, str):
            return value.strip().lower()
        return value

    def _get_document_sections(self, document: 'MarkdownDocument') -> List[Dict[str, Any]]:
        """
        Extract level-2 headings from document.

        Args:
            document: Parsed markdown document

        Returns:
            List of section dicts with name (upper-cased, stripped) and
            line_number (None when the heading carries no position)
        """
        # Get headings from document
        if hasattr(document, 'get_headings_by_level'):
            # Tolerate an accessor that returns None for "no headings"
            level_2_headings = document.get_headings_by_level(2) or []
        elif hasattr(document, 'headings'):
            level_2_headings = [
                h for h in document.headings
                if h.get('level') == 2
            ]
        else:
            # No known heading accessor: treat the document as having no sections
            level_2_headings = []

        sections = []
        for heading in level_2_headings:
            if isinstance(heading, dict):
                sections.append({
                    # `or ''` guards against a heading whose content is None
                    'name': (heading.get('content') or '').strip().upper(),
                    'line_number': heading.get('line_number')
                })
            elif isinstance(heading, str):
                sections.append({
                    'name': heading.strip().upper(),
                    'line_number': None
                })
            # Headings of any other type are ignored.

        return sections

    def _find_section(self, section_name: str, doc_sections: List[Dict[str, Any]],
                      spec: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """
        Find a section in document, checking alternatives.

        The primary name is tried first, then each alternative in the
        order given by the spec; the first matching section wins.

        Args:
            section_name: Primary section name to find
            doc_sections: List of sections in document
            spec: Section specification with potential alternatives

        Returns:
            Section dict if found, None otherwise
        """
        alternatives = spec.get('alternatives') or []
        if isinstance(alternatives, str):
            # A lone string is one alternative, not a sequence of characters
            alternatives = [alternatives]

        for candidate in [section_name, *alternatives]:
            # Normalize the same way document section names are normalized
            wanted = candidate.upper().strip()
            for section in doc_sections:
                if section['name'] == wanted:
                    return section

        return None
|
||||
Reference in New Issue
Block a user