Implements semantic validation to complement existing structural validation.

Phase 1 & 2 Complete:
- SemanticValidator: Main validator orchestrating sub-validators
- SectionValidator: Enforces section classifications (required, recommended, optional, discouraged, improper) from x-markitect-sections
- ContentValidator: Validates content patterns, forbidden patterns, and quality metrics (word counts, sentence counts) from x-markitect-content-control

Features:
- Pattern matching with regex for required/forbidden/discouraged patterns
- Word count and sentence count validation
- Detailed error reporting with severity levels (ERROR, WARNING)
- Support for section alternatives (e.g., FLAGS vs OPTIONS)
- Comprehensive test coverage (16 tests, 100% passing)

Architecture:
- Complements existing SchemaValidator (structural AST validation)
- Clean separation: validators/ package for modular validators
- Semantic validation focuses on x-markitect-* extensions
- LinkValidator planned for Phase 3 (optional --check-links)

Next: Phase 4 - CLI integration to enhance 'markitect validate' command

Workplan: roadmap/20260106-semantic-document-validation/WORKPLAN.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
227 lines
7.6 KiB
Python
227 lines
7.6 KiB
Python
"""
|
|
Section Validator for markdown documents.
|
|
|
|
Validates that document sections comply with x-markitect-sections classifications:
|
|
- REQUIRED: Section must be present (ERROR if missing)
|
|
- RECOMMENDED: Section should be present (WARNING if missing)
|
|
- OPTIONAL: Section may be present (no check)
|
|
- DISCOURAGED: Section should not be present (WARNING if present)
|
|
- IMPROPER: Section must not be present (ERROR if present)
|
|
"""
|
|
|
|
from dataclasses import dataclass
|
|
from typing import List, Dict, Any, Optional
|
|
from pathlib import Path
|
|
|
|
|
|
@dataclass
class SectionIssue:
    """Base class for section validation issues.

    Attributes:
        section_name: Name of the section the issue refers to.
        severity: Issue level: 'ERROR', 'WARNING' or 'INFO'.
        message: Human-readable description of the problem.
        classification: Schema classification that produced the issue
            ('required', 'recommended', etc.).
        line_number: Line of the offending heading, or None when no
            location is known (for example, when the section is missing).
    """

    section_name: str
    severity: str  # 'ERROR', 'WARNING', 'INFO'
    message: str
    classification: str  # 'required', 'recommended', etc.
    line_number: Optional[int] = None

    def __str__(self) -> str:
        """Format the issue as ``[SEVERITY] (line N) SECTION: message``.

        The ``(line N)`` part is omitted only when no line number is known.
        """
        # Compare against None instead of relying on truthiness, so that a
        # heading reported at line 0 still shows its location.
        location = (
            f" (line {self.line_number})" if self.line_number is not None else ""
        )
        return f"[{self.severity}]{location} {self.section_name}: {self.message}"
|
|
|
|
|
|
@dataclass
class SectionMissing(SectionIssue):
    """Issue recording that an expected section is absent from the document.

    Adds no fields or behavior of its own; the subclass exists so callers can
    tell this kind of issue apart by type.
    """
|
|
|
|
|
|
@dataclass
class SectionImproper(SectionIssue):
    """Issue recording that an improper section appears in the document.

    Adds no fields or behavior of its own; the subclass exists so callers can
    tell this kind of issue apart by type.
    """
|
|
|
|
|
|
@dataclass
class SectionDiscouraged(SectionIssue):
    """Issue recording that a discouraged section appears in the document.

    Adds no fields or behavior of its own; the subclass exists so callers can
    tell this kind of issue apart by type.
    """
|
|
|
|
|
|
@dataclass
class SectionValidationResult:
    """Outcome of one section validation run.

    Attributes:
        issues: Every issue collected during the run, in detection order.
        sections_checked: Number of section specifications evaluated.
        sections_found: Number of sections extracted from the document.
    """

    issues: List[SectionIssue]
    sections_checked: int
    sections_found: int

    def _with_severity(self, level: str) -> List[SectionIssue]:
        """Return the issues whose severity equals *level*, order preserved."""
        matching = []
        for entry in self.issues:
            if entry.severity == level:
                matching.append(entry)
        return matching

    def has_errors(self) -> bool:
        """Return True when at least one issue is ERROR-level."""
        for entry in self.issues:
            if entry.severity == 'ERROR':
                return True
        return False

    def has_warnings(self) -> bool:
        """Return True when at least one issue is WARNING-level."""
        for entry in self.issues:
            if entry.severity == 'WARNING':
                return True
        return False

    def is_valid(self) -> bool:
        """Return True when validation passed, i.e. there are no errors."""
        if self.has_errors():
            return False
        return True

    def get_errors(self) -> List[SectionIssue]:
        """Return all ERROR-level issues."""
        return self._with_severity('ERROR')

    def get_warnings(self) -> List[SectionIssue]:
        """Return all WARNING-level issues."""
        return self._with_severity('WARNING')
|
|
|
|
|
|
class SectionValidator:
    """
    Validates section presence and classification compliance.

    Checks that markdown documents have the correct sections based on
    x-markitect-sections classifications in the schema. Section names are
    compared upper-cased and with surrounding whitespace stripped.
    """

    def __init__(self, schema: Dict[str, Any]):
        """
        Initialize validator with a schema.

        Args:
            schema: JSON schema with x-markitect-sections extension
        """
        self.schema = schema
        # ``or {}`` also covers a schema that sets the extension to null,
        # which would otherwise make check() fail on ``None.items()``.
        self.sections_spec = schema.get('x-markitect-sections') or {}

    def check(self, document: 'MarkdownDocument') -> 'SectionValidationResult':
        """
        Validate section presence against schema classifications.

        For every entry of the sections specification:
        - 'required' and absent  -> SectionMissing, ERROR
        - 'improper' and present -> SectionImproper, ERROR
        - 'recommended' and absent -> SectionMissing, WARNING
        - 'discouraged' and present -> SectionDiscouraged, WARNING
        Any other classification (such as 'optional') produces no issue.

        Args:
            document: Parsed markdown document

        Returns:
            SectionValidationResult with any issues found, the number of
            specifications checked and the number of sections found
        """
        issues = []

        # Get level-2 headings (main sections) from document
        doc_sections = self._get_document_sections(document)

        for section_name, spec in self.sections_spec.items():
            classification = spec.get('classification')
            section_in_doc = self._find_section(section_name, doc_sections, spec)
            present = section_in_doc is not None

            if classification == 'required' and not present:
                issues.append(SectionMissing(
                    section_name=section_name,
                    severity='ERROR',
                    message=spec.get('error_message', f'{section_name} section is required'),
                    classification='required',
                ))
            elif classification == 'improper' and present:
                issues.append(SectionImproper(
                    section_name=section_name,
                    severity='ERROR',
                    message=spec.get('error_message', f'{section_name} section must not appear'),
                    classification='improper',
                    line_number=section_in_doc.get('line_number'),
                ))
            elif classification == 'recommended' and not present:
                issues.append(SectionMissing(
                    section_name=section_name,
                    severity='WARNING',
                    message=spec.get('warning_if_missing', f'{section_name} section is recommended'),
                    classification='recommended',
                ))
            elif classification == 'discouraged' and present:
                issues.append(SectionDiscouraged(
                    section_name=section_name,
                    severity='WARNING',
                    message=spec.get('warning_if_present', f'{section_name} section is discouraged'),
                    classification='discouraged',
                    line_number=section_in_doc.get('line_number'),
                ))

        return SectionValidationResult(
            issues=issues,
            sections_checked=len(self.sections_spec),
            sections_found=len(doc_sections),
        )

    def _get_document_sections(self, document: 'MarkdownDocument') -> List[Dict[str, Any]]:
        """
        Extract level-2 headings from document.

        Headings come from ``document.get_headings_by_level(2)`` when that
        method exists, otherwise from the dict entries of
        ``document.headings`` whose 'level' is 2. A document offering neither
        yields no sections (no AST fallback is implemented).

        Args:
            document: Parsed markdown document

        Returns:
            List of section dicts with 'name' (stripped, upper-cased) and
            'line_number' (None for headings given as plain strings)
        """
        if hasattr(document, 'get_headings_by_level'):
            level_2_headings = document.get_headings_by_level(2) or []
        elif hasattr(document, 'headings'):
            # Only dict headings carry a level; skipping other entries here
            # avoids calling .get() on e.g. a plain string heading.
            level_2_headings = [
                h for h in (document.headings or [])
                if isinstance(h, dict) and h.get('level') == 2
            ]
        else:
            level_2_headings = []

        sections = []
        for heading in level_2_headings:
            if isinstance(heading, dict):
                sections.append({
                    # ``or ''`` tolerates a heading whose content is None
                    'name': (heading.get('content') or '').strip().upper(),
                    'line_number': heading.get('line_number'),
                })
            elif isinstance(heading, str):
                sections.append({
                    'name': heading.strip().upper(),
                    'line_number': None,
                })
            # Headings of any other type are ignored.

        return sections

    def _find_section(self, section_name: str, doc_sections: List[Dict[str, Any]],
                      spec: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """
        Find a section in document, checking alternatives.

        The primary name is tried first, then each alternative in the order
        listed; for each name the first matching document section wins.

        Args:
            section_name: Primary section name to find
            doc_sections: List of sections in document
            spec: Section specification with potential alternatives

        Returns:
            Section dict if found, None otherwise
        """
        alternatives = spec.get('alternatives') or []
        if isinstance(alternatives, str):
            # A lone string is one alternative name, not a sequence of
            # single-character names.
            alternatives = [alternatives]

        for candidate in [section_name, *alternatives]:
            # Normalize the same way document section names are stored
            wanted = candidate.upper().strip()
            for section in doc_sections:
                if section['name'] == wanted:
                    return section

        return None
|