This commit closes the schema-evolution topic (260105) by adding the final deliverable (ADR schema) and fixing markdown schema support across commands.

**ADR Schema Created**:
- Comprehensive Architecture Decision Record validation schema
- 12 section classifications (7 required, 2 recommended, 2 optional, 3 improper/discouraged)
- Content pattern validation for ADR formatting rules (status dates, decision statements, rationale structure)
- Quality metrics for completeness (word counts, sentence counts)
- Follows title case naming convention (Status, Context, Decision, etc.)

**Markdown Schema Support Fixed**:
- Fixed `markitect validate` command to support .md schemas
- Added load_schema_from_path() for both .json and .md files
- Updated structural and semantic validation to use schema dict
- Fixed `markitect generate-stub` command to support .md schemas
- Uses load_schema_from_path() instead of direct JSON loading
- Created DocumentWrapper class in semantic_validator.py
- Extracts headings from AST tokens (heading_open, inline)
- Provides get_headings_by_level() interface expected by validators
- Enables section validation to work with real documents

**Topic Closure**:
- Updated SCHEMA_EVOLUTION_WORKPLAN.md with completion summary
- Phases 1-3: 100% complete (via Schema-of-Schemas and Semantic Validation)
- Phase 4: Deferred as future enhancement (15-20 sessions)
- Phase 5: 70% complete (docs done, CI/CD templates deferred)
- Created DONE.md with comprehensive task checklist
- Generated ADR template stub (examples/templates/adr-template.md)
- Moved topic from roadmap/ to history/260105-schema-evolution/

**Files Changed**:
- markitect/cli.py: Added markdown schema support to validate and generate-stub
- markitect/semantic_validator.py: Added DocumentWrapper class for AST parsing
- markitect/schemas/adr-schema-v1.0.md: New ADR validation schema (560 lines)
- examples/templates/adr-template.md: Generated ADR template stub
- history/260105-schema-evolution/: Moved completed topic to history

**Status**: Schema evolution topic successfully closed with ADR schema as final deliverable. All schema commands now support markdown schemas. Section validation working correctly.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
352 lines
12 KiB
Python
352 lines
12 KiB
Python
"""
|
|
Semantic Validator for markdown documents.
|
|
|
|
Validates markdown documents against x-markitect schema extensions:
|
|
- x-markitect-sections: Section classifications (required, recommended, etc.)
|
|
- x-markitect-content-control: Content patterns and quality metrics
|
|
- Link validation: Internal and external link checking
|
|
|
|
Complements the existing SchemaValidator which handles structural AST validation.
|
|
"""
|
|
|
|
from dataclasses import dataclass
|
|
from typing import List, Dict, Any, Optional
|
|
from pathlib import Path
|
|
import json
|
|
|
|
from markitect.validators.section_validator import (
|
|
SectionValidator,
|
|
SectionValidationResult
|
|
)
|
|
from markitect.validators.content_validator import (
|
|
ContentValidator,
|
|
ContentValidationResult
|
|
)
|
|
from markitect.validators.link_validator import (
|
|
LinkValidator,
|
|
LinkValidationResult
|
|
)
|
|
|
|
|
|
class DocumentWrapper:
    """
    Wrapper for a document dict that provides the interface validators expect.

    Headings are extracted from the dict's ``'ast'`` entry once, at
    construction time, and exposed through :attr:`headings` and
    :meth:`get_headings_by_level`.  All other keys of the wrapped dict stay
    reachable through dict-style access (``wrapper[key]`` and
    ``wrapper.get(key)``).
    """

    def __init__(self, doc_dict: Dict[str, Any]):
        """Initialize wrapper with document dict from DocumentManager."""
        self.doc_dict = doc_dict
        self._headings_cache: Optional[List[Dict[str, Any]]] = None
        self._extract_headings()

    def _extract_headings(self) -> None:
        """
        Scan the AST token list for headings and cache the result.

        A heading is recorded only when a ``heading_open`` token is
        immediately followed by an ``inline`` token (which carries the heading
        text).  Tokens that are not dicts are skipped rather than raising,
        and a missing or ``None`` ``'ast'`` entry yields no headings.
        """
        # ``or []`` also covers an explicit ``'ast': None`` entry.
        tokens = self.doc_dict.get('ast') or []
        headings: List[Dict[str, Any]] = []

        for index, token in enumerate(tokens):
            if not isinstance(token, dict) or token.get('type') != 'heading_open':
                continue

            # The heading text lives in the token right after heading_open.
            # Apply the same dict guard here as on the current token, so a
            # stray non-dict entry cannot raise AttributeError.
            following = tokens[index + 1] if index + 1 < len(tokens) else None
            if not isinstance(following, dict) or following.get('type') != 'inline':
                continue

            headings.append({
                'content': following.get('content', ''),
                'level': self._heading_level(token),
                'line_number': self._heading_line_number(token),
            })

        self._headings_cache = headings

    @staticmethod
    def _heading_level(token: Dict[str, Any]) -> int:
        """Derive the heading level from the token's tag ('h2' -> 2); default 1."""
        tag = token.get('tag') or 'h1'
        digits = str(tag)[1:]
        # isdecimal() only accepts characters int() can parse, so the
        # conversion below cannot raise.
        return int(digits) if digits.isdecimal() else 1

    @staticmethod
    def _heading_line_number(token: Dict[str, Any]) -> Optional[int]:
        """Return the first entry of the token's 'map' plus one, or None if absent."""
        # NOTE(review): the +1 presumably turns a 0-based source line into a
        # 1-based one - confirm against the parser that produces the tokens.
        source_map = token.get('map')
        return source_map[0] + 1 if source_map else None

    def get_headings_by_level(self, level: int) -> List[Dict[str, Any]]:
        """
        Get headings at specified level.

        Args:
            level: Heading level (1-6)

        Returns:
            List of heading dicts with 'content', 'level', 'line_number'
        """
        return [h for h in self.headings if h.get('level') == level]

    @property
    def headings(self) -> List[Dict[str, Any]]:
        """Get all headings, in the order they appear in the AST."""
        # Re-extract only if the cache was never filled (for example when
        # the instance was created without running __init__).
        if self._headings_cache is None:
            self._extract_headings()
        return self._headings_cache

    def __getitem__(self, key):
        """Allow dict-like access for compatibility."""
        return self.doc_dict[key]

    def get(self, key, default=None):
        """Allow dict-like get for compatibility."""
        return self.doc_dict.get(key, default)
|
|
|
|
|
|
@dataclass
class SemanticValidationReport:
    """
    Report of semantic validation results.

    Combines results from section, content, and link validators.  The
    section result is mandatory; the content and link results are optional
    and are ignored wherever they are missing or falsy.
    """
    # Result of the section validator (always present).
    section_result: SectionValidationResult
    # Result of the content validator, if content validation ran.
    content_result: Optional[ContentValidationResult] = None
    # Result of the link validator, if link validation ran.
    link_result: Optional[LinkValidationResult] = None

    def _results_providing(self, attribute: str) -> List[Any]:
        """Return the section result plus each optional result that has *attribute*."""
        # The section result is used unconditionally; optional results are
        # included only when truthy and exposing the requested attribute.
        results = [self.section_result]
        for optional in (self.content_result, self.link_result):
            if optional and hasattr(optional, attribute):
                results.append(optional)
        return results

    @staticmethod
    def _format_issue_lines(issues, label_attr: str) -> List[str]:
        """Render one report line per issue, labelled by ``issue.<label_attr>``."""
        rendered = []
        for issue in issues:
            status = "❌" if issue.severity == 'ERROR' else "⚠️"
            rendered.append(f" {status} {getattr(issue, label_attr)} - {issue.message}")
        return rendered

    def has_errors(self) -> bool:
        """Check if there are any ERROR-level issues."""
        return any(
            result.has_errors()
            for result in self._results_providing('has_errors')
        )

    def has_warnings(self) -> bool:
        """Check if there are any WARNING-level issues."""
        return any(
            result.has_warnings()
            for result in self._results_providing('has_warnings')
        )

    def is_valid(self) -> bool:
        """Check if validation passed (no errors)."""
        return not self.has_errors()

    def get_all_issues(self) -> List[Any]:
        """Get all issues from all validators (section, then content, then link)."""
        issues: List[Any] = []
        for result in self._results_providing('issues'):
            issues.extend(result.issues)
        return issues

    def format_text(self) -> str:
        """
        Format validation report as text.

        Returns:
            A newline-joined report with one block per available validator
            result, followed by a summary of counts and the overall status.
        """
        lines = ["Section Validation:"]
        if self.section_result.issues:
            lines.extend(
                self._format_issue_lines(self.section_result.issues, 'section_name')
            )
        else:
            lines.append(" ✅ All section requirements met")

        if self.content_result:
            lines.append("")
            lines.append("Content Validation:")
            if self.content_result.issues:
                lines.extend(
                    self._format_issue_lines(self.content_result.issues, 'section_name')
                )
            else:
                lines.append(" ✅ All content requirements met")

        if self.link_result:
            lines.append("")
            lines.append("Link Validation:")
            if self.link_result.issues:
                lines.extend(
                    self._format_issue_lines(self.link_result.issues, 'link')
                )
            else:
                lines.append(f" ✅ All {self.link_result.links_checked} links valid")

        lines.append("")
        lines.append("Summary:")
        lines.append(f" Sections checked: {self.section_result.sections_checked}")
        lines.append(f" Sections found: {self.section_result.sections_found}")

        # Copy before extending: the validator may hand back its own list,
        # and formatting a report must not modify the underlying result.
        all_errors = list(self.section_result.get_errors())
        all_warnings = list(self.section_result.get_warnings())

        for optional in (self.content_result, self.link_result):
            if optional:
                all_errors.extend(optional.get_errors())
                all_warnings.extend(optional.get_warnings())

        lines.append(f" Errors: {len(all_errors)}")
        lines.append(f" Warnings: {len(all_warnings)}")

        if self.is_valid():
            lines.append(" Status: PASSED ✅")
        else:
            lines.append(" Status: FAILED ❌")

        return "\n".join(lines)
|
|
|
|
|
|
class SemanticValidator:
    """
    Validates markdown documents against x-markitect extensions.

    Complements existing SchemaValidator which handles structural AST validation.
    This validator checks semantic aspects defined in x-markitect-* extensions.

    Example:
        >>> schema = load_schema_from_path('manpage-schema-v1.0.md')
        >>> validator = SemanticValidator(schema)
        >>> report = validator.validate('my-command.1.md')
        >>> if not report.is_valid():
        ...     print(report.format_text())
    """

    def __init__(self, schema: Dict[str, Any]):
        """
        Initialize semantic validator with a schema.

        Args:
            schema: JSON schema with x-markitect-* extensions

        The schema can be either:
        - A dict loaded from JSON
        - A dict loaded from markdown with embedded JSON
        - Must contain x-markitect-sections and/or x-markitect-content-control
        """
        self.schema = schema

        # Each sub-validator receives the same schema dict.
        self.section_validator = SectionValidator(schema)
        self.content_validator = ContentValidator(schema)
        self.link_validator = LinkValidator(schema)

    def validate(self, document_path: str | Path,
                 check_links: bool = False) -> SemanticValidationReport:
        """
        Validate a markdown document against schema extensions.

        Section, content and link validation all run on every call.

        Args:
            document_path: Path to markdown document to validate
            check_links: Forwarded to the link validator as
                ``check_external``.  Link validation itself always runs;
                enabling this flag additionally requests external link
                checks (may be slow).

        Returns:
            SemanticValidationReport with validation results

        Raises:
            FileNotFoundError: If document_path doesn't exist

        Exceptions raised while parsing the document or by the sub-validators
        are not caught here and propagate to the caller.
        """
        document_path = Path(document_path)

        if not document_path.exists():
            raise FileNotFoundError(f"Document not found: {document_path}")

        document = self._parse_document(document_path)

        section_result = self.section_validator.check(document)
        content_result = self.content_validator.check(document)
        # bool() keeps the forwarded value a strict True/False.
        link_result = self.link_validator.check(
            document, check_external=bool(check_links)
        )

        return SemanticValidationReport(
            section_result=section_result,
            content_result=content_result,
            link_result=link_result,
        )

    def _parse_document(self, document_path: Path) -> 'DocumentWrapper':
        """
        Parse a markdown document and wrap it for the validators.

        Args:
            document_path: Path to markdown file

        Returns:
            A DocumentWrapper around the value returned by
            ``DocumentManager.ingest_file``.

        This uses the existing markitect markdown parser.
        """
        # Import here to avoid circular dependency
        from markitect.document_manager import DocumentManager

        parsed = DocumentManager().ingest_file(document_path)

        # Wrap in DocumentWrapper to provide expected interface
        return DocumentWrapper(parsed)
|
|
|
|
|
|
def load_schema_from_path(schema_path: str | Path) -> Dict[str, Any]:
    """
    Load a schema from file (supports .json and .md formats).

    The format is chosen from the file extension, compared
    case-insensitively (``schema.JSON`` is treated like ``schema.json``).

    Args:
        schema_path: Path to schema file

    Returns:
        For ``.json`` files, the parsed JSON content.  For ``.md`` files,
        the ``'schema'`` entry of the value returned by
        ``MarkdownSchemaLoader.load_schema``.

    Raises:
        FileNotFoundError: If schema file doesn't exist
        ValueError: If the extension is unsupported, the JSON is malformed
            (``json.JSONDecodeError`` is a ``ValueError`` subclass), or the
            markdown loader's result has no ``'schema'`` entry
    """
    schema_path = Path(schema_path)

    if not schema_path.exists():
        raise FileNotFoundError(f"Schema not found: {schema_path}")

    # Normalise case so upper- or mixed-case extensions are accepted.
    suffix = schema_path.suffix.lower()

    if suffix == '.json':
        # Load JSON schema directly
        with schema_path.open('r', encoding='utf-8') as f:
            return json.load(f)

    if suffix == '.md':
        # Load markdown schema with embedded JSON.  Imported here, as in the
        # original, so the loader module is only needed for .md schemas.
        from markitect.schema_loader import MarkdownSchemaLoader

        schema_data = MarkdownSchemaLoader().load_schema(schema_path)
        try:
            return schema_data['schema']
        except KeyError as err:
            # Surface the documented exception type instead of a bare KeyError.
            raise ValueError(
                f"No embedded schema found in: {schema_path}"
            ) from err

    raise ValueError(f"Unsupported schema format: {schema_path.suffix}")
|