feat: add semantic document validator for x-markitect extensions

Implements semantic validation to complement existing structural validation:

Phase 1 & 2 Complete:
- SemanticValidator: Main validator orchestrating sub-validators
- SectionValidator: Enforces section classifications (required, recommended,
  optional, discouraged, improper) from x-markitect-sections
- ContentValidator: Validates content patterns, forbidden patterns, and quality
  metrics (word counts, sentence counts) from x-markitect-content-control

Features:
- Pattern matching with regex for required/forbidden/discouraged patterns
- Word count and sentence count validation
- Detailed error reporting with severity levels (ERROR, WARNING)
- Support for section alternatives (e.g., FLAGS vs OPTIONS)
- Comprehensive test coverage (16 tests, 100% passing)

Architecture:
- Complements existing SchemaValidator (structural AST validation)
- Clean separation: validators/ package for modular validators
- Semantic validation focuses on x-markitect-* extensions
- LinkValidator planned for Phase 3 (optional --check-links)

Next: Phase 4 - CLI integration to enhance 'markitect validate' command

Workplan: roadmap/20260106-semantic-document-validation/WORKPLAN.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-06 03:24:32 +01:00
parent f27eea6b5b
commit a969c5de47
6 changed files with 1932 additions and 0 deletions
--- a/markitect/semantic_validator.py
+++ b/markitect/semantic_validator.py
@@ -0,0 +1,261 @@
+"""
+Semantic Validator for markdown documents.
+
+Validates markdown documents against x-markitect schema extensions:
+- x-markitect-sections: Section classifications (required, recommended, etc.)
+- x-markitect-content-control: Content patterns and quality metrics
+- Link validation: Internal and external link checking (planned; not yet implemented)
+
+Complements the existing SchemaValidator which handles structural AST validation.
+"""
+
+from dataclasses import dataclass
+from typing import List, Dict, Any, Optional
+from pathlib import Path
+import json
+
+from markitect.validators.section_validator import (
+    SectionValidator,
+    SectionValidationResult
+)
+from markitect.validators.content_validator import (
+    ContentValidator,
+    ContentValidationResult
+)
+
+
+@dataclass
+class SemanticValidationReport:
+    """
+    Report of semantic validation results.
+
+    Combines results from section, content, and link validators.
+    """
+    section_result: SectionValidationResult
+    content_result: Optional[ContentValidationResult] = None
+    link_result: Optional[Any] = None  # LinkValidationResult when implemented
+
+    def has_errors(self) -> bool:
+        """Check if there are any ERROR-level issues."""
+        errors = self.section_result.has_errors()
+
+        if self.content_result and hasattr(self.content_result, 'has_errors'):
+            errors = errors or self.content_result.has_errors()
+
+        if self.link_result and hasattr(self.link_result, 'has_errors'):
+            errors = errors or self.link_result.has_errors()
+
+        return errors
+
+    def has_warnings(self) -> bool:
+        """Check if there are any WARNING-level issues."""
+        warnings = self.section_result.has_warnings()
+
+        if self.content_result and hasattr(self.content_result, 'has_warnings'):
+            warnings = warnings or self.content_result.has_warnings()
+
+        if self.link_result and hasattr(self.link_result, 'has_warnings'):
+            warnings = warnings or self.link_result.has_warnings()
+
+        return warnings
+
+    def is_valid(self) -> bool:
+        """Check if validation passed (no errors)."""
+        return not self.has_errors()
+
+    def get_all_issues(self) -> List[Any]:
+        """Get all issues from all validators."""
+        issues = list(self.section_result.issues)
+
+        if self.content_result and hasattr(self.content_result, 'issues'):
+            issues.extend(self.content_result.issues)
+
+        if self.link_result and hasattr(self.link_result, 'issues'):
+            issues.extend(self.link_result.issues)
+
+        return issues
+
+    def format_text(self) -> str:
+        """Format validation report as text."""
+        lines = []
+
+        # Section validation
+        lines.append("Section Validation:")
+        if self.section_result.issues:
+            for issue in self.section_result.issues:
+                status = "❌" if issue.severity == 'ERROR' else "⚠️"
+                lines.append(f"  {status} {issue.section_name} - {issue.message}")
+        else:
+            lines.append("  ✅ All section requirements met")
+
+        # Content validation
+        if self.content_result:
+            lines.append("")
+            lines.append("Content Validation:")
+            if self.content_result.issues:
+                for issue in self.content_result.issues:
+                    status = "❌" if issue.severity == 'ERROR' else "⚠️"
+                    lines.append(f"  {status} {issue.section_name} - {issue.message}")
+            else:
+                lines.append("  ✅ All content requirements met")
+
+        # Summary
+        lines.append("")
+        lines.append("Summary:")
+        lines.append(f"  Sections checked: {self.section_result.sections_checked}")
+        lines.append(f"  Sections found: {self.section_result.sections_found}")
+
+        all_errors = self.section_result.get_errors()
+        all_warnings = self.section_result.get_warnings()
+
+        if self.content_result:
+            all_errors.extend(self.content_result.get_errors())
+            all_warnings.extend(self.content_result.get_warnings())
+
+        lines.append(f"  Errors: {len(all_errors)}")
+        lines.append(f"  Warnings: {len(all_warnings)}")
+
+        if self.is_valid():
+            lines.append("  Status: PASSED ✅")
+        else:
+            lines.append("  Status: FAILED ❌")
+
+        return "\n".join(lines)
+
+
+class SemanticValidator:
+    """
+    Validates markdown documents against x-markitect extensions.
+
+    Complements existing SchemaValidator which handles structural AST validation.
+    This validator checks semantic aspects defined in x-markitect-* extensions.
+
+    Example:
+        >>> schema = load_schema('manpage-schema-v1.0.md')
+        >>> validator = SemanticValidator(schema)
+        >>> report = validator.validate('my-command.1.md')
+        >>> if not report.is_valid():
+        ...     print(report.format_text())
+    """
+
+    def __init__(self, schema: Dict[str, Any]):
+        """
+        Initialize semantic validator with a schema.
+
+        Args:
+            schema: JSON schema with x-markitect-* extensions
+
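+        The schema is a dict loaded either from JSON or from markdown with
+        embedded JSON. It is expected to contain x-markitect-sections and/or
+        x-markitect-content-control; the sub-validators read each extension
+        with an empty-dict default, so a missing extension yields no checks.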
+        """
+        self.schema = schema
+
+        # Initialize sub-validators
+        self.section_validator = SectionValidator(schema)
+        self.content_validator = ContentValidator(schema)
+
+        # TODO: Initialize link validator when implemented
+        # self.link_validator = LinkValidator(schema)
+
+    def validate(self, document_path: str | Path,
+                 check_links: bool = False) -> SemanticValidationReport:
+        """
+        Validate a markdown document against schema extensions.
+
+        Args:
+            document_path: Path to markdown document to validate
+            check_links: Reserved for link validation; currently ignored (link validator not implemented)
+
+        Returns:
+            SemanticValidationReport with validation results
+
+        Raises:
+            FileNotFoundError: If document_path doesn't exist
+            ValueError: If document cannot be parsed
+        """
+        document_path = Path(document_path)
+
+        if not document_path.exists():
+            raise FileNotFoundError(f"Document not found: {document_path}")
+
+        # Parse document
+        document = self._parse_document(document_path)
+
+        # Run section validation
+        section_result = self.section_validator.check(document)
+
+        # Run content validation
+        content_result = self.content_validator.check(document)
+
+        # TODO: Run link validation when implemented
+        # if check_links:
+        #     link_result = self.link_validator.check(document)
+        # else:
+        #     link_result = None
+        link_result = None
+
+        return SemanticValidationReport(
+            section_result=section_result,
+            content_result=content_result,
+            link_result=link_result
+        )
+
+    def _parse_document(self, document_path: Path) -> 'MarkdownDocument':
+        """
+        Parse markdown document into AST.
+
+        Args:
+            document_path: Path to markdown file
+
+        Returns:
+            Parsed MarkdownDocument object
+
+        This uses the existing markitect markdown parser.
+        """
+        # Import here to avoid circular dependency
+        from markitect.document_manager import DocumentManager
+
+        # Use DocumentManager to parse the document
+        doc_manager = DocumentManager()
+        doc = doc_manager.ingest_file(document_path)
+
+        return doc
+
+
+def load_schema_from_path(schema_path: str | Path) -> Dict[str, Any]:
+    """
+    Load a schema from file (supports .json and .md formats).
+
+    Args:
+        schema_path: Path to schema file
+
+    Returns:
+        Schema dict parsed from the file (for .md, the embedded JSON schema)
+
+    Raises:
+        FileNotFoundError: If schema file doesn't exist
+        ValueError: If schema cannot be parsed
+    """
+    schema_path = Path(schema_path)
+
+    if not schema_path.exists():
+        raise FileNotFoundError(f"Schema not found: {schema_path}")
+
+    if schema_path.suffix == '.json':
+        # Load JSON schema directly
+        with open(schema_path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+
+    elif schema_path.suffix == '.md':
+        # Load markdown schema with embedded JSON
+        from markitect.schema_loader import MarkdownSchemaLoader
+
+        loader = MarkdownSchemaLoader()
+        schema_data = loader.load_schema(schema_path)
+
+        return schema_data['schema']
+
+    else:
+        raise ValueError(f"Unsupported schema format: {schema_path.suffix}")
--- a/markitect/validators/__init__.py
+++ b/markitect/validators/__init__.py
@@ -0,0 +1,50 @@
+"""
+Validators package for semantic document validation.
+
+This package contains validators that check markdown documents against
+x-markitect schema extensions (sections, content-control, link validation).
+
+Validators:
+    - SectionValidator: Validates section presence based on classifications
+    - ContentValidator: Validates content patterns and quality metrics
+    - LinkValidator: Validates internal and external links (planned; not yet implemented)
+"""
+
+from markitect.validators.section_validator import (
+    SectionValidator,
+    SectionValidationResult,
+    SectionIssue,
+    SectionMissing,
+    SectionImproper,
+    SectionDiscouraged,
+)
+
+from markitect.validators.content_validator import (
+    ContentValidator,
+    ContentValidationResult,
+    ContentIssue,
+    PatternMissing,
+    ForbiddenPattern,
+    DiscouragedPattern,
+    ContentTooShort,
+    ContentTooLong,
+)
+
+__all__ = [
+    # Section validator
+    'SectionValidator',
+    'SectionValidationResult',
+    'SectionIssue',
+    'SectionMissing',
+    'SectionImproper',
+    'SectionDiscouraged',
+    # Content validator
+    'ContentValidator',
+    'ContentValidationResult',
+    'ContentIssue',
+    'PatternMissing',
+    'ForbiddenPattern',
+    'DiscouragedPattern',
+    'ContentTooShort',
+    'ContentTooLong',
+]
--- a/markitect/validators/content_validator.py
+++ b/markitect/validators/content_validator.py
@@ -0,0 +1,316 @@
+"""
+Content Validator for markdown documents.
+
+Validates content against x-markitect-content-control rules:
+- Required patterns: Regex patterns that must appear in content
+- Discouraged patterns: Patterns that should be avoided (warnings)
+- Forbidden patterns: Patterns that must not appear (errors)
+- Quality metrics: Word counts and sentence counts (readability is not checked yet)
+"""
+
+from dataclasses import dataclass
+from typing import List, Dict, Any, Optional
+import re
+
+
+@dataclass
+class ContentIssue:
+    """Base class for content validation issues."""
+    section_name: str
+    severity: str  # 'ERROR', 'WARNING', 'INFO'
+    message: str
+    line_number: Optional[int] = None
+    matched_text: Optional[str] = None
+
+    def __str__(self) -> str:
+        location = f" (line {self.line_number})" if self.line_number else ""
+        match_info = f": '{self.matched_text}'" if self.matched_text else ""
+        return f"[{self.severity}]{location} {self.section_name} - {self.message}{match_info}"
+
+
+@dataclass
+class PatternMissing(ContentIssue):
+    """Required pattern not found in content."""
+    pattern: str = ""
+
+
+@dataclass
+class ForbiddenPattern(ContentIssue):
+    """Forbidden pattern found in content."""
+    pattern: str = ""
+
+
+@dataclass
+class DiscouragedPattern(ContentIssue):
+    """Discouraged pattern found in content."""
+    pattern: str = ""
+
+
+@dataclass
+class ContentTooShort(ContentIssue):
+    """Content does not meet minimum word/sentence count."""
+    actual: int = 0
+    required: int = 0
+
+
+@dataclass
+class ContentTooLong(ContentIssue):
+    """Content exceeds maximum word/sentence count."""
+    actual: int = 0
+    limit: int = 0
+
+
+@dataclass
+class ContentValidationResult:
+    """Result of content validation."""
+    issues: List[ContentIssue]
+    sections_checked: int
+
+    def has_errors(self) -> bool:
+        """Check if there are any ERROR-level issues."""
+        return any(issue.severity == 'ERROR' for issue in self.issues)
+
+    def has_warnings(self) -> bool:
+        """Check if there are any WARNING-level issues."""
+        return any(issue.severity == 'WARNING' for issue in self.issues)
+
+    def is_valid(self) -> bool:
+        """Check if validation passed (no errors)."""
+        return not self.has_errors()
+
+    def get_errors(self) -> List[ContentIssue]:
+        """Get all ERROR-level issues."""
+        return [issue for issue in self.issues if issue.severity == 'ERROR']
+
+    def get_warnings(self) -> List[ContentIssue]:
+        """Get all WARNING-level issues."""
+        return [issue for issue in self.issues if issue.severity == 'WARNING']
+
+
+class ContentValidator:
+    """
+    Validates content against x-markitect-content-control rules.
+
+    Checks content patterns and quality metrics (word/sentence counts) for each section.
+    """
+
+    def __init__(self, schema: Dict[str, Any]):
+        """
+        Initialize validator with a schema.
+
+        Args:
+            schema: JSON schema with x-markitect-content-control extension
+        """
+        self.schema = schema
+        self.content_rules = schema.get('x-markitect-content-control', {})
+
+    def check(self, document: 'MarkdownDocument') -> ContentValidationResult:
+        """
+        Validate content against schema rules.
+
+        Args:
+            document: Parsed markdown document
+
+        Returns:
+            ContentValidationResult with any issues found
+        """
+        issues = []
+        sections_checked = 0
+
+        # Check each section that has content rules
+        for section_key, rules in self.content_rules.items():
+            sections_checked += 1
+
+            # Get section from document
+            section = self._get_section(document, section_key)
+
+            if not section:
+                # Section validator handles missing sections
+                continue
+
+            section_content = section.get('content', '')
+            section_name = section.get('name', section_key)
+
+            # Check required patterns
+            issues.extend(self._check_required_patterns(
+                section_name, section_content, rules
+            ))
+
+            # Check forbidden patterns
+            issues.extend(self._check_forbidden_patterns(
+                section_name, section_content, rules
+            ))
+
+            # Check discouraged patterns
+            issues.extend(self._check_discouraged_patterns(
+                section_name, section_content, rules
+            ))
+
+            # Check content quality metrics
+            issues.extend(self._check_quality_metrics(
+                section_name, section_content, rules
+            ))
+
+        return ContentValidationResult(
+            issues=issues,
+            sections_checked=sections_checked
+        )
+
+    def _get_section(self, document: 'MarkdownDocument',
+                     section_key: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a section from the document.
+
+        Args:
+            document: Parsed markdown document
+            section_key: Section name (lowercase in rules, uppercase in document)
+
+        Returns:
+            Section dict with name and content, or None if not found
+        """
+        # Convert section_key to uppercase for matching
+        section_name = section_key.upper()
+
+        # Try to get section content
+        if hasattr(document, 'get_section'):
+            return document.get_section(section_name)
+
+        # Fallback: search headings
+        if hasattr(document, 'get_headings_by_level'):
+            headings = document.get_headings_by_level(2)
+            for heading in headings:
+                if isinstance(heading, dict):
+                    if heading.get('content', '').strip().upper() == section_name:
+                        # Found the section, need to extract content
+                        return {
+                            'name': section_name,
+                            'content': heading.get('text_content', '')
+                        }
+
+        return None
+
+    def _check_required_patterns(self, section_name: str, content: str,
+                                rules: Dict[str, Any]) -> List[ContentIssue]:
+        """Check that all required patterns appear in content."""
+        issues = []
+        required_patterns = rules.get('required_patterns', [])
+
+        for pattern in required_patterns:
+            try:
+                if not re.search(pattern, content, re.MULTILINE):
+                    issues.append(PatternMissing(
+                        section_name=section_name,
+                        severity='ERROR',
+                        message=f'Required pattern not found',
+                        pattern=pattern
+                    ))
+            except re.error as e:
+                # Invalid regex pattern in schema
+                issues.append(ContentIssue(
+                    section_name=section_name,
+                    severity='ERROR',
+                    message=f'Invalid regex pattern in schema: {e}'
+                ))
+
+        return issues
+
+    def _check_forbidden_patterns(self, section_name: str, content: str,
+                                  rules: Dict[str, Any]) -> List[ContentIssue]:
+        """Check that no forbidden patterns appear in content."""
+        issues = []
+        forbidden_patterns = rules.get('forbidden_patterns', [])
+
+        for pattern in forbidden_patterns:
+            try:
+                match = re.search(pattern, content, re.MULTILINE)
+                if match:
+                    issues.append(ForbiddenPattern(
+                        section_name=section_name,
+                        severity='ERROR',
+                        message=f'Forbidden pattern found',
+                        pattern=pattern,
+                        matched_text=match.group(0)[:50]  # Limit to 50 chars
+                    ))
+            except re.error as e:
+                issues.append(ContentIssue(
+                    section_name=section_name,
+                    severity='ERROR',
+                    message=f'Invalid regex pattern in schema: {e}'
+                ))
+
+        return issues
+
+    def _check_discouraged_patterns(self, section_name: str, content: str,
+                                   rules: Dict[str, Any]) -> List[ContentIssue]:
+        """Check for discouraged patterns (warnings)."""
+        issues = []
+        discouraged_patterns = rules.get('discouraged_patterns', [])
+
+        for pattern in discouraged_patterns:
+            try:
+                match = re.search(pattern, content, re.MULTILINE)
+                if match:
+                    issues.append(DiscouragedPattern(
+                        section_name=section_name,
+                        severity='WARNING',
+                        message=f'Discouraged pattern found',
+                        pattern=pattern,
+                        matched_text=match.group(0)[:50]
+                    ))
+            except re.error as e:
+                issues.append(ContentIssue(
+                    section_name=section_name,
+                    severity='WARNING',
+                    message=f'Invalid regex pattern in schema: {e}'
+                ))
+
+        return issues
+
+    def _check_quality_metrics(self, section_name: str, content: str,
+                              rules: Dict[str, Any]) -> List[ContentIssue]:
+        """Check content quality metrics (word count, sentence count)."""
+        issues = []
+        quality = rules.get('content_quality', {})
+
+        if not quality:
+            return issues
+
+        # Word count validation
+        word_count = len(content.split())
+
+        min_words = quality.get('min_words')
+        if min_words is not None and word_count < min_words:
+            issues.append(ContentTooShort(
+                section_name=section_name,
+                severity='WARNING',
+                message=f'Content too short ({word_count} words, minimum {min_words})',
+                actual=word_count,
+                required=min_words
+            ))
+
+        max_words = quality.get('max_words')
+        if max_words is not None and word_count > max_words:
+            issues.append(ContentTooLong(
+                section_name=section_name,
+                severity='WARNING',
+                message=f'Content too long ({word_count} words, maximum {max_words})',
+                actual=word_count,
+                limit=max_words
+            ))
+
+        # Sentence count validation
+        min_sentences = quality.get('min_sentences')
+        if min_sentences is not None:
+            # Simple sentence count: number of runs of terminal punctuation (.!?)
+            sentence_count = len(re.findall(r'[.!?]+', content))
+
+            if sentence_count < min_sentences:
+                issues.append(ContentTooShort(
+                    section_name=section_name,
+                    severity='WARNING',
+                    message=f'Too few sentences ({sentence_count}, minimum {min_sentences})',
+                    actual=sentence_count,
+                    required=min_sentences
+                ))
+
+        return issues
--- a/markitect/validators/section_validator.py
+++ b/markitect/validators/section_validator.py
@@ -0,0 +1,226 @@
+"""
+Section Validator for markdown documents.
+
+Validates that document sections comply with x-markitect-sections classifications:
+- REQUIRED: Section must be present (ERROR if missing)
+- RECOMMENDED: Section should be present (WARNING if missing)
+- OPTIONAL: Section may be present (no check)
+- DISCOURAGED: Section should not be present (WARNING if present)
+- IMPROPER: Section must not be present (ERROR if present)
+"""
+
+from dataclasses import dataclass
+from typing import List, Dict, Any, Optional
+from pathlib import Path
+
+
+@dataclass
+class SectionIssue:
+    """Base class for section validation issues."""
+    section_name: str
+    severity: str  # 'ERROR', 'WARNING', 'INFO'
+    message: str
+    classification: str  # 'required', 'recommended', etc.
+    line_number: Optional[int] = None
+
+    def __str__(self) -> str:
+        location = f" (line {self.line_number})" if self.line_number else ""
+        return f"[{self.severity}]{location} {self.section_name}: {self.message}"
+
+
+@dataclass
+class SectionMissing(SectionIssue):
+    """Section is missing from document."""
+    pass
+
+
+@dataclass
+class SectionImproper(SectionIssue):
+    """Improper section found in document."""
+    pass
+
+
+@dataclass
+class SectionDiscouraged(SectionIssue):
+    """Discouraged section found in document."""
+    pass
+
+
+@dataclass
+class SectionValidationResult:
+    """Result of section validation."""
+    issues: List[SectionIssue]
+    sections_checked: int
+    sections_found: int
+
+    def has_errors(self) -> bool:
+        """Check if there are any ERROR-level issues."""
+        return any(issue.severity == 'ERROR' for issue in self.issues)
+
+    def has_warnings(self) -> bool:
+        """Check if there are any WARNING-level issues."""
+        return any(issue.severity == 'WARNING' for issue in self.issues)
+
+    def is_valid(self) -> bool:
+        """Check if validation passed (no errors)."""
+        return not self.has_errors()
+
+    def get_errors(self) -> List[SectionIssue]:
+        """Get all ERROR-level issues."""
+        return [issue for issue in self.issues if issue.severity == 'ERROR']
+
+    def get_warnings(self) -> List[SectionIssue]:
+        """Get all WARNING-level issues."""
+        return [issue for issue in self.issues if issue.severity == 'WARNING']
+
+
+class SectionValidator:
+    """
+    Validates section presence and classification compliance.
+
+    Checks that markdown documents have the correct sections based on
+    x-markitect-sections classifications in the schema.
+    """
+
+    def __init__(self, schema: Dict[str, Any]):
+        """
+        Initialize validator with a schema.
+
+        Args:
+            schema: JSON schema with x-markitect-sections extension
+        """
+        self.schema = schema
+        self.sections_spec = schema.get('x-markitect-sections', {})
+
+    def check(self, document: 'MarkdownDocument') -> SectionValidationResult:
+        """
+        Validate section presence against schema classifications.
+
+        Args:
+            document: Parsed markdown document
+
+        Returns:
+            SectionValidationResult with any issues found
+        """
+        issues = []
+
+        # Get level-2 headings (main sections) from document
+        doc_sections = self._get_document_sections(document)
+
+        # Check each specification
+        for section_name, spec in self.sections_spec.items():
+            classification = spec.get('classification')
+            section_in_doc = self._find_section(section_name, doc_sections, spec)
+
+            if classification == 'required':
+                if not section_in_doc:
+                    issues.append(SectionMissing(
+                        section_name=section_name,
+                        severity='ERROR',
+                        message=spec.get('error_message', f'{section_name} section is required'),
+                        classification='required'
+                    ))
+
+            elif classification == 'improper':
+                if section_in_doc:
+                    issues.append(SectionImproper(
+                        section_name=section_name,
+                        severity='ERROR',
+                        message=spec.get('error_message', f'{section_name} section must not appear'),
+                        classification='improper',
+                        line_number=section_in_doc.get('line_number')
+                    ))
+
+            elif classification == 'recommended':
+                if not section_in_doc:
+                    issues.append(SectionMissing(
+                        section_name=section_name,
+                        severity='WARNING',
+                        message=spec.get('warning_if_missing', f'{section_name} section is recommended'),
+                        classification='recommended'
+                    ))
+
+            elif classification == 'discouraged':
+                if section_in_doc:
+                    issues.append(SectionDiscouraged(
+                        section_name=section_name,
+                        severity='WARNING',
+                        message=spec.get('warning_if_present', f'{section_name} section is discouraged'),
+                        classification='discouraged',
+                        line_number=section_in_doc.get('line_number')
+                    ))
+
+        return SectionValidationResult(
+            issues=issues,
+            sections_checked=len(self.sections_spec),
+            sections_found=len(doc_sections)
+        )
+
+    def _get_document_sections(self, document: 'MarkdownDocument') -> List[Dict[str, Any]]:
+        """
+        Extract level-2 headings from document.
+
+        Args:
+            document: Parsed markdown document
+
+        Returns:
+            List of section dicts with name and line_number
+        """
+        sections = []
+
+        # Get headings from document
+        if hasattr(document, 'get_headings_by_level'):
+            level_2_headings = document.get_headings_by_level(2)
+        elif hasattr(document, 'headings'):
+            level_2_headings = [
+                h for h in document.headings
+                if h.get('level') == 2
+            ]
+        else:
+            # Fallback: no known heading API; AST parsing is not implemented, so report no sections
+            level_2_headings = []
+
+        for heading in level_2_headings:
+            if isinstance(heading, dict):
+                sections.append({
+                    'name': heading.get('content', '').strip().upper(),
+                    'line_number': heading.get('line_number')
+                })
+            elif isinstance(heading, str):
+                sections.append({
+                    'name': heading.strip().upper(),
+                    'line_number': None
+                })
+
+        return sections
+
+    def _find_section(self, section_name: str, doc_sections: List[Dict[str, Any]],
+                     spec: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        """
+        Find a section in document, checking alternatives.
+
+        Args:
+            section_name: Primary section name to find
+            doc_sections: List of sections in document
+            spec: Section specification with potential alternatives
+
+        Returns:
+            Section dict if found, None otherwise
+        """
+        # Normalize section name for comparison
+        normalized_name = section_name.upper().strip()
+
+        # Check primary name
+        for section in doc_sections:
+            if section['name'] == normalized_name:
+                return section
+
+        # Check alternatives
+        alternatives = spec.get('alternatives', [])
+        for alt_name in alternatives:
+            normalized_alt = alt_name.upper().strip()
+            for section in doc_sections:
+                if section['name'] == normalized_alt:
+                    return section
+
+        return None
--- a/roadmap/20260106-semantic-document-validation/WORKPLAN.md
+++ b/roadmap/20260106-semantic-document-validation/WORKPLAN.md
@@ -0,0 +1,573 @@
+# Plan: Schema System Enhancement - Semantic Document Validation
+
+## Overview
+
+The schema management system has **complete schema structure analysis tools** (schema-analyze, schema-refine) and **structural AST validation** (markitect validate), but is missing **semantic validation capabilities**. This plan enhances validation to check sections, content patterns, and quality metrics defined in x-markitect extensions.
+
+## Current State Assessment
+
+### ✅ Already Implemented
+- **schema-analyze**: Detects rigid constraints, calculates rigidity score (markitect/schema_analyzer.py)
+- **schema-refine**: Automatically loosens rigid constraints (markitect/schema_refiner.py)
+- **markitect validate**: Validates AST structure against JSON schemas (cli.py:1493-1600)
+  - Checks headings, paragraphs, code_blocks counts match schema
+  - Validates document structure against JSON Schema properties
+  - Does NOT check x-markitect-sections classifications
+  - Does NOT validate x-markitect-content-control patterns
+- **X-Markitect Extensions**: Full system with sections, content-control, metadata
+- **Metaschema Validation**: Validates schema structure and extensions
+- **4 Production Schemas**: manpage, API docs, terminology, schema-schema
+- **Comprehensive Documentation**: User guides, specifications, tests (97 tests passing)
+
+### ❌ Missing Capabilities (Semantic Validation)
+1. **Section Classification Enforcement**: required/recommended/optional/discouraged/improper not checked
+2. **Content Pattern Validation**: required_patterns, forbidden_patterns not matched
+3. **Quality Metrics Validation**: min_words, max_words, min_sentences not enforced
+4. **Link Validation**: Internal/external link checking not implemented
+5. **Content Instructions**: content_instruction fields defined but not validated
+
+## What We Have vs What We Need
+
+**Current `markitect validate`** (Structural):
+```bash
+markitect validate doc.md --schema schema.json
+# ✅ Checks: headings.level_2 has 5-30 items
+# ✅ Checks: paragraphs has 10-500 items
+# ✅ Checks: code_blocks has 1-50 items
+# ❌ Does NOT check: SYNOPSIS section present (required)
+# ❌ Does NOT check: INTERNAL_NOTES absent (improper)
+# ❌ Does NOT check: Synopsis contains bold command name
+# ❌ Does NOT check: Description has min 50 words
+```
+
+**Enhanced `markitect validate`** (Structural + Semantic):
+```bash
+markitect validate doc.md --schema manpage-schema-v1.0.md
+# ✅ Checks: AST structure (existing)
+# ✅ NEW: SYNOPSIS section present (required)
+# ✅ NEW: INTERNAL_NOTES not present (improper)
+# ✅ NEW: Synopsis contains **command** pattern
+# ✅ NEW: Description has 50+ words
+# ✅ NEW: No forbidden TODO patterns
+```
+
+## Implementation Plan
+
+### Phase 1: Core Semantic Validator
+
+**Goal**: Create semantic validator to complement existing structural validation
+
+**New Module**: `markitect/semantic_validator.py`
+
+**Key Components**:
+
+```python
+class SemanticValidator:
+    """Validates markdown documents against x-markitect extensions.
+
+    Complements existing SchemaValidator which handles structural AST validation.
+    This validator checks semantic aspects defined in x-markitect-* extensions.
+    """
+
+    def __init__(self, schema_path: str):
+        # Load schema (supports .md schemas with embedded JSON)
+        self.schema = load_schema_with_extensions(schema_path)
+
+        # Initialize sub-validators
+        self.section_validator = SectionValidator(self.schema)
+        self.content_validator = ContentValidator(self.schema)
+        self.link_validator = LinkValidator(self.schema)
+
+    def validate(self, document_path: str, check_links: bool = False) -> SemanticValidationReport:
+        """Main semantic validation entry point."""
+        doc = parse_markdown_document(document_path)
+
+        results = {
+            'sections': self.section_validator.check(doc),
+            'content': self.content_validator.check(doc)
+        }
+
+        if check_links:
+            results['links'] = self.link_validator.check(doc)
+
+        return SemanticValidationReport(results)
+```
+
+**Features**:
+- Load schema from registry or filesystem
+- Parse markdown document into AST
+- Validate sections against x-markitect-sections classifications
+- Check content against x-markitect-content-control patterns
+- Validate links if enabled
+- Generate detailed report with line numbers
+
+### Phase 2: Section Presence Validator
+
+**New Module**: `markitect/validators/section_validator.py`
+
+**Validation Rules**:
+
+```python
+class SectionValidator:
+    """Validates section presence and classification compliance."""
+
+    def check(self, document: MarkdownDocument) -> SectionValidationResult:
+        sections_spec = self.schema.get('x-markitect-sections', {})
+        doc_sections = document.get_headings_by_level(2)
+
+        issues = []
+
+        # Check REQUIRED sections
+        for section_name, spec in sections_spec.items():
+            if spec['classification'] == 'required':
+                if section_name not in doc_sections:
+                    issues.append(SectionMissing(
+                        section=section_name,
+                        severity='ERROR',
+                        message=spec.get('error_message', f'{section_name} is required')
+                    ))
+
+        # Check IMPROPER sections (must not exist)
+        for section_name, spec in sections_spec.items():
+            if spec['classification'] == 'improper':
+                if section_name in doc_sections:
+                    issues.append(SectionImproper(
+                        section=section_name,
+                        severity='ERROR',
+                        message=spec.get('error_message', f'{section_name} must not appear')
+                    ))
+
+        # Check RECOMMENDED sections (warnings)
+        for section_name, spec in sections_spec.items():
+            if spec['classification'] == 'recommended':
+                if section_name not in doc_sections:
+                    issues.append(SectionMissing(
+                        section=section_name,
+                        severity='WARNING',
+                        message=spec.get('warning_if_missing', f'{section_name} is recommended')
+                    ))
+
+        return SectionValidationResult(issues)
+```
+
+**Section Classification Enforcement**:
+- REQUIRED → ERROR if missing
+- RECOMMENDED → WARNING if missing
+- OPTIONAL → No check
+- DISCOURAGED → WARNING if present
+- IMPROPER → ERROR if present
+
+### Phase 3: Content Pattern Validator
+
+**New Module**: `markitect/validators/content_validator.py`
+
+**Pattern Matching**:
+
+```python
+class ContentValidator:
+    """Validates content against x-markitect-content-control rules."""
+
+    def check(self, document: MarkdownDocument) -> ContentValidationResult:
+        content_rules = self.schema.get('x-markitect-content-control', {})
+        issues = []
+
+        for section_key, rules in content_rules.items():
+            section = document.get_section(section_key.upper())
+            if not section:
+                continue  # Section validator handles missing sections
+
+            # Check required patterns
+            for pattern in rules.get('required_patterns', []):
+                if not re.search(pattern, section.content):
+                    issues.append(PatternMissing(
+                        section=section.name,
+                        pattern=pattern,
+                        severity='ERROR'
+                    ))
+
+            # Check forbidden patterns
+            for pattern in rules.get('forbidden_patterns', []):
+                match = re.search(pattern, section.content)
+                if match:
+                    issues.append(ForbiddenPattern(
+                        section=section.name,
+                        pattern=pattern,
+                        severity='ERROR',
+                        matched_text=match.group(0)
+                    ))
+
+            # Check content quality
+            quality = rules.get('content_quality', {})
+            word_count = len(section.content.split())
+
+            if 'min_words' in quality and word_count < quality['min_words']:
+                issues.append(ContentTooShort(
+                    section=section.name,
+                    actual=word_count,
+                    required=quality['min_words'],
+                    severity='WARNING'
+                ))
+
+            if 'max_words' in quality and word_count > quality['max_words']:
+                issues.append(ContentTooLong(
+                    section=section.name,
+                    actual=word_count,
+                    limit=quality['max_words'],
+                    severity='WARNING'
+                ))
+
+        return ContentValidationResult(issues)
+```
+
+**Content Rules Checked**:
+- Required patterns (regex matches)
+- Discouraged patterns (warnings)
+- Forbidden patterns (errors)
+- Word count ranges (min/max)
+- Sentence counts (if specified)
+
+### Phase 4: Link Validator
+
+**New Module**: `markitect/validators/link_validator.py`
+
+**Link Checking**:
+
+```python
+class LinkValidator:
+    """Validates links according to x-markitect-content-control.link_validation."""
+
+    def check(self, document: MarkdownDocument) -> LinkValidationResult:
+        link_config = self.schema.get('x-markitect-content-control', {}).get('link_validation', {})
+
+        if not any(link_config.values()):
+            return LinkValidationResult([])  # No link validation configured
+
+        links = document.extract_links()
+        issues = []
+
+        for link in links:
+            # Check internal links
+            if link.is_internal() and link_config.get('check_internal', False):
+                target = document.resolve_internal_link(link.target)
+                if not target:
+                    issues.append(BrokenInternalLink(
+                        link=link.target,
+                        line=link.line_number,
+                        severity='ERROR'
+                    ))
+
+            # Check external links
+            if link.is_external() and link_config.get('check_external', False):
+                # HTTP HEAD request with timeout
+                if not self._check_url_exists(link.target):
+                    issues.append(BrokenExternalLink(
+                        link=link.target,
+                        line=link.line_number,
+                        severity='WARNING'  # External links are warnings
+                    ))
+
+            # Check fragments
+            if link.has_fragment() and not link_config.get('allow_fragments', True):
+                issues.append(FragmentNotAllowed(
+                    link=link.target,
+                    line=link.line_number,
+                    severity='WARNING'
+                ))
+
+        return LinkValidationResult(issues)
+```
+
+**Link Types Validated**:
+- Internal links (to other sections/documents)
+- External links (HTTP/HTTPS URLs)
+- Fragment identifiers (#section-name)
+- Email links (mailto:)
+
+### Phase 5: CLI Integration
+
+**Enhance Existing Command**: `markitect validate` (cli.py:1493-1600)
+
+**New Options to Add**:
+
+```python
+@cli.command('validate')
+@click.argument('file_path', type=click.Path(exists=True, path_type=Path))
+@click.option('--schema', '-s', type=click.Path(exists=True, path_type=Path),
+              help='Path to JSON schema file')
+@click.option('--schema-json', type=str,
+              help='JSON schema provided as a string')
+@click.option('--quiet', '-q', is_flag=True,
+              help='Only output validation result (true/false)')
+@click.option('--detailed-errors', '--errors', is_flag=True,
+              help='Show detailed validation errors (Issue #8)')
+@click.option('--error-format', type=click.Choice(['text', 'json', 'markdown']), default='text',
+              help='Format for detailed error output')
+# NEW OPTIONS:
+@click.option('--semantic/--no-semantic', default=True,
+              help='Enable/disable semantic validation (sections, patterns, quality)')
+@click.option('--check-links', is_flag=True,
+              help='Enable link validation (may be slow)')
+@click.option('--strict', is_flag=True,
+              help='Treat warnings as errors')
+@pass_config
+def validate(config, file_path, schema, schema_json, quiet, detailed_errors, error_format,
+             semantic, check_links, strict):
+    """
+    Validate a markdown file against a JSON schema.
+
+    ENHANCED: Now includes semantic validation of x-markitect extensions:
+    - Section classifications (required, recommended, optional, discouraged, improper)
+    - Content patterns (required_patterns, forbidden_patterns)
+    - Quality metrics (min_words, max_words, min_sentences)
+    - Link validation (internal/external)
+
+    Examples:
+        # Structural + semantic validation (default)
+        markitect validate doc.md --schema manpage-schema-v1.0.md
+
+        # Only structural validation (classic mode)
+        markitect validate doc.md --schema schema.json --no-semantic
+
+        # With link checking
+        markitect validate doc.md --schema 1 --check-links
+
+        # Strict mode (warnings become errors)
+        markitect validate doc.md --schema manpage-schema-v1.0.md --strict
+    """
+    # Existing structural validation code...
+    # (Keep all existing logic for SchemaValidator)
+
+    # NEW: Add semantic validation if enabled and schema has x-markitect extensions
+    if semantic:
+        semantic_validator = SemanticValidator(schema)
+        semantic_report = semantic_validator.validate(file_path, check_links=check_links)
+
+        # Combine structural and semantic results
+        combined_report = CombinedValidationReport(structural_result, semantic_report)
+
+        # Output combined results
+        if not quiet:
+            click.echo(combined_report.format(error_format))
+
+        # Exit codes
+        if combined_report.has_errors():
+            sys.exit(1)
+        elif strict and combined_report.has_warnings():
+            sys.exit(1)
+```
+
+**Integration Strategy**:
+1. Keep existing structural validation (SchemaValidator) unchanged
+2. Add new semantic validation layer on top
+3. Use --no-semantic flag to disable new validation (backward compatibility)
+4. Combine structural + semantic results in unified report
+5. Default to semantic=True for new markdown schemas with extensions
+
+**Output Format** (text):
+```
+Validating: my-command.1.md
+Schema: manpage-schema-v1.0.md (v1.0.0)
+
+Section Validation:
+  ✅ SYNOPSIS - Present (required)
+  ✅ DESCRIPTION - Present (required)
+  ⚠️  EXAMPLES - Missing (recommended)
+  ❌ INTERNAL_NOTES - Must not appear (improper)
+
+Content Validation:
+  ✅ SYNOPSIS - Patterns matched
+  ⚠️  DESCRIPTION - Too short (35 words, minimum 50)
+  ❌ SYNOPSIS - Forbidden pattern found: "TODO"
+
+Link Validation: (skipped - use --check-links)
+
+Summary:
+  Errors: 2
+  Warnings: 2
+  Status: FAILED ❌
+
+Failed validations:
+  Line 12: INTERNAL_NOTES section must not appear in published manpages
+  Line 5: SYNOPSIS contains forbidden pattern "TODO"
+```
+
+### Phase 6: Batch Document Validation
+
+**New Command**: `markitect validate-batch`
+
+```python
+@cli.command('validate-batch')
+@click.argument('directory', type=click.Path(exists=True, file_okay=False))
+@click.option('--schema', '-s', type=str, required=True)
+@click.option('--pattern', default='*.md', help='File pattern to match')
+@click.option('--strict', is_flag=True)
+@click.option('--summary-only', is_flag=True, help='Show only summary table')
+@pass_config
+def validate_batch_cmd(config, directory, schema, pattern, strict, summary_only):
+    """Validate multiple documents in a directory.
+
+    Example:
+        markitect validate-batch docs/manpages/ --schema manpage-schema-v1.0.md
+    """
+    # Find all matching documents
+    docs = list(Path(directory).glob(pattern))
+
+    # Validate each
+    results = []
+    for doc in docs:
+        validator = SemanticValidator(schema)
+        report = validator.validate(doc)
+        results.append((doc.name, report))
+
+    # Show summary table
+    display_batch_results(results)
+```
+
+## Implementation Phases
+
+### Phase 1 (Core - 1 session)
+- SemanticValidator class
+- Basic section validation
+- CLI validate command
+- Simple text output format
+
+### Phase 2 (Content - 1 session)
+- ContentValidator with pattern matching
+- Word count validation
+- Quality metrics checking
+- Enhanced reporting
+
+### Phase 3 (Links - 1 session)
+- LinkValidator with internal link checking
+- Optional external link validation
+- Fragment validation
+- Performance optimization (caching)
+
+### Phase 4 (Polish - 1 session)
+- Batch validation support
+- JSON/table output formats
+- Integration tests
+- Documentation updates
+
+## Critical Files
+
+**New Files**:
+- `markitect/semantic_validator.py` - Main semantic validator (complements existing SchemaValidator)
+- `markitect/validators/section_validator.py` - Section classification enforcement
+- `markitect/validators/content_validator.py` - Content pattern matching and quality
+- `markitect/validators/link_validator.py` - Link validation
+- `markitect/validators/__init__.py` - Validators package
+- `tests/test_semantic_validator.py` - Semantic validator tests
+- `tests/validators/test_section_validator.py` - Section validator tests
+- `tests/validators/test_content_validator.py` - Content validator tests
+- `tests/validators/test_link_validator.py` - Link validator tests
+
+**Modified Files**:
+- `markitect/cli.py` (lines 1493-1600) - Enhance validate command with semantic validation
+- `markitect/schema_loader.py` - May need utility to extract x-markitect extensions
+- `docs/SCHEMA_MANAGEMENT_GUIDE.md` - Add semantic validation section
+- `examples/manpages/README.md` - Add validation examples
+- `examples/terminology/README.md` - Add validation examples
+
+**Reference Files** (unchanged, used for integration):
+- `markitect/validator.py` - Existing SchemaValidator for structural validation
+- `markitect/schema_analyzer.py` - Reference for schema extension parsing
+
+## Design Decisions
+
+### 1. Markdown Parsing
+**Decision**: Use existing markdown parser from markitect core
+**Rationale**: Already handles frontmatter, sections, AST generation
+
+### 2. Link Validation Default
+**Decision**: Link validation is opt-in via `--check-links`; once enabled, internal and external checks follow the schema's `check_internal` / `check_external` settings
+**Rationale**: External link checking is slow (network requests), internal is fast
+
+### 3. Severity Levels
+**Decision**: ERROR (required violations), WARNING (recommended violations), INFO (suggestions)
+**Rationale**: Matches schema classification system semantics
+
+### 4. Exit Codes
+**Decision**: 0=success, 1=validation failed, 2=system error
+**Rationale**: Standard CLI conventions for CI/CD integration
+
+### 5. Pattern Syntax
+**Decision**: Use Python regex patterns directly
+**Rationale**: Schemas already use regex strings, no need for new syntax
+
+## Testing Strategy
+
+### Unit Tests
+- SectionValidator: Test all classification types
+- ContentValidator: Test pattern matching, word counts
+- LinkValidator: Test internal/external link checking
+- ValidationReport: Test formatting and aggregation
+
+### Integration Tests
+- Validate real manpage documents against manpage schema
+- Validate terminology documents against terminology schema
+- Test batch validation across multiple documents
+- Test CLI output formats
+
+### Edge Cases
+- Documents with no schema sections defined
+- Schemas with no content-control rules
+- Empty documents
+- Documents with malformed links
+- Unicode in patterns and content
+
+## User Workflows
+
+### Workflow 1: Validate Single Document
+```bash
+# Validate a manpage
+markitect validate my-command.1.md --schema manpage-schema-v1.0.md
+
+# With link checking
+markitect validate my-command.1.md --schema 1 --check-links
+```
+
+### Workflow 2: CI/CD Integration
+```bash
+#!/bin/bash
+# Validate all manpages in CI
+if ! markitect validate-batch docs/man/ --schema 1 --strict; then
+    echo "Manpage validation failed!"
+    exit 1
+fi
+```
+
+### Workflow 3: Pre-commit Hook
+```bash
+# .git/hooks/pre-commit
+files=$(git diff --cached --name-only --diff-filter=ACM | grep '\.1\.md$')
+for file in $files; do
+    if ! markitect validate "$file" --schema manpage-schema-v1.0.md; then
+        echo "Fix validation errors before committing"
+        exit 1
+    fi
+done
+```
+
+### Workflow 4: Interactive Editing
+```bash
+# Validate while editing
+watch -n 2 'markitect validate draft.md --schema api-documentation-schema-v1.0.md'
+```
+
+## Success Metrics
+
+1. **Core Functionality**: Can validate documents against all 4 production schemas
+2. **Classification Enforcement**: Required/improper sections properly checked
+3. **Pattern Matching**: Content patterns validated with regex
+4. **Performance**: Validate 100 documents in < 5 seconds (without link checking)
+5. **Test Coverage**: > 90% coverage for new validator modules
+6. **Documentation**: Complete examples for each schema type
+
+## Future Enhancements (Out of Scope)
+
+- Auto-fixing document validation errors
+- Suggestion engine for missing content
+- Readability scoring with specific algorithms
+- Image validation (size, format, accessibility)
+- Schema evolution analysis (breaking changes between versions)
+- Document-to-schema generation (inverse of current flow)
--- a/tests/test_semantic_validator.py
+++ b/tests/test_semantic_validator.py
@@ -0,0 +1,506 @@
+"""
+Tests for SemanticValidator.
+
+Tests semantic validation of markdown documents against x-markitect extensions.
+"""
+
+import pytest
+from pathlib import Path
+import tempfile
+import json
+
+from markitect.semantic_validator import (
+    SemanticValidator,
+    SemanticValidationReport,
+    load_schema_from_path
+)
+from markitect.validators.section_validator import (
+    SectionValidator,
+    SectionMissing,
+    SectionImproper
+)
+from markitect.validators.content_validator import (
+    ContentValidator,
+    PatternMissing,
+    ForbiddenPattern,
+    DiscouragedPattern,
+    ContentTooShort,
+    ContentTooLong
+)
+
+
+class TestSectionValidator:
+    """Behavioural checks for SectionValidator classification rules."""
+
+    @staticmethod
+    def _check(sections_spec, headings):
+        """
+        Run SectionValidator over a stub document.
+
+        Args:
+            sections_spec: Mapping placed under 'x-markitect-sections'
+            headings: Headings the stub reports (plain strings or dicts)
+
+        Returns:
+            Whatever SectionValidator.check() returns for the stub
+        """
+        class StubDocument:
+            def get_headings_by_level(self, level):
+                # Same answer for every level; a fresh list on each call.
+                return list(headings)
+
+        validator = SectionValidator({'x-markitect-sections': sections_spec})
+        return validator.check(StubDocument())
+
+    def test_required_section_missing(self):
+        """A required section absent from the document yields one ERROR."""
+        spec = {
+            'SYNOPSIS': {
+                'classification': 'required',
+                'heading_level': 2,
+                'error_message': 'SYNOPSIS section is mandatory'
+            }
+        }
+
+        # Document headings do not include SYNOPSIS
+        outcome = self._check(spec, ['DESCRIPTION', 'EXAMPLES'])
+
+        assert not outcome.is_valid()
+        assert outcome.has_errors()
+        errors = outcome.get_errors()
+        assert len(errors) == 1
+
+        problem = errors[0]
+        assert isinstance(problem, SectionMissing)
+        assert problem.section_name == 'SYNOPSIS'
+        assert problem.severity == 'ERROR'
+        assert 'mandatory' in problem.message
+
+    def test_improper_section_present(self):
+        """An improper section that appears yields one ERROR with its line."""
+        spec = {
+            'INTERNAL_NOTES': {
+                'classification': 'improper',
+                'heading_level': 2,
+                'error_message': 'Internal notes must not appear in published docs'
+            }
+        }
+        offending_heading = {
+            'content': 'INTERNAL_NOTES',
+            'level': 2,
+            'line_number': 25
+        }
+
+        outcome = self._check(spec, [offending_heading])
+
+        assert not outcome.is_valid()
+        assert outcome.has_errors()
+        errors = outcome.get_errors()
+        assert len(errors) == 1
+
+        problem = errors[0]
+        assert isinstance(problem, SectionImproper)
+        assert problem.section_name == 'INTERNAL_NOTES'
+        assert problem.severity == 'ERROR'
+        assert problem.line_number == 25
+
+    def test_recommended_section_missing(self):
+        """A missing recommended section only warns; validation still passes."""
+        spec = {
+            'EXAMPLES': {
+                'classification': 'recommended',
+                'heading_level': 2,
+                'warning_if_missing': 'Examples improve documentation quality'
+            }
+        }
+
+        # Document headings do not include EXAMPLES
+        outcome = self._check(spec, ['SYNOPSIS', 'DESCRIPTION'])
+
+        # Warnings alone must not fail validation
+        assert outcome.is_valid()
+        assert not outcome.has_errors()
+        assert outcome.has_warnings()
+        warnings = outcome.get_warnings()
+        assert len(warnings) == 1
+
+        notice = warnings[0]
+        assert notice.section_name == 'EXAMPLES'
+        assert notice.severity == 'WARNING'
+
+    def test_all_required_sections_present(self):
+        """No issues are reported when every required section is present."""
+        spec = {
+            name: {'classification': 'required', 'heading_level': 2}
+            for name in ('SYNOPSIS', 'DESCRIPTION')
+        }
+        headings = [
+            {'content': title, 'level': 2}
+            for title in ('SYNOPSIS', 'DESCRIPTION', 'EXAMPLES')
+        ]
+
+        outcome = self._check(spec, headings)
+
+        assert outcome.is_valid()
+        assert not outcome.has_errors()
+        assert not outcome.has_warnings()
+        assert len(outcome.issues) == 0
+
+    def test_section_alternatives(self):
+        """A listed alternative heading satisfies a required section."""
+        spec = {
+            'OPTIONS': {
+                'classification': 'required',
+                'heading_level': 2,
+                'alternatives': ['FLAGS', 'COMMAND OPTIONS']
+            }
+        }
+
+        # The document uses the alternative name 'FLAGS'
+        outcome = self._check(spec, [{'content': 'FLAGS', 'level': 2}])
+
+        assert outcome.is_valid()
+        assert not outcome.has_errors()
+
+
+class TestSemanticValidator:
+    """Checks for SemanticValidator setup, report output, and schema loading."""
+
+    def test_validator_initialization(self):
+        """A validator built from a dict exposes that schema and a section validator."""
+        sections = {
+            'SYNOPSIS': {'classification': 'required', 'heading_level': 2}
+        }
+        schema = {
+            '$schema': 'http://json-schema.org/draft-07/schema#',
+            'x-markitect-sections': sections
+        }
+
+        validator = SemanticValidator(schema)
+
+        assert validator.schema == schema
+        assert validator.section_validator is not None
+
+    def test_validation_report_formatting(self):
+        """A report holding one section error is invalid and renders as FAILED."""
+        from markitect.validators.section_validator import (
+            SectionValidationResult,
+            SectionMissing
+        )
+
+        missing_synopsis = SectionMissing(
+            section_name='SYNOPSIS',
+            severity='ERROR',
+            message='SYNOPSIS is required',
+            classification='required'
+        )
+        section_result = SectionValidationResult(
+            issues=[missing_synopsis],
+            sections_checked=2,
+            sections_found=1
+        )
+
+        report = SemanticValidationReport(section_result=section_result)
+
+        # Report-level flags
+        assert report.has_errors()
+        assert not report.is_valid()
+
+        # Text rendering must mention each of these fragments
+        rendered = report.format_text()
+        for fragment in ('Section Validation:', 'SYNOPSIS', 'Errors: 1', 'FAILED'):
+            assert fragment in rendered
+
+    def test_load_json_schema(self, tmp_path):
+        """A .json schema written to disk round-trips through the loader."""
+        expected = {
+            '$schema': 'http://json-schema.org/draft-07/schema#',
+            'title': 'Test Schema',
+            'x-markitect-sections': {
+                'SYNOPSIS': {'classification': 'required', 'heading_level': 2}
+            }
+        }
+        target = tmp_path / "test-schema.json"
+        target.write_text(json.dumps(expected, indent=2))
+
+        loaded = load_schema_from_path(target)
+
+        assert loaded == expected
+        assert 'x-markitect-sections' in loaded
+
+    def test_schema_not_found(self):
+        """Loading a path that does not exist raises FileNotFoundError."""
+        with pytest.raises(FileNotFoundError):
+            load_schema_from_path('/nonexistent/schema.json')
+
+    def test_unsupported_schema_format(self, tmp_path):
+        """A schema file with an unsupported extension raises ValueError."""
+        target = tmp_path / "schema.xml"
+        target.write_text('<schema></schema>')
+
+        with pytest.raises(ValueError, match="Unsupported schema format"):
+            load_schema_from_path(target)
+
+
+class TestContentValidator:
+    """Test content validation functionality."""
+
+    def test_required_pattern_missing(self):
+        """Test that missing required patterns are detected."""
+        schema = {
+            'x-markitect-content-control': {
+                'synopsis': {
+                    'required_patterns': [
+                        r'\*\*[a-z][a-z0-9-]*\*\*'  # Bold command name
+                    ]
+                }
+            }
+        }
+
+        validator = ContentValidator(schema)
+
+        # Create mock document without bold command
+        class MockDocument:
+            def get_section(self, name):
+                if name == 'SYNOPSIS':
+                    return {
+                        'name': 'SYNOPSIS',
+                        'content': 'command [options] arguments'  # No bold
+                    }
+                return None
+
+        doc = MockDocument()
+        result = validator.check(doc)
+
+        # Should have one error
+        assert not result.is_valid()
+        assert result.has_errors()
+        assert len(result.get_errors()) == 1
+
+        error = result.get_errors()[0]
+        assert isinstance(error, PatternMissing)
+        assert error.section_name == 'SYNOPSIS'
+        assert error.severity == 'ERROR'
+
+    def test_forbidden_pattern_found(self):
+        """Test that forbidden patterns are detected."""
+        schema = {
+            'x-markitect-content-control': {
+                'description': {
+                    'forbidden_patterns': [
+                        r'\bTODO\b',
+                        r'\bFIXME\b'
+                    ]
+                }
+            }
+        }
+
+        validator = ContentValidator(schema)
+
+        # Create mock document with forbidden pattern
+        class MockDocument:
+            def get_section(self, name):
+                if name == 'DESCRIPTION':
+                    return {
+                        'name': 'DESCRIPTION',
+                        'content': 'This is a description. TODO: Add more details.'
+                    }
+                return None
+
+        doc = MockDocument()
+        result = validator.check(doc)
+
+        # Should have one error
+        assert not result.is_valid()
+        assert result.has_errors()
+        assert len(result.get_errors()) == 1
+
+        error = result.get_errors()[0]
+        assert isinstance(error, ForbiddenPattern)
+        assert error.section_name == 'DESCRIPTION'
+        assert 'TODO' in error.matched_text
+
+    def test_discouraged_pattern_warning(self):
+        """Test that discouraged patterns generate warnings."""
+        schema = {
+            'x-markitect-content-control': {
+                'description': {
+                    'discouraged_patterns': [
+                        r'\bWIP\b'
+                    ]
+                }
+            }
+        }
+
+        validator = ContentValidator(schema)
+
+        # Create mock document with discouraged pattern
+        class MockDocument:
+            def get_section(self, name):
+                if name == 'DESCRIPTION':
+                    return {
+                        'name': 'DESCRIPTION',
+                        'content': 'This is WIP content.'
+                    }
+                return None
+
+        doc = MockDocument()
+        result = validator.check(doc)
+
+        # Should pass (warnings don't fail)
+        assert result.is_valid()
+        assert not result.has_errors()
+        assert result.has_warnings()
+
+        warning = result.get_warnings()[0]
+        assert isinstance(warning, DiscouragedPattern)
+        assert warning.severity == 'WARNING'
+
+    def test_content_too_short(self):
+        """Test word count validation - too short."""
+        schema = {
+            'x-markitect-content-control': {
+                'description': {
+                    'content_quality': {
+                        'min_words': 50,
+                        'max_words': 1000
+                    }
+                }
+            }
+        }
+
+        validator = ContentValidator(schema)
+
+        # Create mock document with short content
+        class MockDocument:
+            def get_section(self, name):
+                if name == 'DESCRIPTION':
+                    return {
+                        'name': 'DESCRIPTION',
+                        'content': 'Short description.'  # Only 2 words
+                    }
+                return None
+
+        doc = MockDocument()
+        result = validator.check(doc)
+
+        # Should have warning
+        assert result.is_valid()  # Warnings don't fail
+        assert result.has_warnings()
+
+        warning = result.get_warnings()[0]
+        assert isinstance(warning, ContentTooShort)
+        assert warning.actual == 2
+        assert warning.required == 50
+
+    def test_content_too_long(self):
+        """Test word count validation - too long."""
+        schema = {
+            'x-markitect-content-control': {
+                'synopsis': {
+                    'content_quality': {
+                        'min_words': 5,
+                        'max_words': 20
+                    }
+                }
+            }
+        }
+
+        validator = ContentValidator(schema)
+
+        # Create mock document with long content
+        class MockDocument:
+            def get_section(self, name):
+                if name == 'SYNOPSIS':
+                    return {
+                        'name': 'SYNOPSIS',
+                        'content': ' '.join(['word'] * 50)  # 50 words
+                    }
+                return None
+
+        doc = MockDocument()
+        result = validator.check(doc)
+
+        # Should have warning
+        assert result.is_valid()
+        assert result.has_warnings()
+
+        warning = result.get_warnings()[0]
+        assert isinstance(warning, ContentTooLong)
+        assert warning.actual == 50
+        assert warning.limit == 20
+
+    def test_all_content_requirements_met(self):
+        """Test that a section satisfying every content rule yields no issues."""
+        schema = {
+            'x-markitect-content-control': {
+                'synopsis': {
+                    'required_patterns': [
+                        r'\*\*[a-z]+\*\*'
+                    ],
+                    'content_quality': {
+                        'min_words': 5,
+                        'max_words': 50
+                    }
+                }
+            }
+        }
+
+        validator = ContentValidator(schema)
+
+        # Stub document: only SYNOPSIS exists, starting with a bold lowercase name
+        class MockDocument:
+            def get_section(self, name):
+                if name == 'SYNOPSIS':
+                    return {
+                        'name': 'SYNOPSIS',
+                        'content': '**command** [options] arguments and more words here'
+                    }
+                return None
+
+        doc = MockDocument()
+        result = validator.check(doc)
+
+        # Should pass with neither errors nor warnings recorded
+        assert result.is_valid()
+        assert not result.has_errors()
+        assert not result.has_warnings()
+        assert len(result.issues) == 0