# markitect-main/markitect/validators/section_validator.py

"""
Section Validator for markdown documents.

Validates that document sections comply with x-markitect-sections classifications:
- REQUIRED: Section must be present (ERROR if missing)
- RECOMMENDED: Section should be present (WARNING if missing)
- OPTIONAL: Section may be present (no check)
- DISCOURAGED: Section should not be present (WARNING if present)
- IMPROPER: Section must not be present (ERROR if present)
"""

from dataclasses import dataclass
from typing import List, Dict, Any, Optional
from pathlib import Path


@dataclass
class SectionIssue:
    """Base class for section validation issues.

    The string form is ``[SEVERITY] (line N) section_name: message``; the
    ``(line N)`` part is omitted when no line number is known.
    """
    section_name: str
    severity: str  # 'ERROR', 'WARNING', 'INFO'
    message: str
    classification: str  # 'required', 'recommended', etc.
    line_number: Optional[int] = None

    def __str__(self) -> str:
        # Compare against None rather than relying on truthiness so that a
        # line number of 0 is still reported instead of being dropped.
        location = f" (line {self.line_number})" if self.line_number is not None else ""
        return f"[{self.severity}]{location} {self.section_name}: {self.message}"


@dataclass
class SectionMissing(SectionIssue):
    """Issue reported when an expected section is absent from the document."""


@dataclass
class SectionImproper(SectionIssue):
    """Issue reported when a section classified as improper is present."""


@dataclass
class SectionDiscouraged(SectionIssue):
    """Issue reported when a section classified as discouraged is present."""


@dataclass
class SectionValidationResult:
    """Outcome of a section validation run.

    Holds the collected issues together with two counters, and offers
    helpers for querying the issues by severity.
    """
    issues: List[SectionIssue]
    sections_checked: int
    sections_found: int

    def has_errors(self) -> bool:
        """Return True when at least one issue has severity 'ERROR'."""
        for entry in self.issues:
            if entry.severity == 'ERROR':
                return True
        return False

    def has_warnings(self) -> bool:
        """Return True when at least one issue has severity 'WARNING'."""
        for entry in self.issues:
            if entry.severity == 'WARNING':
                return True
        return False

    def is_valid(self) -> bool:
        """Return True when validation produced no ERROR-level issues."""
        errors_present = self.has_errors()
        return not errors_present

    def get_errors(self) -> List[SectionIssue]:
        """Return the ERROR-level issues, in their original order."""
        errors = []
        for entry in self.issues:
            if entry.severity == 'ERROR':
                errors.append(entry)
        return errors

    def get_warnings(self) -> List[SectionIssue]:
        """Return the WARNING-level issues, in their original order."""
        warnings = []
        for entry in self.issues:
            if entry.severity == 'WARNING':
                warnings.append(entry)
        return warnings


class SectionValidator:
    """
    Validates section presence and classification compliance.

    Compares the level-2 headings of a markdown document against the
    x-markitect-sections entries of a schema. Each entry maps a section
    name to a spec dict; the spec keys read here are 'classification',
    'alternatives', 'error_message', 'warning_if_missing' and
    'warning_if_present'.
    """

    def __init__(self, schema: Dict[str, Any]):
        """
        Initialize validator with a schema.

        Args:
            schema: JSON schema with x-markitect-sections extension
        """
        self.schema = schema
        # `or {}` also covers a key that is present but null; a plain
        # .get(key, {}) would hand back None and break iteration in check().
        self.sections_spec = schema.get('x-markitect-sections') or {}

    def check(self, document: 'MarkdownDocument') -> 'SectionValidationResult':
        """
        Validate section presence against schema classifications.

        Classifications other than required, improper, recommended and
        discouraged (for example optional) produce no issue.

        Args:
            document: Parsed markdown document

        Returns:
            SectionValidationResult with any issues found
        """
        issues = []

        # Get level-2 headings (main sections) from document
        doc_sections = self._get_document_sections(document)

        for section_name, spec in self.sections_spec.items():
            classification = spec.get('classification')
            if isinstance(classification, str):
                # Compare case-insensitively so 'REQUIRED' and 'required'
                # are treated alike instead of the former being ignored.
                classification = classification.strip().lower()
            section_in_doc = self._find_section(section_name, doc_sections, spec)

            if classification == 'required':
                if section_in_doc is None:
                    issues.append(SectionMissing(
                        section_name=section_name,
                        severity='ERROR',
                        message=spec.get('error_message', f'{section_name} section is required'),
                        classification='required'
                    ))

            elif classification == 'improper':
                if section_in_doc is not None:
                    issues.append(SectionImproper(
                        section_name=section_name,
                        severity='ERROR',
                        message=spec.get('error_message', f'{section_name} section must not appear'),
                        classification='improper',
                        line_number=section_in_doc.get('line_number')
                    ))

            elif classification == 'recommended':
                if section_in_doc is None:
                    issues.append(SectionMissing(
                        section_name=section_name,
                        severity='WARNING',
                        message=spec.get('warning_if_missing', f'{section_name} section is recommended'),
                        classification='recommended'
                    ))

            elif classification == 'discouraged':
                if section_in_doc is not None:
                    issues.append(SectionDiscouraged(
                        section_name=section_name,
                        severity='WARNING',
                        message=spec.get('warning_if_present', f'{section_name} section is discouraged'),
                        classification='discouraged',
                        line_number=section_in_doc.get('line_number')
                    ))

        return SectionValidationResult(
            issues=issues,
            sections_checked=len(self.sections_spec),
            sections_found=len(doc_sections)
        )

    def _get_document_sections(self, document: 'MarkdownDocument') -> List[Dict[str, Any]]:
        """
        Extract level-2 headings from document.

        Headings are taken from document.get_headings_by_level(2) when that
        method exists, otherwise from the dict entries of document.headings
        whose 'level' is 2. A document offering neither yields no sections.

        Args:
            document: Parsed markdown document

        Returns:
            List of section dicts with 'name' (stripped, upper-cased heading
            text) and 'line_number' (None for plain-string headings)
        """
        if hasattr(document, 'get_headings_by_level'):
            level_2_headings = document.get_headings_by_level(2) or []
        elif hasattr(document, 'headings'):
            # Only dict headings carry a level; guarding with isinstance keeps
            # a stray non-dict entry from raising on .get().
            level_2_headings = [
                h for h in (document.headings or [])
                if isinstance(h, dict) and h.get('level') == 2
            ]
        else:
            # No known heading accessor: treat the document as having no sections.
            level_2_headings = []

        sections = []
        for heading in level_2_headings:
            if isinstance(heading, dict):
                sections.append({
                    # `or ''` handles a 'content' key that is present but None.
                    'name': (heading.get('content') or '').strip().upper(),
                    'line_number': heading.get('line_number')
                })
            elif isinstance(heading, str):
                sections.append({
                    'name': heading.strip().upper(),
                    'line_number': None
                })

        return sections

    def _find_section(self, section_name: str, doc_sections: List[Dict[str, Any]],
                     spec: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """
        Find a section in document, checking alternatives.

        The primary name is tried first, then each alternative in order;
        comparison ignores case and surrounding whitespace.

        Args:
            section_name: Primary section name to find
            doc_sections: List of sections in document
            spec: Section specification with potential alternatives

        Returns:
            Section dict if found, None otherwise
        """
        alternatives = spec.get('alternatives') or []
        if isinstance(alternatives, str):
            # A lone string is one alternative name, not a sequence of characters.
            alternatives = [alternatives]

        for candidate in [section_name, *alternatives]:
            wanted = candidate.upper().strip()
            for section in doc_sections:
                if section['name'] == wanted:
                    return section

        return None