markitect-main/tests/test_issue_52_heading_text_capture.py

"""
Tests for Issue #52: Capture actual heading text in schemas

This test module implements comprehensive tests for capturing actual heading text
from documents and enforcing specific heading text requirements in validation.

Following TDD8 methodology - these tests are written before implementation.
"""

import json
import pytest
from pathlib import Path
from tempfile import NamedTemporaryFile
from click.testing import CliRunner

from markitect.cli import cli
from markitect.schema_generator import SchemaGenerator
from markitect.schema_validator import SchemaValidator
from markitect.exceptions import FileNotFoundError


class TestIssue52HeadingTextCapture:
    """Test suite for heading text capture functionality."""

    def setup_method(self):
        """Set up test fixtures."""
        self.schema_generator = SchemaGenerator()
        self.schema_validator = SchemaValidator()
        self.runner = CliRunner()

    def test_schema_generation_with_heading_text_capture_option(self):
        """Test that schema generation can capture exact heading text as constraints."""
        # Arrange
        markdown_content = """# Architecture Overview
This document describes the system architecture.

## System Design
The core system design principles.

## Implementation Strategy
How we will implement the system.
"""

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(markdown_content)
            temp_file = Path(f.name)

        try:
            # Act - Generate schema with heading text capture enabled
            schema = self.schema_generator.generate_schema_from_file(
                temp_file,
                capture_heading_text=True
            )

            # Assert - Schema should contain exact heading text as constraints
            assert "properties" in schema
            assert "headings" in schema["properties"]

            headings = schema["properties"]["headings"]["properties"]

            # Level 1 heading should have exact text constraint
            level_1 = headings["level_1"]
            assert level_1["items"]["properties"]["content"]["enum"] == ["Architecture Overview"]

            # Level 2 headings should have exact text constraints
            level_2 = headings["level_2"]
            expected_level_2_texts = ["System Design", "Implementation Strategy"]
            assert level_2["items"]["properties"]["content"]["enum"] == expected_level_2_texts

        finally:
            temp_file.unlink()

    def test_cli_schema_generate_with_capture_heading_text_option(self):
        """Test CLI supports --capture-heading-text option."""
        # Arrange
        markdown_content = """# Project Documentation

## Overview
Project overview section.

## Requirements
Project requirements section.
"""

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(markdown_content)
            temp_file = Path(f.name)

        try:
            # Act
            result = self.runner.invoke(cli, [
                'schema-generate',
                '--capture-heading-text',
                str(temp_file)
            ])

            # Assert
            assert result.exit_code == 0
            schema = json.loads(result.output)

            # Check heading text constraints are present
            headings = schema["properties"]["headings"]["properties"]
            level_1 = headings["level_1"]
            assert "enum" in level_1["items"]["properties"]["content"]
            assert level_1["items"]["properties"]["content"]["enum"] == ["Project Documentation"]

        finally:
            temp_file.unlink()

    def test_schema_validation_enforces_exact_heading_text(self):
        """Test that validation enforces specific heading text requirements."""
        # Arrange
        original_content = """# Architecture Overview
System architecture description.

## System Design
Core design principles.
"""

        wrong_heading_content = """# Different Title
System architecture description.

## System Design
Core design principles.
"""

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(original_content)
            original_file = Path(f.name)

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(wrong_heading_content)
            wrong_file = Path(f.name)

        try:
            # Generate schema with heading text capture
            schema = self.schema_generator.generate_schema_from_file(
                original_file,
                capture_heading_text=True
            )

            # Act & Assert - Original should validate
            result1 = self.schema_validator.validate_file_against_schema(original_file, schema)
            assert result1 is True, "Original document should validate against its own schema"

            # Act & Assert - Wrong heading text should fail validation
            result2 = self.schema_validator.validate_file_against_schema(wrong_file, schema)
            assert result2 is False, "Document with wrong heading text should fail validation"

        finally:
            original_file.unlink()
            wrong_file.unlink()

    def test_schema_includes_heading_text_capture_metaschema_extension(self):
        """Test that schemas with heading text capture include metaschema extension."""
        # Arrange
        markdown_content = """# Test Document

## Section A
Content for section A.
"""

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(markdown_content)
            temp_file = Path(f.name)

        try:
            # Act
            schema = self.schema_generator.generate_schema_from_file(
                temp_file,
                capture_heading_text=True
            )

            # Assert - Should have metaschema extension
            assert "x-markitect-heading-text-capture" in schema
            assert schema["x-markitect-heading-text-capture"] is True

        finally:
            temp_file.unlink()

    def test_outline_mode_with_heading_text_capture_integration(self):
        """Test that outline mode can be combined with heading text capture."""
        # Arrange
        markdown_content = """# Main Document

## Introduction
Introduction content.

### Details
Detailed information.

## Conclusion
Conclusion content.
"""

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(markdown_content)
            temp_file = Path(f.name)

        try:
            # Act
            result = self.runner.invoke(cli, [
                'schema-generate',
                '--mode', 'outline',
                '--capture-heading-text',
                '--depth', '2',
                str(temp_file)
            ])

            # Assert
            assert result.exit_code == 0
            schema = json.loads(result.output)

            # Should have both outline mode and heading text capture extensions
            assert schema.get("x-markitect-outline-mode") is True
            assert schema.get("x-markitect-heading-text-capture") is True

            # Should only include headings up to depth 2
            headings = schema["properties"]["headings"]["properties"]
            assert "level_1" in headings
            assert "level_2" in headings
            assert "level_3" not in headings

            # Should have exact heading text constraints
            level_1 = headings["level_1"]
            assert level_1["items"]["properties"]["content"]["enum"] == ["Main Document"]

        finally:
            temp_file.unlink()

    def test_backward_compatibility_without_heading_text_capture(self):
        """Test that existing behavior is maintained when heading text capture is not enabled."""
        # Arrange
        markdown_content = """# Test Document

## Section One
Content here.
"""

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(markdown_content)
            temp_file = Path(f.name)

        try:
            # Act - Generate schema without heading text capture (default behavior)
            schema = self.schema_generator.generate_schema_from_file(temp_file)

            # Assert - Should NOT have enum constraints on heading content
            headings = schema["properties"]["headings"]["properties"]
            level_1 = headings["level_1"]

            # Should have string type but no enum constraint
            assert level_1["items"]["properties"]["content"]["type"] == "string"
            assert "enum" not in level_1["items"]["properties"]["content"]

            # Should NOT have heading text capture extension
            assert "x-markitect-heading-text-capture" not in schema

        finally:
            temp_file.unlink()

    def test_validation_error_messages_for_heading_text_mismatches(self):
        """Test that validation provides meaningful error messages for heading text mismatches."""
        # Arrange
        original_content = """# Expected Title

## Expected Section
Content here.
"""

        wrong_content = """# Wrong Title

## Wrong Section
Content here.
"""

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(original_content)
            original_file = Path(f.name)

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(wrong_content)
            wrong_file = Path(f.name)

        try:
            # Generate schema with heading text capture
            schema = self.schema_generator.generate_schema_from_file(
                original_file,
                capture_heading_text=True
            )

            # Act - Validate with detailed errors
            error_collector = self.schema_validator.validate_file_with_errors(wrong_file, schema)

            # Assert - Should have specific errors about heading text mismatches
            errors = error_collector.errors
            assert len(errors) > 0

            # Look for heading text mismatch errors
            heading_errors = [e for e in errors if "heading" in e.message.lower()]
            assert len(heading_errors) > 0

            # Should mention expected vs actual heading text
            error_text = " ".join([e.message for e in heading_errors])
            assert "Expected Title" in error_text or "Wrong Title" in error_text

        finally:
            original_file.unlink()
            wrong_file.unlink()

    def test_schema_generation_preserves_heading_order_in_constraints(self):
        """Test that heading text constraints preserve the order of headings."""
        # Arrange
        markdown_content = """# First Document

## Beta Section
Second section alphabetically.

## Alpha Section
First section alphabetically.

## Gamma Section
Third section alphabetically.
"""

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(markdown_content)
            temp_file = Path(f.name)

        try:
            # Act
            schema = self.schema_generator.generate_schema_from_file(
                temp_file,
                capture_heading_text=True
            )

            # Assert - Level 2 headings should preserve document order, not alphabetical
            level_2 = schema["properties"]["headings"]["properties"]["level_2"]
            expected_order = ["Beta Section", "Alpha Section", "Gamma Section"]
            assert level_2["items"]["properties"]["content"]["enum"] == expected_order

        finally:
            temp_file.unlink()

    def test_cli_help_includes_capture_heading_text_option(self):
        """Test that CLI help includes documentation for the new option."""
        # Act
        result = self.runner.invoke(cli, ['schema-generate', '--help'])

        # Assert
        assert result.exit_code == 0
        help_text = result.output
        assert "--capture-heading-text" in help_text
        assert "exact heading text" in help_text or "heading text constraints" in help_text

    def test_empty_document_with_heading_text_capture(self):
        """Test that heading text capture handles documents with no headings gracefully."""
        # Arrange
        markdown_content = """This is a document with no headings.

Just some regular paragraphs here.
"""

        with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(markdown_content)
            temp_file = Path(f.name)

        try:
            # Act
            schema = self.schema_generator.generate_schema_from_file(
                temp_file,
                capture_heading_text=True
            )

            # Assert - Should generate valid schema even with no headings
            assert "properties" in schema
            # Should still have the metaschema extension
            assert schema.get("x-markitect-heading-text-capture") is True

        finally:
            temp_file.unlink()