""" Tests for Issue #52: Capture actual heading text in schemas This test module implements comprehensive tests for capturing actual heading text from documents and enforcing specific heading text requirements in validation. Following TDD8 methodology - these tests are written before implementation. """ import json import pytest from pathlib import Path from tempfile import NamedTemporaryFile from click.testing import CliRunner from markitect.cli import cli from markitect.schema_generator import SchemaGenerator from markitect.schema_validator import SchemaValidator from markitect.exceptions import FileNotFoundError class TestIssue52HeadingTextCapture: """Test suite for heading text capture functionality.""" def setup_method(self): """Set up test fixtures.""" self.schema_generator = SchemaGenerator() self.schema_validator = SchemaValidator() self.runner = CliRunner() def test_schema_generation_with_heading_text_capture_option(self): """Test that schema generation can capture exact heading text as constraints.""" # Arrange markdown_content = """# Architecture Overview This document describes the system architecture. ## System Design The core system design principles. ## Implementation Strategy How we will implement the system. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act - Generate schema with heading text capture enabled schema = self.schema_generator.generate_schema_from_file( temp_file, capture_heading_text=True ) # Assert - Schema should contain exact heading text as constraints assert "properties" in schema assert "headings" in schema["properties"] headings = schema["properties"]["headings"]["properties"] # Level 1 heading should have exact text constraint level_1 = headings["level_1"] assert level_1["items"]["properties"]["content"]["enum"] == ["Architecture Overview"] # Level 2 headings should have exact text constraints level_2 = headings["level_2"] expected_level_2_texts = ["System Design", "Implementation Strategy"] assert level_2["items"]["properties"]["content"]["enum"] == expected_level_2_texts finally: temp_file.unlink() def test_cli_schema_generate_with_capture_heading_text_option(self): """Test CLI supports --capture-heading-text option.""" # Arrange markdown_content = """# Project Documentation ## Overview Project overview section. ## Requirements Project requirements section. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act result = self.runner.invoke(cli, [ 'schema-generate', '--capture-heading-text', str(temp_file) ]) # Assert assert result.exit_code == 0 schema = json.loads(result.output) # Check heading text constraints are present headings = schema["properties"]["headings"]["properties"] level_1 = headings["level_1"] assert "enum" in level_1["items"]["properties"]["content"] assert level_1["items"]["properties"]["content"]["enum"] == ["Project Documentation"] finally: temp_file.unlink() def test_schema_validation_enforces_exact_heading_text(self): """Test that validation enforces specific heading text requirements.""" # Arrange original_content = """# Architecture Overview System architecture description. ## System Design Core design principles. """ wrong_heading_content = """# Different Title System architecture description. ## System Design Core design principles. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(original_content) original_file = Path(f.name) with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(wrong_heading_content) wrong_file = Path(f.name) try: # Generate schema with heading text capture schema = self.schema_generator.generate_schema_from_file( original_file, capture_heading_text=True ) # Act & Assert - Original should validate result1 = self.schema_validator.validate_file_against_schema(original_file, schema) assert result1 is True, "Original document should validate against its own schema" # Act & Assert - Wrong heading text should fail validation result2 = self.schema_validator.validate_file_against_schema(wrong_file, schema) assert result2 is False, "Document with wrong heading text should fail validation" finally: original_file.unlink() wrong_file.unlink() def test_schema_includes_heading_text_capture_metaschema_extension(self): """Test that schemas with heading text capture include metaschema extension.""" # Arrange markdown_content = """# Test Document ## Section A Content for section A. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act schema = self.schema_generator.generate_schema_from_file( temp_file, capture_heading_text=True ) # Assert - Should have metaschema extension assert "x-markitect-heading-text-capture" in schema assert schema["x-markitect-heading-text-capture"] is True finally: temp_file.unlink() def test_outline_mode_with_heading_text_capture_integration(self): """Test that outline mode can be combined with heading text capture.""" # Arrange markdown_content = """# Main Document ## Introduction Introduction content. ### Details Detailed information. ## Conclusion Conclusion content. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act result = self.runner.invoke(cli, [ 'schema-generate', '--mode', 'outline', '--capture-heading-text', '--depth', '2', str(temp_file) ]) # Assert assert result.exit_code == 0 schema = json.loads(result.output) # Should have both outline mode and heading text capture extensions assert schema.get("x-markitect-outline-mode") is True assert schema.get("x-markitect-heading-text-capture") is True # Should only include headings up to depth 2 headings = schema["properties"]["headings"]["properties"] assert "level_1" in headings assert "level_2" in headings assert "level_3" not in headings # Should have exact heading text constraints level_1 = headings["level_1"] assert level_1["items"]["properties"]["content"]["enum"] == ["Main Document"] finally: temp_file.unlink() def test_backward_compatibility_without_heading_text_capture(self): """Test that existing behavior is maintained when heading text capture is not enabled.""" # Arrange markdown_content = """# Test Document ## Section One Content here. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act - Generate schema without heading text capture (default behavior) schema = self.schema_generator.generate_schema_from_file(temp_file) # Assert - Should NOT have enum constraints on heading content headings = schema["properties"]["headings"]["properties"] level_1 = headings["level_1"] # Should have string type but no enum constraint assert level_1["items"]["properties"]["content"]["type"] == "string" assert "enum" not in level_1["items"]["properties"]["content"] # Should NOT have heading text capture extension assert "x-markitect-heading-text-capture" not in schema finally: temp_file.unlink() def test_validation_error_messages_for_heading_text_mismatches(self): """Test that validation provides meaningful error messages for heading text mismatches.""" # Arrange original_content = """# Expected Title ## Expected Section Content here. """ wrong_content = """# Wrong Title ## Wrong Section Content here. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(original_content) original_file = Path(f.name) with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(wrong_content) wrong_file = Path(f.name) try: # Generate schema with heading text capture schema = self.schema_generator.generate_schema_from_file( original_file, capture_heading_text=True ) # Act - Validate with detailed errors error_collector = self.schema_validator.validate_file_with_errors(wrong_file, schema) # Assert - Should have specific errors about heading text mismatches errors = error_collector.errors assert len(errors) > 0 # Look for heading text mismatch errors heading_errors = [e for e in errors if "heading" in e.message.lower()] assert len(heading_errors) > 0 # Should mention expected vs actual heading text error_text = " ".join([e.message for e in heading_errors]) assert "Expected Title" in error_text or "Wrong Title" in error_text finally: original_file.unlink() wrong_file.unlink() def test_schema_generation_preserves_heading_order_in_constraints(self): """Test that heading text constraints preserve the order of headings.""" # Arrange markdown_content = """# First Document ## Beta Section Second section alphabetically. ## Alpha Section First section alphabetically. ## Gamma Section Third section alphabetically. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act schema = self.schema_generator.generate_schema_from_file( temp_file, capture_heading_text=True ) # Assert - Level 2 headings should preserve document order, not alphabetical level_2 = schema["properties"]["headings"]["properties"]["level_2"] expected_order = ["Beta Section", "Alpha Section", "Gamma Section"] assert level_2["items"]["properties"]["content"]["enum"] == expected_order finally: temp_file.unlink() def test_cli_help_includes_capture_heading_text_option(self): """Test that CLI help includes documentation for the new option.""" # Act result = self.runner.invoke(cli, ['schema-generate', '--help']) # Assert assert result.exit_code == 0 help_text = result.output assert "--capture-heading-text" in help_text assert "exact heading text" in help_text or "heading text constraints" in help_text def test_empty_document_with_heading_text_capture(self): """Test that heading text capture handles documents with no headings gracefully.""" # Arrange markdown_content = """This is a document with no headings. Just some regular paragraphs here. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act schema = self.schema_generator.generate_schema_from_file( temp_file, capture_heading_text=True ) # Assert - Should generate valid schema even with no headings assert "properties" in schema # Should still have the metaschema extension assert schema.get("x-markitect-heading-text-capture") is True finally: temp_file.unlink()