Implement comprehensive heading text capture functionality that allows schemas
to enforce specific heading text requirements through enum constraints:

• New CLI option: --capture-heading-text flag for exact text constraints
• Schema generation with heading text as enum constraints (not just structure)
• Advanced validation engine that enforces heading text requirements
• Metaschema extension: x-markitect-heading-text-capture marker
• Full integration with Issue #51 outline mode capabilities
• Comprehensive error reporting for heading text mismatches
• Complete backward compatibility with existing schema generation

Technical implementation:
- Extended SchemaGenerator with capture_heading_text parameter
- Enhanced validation system to check enum constraints on heading content
- Added _validate_heading_text_constraints_with_errors for detailed reporting
- Integrated with existing metaschema validation from Issue #50
- Preserved document order of headings in enum constraints

Key features:
- Schemas can now specify required heading text via enum constraints
- Validation rejects documents with incorrect heading text
- Detailed error messages show expected vs actual heading text
- Works seamlessly with outline mode depth controls
- Maintains 100% compatibility with 513 existing tests

Usage examples:
  markitect schema-generate --capture-heading-text document.md
  markitect schema-generate --mode outline --capture-heading-text --depth 2 document.md

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
381 lines
13 KiB
Python
381 lines
13 KiB
Python
"""
Tests for Issue #52: Capture actual heading text in schemas

This test module implements comprehensive tests for capturing actual heading text
from documents and enforcing specific heading text requirements in validation.

Following TDD8 methodology - these tests are written before implementation.
"""
|
|
|
|
import json
|
|
import pytest
|
|
from pathlib import Path
|
|
from tempfile import NamedTemporaryFile
|
|
from click.testing import CliRunner
|
|
|
|
from markitect.cli import cli
|
|
from markitect.schema_generator import SchemaGenerator
|
|
from markitect.schema_validator import SchemaValidator
|
|
from markitect.exceptions import FileNotFoundError
|
|
|
|
|
|
class TestIssue52HeadingTextCapture:
    """Test suite for heading text capture functionality.

    Each test writes one or more markdown fixtures to temporary files via
    ``_write_markdown``. Those files are removed in ``teardown_method``, so
    cleanup does not depend on every test carrying its own ``try/finally``.
    """

    def setup_method(self):
        """Set up test fixtures."""
        # Initialised first so teardown_method always has a list to walk.
        self._temp_files = []
        self.schema_generator = SchemaGenerator()
        self.schema_validator = SchemaValidator()
        self.runner = CliRunner()

    def teardown_method(self):
        """Remove every temporary markdown file created during the test."""
        for path in self._temp_files:
            # missing_ok avoids an ``except FileNotFoundError`` clause: this
            # module imports markitect.exceptions.FileNotFoundError, which
            # shadows the builtin raised by Path.unlink.
            path.unlink(missing_ok=True)

    def _write_markdown(self, content):
        """Write ``content`` to a temporary ``.md`` file and return its path.

        The path is registered for cleanup before the write happens, so the
        file is still removed if writing fails.

        Args:
            content: Markdown text to store in the file.

        Returns:
            ``Path`` of the newly created temporary file.
        """
        with NamedTemporaryFile(
            mode='w', suffix='.md', delete=False, encoding='utf-8'
        ) as handle:
            path = Path(handle.name)
            self._temp_files.append(path)
            handle.write(content)
        return path

    @staticmethod
    def _content_schema(schema, level):
        """Return the ``content`` property schema for heading ``level``.

        Args:
            schema: Generated schema dictionary.
            level: Heading level key such as ``"level_1"``.
        """
        headings = schema["properties"]["headings"]["properties"]
        return headings[level]["items"]["properties"]["content"]

    def test_schema_generation_with_heading_text_capture_option(self):
        """Test that schema generation can capture exact heading text as constraints."""
        # Arrange
        temp_file = self._write_markdown("""# Architecture Overview
This document describes the system architecture.

## System Design
The core system design principles.

## Implementation Strategy
How we will implement the system.
""")

        # Act - Generate schema with heading text capture enabled
        schema = self.schema_generator.generate_schema_from_file(
            temp_file,
            capture_heading_text=True
        )

        # Assert - Schema should contain exact heading text as constraints
        assert "properties" in schema
        assert "headings" in schema["properties"]

        # Level 1 heading should have exact text constraint
        level_1_content = self._content_schema(schema, "level_1")
        assert level_1_content["enum"] == ["Architecture Overview"]

        # Level 2 headings should have exact text constraints
        expected_level_2_texts = ["System Design", "Implementation Strategy"]
        level_2_content = self._content_schema(schema, "level_2")
        assert level_2_content["enum"] == expected_level_2_texts

    def test_cli_schema_generate_with_capture_heading_text_option(self):
        """Test CLI supports --capture-heading-text option."""
        # Arrange
        temp_file = self._write_markdown("""# Project Documentation

## Overview
Project overview section.

## Requirements
Project requirements section.
""")

        # Act
        result = self.runner.invoke(cli, [
            'schema-generate',
            '--capture-heading-text',
            str(temp_file)
        ])

        # Assert
        assert result.exit_code == 0
        schema = json.loads(result.output)

        # Check heading text constraints are present
        level_1_content = self._content_schema(schema, "level_1")
        assert "enum" in level_1_content
        assert level_1_content["enum"] == ["Project Documentation"]

    def test_schema_validation_enforces_exact_heading_text(self):
        """Test that validation enforces specific heading text requirements."""
        # Arrange
        original_file = self._write_markdown("""# Architecture Overview
System architecture description.

## System Design
Core design principles.
""")
        wrong_file = self._write_markdown("""# Different Title
System architecture description.

## System Design
Core design principles.
""")

        # Generate schema with heading text capture
        schema = self.schema_generator.generate_schema_from_file(
            original_file,
            capture_heading_text=True
        )

        # Act & Assert - Original should validate
        result1 = self.schema_validator.validate_file_against_schema(original_file, schema)
        assert result1 is True, "Original document should validate against its own schema"

        # Act & Assert - Wrong heading text should fail validation
        result2 = self.schema_validator.validate_file_against_schema(wrong_file, schema)
        assert result2 is False, "Document with wrong heading text should fail validation"

    def test_schema_includes_heading_text_capture_metaschema_extension(self):
        """Test that schemas with heading text capture include metaschema extension."""
        # Arrange
        temp_file = self._write_markdown("""# Test Document

## Section A
Content for section A.
""")

        # Act
        schema = self.schema_generator.generate_schema_from_file(
            temp_file,
            capture_heading_text=True
        )

        # Assert - Should have metaschema extension
        assert "x-markitect-heading-text-capture" in schema
        assert schema["x-markitect-heading-text-capture"] is True

    def test_outline_mode_with_heading_text_capture_integration(self):
        """Test that outline mode can be combined with heading text capture."""
        # Arrange
        temp_file = self._write_markdown("""# Main Document

## Introduction
Introduction content.

### Details
Detailed information.

## Conclusion
Conclusion content.
""")

        # Act
        result = self.runner.invoke(cli, [
            'schema-generate',
            '--mode', 'outline',
            '--capture-heading-text',
            '--depth', '2',
            str(temp_file)
        ])

        # Assert
        assert result.exit_code == 0
        schema = json.loads(result.output)

        # Should have both outline mode and heading text capture extensions
        assert schema.get("x-markitect-outline-mode") is True
        assert schema.get("x-markitect-heading-text-capture") is True

        # Should only include headings up to depth 2
        headings = schema["properties"]["headings"]["properties"]
        assert "level_1" in headings
        assert "level_2" in headings
        assert "level_3" not in headings

        # Should have exact heading text constraints
        level_1_content = self._content_schema(schema, "level_1")
        assert level_1_content["enum"] == ["Main Document"]

    def test_backward_compatibility_without_heading_text_capture(self):
        """Test that existing behavior is maintained when heading text capture is not enabled."""
        # Arrange
        temp_file = self._write_markdown("""# Test Document

## Section One
Content here.
""")

        # Act - Generate schema without heading text capture (default behavior)
        schema = self.schema_generator.generate_schema_from_file(temp_file)

        # Assert - Should NOT have enum constraints on heading content
        level_1_content = self._content_schema(schema, "level_1")

        # Should have string type but no enum constraint
        assert level_1_content["type"] == "string"
        assert "enum" not in level_1_content

        # Should NOT have heading text capture extension
        assert "x-markitect-heading-text-capture" not in schema

    def test_validation_error_messages_for_heading_text_mismatches(self):
        """Test that validation provides meaningful error messages for heading text mismatches."""
        # Arrange
        original_file = self._write_markdown("""# Expected Title

## Expected Section
Content here.
""")
        wrong_file = self._write_markdown("""# Wrong Title

## Wrong Section
Content here.
""")

        # Generate schema with heading text capture
        schema = self.schema_generator.generate_schema_from_file(
            original_file,
            capture_heading_text=True
        )

        # Act - Validate with detailed errors
        error_collector = self.schema_validator.validate_file_with_errors(wrong_file, schema)

        # Assert - Should have specific errors about heading text mismatches
        errors = error_collector.errors
        assert len(errors) > 0

        # Look for heading text mismatch errors
        heading_errors = [e for e in errors if "heading" in e.message.lower()]
        assert len(heading_errors) > 0

        # Should mention expected vs actual heading text
        error_text = " ".join([e.message for e in heading_errors])
        assert "Expected Title" in error_text or "Wrong Title" in error_text

    def test_schema_generation_preserves_heading_order_in_constraints(self):
        """Test that heading text constraints preserve the order of headings."""
        # Arrange
        temp_file = self._write_markdown("""# First Document

## Beta Section
Second section alphabetically.

## Alpha Section
First section alphabetically.

## Gamma Section
Third section alphabetically.
""")

        # Act
        schema = self.schema_generator.generate_schema_from_file(
            temp_file,
            capture_heading_text=True
        )

        # Assert - Level 2 headings should preserve document order, not alphabetical
        expected_order = ["Beta Section", "Alpha Section", "Gamma Section"]
        level_2_content = self._content_schema(schema, "level_2")
        assert level_2_content["enum"] == expected_order

    def test_cli_help_includes_capture_heading_text_option(self):
        """Test that CLI help includes documentation for the new option."""
        # Act
        result = self.runner.invoke(cli, ['schema-generate', '--help'])

        # Assert
        assert result.exit_code == 0
        help_text = result.output
        assert "--capture-heading-text" in help_text
        assert "exact heading text" in help_text or "heading text constraints" in help_text

    def test_empty_document_with_heading_text_capture(self):
        """Test that heading text capture handles documents with no headings gracefully."""
        # Arrange
        temp_file = self._write_markdown("""This is a document with no headings.

Just some regular paragraphs here.
""")

        # Act
        schema = self.schema_generator.generate_schema_from_file(
            temp_file,
            capture_heading_text=True
        )

        # Assert - Should generate valid schema even with no headings
        assert "properties" in schema
        # Should still have the metaschema extension
        assert schema.get("x-markitect-heading-text-capture") is True