Files
markitect-main/tests/test_issue_46_schema_generation_outline.py
tegwick 7198041143 feat: Fix Issue #46 - Schema generation outline mode draft integration
Resolve the integration issue where outline mode schema generation captured
heading text correctly but draft generation didn't use it, resulting in
generic placeholders instead of preserved document structure.

Key changes:
- Enhanced StubGenerator._extract_heading_text_from_schema() to extract actual heading text from enum constraints
- Modified heading generation logic in _generate_content_from_headings() to use captured text
- Fixed both H1 and H2+ heading handling to preserve source document structure
- Added comprehensive test suite covering all outline mode functionality
- Updated end-to-end test to reflect expected behavior (stubs vs full validation)

Impact:
- Outline schemas now properly integrate with draft generation
- Generated drafts preserve actual heading text from source documents
- End-to-end workflow: example → outline schema → draft maintains document structure
- Backward compatibility maintained for existing functionality

Tests: 8/8 passing in test_issue_46_schema_generation_outline.py
Resolves: coulomb/markitect_project#46

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-01 16:16:46 +02:00

403 lines
15 KiB
Python

"""
Test suite for Issue #46: Schema generation capability outline
This test module validates outline mode schema generation improvements including:
- Heading text capture in outline mode schemas
- Integration with draft generation using captured heading text
- Proper title formatting and depth limiting
- Content instruction integration
- End-to-end workflow from example document to generated drafts
Created for Issue #46: https://gitea.coulomb.social/coulomb/markitect_project/issues/46
"""
import pytest
import tempfile
import json
from pathlib import Path
from click.testing import CliRunner
from markitect.cli import cli
class TestIssue46SchemaGenerationOutline:
    """Test suite for schema generation outline mode improvements.

    Every test drives the ``markitect`` CLI through click's ``CliRunner``.
    Scratch files are created inside a per-test ``tempfile.TemporaryDirectory``
    so they are removed even when an assertion (or the creation of a later
    scratch file) fails part-way through a test.
    """

    # Headings of the example document, as they must reappear in a draft
    # generated from an outline schema that captured heading text.
    EXPECTED_DRAFT_HEADINGS = (
        "# Project Requirements",
        "## Overview",
        "## Technical Specifications",
        "## Implementation Plan",
        "### Database Requirements",
        "### API Requirements",
    )

    def setup_method(self):
        """Set up test environment."""
        self.runner = CliRunner()
        # Example markdown document with specific headings on levels 1-3.
        self.test_md_content = """# Project Requirements
## Overview
This is the project overview section.
## Technical Specifications
### Database Requirements
The database should support:
- User management
- Data persistence
- Backup functionality
### API Requirements
The API should provide:
- RESTful endpoints
- Authentication
- Rate limiting
## Implementation Plan
This section covers the implementation approach.
"""

    @staticmethod
    def _make_file(directory, suffix, content=""):
        """Create a uniquely named file in *directory* and return its path.

        The file is written and closed before returning, so the CLI under
        test can open it freely. With the default empty *content* this
        yields an existing, empty placeholder file.
        """
        with tempfile.NamedTemporaryFile(
            mode='w', suffix=suffix, dir=directory, delete=False
        ) as handle:
            handle.write(content)
        return Path(handle.name)

    def _generate_outline_schema(self, md_file, *options):
        """Invoke ``schema-generate --mode outline`` on *md_file*.

        *options* are extra command-line tokens placed between the mode
        flag and the input path. Returns the click ``Result`` object.
        """
        return self.runner.invoke(cli, [
            'schema-generate',
            '--mode', 'outline',
            *options,
            str(md_file),
        ])

    @staticmethod
    def _heading_item_properties(schema, level):
        """Return the per-item ``properties`` dict for headings of *level*."""
        headings = schema['properties']['headings']['properties']
        return headings[f'level_{level}']['items']['properties']

    def _assert_draft_preserves_headings(self, draft_content):
        """Assert that every heading of the example appears in the draft."""
        for heading in self.EXPECTED_DRAFT_HEADINGS:
            assert heading in draft_content, f"Missing heading: {heading!r}"

    def test_outline_mode_captures_actual_heading_text(self):
        """Test that outline mode captures actual heading text in enum constraints."""
        expected_by_level = {
            1: ["Project Requirements"],
            2: ["Overview", "Technical Specifications", "Implementation Plan"],
            3: ["Database Requirements", "API Requirements"],
        }
        with tempfile.TemporaryDirectory() as workdir:
            md_file = self._make_file(workdir, '.md', self.test_md_content)

            # Act - Generate schema in outline mode with heading text capture
            result = self._generate_outline_schema(
                md_file, '--capture-heading-text', '--depth', '3'
            )

            # Assert - Command should succeed
            assert result.exit_code == 0, f"Command failed: {result.output}"

            # Parse the generated schema
            schema = json.loads(result.output)

            # Should have correct title format
            assert schema['title'] == f"Schema from {md_file.name}"

            # Should capture actual heading text in enum constraints
            for level, heading_texts in expected_by_level.items():
                content = self._heading_item_properties(schema, level)['content']
                assert 'enum' in content
                for heading_text in heading_texts:
                    assert heading_text in content['enum']

    def test_draft_generation_uses_captured_heading_text(self):
        """Test that draft generation uses actual heading text from outline schema."""
        with tempfile.TemporaryDirectory() as workdir:
            md_file = self._make_file(workdir, '.md', self.test_md_content)
            schema_file = self._make_file(workdir, '.json')
            draft_file = self._make_file(workdir, '.md')

            # Arrange - Generate outline schema with heading text capture
            schema_result = self._generate_outline_schema(
                md_file,
                '--capture-heading-text',
                '--depth', '3',
                '--outfile', str(schema_file),
            )
            assert schema_result.exit_code == 0

            # Act - Generate draft from the outline schema
            draft_result = self.runner.invoke(cli, [
                'generate-stub',
                str(schema_file),
                '--output', str(draft_file),
            ])

            # Assert - Draft generation should succeed
            assert draft_result.exit_code == 0, (
                f"Draft generation failed: {draft_result.output}"
            )

            # Read the generated draft
            draft_content = draft_file.read_text()

            # Should use actual heading text, not generic placeholders
            self._assert_draft_preserves_headings(draft_content)

            # Should NOT have generic headings
            assert "## Introduction" not in draft_content
            assert "## Main Content" not in draft_content
            assert "## Section 1" not in draft_content

    def test_outline_schema_integration_with_content_instructions(self):
        """Test that outline schemas integrate properly with content instructions."""
        with tempfile.TemporaryDirectory() as workdir:
            md_file = self._make_file(workdir, '.md', self.test_md_content)

            # Act - Generate schema with both outline mode and content instructions
            result = self._generate_outline_schema(
                md_file,
                '--capture-heading-text',
                '--include-content-instructions',
                '--depth', '2',
            )

            # Assert - Command should succeed
            assert result.exit_code == 0, f"Command failed: {result.output}"

            # Parse the generated schema
            schema = json.loads(result.output)

            # Should have both heading text capture and content instructions
            assert schema.get('x-markitect-heading-text-capture') is True
            assert schema.get('x-markitect-content-instructions-enabled') is True

            # Check that headings have both enum constraints and content instructions
            level_1_items = self._heading_item_properties(schema, 1)
            assert 'enum' in level_1_items['content']
            assert 'x-markitect-content-instructions' in level_1_items

    def test_depth_limiting_works_correctly(self):
        """Test that depth parameter correctly limits heading levels in outline mode."""
        with tempfile.TemporaryDirectory() as workdir:
            md_file = self._make_file(workdir, '.md', self.test_md_content)

            # Act - Generate schema with depth limit of 2
            result = self._generate_outline_schema(
                md_file, '--capture-heading-text', '--depth', '2'
            )

            # Assert - Command should succeed
            assert result.exit_code == 0, f"Command failed: {result.output}"

            # Parse the generated schema
            schema = json.loads(result.output)

            # Should have level 1 and 2 headings
            headings = schema['properties']['headings']['properties']
            assert 'level_1' in headings
            assert 'level_2' in headings

            # Should NOT have level 3 headings due to depth limit
            assert 'level_3' not in headings

            # Verify outline depth is recorded
            assert schema.get('x-markitect-outline-depth') == 2

    def test_outline_mode_title_format_correction(self):
        """Test that outline mode generates correct title format."""
        with tempfile.TemporaryDirectory() as workdir:
            md_file = self._make_file(workdir, '.md', self.test_md_content)

            # Act - Generate schema in outline mode
            result = self._generate_outline_schema(md_file)

            # Assert
            assert result.exit_code == 0, f"Command failed: {result.output}"
            schema = json.loads(result.output)

            # Should use "Schema from" not "Schema for"
            expected_title = f"Schema from {md_file.name}"
            assert schema['title'] == expected_title

            # Should have outline mode marker
            assert schema.get('x-markitect-outline-mode') is True

    def test_end_to_end_outline_workflow(self):
        """Test complete workflow: example -> outline schema -> draft -> validation."""
        with tempfile.TemporaryDirectory() as workdir:
            example_file = self._make_file(workdir, '.md', self.test_md_content)
            schema_file = self._make_file(workdir, '.json')
            draft_file = self._make_file(workdir, '.md')

            # Step 1: Generate outline schema from example
            schema_result = self._generate_outline_schema(
                example_file,
                '--capture-heading-text',
                '--include-content-instructions',
                '--depth', '3',
                '--outfile', str(schema_file),
            )
            assert schema_result.exit_code == 0

            # Step 2: Generate draft from schema
            draft_result = self.runner.invoke(cli, [
                'generate-stub',
                str(schema_file),
                '--output', str(draft_file),
            ])
            assert draft_result.exit_code == 0

            # Step 3: Validate draft against schema
            validate_result = self.runner.invoke(cli, [
                'validate',
                str(draft_file),
                '--schema', str(schema_file),
            ])
            assert validate_result.exit_code == 0, (
                f"Validation failed: {validate_result.output}"
            )

            # Step 4: Verify draft content quality
            draft_content = draft_file.read_text()

            # Should preserve the document structure from example
            self._assert_draft_preserves_headings(draft_content)

            # Should have schema reference
            assert f"Generated from schema: {schema_file}" in draft_content

    def test_outline_mode_backwards_compatibility(self):
        """Test that outline mode maintains backwards compatibility."""
        with tempfile.TemporaryDirectory() as workdir:
            md_file = self._make_file(workdir, '.md', self.test_md_content)

            # Test both old and new parameter styles work
            old_style_result = self._generate_outline_schema(
                md_file, '--max-depth', '2'
            )
            new_style_result = self._generate_outline_schema(
                md_file, '--depth', '2'
            )

            # Both should work
            assert old_style_result.exit_code == 0
            assert new_style_result.exit_code == 0

            # Should produce equivalent schemas
            old_schema = json.loads(old_style_result.output)
            new_schema = json.loads(new_style_result.output)
            assert old_schema['title'] == new_schema['title']
            assert (
                old_schema.get('x-markitect-outline-mode')
                == new_schema.get('x-markitect-outline-mode')
            )

    def test_outline_schema_supports_data_driven_generation(self):
        """Test that outline schemas work with data-driven draft generation."""
        with tempfile.TemporaryDirectory() as workdir:
            md_file = self._make_file(workdir, '.md', self.test_md_content)
            schema_file = self._make_file(workdir, '.json')

            # Create test data
            data_file = self._make_file(workdir, '.json', json.dumps([
                {"project": "Alpha", "version": "1.0"},
                {"project": "Beta", "version": "2.0"},
            ]))

            # Keep generated drafts inside the scratch directory instead of a
            # fixed /tmp path: portable, isolated between runs, and cleaned up.
            output_dir = Path(workdir) / 'outline_drafts'

            # Generate outline schema
            schema_result = self._generate_outline_schema(
                md_file,
                '--capture-heading-text',
                '--depth', '2',
                '--outfile', str(schema_file),
            )
            assert schema_result.exit_code == 0

            # Test data-driven generation (if implemented)
            # This tests integration with Issue #56
            draft_result = self.runner.invoke(cli, [
                'generate-drafts',
                str(schema_file),
                str(data_file),
                '--output-dir', str(output_dir),
            ])

            # Should work or gracefully indicate feature not implemented
            assert (
                draft_result.exit_code == 0
                or "not implemented" in draft_result.output.lower()
            )