diff --git a/markitect/stub_generator.py b/markitect/stub_generator.py index 5e0a16de..12bf645f 100644 --- a/markitect/stub_generator.py +++ b/markitect/stub_generator.py @@ -138,11 +138,15 @@ class StubGenerator: # Generate the content with proper hierarchy if 1 in heading_counts: - # Start with H1 - lines.append(f"# {doc_title}") - lines.append("") # Get the heading schema for level 1 level_1_heading_schema = heading_properties.get('level_1', {}) + + # Try to extract actual H1 heading text from schema, fallback to doc_title + h1_text = self._extract_heading_text_from_schema(level_1_heading_schema, 0) or doc_title + + # Start with H1 + lines.append(f"# {h1_text}") + lines.append("") lines.append(self._get_placeholder_content( placeholder_style, "introduction", @@ -159,15 +163,18 @@ class StubGenerator: count = heading_counts[level] for i in range(count): heading_prefix = '#' * level - section_name = self._generate_section_name(level, i + 1) - - lines.append(f"{heading_prefix} {section_name}") - lines.append("") # Get the heading schema for this level level_key = f"level_{level}" heading_schema = heading_properties.get(level_key, {}) + # Try to extract actual heading text from schema enum constraints + section_name = self._extract_heading_text_from_schema(heading_schema, i) or \ + self._generate_section_name(level, i + 1) + + lines.append(f"{heading_prefix} {section_name}") + lines.append("") + lines.append(self._get_placeholder_content( placeholder_style, f"section_level_{level}", @@ -181,18 +188,23 @@ class StubGenerator: count = heading_counts[level] for i in range(count): heading_prefix = '#' * level - if level == min(heading_counts.keys()) and i == 0: - section_name = doc_title - else: - section_name = self._generate_section_name(level, i + 1) - - lines.append(f"{heading_prefix} {section_name}") - lines.append("") # Get the heading schema for this level level_key = f"level_{level}" heading_schema = heading_properties.get(level_key, {}) + # Try to extract 
actual heading text from schema enum constraints + if level == min(heading_counts.keys()) and i == 0: + # For the first heading of the minimum level, try schema first, then doc_title + section_name = self._extract_heading_text_from_schema(heading_schema, i) or doc_title + else: + # For other headings, try schema first, then fallback to generic names + section_name = self._extract_heading_text_from_schema(heading_schema, i) or \ + self._generate_section_name(level, i + 1) + + lines.append(f"{heading_prefix} {section_name}") + lines.append("") + lines.append(self._get_placeholder_content( placeholder_style, f"section_level_{level}", @@ -318,4 +330,29 @@ TODO: Add detailed content for this subsection.""", if isinstance(instruction_schema, dict): return instruction_schema.get('const') + return None + + def _extract_heading_text_from_schema(self, heading_schema: Dict[str, Any], index: int) -> Optional[str]: + """ + Extract actual heading text from schema enum constraints for outline mode. + + Args: + heading_schema: The schema definition for a heading level + index: The index of the heading (0-based) + + Returns: + Actual heading text if found in enum constraints, None otherwise + """ + # Navigate through the schema structure to find enum constraints + # Schema structure: heading_schema -> items -> properties -> content -> enum + items_schema = heading_schema.get('items', {}) + if isinstance(items_schema, dict): + properties = items_schema.get('properties', {}) + if isinstance(properties, dict): + content_schema = properties.get('content', {}) + if isinstance(content_schema, dict): + enum_values = content_schema.get('enum', []) + if isinstance(enum_values, list) and 0 <= index < len(enum_values): + return enum_values[index] + return None \ No newline at end of file diff --git a/tests/test_issue_46_schema_generation_outline.py b/tests/test_issue_46_schema_generation_outline.py new file mode 100644 index 00000000..994e05c3 --- /dev/null +++ 
"""
Test suite for Issue #46: Schema generation capability outline

This test module validates outline mode schema generation improvements including:
- Heading text capture in outline mode schemas
- Integration with draft generation using captured heading text
- Proper title formatting and depth limiting
- Content instruction integration
- End-to-end workflow from example document to generated drafts

Created for Issue #46: https://gitea.coulomb.social/coulomb/markitect_project/issues/46
"""

import pytest
import tempfile
import json
from pathlib import Path
from click.testing import CliRunner
from markitect.cli import cli


class TestIssue46SchemaGenerationOutline:
    """Test suite for schema generation outline mode improvements.

    Every test runs against the same example markdown document, which is
    written into a per-test temporary directory. All files a test produces
    (schemas, drafts, data files, output directories) live in that directory
    and are removed in ``teardown_method``, so nothing leaks into the system
    temp location even when an assertion fails.
    """

    def setup_method(self):
        """Set up test environment."""
        self.runner = CliRunner()

        # Create a test markdown file with specific headings
        self.test_md_content = """# Project Requirements

## Overview

This is the project overview section.

## Technical Specifications

### Database Requirements

The database should support:
- User management
- Data persistence
- Backup functionality

### API Requirements

The API should provide:
- RESTful endpoints
- Authentication
- Rate limiting

## Implementation Plan

This section covers the implementation approach.
"""

        # One private workspace per test; cleaned up in teardown_method.
        self._workspace = tempfile.TemporaryDirectory()
        self.workdir = Path(self._workspace.name)
        self.md_file = self.workdir / "example.md"
        self.md_file.write_text(self.test_md_content, encoding="utf-8")

    def teardown_method(self):
        """Remove the per-test workspace and everything written into it."""
        self._workspace.cleanup()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _generate_outline_schema(self, *options):
        """Invoke ``schema-generate --mode outline`` on the example document.

        ``options`` are inserted between the mode flag and the input path.
        """
        return self.runner.invoke(cli, [
            'schema-generate',
            '--mode', 'outline',
            *options,
            str(self.md_file)
        ])

    def _generate_stub(self, schema_file, draft_file):
        """Invoke ``generate-stub`` for ``schema_file``, writing to ``draft_file``."""
        return self.runner.invoke(cli, [
            'generate-stub',
            str(schema_file),
            '--output', str(draft_file)
        ])

    @staticmethod
    def _heading_item_properties(schema, level):
        """Return the ``items.properties`` mapping for the given heading level."""
        headings = schema['properties']['headings']['properties']
        return headings[f'level_{level}']['items']['properties']

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_outline_mode_captures_actual_heading_text(self):
        """Test that outline mode captures actual heading text in enum constraints."""
        # Act - Generate schema in outline mode with heading text capture
        result = self._generate_outline_schema(
            '--capture-heading-text',
            '--depth', '3',
        )

        # Assert - Command should succeed
        assert result.exit_code == 0, f"Command failed: {result.output}"

        # Parse the generated schema
        schema = json.loads(result.output)

        # Should have correct title format
        assert schema['title'] == f"Schema from {self.md_file.name}"

        # Should capture actual heading text in enum constraints
        level_1_content = self._heading_item_properties(schema, 1)['content']
        assert 'enum' in level_1_content
        assert "Project Requirements" in level_1_content['enum']

        level_2_content = self._heading_item_properties(schema, 2)['content']
        assert 'enum' in level_2_content
        assert "Overview" in level_2_content['enum']
        assert "Technical Specifications" in level_2_content['enum']
        assert "Implementation Plan" in level_2_content['enum']

        level_3_content = self._heading_item_properties(schema, 3)['content']
        assert 'enum' in level_3_content
        assert "Database Requirements" in level_3_content['enum']
        assert "API Requirements" in level_3_content['enum']

    def test_draft_generation_uses_captured_heading_text(self):
        """Test that draft generation uses actual heading text from outline schema."""
        schema_file = self.workdir / "schema.json"
        draft_file = self.workdir / "draft.md"

        # Arrange - Generate outline schema with heading text capture
        schema_result = self._generate_outline_schema(
            '--capture-heading-text',
            '--depth', '3',
            '--outfile', str(schema_file),
        )
        assert schema_result.exit_code == 0, f"Schema generation failed: {schema_result.output}"

        # Act - Generate draft from the outline schema
        draft_result = self._generate_stub(schema_file, draft_file)

        # Assert - Draft generation should succeed
        assert draft_result.exit_code == 0, f"Draft generation failed: {draft_result.output}"

        # Read the generated draft
        draft_content = draft_file.read_text(encoding="utf-8")

        # Should use actual heading text, not generic placeholders
        assert "# Project Requirements" in draft_content
        assert "## Overview" in draft_content
        assert "## Technical Specifications" in draft_content
        assert "## Implementation Plan" in draft_content
        assert "### Database Requirements" in draft_content
        assert "### API Requirements" in draft_content

        # Should NOT have generic headings
        assert "## Introduction" not in draft_content
        assert "## Main Content" not in draft_content
        assert "## Section 1" not in draft_content

    def test_outline_schema_integration_with_content_instructions(self):
        """Test that outline schemas integrate properly with content instructions."""
        # Act - Generate schema with both outline mode and content instructions
        result = self._generate_outline_schema(
            '--capture-heading-text',
            '--include-content-instructions',
            '--depth', '2',
        )

        # Assert - Command should succeed
        assert result.exit_code == 0, f"Command failed: {result.output}"

        # Parse the generated schema
        schema = json.loads(result.output)

        # Should have both heading text capture and content instructions
        assert schema.get('x-markitect-heading-text-capture') is True
        assert schema.get('x-markitect-content-instructions-enabled') is True

        # Check that headings have both enum constraints and content instructions
        level_1_items = self._heading_item_properties(schema, 1)
        assert 'enum' in level_1_items['content']
        assert 'x-markitect-content-instructions' in level_1_items

    def test_depth_limiting_works_correctly(self):
        """Test that depth parameter correctly limits heading levels in outline mode."""
        # Act - Generate schema with depth limit of 2
        result = self._generate_outline_schema(
            '--capture-heading-text',
            '--depth', '2',
        )

        # Assert - Command should succeed
        assert result.exit_code == 0, f"Command failed: {result.output}"

        # Parse the generated schema
        schema = json.loads(result.output)

        # Should have level 1 and 2 headings
        headings = schema['properties']['headings']['properties']
        assert 'level_1' in headings
        assert 'level_2' in headings

        # Should NOT have level 3 headings due to depth limit
        assert 'level_3' not in headings

        # Verify outline depth is recorded
        assert schema.get('x-markitect-outline-depth') == 2

    def test_outline_mode_title_format_correction(self):
        """Test that outline mode generates correct title format."""
        # Act - Generate schema in outline mode
        result = self._generate_outline_schema()

        # Assert
        assert result.exit_code == 0, f"Command failed: {result.output}"

        schema = json.loads(result.output)

        # Should use "Schema from" not "Schema for"
        expected_title = f"Schema from {self.md_file.name}"
        assert schema['title'] == expected_title

        # Should have outline mode marker
        assert schema.get('x-markitect-outline-mode') is True

    def test_end_to_end_outline_workflow(self):
        """Test complete workflow: example -> outline schema -> draft -> validation."""
        schema_file = self.workdir / "schema.json"
        draft_file = self.workdir / "draft.md"

        # Step 1: Generate outline schema from example
        schema_result = self._generate_outline_schema(
            '--capture-heading-text',
            '--include-content-instructions',
            '--depth', '3',
            '--outfile', str(schema_file),
        )
        assert schema_result.exit_code == 0, f"Schema generation failed: {schema_result.output}"

        # Step 2: Generate draft from schema
        draft_result = self._generate_stub(schema_file, draft_file)
        assert draft_result.exit_code == 0, f"Draft generation failed: {draft_result.output}"

        # Step 3: Validate draft against schema
        validate_result = self.runner.invoke(cli, [
            'validate',
            str(draft_file),
            '--schema', str(schema_file)
        ])
        assert validate_result.exit_code == 0, f"Validation failed: {validate_result.output}"

        # Step 4: Verify draft content quality
        draft_content = draft_file.read_text(encoding="utf-8")

        # Should preserve the document structure from example
        assert "# Project Requirements" in draft_content
        assert "## Overview" in draft_content
        assert "## Technical Specifications" in draft_content
        assert "### Database Requirements" in draft_content
        assert "### API Requirements" in draft_content
        assert "## Implementation Plan" in draft_content

        # Should have schema reference
        assert f"Generated from schema: {schema_file}" in draft_content

    def test_outline_mode_backwards_compatibility(self):
        """Test that outline mode maintains backwards compatibility."""
        # Test both old and new parameter styles work
        old_style_result = self._generate_outline_schema('--max-depth', '2')
        new_style_result = self._generate_outline_schema('--depth', '2')

        # Both should work
        assert old_style_result.exit_code == 0, f"Command failed: {old_style_result.output}"
        assert new_style_result.exit_code == 0, f"Command failed: {new_style_result.output}"

        # Should produce equivalent schemas
        old_schema = json.loads(old_style_result.output)
        new_schema = json.loads(new_style_result.output)

        assert old_schema['title'] == new_schema['title']
        assert old_schema.get('x-markitect-outline-mode') == new_schema.get('x-markitect-outline-mode')

    def test_outline_schema_supports_data_driven_generation(self):
        """Test that outline schemas work with data-driven draft generation."""
        schema_file = self.workdir / "schema.json"
        data_file = self.workdir / "data.json"
        # Keep generated drafts inside the per-test workspace instead of a
        # fixed, shared location under /tmp.
        output_dir = self.workdir / "outline_drafts"

        # Create test data
        data_file.write_text(json.dumps([
            {"project": "Alpha", "version": "1.0"},
            {"project": "Beta", "version": "2.0"}
        ]), encoding="utf-8")

        # Generate outline schema
        schema_result = self._generate_outline_schema(
            '--capture-heading-text',
            '--depth', '2',
            '--outfile', str(schema_file),
        )
        assert schema_result.exit_code == 0, f"Schema generation failed: {schema_result.output}"

        # Test data-driven generation (if implemented)
        # This tests integration with Issue #56
        draft_result = self.runner.invoke(cli, [
            'generate-drafts',
            str(schema_file),
            str(data_file),
            '--output-dir', str(output_dir)
        ])

        # Should work or gracefully indicate feature not implemented
        assert draft_result.exit_code == 0 or "not implemented" in draft_result.output.lower()