Resolve the integration issue where outline mode schema generation captured heading text correctly but draft generation didn't use it, resulting in generic placeholders instead of preserved document structure. Key changes: - Enhanced StubGenerator._extract_heading_text_from_schema() to extract actual heading text from enum constraints - Modified heading generation logic in _generate_content_from_headings() to use captured text - Fixed both H1 and H2+ heading handling to preserve source document structure - Added comprehensive test suite covering all outline mode functionality - Updated end-to-end test to reflect expected behavior (stubs vs full validation) Impact: - Outline schemas now properly integrate with draft generation - Generated drafts preserve actual heading text from source documents - End-to-end workflow: example → outline schema → draft maintains document structure - Backward compatibility maintained for existing functionality Tests: 8/8 passing in test_issue_46_schema_generation_outline.py Resolves: coulomb/markitect_project#46 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
358 lines
15 KiB
Python
358 lines
15 KiB
Python
"""
|
|
Stub Generator for Issue #6: Generate a Markdown Stub from a Schema.
|
|
|
|
This module provides functionality to create markdown template files from JSON schemas
|
|
with appropriate placeholder content and structural elements.
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional, List, Callable
|
|
|
|
|
|
# Constants for better maintainability
DEFAULT_TITLE = "Document Title"
HEADING_PREFIX_LEVEL_1 = "#"
LEVEL_KEY_PREFIX = "level_"


class StubGenerator:
    """
    Generates markdown stub/template files from JSON schemas.

    Creates markdown documents with proper heading hierarchy and placeholder
    content based on the structural definitions in JSON schemas.

    The generator reads ``schema['properties']['headings']['properties']``
    and looks for keys of the form ``level_<N>``. Each such entry may carry
    ``minItems`` (how many headings of that level to emit) and, under
    ``items -> properties``, a ``content.enum`` list (literal heading texts)
    and an ``x-markitect-content-instructions.const`` string (placeholder
    text used when the schema enables content instructions).
    """

    # Generic section names used when the schema supplies no heading text,
    # keyed by heading level. Position in the list is the 1-based index - 1.
    _SECTION_NAMES: Dict[int, List[str]] = {
        2: ['Introduction', 'Main Content', 'Conclusion', 'Summary', 'Overview'],
        3: ['Background', 'Analysis', 'Implementation', 'Results', 'Discussion'],
        4: ['Details', 'Examples', 'Notes', 'Additional Info'],
        5: ['Subsection A', 'Subsection B', 'Subsection C'],
        6: ['Item', 'Point', 'Note'],
    }

    def __init__(self):
        """Initialize the stub generator."""
        # Maps a placeholder style name to the method producing its text.
        self.placeholder_styles: Dict[str, Callable[[str], str]] = {
            'default': self._generate_default_placeholder,
            'custom': self._generate_custom_placeholder,
            'detailed': self._generate_detailed_placeholder,
        }

    def generate_stub_from_schema(self, schema: Dict[str, Any],
                                  placeholder_style: str = 'default',
                                  title: Optional[str] = None,
                                  schema_file_path: Optional[str] = None) -> str:
        """
        Generate a markdown stub from a JSON schema dictionary.

        Args:
            schema: JSON schema as dictionary
            placeholder_style: Style of placeholder content ('default', 'custom', 'detailed')
            title: Custom title for the document (overrides schema title)
            schema_file_path: Optional path to schema file for reference metadata

        Returns:
            Generated markdown content as string
        """
        # A missing, null or empty schema title falls back to the default
        # rather than rendering "# None" or an empty H1.
        doc_title = title or schema.get('title') or DEFAULT_TITLE

        lines: List[str] = []

        # Add schema reference metadata if schema file path is provided
        if schema_file_path:
            lines.append(f"<!-- Generated from schema: {schema_file_path} -->")
            lines.append("")

        # Extract heading structure from schema
        headings_schema = schema.get('properties', {}).get('headings', {})
        heading_properties = headings_schema.get('properties', {})

        if self._has_heading_levels(heading_properties):
            # Generate content based on heading structure
            lines.extend(self._generate_content_from_headings(
                heading_properties, doc_title, placeholder_style, schema=schema
            ))
        else:
            # Create a minimal document if no usable heading structure is
            # defined (nothing at all, or no parseable ``level_<N>`` key).
            lines.append(f"{HEADING_PREFIX_LEVEL_1} {doc_title}")
            lines.append("")
            lines.append(self._get_placeholder_content(placeholder_style, "main", schema=schema))
            lines.append("")

        return '\n'.join(lines)

    def generate_stub_from_file(self, schema_file: Path) -> str:
        """
        Generate a markdown stub from a JSON schema file.

        Args:
            schema_file: Path to JSON schema file

        Returns:
            Generated markdown content as string

        Raises:
            FileNotFoundError: If schema file doesn't exist
            json.JSONDecodeError: If schema file contains invalid JSON
        """
        # Coerce so that plain string paths are accepted as well.
        schema_file = Path(schema_file)
        if not schema_file.exists():
            raise FileNotFoundError(f"Schema file not found: {schema_file}")

        with open(schema_file, 'r', encoding='utf-8') as f:
            schema = json.load(f)

        return self.generate_stub_from_schema(schema, schema_file_path=str(schema_file))

    def generate_stub_to_file(self, schema: Dict[str, Any],
                              output_file: Path,
                              placeholder_style: str = 'default',
                              title: Optional[str] = None,
                              schema_file_path: Optional[str] = None) -> None:
        """
        Generate a markdown stub and save it to a file.

        Args:
            schema: JSON schema as dictionary
            output_file: Path where to save the generated markdown
            placeholder_style: Style of placeholder content
            title: Custom title for the document
            schema_file_path: Optional path to schema file for reference metadata
        """
        content = self.generate_stub_from_schema(schema, placeholder_style, title, schema_file_path)

        # Coerce so that plain string paths are accepted as well.
        output_file = Path(output_file)

        # Ensure parent directory exists
        output_file.parent.mkdir(parents=True, exist_ok=True)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(content)

    @staticmethod
    def _parse_level_key(level_key: Any) -> Optional[int]:
        """
        Return the heading level encoded in a ``level_<N>`` key.

        Returns None when the key is not a string, has no integer after the
        first underscore, or encodes a level below 1.
        """
        if not isinstance(level_key, str):
            return None
        try:
            level = int(level_key.split('_')[1])
        except (IndexError, ValueError):
            return None
        return level if level >= 1 else None

    def _has_heading_levels(self, heading_properties: Any) -> bool:
        """Return True if at least one usable ``level_<N>`` key is present."""
        if not isinstance(heading_properties, dict):
            return False
        return any(
            isinstance(key, str)
            and key.startswith(LEVEL_KEY_PREFIX)
            and self._parse_level_key(key) is not None
            for key in heading_properties
        )

    def _generate_content_from_headings(self, heading_properties: Dict[str, Any],
                                        doc_title: str, placeholder_style: str,
                                        schema: Optional[Dict[str, Any]] = None) -> List[str]:
        """
        Generate markdown content from heading structure.

        If a level-1 entry exists, a single H1 is emitted first; its text is
        the first ``content.enum`` value of that entry, or ``doc_title``.
        The remaining levels are then emitted in ascending order, each with
        as many headings as ``_calculate_heading_counts`` reports.

        Heading text comes from the schema's ``content.enum`` list when it
        has an entry at that position, otherwise from a generic name. When
        there is no level-1 entry, the first heading of the lowest level
        falls back to ``doc_title`` instead of a generic name.

        Args:
            heading_properties: Mapping of ``level_<N>`` keys to level schemas
            doc_title: Fallback text for the document's top heading
            placeholder_style: Style of placeholder content
            schema: Full schema, consulted for content instructions

        Returns:
            List of markdown lines (headings, placeholders and blank lines)
        """
        # Sort heading level keys so later duplicates resolve deterministically
        levels = sorted(key for key in heading_properties if key.startswith(LEVEL_KEY_PREFIX))

        # Calculate heading counts for each level
        heading_counts = self._calculate_heading_counts(levels, heading_properties)

        lines: List[str] = []
        has_h1 = 1 in heading_counts

        if has_h1:
            level_1_heading_schema = heading_properties.get(f"{LEVEL_KEY_PREFIX}1", {})

            # Try to extract actual H1 heading text from schema, fallback to doc_title
            h1_text = self._extract_heading_text_from_schema(level_1_heading_schema, 0) or doc_title

            lines.append(f"{HEADING_PREFIX_LEVEL_1} {h1_text}")
            lines.append("")
            lines.append(self._get_placeholder_content(
                placeholder_style,
                "introduction",
                schema=schema,
                heading_schema=level_1_heading_schema
            ))
            lines.append("")

        # Everything except level 1, which was handled above.
        section_levels = sorted(level for level in heading_counts if level != 1)

        # Without an H1 the lowest declared level carries the document title
        # on its first heading (even if that level ends up with zero items).
        title_level: Optional[int] = None
        if not has_h1 and section_levels:
            title_level = section_levels[0]

        for level in section_levels:
            heading_prefix = HEADING_PREFIX_LEVEL_1 * level
            heading_schema = heading_properties.get(f"{LEVEL_KEY_PREFIX}{level}", {})

            for i in range(heading_counts[level]):
                # Try to extract actual heading text from schema enum constraints
                section_name = self._extract_heading_text_from_schema(heading_schema, i)
                if not section_name:
                    if level == title_level and i == 0:
                        section_name = doc_title
                    else:
                        section_name = self._generate_section_name(level, i + 1)

                lines.append(f"{heading_prefix} {section_name}")
                lines.append("")
                lines.append(self._get_placeholder_content(
                    placeholder_style,
                    f"section_level_{level}",
                    schema=schema,
                    heading_schema=heading_schema
                ))
                lines.append("")

        return lines

    def _calculate_heading_counts(self, levels: List[str], heading_properties: Dict[str, Any]) -> Dict[int, int]:
        """
        Calculate the required count for each heading level.

        The count is the level schema's ``minItems`` (default 1); ``maxItems``
        is not consulted. Keys whose level number cannot be parsed are
        skipped, and a level schema that is not a dict, or whose ``minItems``
        is not an integer, is treated as requiring one heading.

        Args:
            levels: ``level_<N>`` keys to evaluate
            heading_properties: Mapping of those keys to level schemas

        Returns:
            Mapping of heading level number to number of headings to emit
        """
        heading_counts: Dict[int, int] = {}
        for level_key in levels:
            level_num = self._parse_level_key(level_key)
            if level_num is None:
                continue

            level_props = heading_properties[level_key]
            min_items = level_props.get('minItems', 1) if isinstance(level_props, dict) else 1
            if not isinstance(min_items, int):
                min_items = 1

            # Use minimum required count
            heading_counts[level_num] = min_items
        return heading_counts

    def _generate_section_name(self, level: int, index: int) -> str:
        """
        Generate appropriate section names based on level and index.

        Args:
            level: Heading level (2-6 have predefined names)
            index: 1-based position of the heading within its level

        Returns:
            The predefined name for that level and position, or
            ``"Section <index>"`` when none is defined.
        """
        names = self._SECTION_NAMES.get(level, [])
        # The lower bound stops index <= 0 from wrapping around to the end
        # of the list through negative indexing.
        if 1 <= index <= len(names):
            return names[index - 1]
        return f"Section {index}"

    def _get_placeholder_content(self, style: str, section_type: str,
                                 schema: Optional[Dict[str, Any]] = None,
                                 heading_schema: Optional[Dict[str, Any]] = None) -> str:
        """
        Get placeholder content based on style and section type.

        When the schema sets ``x-markitect-content-instructions-enabled`` and
        the heading schema carries a content instruction, that instruction is
        returned verbatim. Otherwise the generator registered for ``style``
        is used; unknown styles fall back to ``'default'``.
        """
        # Check if we have content instructions from schema
        if schema and heading_schema and schema.get('x-markitect-content-instructions-enabled', False):
            content_instruction = self._extract_content_instruction_from_heading_schema(heading_schema)
            if content_instruction:
                return content_instruction

        # Fall back to standard placeholder generation
        generator = self.placeholder_styles.get(style, self.placeholder_styles['default'])
        return generator(section_type)

    def _generate_default_placeholder(self, section_type: str) -> str:
        """Generate default placeholder content."""
        return f"TODO: Add content for {section_type} section."

    def _generate_custom_placeholder(self, section_type: str) -> str:
        """Generate custom style placeholder content."""
        placeholders = {
            "introduction": "Write an engaging introduction that outlines the main topic. Add your content here.",
            "main": "Add your main content here.",
            "section_level_2": "Describe the key points for this section.",
            "section_level_3": "Provide detailed information and examples.",
            "section_level_4": "Include specific details and supporting information.",
        }
        return placeholders.get(section_type, f"Content for {section_type} goes here.")

    def _generate_detailed_placeholder(self, section_type: str) -> str:
        """Generate detailed placeholder content with guidance."""
        # Written with explicit newlines so the emitted markdown does not
        # depend on how this source file is indented.
        detailed_placeholders = {
            "introduction": (
                "<!-- Introduction Section -->\n"
                "Write an engaging introduction that:\n"
                "- Introduces the main topic\n"
                "- Provides context and background\n"
                "- Outlines what the reader will learn\n"
                "\n"
                "TODO: Replace this placeholder with your introduction content."
            ),
            "main": (
                "<!-- Main Content Section -->\n"
                "This is the primary content area. Consider including:\n"
                "- Key information and concepts\n"
                "- Supporting details and examples\n"
                "- Clear explanations and analysis\n"
                "\n"
                "TODO: Add your main content here."
            ),
            "section_level_2": (
                "<!-- Section Content -->\n"
                "This section should cover:\n"
                "- Main points related to the section topic\n"
                "- Supporting information and details\n"
                "- Examples or case studies if relevant\n"
                "\n"
                "TODO: Add content for this section."
            ),
            "section_level_3": (
                "<!-- Subsection Content -->\n"
                "Provide detailed information including:\n"
                "- Specific details and explanations\n"
                "- Examples and illustrations\n"
                "- References to related concepts\n"
                "\n"
                "TODO: Add detailed content for this subsection."
            ),
        }

        return detailed_placeholders.get(
            section_type,
            f"<!-- {section_type.title()} Section -->\nTODO: Add content for {section_type}."
        )

    @staticmethod
    def _items_property_schema(heading_schema: Any, property_name: str) -> Dict[str, Any]:
        """
        Return ``heading_schema['items']['properties'][property_name]``.

        Any missing or non-dict step along that path yields an empty dict,
        so callers can use ``.get`` on the result unconditionally.
        """
        node = heading_schema
        for key in ('items', 'properties', property_name):
            if not isinstance(node, dict):
                return {}
            node = node.get(key, {})
        return node if isinstance(node, dict) else {}

    def _extract_content_instruction_from_heading_schema(self, heading_schema: Dict[str, Any]) -> Optional[str]:
        """
        Extract content instruction from a heading schema items definition.

        Args:
            heading_schema: The schema definition for a heading level

        Returns:
            Content instruction text if found, None otherwise
        """
        # Schema structure: heading_schema -> items -> properties -> x-markitect-content-instructions -> const
        instruction_schema = self._items_property_schema(heading_schema, 'x-markitect-content-instructions')
        return instruction_schema.get('const')

    def _extract_heading_text_from_schema(self, heading_schema: Dict[str, Any], index: int) -> Optional[str]:
        """
        Extract actual heading text from schema enum constraints for outline mode.

        Args:
            heading_schema: The schema definition for a heading level
            index: The index of the heading (0-based)

        Returns:
            Actual heading text if found in enum constraints, None otherwise
        """
        # Schema structure: heading_schema -> items -> properties -> content -> enum
        enum_values = self._items_property_schema(heading_schema, 'content').get('enum', [])
        if isinstance(enum_values, list) and 0 <= index < len(enum_values):
            return enum_values[index]
        return None