# markitect-main/markitect/stub_generator.py

"""
Stub Generator for Issue #6: Generate a Markdown Stub from a Schema.

This module provides functionality to create markdown template files from JSON schemas
with appropriate placeholder content and structural elements.
"""

import json
from pathlib import Path
from typing import Dict, Any, Optional, List, Callable


# Constants for better maintainability
DEFAULT_TITLE = "Document Title"
# ATX heading marker; it is repeated once per heading level ("#", "##", ...).
HEADING_PREFIX_LEVEL_1 = "#"
# Prefix of the per-level keys under properties.headings.properties ("level_2", ...).
LEVEL_KEY_PREFIX = "level_"
# Markdown only defines ATX headings for levels 1 through 6.
MIN_HEADING_LEVEL = 1
MAX_HEADING_LEVEL = 6


class StubGenerator:
    """
    Generates markdown stub/template files from JSON schemas.

    Creates markdown documents with proper heading hierarchy and placeholder
    content based on the structural definitions in JSON schemas.

    The generator reads the following parts of a schema:

    - ``title``: used as the document title unless overridden by the caller.
    - ``properties.headings.properties.level_N``: one entry per heading level;
      ``minItems`` gives the number of headings to emit for that level.
    - ``level_N.items.properties.content.enum``: fixed heading texts.
    - ``level_N.items.properties.x-markitect-content-instructions.const``:
      instruction text used instead of a placeholder when the schema sets
      ``x-markitect-content-instructions-enabled``.
    """

    # Canonical level keys ("level_1" .. "level_6") mapped to their numeric level.
    # Keys that are not in this table cannot be rendered as markdown headings.
    _LEVEL_BY_KEY: Dict[str, int] = {
        f"{LEVEL_KEY_PREFIX}{level}": level
        for level in range(MIN_HEADING_LEVEL, MAX_HEADING_LEVEL + 1)
    }

    # Generic section titles per heading level, used when the schema does not
    # pin the heading text through an enum constraint.
    _SECTION_NAMES: Dict[int, List[str]] = {
        2: ['Introduction', 'Main Content', 'Conclusion', 'Summary', 'Overview'],
        3: ['Background', 'Analysis', 'Implementation', 'Results', 'Discussion'],
        4: ['Details', 'Examples', 'Notes', 'Additional Info'],
        5: ['Subsection A', 'Subsection B', 'Subsection C'],
        6: ['Item', 'Point', 'Note'],
    }

    def __init__(self):
        """Initialize the stub generator and register the placeholder styles."""
        self.placeholder_styles: Dict[str, Callable[[str], str]] = {
            'default': self._generate_default_placeholder,
            'custom': self._generate_custom_placeholder,
            'detailed': self._generate_detailed_placeholder
        }

    def generate_stub_from_schema(self, schema: Dict[str, Any],
                                placeholder_style: str = 'default',
                                title: Optional[str] = None,
                                schema_file_path: Optional[str] = None) -> str:
        """
        Generate a markdown stub from a JSON schema dictionary.

        Args:
            schema: JSON schema as dictionary
            placeholder_style: Style of placeholder content ('default', 'custom', 'detailed');
                unknown styles fall back to 'default'
            title: Custom title for the document (overrides schema title)
            schema_file_path: Optional path to schema file for reference metadata

        Returns:
            Generated markdown content as string
        """
        # An explicit title wins; an empty or null schema title falls back to the
        # default instead of producing a "# " or "# None" heading.
        doc_title = title or schema.get('title') or DEFAULT_TITLE

        lines: List[str] = []

        # Add schema reference metadata if schema file path is provided
        if schema_file_path:
            lines.append(f"<!-- Generated from schema: {schema_file_path} -->")
            lines.append("")

        # Extract heading structure from schema, tolerating malformed (non-object) nodes
        schema_properties = self._dict_or_empty(schema.get('properties'))
        headings_schema = self._dict_or_empty(schema_properties.get('headings'))
        heading_properties = self._dict_or_empty(headings_schema.get('properties'))

        body: List[str] = []
        if heading_properties:
            body = self._generate_content_from_headings(
                heading_properties, doc_title, placeholder_style, schema=schema
            )

        if not body:
            # No usable heading structure: emit a minimal document rather than
            # an empty one.
            body = [
                f"{HEADING_PREFIX_LEVEL_1} {doc_title}",
                "",
                self._get_placeholder_content(placeholder_style, "main", schema=schema),
                "",
            ]

        lines.extend(body)
        return '\n'.join(lines)

    def generate_stub_from_file(self, schema_file: Path,
                              placeholder_style: str = 'default',
                              title: Optional[str] = None) -> str:
        """
        Generate a markdown stub from a JSON schema file.

        Args:
            schema_file: Path to JSON schema file (a plain string path is accepted too)
            placeholder_style: Style of placeholder content
            title: Custom title for the document (overrides schema title)

        Returns:
            Generated markdown content as string

        Raises:
            FileNotFoundError: If schema file doesn't exist
            json.JSONDecodeError: If schema file contains invalid JSON
        """
        schema_path = Path(schema_file)
        if not schema_path.exists():
            raise FileNotFoundError(f"Schema file not found: {schema_file}")

        with open(schema_path, 'r', encoding='utf-8') as f:
            schema = json.load(f)

        return self.generate_stub_from_schema(
            schema,
            placeholder_style=placeholder_style,
            title=title,
            schema_file_path=str(schema_path),
        )

    def generate_stub_to_file(self, schema: Dict[str, Any],
                            output_file: Path,
                            placeholder_style: str = 'default',
                            title: Optional[str] = None,
                            schema_file_path: Optional[str] = None) -> None:
        """
        Generate a markdown stub and save it to a file.

        Args:
            schema: JSON schema as dictionary
            output_file: Path where to save the generated markdown
            placeholder_style: Style of placeholder content
            title: Custom title for the document
            schema_file_path: Optional path to schema file for reference metadata
        """
        content = self.generate_stub_from_schema(schema, placeholder_style, title, schema_file_path)

        output_path = Path(output_file)

        # Ensure parent directory exists
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(content)

    def _generate_content_from_headings(self, heading_properties: Dict[str, Any],
                                      doc_title: str, placeholder_style: str, schema: Optional[Dict[str, Any]] = None) -> List[str]:
        """
        Generate markdown content from heading structure.

        Levels are emitted in ascending order (all headings of one level, then
        the next level). When a level 1 entry exists, exactly one H1 is written
        and its body uses the "introduction" placeholder. The first heading of
        the shallowest level uses ``doc_title`` when the schema does not fix
        its text; every other heading falls back to a generic section name.

        Args:
            heading_properties: Mapping of level keys ("level_N") to level schemas
            doc_title: Title used for the first heading of the shallowest level
            placeholder_style: Style of placeholder content
            schema: Full schema, needed to detect content instructions

        Returns:
            Markdown lines; empty when no usable heading level is defined
        """
        level_keys = sorted(
            key for key in heading_properties
            if isinstance(key, str) and key.startswith(LEVEL_KEY_PREFIX)
        )
        heading_counts = self._calculate_heading_counts(level_keys, heading_properties)
        if not heading_counts:
            return []

        top_level = min(heading_counts)
        lines: List[str] = []

        for level in sorted(heading_counts):
            heading_schema = self._dict_or_empty(
                heading_properties.get(f"{LEVEL_KEY_PREFIX}{level}")
            )
            heading_prefix = HEADING_PREFIX_LEVEL_1 * level

            is_document_heading = level == MIN_HEADING_LEVEL
            # A document gets a single H1 no matter what minItems says.
            count = 1 if is_document_heading else heading_counts[level]
            section_type = "introduction" if is_document_heading else f"section_level_{level}"

            for i in range(count):
                # Prefer heading text fixed by the schema's enum constraint.
                section_name = self._extract_heading_text_from_schema(heading_schema, i)
                if not section_name:
                    if level == top_level and i == 0:
                        section_name = doc_title
                    else:
                        section_name = self._generate_section_name(level, i + 1)

                lines.append(f"{heading_prefix} {section_name}")
                lines.append("")
                lines.append(self._get_placeholder_content(
                    placeholder_style,
                    section_type,
                    schema=schema,
                    heading_schema=heading_schema
                ))
                lines.append("")

        return lines

    def _calculate_heading_counts(self, levels: List[str], heading_properties: Dict[str, Any]) -> Dict[int, int]:
        """
        Calculate the required count for each heading level.

        Args:
            levels: Level keys to consider (e.g. ["level_1", "level_2"])
            heading_properties: Mapping of level keys to level schemas

        Returns:
            Mapping of numeric heading level to the number of headings to emit.
            Keys that are not "level_1" .. "level_6" are skipped because they
            cannot be rendered as markdown headings.
        """
        heading_counts: Dict[int, int] = {}
        for level_key in levels:
            level_num = self._LEVEL_BY_KEY.get(level_key)
            if level_num is None:
                continue

            level_props = self._dict_or_empty(heading_properties.get(level_key))

            # Use the minimum required count; anything that is not a
            # non-negative integer is treated as "not specified".
            min_items = level_props.get('minItems', 1)
            if isinstance(min_items, bool) or not isinstance(min_items, int) or min_items < 0:
                min_items = 1

            heading_counts[level_num] = min_items
        return heading_counts

    def _generate_section_name(self, level: int, index: int) -> str:
        """
        Generate appropriate section names based on level and index.

        Args:
            level: Heading level (2-6 have predefined names)
            index: 1-based position of the heading within its level

        Returns:
            A predefined name when one exists for the position, else "Section <index>"
        """
        names = self._SECTION_NAMES.get(level, [])
        # The lower bound keeps index 0 or negatives from wrapping around.
        if 1 <= index <= len(names):
            return names[index - 1]
        return f"Section {index}"

    def _get_placeholder_content(self, style: str, section_type: str, schema: Optional[Dict[str, Any]] = None, heading_schema: Optional[Dict[str, Any]] = None) -> str:
        """
        Get placeholder content based on style and section type.

        Args:
            style: Placeholder style name; unknown names fall back to 'default'
            section_type: Section identifier such as "main" or "section_level_2"
            schema: Full schema; consulted for the content-instructions flag
            heading_schema: Schema of the heading level the content belongs to

        Returns:
            The schema's content instruction when enabled and present,
            otherwise generated placeholder text
        """
        # Check if we have content instructions from schema
        if schema and heading_schema and schema.get('x-markitect-content-instructions-enabled', False):
            content_instruction = self._extract_content_instruction_from_heading_schema(heading_schema)
            if content_instruction:
                return content_instruction

        # Fall back to standard placeholder generation
        generator = self.placeholder_styles.get(style, self.placeholder_styles['default'])
        return generator(section_type)

    def _generate_default_placeholder(self, section_type: str) -> str:
        """Generate default placeholder content."""
        return f"TODO: Add content for {section_type} section."

    def _generate_custom_placeholder(self, section_type: str) -> str:
        """Generate custom style placeholder content."""
        placeholders = {
            "introduction": "Write an engaging introduction that outlines the main topic. Add your content here.",
            "main": "Add your main content here.",
            "section_level_2": "Describe the key points for this section.",
            "section_level_3": "Provide detailed information and examples.",
            "section_level_4": "Include specific details and supporting information.",
        }
        return placeholders.get(section_type, f"Content for {section_type} goes here.")

    def _generate_detailed_placeholder(self, section_type: str) -> str:
        """Generate detailed placeholder content with guidance."""
        detailed_placeholders = {
            "introduction": """<!-- Introduction Section -->
Write an engaging introduction that:
- Introduces the main topic
- Provides context and background
- Outlines what the reader will learn

TODO: Replace this placeholder with your introduction content.""",
            "main": """<!-- Main Content Section -->
This is the primary content area. Consider including:
- Key information and concepts
- Supporting details and examples
- Clear explanations and analysis

TODO: Add your main content here.""",
            "section_level_2": """<!-- Section Content -->
This section should cover:
- Main points related to the section topic
- Supporting information and details
- Examples or case studies if relevant

TODO: Add content for this section.""",
            "section_level_3": """<!-- Subsection Content -->
Provide detailed information including:
- Specific details and explanations
- Examples and illustrations
- References to related concepts

TODO: Add detailed content for this subsection.""",
        }

        return detailed_placeholders.get(
            section_type,
            f"<!-- {section_type.title()} Section -->\nTODO: Add content for {section_type}."
        )

    @staticmethod
    def _dict_or_empty(value: Any) -> Dict[str, Any]:
        """Return ``value`` when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    def _get_items_property_schema(self, heading_schema: Any, property_name: str) -> Dict[str, Any]:
        """
        Return the schema at heading_schema -> items -> properties -> property_name.

        Any missing or non-object node along the path yields an empty dict.
        """
        items_schema = self._dict_or_empty(self._dict_or_empty(heading_schema).get('items'))
        properties = self._dict_or_empty(items_schema.get('properties'))
        return self._dict_or_empty(properties.get(property_name))

    def _extract_content_instruction_from_heading_schema(self, heading_schema: Dict[str, Any]) -> Optional[str]:
        """
        Extract content instruction from a heading schema items definition.

        Args:
            heading_schema: The schema definition for a heading level

        Returns:
            Content instruction text if found, None otherwise
        """
        # Schema structure: heading_schema -> items -> properties -> x-markitect-content-instructions -> const
        instruction = self._get_items_property_schema(
            heading_schema, 'x-markitect-content-instructions'
        ).get('const')

        # Only text can be written into the document; ignore other const types.
        return instruction if isinstance(instruction, str) else None

    def _extract_heading_text_from_schema(self, heading_schema: Dict[str, Any], index: int) -> Optional[str]:
        """
        Extract actual heading text from schema enum constraints for outline mode.

        Args:
            heading_schema: The schema definition for a heading level
            index: The index of the heading (0-based)

        Returns:
            Actual heading text if found in enum constraints, None otherwise
        """
        # Schema structure: heading_schema -> items -> properties -> content -> enum
        enum_values = self._get_items_property_schema(heading_schema, 'content').get('enum')
        if isinstance(enum_values, list) and 0 <= index < len(enum_values):
            heading_text = enum_values[index]
            # Non-string or blank entries cannot serve as heading text.
            if isinstance(heading_text, str) and heading_text.strip():
                return heading_text

        return None