diff --git a/markitect/cli.py b/markitect/cli.py
index f7f2e507..ffa3242a 100644
--- a/markitect/cli.py
+++ b/markitect/cli.py
@@ -1455,8 +1455,10 @@ def ast_stats(config, file_path, format):
 @click.option('--mode', type=click.Choice(['outline']), help='Generation mode: outline for structure-focused schemas')
 @click.option('--depth', type=int, help='Maximum depth for outline mode (similar to --max-depth)')
 @click.option('--capture-heading-text', is_flag=True, help='Capture exact heading text as schema constraints')
+@click.option('--include-content-instructions', is_flag=True, help='Include content field instructions for document generation')
+@click.option('--instruction-type', type=click.Choice(['description', 'example', 'constraint', 'template']), default='description', help='Type of content instructions to generate')
 @pass_config
-def generate_schema(config, file_path, max_depth, output, outfile, output_format, mode, depth, capture_heading_text):
+def generate_schema(config, file_path, max_depth, output, outfile, output_format, mode, depth, capture_heading_text, include_content_instructions, instruction_type):
     """
     Generate a JSON schema from a markdown file's AST structure.
 
@@ -1475,6 +1477,11 @@ def generate_schema(config, file_path, max_depth, output, outfile, output_format
         markitect schema-generate --capture-heading-text document.md
         markitect schema-generate --mode outline --capture-heading-text --depth 2 document.md
 
+        # Content instructions for document generation guidance
+        markitect schema-generate --include-content-instructions document.md
+        markitect schema-generate --include-content-instructions --instruction-type example document.md
+        markitect schema-generate --mode outline --include-content-instructions --instruction-type template document.md
+
     Modes:
         Default: Standard schema generation with structural analysis
         Outline: Structure-focused schema with heading text capture and metaschema extensions
@@ -1482,6 +1489,14 @@ def generate_schema(config, file_path, max_depth, output, outfile, output_format
     Heading Text Capture:
         When --capture-heading-text is enabled, the schema will include exact heading text
         as enum constraints, enabling validation to enforce specific heading text requirements.
+
+    Content Instructions:
+        When --include-content-instructions is enabled, the schema will include guidance fields
+        for document generation. Use --instruction-type to specify the type of instructions:
+        - description: Descriptive guidance for content authors
+        - example: Example-based content guidance
+        - constraint: Content constraint specifications
+        - template: Template-based content structure
     """
     try:
         # Handle parameter conflicts and defaults
@@ -1517,7 +1532,9 @@ def generate_schema(config, file_path, max_depth, output, outfile, output_format
             max_depth=final_depth,
             mode=mode,
             outline_depth=depth if mode == 'outline' else None,
-            capture_heading_text=capture_heading_text
+            capture_heading_text=capture_heading_text,
+            include_content_instructions=include_content_instructions,
+            instruction_type=instruction_type
         )
 
         # Format output
diff --git a/markitect/exceptions.py b/markitect/exceptions.py
index 8db9525f..50039117 100644
--- a/markitect/exceptions.py
+++ b/markitect/exceptions.py
@@ -168,4 +168,15 @@ class InvalidSchemaError(MarkitectError):
     - Schema doesn't conform to JSON Schema specification
     - Schema file cannot be loaded or parsed
     """
+    pass
+
+
+class InvalidInstructionTypeError(MarkitectError):
+    """Errors related to invalid content instruction types.
+
+    Raised when:
+    - Instruction type is not one of the supported types
+    - Instruction type parameter is malformed
+    - Instruction type conflicts with other options
+    """
     pass
\ No newline at end of file
diff --git a/markitect/schema_generator.py b/markitect/schema_generator.py
index 9d2afdbb..b091bc6f 100644
--- a/markitect/schema_generator.py
+++ b/markitect/schema_generator.py
@@ -12,7 +12,7 @@ from pathlib import Path
 from typing import Dict, List, Any, Optional, Set
 
 from .parser import parse_markdown_to_ast
-from .exceptions import FileNotFoundError, InvalidDepthError
+from .exceptions import FileNotFoundError, InvalidDepthError, InvalidInstructionTypeError
 
 
 class SchemaGenerator:
@@ -34,7 +34,9 @@ class SchemaGenerator:
         max_depth: Optional[int] = None,
         mode: Optional[str] = None,
         outline_depth: Optional[int] = None,
-        capture_heading_text: bool = False
+        capture_heading_text: bool = False,
+        include_content_instructions: bool = False,
+        instruction_type: str = 'description'
     ) -> Dict[str, Any]:
         """
         Generate a JSON schema from a markdown file's AST structure.
@@ -45,6 +47,8 @@ class SchemaGenerator:
             mode: Generation mode ('outline' for structure-focused schemas)
             outline_depth: Depth limit for outline mode
             capture_heading_text: Whether to capture exact heading text as constraints
+            include_content_instructions: Whether to include content instruction fields
+            instruction_type: Type of content instructions ('description', 'example', 'constraint', 'template')
 
         Returns:
             JSON schema as a dictionary
@@ -60,6 +64,11 @@ class SchemaGenerator:
         if max_depth is not None and max_depth < 1:
             raise InvalidDepthError(f"max_depth must be >= 1, got: {max_depth}")
 
+        # Validate instruction type
+        valid_instruction_types = {'description', 'example', 'constraint', 'template'}
+        if instruction_type not in valid_instruction_types:
+            raise InvalidInstructionTypeError(f"Invalid instruction type '{instruction_type}'. Must be one of: {', '.join(sorted(valid_instruction_types))}")
+
         # Read and parse the markdown file
         content = file_path.read_text(encoding='utf-8')
         ast_tokens = parse_markdown_to_ast(content)
@@ -68,7 +77,15 @@ class SchemaGenerator:
         structure_analysis = self._analyze_ast_structure(ast_tokens, max_depth)
 
         # Generate the JSON schema
-        schema = self._create_json_schema(structure_analysis, file_path.name, mode=mode, outline_depth=outline_depth, capture_heading_text=capture_heading_text)
+        schema = self._create_json_schema(
+            structure_analysis,
+            file_path.name,
+            mode=mode,
+            outline_depth=outline_depth,
+            capture_heading_text=capture_heading_text,
+            include_content_instructions=include_content_instructions,
+            instruction_type=instruction_type
+        )
 
         return schema
 
@@ -186,7 +203,9 @@ class SchemaGenerator:
         filename: str,
         mode: Optional[str] = None,
         outline_depth: Optional[int] = None,
-        capture_heading_text: bool = False
+        capture_heading_text: bool = False,
+        include_content_instructions: bool = False,
+        instruction_type: str = 'description'
     ) -> Dict[str, Any]:
         """
         Create a JSON schema from structural analysis.
@@ -197,6 +216,8 @@ class SchemaGenerator:
             mode: Generation mode ('outline' for structure-focused schemas)
             outline_depth: Depth limit for outline mode
             capture_heading_text: Whether to capture exact heading text as constraints
+            include_content_instructions: Whether to include content instruction fields
+            instruction_type: Type of content instructions to generate
 
         Returns:
             JSON schema dictionary
@@ -222,6 +243,10 @@ class SchemaGenerator:
         if capture_heading_text:
             schema["x-markitect-heading-text-capture"] = True
 
+        # Add metaschema extension for content instructions
+        if include_content_instructions:
+            schema["x-markitect-content-instructions-enabled"] = True
+
         # Add heading structure
         if analysis['headings']:
             heading_properties = {}
@@ -235,16 +260,36 @@ class SchemaGenerator:
                 else:
                     content_property = {"type": "string"}
 
+                # Build properties for the heading item
+                item_properties = {
+                    "content": content_property,
+                    "level": {"type": "integer"},
+                    "position": {"type": "integer"}
+                }
+
+                # Add content instruction fields if enabled
+                if include_content_instructions:
+                    # Generate appropriate instruction text based on heading level
+                    level_num = int(level_key.split('_')[1])
+                    section_name = f"level {level_num} heading"
+                    instruction_text = self._generate_content_instruction(section_name, instruction_type)
+
+                    item_properties["x-markitect-content-instructions"] = {
+                        "type": "string",
+                        "const": instruction_text
+                    }
+
+                    item_properties["x-markitect-instruction-type"] = {
+                        "type": "string",
+                        "enum": [instruction_type]
+                    }
+
                 heading_properties[level_key] = {
                     "type": "array",
                     "description": f"Headings at {level_key.replace('_', ' ')}",
                     "items": {
                         "type": "object",
-                        "properties": {
-                            "content": content_property,
-                            "level": {"type": "integer"},
-                            "position": {"type": "integer"}
-                        },
+                        "properties": item_properties,
                         "required": ["content", "level"]
                     },
                     "minItems": len(headings),
@@ -272,13 +317,33 @@ class SchemaGenerator:
 
         for element_name, (description, element_list) in structural_elements.items():
             if element_list:
-                schema["properties"][element_name] = {
+                # Build base schema for the element
+                element_schema = {
                     "type": "array",
                     "description": description,
                     "minItems": len(element_list),
                     "maxItems": len(element_list)
                 }
 
+                # Add content instructions for paragraphs and lists if enabled
+                if include_content_instructions and element_name in ["paragraphs", "lists"]:
+                    element_schema["items"] = {
+                        "type": "object",
+                        "properties": {
+                            "content": {"type": "string"},
+                            "x-markitect-content-instructions": {
+                                "type": "string",
+                                "const": self._generate_content_instruction(element_name, instruction_type)
+                            },
+                            "x-markitect-instruction-type": {
+                                "type": "string",
+                                "enum": [instruction_type]
+                            }
+                        }
+                    }
+
+                schema["properties"][element_name] = element_schema
+
         # Add metadata
         schema["properties"]["metadata"] = {
             "type": "object",
@@ -375,4 +440,27 @@ class SchemaGenerator:
             elif child_type in ['em_open', 'strong_open']:
                 result['emphasis'].append({"type": child_type})
 
-        return result
\ No newline at end of file
+        return result
+
+    def _generate_content_instruction(self, heading_text: str, instruction_type: str) -> str:
+        """
+        Generate content instruction text for a section label and instruction type.
+
+        Args:
+            heading_text: Label of the section the instruction refers to (e.g. 'level 2 heading', 'paragraphs')
+            instruction_type: Type of instruction to generate
+
+        Returns:
+            Instruction text for the content field
+        """
+        if instruction_type == "description":
+            return f"Provide content for the '{heading_text}' section"
+        elif instruction_type == "example":
+            return f"Example content for the '{heading_text}' section"
+        elif instruction_type == "constraint":
+            return f"Content must be relevant to '{heading_text}'"
+        elif instruction_type == "template":
+            return f"Template content for '{heading_text}' section"
+        else:
+            # Default fallback
+            return f"Content for the '{heading_text}' section"
\ No newline at end of file