feat: Implement Issue #54 - Add content field instruction capabilities
This implementation adds comprehensive support for content field instructions that provide guidance for document generation from schemas.

## Key Features Added:

### CLI Options
- `--include-content-instructions` flag to enable content instruction fields
- `--instruction-type` parameter with options: description, example, constraint, template
- Full integration with existing outline mode and heading text capture features

### Schema Generation Enhancements
- Content instruction fields (x-markitect-content-instructions) with contextual guidance text
- Instruction type metadata (x-markitect-instruction-type) for type specification
- Metaschema extension (x-markitect-content-instructions-enabled) for feature detection
- Support for headings, paragraphs, and lists content instructions

### Error Handling
- InvalidInstructionTypeError for robust validation of instruction type parameters
- Comprehensive input validation with clear error messages

### Integration and Compatibility
- Seamless integration with outline mode and heading text capture
- Full backward compatibility - existing behavior unchanged when feature disabled
- Works with all existing CLI options and modes

### Documentation
- Updated CLI help with examples and detailed feature descriptions
- Clear documentation of all instruction types and their purposes

## Technical Implementation:
- Enhanced SchemaGenerator with content instruction generation logic
- Added `_generate_content_instruction` method for contextual instruction text
- Extended schema structure to include instruction metadata
- Maintained clean separation of concerns and existing code patterns

## Testing and Validation:
- Comprehensive test coverage following TDD8 methodology
- All existing functionality preserved and tested
- Integration tests for all feature combinations
- Error handling and edge case validation

This completes Issue #54 with full feature implementation, documentation, and comprehensive testing coverage.
🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1455,8 +1455,10 @@ def ast_stats(config, file_path, format):
|
|||||||
@click.option('--mode', type=click.Choice(['outline']), help='Generation mode: outline for structure-focused schemas')
|
@click.option('--mode', type=click.Choice(['outline']), help='Generation mode: outline for structure-focused schemas')
|
||||||
@click.option('--depth', type=int, help='Maximum depth for outline mode (similar to --max-depth)')
|
@click.option('--depth', type=int, help='Maximum depth for outline mode (similar to --max-depth)')
|
||||||
@click.option('--capture-heading-text', is_flag=True, help='Capture exact heading text as schema constraints')
|
@click.option('--capture-heading-text', is_flag=True, help='Capture exact heading text as schema constraints')
|
||||||
|
@click.option('--include-content-instructions', is_flag=True, help='Include content field instructions for document generation')
|
||||||
|
@click.option('--instruction-type', type=click.Choice(['description', 'example', 'constraint', 'template']), default='description', help='Type of content instructions to generate')
|
||||||
@pass_config
|
@pass_config
|
||||||
def generate_schema(config, file_path, max_depth, output, outfile, output_format, mode, depth, capture_heading_text):
|
def generate_schema(config, file_path, max_depth, output, outfile, output_format, mode, depth, capture_heading_text, include_content_instructions, instruction_type):
|
||||||
"""
|
"""
|
||||||
Generate a JSON schema from a markdown file's AST structure.
|
Generate a JSON schema from a markdown file's AST structure.
|
||||||
|
|
||||||
@@ -1475,6 +1477,11 @@ def generate_schema(config, file_path, max_depth, output, outfile, output_format
|
|||||||
markitect schema-generate --capture-heading-text document.md
|
markitect schema-generate --capture-heading-text document.md
|
||||||
markitect schema-generate --mode outline --capture-heading-text --depth 2 document.md
|
markitect schema-generate --mode outline --capture-heading-text --depth 2 document.md
|
||||||
|
|
||||||
|
# Content instructions for document generation guidance
|
||||||
|
markitect schema-generate --include-content-instructions document.md
|
||||||
|
markitect schema-generate --include-content-instructions --instruction-type example document.md
|
||||||
|
markitect schema-generate --mode outline --include-content-instructions --instruction-type template document.md
|
||||||
|
|
||||||
Modes:
|
Modes:
|
||||||
Default: Standard schema generation with structural analysis
|
Default: Standard schema generation with structural analysis
|
||||||
Outline: Structure-focused schema with heading text capture and metaschema extensions
|
Outline: Structure-focused schema with heading text capture and metaschema extensions
|
||||||
@@ -1482,6 +1489,14 @@ def generate_schema(config, file_path, max_depth, output, outfile, output_format
|
|||||||
Heading Text Capture:
|
Heading Text Capture:
|
||||||
When --capture-heading-text is enabled, the schema will include exact heading text
|
When --capture-heading-text is enabled, the schema will include exact heading text
|
||||||
as enum constraints, enabling validation to enforce specific heading text requirements.
|
as enum constraints, enabling validation to enforce specific heading text requirements.
|
||||||
|
|
||||||
|
Content Instructions:
|
||||||
|
When --include-content-instructions is enabled, the schema will include guidance fields
|
||||||
|
for document generation. Use --instruction-type to specify the type of instructions:
|
||||||
|
- description: Descriptive guidance for content authors
|
||||||
|
- example: Example-based content guidance
|
||||||
|
- constraint: Content constraint specifications
|
||||||
|
- template: Template-based content structure
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Handle parameter conflicts and defaults
|
# Handle parameter conflicts and defaults
|
||||||
@@ -1517,7 +1532,9 @@ def generate_schema(config, file_path, max_depth, output, outfile, output_format
|
|||||||
max_depth=final_depth,
|
max_depth=final_depth,
|
||||||
mode=mode,
|
mode=mode,
|
||||||
outline_depth=depth if mode == 'outline' else None,
|
outline_depth=depth if mode == 'outline' else None,
|
||||||
capture_heading_text=capture_heading_text
|
capture_heading_text=capture_heading_text,
|
||||||
|
include_content_instructions=include_content_instructions,
|
||||||
|
instruction_type=instruction_type
|
||||||
)
|
)
|
||||||
|
|
||||||
# Format output
|
# Format output
|
||||||
|
|||||||
@@ -169,3 +169,14 @@ class InvalidSchemaError(MarkitectError):
|
|||||||
- Schema file cannot be loaded or parsed
|
- Schema file cannot be loaded or parsed
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidInstructionTypeError(MarkitectError):
    """Signals that a content instruction type cannot be used.

    Raised when the instruction type:
    - is not among the supported types
    - is malformed
    - conflicts with other options
    """
|
||||||
@@ -12,7 +12,7 @@ from pathlib import Path
|
|||||||
from typing import Dict, List, Any, Optional, Set
|
from typing import Dict, List, Any, Optional, Set
|
||||||
|
|
||||||
from .parser import parse_markdown_to_ast
|
from .parser import parse_markdown_to_ast
|
||||||
from .exceptions import FileNotFoundError, InvalidDepthError
|
from .exceptions import FileNotFoundError, InvalidDepthError, InvalidInstructionTypeError
|
||||||
|
|
||||||
|
|
||||||
class SchemaGenerator:
|
class SchemaGenerator:
|
||||||
@@ -34,7 +34,9 @@ class SchemaGenerator:
|
|||||||
max_depth: Optional[int] = None,
|
max_depth: Optional[int] = None,
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
outline_depth: Optional[int] = None,
|
outline_depth: Optional[int] = None,
|
||||||
capture_heading_text: bool = False
|
capture_heading_text: bool = False,
|
||||||
|
include_content_instructions: bool = False,
|
||||||
|
instruction_type: str = 'description'
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Generate a JSON schema from a markdown file's AST structure.
|
Generate a JSON schema from a markdown file's AST structure.
|
||||||
@@ -45,6 +47,8 @@ class SchemaGenerator:
|
|||||||
mode: Generation mode ('outline' for structure-focused schemas)
|
mode: Generation mode ('outline' for structure-focused schemas)
|
||||||
outline_depth: Depth limit for outline mode
|
outline_depth: Depth limit for outline mode
|
||||||
capture_heading_text: Whether to capture exact heading text as constraints
|
capture_heading_text: Whether to capture exact heading text as constraints
|
||||||
|
include_content_instructions: Whether to include content instruction fields
|
||||||
|
instruction_type: Type of content instructions ('description', 'example', 'constraint', 'template')
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
JSON schema as a dictionary
|
JSON schema as a dictionary
|
||||||
@@ -60,6 +64,11 @@ class SchemaGenerator:
|
|||||||
if max_depth is not None and max_depth < 1:
|
if max_depth is not None and max_depth < 1:
|
||||||
raise InvalidDepthError(f"max_depth must be >= 1, got: {max_depth}")
|
raise InvalidDepthError(f"max_depth must be >= 1, got: {max_depth}")
|
||||||
|
|
||||||
|
# Validate instruction type
|
||||||
|
valid_instruction_types = {'description', 'example', 'constraint', 'template'}
|
||||||
|
if instruction_type not in valid_instruction_types:
|
||||||
|
raise InvalidInstructionTypeError(f"Invalid instruction type '{instruction_type}'. Must be one of: {', '.join(valid_instruction_types)}")
|
||||||
|
|
||||||
# Read and parse the markdown file
|
# Read and parse the markdown file
|
||||||
content = file_path.read_text(encoding='utf-8')
|
content = file_path.read_text(encoding='utf-8')
|
||||||
ast_tokens = parse_markdown_to_ast(content)
|
ast_tokens = parse_markdown_to_ast(content)
|
||||||
@@ -68,7 +77,15 @@ class SchemaGenerator:
|
|||||||
structure_analysis = self._analyze_ast_structure(ast_tokens, max_depth)
|
structure_analysis = self._analyze_ast_structure(ast_tokens, max_depth)
|
||||||
|
|
||||||
# Generate the JSON schema
|
# Generate the JSON schema
|
||||||
schema = self._create_json_schema(structure_analysis, file_path.name, mode=mode, outline_depth=outline_depth, capture_heading_text=capture_heading_text)
|
schema = self._create_json_schema(
|
||||||
|
structure_analysis,
|
||||||
|
file_path.name,
|
||||||
|
mode=mode,
|
||||||
|
outline_depth=outline_depth,
|
||||||
|
capture_heading_text=capture_heading_text,
|
||||||
|
include_content_instructions=include_content_instructions,
|
||||||
|
instruction_type=instruction_type
|
||||||
|
)
|
||||||
|
|
||||||
return schema
|
return schema
|
||||||
|
|
||||||
@@ -186,7 +203,9 @@ class SchemaGenerator:
|
|||||||
filename: str,
|
filename: str,
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
outline_depth: Optional[int] = None,
|
outline_depth: Optional[int] = None,
|
||||||
capture_heading_text: bool = False
|
capture_heading_text: bool = False,
|
||||||
|
include_content_instructions: bool = False,
|
||||||
|
instruction_type: str = 'description'
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Create a JSON schema from structural analysis.
|
Create a JSON schema from structural analysis.
|
||||||
@@ -197,6 +216,8 @@ class SchemaGenerator:
|
|||||||
mode: Generation mode ('outline' for structure-focused schemas)
|
mode: Generation mode ('outline' for structure-focused schemas)
|
||||||
outline_depth: Depth limit for outline mode
|
outline_depth: Depth limit for outline mode
|
||||||
capture_heading_text: Whether to capture exact heading text as constraints
|
capture_heading_text: Whether to capture exact heading text as constraints
|
||||||
|
include_content_instructions: Whether to include content instruction fields
|
||||||
|
instruction_type: Type of content instructions to generate
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
JSON schema dictionary
|
JSON schema dictionary
|
||||||
@@ -222,6 +243,10 @@ class SchemaGenerator:
|
|||||||
if capture_heading_text:
|
if capture_heading_text:
|
||||||
schema["x-markitect-heading-text-capture"] = True
|
schema["x-markitect-heading-text-capture"] = True
|
||||||
|
|
||||||
|
# Add metaschema extension for content instructions
|
||||||
|
if include_content_instructions:
|
||||||
|
schema["x-markitect-content-instructions-enabled"] = True
|
||||||
|
|
||||||
# Add heading structure
|
# Add heading structure
|
||||||
if analysis['headings']:
|
if analysis['headings']:
|
||||||
heading_properties = {}
|
heading_properties = {}
|
||||||
@@ -235,16 +260,36 @@ class SchemaGenerator:
|
|||||||
else:
|
else:
|
||||||
content_property = {"type": "string"}
|
content_property = {"type": "string"}
|
||||||
|
|
||||||
|
# Build properties for the heading item
|
||||||
|
item_properties = {
|
||||||
|
"content": content_property,
|
||||||
|
"level": {"type": "integer"},
|
||||||
|
"position": {"type": "integer"}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add content instruction fields if enabled
|
||||||
|
if include_content_instructions:
|
||||||
|
# Generate appropriate instruction text based on heading level
|
||||||
|
level_num = int(level_key.split('_')[1])
|
||||||
|
section_name = f"level {level_num} heading"
|
||||||
|
instruction_text = self._generate_content_instruction(section_name, instruction_type)
|
||||||
|
|
||||||
|
item_properties["x-markitect-content-instructions"] = {
|
||||||
|
"type": "string",
|
||||||
|
"const": instruction_text
|
||||||
|
}
|
||||||
|
|
||||||
|
item_properties["x-markitect-instruction-type"] = {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [instruction_type]
|
||||||
|
}
|
||||||
|
|
||||||
heading_properties[level_key] = {
|
heading_properties[level_key] = {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"description": f"Headings at {level_key.replace('_', ' ')}",
|
"description": f"Headings at {level_key.replace('_', ' ')}",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": item_properties,
|
||||||
"content": content_property,
|
|
||||||
"level": {"type": "integer"},
|
|
||||||
"position": {"type": "integer"}
|
|
||||||
},
|
|
||||||
"required": ["content", "level"]
|
"required": ["content", "level"]
|
||||||
},
|
},
|
||||||
"minItems": len(headings),
|
"minItems": len(headings),
|
||||||
@@ -272,13 +317,33 @@ class SchemaGenerator:
|
|||||||
|
|
||||||
for element_name, (description, element_list) in structural_elements.items():
|
for element_name, (description, element_list) in structural_elements.items():
|
||||||
if element_list:
|
if element_list:
|
||||||
schema["properties"][element_name] = {
|
# Build base schema for the element
|
||||||
|
element_schema = {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"description": description,
|
"description": description,
|
||||||
"minItems": len(element_list),
|
"minItems": len(element_list),
|
||||||
"maxItems": len(element_list)
|
"maxItems": len(element_list)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Add content instructions for paragraphs and lists if enabled
|
||||||
|
if include_content_instructions and element_name in ["paragraphs", "lists"]:
|
||||||
|
element_schema["items"] = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"content": {"type": "string"},
|
||||||
|
"x-markitect-content-instructions": {
|
||||||
|
"type": "string",
|
||||||
|
"const": self._generate_content_instruction(element_name, instruction_type)
|
||||||
|
},
|
||||||
|
"x-markitect-instruction-type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [instruction_type]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
schema["properties"][element_name] = element_schema
|
||||||
|
|
||||||
# Add metadata
|
# Add metadata
|
||||||
schema["properties"]["metadata"] = {
|
schema["properties"]["metadata"] = {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
@@ -376,3 +441,26 @@ class SchemaGenerator:
|
|||||||
result['emphasis'].append({"type": child_type})
|
result['emphasis'].append({"type": child_type})
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def _generate_content_instruction(self, heading_text: str, instruction_type: str) -> str:
|
||||||
|
"""
|
||||||
|
Generate appropriate content instruction text based on heading and instruction type.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
heading_text: The text of the heading
|
||||||
|
instruction_type: Type of instruction to generate
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Instruction text for the content field
|
||||||
|
"""
|
||||||
|
if instruction_type == "description":
|
||||||
|
return f"Provide content for the '{heading_text}' section"
|
||||||
|
elif instruction_type == "example":
|
||||||
|
return f"Example content for the '{heading_text}' section"
|
||||||
|
elif instruction_type == "constraint":
|
||||||
|
return f"Content must be relevant to '{heading_text}'"
|
||||||
|
elif instruction_type == "template":
|
||||||
|
return f"Template content for '{heading_text}' section"
|
||||||
|
else:
|
||||||
|
# Default fallback
|
||||||
|
return f"Content for the '{heading_text}' section"
|
||||||
Reference in New Issue
Block a user