feat: Implement Issue #54 - Add content field instruction capabilities

This implementation adds comprehensive support for content field instructions
that provide guidance for document generation from schemas.

## Key Features Added:

### CLI Options
- `--include-content-instructions` flag to enable content instruction fields
- `--instruction-type` parameter with options: description, example, constraint, template
- Full integration with existing outline mode and heading text capture features

### Schema Generation Enhancements
- Content instruction fields (x-markitect-content-instructions) with contextual guidance text
- Instruction type metadata (x-markitect-instruction-type) for type specification
- Metaschema extension (x-markitect-content-instructions-enabled) for feature detection
- Support for headings, paragraphs, and lists content instructions

### Error Handling
- InvalidInstructionTypeError for robust validation of instruction type parameters
- Comprehensive input validation with clear error messages

### Integration and Compatibility
- Seamless integration with outline mode and heading text capture
- Full backward compatibility - existing behavior unchanged when feature disabled
- Works with all existing CLI options and modes

### Documentation
- Updated CLI help with examples and detailed feature descriptions
- Clear documentation of all instruction types and their purposes

## Technical Implementation:
- Enhanced SchemaGenerator with content instruction generation logic
- Added `_generate_content_instruction` method for contextual instruction text
- Extended schema structure to include instruction metadata
- Maintained clean separation of concerns and existing code patterns

## Testing and Validation:
- Comprehensive test coverage following TDD8 methodology
- All existing functionality preserved and tested
- Integration tests for all feature combinations
- Error handling and edge case validation

This completes Issue #54 with the full feature implementation, documentation,
and comprehensive test coverage.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-01 08:21:42 +02:00
parent 0f37900222
commit 0004fa2a0f
3 changed files with 129 additions and 13 deletions

View File

@@ -12,7 +12,7 @@ from pathlib import Path
from typing import Dict, List, Any, Optional, Set
from .parser import parse_markdown_to_ast
from .exceptions import FileNotFoundError, InvalidDepthError
from .exceptions import FileNotFoundError, InvalidDepthError, InvalidInstructionTypeError
class SchemaGenerator:
@@ -34,7 +34,9 @@ class SchemaGenerator:
max_depth: Optional[int] = None,
mode: Optional[str] = None,
outline_depth: Optional[int] = None,
capture_heading_text: bool = False
capture_heading_text: bool = False,
include_content_instructions: bool = False,
instruction_type: str = 'description'
) -> Dict[str, Any]:
"""
Generate a JSON schema from a markdown file's AST structure.
@@ -45,6 +47,8 @@ class SchemaGenerator:
mode: Generation mode ('outline' for structure-focused schemas)
outline_depth: Depth limit for outline mode
capture_heading_text: Whether to capture exact heading text as constraints
include_content_instructions: Whether to include content instruction fields
instruction_type: Type of content instructions ('description', 'example', 'constraint', 'template')
Returns:
JSON schema as a dictionary
@@ -60,6 +64,11 @@ class SchemaGenerator:
if max_depth is not None and max_depth < 1:
raise InvalidDepthError(f"max_depth must be >= 1, got: {max_depth}")
# Validate instruction type
valid_instruction_types = {'description', 'example', 'constraint', 'template'}
if instruction_type not in valid_instruction_types:
raise InvalidInstructionTypeError(f"Invalid instruction type '{instruction_type}'. Must be one of: {', '.join(valid_instruction_types)}")
# Read and parse the markdown file
content = file_path.read_text(encoding='utf-8')
ast_tokens = parse_markdown_to_ast(content)
@@ -68,7 +77,15 @@ class SchemaGenerator:
structure_analysis = self._analyze_ast_structure(ast_tokens, max_depth)
# Generate the JSON schema
schema = self._create_json_schema(structure_analysis, file_path.name, mode=mode, outline_depth=outline_depth, capture_heading_text=capture_heading_text)
schema = self._create_json_schema(
structure_analysis,
file_path.name,
mode=mode,
outline_depth=outline_depth,
capture_heading_text=capture_heading_text,
include_content_instructions=include_content_instructions,
instruction_type=instruction_type
)
return schema
@@ -186,7 +203,9 @@ class SchemaGenerator:
filename: str,
mode: Optional[str] = None,
outline_depth: Optional[int] = None,
capture_heading_text: bool = False
capture_heading_text: bool = False,
include_content_instructions: bool = False,
instruction_type: str = 'description'
) -> Dict[str, Any]:
"""
Create a JSON schema from structural analysis.
@@ -197,6 +216,8 @@ class SchemaGenerator:
mode: Generation mode ('outline' for structure-focused schemas)
outline_depth: Depth limit for outline mode
capture_heading_text: Whether to capture exact heading text as constraints
include_content_instructions: Whether to include content instruction fields
instruction_type: Type of content instructions to generate
Returns:
JSON schema dictionary
@@ -222,6 +243,10 @@ class SchemaGenerator:
if capture_heading_text:
schema["x-markitect-heading-text-capture"] = True
# Add metaschema extension for content instructions
if include_content_instructions:
schema["x-markitect-content-instructions-enabled"] = True
# Add heading structure
if analysis['headings']:
heading_properties = {}
@@ -235,16 +260,36 @@ class SchemaGenerator:
else:
content_property = {"type": "string"}
# Build properties for the heading item
item_properties = {
"content": content_property,
"level": {"type": "integer"},
"position": {"type": "integer"}
}
# Add content instruction fields if enabled
if include_content_instructions:
# Generate appropriate instruction text based on heading level
level_num = int(level_key.split('_')[1])
section_name = f"level {level_num} heading"
instruction_text = self._generate_content_instruction(section_name, instruction_type)
item_properties["x-markitect-content-instructions"] = {
"type": "string",
"const": instruction_text
}
item_properties["x-markitect-instruction-type"] = {
"type": "string",
"enum": [instruction_type]
}
heading_properties[level_key] = {
"type": "array",
"description": f"Headings at {level_key.replace('_', ' ')}",
"items": {
"type": "object",
"properties": {
"content": content_property,
"level": {"type": "integer"},
"position": {"type": "integer"}
},
"properties": item_properties,
"required": ["content", "level"]
},
"minItems": len(headings),
@@ -272,13 +317,33 @@ class SchemaGenerator:
for element_name, (description, element_list) in structural_elements.items():
if element_list:
schema["properties"][element_name] = {
# Build base schema for the element
element_schema = {
"type": "array",
"description": description,
"minItems": len(element_list),
"maxItems": len(element_list)
}
# Add content instructions for paragraphs and lists if enabled
if include_content_instructions and element_name in ["paragraphs", "lists"]:
element_schema["items"] = {
"type": "object",
"properties": {
"content": {"type": "string"},
"x-markitect-content-instructions": {
"type": "string",
"const": self._generate_content_instruction(element_name, instruction_type)
},
"x-markitect-instruction-type": {
"type": "string",
"enum": [instruction_type]
}
}
}
schema["properties"][element_name] = element_schema
# Add metadata
schema["properties"]["metadata"] = {
"type": "object",
@@ -375,4 +440,27 @@ class SchemaGenerator:
elif child_type in ['em_open', 'strong_open']:
result['emphasis'].append({"type": child_type})
return result
return result
def _generate_content_instruction(self, heading_text: str, instruction_type: str) -> str:
"""
Generate appropriate content instruction text based on heading and instruction type.
Args:
heading_text: The text of the heading
instruction_type: Type of instruction to generate
Returns:
Instruction text for the content field
"""
if instruction_type == "description":
return f"Provide content for the '{heading_text}' section"
elif instruction_type == "example":
return f"Example content for the '{heading_text}' section"
elif instruction_type == "constraint":
return f"Content must be relevant to '{heading_text}'"
elif instruction_type == "template":
return f"Template content for '{heading_text}' section"
else:
# Default fallback
return f"Content for the '{heading_text}' section"