diff --git a/docs/specifications/schema-extensions-spec.md b/docs/specifications/schema-extensions-spec.md new file mode 100644 index 00000000..e2ac5174 --- /dev/null +++ b/docs/specifications/schema-extensions-spec.md @@ -0,0 +1,662 @@ +# MarkiTect Schema Extensions Specification v1.0 + +## Status: Draft - Phase 1 Implementation + +## Overview + +This specification defines MarkiTect-specific extensions to JSON Schema (draft-07) for markdown document validation with content control, section classification, and flexible structural constraints. + +## Design Principles + +1. **Backward Compatibility**: Existing schemas without extensions continue to work +2. **Namespace Isolation**: All extensions prefixed with `x-markitect-` +3. **Progressive Enhancement**: Extensions add capabilities without breaking standard JSON Schema +4. **Clear Semantics**: Each extension has well-defined validation behavior +5. **Metaschema Validation**: All extensions validated by MarkiTect metaschema + +--- + +## Extension: `x-markitect-sections` + +### Purpose + +Define document sections with classification levels (required, recommended, optional, discouraged, improper) and content control specifications. + +### Schema Location + +Applied at the **root level** of the schema or within **properties** that represent document sections. 
+ +### Format + +```json +{ + "x-markitect-sections": { + "SECTION_NAME": { + "classification": "required|recommended|optional|discouraged|improper", + "heading_level": 1|2|3|4|5|6, + "position": "after_title|before_section_name|after_section_name|anywhere", + "content_instruction": "string", + "min_paragraphs": integer, + "max_paragraphs": integer, + "min_code_blocks": integer, + "max_code_blocks": integer, + "min_lists": integer, + "max_lists": integer, + "warning_if_missing": "string", + "error_message": "string", + "alternatives": ["SECTION_NAME_1", "SECTION_NAME_2"] + } + } +} +``` + +### Property Definitions + +#### `classification` (required) + +Classification level determining validation behavior: + +- **`required`**: Section MUST be present. Validation fails if missing. +- **`recommended`**: Section SHOULD be present. Warning if missing, but validation succeeds. +- **`optional`**: Section MAY be present. No validation impact either way. +- **`discouraged`**: Section SHOULD NOT be present. Warning if present, but validation succeeds. +- **`improper`**: Section MUST NOT be present. Validation fails if present. + +**Type**: String enum +**Required**: Yes +**Values**: `["required", "recommended", "optional", "discouraged", "improper"]` + +#### `heading_level` (optional) + +The heading level (H1-H6) for this section. + +**Type**: Integer +**Range**: 1-6 +**Default**: 2 (for standard sections) + +#### `position` (optional) + +Where this section should appear relative to other sections. + +**Type**: String enum +**Values**: +- `"after_title"` - Immediately after document title (H1) +- `"before_section_name"` - Before another named section +- `"after_section_name"` - After another named section +- `"anywhere"` - No position constraint (default) + +**Default**: `"anywhere"` + +#### `content_instruction` (optional) + +Human-readable instruction describing what content belongs in this section. 
+ +**Type**: String +**Usage**: Displayed in validation warnings, generated templates, and documentation + +**Example**: +```json +"content_instruction": "Brief command syntax showing all options and arguments" +``` + +#### Content Constraints (optional) + +Minimum and maximum counts for content elements within the section: + +- **`min_paragraphs`**: Minimum paragraph count (integer ≥ 0) +- **`max_paragraphs`**: Maximum paragraph count (integer ≥ min_paragraphs) +- **`min_code_blocks`**: Minimum code block count (integer ≥ 0) +- **`max_code_blocks`**: Maximum code block count (integer ≥ min_code_blocks) +- **`min_lists`**: Minimum list count (integer ≥ 0) +- **`max_lists`**: Maximum list count (integer ≥ min_lists) + +**Type**: Integer +**Default**: No constraint if omitted + +#### `warning_if_missing` (optional) + +Custom warning message when a recommended section is missing. + +**Type**: String +**Applies to**: `classification: "recommended"` only + +**Example**: +```json +"warning_if_missing": "Examples greatly improve documentation usability" +``` + +#### `error_message` (optional) + +Custom error message when validation fails. + +**Type**: String +**Applies to**: `classification: "required"` or `"improper"` + +**Example**: +```json +"error_message": "Internal notes must not appear in published documentation" +``` + +#### `alternatives` (optional) + +Array of alternative section names that satisfy the requirement. 
+ +**Type**: Array of strings +**Usage**: If any alternative is present, requirement is satisfied + +**Example**: +```json +{ + "classification": "required", + "alternatives": ["EXAMPLES", "USAGE", "TUTORIAL"] +} +``` + +### Example: Manpage Schema with Sections + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Unix Manpage Schema", + "x-markitect-sections": { + "SYNOPSIS": { + "classification": "required", + "heading_level": 2, + "position": "after_title", + "content_instruction": "Brief command syntax with options and arguments", + "min_paragraphs": 1, + "max_paragraphs": 5, + "min_code_blocks": 0, + "max_code_blocks": 3, + "error_message": "SYNOPSIS section is mandatory for all manpages" + }, + "DESCRIPTION": { + "classification": "required", + "heading_level": 2, + "position": "after_section_name", + "content_instruction": "Detailed explanation of what the command does", + "min_paragraphs": 2, + "error_message": "DESCRIPTION section is mandatory for all manpages" + }, + "EXAMPLES": { + "classification": "recommended", + "heading_level": 2, + "content_instruction": "Practical usage examples with explanations", + "min_code_blocks": 3, + "warning_if_missing": "Examples greatly improve manpage usability" + }, + "SEE ALSO": { + "classification": "recommended", + "heading_level": 2, + "content_instruction": "Related commands and documentation references", + "warning_if_missing": "Cross-references help users discover related functionality" + }, + "BUGS": { + "classification": "optional", + "heading_level": 2, + "content_instruction": "Known issues and bug reporting information" + }, + "DEPRECATED": { + "classification": "discouraged", + "heading_level": 2, + "warning_if_missing": "Consider moving deprecated content to historical documentation" + }, + "INTERNAL_NOTES": { + "classification": "improper", + "heading_level": 2, + "error_message": "Internal notes must not appear in published manpages" + } + } +} +``` + +### Validation 
Behavior + +#### Required Sections + +```json +"SYNOPSIS": {"classification": "required"} +``` + +**Validation**: +- Section missing → **ERROR** → `is_valid = False` +- Section present → Continue validation +- Custom `error_message` used if provided + +#### Recommended Sections + +```json +"EXAMPLES": {"classification": "recommended"} +``` + +**Validation**: +- Section missing → **WARNING** → `is_valid = True` (with warnings) +- Section present → Continue validation +- Custom `warning_if_missing` used if provided + +#### Optional Sections + +```json +"BUGS": {"classification": "optional"} +``` + +**Validation**: +- Section missing → No impact +- Section present → Continue validation +- No messages generated + +#### Discouraged Sections + +```json +"DEPRECATED": {"classification": "discouraged"} +``` + +**Validation**: +- Section missing → No impact +- Section present → **WARNING** → `is_valid = True` (with warnings) +- Custom warning message used if provided + +#### Improper Sections + +```json +"INTERNAL_NOTES": {"classification": "improper"} +``` + +**Validation**: +- Section missing → No impact +- Section present → **ERROR** → `is_valid = False` +- Custom `error_message` used if provided + +--- + +## Extension: `x-markitect-content-control` + +### Purpose + +Define content validation rules for document sections including pattern matching, quality metrics, and semantic constraints. + +### Schema Location + +Applied at **root level** or within specific **section properties**. 
+ +### Format + +```json +{ + "x-markitect-content-control": { + "section_name": { + "required_patterns": ["regex_pattern_1", "regex_pattern_2"], + "discouraged_patterns": ["regex_pattern_1"], + "forbidden_patterns": ["regex_pattern_1"], + "content_quality": { + "min_words": integer, + "max_words": integer, + "readability_target": "technical|general|simple|advanced", + "min_sentences": integer, + "max_sentences": integer + }, + "content_instructions": ["instruction_1", "instruction_2"], + "link_validation": { + "check_internal": boolean, + "check_external": boolean, + "allow_fragments": boolean + } + } + } +} +``` + +### Property Definitions + +#### `required_patterns` (optional) + +Array of regex patterns that MUST appear in section content. + +**Type**: Array of strings (valid regex patterns) +**Validation**: ERROR if any pattern missing + +**Example**: +```json +"required_patterns": [ + "\\*\\*[a-z-]+\\*\\*", // Bold command name + "\\[.*\\]" // Options in brackets +] +``` + +#### `discouraged_patterns` (optional) + +Array of regex patterns that SHOULD NOT appear in content. + +**Type**: Array of strings (valid regex patterns) +**Validation**: WARNING if any pattern found + +**Example**: +```json +"discouraged_patterns": [ + "TODO", + "FIXME", + "\\bWIP\\b" +] +``` + +#### `forbidden_patterns` (optional) + +Array of regex patterns that MUST NOT appear in content. 
+ +**Type**: Array of strings (valid regex patterns) +**Validation**: ERROR if any pattern found + +**Example**: +```json +"forbidden_patterns": [ + "password\\s*=\\s*[\"'].*[\"']", // Hard-coded passwords + "api[_-]?key\\s*=\\s*[\"'].*[\"']" // Hard-coded API keys +] +``` + +#### `content_quality` (optional) + +Quality metrics for section content: + +**Sub-properties**: +- **`min_words`**: Minimum word count (integer ≥ 0) +- **`max_words`**: Maximum word count (integer ≥ min_words) +- **`readability_target`**: Target readability level (enum) + - `"simple"` - Elementary school level + - `"general"` - General audience + - `"technical"` - Technical audience + - `"advanced"` - Expert/academic level +- **`min_sentences`**: Minimum sentence count (integer ≥ 0) +- **`max_sentences`**: Maximum sentence count (integer ≥ min_sentences) + +**Example**: +```json +"content_quality": { + "min_words": 50, + "max_words": 300, + "readability_target": "technical", + "min_sentences": 3 +} +``` + +#### `content_instructions` (optional) + +Array of human-readable instructions for content creation. 
+ +**Type**: Array of strings +**Usage**: Displayed in templates, validation reports, and documentation + +**Example**: +```json +"content_instructions": [ + "Show command name in bold", + "Include all major options", + "Use italic for arguments and placeholders", + "Keep syntax examples concise (1-3 lines)" +] +``` + +#### `link_validation` (optional) + +Link checking configuration: + +**Sub-properties**: +- **`check_internal`**: Validate internal document links (boolean) +- **`check_external`**: Validate external URLs (boolean) +- **`allow_fragments`**: Allow fragment-only links like `#section` (boolean) + +**Default**: All false (no link validation) + +**Example**: +```json +"link_validation": { + "check_internal": true, + "check_external": false, + "allow_fragments": true +} +``` + +### Example: Content Control for API Documentation + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "API Documentation Schema", + "x-markitect-content-control": { + "synopsis": { + "required_patterns": [ + "\\*\\*[A-Z]+\\*\\*", // HTTP method in bold + "`/api/.*`" // Endpoint path in code + ], + "content_quality": { + "min_words": 10, + "max_words": 100, + "readability_target": "technical" + }, + "content_instructions": [ + "Start with HTTP method in bold (e.g., **GET**)", + "Show endpoint path in code format", + "Include brief one-line description" + ] + }, + "request_parameters": { + "required_patterns": [ + "\\*\\*[a-z_]+\\*\\*.*\\*[A-Za-z]+\\*" // Bold param name with italic type + ], + "content_instructions": [ + "Use bold for parameter names", + "Use italic for parameter types", + "Include description for each parameter", + "Mark required parameters clearly" + ] + }, + "description": { + "discouraged_patterns": [ + "TODO", + "FIXME", + "TBD" + ], + "forbidden_patterns": [ + "password\\s*=", + "secret\\s*=", + "token\\s*=" + ], + "content_quality": { + "min_words": 50, + "max_words": 500, + "readability_target": "technical", + "min_sentences": 3 
+ }, + "link_validation": { + "check_internal": true, + "check_external": true, + "allow_fragments": true + } + } + } +} +``` + +--- + +## Validation Result Structure + +### Enhanced ValidationResult Class + +```python +class ValidationResult: + """Result of schema validation with classification support.""" + + status: Literal["valid", "valid_with_warnings", "invalid"] + errors: List[ValidationError] # Required/improper violations + warnings: List[ValidationWarning] # Recommended/discouraged violations + suggestions: List[str] # Optional improvements + quality_metrics: Dict[str, Any] # Content quality scores +``` + +### Validation Status Values + +- **`"valid"`**: No errors, no warnings. Document fully conforms. +- **`"valid_with_warnings"`**: No errors, but has warnings. Document acceptable but improvable. +- **`"invalid"`**: Has errors. Document does not conform to schema. + +### Error Types + +```python +class ValidationErrorType(Enum): + MISSING_REQUIRED_SECTION = "missing_required_section" + IMPROPER_SECTION_PRESENT = "improper_section_present" + CONTENT_PATTERN_MISSING = "content_pattern_missing" + CONTENT_PATTERN_FORBIDDEN = "content_pattern_forbidden" + CONTENT_TOO_SHORT = "content_too_short" + CONTENT_TOO_LONG = "content_too_long" + INVALID_LINK = "invalid_link" + STRUCTURE_MISMATCH = "structure_mismatch" +``` + +### Warning Types + +```python +class ValidationWarningType(Enum): + MISSING_RECOMMENDED_SECTION = "missing_recommended_section" + DISCOURAGED_SECTION_PRESENT = "discouraged_section_present" + CONTENT_PATTERN_DISCOURAGED = "content_pattern_discouraged" + CONTENT_QUALITY_BELOW_TARGET = "content_quality_below_target" + READABILITY_MISMATCH = "readability_mismatch" +``` + +--- + +## Metaschema Validation + +### Extension Validation Rules + +The MarkiTect metaschema validates these extensions: + +```json +{ + "x-markitect-sections": { + "type": "object", + "patternProperties": { + "^[A-Z][A-Z0-9_ ]*$": { + "type": "object", + "properties": { + 
"classification": { + "type": "string", + "enum": ["required", "recommended", "optional", "discouraged", "improper"] + }, + "heading_level": { + "type": "integer", + "minimum": 1, + "maximum": 6 + }, + "position": { + "type": "string", + "enum": ["after_title", "before_section_name", "after_section_name", "anywhere"] + }, + "content_instruction": {"type": "string"}, + "min_paragraphs": {"type": "integer", "minimum": 0}, + "max_paragraphs": {"type": "integer", "minimum": 0}, + "min_code_blocks": {"type": "integer", "minimum": 0}, + "max_code_blocks": {"type": "integer", "minimum": 0}, + "min_lists": {"type": "integer", "minimum": 0}, + "max_lists": {"type": "integer", "minimum": 0}, + "warning_if_missing": {"type": "string"}, + "error_message": {"type": "string"}, + "alternatives": { + "type": "array", + "items": {"type": "string"} + } + }, + "required": ["classification"] + } + } + }, + "x-markitect-content-control": { + "type": "object", + "patternProperties": { + "^[a-z][a-z0-9_]*$": { + "type": "object", + "properties": { + "required_patterns": { + "type": "array", + "items": {"type": "string", "format": "regex"} + }, + "discouraged_patterns": { + "type": "array", + "items": {"type": "string", "format": "regex"} + }, + "forbidden_patterns": { + "type": "array", + "items": {"type": "string", "format": "regex"} + }, + "content_quality": { + "type": "object", + "properties": { + "min_words": {"type": "integer", "minimum": 0}, + "max_words": {"type": "integer", "minimum": 0}, + "readability_target": { + "type": "string", + "enum": ["simple", "general", "technical", "advanced"] + }, + "min_sentences": {"type": "integer", "minimum": 0}, + "max_sentences": {"type": "integer", "minimum": 0} + } + }, + "content_instructions": { + "type": "array", + "items": {"type": "string"} + }, + "link_validation": { + "type": "object", + "properties": { + "check_internal": {"type": "boolean"}, + "check_external": {"type": "boolean"}, + "allow_fragments": {"type": "boolean"} + } + } + 
} + } + } + } +} +``` + +--- + +## Implementation Notes + +### Phase 1 Scope + +1. Define and document extension formats ✓ +2. Update metaschema to validate extensions +3. Implement basic classification validation (required/recommended/optional/discouraged/improper) +4. Create example schemas demonstrating all features +5. Update CLI to report errors vs warnings separately + +### Future Enhancements (Phase 2+) + +- Content pattern matching implementation +- Quality metrics calculation +- Link validation +- Readability scoring +- Position constraints enforcement + +--- + +## Version History + +- **v1.0 (Draft)** - Initial specification for Phase 1 implementation + - `x-markitect-sections` extension defined + - `x-markitect-content-control` extension defined + - Validation result structure defined + - Metaschema validation rules defined + +--- + +## References + +- JSON Schema Draft-07: https://json-schema.org/draft-07/schema +- MarkiTect Schema Evolution Workplan: `examples/manpages/SCHEMA_EVOLUTION_WORKPLAN.md` +- Existing Metaschema: `markitect/schemas/markitect-metaschema.json` +- Metaschema Validator: `markitect/metaschema.py` diff --git a/examples/manpages/api-documentation-schema.json b/examples/manpages/api-documentation-schema.json new file mode 100644 index 00000000..ab869887 --- /dev/null +++ b/examples/manpages/api-documentation-schema.json @@ -0,0 +1,230 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "API Endpoint Documentation Schema", + "description": "Schema for API endpoint documentation with classification and content control", + "x-markitect-sections": { + "ENDPOINT": { + "classification": "required", + "heading_level": 2, + "position": "after_title", + "content_instruction": "HTTP method and endpoint path (e.g., GET /api/v1/users)", + "min_paragraphs": 1, + "max_paragraphs": 3, + "error_message": "ENDPOINT section must specify the HTTP method and path" + }, + "DESCRIPTION": { + "classification": "required", + 
"heading_level": 2, + "content_instruction": "What this endpoint does and when to use it", + "min_paragraphs": 2, + "error_message": "DESCRIPTION is required to explain endpoint functionality" + }, + "AUTHENTICATION": { + "classification": "required", + "heading_level": 2, + "content_instruction": "Authentication requirements (API key, OAuth, etc.)", + "min_paragraphs": 1, + "error_message": "AUTHENTICATION requirements must be documented" + }, + "REQUEST PARAMETERS": { + "classification": "recommended", + "heading_level": 2, + "content_instruction": "List all request parameters with types and descriptions", + "alternatives": ["PARAMETERS", "REQUEST", "INPUT"], + "warning_if_missing": "Documenting request parameters helps API consumers use the endpoint correctly" + }, + "RESPONSE": { + "classification": "recommended", + "heading_level": 2, + "content_instruction": "Response format, status codes, and example responses", + "min_code_blocks": 1, + "warning_if_missing": "Response documentation with examples improves API usability" + }, + "EXAMPLES": { + "classification": "recommended", + "heading_level": 2, + "content_instruction": "Complete request/response examples", + "min_code_blocks": 2, + "warning_if_missing": "Examples make API documentation significantly more useful" + }, + "ERROR CODES": { + "classification": "recommended", + "heading_level": 2, + "content_instruction": "Possible error responses and how to handle them", + "alternatives": ["ERRORS", "ERROR HANDLING"], + "warning_if_missing": "Error documentation helps developers handle failures gracefully" + }, + "RATE LIMITING": { + "classification": "optional", + "heading_level": 2, + "content_instruction": "Rate limit information for this endpoint" + }, + "CHANGELOG": { + "classification": "optional", + "heading_level": 2, + "content_instruction": "Version history and changes to this endpoint" + }, + "SEE ALSO": { + "classification": "optional", + "heading_level": 2, + "content_instruction": "Related 
endpoints and documentation" + }, + "IMPLEMENTATION NOTES": { + "classification": "discouraged", + "heading_level": 2, + "warning_if_missing": "Implementation details should be in developer documentation, not API docs" + }, + "INTERNAL API": { + "classification": "improper", + "heading_level": 2, + "error_message": "Internal API endpoints must not be in public documentation" + }, + "EXPERIMENTAL": { + "classification": "improper", + "heading_level": 2, + "error_message": "Experimental features must not be in stable API documentation" + } + }, + "x-markitect-content-control": { + "endpoint": { + "required_patterns": [ + "\\*\\*[A-Z]+\\*\\*", + "`/api/", + "\\*\\*[A-Z]+\\*\\*\\s+`/[^`]+`" + ], + "content_quality": { + "min_words": 5, + "max_words": 50, + "readability_target": "technical" + }, + "content_instructions": [ + "Format: **METHOD** `endpoint_path`", + "Example: **GET** `/api/v1/users/{id}`", + "Use bold for HTTP method", + "Use code formatting for path", + "Include path parameters in curly braces" + ] + }, + "description": { + "discouraged_patterns": [ + "TODO", + "FIXME", + "TBD", + "Coming soon" + ], + "forbidden_patterns": [ + "password", + "secret", + "api[_-]?key\\s*=", + "token\\s*=" + ], + "content_quality": { + "min_words": 30, + "max_words": 500, + "readability_target": "technical", + "min_sentences": 2 + }, + "content_instructions": [ + "Explain what the endpoint does", + "Describe the main use case", + "Mention any prerequisites", + "Note any side effects", + "Keep concise but complete" + ] + }, + "request_parameters": { + "required_patterns": [ + "\\*\\*[a-z_]+\\*\\*", + "\\*[A-Za-z]+\\*" + ], + "content_instructions": [ + "Use bold for parameter names", + "Use italic for parameter types", + "Include: name, type, required/optional, description", + "Use definition list format", + "Specify default values where applicable" + ] + }, + "response": { + "required_patterns": [ + "```json", + "200", + "\\{[^}]*\\}" + ], + "content_quality": { + 
"min_words": 50, + "max_words": 500, + "readability_target": "technical" + }, + "content_instructions": [ + "Show example JSON response", + "Document all status codes", + "Explain response fields", + "Include success and error examples", + "Use proper JSON formatting in code blocks" + ] + }, + "examples": { + "required_patterns": [ + "```bash", + "curl", + "```json" + ], + "content_quality": { + "min_words": 100, + "max_words": 1000, + "readability_target": "general" + }, + "content_instructions": [ + "Provide complete curl examples", + "Show request headers", + "Include example responses", + "Add explanatory comments", + "Cover common scenarios" + ], + "link_validation": { + "check_internal": true, + "check_external": true, + "allow_fragments": true + } + } + }, + "type": "object", + "properties": { + "headings": { + "type": "object", + "properties": { + "level_1": { + "type": "array", + "minItems": 1, + "maxItems": 1 + }, + "level_2": { + "type": "array", + "minItems": 3, + "maxItems": 15 + }, + "level_3": { + "type": "array", + "minItems": 0, + "maxItems": 30 + } + } + }, + "paragraphs": { + "type": "array", + "minItems": 8, + "maxItems": 200 + }, + "code_blocks": { + "type": "array", + "minItems": 3, + "maxItems": 30 + }, + "emphasis": { + "type": "array", + "minItems": 15, + "maxItems": 200 + } + } +} diff --git a/examples/manpages/enhanced-manpage-schema.json b/examples/manpages/enhanced-manpage-schema.json new file mode 100644 index 00000000..1533d5e6 --- /dev/null +++ b/examples/manpages/enhanced-manpage-schema.json @@ -0,0 +1,229 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Enhanced Markdown Manpage Schema with Classifications", + "description": "JSON schema for Unix-style manual pages with section classification and content control", + "x-markitect-sections": { + "SYNOPSIS": { + "classification": "required", + "heading_level": 2, + "position": "after_title", + "content_instruction": "Brief command syntax showing all options 
and arguments in standard format", + "min_paragraphs": 1, + "max_paragraphs": 5, + "min_code_blocks": 0, + "max_code_blocks": 3, + "error_message": "SYNOPSIS section is mandatory for all manpages per Unix conventions" + }, + "DESCRIPTION": { + "classification": "required", + "heading_level": 2, + "content_instruction": "Detailed explanation of what the command does, its purpose, and main functionality", + "min_paragraphs": 2, + "max_paragraphs": 50, + "error_message": "DESCRIPTION section is mandatory for all manpages" + }, + "EXAMPLES": { + "classification": "recommended", + "heading_level": 2, + "content_instruction": "Practical usage examples with explanations demonstrating common use cases", + "min_code_blocks": 3, + "max_code_blocks": 20, + "warning_if_missing": "Examples greatly improve manpage usability - highly recommended" + }, + "SEE ALSO": { + "classification": "recommended", + "heading_level": 2, + "content_instruction": "Related commands, configuration files, and documentation references", + "min_paragraphs": 1, + "warning_if_missing": "Cross-references help users discover related functionality" + }, + "OPTIONS": { + "classification": "recommended", + "heading_level": 2, + "content_instruction": "Detailed option descriptions with all flags and their behaviors", + "alternatives": ["GLOBAL OPTIONS", "COMMAND OPTIONS", "FLAGS"], + "warning_if_missing": "Documenting command options helps users understand available functionality" + }, + "BUGS": { + "classification": "optional", + "heading_level": 2, + "content_instruction": "Known issues, limitations, and bug reporting information" + }, + "AUTHORS": { + "classification": "optional", + "heading_level": 2, + "content_instruction": "List of contributors and maintainers" + }, + "COPYRIGHT": { + "classification": "optional", + "heading_level": 2, + "content_instruction": "Copyright statement and license information" + }, + "HISTORY": { + "classification": "optional", + "heading_level": 2, + 
"content_instruction": "Historical information about command development" + }, + "DEPRECATED": { + "classification": "discouraged", + "heading_level": 2, + "warning_if_missing": "Consider moving deprecated content to historical documentation or HISTORY section" + }, + "OLD_SYNTAX": { + "classification": "discouraged", + "heading_level": 2, + "warning_if_missing": "Old syntax should be documented in HISTORY or removed entirely" + }, + "INTERNAL_NOTES": { + "classification": "improper", + "heading_level": 2, + "error_message": "Internal notes must not appear in published manpages - move to developer documentation" + }, + "TODO": { + "classification": "improper", + "heading_level": 2, + "error_message": "TODO sections are for development only - remove before publication" + }, + "DRAFT": { + "classification": "improper", + "heading_level": 2, + "error_message": "DRAFT markers must be removed before publication" + } + }, + "x-markitect-content-control": { + "synopsis": { + "required_patterns": [ + "\\*\\*[a-z][a-z0-9-]*\\*\\*", + "\\[.*\\]" + ], + "discouraged_patterns": [ + "TODO", + "FIXME", + "TBD" + ], + "content_quality": { + "min_words": 5, + "max_words": 150, + "readability_target": "technical" + }, + "content_instructions": [ + "Show command name in bold (e.g., **command**)", + "Use brackets [] for optional arguments", + "Use italic *ARG* for required arguments", + "Keep synopsis concise (1-5 lines maximum)", + "Use ellipsis ... 
to indicate repeatable arguments" + ] + }, + "description": { + "discouraged_patterns": [ + "TODO", + "FIXME", + "\\bWIP\\b", + "\\bXXX\\b" + ], + "forbidden_patterns": [ + "password\\s*=\\s*[\"'].*[\"']", + "api[_-]?key\\s*=\\s*[\"'].*[\"']", + "secret\\s*=\\s*[\"'].*[\"']" + ], + "content_quality": { + "min_words": 50, + "max_words": 1000, + "readability_target": "technical", + "min_sentences": 3 + }, + "content_instructions": [ + "Start with what the command does", + "Explain why users would use it", + "Describe main functionality and features", + "Mention any prerequisites or requirements", + "Keep technical but accessible" + ], + "link_validation": { + "check_internal": true, + "check_external": false, + "allow_fragments": true + } + }, + "examples": { + "required_patterns": [ + "```", + "#" + ], + "content_quality": { + "min_words": 100, + "max_words": 2000, + "readability_target": "general" + }, + "content_instructions": [ + "Use bash code blocks for command examples", + "Include comments explaining what each example does", + "Start with simple examples, progress to complex", + "Show actual output when helpful", + "Cover common use cases first" + ] + } + }, + "type": "object", + "properties": { + "headings": { + "type": "object", + "description": "Document heading structure", + "properties": { + "level_1": { + "type": "array", + "description": "Title heading in format: command(section) - description", + "items": { + "type": "object", + "properties": { + "content": { + "type": "string", + "pattern": "^[a-z0-9-]+\\([0-9]\\) - .+" + } + } + }, + "minItems": 1, + "maxItems": 1 + }, + "level_2": { + "type": "array", + "description": "Main section headings", + "minItems": 3, + "maxItems": 30 + }, + "level_3": { + "type": "array", + "description": "Subsection headings", + "minItems": 0, + "maxItems": 50 + } + }, + "required": ["level_1", "level_2"] + }, + "paragraphs": { + "type": "array", + "description": "Text paragraphs", + "minItems": 10, + "maxItems": 500 + }, 
def _validate_sections(self, value: Any, property_name: str) -> Optional[ValidationError]:
    """Validate the ``x-markitect-sections`` extension property.

    For every section definition this checks the required ``classification``
    enum, ``heading_level`` (integer 1-6), the ``position`` enum, the
    min/max content-count constraints, the free-text message fields and the
    ``alternatives`` list. Validation stops at the first problem found.

    Args:
        value: Raw value of the property as loaded from the schema.
        property_name: Name of the property being validated; used as the
            prefix of the dotted path handed to ``ValidationError``.

    Returns:
        A ``ValidationError`` describing the first violation, or ``None``
        when every section definition is acceptable.
    """
    if not isinstance(value, dict):
        return ValidationError(
            "x-markitect-sections must be an object",
            property_name
        )

    valid_classifications = ["required", "recommended", "optional", "discouraged", "improper"]
    valid_positions = ["after_title", "before_section_name", "after_section_name", "anywhere"]
    # Each (min, max) pair is range-checked individually and then compared.
    count_pairs = [
        ("min_paragraphs", "max_paragraphs"),
        ("min_code_blocks", "max_code_blocks"),
        ("min_lists", "max_lists"),
    ]
    text_fields = ["content_instruction", "warning_if_missing", "error_message"]

    def is_strict_int(candidate: Any) -> bool:
        # bool is a subclass of int in Python, so a JSON true/false would
        # otherwise slip through as 1/0.
        return isinstance(candidate, int) and not isinstance(candidate, bool)

    for section_name, section_def in value.items():
        # Section names are UPPERCASE by convention; only the type is
        # enforced here, not the naming convention.
        if not isinstance(section_name, str):
            return ValidationError(
                f"Section name must be a string: {section_name}",
                f"{property_name}.{section_name}"
            )

        if not isinstance(section_def, dict):
            return ValidationError(
                f"Section definition must be an object: {section_name}",
                f"{property_name}.{section_name}"
            )

        # 'classification' is the only mandatory field.
        if "classification" not in section_def:
            return ValidationError(
                f"Section '{section_name}' missing required 'classification' field",
                f"{property_name}.{section_name}"
            )

        classification = section_def["classification"]
        if classification not in valid_classifications:
            return ValidationError(
                f"Section '{section_name}' has invalid classification '{classification}'. "
                f"Must be one of {valid_classifications}",
                f"{property_name}.{section_name}.classification"
            )

        if "heading_level" in section_def:
            level = section_def["heading_level"]
            if not is_strict_int(level) or level < 1 or level > 6:
                return ValidationError(
                    f"Section '{section_name}' heading_level must be integer 1-6, got {level}",
                    f"{property_name}.{section_name}.heading_level"
                )

        if "position" in section_def:
            position = section_def["position"]
            if position not in valid_positions:
                return ValidationError(
                    f"Section '{section_name}' has invalid position '{position}'. "
                    f"Must be one of {valid_positions}",
                    f"{property_name}.{section_name}.position"
                )

        for min_key, max_key in count_pairs:
            for constraint in (min_key, max_key):
                if constraint in section_def:
                    count = section_def[constraint]
                    if not is_strict_int(count) or count < 0:
                        return ValidationError(
                            f"Section '{section_name}' {constraint} must be non-negative integer, got {count}",
                            f"{property_name}.{section_name}.{constraint}"
                        )
            # The specification defines each max_* as ">= min_*"; reject
            # ranges that no document could ever satisfy.
            if min_key in section_def and max_key in section_def:
                lower = section_def[min_key]
                upper = section_def[max_key]
                if upper < lower:
                    return ValidationError(
                        f"Section '{section_name}' {max_key} ({upper}) must be >= {min_key} ({lower})",
                        f"{property_name}.{section_name}.{max_key}"
                    )

        # Message/instruction fields are typed as strings in the metaschema.
        for field in text_fields:
            if field in section_def and not isinstance(section_def[field], str):
                return ValidationError(
                    f"Section '{section_name}' {field} must be a string",
                    f"{property_name}.{section_name}.{field}"
                )

        if "alternatives" in section_def:
            alternatives = section_def["alternatives"]
            if not isinstance(alternatives, list):
                return ValidationError(
                    f"Section '{section_name}' alternatives must be an array",
                    f"{property_name}.{section_name}.alternatives"
                )
            if not all(isinstance(alt, str) for alt in alternatives):
                return ValidationError(
                    f"Section '{section_name}' alternative names must be strings",
                    f"{property_name}.{section_name}.alternatives"
                )

    return None
patterns = control_def[pattern_type] + if not isinstance(patterns, list): + return ValidationError( + f"Content control '{section_name}' {pattern_type} must be an array", + f"{property_name}.{section_name}.{pattern_type}" + ) + for pattern in patterns: + if not isinstance(pattern, str): + return ValidationError( + f"Content control '{section_name}' pattern must be string", + f"{property_name}.{section_name}.{pattern_type}" + ) + + # Validate content_quality object + if "content_quality" in control_def: + quality = control_def["content_quality"] + if not isinstance(quality, dict): + return ValidationError( + f"Content control '{section_name}' content_quality must be an object", + f"{property_name}.{section_name}.content_quality" + ) + + # Validate word/sentence counts + for count_field in ["min_words", "max_words", "min_sentences", "max_sentences"]: + if count_field in quality: + count = quality[count_field] + if not isinstance(count, int) or count < 0: + return ValidationError( + f"Content quality '{section_name}' {count_field} must be non-negative integer", + f"{property_name}.{section_name}.content_quality.{count_field}" + ) + + # Validate readability_target + if "readability_target" in quality: + target = quality["readability_target"] + valid_targets = ["simple", "general", "technical", "advanced"] + if target not in valid_targets: + return ValidationError( + f"Content quality '{section_name}' readability_target must be one of {valid_targets}", + f"{property_name}.{section_name}.content_quality.readability_target" + ) + + # Validate content_instructions array + if "content_instructions" in control_def: + instructions = control_def["content_instructions"] + if not isinstance(instructions, list): + return ValidationError( + f"Content control '{section_name}' content_instructions must be an array", + f"{property_name}.{section_name}.content_instructions" + ) + for instruction in instructions: + if not isinstance(instruction, str): + return ValidationError( + 
f"Content control '{section_name}' instruction must be string", + f"{property_name}.{section_name}.content_instructions" + ) + + # Validate link_validation object + if "link_validation" in control_def: + link_val = control_def["link_validation"] + if not isinstance(link_val, dict): + return ValidationError( + f"Content control '{section_name}' link_validation must be an object", + f"{property_name}.{section_name}.link_validation" + ) + for field in ["check_internal", "check_external", "allow_fragments"]: + if field in link_val: + if not isinstance(link_val[field], bool): + return ValidationError( + f"Content control '{section_name}' link_validation.{field} must be boolean", + f"{property_name}.{section_name}.link_validation.{field}" + ) + return None \ No newline at end of file diff --git a/markitect/schemas/markitect-metaschema.json b/markitect/schemas/markitect-metaschema.json index aaea8ef7..625a8f29 100644 --- a/markitect/schemas/markitect-metaschema.json +++ b/markitect/schemas/markitect-metaschema.json @@ -40,6 +40,163 @@ "type": "string", "enum": ["outline", "full"], "description": "Mode used to generate this schema" + }, + "x-markitect-sections": { + "type": "object", + "description": "Section classification and content control for document sections", + "patternProperties": { + "^[A-Z][A-Z0-9_ ]*$": { + "type": "object", + "description": "Section definition with classification and constraints", + "properties": { + "classification": { + "type": "string", + "enum": ["required", "recommended", "optional", "discouraged", "improper"], + "description": "Classification level determining validation behavior" + }, + "heading_level": { + "type": "integer", + "minimum": 1, + "maximum": 6, + "description": "Expected heading level (H1-H6) for this section" + }, + "position": { + "type": "string", + "enum": ["after_title", "before_section_name", "after_section_name", "anywhere"], + "description": "Where this section should appear in the document" + }, + 
"content_instruction": { + "type": "string", + "description": "Human-readable instruction for section content" + }, + "min_paragraphs": { + "type": "integer", + "minimum": 0, + "description": "Minimum number of paragraphs in this section" + }, + "max_paragraphs": { + "type": "integer", + "minimum": 0, + "description": "Maximum number of paragraphs in this section" + }, + "min_code_blocks": { + "type": "integer", + "minimum": 0, + "description": "Minimum number of code blocks in this section" + }, + "max_code_blocks": { + "type": "integer", + "minimum": 0, + "description": "Maximum number of code blocks in this section" + }, + "min_lists": { + "type": "integer", + "minimum": 0, + "description": "Minimum number of lists in this section" + }, + "max_lists": { + "type": "integer", + "minimum": 0, + "description": "Maximum number of lists in this section" + }, + "warning_if_missing": { + "type": "string", + "description": "Custom warning message for missing recommended sections" + }, + "error_message": { + "type": "string", + "description": "Custom error message for required/improper section violations" + }, + "alternatives": { + "type": "array", + "items": {"type": "string"}, + "description": "Alternative section names that satisfy the requirement" + } + }, + "required": ["classification"] + } + } + }, + "x-markitect-content-control": { + "type": "object", + "description": "Content validation rules including patterns and quality metrics", + "patternProperties": { + "^[a-z][a-z0-9_]*$": { + "type": "object", + "description": "Content control rules for a specific section", + "properties": { + "required_patterns": { + "type": "array", + "items": {"type": "string"}, + "description": "Regex patterns that must appear in section content" + }, + "discouraged_patterns": { + "type": "array", + "items": {"type": "string"}, + "description": "Regex patterns that should not appear in content (warning)" + }, + "forbidden_patterns": { + "type": "array", + "items": {"type": "string"}, 
+ "description": "Regex patterns that must not appear in content (error)" + }, + "content_quality": { + "type": "object", + "description": "Quality metrics for section content", + "properties": { + "min_words": { + "type": "integer", + "minimum": 0, + "description": "Minimum word count" + }, + "max_words": { + "type": "integer", + "minimum": 0, + "description": "Maximum word count" + }, + "readability_target": { + "type": "string", + "enum": ["simple", "general", "technical", "advanced"], + "description": "Target readability level" + }, + "min_sentences": { + "type": "integer", + "minimum": 0, + "description": "Minimum sentence count" + }, + "max_sentences": { + "type": "integer", + "minimum": 0, + "description": "Maximum sentence count" + } + } + }, + "content_instructions": { + "type": "array", + "items": {"type": "string"}, + "description": "Array of human-readable content creation instructions" + }, + "link_validation": { + "type": "object", + "description": "Link checking configuration", + "properties": { + "check_internal": { + "type": "boolean", + "description": "Validate internal document links" + }, + "check_external": { + "type": "boolean", + "description": "Validate external URLs" + }, + "allow_fragments": { + "type": "boolean", + "description": "Allow fragment-only links like #section" + } + } + } + } + } + } } }, "patternProperties": {