feat: add terminology schema example and improve schema-list command

This commit completes Phase 2 of the schema evolution work and adds
a new example demonstrating schema usage for terminology documents.

## New Features

### Terminology Validation Example (examples/terminology/)
- Complete example terminology document with proper structure
- JSON schema with MarkiTect extensions for validation
- Demonstrates schema usage beyond manpages (glossaries, lexicons)
- Validates term structure: Definition, Synonyms, Related Terms, Examples
- Includes content control and quality validation rules
- Full documentation with usage examples and best practices

### Schema Registration System
- Registered terminology schema in markitect database
- Created schema catalog (markitect/schemas/schema-catalog.yaml)
- Copied schema to official location (markitect/schemas/)
- The catalog provides metadata, features, and usage info for all registered schemas

### Improved schema-list Command
- Now displays creation timestamps in default output
- Table format includes Created/Updated columns
- Cleaner timestamp formatting (removed microseconds)
- Better visibility into when schemas were added

## Files Changed

Added:
- examples/terminology/README.md - Complete documentation
- examples/terminology/terminology-example.md - Example glossary
- examples/terminology/terminology-schema.json - Validation schema
- markitect/schemas/terminology-schema.json - Registered schema
- markitect/schemas/schema-catalog.yaml - Schema registry

Modified:
- markitect/cli.py - Enhanced schema-list with timestamps
- TODO.md - Documented Phase 2 completion and new example

Moved:
- SCHEMA_EVOLUTION_WORKPLAN.md → todo/ directory

## Schema Features Demonstrated

- Heading hierarchy validation (H1 → H2 → H3)
- Term structure validation with required/optional fields
- Content quality metrics (word counts, readability targets)
- MarkiTect extensions (x-markitect-sections, x-markitect-content-control)
- Classification system (required/recommended/optional/discouraged/improper)

## Usage

```bash
# List schemas with timestamps
markitect schema-list

# Validate terminology document
markitect validate glossary.md --schema terminology-schema.json

# View in table format
markitect schema-list --format table
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-04 23:07:36 +01:00
parent 82c1a3ab65
commit 6df9b5df05
8 changed files with 927 additions and 3 deletions

View File

@@ -1740,15 +1740,46 @@ def schema_list(config, output_format, names_only):
click.echo()
for schema_info in schemas:
click.echo(f"🔧 {schema_info['filename']}")
# Format timestamp for display (remove microseconds)
created = schema_info['created_at']
if created:
# Format: YYYY-MM-DD HH:MM:SS (remove microseconds if present)
if '.' in created:
created_display = created.split('.')[0]
else:
created_display = created
click.echo(f"🔧 {schema_info['filename']:<40} (added: {created_display})")
else:
click.echo(f"🔧 {schema_info['filename']}")
if config.get('verbose'):
click.echo(f" Title: {schema_info['title']}")
click.echo(f" Created: {schema_info['created_at']}")
click.echo(f" Updated: {schema_info['updated_at']}")
if schema_info['description']:
click.echo(f" Description: {schema_info['description']}")
click.echo()
elif output_format == 'table':
# Custom table format for better readability
table_data = []
for schema in schemas:
# Format timestamps (remove microseconds)
created_date = schema['created_at'].split('.')[0] if schema['created_at'] and '.' in schema['created_at'] else schema['created_at']
updated_date = schema['updated_at'].split('.')[0] if schema['updated_at'] and '.' in schema['updated_at'] else schema['updated_at']
table_data.append({
'Name': schema['filename'],
'Title': schema['title'] or '',
'Created': created_date or '',
'Updated': updated_date or ''
})
if table_data:
headers = ['Name', 'Title', 'Created', 'Updated']
rows = [[row[h] for h in headers] for row in table_data]
click.echo(tabulate(rows, headers=headers, tablefmt='simple'))
else:
# Use structured format (table, json, yaml)
# Use structured format (json, yaml)
formatted_output = format_output(schemas, output_format)
click.echo(formatted_output)

View File

@@ -0,0 +1,77 @@
# MarkiTect Schema Catalog
#
# This catalog provides metadata about available schemas for markdown document validation.
# Schemas can be referenced by name or loaded from their file path.
version: "1.0"
description: "Catalog of registered MarkiTect schemas for document validation"
schemas:
- id: "markitect-metaschema"
name: "MarkiTect Metaschema"
file: "markitect-metaschema.json"
version: "1.0"
description: "Metaschema for validating MarkiTect schema extensions"
type: "metaschema"
usage: "Used internally to validate schema files with MarkiTect-specific extensions"
tags:
- internal
- validation
- metaschema
- id: "terminology-v1"
name: "Terminology Document Schema"
file: "terminology-schema.json"
version: "1.0"
description: "Schema for validating terminology and glossary documents"
type: "document-schema"
usage: "Validates technical glossaries, terminology documents, and definition lists"
document_types:
- glossary
- terminology
- lexicon
- dictionary
features:
- Heading hierarchy validation (H1 → H2 → H3)
- Term structure validation (Definition, Synonyms, Related Terms, etc.)
- Content quality metrics (word counts, readability)
- MarkiTect extensions (x-markitect-sections, x-markitect-content-control)
- Classification system (required/recommended/optional/discouraged/improper)
example: "examples/terminology/terminology-example.md"
tags:
- documentation
- glossary
- terminology
- definitions
related_schemas: []
author: "MarkiTect Project"
created: "2026-01-04"
updated: "2026-01-04"
# Future schemas to add:
#
# - id: "manpage-v1"
# name: "Unix Manual Page Schema"
# description: "Schema for Unix/Linux manual page documentation"
#
# - id: "api-reference-v1"
# name: "API Reference Schema"
# description: "Schema for API endpoint documentation"
#
# - id: "arc42-v1"
# name: "arc42 Architecture Documentation Schema"
# description: "Schema for arc42 architecture documentation template"
#
# - id: "adr-v1"
# name: "Architecture Decision Record Schema"
# description: "Schema for ADR (Architecture Decision Record) documents"
#
# - id: "rfc-v1"
# name: "RFC/Specification Schema"
# description: "Schema for RFC-style specification documents"
# Schema discovery paths:
# - Built-in: markitect/schemas/*.json
# - User-defined: ~/.markitect/schemas/*.json
# - Project-specific: .markitect/schemas/*.json
# - Custom paths via MARKITECT_SCHEMA_PATH environment variable

View File

@@ -0,0 +1,214 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://markitect.dev/schemas/terminology-v1.json",
"title": "Terminology Document Schema",
"description": "Schema for validating terminology and glossary documents with consistent structure",
"type": "object",
"properties": {
"headings": {
"type": "object",
"properties": {
"level_1": {
"type": "array",
"description": "Main document title",
"items": {
"type": "object",
"properties": {
"content": {
"type": "string",
"pattern": ".*(Terminology|Glossary|Terms|Definitions).*"
}
}
},
"minItems": 1,
"maxItems": 1
},
"level_2": {
"type": "array",
"description": "Category headings (Core Concepts, Document Types, etc.)",
"items": {
"type": "object",
"properties": {
"content": {
"type": "string",
"minLength": 1
}
}
},
"minItems": 1,
"maxItems": 20
},
"level_3": {
"type": "array",
"description": "Individual term headings",
"items": {
"type": "object",
"properties": {
"content": {
"type": "string",
"minLength": 1,
"description": "Term name - should be title case"
}
}
},
"minItems": 1
}
},
"required": ["level_1", "level_2", "level_3"]
},
"paragraphs": {
"type": "array",
"description": "Content paragraphs including definitions and descriptions",
"items": {
"type": "object",
"properties": {
"content": {
"type": "string",
"minLength": 10
}
}
},
"minItems": 3
},
"bold_text": {
"type": "array",
"description": "Bold text used for field labels (Definition, Synonyms, etc.)",
"items": {
"type": "object",
"properties": {
"content": {
"type": "string",
"enum": [
"Definition:",
"Synonyms:",
"Related Terms:",
"Example:",
"Examples:",
"Use Cases:",
"Usage:",
"Format:",
"Components:",
"Steps:",
"Tools:",
"Levels:",
"Status:",
"Migration:",
"Required:",
"Recommended:",
"Optional:",
"Discouraged:",
"Improper:"
]
}
}
},
"minItems": 1
}
},
"required": ["headings", "paragraphs"],
"x-markitect-sections": {
"document_title": {
"classification": "required",
"heading_level": 1,
"content_instruction": "Main title should include one of the words 'Terminology', 'Glossary', 'Terms', or 'Definitions'",
"pattern": ".*(Terminology|Glossary|Terms|Definitions).*"
},
"category_sections": {
"classification": "required",
"heading_level": 2,
"min_sections": 1,
"content_instruction": "Organize terms into logical categories (e.g., Core Concepts, Document Types, Process Terms)"
},
"term_definitions": {
"classification": "required",
"heading_level": 3,
"min_sections": 1,
"content_instruction": "Each term should be a level 3 heading followed by its definition and optional metadata"
}
},
"x-markitect-content-control": {
"term_structure": {
"required_components": [
{
"label": "Definition:",
"type": "bold_text",
"description": "Clear, concise definition of the term"
}
],
"optional_components": [
{
"label": "Synonyms:",
"type": "bold_text",
"description": "Alternative names or abbreviations"
},
{
"label": "Related Terms:",
"type": "bold_text",
"description": "Links to related concepts"
},
{
"label": "Example:",
"type": "bold_text_or_code",
"description": "Practical example demonstrating the term"
},
{
"label": "Use Cases:",
"type": "list",
"description": "Common scenarios where term applies"
}
],
"content_quality": {
"min_words_per_definition": 10,
"max_words_per_definition": 200,
"readability_target": "technical"
},
"content_instructions": [
"Start each term with a level 3 heading containing the term name",
"Follow immediately with 'Definition:' in bold",
"Provide a clear, self-contained definition",
"Add optional fields (Synonyms, Related Terms, Examples) as needed",
"Use consistent formatting across all terms",
"Group related terms under category headings (level 2)"
]
},
"definition_pattern": {
"description": "Each definition should follow: Term heading (###) → Definition: (bold) → Definition text",
"validation": {
"heading_level_3_followed_by": "bold_text_starting_with_Definition",
"definition_length": {
"min_words": 10,
"max_words": 200
}
}
},
"deprecated_terms": {
"classification": "optional",
"heading_level": 2,
"content_instruction": "Optional section for deprecated terms with migration guidance",
"required_fields": [
"Status: DEPRECATED",
"Migration:"
]
}
},
"x-markitect-validation-rules": {
"term_count": {
"min": 3,
"recommended_min": 10,
"description": "Terminology document should define at least 3 terms, 10+ recommended"
},
"category_balance": {
"description": "Each category should have at least 2 terms",
"min_terms_per_category": 2
},
"definition_quality": {
"all_terms_must_have_definition": true,
"definition_must_follow_term_heading": true,
"definition_min_words": 10
},
"consistency": {
"use_consistent_field_labels": true,
"maintain_heading_hierarchy": true
}
}
}