Files
markitect-main/markitect/schema/naming.py
tegwick 9b12875681 feat(spaces): implement Phase 0-1 of Information Space Service
Phase 0 - Project Organization:
- Create docs/PROJECT_STRUCTURE.md documenting codebase layout
- Create markitect/core/ with parser, serializer, document_manager, workspace
- Create markitect/schema/ consolidating 6 schema_*.py modules
- Create markitect/storage/ with database module
- Maintain backward compatibility via re-exports from original locations
- Add docs/roadmap/information-space-service/ with README and WORKPLAN

Phase 1 - Foundation (Weeks 1-3):
- Week 1: Core domain models (InformationSpace, SpaceDocument, SpaceConfig,
  SpaceMetadata, SpaceVariable, TransclusionReference, SpaceStatus)
- Week 2: Repository layer with interfaces (ISpaceRepository,
  IDocumentAssociationRepository, IVariableRepository, IReferenceRepository)
  and SQLite implementations with foreign key cascade deletes
- Week 3: SpaceService orchestration layer with full CRUD, document,
  variable, and reference tracking operations

Test coverage: 124 tests (25 model + 63 repository + 36 integration)

Capabilities delivered:
- CAP-001: InformationSpace entity with lifecycle management
- CAP-002: SpaceRepository CRUD with SQLite backing
- CAP-003: Document-Space associations with path-based organization
- CAP-004: Space metadata and configuration schemas
- CAP-005: Database schema with migrations

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-08 02:02:46 +01:00

370 lines
10 KiB
Python

"""
Schema Naming Validation - Enforce filename conventions for schemas.
This module provides validation and utilities for schema filename conventions
to ensure consistency across the MarkiTect schema ecosystem.
Naming Convention:
Format: {domain}-schema-v{major}.{minor}.md
Components:
- domain: lowercase, hyphen-separated identifier (e.g., "manpage", "api-documentation")
- schema: literal string "schema"
- version: SemVer major.minor (e.g., "v1.0", "v2.1")
- extension: ".md" (markdown)
Valid Examples:
- manpage-schema-v1.0.md
- terminology-schema-v1.0.md
- api-documentation-schema-v1.0.md
- my-custom-type-schema-v2.1.md
Invalid Examples:
- manpage.json (missing version and wrong extension)
- manpage-v1.md (missing "schema" keyword)
- ManPage-Schema-v1.0.md (wrong case - must be lowercase)
- manpage-schema-1.0.md (missing 'v' prefix)
- manpage-schema-v1.md (missing minor version)
"""
import re
from pathlib import Path
from typing import Tuple, Optional, Dict, Any
# Compiled pattern for the schema filename convention:
#     {domain}-schema-v{major}.{minor}.md
# - domain: lowercase ASCII letters, digits and hyphens, starting with a letter
# - major / minor: one or more ASCII digits each
#
# The pattern ends with \Z rather than $ because $ also matches just before a
# trailing newline, which would let "manpage-schema-v1.0.md\n" pass as valid.
# [0-9] is used instead of \d because \d on a str pattern matches any Unicode
# decimal digit (e.g. Arabic-Indic digits), not only ASCII 0-9.
SCHEMA_FILENAME_PATTERN = re.compile(
    r'^(?P<domain>[a-z][a-z0-9-]*)-schema-v(?P<major>[0-9]+)\.(?P<minor>[0-9]+)\.md\Z'
)
class SchemaFilenameError(Exception):
    """Signal that a filename violates the schema naming convention.

    Raised by the extraction helpers in this module (for example
    ``extract_schema_domain``) when they are handed a filename that does
    not validate.
    """
def validate_schema_filename(filename: str) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """
    Check a filename against the schema naming convention and parse it.

    Args:
        filename: The filename to check (e.g., "manpage-schema-v1.0.md")

    Returns:
        A ``(is_valid, metadata)`` pair.

        When the filename matches ``SCHEMA_FILENAME_PATTERN``, ``is_valid``
        is True and ``metadata`` is a dict with these keys:
            - domain: str - the domain identifier
            - version: str - "major.minor" as written in the filename
            - major: int - major version number
            - minor: int - minor version number
            - filename: str - the filename exactly as passed in

        When it does not match, the pair is ``(False, None)``.

    Examples:
        >>> validate_schema_filename("manpage-schema-v1.0.md")
        (True, {'domain': 'manpage', 'version': '1.0', ...})
        >>> validate_schema_filename("invalid.json")
        (False, None)
    """
    parsed = SCHEMA_FILENAME_PATTERN.match(filename)
    if parsed is None:
        return False, None

    # Pull all three named groups out in one call; major/minor are still text.
    domain, major_text, minor_text = parsed.group('domain', 'major', 'minor')
    metadata: Dict[str, Any] = {
        'domain': domain,
        'version': f"{major_text}.{minor_text}",
        'major': int(major_text),
        'minor': int(minor_text),
        'filename': filename,
    }
    return True, metadata
def suggest_valid_filename(
    domain: str,
    version: str = "1.0",
    normalize: bool = True
) -> str:
    """
    Generate a valid schema filename from a domain and a version.

    Args:
        domain: The schema domain (e.g., "manpage", "API Documentation")
        version: Version string in format "major.minor" (default: "1.0")
        normalize: When True, lowercase the domain and turn every run of
            whitespace, underscores and hyphens into a single hyphen,
            dropping hyphens left at either end. When False, the domain is
            used as given and must already be in its final form.

    Returns:
        Filename of the form "{domain}-schema-v{major}.{minor}.md"

    Raises:
        ValueError: If the domain is empty, the (normalized) domain is not a
            lowercase identifier starting with a letter, or the version is
            not two non-negative integers separated by a dot.

    Examples:
        >>> suggest_valid_filename("manpage", "1.0")
        'manpage-schema-v1.0.md'
        >>> suggest_valid_filename("API Documentation", "2.1")
        'api-documentation-schema-v2.1.md'
        >>> suggest_valid_filename("My_Custom_Type", "1.0")
        'my-custom-type-schema-v1.0.md'
    """
    if not domain:
        raise ValueError("Domain cannot be empty")

    if normalize:
        # One pass handles spaces, tabs, newlines, underscores and repeated
        # hyphens alike, so stray whitespace can never survive into the name.
        domain_clean = re.sub(r'[\s_-]+', '-', domain.lower()).strip('-')
    else:
        domain_clean = domain

    # fullmatch instead of match + '$': '$' also matches just before a
    # trailing newline, which would let "manpage\n" through and produce a
    # filename with an embedded newline.
    if not re.fullmatch(r'[a-z][a-z0-9-]*', domain_clean):
        raise ValueError(
            f"Invalid domain '{domain_clean}': must start with lowercase letter "
            "and contain only lowercase letters, numbers, and hyphens"
        )

    # Parse and validate version
    version_parts = version.split('.')
    if len(version_parts) != 2:
        raise ValueError(
            f"Invalid version '{version}': must be in format 'major.minor' (e.g., '1.0')"
        )

    try:
        major = int(version_parts[0])
        minor = int(version_parts[1])
    except ValueError:
        # The int() failure adds nothing for the caller; suppress the chain.
        raise ValueError(
            f"Invalid version '{version}': major and minor must be integers"
        ) from None

    if major < 0 or minor < 0:
        raise ValueError(
            f"Invalid version '{version}': major and minor must be non-negative"
        )

    return f"{domain_clean}-schema-v{major}.{minor}.md"


# Alias for backward compatibility
suggest_schema_filename = suggest_valid_filename
def extract_schema_domain(filename: str) -> str:
    """
    Return the domain part of a valid schema filename.

    Args:
        filename: Schema filename to parse

    Returns:
        The domain identifier (the text before "-schema-")

    Raises:
        SchemaFilenameError: If the filename does not validate

    Examples:
        >>> extract_schema_domain("manpage-schema-v1.0.md")
        'manpage'
    """
    ok, info = validate_schema_filename(filename)
    if ok:
        return info['domain']

    raise SchemaFilenameError(
        f"Invalid schema filename: {filename}\n"
        f"Expected format: {{domain}}-schema-v{{major}}.{{minor}}.md"
    )
def get_schema_version(filename: str) -> str:
    """
    Return the "major.minor" version encoded in a valid schema filename.

    Args:
        filename: Schema filename to parse

    Returns:
        Version string without the 'v' prefix (e.g., "1.0")

    Raises:
        SchemaFilenameError: If the filename does not validate

    Examples:
        >>> get_schema_version("manpage-schema-v1.0.md")
        '1.0'
    """
    accepted, parsed = validate_schema_filename(filename)
    if accepted:
        return parsed['version']

    raise SchemaFilenameError(
        f"Invalid schema filename: {filename}\n"
        f"Expected format: {{domain}}-schema-v{{major}}.{{minor}}.md"
    )
def extract_schema_metadata(filename: str) -> Dict[str, Any]:
    """
    Return the parsed metadata of a valid schema filename.

    This is the raising counterpart of ``validate_schema_filename``: it
    hands back that function's metadata dict, or raises instead of
    returning a failure flag.

    Args:
        filename: Schema filename to parse

    Returns:
        Dictionary with the keys domain, version, major, minor and filename

    Raises:
        SchemaFilenameError: If the filename does not validate

    Examples:
        >>> extract_schema_metadata("manpage-schema-v1.0.md")
        {'domain': 'manpage', 'version': '1.0', 'major': 1, 'minor': 0, 'filename': 'manpage-schema-v1.0.md'}
    """
    outcome = validate_schema_filename(filename)
    if not outcome[0]:
        problem = (
            f"Invalid schema filename: {filename}\n"
            f"Expected format: {{domain}}-schema-v{{major}}.{{minor}}.md"
        )
        raise SchemaFilenameError(problem)
    return outcome[1]
def get_validation_errors(filename: str) -> list:
    """
    Get detailed validation errors for a filename.

    A filename accepted by ``validate_schema_filename`` yields an empty list.
    Otherwise the list starts with a generic "does not match pattern" entry,
    followed by one entry for each specific problem that could be detected:
    wrong extension, missing or malformed version, missing "-schema-"
    keyword, uppercase characters, and a missing or invalid domain.

    The domain is only checked when "-schema-" is present, because without
    that keyword there is no reliable way to tell where the domain ends.

    Args:
        filename: Filename to validate

    Returns:
        List of error messages (empty if valid)

    Examples:
        >>> get_validation_errors("manpage-schema-v1.0.md")
        []
        >>> get_validation_errors("invalid.json")
        ['Filename does not match pattern: {domain}-schema-v{major}.{minor}.md', ...]
    """
    is_valid, _ = validate_schema_filename(filename)
    if is_valid:
        return []

    errors = [
        "Filename does not match pattern: {domain}-schema-v{major}.{minor}.md"
    ]

    # Check extension
    if not filename.endswith('.md'):
        errors.append(f"Extension must be '.md', got: {Path(filename).suffix}")

    # Check for version
    if '-v' not in filename:
        errors.append("Missing version: filename must include '-v{major}.{minor}'")
    elif not re.search(r'-v[0-9]+\.[0-9]+', filename):
        errors.append(
            "Invalid version format: must be '-v{major}.{minor}' (e.g., '-v1.0')"
        )

    # Check for schema keyword; partition() leaves `keyword` empty when the
    # separator is absent, which is also the signal that no domain can be
    # isolated below.
    domain, keyword, _ = filename.partition('-schema-')
    if not keyword:
        errors.append("Missing '-schema-' keyword in filename")

    # Check for uppercase (must be lowercase)
    if any(c.isupper() for c in filename):
        errors.append("Filename must be lowercase")

    # Check domain format, but only when it was actually isolated. Running
    # this on a filename without '-schema-' would report the entire filename
    # as an "invalid domain".
    if keyword:
        if not domain:
            errors.append("Missing domain: filename must start with a domain before '-schema-'")
        # fullmatch: '$' would also accept a domain ending in a newline.
        elif not re.fullmatch(r'[a-z][a-z0-9-]*', domain):
            errors.append(
                f"Invalid domain '{domain}': must start with lowercase letter "
                "and contain only lowercase letters, numbers, and hyphens"
            )

    return errors
def is_valid_schema_filename(filename: str) -> bool:
    """
    Tell whether a filename follows the schema naming convention.

    Convenience wrapper that keeps only the boolean half of the result of
    ``validate_schema_filename``.

    Args:
        filename: Filename to check

    Returns:
        True if valid, False otherwise

    Examples:
        >>> is_valid_schema_filename("manpage-schema-v1.0.md")
        True
        >>> is_valid_schema_filename("invalid.json")
        False
    """
    return validate_schema_filename(filename)[0]
def format_validation_message(filename: str) -> str:
    """
    Format a user-friendly validation message.

    For a valid filename this is a single confirmation line. For an invalid
    one it is a multi-line report: a header, the numbered errors from
    ``get_validation_errors``, the expected format with an example, and,
    when a usable domain can be guessed, a suggested corrected filename.
    The suggestion always uses version "1.0".

    Args:
        filename: Filename to report on

    Returns:
        Formatted message (with suggestions when the filename is invalid)

    Examples:
        >>> print(format_validation_message("manpage.json"))
        \u274c Invalid schema filename: manpage.json
        ...
    """
    errors = get_validation_errors(filename)
    if not errors:
        return f"\u2705 Valid schema filename: {filename}"

    parts = [f"\u274c Invalid schema filename: {filename}\n\n", "Errors:\n"]
    parts.extend(f" {i}. {error}\n" for i, error in enumerate(errors, 1))
    parts.append("\nExpected format: {domain}-schema-v{major}.{minor}.md\n")
    parts.append("Example: manpage-schema-v1.0.md\n")

    # Domain guesses, best first. When '-schema-' is present, the text before
    # it is the domain by definition of the convention, so hyphenated domains
    # such as "api-documentation" are kept whole. The fallback is everything
    # before the first hyphen or dot.
    guesses = []
    head, keyword, _ = filename.partition('-schema-')
    if keyword:
        guesses.append(head)
    guesses.append(filename.split('-')[0].split('.')[0])

    for domain_guess in guesses:
        try:
            suggestion = suggest_valid_filename(domain_guess, "1.0")
        except ValueError:
            # The guess is not a usable domain; the suggestion is best-effort,
            # so try the next guess or simply omit it.
            continue
        parts.append(f"\nSuggested filename: {suggestion}\n")
        break

    return "".join(parts)