""" Schema Naming Validation - Enforce filename conventions for schemas. This module provides validation and utilities for schema filename conventions to ensure consistency across the MarkiTect schema ecosystem. Naming Convention: Format: {domain}-schema-v{major}.{minor}.md Components: - domain: lowercase, hyphen-separated identifier (e.g., "manpage", "api-documentation") - schema: literal string "schema" - version: SemVer major.minor (e.g., "v1.0", "v2.1") - extension: ".md" (markdown) Valid Examples: - manpage-schema-v1.0.md - terminology-schema-v1.0.md - api-documentation-schema-v1.0.md - my-custom-type-schema-v2.1.md Invalid Examples: - manpage.json (missing version and wrong extension) - manpage-v1.md (missing "schema" keyword) - ManPage-Schema-v1.0.md (wrong case - must be lowercase) - manpage-schema-1.0.md (missing 'v' prefix) - manpage-schema-v1.md (missing minor version) """ import re from pathlib import Path from typing import Tuple, Optional, Dict, Any # Regex pattern for schema filename validation # Matches: {domain}-schema-v{major}.{minor}.md # Where domain is lowercase letters/numbers/hyphens starting with letter SCHEMA_FILENAME_PATTERN = re.compile( r'^(?P[a-z][a-z0-9-]*)-schema-v(?P\d+)\.(?P\d+)\.md$' ) class SchemaFilenameError(Exception): """Exception raised for invalid schema filenames.""" pass def validate_schema_filename(filename: str) -> Tuple[bool, Optional[Dict[str, Any]]]: """ Validate schema filename against naming convention. Args: filename: The filename to validate (e.g., "manpage-schema-v1.0.md") Returns: Tuple of (is_valid, metadata_dict or None) If valid, metadata_dict contains: - domain: str - The domain identifier - version: str - Full version string (e.g., "1.0") - major: int - Major version number - minor: int - Minor version number - filename: str - The original filename If invalid, metadata_dict is None Examples: >>> validate_schema_filename("manpage-schema-v1.0.md") (True, {'domain': 'manpage', 'version': '1.0', ...}) >>> validate_schema_filename("invalid.json") (False, None) """ match = SCHEMA_FILENAME_PATTERN.match(filename) if not match: return False, None return True, { 'domain': match.group('domain'), 'version': f"{match.group('major')}.{match.group('minor')}", 'major': int(match.group('major')), 'minor': int(match.group('minor')), 'filename': filename } def suggest_valid_filename( domain: str, version: str = "1.0", normalize: bool = True ) -> str: """ Generate a valid schema filename from domain and version. Args: domain: The schema domain (e.g., "manpage", "API Documentation") version: Version string in format "major.minor" (default: "1.0") normalize: Whether to normalize domain to lowercase/hyphenated Returns: Valid schema filename Raises: ValueError: If domain or version format is invalid Examples: >>> suggest_valid_filename("manpage", "1.0") 'manpage-schema-v1.0.md' >>> suggest_valid_filename("API Documentation", "2.1") 'api-documentation-schema-v2.1.md' >>> suggest_valid_filename("My_Custom_Type", "1.0") 'my-custom-type-schema-v1.0.md' """ if not domain: raise ValueError("Domain cannot be empty") if normalize: # Normalize domain: lowercase, replace spaces/underscores with hyphens domain_clean = domain.lower() domain_clean = domain_clean.replace(' ', '-').replace('_', '-') # Remove consecutive hyphens domain_clean = re.sub(r'-+', '-', domain_clean) # Remove leading/trailing hyphens domain_clean = domain_clean.strip('-') else: domain_clean = domain # Validate domain format (must start with letter, contain only lowercase, numbers, hyphens) if not re.match(r'^[a-z][a-z0-9-]*$', domain_clean): raise ValueError( f"Invalid domain '{domain_clean}': must start with lowercase letter " "and contain only lowercase letters, numbers, and hyphens" ) # Parse and validate version version_parts = version.split('.') if len(version_parts) != 2: raise ValueError( f"Invalid version '{version}': must be in format 'major.minor' (e.g., '1.0')" ) try: major = int(version_parts[0]) minor = int(version_parts[1]) except ValueError: raise ValueError( f"Invalid version '{version}': major and minor must be integers" ) if major < 0 or minor < 0: raise ValueError( f"Invalid version '{version}': major and minor must be non-negative" ) return f"{domain_clean}-schema-v{major}.{minor}.md" # Alias for backward compatibility suggest_schema_filename = suggest_valid_filename def extract_schema_domain(filename: str) -> str: """ Extract the domain from a valid schema filename. Args: filename: Schema filename to parse Returns: The domain identifier Raises: SchemaFilenameError: If filename is invalid Examples: >>> extract_schema_domain("manpage-schema-v1.0.md") 'manpage' """ is_valid, metadata = validate_schema_filename(filename) if not is_valid: raise SchemaFilenameError( f"Invalid schema filename: {filename}\n" f"Expected format: {{domain}}-schema-v{{major}}.{{minor}}.md" ) return metadata['domain'] def get_schema_version(filename: str) -> str: """ Get the version string from a valid schema filename. Args: filename: Schema filename to parse Returns: Version string (e.g., "1.0") Raises: SchemaFilenameError: If filename is invalid Examples: >>> get_schema_version("manpage-schema-v1.0.md") '1.0' """ is_valid, metadata = validate_schema_filename(filename) if not is_valid: raise SchemaFilenameError( f"Invalid schema filename: {filename}\n" f"Expected format: {{domain}}-schema-v{{major}}.{{minor}}.md" ) return metadata['version'] def extract_schema_metadata(filename: str) -> Dict[str, Any]: """ Extract metadata from a valid schema filename. Args: filename: Schema filename to parse Returns: Dictionary with metadata Raises: SchemaFilenameError: If filename is invalid Examples: >>> extract_schema_metadata("manpage-schema-v1.0.md") {'domain': 'manpage', 'version': '1.0', 'major': 1, 'minor': 0} """ is_valid, metadata = validate_schema_filename(filename) if not is_valid: raise SchemaFilenameError( f"Invalid schema filename: {filename}\n" f"Expected format: {{domain}}-schema-v{{major}}.{{minor}}.md" ) return metadata def get_validation_errors(filename: str) -> list: """ Get detailed validation errors for a filename. Args: filename: Filename to validate Returns: List of error messages (empty if valid) Examples: >>> get_validation_errors("manpage-schema-v1.0.md") [] >>> get_validation_errors("invalid.json") ['Filename does not match pattern: {domain}-schema-v{major}.{minor}.md', ...] """ errors = [] # Check basic pattern match is_valid, _ = validate_schema_filename(filename) if is_valid: return errors # Provide detailed feedback errors.append( f"Filename does not match pattern: {{domain}}-schema-v{{major}}.{{minor}}.md" ) # Check extension if not filename.endswith('.md'): errors.append(f"Extension must be '.md', got: {Path(filename).suffix}") # Check for version if '-v' not in filename: errors.append("Missing version: filename must include '-v{major}.{minor}'") elif not re.search(r'-v\d+\.\d+', filename): errors.append( "Invalid version format: must be '-v{major}.{minor}' (e.g., '-v1.0')" ) # Check for schema keyword if '-schema-' not in filename: errors.append("Missing '-schema-' keyword in filename") # Check for uppercase (must be lowercase) if any(c.isupper() for c in filename): errors.append("Filename must be lowercase") # Check domain format (if we can isolate it) parts = filename.split('-schema-') if len(parts) >= 1: domain = parts[0] if domain and not re.match(r'^[a-z][a-z0-9-]*$', domain): errors.append( f"Invalid domain '{domain}': must start with lowercase letter " "and contain only lowercase letters, numbers, and hyphens" ) return errors def is_valid_schema_filename(filename: str) -> bool: """ Check if filename is valid (convenience function). Args: filename: Filename to check Returns: True if valid, False otherwise Examples: >>> is_valid_schema_filename("manpage-schema-v1.0.md") True >>> is_valid_schema_filename("invalid.json") False """ is_valid, _ = validate_schema_filename(filename) return is_valid def format_validation_message(filename: str) -> str: """ Format a user-friendly validation message. Args: filename: Filename that failed validation Returns: Formatted error message with suggestions Examples: >>> print(format_validation_message("manpage.json")) Invalid schema filename: manpage.json ... """ errors = get_validation_errors(filename) if not errors: return f"\u2705 Valid schema filename: {filename}" message = f"\u274c Invalid schema filename: {filename}\n\n" message += "Errors:\n" for i, error in enumerate(errors, 1): message += f" {i}. {error}\n" message += "\nExpected format: {domain}-schema-v{major}.{minor}.md\n" message += "Example: manpage-schema-v1.0.md\n" # Try to suggest a corrected filename try: # Extract domain guess (everything before first hyphen or dot) domain_guess = filename.split('-')[0].split('.')[0] suggestion = suggest_valid_filename(domain_guess, "1.0") message += f"\nSuggested filename: {suggestion}\n" except Exception: pass return message