Files
markitect-main/capabilities/markitect-utils/src/markitect_utils/validation_utils.py

160 lines
4.4 KiB
Python

"""
Validation utility functions for MarkiTect ecosystem.
Provides common validation functions for various data types and formats
that are frequently needed across different MarkiTect capabilities.
"""
import numbers
import re
from typing import Any, Dict, List, Optional, Union
def is_valid_email(email: str) -> bool:
    """
    Check if a string is a valid email address format.

    This is a pragmatic ``local@domain.tld`` format check, not a full
    RFC 5322 validation, and it does not verify that the mailbox exists.
    The whole string must match: surrounding whitespace or a trailing
    newline makes the address invalid.

    Args:
        email: The email address to validate

    Returns:
        True if the email format is valid, False otherwise (including when
        ``email`` is empty or not a string)

    Examples:
        >>> is_valid_email("user@example.com")
        True
        >>> is_valid_email("invalid.email")
        False
    """
    if not email or not isinstance(email, str):
        return False

    # Basic email pattern: local part, "@", dotted domain, alphabetic TLD (2+ letters).
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'

    # fullmatch() rather than match() with a "$" anchor: "$" also matches just
    # before a trailing newline, which would let "user@example.com\n" through.
    return re.fullmatch(pattern, email) is not None
def is_valid_url(url: str) -> bool:
    """
    Check if a string is a valid URL format.

    Only ``http`` and ``https`` URLs are accepted. The host may be a dotted
    domain name, ``localhost`` or a dotted-quad IPv4 address, optionally
    followed by a port and a path/query. The whole string must match, so a
    trailing newline or embedded whitespace makes the URL invalid.

    Note that IPv4 octets are only checked for shape (1-3 ASCII digits),
    not for the 0-255 range.

    Args:
        url: The URL to validate

    Returns:
        True if the URL format is valid, False otherwise (including when
        ``url`` is empty or not a string)

    Examples:
        >>> is_valid_url("https://example.com")
        True
        >>> is_valid_url("not-a-url")
        False
    """
    if not url or not isinstance(url, str):
        return False

    # Digits are spelled [0-9] instead of \d because \d also matches
    # non-ASCII Unicode digits in str patterns.
    pattern = re.compile(
        r'https?://'  # http:// or https://
        # domain: labels of up to 63 chars, then an alphabetic TLD; the TLD
        # may be as long as a DNS label (63) so modern TLDs are accepted
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'
        r'localhost|'  # localhost...
        r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})'  # ...or ip
        r'(?::[0-9]+)?'  # optional port
        r'(?:/?|[/?]\S+)',  # optional path / query
        re.IGNORECASE,
    )

    # fullmatch() rather than "^...$" with match(): "$" would also match
    # just before a trailing newline.
    return pattern.fullmatch(url) is not None
def is_valid_semver(version: str) -> bool:
    """
    Check if a string is a valid semantic version (semver) format.

    The string must be exactly ``MAJOR.MINOR.PATCH`` with an optional
    ``-prerelease`` and an optional ``+buildmetadata`` suffix. Numeric
    parts must use ASCII digits and must not have leading zeros. The whole
    string must match, so surrounding whitespace or a trailing newline
    makes the version invalid.

    Args:
        version: The version string to validate

    Returns:
        True if the version follows semver format, False otherwise
        (including when ``version`` is empty or not a string)

    Examples:
        >>> is_valid_semver("1.0.0")
        True
        >>> is_valid_semver("1.0.0-alpha.1")
        True
        >>> is_valid_semver("1.0")
        False
    """
    if not version or not isinstance(version, str):
        return False

    # Digits are spelled [0-9] instead of \d because \d also matches
    # non-ASCII Unicode digits in str patterns.
    pattern = re.compile(
        r'(?P<major>0|[1-9][0-9]*)\.'
        r'(?P<minor>0|[1-9][0-9]*)\.'
        r'(?P<patch>0|[1-9][0-9]*)'
        # prerelease: dot-separated identifiers; purely numeric ones must
        # not have leading zeros
        r'(?:-(?P<prerelease>(?:0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)'
        r'(?:\.(?:0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*))?'
        # build metadata: dot-separated alphanumeric/hyphen identifiers
        r'(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?'
    )

    # fullmatch() rather than "^...$" with match(): "$" would also match
    # just before a trailing newline.
    return pattern.fullmatch(version) is not None
def validate_required_fields(
    data: Dict[str, Any], required_fields: List[str]
) -> Dict[str, List[str]]:
    """
    Validate that required fields are present and not empty in a dictionary.

    A field is reported as 'missing' when it is not a key of ``data`` and as
    'empty' when it is present but its value is considered empty by
    ``_is_empty_value``. Fields are reported in the order they appear in
    ``required_fields``.

    The function never raises for unusable arguments: if ``data`` is not a
    dict, or ``required_fields`` is not a list or tuple (a bare string is
    not treated as a collection of field names), an empty report is returned.

    Args:
        data: Dictionary to validate
        required_fields: List (or tuple) of field names that are required

    Returns:
        Dictionary with 'missing' and 'empty' keys containing lists of field names

    Examples:
        >>> validate_required_fields({"name": "John", "email": ""}, ["name", "email", "age"])
        {'missing': ['age'], 'empty': ['email']}
        >>> validate_required_fields({"name": "John", "email": "john@example.com"}, ["name", "email"])
        {'missing': [], 'empty': []}
    """
    result: Dict[str, List[str]] = {
        'missing': [],
        'empty': [],
    }

    # Tuples are accepted alongside lists: rejecting them would silently
    # return an empty report, which reads as "validation passed".
    if not isinstance(data, dict) or not isinstance(required_fields, (list, tuple)):
        return result

    for field in required_fields:
        if field not in data:
            result['missing'].append(field)
        elif _is_empty_value(data[field]):
            result['empty'].append(field)

    return result
def _is_empty_value(value: Any) -> bool:
"""
Check if a value should be considered empty for validation purposes.
Args:
value: The value to check
Returns:
True if the value is considered empty, False otherwise
"""
if value is None:
return True
if isinstance(value, str):
return not value.strip()
if isinstance(value, (list, tuple, dict, set)):
return len(value) == 0
# For numeric types (int, float), only None is considered empty
# Zero and False are valid values
if isinstance(value, (int, float, bool)):
return False
# For other types, use Python's truthiness
return not bool(value)