chore: Issue closure 125 cleanup
This commit is contained in:
50
capabilities/markitect-utils/src/markitect_utils/__init__.py
Normal file
50
capabilities/markitect-utils/src/markitect_utils/__init__.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""
|
||||
MarkiTect Utils - A collection of utility functions for the MarkiTect ecosystem.
|
||||
|
||||
This capability provides commonly used utility functions that can be shared
|
||||
across different MarkiTect capabilities and projects.
|
||||
"""
|
||||
|
||||
from .string_utils import (
|
||||
slugify,
|
||||
truncate,
|
||||
camel_to_snake,
|
||||
snake_to_camel,
|
||||
strip_ansi_codes,
|
||||
)
|
||||
|
||||
from .file_utils import (
|
||||
safe_filename,
|
||||
ensure_extension,
|
||||
get_file_size,
|
||||
is_text_file,
|
||||
normalize_path,
|
||||
)
|
||||
|
||||
from .validation_utils import (
|
||||
is_valid_email,
|
||||
is_valid_url,
|
||||
is_valid_semver,
|
||||
validate_required_fields,
|
||||
)
|
||||
|
||||
__version__ = "0.1.0-dev"
|
||||
__all__ = [
|
||||
# String utilities
|
||||
"slugify",
|
||||
"truncate",
|
||||
"camel_to_snake",
|
||||
"snake_to_camel",
|
||||
"strip_ansi_codes",
|
||||
# File utilities
|
||||
"safe_filename",
|
||||
"ensure_extension",
|
||||
"get_file_size",
|
||||
"is_text_file",
|
||||
"normalize_path",
|
||||
# Validation utilities
|
||||
"is_valid_email",
|
||||
"is_valid_url",
|
||||
"is_valid_semver",
|
||||
"validate_required_fields",
|
||||
]
|
||||
168
capabilities/markitect-utils/src/markitect_utils/file_utils.py
Normal file
168
capabilities/markitect-utils/src/markitect_utils/file_utils.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""
|
||||
File utility functions for MarkiTect ecosystem.
|
||||
|
||||
Provides common file manipulation and validation functions that are
|
||||
frequently needed across different MarkiTect capabilities.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
|
||||
|
||||
def safe_filename(filename: str, replacement: str = "_") -> str:
    r"""
    Convert a string to a safe filename by replacing unsafe characters.

    The characters ``< > : " / \ | ? *`` and ASCII control characters
    (0x00-0x1F) are replaced with ``replacement``; spaces inside the name are
    kept. Leading/trailing dots and spaces are then stripped. If the result is
    empty, or its base name (the part before the first dot) is a Windows
    reserved device name, it is prefixed with ``file`` + ``replacement``.

    Args:
        filename: The input filename to sanitize
        replacement: String substituted for each unsafe character (default: "_").
            It is passed to ``re.sub`` as the replacement template.

    Returns:
        A safe filename string, or "" when ``filename`` is empty

    Examples:
        >>> safe_filename("my file<>.txt")
        'my file__.txt'
        >>> safe_filename("file/with\\path.txt")
        'file_with_path.txt'
        >>> safe_filename("CON.txt")
        'file_CON.txt'
    """
    if not filename:
        return ""

    # Replace characters that are invalid on common filesystems
    unsafe_chars = r'[<>:"/\\|?*\x00-\x1f]'
    safe_name = re.sub(unsafe_chars, replacement, filename)

    # Remove leading/trailing dots and spaces
    safe_name = safe_name.strip('. ')

    # Windows reserves these device names regardless of extension, so the
    # comparison uses the upper-cased part before the first dot.
    reserved_names = {
        'CON', 'PRN', 'AUX', 'NUL',
        'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
        'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',
    }
    base_name = safe_name.split('.')[0].upper() if safe_name else ""

    # Ensure the result is neither empty nor a reserved name
    if not safe_name or base_name in reserved_names:
        safe_name = f"file{replacement}{safe_name}"

    return safe_name
|
||||
|
||||
|
||||
def ensure_extension(filename: str, extension: str) -> str:
    """
    Append an extension to a filename unless it already ends with it.

    Args:
        filename: The input filename
        extension: The desired extension (with or without leading dot)

    Returns:
        The filename ending in the requested extension. An empty filename
        yields "", and an empty extension leaves the filename unchanged.

    Examples:
        >>> ensure_extension("document", ".md")
        'document.md'
        >>> ensure_extension("document.txt", ".md")
        'document.txt.md'
        >>> ensure_extension("document.md", "md")
        'document.md'
    """
    if not filename:
        return ""
    if not extension:
        return filename

    # Work with a dotted suffix regardless of how the caller spelled it
    suffix = extension if extension.startswith('.') else f".{extension}"

    if filename.endswith(suffix):
        return filename
    return filename + suffix
|
||||
|
||||
|
||||
def get_file_size(file_path: Union[str, Path]) -> Optional[int]:
    """
    Return the size in bytes of the file at ``file_path``.

    Args:
        file_path: Path to the file

    Returns:
        Size in bytes, or None when the path cannot be stat'ed
        (for example because it does not exist or is not accessible)

    Examples:
        >>> get_file_size("document.txt")  # doctest: +SKIP
        1024
    """
    try:
        size = os.stat(file_path).st_size
    except OSError:
        # Missing file, permission problem, etc. -> report "unknown"
        return None
    return size
|
||||
|
||||
|
||||
def is_text_file(file_path: Union[str, Path], sample_size: int = 512) -> bool:
    """
    Check if a file appears to be a text file by examining its content.

    The first ``sample_size`` bytes are read. The file is considered text when
    the sample is empty, or when it contains no null byte and decodes as
    UTF-8 (which also covers plain ASCII). A multi-byte character that is cut
    off by the end of the sample is tolerated as long as the file continues
    beyond the sample.

    Args:
        file_path: Path to the file
        sample_size: Number of bytes to sample from the file (default: 512)

    Returns:
        True if the file appears to be text, False otherwise
        (including when the file cannot be opened or read)

    Examples:
        >>> is_text_file("document.txt")  # doctest: +SKIP
        True
    """
    import codecs  # local import: only needed by this function

    try:
        with open(file_path, 'rb') as f:
            sample = f.read(sample_size)
    except OSError:
        return False

    if not sample:
        return True  # Empty file is considered text

    # Null bytes are common in binary files and do not occur in normal text
    if b'\x00' in sample:
        return False

    # A short read (or an unbounded read) means the sample ends at EOF, so an
    # incomplete trailing sequence is a real error. Otherwise the sample may
    # simply have split a multi-byte character and must not be rejected.
    reached_eof = sample_size < 0 or len(sample) < sample_size

    decoder = codecs.getincrementaldecoder('utf-8')()
    try:
        decoder.decode(sample, final=reached_eof)
    except UnicodeDecodeError:
        return False
    return True
|
||||
|
||||
|
||||
def normalize_path(path: Union[str, Path]) -> str:
    """
    Turn a path into an absolute, resolved path string.

    Args:
        path: The input path to normalize

    Returns:
        The result of ``Path(path).resolve()`` as a string, or "" when
        ``path`` is falsy (e.g. an empty string)

    Examples:
        >>> normalize_path("./dir/../file.txt")  # doctest: +SKIP
        '/current/working/directory/file.txt'
    """
    return str(Path(path).resolve()) if path else ""
|
||||
162
capabilities/markitect-utils/src/markitect_utils/string_utils.py
Normal file
162
capabilities/markitect-utils/src/markitect_utils/string_utils.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
String utility functions for MarkiTect ecosystem.
|
||||
|
||||
Provides common string manipulation and formatting functions that are
|
||||
frequently needed across different MarkiTect capabilities.
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Lowercase accented letters mapped to their plain ASCII letter, applied in a
# single pass with str.translate.
_SLUG_ACCENT_MAP = {
    ord(accented): plain
    for plain, accented_group in (
        ("a", "àáâãäå"),
        ("e", "èéêë"),
        ("i", "ìíîï"),
        ("o", "òóôõö"),
        ("u", "ùúûü"),
        ("y", "ýÿ"),
        ("c", "ç"),
        ("n", "ñ"),
    )
    for accented in accented_group
}


def slugify(text: str, separator: str = "-") -> str:
    """
    Convert a string to a URL-friendly slug.

    The text is lowercased, a fixed set of accented Latin letters is replaced
    by their ASCII letter, characters other than word characters, whitespace
    and dashes are removed, and runs of whitespace/underscores become
    ``separator``. Repeated separators are collapsed and leading/trailing
    separators are removed.

    Args:
        text: The input string to convert
        separator: String to use for word separation (default: "-"). It is
            inserted literally; an empty string joins the words directly.

    Returns:
        A lowercase string with special characters removed and words separated

    Examples:
        >>> slugify("Hello World!")
        'hello-world'
        >>> slugify("My Great Article", "_")
        'my_great_article'
    """
    if not text:
        return ""

    # Lowercase first so only lowercase accents need mapping
    text = text.lower().translate(_SLUG_ACCENT_MAP)

    # Drop everything except word characters, whitespace and dashes
    text = re.sub(r'[^\w\s-]', '', text)

    # A callable replacement inserts the separator verbatim; a plain string
    # would be parsed as a replacement template (backslashes, group refs).
    def _literal_separator(_match):
        return separator

    # Replace whitespace and underscores with the separator
    text = re.sub(r'[\s_]+', _literal_separator, text)

    if separator:
        # Match the separator as a unit rather than as a character set, so
        # multi-character separators work and an empty one cannot produce an
        # invalid pattern.
        run = f'(?:{re.escape(separator)})+'
        # Collapse repeated separators
        text = re.sub(run, _literal_separator, text)
        # Remove leading/trailing separators
        text = re.sub(rf'^{run}|{run}\Z', '', text)

    return text
|
||||
|
||||
|
||||
def truncate(text: str, max_length: int, suffix: str = "...") -> str:
    """
    Truncate a string to a maximum length, adding a suffix if truncated.

    Args:
        text: The input string to truncate
        max_length: Maximum length of the result (including suffix)
        suffix: String to append if truncation occurs (default: "...")

    Returns:
        ``text`` unchanged when it is empty or already fits; otherwise a
        string of at most ``max_length`` characters ending in ``suffix``.
        When ``max_length`` is too small to hold the whole suffix, the suffix
        itself is cut to ``max_length``; a ``max_length`` of zero or less
        yields "".

    Examples:
        >>> truncate("This is a long string", 10)
        'This is...'
        >>> truncate("Short", 10)
        'Short'
        >>> truncate("This is a long string", 2)
        '..'
    """
    if not text or len(text) <= max_length:
        return text

    # A negative limit would otherwise be used as a slice end (suffix[:-1])
    # and return characters even though nothing fits.
    if max_length <= 0:
        return ""

    if max_length <= len(suffix):
        return suffix[:max_length]

    keep = max_length - len(suffix)
    return text[:keep] + suffix
|
||||
|
||||
|
||||
# Boundary before a capitalised word ("XMLHttp" -> "XML_Http"). The preceding
# character must not already be an underscore, and, like ".", must not be a
# newline.
_CAMEL_WORD_BOUNDARY = re.compile(r'([^_\n])([A-Z][a-z]+)')
# Boundary between a lowercase letter or digit and the uppercase letter after it.
_CAMEL_LOWER_UPPER = re.compile(r'([a-z0-9])([A-Z])')


def camel_to_snake(text: str) -> str:
    """
    Convert camelCase or PascalCase to snake_case.

    Underscores already present in the input are kept as they are and are not
    doubled.

    Args:
        text: The input string in camelCase or PascalCase

    Returns:
        String converted to snake_case (falsy input is returned unchanged)

    Examples:
        >>> camel_to_snake("camelCase")
        'camel_case'
        >>> camel_to_snake("PascalCase")
        'pascal_case'
        >>> camel_to_snake("XMLHttpRequest")
        'xml_http_request'
        >>> camel_to_snake("snake_Case")
        'snake_case'
    """
    if not text:
        return text

    # Split before each capitalised word, e.g. "XMLHttpRequest" -> "XML_Http_Request"
    text = _CAMEL_WORD_BOUNDARY.sub(r'\1_\2', text)
    # Split remaining lower/digit -> upper transitions, e.g. "getX" -> "get_X"
    text = _CAMEL_LOWER_UPPER.sub(r'\1_\2', text)

    return text.lower()
|
||||
|
||||
|
||||
def snake_to_camel(text: str, pascal_case: bool = False) -> str:
    """
    Convert snake_case to camelCase or PascalCase.

    The text is split on underscores. Every part after the first is passed
    through ``str.capitalize``; the first part is capitalised only for
    PascalCase and otherwise left exactly as given.

    Args:
        text: The input string in snake_case
        pascal_case: If True, return PascalCase; otherwise camelCase (default: False)

    Returns:
        String converted to camelCase or PascalCase (falsy input is returned
        unchanged)

    Examples:
        >>> snake_to_camel("snake_case")
        'snakeCase'
        >>> snake_to_camel("snake_case", pascal_case=True)
        'SnakeCase'
    """
    if not text:
        return text

    first, *remaining = text.split('_')
    head = first.capitalize() if pascal_case else first
    return head + ''.join(map(str.capitalize, remaining))
|
||||
|
||||
|
||||
def strip_ansi_codes(text: str) -> str:
    r"""
    Remove ANSI escape sequences from a string.

    Args:
        text: String that may contain ANSI escape sequences

    Returns:
        The string with every match of the escape-sequence pattern removed
        (falsy input is returned unchanged)

    Examples:
        >>> strip_ansi_codes("\033[31mRed text\033[0m")
        'Red text'
    """
    if not text:
        return text

    # ESC followed by either a single character in the ranges @-Z or \-_,
    # or "[" with optional parameter/intermediate bytes and one final byte.
    escape_sequence = r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])'
    return re.sub(escape_sequence, '', text)
|
||||
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
Validation utility functions for MarkiTect ecosystem.
|
||||
|
||||
Provides common validation functions for various data types and formats
|
||||
that are frequently needed across different MarkiTect capabilities.
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
|
||||
# Basic "local@domain.tld" shape. Used with fullmatch, so no anchors are needed.
_EMAIL_PATTERN = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')


def is_valid_email(email: str) -> bool:
    """
    Check if a string is a valid email address format.

    This is a basic shape check (local part, "@", domain, alphabetic TLD of
    at least two letters), not a full RFC validation. The whole string must
    match, so surrounding whitespace or a trailing newline is rejected.

    Args:
        email: The email address to validate

    Returns:
        True if the email format is valid, False otherwise
        (including for empty or non-string input)

    Examples:
        >>> is_valid_email("user@example.com")
        True
        >>> is_valid_email("invalid.email")
        False
    """
    if not email or not isinstance(email, str):
        return False

    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let "user@example.com\\n" through.
    return _EMAIL_PATTERN.fullmatch(email) is not None
|
||||
|
||||
|
||||
# Used with fullmatch, so no anchors are needed.
_URL_PATTERN = re.compile(
    r'https?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'  # domain...
    r'localhost|'  # localhost...
    r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})'  # ...or ip
    r'(?::[0-9]+)?'  # optional port
    r'(?:/?|[/?]\S+)',  # optional path / query
    re.IGNORECASE,
)


def is_valid_url(url: str) -> bool:
    """
    Check if a string is a valid URL format.

    Accepts ``http``/``https`` URLs whose host is a dotted domain name,
    ``localhost`` or a dotted-quad of 1-3 digit groups, with an optional port
    and an optional path/query. The whole string must match, so a trailing
    newline is rejected. IP octets are not range-checked.

    Args:
        url: The URL to validate

    Returns:
        True if the URL format is valid, False otherwise
        (including for empty or non-string input)

    Examples:
        >>> is_valid_url("https://example.com")
        True
        >>> is_valid_url("not-a-url")
        False
    """
    if not url or not isinstance(url, str):
        return False

    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline. The TLD may be up to 63 letters (the DNS label limit)
    # and digits are restricted to ASCII 0-9.
    return _URL_PATTERN.fullmatch(url) is not None
|
||||
|
||||
|
||||
# Semantic Versioning 2.0.0 shape: MAJOR.MINOR.PATCH[-prerelease][+build].
# re.ASCII keeps \d to 0-9; the pattern is used with fullmatch, so it is unanchored.
_SEMVER_PATTERN = re.compile(
    r'(?P<major>0|[1-9]\d*)\.'
    r'(?P<minor>0|[1-9]\d*)\.'
    r'(?P<patch>0|[1-9]\d*)'
    r'(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)'
    r'(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?'
    r'(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?',
    re.ASCII,
)


def is_valid_semver(version: str) -> bool:
    """
    Check if a string is a valid semantic version (semver) format.

    Numeric identifiers must not have leading zeros. An optional
    ``-prerelease`` and ``+buildmetadata`` part is allowed. The whole string
    must match, so a trailing newline is rejected, and only ASCII digits count
    as digits.

    Args:
        version: The version string to validate

    Returns:
        True if the version follows semver format, False otherwise
        (including for empty or non-string input)

    Examples:
        >>> is_valid_semver("1.0.0")
        True
        >>> is_valid_semver("1.0.0-alpha.1")
        True
        >>> is_valid_semver("1.0")
        False
    """
    if not version or not isinstance(version, str):
        return False

    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would accept "1.0.0\\n".
    return _SEMVER_PATTERN.fullmatch(version) is not None
|
||||
|
||||
|
||||
def validate_required_fields(data: Dict[str, Any], required_fields: List[str]) -> Dict[str, List[str]]:
    """
    Validate that required fields are present and not empty in a dictionary.

    Args:
        data: Dictionary to validate
        required_fields: List (or tuple) of field names that are required

    Returns:
        Dictionary with 'missing' and 'empty' keys containing lists of field
        names, in the order they appear in ``required_fields``. Both lists are
        empty when ``data`` is not a dict or ``required_fields`` is neither a
        list nor a tuple.

    Examples:
        >>> validate_required_fields({"name": "John", "email": ""}, ["name", "email", "age"])
        {'missing': ['age'], 'empty': ['email']}
        >>> validate_required_fields({"name": "John", "email": "john@example.com"}, ["name", "email"])
        {'missing': [], 'empty': []}
    """
    result: Dict[str, List[str]] = {
        'missing': [],
        'empty': [],
    }

    # Tuples are accepted as well as lists: rejecting them here would silently
    # report "nothing missing" for a perfectly reasonable argument.
    if not isinstance(data, dict) or not isinstance(required_fields, (list, tuple)):
        return result

    for field in required_fields:
        if field not in data:
            result['missing'].append(field)
        elif _is_empty_value(data[field]):
            result['empty'].append(field)

    return result
|
||||
|
||||
|
||||
def _is_empty_value(value: Any) -> bool:
    """
    Decide whether a value counts as empty for validation purposes.

    Args:
        value: The value to check

    Returns:
        True for None, for strings that are blank after stripping whitespace,
        and for list/tuple/dict/set instances of length zero. False for any
        int, float or bool (so 0 and False are valid values). Any other type
        is empty when it is falsy.
    """
    if value is None:
        return True

    if isinstance(value, str):
        # Whitespace-only strings are treated like empty strings
        return value.strip() == ""

    if isinstance(value, (list, tuple, dict, set)):
        return not len(value)

    # Numbers and booleans are never empty: zero and False are real values
    if isinstance(value, (int, float, bool)):
        return False

    # Anything else falls back to Python's truthiness
    return not value
|
||||
Reference in New Issue
Block a user