chore: Issue closure 125 cleanup

This commit is contained in:
2025-10-05 12:49:28 +02:00
parent 20e7f0f5bd
commit bce680e6cb
26 changed files with 2362 additions and 388 deletions

View File

@@ -0,0 +1,162 @@
"""
String utility functions for MarkiTect ecosystem.
Provides common string manipulation and formatting functions that are
frequently needed across different MarkiTect capabilities.
"""
import re
from typing import Optional
def slugify(text: str, separator: str = "-") -> str:
"""
Convert a string to a URL-friendly slug.
Args:
text: The input string to convert
separator: Character to use for word separation (default: "-")
Returns:
A lowercase string with special characters removed and words separated
Examples:
>>> slugify("Hello World!")
'hello-world'
>>> slugify("My Great Article", "_")
'my_great_article'
"""
if not text:
return ""
# Convert to lowercase and normalize unicode
text = text.lower()
# Remove unicode accents by replacing with ASCII equivalents
text = re.sub(r'[àáâãäå]', 'a', text)
text = re.sub(r'[èéêë]', 'e', text)
text = re.sub(r'[ìíîï]', 'i', text)
text = re.sub(r'[òóôõö]', 'o', text)
text = re.sub(r'[ùúûü]', 'u', text)
text = re.sub(r'[ýÿ]', 'y', text)
text = re.sub(r'[ç]', 'c', text)
text = re.sub(r'[ñ]', 'n', text)
# Replace non-alphanumeric characters (except underscores and dashes) with separator
text = re.sub(r'[^\w\s-]', '', text)
# Replace whitespace and underscores with separator
text = re.sub(r'[\s_]+', separator, text)
# Replace multiple separators with single separator
text = re.sub(f'[{re.escape(separator)}]+', separator, text)
# Remove leading/trailing separators
text = text.strip(separator)
return text
def truncate(text: str, max_length: int, suffix: str = "...") -> str:
"""
Truncate a string to a maximum length, adding a suffix if truncated.
Args:
text: The input string to truncate
max_length: Maximum length of the result (including suffix)
suffix: String to append if truncation occurs (default: "...")
Returns:
The truncated string with suffix if needed
Examples:
>>> truncate("This is a long string", 10)
'This is...'
>>> truncate("Short", 10)
'Short'
"""
if not text or len(text) <= max_length:
return text
if max_length <= len(suffix):
return suffix[:max_length]
truncate_at = max_length - len(suffix)
return text[:truncate_at] + suffix
def camel_to_snake(text: str) -> str:
"""
Convert camelCase or PascalCase to snake_case.
Args:
text: The input string in camelCase or PascalCase
Returns:
String converted to snake_case
Examples:
>>> camel_to_snake("camelCase")
'camel_case'
>>> camel_to_snake("PascalCase")
'pascal_case'
>>> camel_to_snake("XMLHttpRequest")
'xml_http_request'
"""
if not text:
return text
# Insert underscore before uppercase letters that follow lowercase letters
text = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', text)
# Insert underscore before uppercase letters that follow lowercase letters or digits
text = re.sub('([a-z0-9])([A-Z])', r'\1_\2', text)
return text.lower()
def snake_to_camel(text: str, pascal_case: bool = False) -> str:
"""
Convert snake_case to camelCase or PascalCase.
Args:
text: The input string in snake_case
pascal_case: If True, return PascalCase; otherwise camelCase (default: False)
Returns:
String converted to camelCase or PascalCase
Examples:
>>> snake_to_camel("snake_case")
'snakeCase'
>>> snake_to_camel("snake_case", pascal_case=True)
'SnakeCase'
"""
if not text:
return text
components = text.split('_')
if not components:
return text
if pascal_case:
return ''.join(word.capitalize() for word in components)
else:
return components[0] + ''.join(word.capitalize() for word in components[1:])
def strip_ansi_codes(text: str) -> str:
"""
Remove ANSI escape sequences from a string.
Args:
text: String that may contain ANSI escape sequences
Returns:
String with ANSI codes removed
Examples:
>>> strip_ansi_codes("\\033[31mRed text\\033[0m")
'Red text'
"""
if not text:
return text
# ANSI escape sequence pattern
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
return ansi_escape.sub('', text)