""" String utility functions for MarkiTect ecosystem. Provides common string manipulation and formatting functions that are frequently needed across different MarkiTect capabilities. """ import re from typing import Optional def slugify(text: str, separator: str = "-") -> str: """ Convert a string to a URL-friendly slug. Args: text: The input string to convert separator: Character to use for word separation (default: "-") Returns: A lowercase string with special characters removed and words separated Examples: >>> slugify("Hello World!") 'hello-world' >>> slugify("My Great Article", "_") 'my_great_article' """ if not text: return "" # Convert to lowercase and normalize unicode text = text.lower() # Remove unicode accents by replacing with ASCII equivalents text = re.sub(r'[àáâãäå]', 'a', text) text = re.sub(r'[èéêë]', 'e', text) text = re.sub(r'[ìíîï]', 'i', text) text = re.sub(r'[òóôõö]', 'o', text) text = re.sub(r'[ùúûü]', 'u', text) text = re.sub(r'[ýÿ]', 'y', text) text = re.sub(r'[ç]', 'c', text) text = re.sub(r'[ñ]', 'n', text) # Replace non-alphanumeric characters (except underscores and dashes) with separator text = re.sub(r'[^\w\s-]', '', text) # Replace whitespace and underscores with separator text = re.sub(r'[\s_]+', separator, text) # Replace multiple separators with single separator text = re.sub(f'[{re.escape(separator)}]+', separator, text) # Remove leading/trailing separators text = text.strip(separator) return text def truncate(text: str, max_length: int, suffix: str = "...") -> str: """ Truncate a string to a maximum length, adding a suffix if truncated. Args: text: The input string to truncate max_length: Maximum length of the result (including suffix) suffix: String to append if truncation occurs (default: "...") Returns: The truncated string with suffix if needed Examples: >>> truncate("This is a long string", 10) 'This is...' >>> truncate("Short", 10) 'Short' """ if not text or len(text) <= max_length: return text if max_length <= len(suffix): return suffix[:max_length] truncate_at = max_length - len(suffix) return text[:truncate_at] + suffix def camel_to_snake(text: str) -> str: """ Convert camelCase or PascalCase to snake_case. Args: text: The input string in camelCase or PascalCase Returns: String converted to snake_case Examples: >>> camel_to_snake("camelCase") 'camel_case' >>> camel_to_snake("PascalCase") 'pascal_case' >>> camel_to_snake("XMLHttpRequest") 'xml_http_request' """ if not text: return text # Insert underscore before uppercase letters that follow lowercase letters text = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', text) # Insert underscore before uppercase letters that follow lowercase letters or digits text = re.sub('([a-z0-9])([A-Z])', r'\1_\2', text) return text.lower() def snake_to_camel(text: str, pascal_case: bool = False) -> str: """ Convert snake_case to camelCase or PascalCase. Args: text: The input string in snake_case pascal_case: If True, return PascalCase; otherwise camelCase (default: False) Returns: String converted to camelCase or PascalCase Examples: >>> snake_to_camel("snake_case") 'snakeCase' >>> snake_to_camel("snake_case", pascal_case=True) 'SnakeCase' """ if not text: return text components = text.split('_') if not components: return text if pascal_case: return ''.join(word.capitalize() for word in components) else: return components[0] + ''.join(word.capitalize() for word in components[1:]) def strip_ansi_codes(text: str) -> str: """ Remove ANSI escape sequences from a string. Args: text: String that may contain ANSI escape sequences Returns: String with ANSI codes removed Examples: >>> strip_ansi_codes("\\033[31mRed text\\033[0m") 'Red text' """ if not text: return text # ANSI escape sequence pattern ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') return ansi_escape.sub('', text)