chore: Issue closure 125 cleanup

2025-10-05 12:49:28 +02:00
parent 20e7f0f5bd
commit bce680e6cb
26 changed files with 2362 additions and 388 deletions
--- a/capabilities/markitect-utils/src/markitect_utils/string_utils.py
+++ b/capabilities/markitect-utils/src/markitect_utils/string_utils.py
@@ -0,0 +1,162 @@
+"""
+String utility functions for MarkiTect ecosystem.
+
+Provides common string manipulation and formatting functions that are
+frequently needed across different MarkiTect capabilities.
+"""
+
+import re
+from typing import Optional
+
+
+def slugify(text: str, separator: str = "-") -> str:
+    """
+    Convert a string to a URL-friendly slug.
+
+    Lowercases the text, folds common accented Latin letters to ASCII, drops
+    punctuation and turns runs of whitespace/underscores into ``separator``.
+
+    Args:
+        text: The input string to convert
+        separator: Character placed between words (default: "-"); may be ""
+
+    Returns:
+        A lowercase string with special characters removed and words separated
+
+    Examples:
+        >>> slugify("Hello World!")
+        'hello-world'
+        >>> slugify("My Great Article", "_")
+        'my_great_article'
+        >>> slugify("Café au lait", "")
+        'cafeaulait'
+    """
+    if not text:
+        return ""
+
+    # Fold accented Latin letters to ASCII in a single translate() pass
+    accented = "àáâãäå" "èéêë" "ìíîï" "òóôõö" "ùúûü" "ýÿ" "ç" "ñ"
+    plain = "aaaaaa" "eeee" "iiii" "ooooo" "uuuu" "yy" "c" "n"
+    text = text.lower().translate(str.maketrans(accented, plain))
+
+    # Drop everything except word characters, whitespace and dashes
+    text = re.sub(r'[^\w\s-]', '', text)
+    # Callable replacement: the separator is inserted literally, never
+    # parsed as a regex template (a backslash would otherwise raise)
+    text = re.sub(r'[\s_]+', lambda _match: separator, text)
+    if not separator:
+        # An empty character class is not a valid pattern; nothing to collapse
+        return text
+
+    # Collapse repeated separators, then trim them from both ends
+    text = re.sub(f'[{re.escape(separator)}]+', lambda _match: separator, text)
+    return text.strip(separator)
+
+
+def truncate(text: str, max_length: int, suffix: str = "...") -> str:
+    """
+    Truncate a string to a maximum length, adding a suffix if truncated.
+
+    Args:
+        text: The input string to truncate
+        max_length: Maximum length of the result, suffix included; <= 0 gives ""
+        suffix: String to append if truncation occurs (default: "...")
+
+    Returns:
+        The truncated string with suffix if needed
+
+    Examples:
+        >>> truncate("This is a long string", 10)
+        'This is...'
+        >>> truncate("Short", 10)
+        'Short'
+    """
+    if not text or len(text) <= max_length:
+        return text
+
+    if max_length <= len(suffix):
+        # No room for any text: keep what fits of the suffix. Clamping at
+        # zero stops a negative bound from slicing the suffix from its end.
+        return suffix[:max(max_length, 0)]
+    return text[:max_length - len(suffix)] + suffix
+
+
+def camel_to_snake(text: str) -> str:
+    """
+    Convert camelCase or PascalCase to snake_case.
+
+    Args:
+        text: The input string in camelCase or PascalCase
+
+    Returns:
+        String converted to snake_case
+
+    Examples:
+        >>> camel_to_snake("camelCase")
+        'camel_case'
+        >>> camel_to_snake("PascalCase")
+        'pascal_case'
+        >>> camel_to_snake("XMLHttpRequest")
+        'xml_http_request'
+    """
+    if not text:
+        return text
+
+    # Split before a capitalized word ("XMLHttp" -> "XML_Http"); excluding "_"
+    # keeps "Some_Name" from turning into "some__name"
+    text = re.sub('([^_\n])([A-Z][a-z]+)', r'\1_\2', text)
+    # Split where a lowercase letter or digit is followed by an uppercase letter
+    text = re.sub('([a-z0-9])([A-Z])', r'\1_\2', text)
+    return text.lower()
+
+
+def snake_to_camel(text: str, pascal_case: bool = False) -> str:
+    """
+    Turn a snake_case identifier into camelCase or PascalCase.
+
+    Every word after the first is capitalized with ``str.capitalize``,
+    which also lowercases the remaining letters of that word.
+
+    Args:
+        text: Identifier whose words are separated by underscores
+        pascal_case: When True the first word is capitalized as well,
+            producing PascalCase; when False (default) it is left as-is
+
+    Returns:
+        The words joined without underscores; falsy input is returned as-is
+
+    Examples:
+        >>> snake_to_camel("snake_case")
+        'snakeCase'
+        >>> snake_to_camel("snake_case", pascal_case=True)
+        'SnakeCase'
+    """
+    if text:
+        # str.split always yields at least one element, so unpacking is safe
+        first, *rest = text.split('_')
+        if pascal_case:
+            first = first.capitalize()
+        return first + ''.join(map(str.capitalize, rest))
+    return text
+
+
+def strip_ansi_codes(text: str) -> str:
+    """
+    Strip ANSI escape sequences (such as color codes) out of a string.
+
+    Args:
+        text: Input that may carry ANSI escape sequences
+
+    Returns:
+        The input without the matched sequences; falsy input is returned as-is
+
+    Examples:
+        >>> strip_ansi_codes("\\033[31mRed text\\033[0m")
+        'Red text'
+    """
+    if text:
+        # ESC followed by either a single character in @-Z or \-_, or a CSI
+        # sequence: "[", parameter bytes, intermediate bytes, final byte
+        pattern = r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])'
+        return re.sub(pattern, '', text)
+    return text