chore: Issue closure 125 cleanup
This commit is contained in:
168
capabilities/markitect-utils/src/markitect_utils/file_utils.py
Normal file
168
capabilities/markitect-utils/src/markitect_utils/file_utils.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""
|
||||
File utility functions for MarkiTect ecosystem.
|
||||
|
||||
Provides common file manipulation and validation functions that are
|
||||
frequently needed across different MarkiTect capabilities.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
|
||||
|
||||
def safe_filename(filename: str, replacement: str = "_") -> str:
    r"""
    Convert a string to a safe filename by removing/replacing unsafe characters.

    Each of the characters ``< > : " / | ? *``, the backslash, and every
    ASCII control character (0x00-0x1f) is replaced with ``replacement``.
    Leading and trailing dots and spaces are then stripped; spaces inside
    the name are kept as they are. If the result is empty, or the part
    before its first dot is a Windows reserved device name (compared
    case-insensitively), the result is prefixed with ``"file"`` followed
    by ``replacement``.

    Args:
        filename: The input filename to sanitize
        replacement: Character to replace unsafe characters with (default: "_")

    Returns:
        A safe filename string, or "" when ``filename`` is empty

    Examples:
        >>> safe_filename("my file<>.txt")
        'my file__.txt'
        >>> safe_filename("file/with\\path.txt")
        'file_with_path.txt'
        >>> safe_filename("con.txt")
        'file_con.txt'
        >>> safe_filename("...")
        'file_'
    """
    if not filename:
        return ""

    # Replace unsafe characters
    unsafe_chars = r'[<>:"/\\|?*\x00-\x1f]'
    safe_name = re.sub(unsafe_chars, replacement, filename)

    # Remove leading/trailing dots and spaces
    safe_name = safe_name.strip('. ')

    # Windows reserved names apply to the base name before the first dot,
    # regardless of case, so "con.txt" is as reserved as "CON".
    reserved_names = {
        'CON', 'PRN', 'AUX', 'NUL',
        'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
        'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',
    }
    base_name = safe_name.split('.')[0].upper() if safe_name else ""

    # Ensure not empty and not reserved names
    if not safe_name or base_name in reserved_names:
        safe_name = f"file{replacement}{safe_name}"

    return safe_name
|
||||
|
||||
|
||||
def ensure_extension(filename: str, extension: str) -> str:
    """
    Return ``filename`` guaranteed to end with ``extension``.

    The extension may be given with or without its leading dot. The
    comparison is a plain, case-sensitive suffix check, so a filename that
    already carries a different extension simply gets the new one appended.

    Args:
        filename: The input filename
        extension: The desired extension (with or without leading dot)

    Returns:
        The filename ending in the requested extension; "" when
        ``filename`` is empty, and ``filename`` unchanged when
        ``extension`` is empty

    Examples:
        >>> ensure_extension("document", ".md")
        'document.md'
        >>> ensure_extension("document.txt", ".md")
        'document.txt.md'
        >>> ensure_extension("document.md", "md")
        'document.md'
    """
    if not filename:
        return ""

    # Nothing to enforce without an extension.
    if not extension:
        return filename

    # Work with the dotted form of the extension from here on.
    suffix = extension if extension.startswith('.') else f".{extension}"

    if filename.endswith(suffix):
        return filename
    return filename + suffix
|
||||
|
||||
|
||||
def get_file_size(file_path: Union[str, Path]) -> Optional[int]:
    """
    Report how many bytes the file at ``file_path`` occupies.

    Args:
        file_path: Path to the file

    Returns:
        The size in bytes, or None when the path cannot be stat'ed
        (for example because it does not exist or is not accessible)

    Examples:
        >>> get_file_size("document.txt")  # doctest: +SKIP
        1024
    """
    try:
        stat_result = os.stat(file_path)
    except OSError:
        # Missing file, permission problem, etc. -- reported as "no size".
        return None
    return stat_result.st_size
|
||||
|
||||
|
||||
def is_text_file(file_path: Union[str, Path], sample_size: int = 512) -> bool:
    """
    Check if a file appears to be a text file by examining its content.

    The first ``sample_size`` bytes are read in binary mode. The file is
    considered text when that sample is empty, or when it contains no NUL
    byte and is valid UTF-8 (which also covers plain ASCII). A multi-byte
    UTF-8 character that is cut in half by the end of the sample does not
    count against the file as long as more data follows the sample.

    Args:
        file_path: Path to the file
        sample_size: Number of bytes to sample from the file (default: 512)

    Returns:
        True if the file appears to be text, False otherwise (including
        when the file cannot be opened or read)

    Examples:
        >>> is_text_file("document.txt")  # doctest: +SKIP
        True
    """
    # Imported locally so this function does not depend on the module's
    # import block being changed.
    import codecs

    try:
        with open(file_path, 'rb') as f:
            sample = f.read(sample_size)
            # Peek one byte past the sample: if there is more data, the
            # sample may legitimately end in the middle of a character.
            sample_is_partial = bool(sample) and bool(f.read(1))
    except OSError:
        return False

    if not sample:
        return True  # Empty file is considered text

    # Check for null bytes (common in binary files)
    if b'\x00' in sample:
        return False

    # Validate as UTF-8. With final=False the incremental decoder buffers an
    # incomplete trailing sequence instead of raising, so a character split
    # by the sample boundary is tolerated; genuinely invalid bytes still
    # raise. When the sample is the whole file, final=True makes a dangling
    # partial sequence an error.
    decoder = codecs.getincrementaldecoder('utf-8')()
    try:
        decoder.decode(sample, final=not sample_is_partial)
    except UnicodeDecodeError:
        return False
    return True
|
||||
|
||||
|
||||
def normalize_path(path: Union[str, Path]) -> str:
    """
    Turn ``path`` into an absolute, fully resolved path string.

    Resolution is delegated to ``pathlib.Path.resolve``, so relative
    components such as ``.`` and ``..`` are eliminated and the result is
    anchored at the current working directory when ``path`` is relative.

    Args:
        path: The input path to normalize

    Returns:
        Normalized absolute path as a string, or "" for an empty input

    Examples:
        >>> normalize_path("./dir/../file.txt")  # doctest: +SKIP
        '/current/working/directory/file.txt'
    """
    return str(Path(path).resolve()) if path else ""
|
||||
Reference in New Issue
Block a user