""" Path utilities for packaging operations. Provides utilities for path resolution, rewriting, and normalization within packages. """ import re from pathlib import Path from typing import Dict, Set, List, Tuple from urllib.parse import urlparse from .errors import PackagingError class PathUtils: """Utilities for path handling in packages.""" # Common markdown link patterns IMAGE_PATTERN = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)') LINK_PATTERN = re.compile(r'(? str: """ Rewrite asset paths in markdown content. Args: content: Markdown content to process asset_map: Mapping from original paths to new paths Returns: Content with rewritten asset paths """ def replace_link(match): text = match.group(1) url = match.group(2) # Skip external URLs if PathUtils.is_external_url(url): return match.group(0) # Check if this path needs rewriting normalized_path = str(Path(url).as_posix()) if normalized_path in asset_map: return f'![{text}]({asset_map[normalized_path]})' return match.group(0) def replace_markdown_link(match): text = match.group(1) url = match.group(2) # Skip external URLs and anchors if PathUtils.is_external_url(url) or url.startswith('#'): return match.group(0) # Check if this path needs rewriting normalized_path = str(Path(url).as_posix()) if normalized_path in asset_map: return f'[{text}]({asset_map[normalized_path]})' return match.group(0) # Process images first content = PathUtils.IMAGE_PATTERN.sub(replace_link, content) # Process links content = PathUtils.LINK_PATTERN.sub(replace_markdown_link, content) return content @staticmethod def is_external_url(url: str) -> bool: """ Check if a URL is external (has a scheme). Args: url: URL to check Returns: True if external, False if local """ try: parsed = urlparse(url) return bool(parsed.scheme) except Exception: return False @staticmethod def normalize_path(path: str, base_path: Path = None) -> str: """ Normalize a path for consistent handling. Args: path: Path to normalize base_path: Base path for relative resolution Returns: Normalized path string """ try: path_obj = Path(path) # Resolve relative to base if provided if base_path and not path_obj.is_absolute(): path_obj = base_path / path_obj # Normalize and return as POSIX path return str(path_obj.resolve().as_posix()) except Exception as e: raise PackagingError(f"Failed to normalize path '{path}': {e}") @staticmethod def extract_referenced_paths(content: str) -> Set[str]: """ Extract all referenced paths from markdown content. Args: content: Markdown content to analyze Returns: Set of referenced paths """ paths = set() # Extract image references for match in PathUtils.IMAGE_PATTERN.finditer(content): url = match.group(2) if not PathUtils.is_external_url(url): paths.add(url) # Extract link references for match in PathUtils.LINK_PATTERN.finditer(content): url = match.group(2) if not PathUtils.is_external_url(url) and not url.startswith('#'): paths.add(url) return paths @staticmethod def resolve_relative_paths(paths: Set[str], base_path: Path) -> Dict[str, Path]: """ Resolve relative paths against a base path. Args: paths: Set of paths to resolve base_path: Base path for resolution Returns: Dictionary mapping original paths to resolved Path objects """ resolved = {} for path_str in paths: try: path_obj = Path(path_str) if not path_obj.is_absolute(): resolved_path = base_path / path_obj else: resolved_path = path_obj resolved[path_str] = resolved_path.resolve() except Exception as e: # Skip problematic paths but log the issue continue return resolved @staticmethod def create_package_path(original_path: Path, package_root: str = "assets") -> str: """ Create a package-internal path for an asset. Args: original_path: Original file path package_root: Root directory within package Returns: Package-internal path """ # Use just the filename to avoid deep nesting filename = original_path.name return f"{package_root}/{filename}" # Standalone utility functions for convenience def rewrite_asset_paths(content: str, asset_map: Dict[str, str]) -> str: """ Standalone wrapper for PathUtils.rewrite_asset_paths. Args: content: Markdown content to process asset_map: Mapping from original paths to new paths Returns: Content with rewritten asset paths """ return PathUtils.rewrite_asset_paths(content, asset_map)