""" Associated Files Manager for Issue #40: Associated Files Management. This module provides functionality to manage associated markdown and schema files with convention-based naming and automatic file placement. """ import os from pathlib import Path from typing import Dict, List, Optional, Any class AssociatedFilesError(Exception): """Base exception for associated files operations.""" pass class InvalidFileTypeError(AssociatedFilesError): """Raised when file has unexpected extension.""" pass class DirectoryAccessError(AssociatedFilesError): """Raised when directory cannot be accessed.""" pass class AssociatedFilesManager: """ Manages associated markdown and schema files with convention-based naming. Provides functionality to find, create, and manage pairs of markdown and JSON schema files that follow the convention of having identical basenames with different extensions. """ def __init__(self, markdown_extension: str = '.md', schema_extension: str = '.json'): """ Initialize the associated files manager. Args: markdown_extension: File extension for markdown files (default: '.md') schema_extension: File extension for schema files (default: '.json') Raises: ValueError: If extensions are invalid or identical """ # Validate extensions if not markdown_extension.startswith('.'): raise ValueError("Markdown extension must start with '.'") if not schema_extension.startswith('.'): raise ValueError("Schema extension must start with '.'") if markdown_extension.lower() == schema_extension.lower(): raise ValueError("Markdown and schema extensions must be different") self.markdown_extension = markdown_extension.lower() self.schema_extension = schema_extension.lower() def _validate_file_extension(self, file_path: Path, expected_extension: str, file_type: str) -> None: """ Validate file has expected extension. Args: file_path: Path to validate expected_extension: Expected extension (e.g., '.md') file_type: Description of file type for error messages Raises: InvalidFileTypeError: If file doesn't have expected extension """ if file_path.suffix.lower() != expected_extension: raise InvalidFileTypeError( f"Expected {file_type} file with {expected_extension} extension, got {file_path.suffix}" ) def _validate_markdown_file(self, file_path: Path) -> None: """Validate file is a markdown file.""" self._validate_file_extension(file_path, self.markdown_extension, "markdown") def _validate_schema_file(self, file_path: Path) -> None: """Validate file is a schema file.""" self._validate_file_extension(file_path, self.schema_extension, "schema") def _validate_directory(self, directory: Path) -> None: """ Validate directory access and permissions. Args: directory: Directory path to validate Raises: DirectoryAccessError: If directory cannot be accessed """ if not directory.exists(): raise DirectoryAccessError(f"Directory does not exist: {directory}") if not directory.is_dir(): raise DirectoryAccessError(f"Path is not a directory: {directory}") if not os.access(directory, os.R_OK): raise DirectoryAccessError(f"No read permission for directory: {directory}") def get_associated_schema_path(self, markdown_file: Path) -> Path: """ Get the path for the associated schema file of a markdown file. Args: markdown_file: Path to the markdown file Returns: Path where the associated schema should be located Raises: InvalidFileTypeError: If the file doesn't have .md extension """ self._validate_markdown_file(markdown_file) return markdown_file.with_suffix(self.schema_extension) def get_associated_markdown_path(self, schema_file: Path) -> Path: """ Get the path for the associated markdown file of a schema file. Args: schema_file: Path to the schema file Returns: Path where the associated markdown should be located Raises: InvalidFileTypeError: If the file doesn't have .json extension """ self._validate_schema_file(schema_file) return schema_file.with_suffix(self.markdown_extension) def find_associated_schema(self, markdown_file: Path) -> Optional[Path]: """ Find the associated schema file for a markdown file. Args: markdown_file: Path to the markdown file Returns: Path to associated schema file if it exists, None otherwise """ schema_path = self.get_associated_schema_path(markdown_file) return schema_path if schema_path.exists() else None def find_associated_markdown(self, schema_file: Path) -> Optional[Path]: """ Find the associated markdown file for a schema file. Args: schema_file: Path to the schema file Returns: Path to associated markdown file if it exists, None otherwise """ markdown_path = self.get_associated_markdown_path(schema_file) return markdown_path if markdown_path.exists() else None def has_associated_schema(self, markdown_file: Path) -> bool: """ Check if a markdown file has an associated schema file. Args: markdown_file: Path to the markdown file Returns: True if associated schema exists, False otherwise """ return self.find_associated_schema(markdown_file) is not None def has_associated_markdown(self, schema_file: Path) -> bool: """ Check if a schema file has an associated markdown file. Args: schema_file: Path to the schema file Returns: True if associated markdown exists, False otherwise """ return self.find_associated_markdown(schema_file) is not None def list_file_pairs(self, directory: Path) -> List[Dict[str, Any]]: """ List all associated file pairs in a directory. Optimized version that reduces filesystem calls by collecting all files at once and finding pairs through basename intersection. Args: directory: Directory to search for file pairs Returns: List of dictionaries containing information about each file pair """ pairs = [] # Get all files at once and group by extension (more efficient) try: all_files = [f for f in directory.iterdir() if f.is_file()] except (OSError, PermissionError): # Return empty list if directory cannot be read return pairs md_files = {f.stem: f for f in all_files if f.suffix.lower() == self.markdown_extension} json_files = {f.stem: f for f in all_files if f.suffix.lower() == self.schema_extension} # Find pairs by checking intersection of basenames (no additional filesystem calls) paired_basenames = set(md_files.keys()) & set(json_files.keys()) for basename in sorted(paired_basenames): # Sort for consistent output pairs.append({ 'basename': basename, 'markdown_file': md_files[basename], 'schema_file': json_files[basename], 'both_exist': True }) return pairs def get_file_pair_info(self, file_path: Path) -> Dict[str, Any]: """ Get detailed information about a file pair. Args: file_path: Path to either markdown or schema file Returns: Dictionary with detailed information about the file pair """ if file_path.suffix.lower() == self.markdown_extension: md_file = file_path schema_file = self.get_associated_schema_path(md_file) elif file_path.suffix.lower() == self.schema_extension: schema_file = file_path md_file = self.get_associated_markdown_path(schema_file) else: raise ValueError(f"Unsupported file type: {file_path.suffix}") info = { 'basename': file_path.stem, 'markdown_file': md_file, 'schema_file': schema_file, 'both_exist': md_file.exists() and schema_file.exists() } # Add file sizes if files exist if md_file.exists(): info['markdown_size'] = md_file.stat().st_size info['markdown_modified'] = md_file.stat().st_mtime if schema_file.exists(): info['schema_size'] = schema_file.stat().st_size info['schema_modified'] = schema_file.stat().st_mtime return info def list_orphaned_files(self, directory: Path) -> Dict[str, List[Path]]: """ List orphaned files (files without their associated counterpart). Optimized version that reuses file discovery from list_file_pairs logic. Args: directory: Directory to search Returns: Dictionary with 'orphaned_markdown' and 'orphaned_schemas' lists """ orphaned_markdown = [] orphaned_schemas = [] # Get all files at once (reusing optimization pattern) try: all_files = [f for f in directory.iterdir() if f.is_file()] except (OSError, PermissionError): return { 'orphaned_markdown': orphaned_markdown, 'orphaned_schemas': orphaned_schemas } md_files = {f.stem: f for f in all_files if f.suffix.lower() == self.markdown_extension} json_files = {f.stem: f for f in all_files if f.suffix.lower() == self.schema_extension} # Find orphaned files by checking set differences orphaned_md_basenames = set(md_files.keys()) - set(json_files.keys()) orphaned_json_basenames = set(json_files.keys()) - set(md_files.keys()) # Collect orphaned files for basename in sorted(orphaned_md_basenames): orphaned_markdown.append(md_files[basename]) for basename in sorted(orphaned_json_basenames): orphaned_schemas.append(json_files[basename]) return { 'orphaned_markdown': orphaned_markdown, 'orphaned_schemas': orphaned_schemas } def get_directory_status(self, directory: Path) -> Dict[str, Any]: """ Get comprehensive status of associated files in a directory. Args: directory: Directory to analyze Returns: Dictionary with status information """ pairs = self.list_file_pairs(directory) orphaned = self.list_orphaned_files(directory) return { 'directory': directory, 'paired_files': len(pairs), 'orphaned_markdown': len(orphaned['orphaned_markdown']), 'orphaned_schemas': len(orphaned['orphaned_schemas']), 'pairs': pairs, 'orphaned': orphaned } def suggest_output_path(self, input_file: Path, target_extension: str) -> Path: """ Suggest an output path for generating an associated file. Args: input_file: Source file path target_extension: Desired extension for output file (e.g., '.json', '.md') Returns: Suggested path for the output file """ return input_file.with_suffix(target_extension) def validate_file_pair_naming(self, markdown_file: Path, schema_file: Path) -> bool: """ Validate that two files follow the associated files naming convention. Args: markdown_file: Path to markdown file schema_file: Path to schema file Returns: True if files follow naming convention, False otherwise """ return ( markdown_file.stem == schema_file.stem and markdown_file.suffix.lower() == self.markdown_extension and schema_file.suffix.lower() == self.schema_extension and markdown_file.parent == schema_file.parent )