""" Git State Tracker - Bind legacy versions to specific git commits. Provides functionality to track git repository state and bind legacy versions to specific commits, enabling precise version restoration and compatibility. """ import os import subprocess import json from datetime import datetime from pathlib import Path from typing import Dict, List, Optional, Tuple, Any from dataclasses import dataclass, asdict from .exceptions import GitStateError @dataclass class GitState: """Represents a git repository state.""" commit_hash: str commit_message: str author: str date: str branch: str tag: Optional[str] = None is_dirty: bool = False modified_files: List[str] = None def __post_init__(self): if self.modified_files is None: self.modified_files = [] @dataclass class LegacyBinding: """Represents a binding between a legacy version and git state.""" command: str version: str git_state: GitState bound_at: str description: str = "" validation_files: List[str] = None def __post_init__(self): if self.validation_files is None: self.validation_files = [] class GitStateTracker: """ Tracks git repository state and manages version bindings. Responsibilities: - Capture current git state information - Bind legacy versions to specific commits - Validate git state for legacy implementations - Restore git state for testing legacy versions - Track changes between versions """ def __init__(self, repo_path: Optional[Path] = None): """ Initialize the git state tracker. Args: repo_path: Path to the git repository (default: current directory) """ self.repo_path = repo_path or Path.cwd() self._bindings: Dict[str, Dict[str, LegacyBinding]] = {} def get_current_state(self) -> GitState: """ Get the current git repository state. Returns: GitState object representing current state Raises: GitStateError: If git operations fail """ try: # Get current commit information commit_info = self._run_git_command([ 'log', '-1', '--format=%H|%s|%an|%ai' ]).strip() if not commit_info: raise GitStateError("No commits found in repository") hash_val, message, author, date = commit_info.split('|', 3) # Get current branch try: branch = self._run_git_command(['rev-parse', '--abbrev-ref', 'HEAD']).strip() except subprocess.CalledProcessError: branch = "HEAD" # Detached HEAD state # Check for tags on current commit try: tag = self._run_git_command(['describe', '--exact-match', '--tags', 'HEAD']).strip() except subprocess.CalledProcessError: tag = None # Check if repository is dirty status_output = self._run_git_command(['status', '--porcelain']) is_dirty = bool(status_output.strip()) # Get modified files if dirty modified_files = [] if is_dirty: modified_files = [ line[3:] for line in status_output.strip().split('\n') if line.strip() ] return GitState( commit_hash=hash_val, commit_message=message, author=author, date=date, branch=branch, tag=tag, is_dirty=is_dirty, modified_files=modified_files ) except subprocess.CalledProcessError as e: raise GitStateError(f"Git command failed: {e}") except Exception as e: raise GitStateError(f"Failed to get git state: {e}") def bind_version_to_commit( self, command: str, version: str, commit_hash: Optional[str] = None, description: str = "", validation_files: List[str] = None ) -> LegacyBinding: """ Bind a legacy version to a specific git commit. Args: command: Command name version: Version identifier commit_hash: Git commit hash (default: current commit) description: Description of this binding validation_files: Files to validate for this version Returns: LegacyBinding object Raises: GitStateError: If git operations fail """ if validation_files is None: validation_files = [] # Get git state for the specified or current commit if commit_hash: git_state = self._get_commit_state(commit_hash) else: git_state = self.get_current_state() commit_hash = git_state.commit_hash # Create binding binding = LegacyBinding( command=command, version=version, git_state=git_state, bound_at=datetime.now().isoformat(), description=description, validation_files=validation_files ) # Store binding if command not in self._bindings: self._bindings[command] = {} self._bindings[command][version] = binding return binding def get_version_binding(self, command: str, version: str) -> Optional[LegacyBinding]: """ Get the git binding for a specific version. Args: command: Command name version: Version identifier Returns: LegacyBinding if found, None otherwise """ return self._bindings.get(command, {}).get(version) def get_commit_for_version(self, command: str, version: str) -> Optional[str]: """ Get the git commit hash for a legacy version. Args: command: Command name version: Version identifier Returns: Commit hash if found, None otherwise """ binding = self.get_version_binding(command, version) return binding.git_state.commit_hash if binding else None def validate_version_files(self, command: str, version: str) -> Dict[str, bool]: """ Validate that files exist for a legacy version. Args: command: Command name version: Version identifier Returns: Dictionary mapping file paths to existence status """ binding = self.get_version_binding(command, version) if not binding or not binding.validation_files: return {} validation_results = {} for file_path in binding.validation_files: full_path = self.repo_path / file_path validation_results[file_path] = full_path.exists() return validation_results def get_changes_since_version(self, command: str, version: str) -> Dict[str, List[str]]: """ Get changes made since a legacy version was bound. Args: command: Command name version: Version identifier Returns: Dictionary with added, modified, and deleted files """ binding = self.get_version_binding(command, version) if not binding: raise GitStateError(f"No binding found for {command} {version}") try: # Get diff between bound commit and current state diff_output = self._run_git_command([ 'diff', '--name-status', binding.git_state.commit_hash, 'HEAD' ]) changes = { 'added': [], 'modified': [], 'deleted': [] } for line in diff_output.strip().split('\n'): if not line: continue status, filename = line.split('\t', 1) if status == 'A': changes['added'].append(filename) elif status == 'M': changes['modified'].append(filename) elif status == 'D': changes['deleted'].append(filename) return changes except subprocess.CalledProcessError as e: raise GitStateError(f"Failed to get changes: {e}") def create_version_snapshot(self, command: str, version: str, output_dir: Path): """ Create a snapshot of files at the time a version was bound. Args: command: Command name version: Version identifier output_dir: Directory to write snapshot files Raises: GitStateError: If git operations fail """ binding = self.get_version_binding(command, version) if not binding: raise GitStateError(f"No binding found for {command} {version}") output_dir.mkdir(parents=True, exist_ok=True) try: # Export files from the bound commit if binding.validation_files: for file_path in binding.validation_files: try: content = self._run_git_command([ 'show', f"{binding.git_state.commit_hash}:{file_path}" ]) output_file = output_dir / file_path output_file.parent.mkdir(parents=True, exist_ok=True) output_file.write_text(content, encoding='utf-8') except subprocess.CalledProcessError: # File might not have existed at that commit pass # Write metadata metadata = { 'command': command, 'version': version, 'git_state': asdict(binding.git_state), 'bound_at': binding.bound_at, 'description': binding.description, 'validation_files': binding.validation_files } metadata_file = output_dir / 'version_metadata.json' metadata_file.write_text(json.dumps(metadata, indent=2), encoding='utf-8') except subprocess.CalledProcessError as e: raise GitStateError(f"Failed to create snapshot: {e}") def _get_commit_state(self, commit_hash: str) -> GitState: """Get git state for a specific commit.""" try: # Get commit information commit_info = self._run_git_command([ 'log', '-1', '--format=%H|%s|%an|%ai', commit_hash ]).strip() hash_val, message, author, date = commit_info.split('|', 3) # Check for tags on this commit try: tag = self._run_git_command([ 'describe', '--exact-match', '--tags', commit_hash ]).strip() except subprocess.CalledProcessError: tag = None return GitState( commit_hash=hash_val, commit_message=message, author=author, date=date, branch="unknown", # Can't determine branch for historical commit tag=tag, is_dirty=False, modified_files=[] ) except subprocess.CalledProcessError as e: raise GitStateError(f"Invalid commit hash {commit_hash}: {e}") def _run_git_command(self, args: List[str]) -> str: """Run a git command and return output.""" cmd = ['git'] + args try: result = subprocess.run( cmd, cwd=self.repo_path, capture_output=True, text=True, check=True ) return result.stdout except subprocess.CalledProcessError as e: raise GitStateError(f"Git command failed: {' '.join(cmd)}\n{e.stderr}") def export_bindings(self) -> Dict[str, Any]: """Export all version bindings for backup/sharing.""" bindings_data = {} for command, versions in self._bindings.items(): bindings_data[command] = {} for version, binding in versions.items(): bindings_data[command][version] = asdict(binding) return { 'version': '1.0', 'exported_at': datetime.now().isoformat(), 'bindings': bindings_data } def import_bindings(self, data: Dict[str, Any]): """Import version bindings from exported data.""" if data.get('version') != '1.0': raise GitStateError("Unsupported bindings format version") for command, versions in data.get('bindings', {}).items(): if command not in self._bindings: self._bindings[command] = {} for version, binding_data in versions.items(): git_state = GitState(**binding_data['git_state']) binding = LegacyBinding( command=binding_data['command'], version=binding_data['version'], git_state=git_state, bound_at=binding_data['bound_at'], description=binding_data.get('description', ''), validation_files=binding_data.get('validation_files', []) ) self._bindings[command][version] = binding