markitect-main/tools/run_randomized_tests.py

#!/usr/bin/env python3
"""
MarkiTect Randomized Test Runner

Executes tests in randomized order to identify hidden dependencies and improve
test robustness. This helps ensure tests are truly independent and don't rely
on execution order or shared state.

Usage:
    python run_randomized_tests.py                    # Run all tests randomly
    python run_randomized_tests.py --seed 12345       # Use specific seed for reproducibility
    python run_randomized_tests.py --repeat 3         # Run multiple randomized iterations
    python run_randomized_tests.py --shuffle-within-file  # Shuffle methods within test files too
"""

import argparse
import os
import random
import re
import subprocess
import sys
import time
from pathlib import Path
from typing import List, Tuple, Optional

# ANSI color codes for better output
class Colors:
    """ANSI SGR escape sequences used to colorize terminal output.

    Each attribute is a raw escape string meant to be written before the
    text it styles; ``ENDC`` resets the terminal back to its default style.
    """

    HEADER = '\033[95m'   # SGR 95: bright magenta foreground
    OKBLUE = '\033[94m'   # SGR 94: bright blue foreground
    OKCYAN = '\033[96m'   # SGR 96: bright cyan foreground
    OKGREEN = '\033[92m'  # SGR 92: bright green foreground
    WARNING = '\033[93m'  # SGR 93: bright yellow foreground
    FAIL = '\033[91m'     # SGR 91: bright red foreground
    ENDC = '\033[0m'      # SGR 0: reset all attributes
    BOLD = '\033[1m'      # SGR 1: bold / increased intensity

def print_colored(message: str, color: str = Colors.ENDC) -> None:
    """Write ``message`` to stdout wrapped in ``color`` and a trailing reset.

    Args:
        message: Text to display.
        color: Escape sequence emitted before the text; defaults to the
            reset sequence, i.e. no styling.
    """
    # sep="" glues the three pieces together exactly like a single f-string.
    print(color, message, Colors.ENDC, sep="")

def get_all_test_files() -> List[Path]:
    """Return the ``test_*.py`` files directly inside ``tests/``, sorted.

    The directory is resolved relative to the current working directory and
    is not searched recursively. Sorting gives a stable baseline order for
    callers that shuffle the list afterwards.
    """
    tests_dir = Path("tests")
    discovered = list(tests_dir.glob("test_*.py"))
    discovered.sort()
    return discovered

def run_randomized_tests(
    test_files: List[Path],
    seed: Optional[int] = None,
    verbose: bool = False,
    shuffle_within_file: bool = False
) -> Tuple[bool, float, int, List[str]]:
    """
    Run tests in randomized order.

    Returns:
        Tuple of (success, execution_time, test_count, failed_tests)
    """
    if seed is None:
        seed = random.randint(1, 1000000)

    print_colored(f"🎲 Randomizing tests with seed: {seed}", Colors.HEADER)
    print_colored("=" * 60, Colors.HEADER)

    # Set random seed for reproducibility
    random.seed(seed)

    # Randomize file order
    randomized_files = test_files.copy()
    random.shuffle(randomized_files)

    print_colored(f"📁 Test files in randomized order:", Colors.OKBLUE)
    for i, test_file in enumerate(randomized_files, 1):
        print_colored(f"   {i:2d}. {test_file.name}", Colors.ENDC)
    print()

    start_time = time.time()

    # Build pytest command
    cmd = [
        "python", "-m", "pytest",
        "--tb=short",
        "--durations=10",
        "-o", "addopts="  # Override addopts to avoid config conflicts
    ]

    # Add randomization options if pytest-randomly is available
    try:
        result_check = subprocess.run(["python", "-m", "pytest", "--help"],
                                     capture_output=True, text=True)
        if "--random-order" in result_check.stdout or "randomly" in result_check.stdout:
            cmd.extend(["--random-order", f"--random-order-seed={seed}"])
            print_colored("✅ Using pytest-randomly for within-file randomization", Colors.OKGREEN)
        elif shuffle_within_file:
            print_colored("⚠️  pytest-randomly not available - file order only", Colors.WARNING)
    except Exception:
        pass

    # Add each test file in randomized order
    for test_file in randomized_files:
        cmd.append(str(test_file))

    if verbose:
        cmd.append("-v")
    else:
        cmd.append("-q")

    # Add coverage if available
    try:
        result_check = subprocess.run(["python", "-m", "pytest", "--help"],
                                     capture_output=True, text=True)
        if "--cov" in result_check.stdout:
            cmd.extend(["--cov=.", "--cov-report=term-missing"])
    except Exception:
        pass

    # Execute tests with proper environment
    env = os.environ.copy()
    env['PYTHONPATH'] = 'src'
    env['PYTEST_RANDOM_SEED'] = str(seed)  # For custom seed handling

    print_colored(f"🧪 Executing {len(randomized_files)} test files...", Colors.HEADER)
    result = subprocess.run(cmd, capture_output=True, text=True, env=env)
    execution_time = time.time() - start_time

    # Parse test results
    test_count = 0
    failed_tests = []
    output_lines = result.stdout.split('\n')

    # Look for collected line pattern: "collected X items"
    for line in output_lines:
        if 'collected' in line and 'item' in line:
            try:
                words = line.split()
                collected_idx = words.index('collected')
                if collected_idx + 1 < len(words):
                    test_count = int(words[collected_idx + 1])
                    break
            except (ValueError, IndexError):
                pass

    # Alternative: count PASSED/FAILED lines
    if test_count == 0:
        passed_lines = [line for line in output_lines if ' PASSED ' in line]
        failed_lines = [line for line in output_lines if ' FAILED ' in line]
        skipped_lines = [line for line in output_lines if ' SKIPPED ' in line]
        test_count = len(passed_lines) + len(failed_lines) + len(skipped_lines)

    # Extract failed test names
    for line in output_lines:
        if ' FAILED ' in line:
            # Extract test name from pytest output
            parts = line.split(' FAILED ')
            if parts:
                failed_tests.append(parts[0].strip())

    success = result.returncode == 0

    # Print results
    if success:
        print_colored(f"✅ Randomized tests: {test_count} tests PASSED in {execution_time:.2f}s", Colors.OKGREEN)
        print_colored(f"🎲 Seed: {seed} (use this seed to reproduce exact order)", Colors.OKCYAN)
    else:
        print_colored(f"❌ Randomized tests: FAILED in {execution_time:.2f}s", Colors.FAIL)
        print_colored(f"🎲 Seed: {seed} (use this seed to reproduce failure)", Colors.WARNING)
        if failed_tests:
            print_colored(f"💥 Failed tests ({len(failed_tests)}):", Colors.FAIL)
            for test in failed_tests[:10]:  # Show first 10 failures
                print_colored(f"   - {test}", Colors.FAIL)
            if len(failed_tests) > 10:
                print_colored(f"   ... and {len(failed_tests) - 10} more", Colors.FAIL)

        if verbose or not success:
            print_colored("📋 Full output:", Colors.WARNING)
            print(result.stdout)
            if result.stderr:
                print_colored("📋 Error output:", Colors.WARNING)
                print(result.stderr)

    return success, execution_time, test_count, failed_tests

def run_multiple_iterations(
    test_files: List[Path],
    iterations: int,
    verbose: bool = False,
    shuffle_within_file: bool = False
) -> bool:
    """Run multiple randomized test iterations to find flaky tests.

    Each iteration draws a fresh seed from the module-level ``random``
    generator and delegates to ``run_randomized_tests``. A summary of failed
    iterations, their seeds and the timing spread is printed at the end.

    Args:
        test_files: Test files passed to every iteration.
        iterations: Number of iterations to run; must be at least 1.
        verbose: Forwarded to ``run_randomized_tests``.
        shuffle_within_file: Forwarded to ``run_randomized_tests``.

    Returns:
        ``True`` when every iteration passed, ``False`` otherwise.

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    # Reject an empty run up front; the timing statistics below would
    # otherwise divide by zero after reporting a misleading "all passed".
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    print_colored(f"🔄 Running {iterations} randomized test iterations", Colors.BOLD)
    print_colored("=" * 60, Colors.HEADER)

    # One (success, duration, test_count, failed_tests, seed) tuple per run.
    all_results = []
    all_failed_tests = set()

    for i in range(iterations):
        seed = random.randint(1, 1000000)

        print_colored(f"\n🎯 Iteration {i + 1}/{iterations}", Colors.HEADER)
        success, duration, test_count, failed_tests = run_randomized_tests(
            test_files, seed, verbose, shuffle_within_file
        )

        all_results.append((success, duration, test_count, failed_tests, seed))
        all_failed_tests.update(failed_tests)

        if not success:
            print_colored(f"💥 Iteration {i + 1} failed with seed {seed}", Colors.FAIL)

    # Summary
    print_colored("\n" + "=" * 60, Colors.HEADER)
    print_colored("📊 Multi-Iteration Summary", Colors.BOLD)
    print_colored("=" * 60, Colors.HEADER)

    successful_runs = sum(1 for result in all_results if result[0])
    failed_runs = iterations - successful_runs

    if failed_runs == 0:
        print_colored(f"✅ All {iterations} iterations PASSED", Colors.OKGREEN)
        print_colored("🎉 Tests appear to be robust and order-independent!", Colors.OKGREEN)
    else:
        print_colored(f"❌ {failed_runs}/{iterations} iterations FAILED", Colors.FAIL)
        print_colored("⚠️  Potential test dependencies or flaky tests detected!", Colors.WARNING)

        if all_failed_tests:
            print_colored(f"\n🔍 Tests that failed in any iteration ({len(all_failed_tests)}):", Colors.WARNING)
            for test in sorted(all_failed_tests):
                print_colored(f"   - {test}", Colors.FAIL)

        print_colored(f"\n🎲 Seeds that caused failures:", Colors.WARNING)
        for i, (success, _duration, _count, failed_tests, seed) in enumerate(all_results):
            if not success:
                print_colored(f"   Iteration {i + 1}: seed {seed} ({len(failed_tests)} failures)", Colors.FAIL)

    # Performance stats
    durations = [result[1] for result in all_results]
    total_time = sum(durations)
    avg_time = total_time / iterations
    min_time = min(durations)
    max_time = max(durations)

    print_colored(f"\n⏱️  Performance Summary:", Colors.OKBLUE)
    print_colored(f"   Total time: {total_time:.2f}s", Colors.ENDC)
    print_colored(f"   Average time: {avg_time:.2f}s", Colors.ENDC)
    print_colored(f"   Min time: {min_time:.2f}s", Colors.ENDC)
    print_colored(f"   Max time: {max_time:.2f}s", Colors.ENDC)

    return failed_runs == 0

def main() -> int:
    """Execute randomized test suite.

    Parses the command line, optionally installs pytest-randomly into the
    current interpreter, then runs either a single randomized pass or
    ``--repeat`` passes.

    Returns:
        Process exit status: 0 when the run passed, 1 on a failed run, a
        failed plugin install, or when no test files were found.
    """
    parser = argparse.ArgumentParser(description="MarkiTect Randomized Test Runner")
    parser.add_argument("--seed", type=int,
                       help="Random seed for reproducible test order "
                            "(with --repeat, seeds the per-iteration seed sequence)")
    parser.add_argument("--repeat", type=int, default=1, help="Number of randomized iterations to run")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    parser.add_argument("--shuffle-within-file", action="store_true",
                       help="Also shuffle test methods within files (requires pytest-randomly)")
    parser.add_argument("--install-randomly", action="store_true",
                       help="Install pytest-randomly plugin for better randomization")

    args = parser.parse_args()

    # Install pytest-randomly if requested
    if args.install_randomly:
        print_colored("📦 Installing pytest-randomly for enhanced randomization...", Colors.OKBLUE)
        try:
            # "-m pip" on the running interpreter installs into the same
            # environment the tests will use; a bare "pip" on PATH may not.
            subprocess.run(
                [sys.executable or "python", "-m", "pip", "install", "pytest-randomly"],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError):
            print_colored("❌ Failed to install pytest-randomly", Colors.FAIL)
            return 1
        print_colored("✅ pytest-randomly installed successfully", Colors.OKGREEN)

    # Get all test files
    test_files = get_all_test_files()

    if not test_files:
        print_colored("❌ No test files found in tests/ directory", Colors.FAIL)
        return 1

    # Print header
    print_colored("🎲 MarkiTect Randomized Test Runner", Colors.BOLD)
    print_colored("Executing tests in random order to identify dependencies...", Colors.OKBLUE)
    print_colored(f"Found {len(test_files)} test files", Colors.OKBLUE)
    print()

    if args.repeat > 1:
        # Seeding the shared generator makes the sequence of per-iteration
        # seeds reproducible instead of silently ignoring --seed.
        if args.seed is not None:
            random.seed(args.seed)
        outcome = run_multiple_iterations(
            test_files, args.repeat, args.verbose, args.shuffle_within_file
        )
        # Only an explicit False signals failure; a missing result (None)
        # keeps the historical exit status of 0.
        return 1 if outcome is False else 0

    # Choose the seed here so the reproduction hint can always name it;
    # "is None" keeps an explicit --seed 0 usable.
    seed = args.seed if args.seed is not None else random.randint(1, 1000000)
    success, _elapsed, _count, _failed = run_randomized_tests(
        test_files, seed, args.verbose, args.shuffle_within_file
    )

    if not success:
        print_colored(f"\n💡 Reproduction command:", Colors.WARNING)
        print_colored(f"   python run_randomized_tests.py --seed {seed}", Colors.OKCYAN)
        return 1

    return 0

# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)