Files
markitect-main/tools/run_randomized_tests.py
2025-10-03 03:39:43 +02:00

299 lines
11 KiB
Python

#!/usr/bin/env python3
"""
MarkiTect Randomized Test Runner
Executes tests in randomized order to identify hidden dependencies and improve
test robustness. This helps ensure tests are truly independent and don't rely
on execution order or shared state.
Usage:
python run_randomized_tests.py # Run all tests randomly
python run_randomized_tests.py --seed 12345 # Use specific seed for reproducibility
python run_randomized_tests.py --repeat 3 # Run multiple randomized iterations
python run_randomized_tests.py --shuffle-within-file # Shuffle methods within test files too
"""
import subprocess
import sys
import time
import argparse
import os
import random
from pathlib import Path
from typing import List, Tuple, Optional
# ANSI color codes for better output
class Colors:
    """Namespace of ANSI escape sequences used to colorize terminal output."""

    # Foreground colors (bright variants).
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKCYAN = "\033[96m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Text attributes: ENDC resets all styling, BOLD switches bold on.
    ENDC = "\033[0m"
    BOLD = "\033[1m"
def print_colored(message: str, color: str = Colors.ENDC) -> None:
    """Write ``message`` to stdout wrapped in ``color`` and a trailing reset code."""
    # The three pieces are emitted back-to-back (no separator) so the escape
    # sequences hug the message text exactly.
    print(color, message, Colors.ENDC, sep="")
def get_all_test_files() -> List[Path]:
    """Return every ``tests/test_*.py`` path, sorted.

    The ``tests`` directory is resolved relative to the current working
    directory. Sorting gives a stable baseline order before any shuffling.
    """
    tests_dir = Path("tests")
    return sorted(tests_dir.glob("test_*.py"))
def run_randomized_tests(
    test_files: List[Path],
    seed: Optional[int] = None,
    verbose: bool = False,
    shuffle_within_file: bool = False
) -> Tuple[bool, float, int, List[str]]:
    """
    Run tests in randomized order.

    The given test files are shuffled with ``seed`` and handed to a single
    pytest subprocess in that order. If a randomization plugin is detected in
    ``pytest --help`` it is enabled with the same seed; coverage options are
    added when ``--cov`` is available.

    Args:
        test_files: Test files to run; the list itself is not modified.
        seed: Seed for the shuffle. A random one in [1, 1000000] is chosen
            when omitted.
        verbose: Pass ``-v`` to pytest (otherwise ``-q``) and always echo the
            captured output.
        shuffle_within_file: Only controls whether a warning is printed when
            no randomization plugin is available.

    Returns:
        Tuple of (success, execution_time, test_count, failed_tests)
    """
    if seed is None:
        seed = random.randint(1, 1000000)
    print_colored(f"🎲 Randomizing tests with seed: {seed}", Colors.HEADER)
    print_colored("=" * 60, Colors.HEADER)

    # Set random seed for reproducibility
    random.seed(seed)

    # Randomize file order (on a copy, so the caller's list is untouched)
    randomized_files = test_files.copy()
    random.shuffle(randomized_files)

    print_colored("📁 Test files in randomized order:", Colors.OKBLUE)
    for i, test_file in enumerate(randomized_files, 1):
        print_colored(f" {i:2d}. {test_file.name}", Colors.ENDC)
    print()

    start_time = time.time()

    # Build pytest command. sys.executable guarantees pytest runs under the
    # same interpreter (and virtualenv) as this script.
    cmd = [
        sys.executable, "-m", "pytest",
        "--tb=short",
        "--durations=10",
        "-o", "addopts="  # Override addopts to avoid config conflicts
    ]

    # Probe the available pytest options once and reuse the text below.
    help_text = _pytest_help_text()

    # Add randomization options if a randomization plugin is available.
    # NOTE(review): pytest-randomly presumably also reorders modules, so the
    # file order printed above may not be the executed order - confirm.
    plugin_name, plugin_args = _randomization_plugin_args(help_text, seed)
    if plugin_name == "pytest-randomly":
        cmd.extend(plugin_args)
        print_colored("✅ Using pytest-randomly for within-file randomization", Colors.OKGREEN)
    elif plugin_name is not None:
        cmd.extend(plugin_args)
        print_colored(f"✅ Using {plugin_name} for within-file randomization", Colors.OKGREEN)
    elif shuffle_within_file:
        print_colored("⚠️ pytest-randomly not available - file order only", Colors.WARNING)

    # Add each test file in randomized order
    cmd.extend(str(test_file) for test_file in randomized_files)
    cmd.append("-v" if verbose else "-q")

    # Add coverage if available
    if "--cov" in help_text:
        cmd.extend(["--cov=.", "--cov-report=term-missing"])

    # Execute tests with proper environment. 'src' is put first on
    # PYTHONPATH, but an existing value is kept rather than discarded.
    env = os.environ.copy()
    inherited_path = env.get('PYTHONPATH')
    env['PYTHONPATH'] = os.pathsep.join(['src', inherited_path]) if inherited_path else 'src'
    env['PYTEST_RANDOM_SEED'] = str(seed)  # For custom seed handling

    print_colored(f"🧪 Executing {len(randomized_files)} test files...", Colors.HEADER)
    result = subprocess.run(cmd, capture_output=True, text=True, env=env)
    execution_time = time.time() - start_time

    # Parse test results
    output_lines = result.stdout.split('\n')
    test_count = _count_tests(output_lines)
    failed_tests = _extract_failed_tests(output_lines)
    success = result.returncode == 0

    # Print results
    if success:
        print_colored(f"✅ Randomized tests: {test_count} tests PASSED in {execution_time:.2f}s", Colors.OKGREEN)
        print_colored(f"🎲 Seed: {seed} (use this seed to reproduce exact order)", Colors.OKCYAN)
    else:
        print_colored(f"❌ Randomized tests: FAILED in {execution_time:.2f}s", Colors.FAIL)
        print_colored(f"🎲 Seed: {seed} (use this seed to reproduce failure)", Colors.WARNING)
        if failed_tests:
            print_colored(f"💥 Failed tests ({len(failed_tests)}):", Colors.FAIL)
            for test in failed_tests[:10]:  # Show first 10 failures
                print_colored(f" - {test}", Colors.FAIL)
            if len(failed_tests) > 10:
                print_colored(f" ... and {len(failed_tests) - 10} more", Colors.FAIL)

    if verbose or not success:
        print_colored("📋 Full output:", Colors.WARNING)
        print(result.stdout)
        if result.stderr:
            print_colored("📋 Error output:", Colors.WARNING)
            print(result.stderr)

    return success, execution_time, test_count, failed_tests


# Outcome words in pytest's final summary line that each stand for run tests.
_COUNTED_OUTCOMES = ("passed", "failed", "error", "errors", "skipped", "xfailed", "xpassed")


def _pytest_help_text() -> str:
    """Return the stdout of ``pytest --help``, or "" if it cannot be launched."""
    try:
        probe = subprocess.run(
            [sys.executable, "-m", "pytest", "--help"],
            capture_output=True, text=True
        )
    except OSError as exc:
        # Best effort: without the help text we simply add no optional flags.
        print_colored(f"⚠️ Could not query pytest options: {exc}", Colors.WARNING)
        return ""
    return probe.stdout


def _randomization_plugin_args(help_text: str, seed: int) -> Tuple[Optional[str], List[str]]:
    """Map the options advertised in ``help_text`` to (plugin name, CLI args).

    Each plugin only understands its own flags, so the flags must match the
    plugin that was actually detected. Returns ``(None, [])`` when neither
    known option is present.
    """
    if "--randomly-seed" in help_text:
        return "pytest-randomly", [f"--randomly-seed={seed}"]
    if "--random-order-seed" in help_text:
        return "pytest-random-order", ["--random-order", f"--random-order-seed={seed}"]
    return None, []


def _count_tests(output_lines: List[str]) -> int:
    """Derive the number of tests from pytest output, or 0 if unknown."""
    # Preferred: the "collected X items" header line.
    for line in output_lines:
        if 'collected' in line and 'item' in line:
            words = line.split()
            try:
                return int(words[words.index('collected') + 1])
            except (ValueError, IndexError):
                continue

    # The header may be absent (e.g. quiet runs): sum the counts in the final
    # summary line, e.g. "1 failed, 2 passed in 0.05s".
    for line in reversed(output_lines):
        tokens = line.strip("= ").replace(",", " ").split()
        if "in" not in tokens:
            continue
        total = sum(
            int(number)
            for number, label in zip(tokens, tokens[1:])
            if label in _COUNTED_OUTCOMES and number.isdigit()
        )
        if total:
            return total

    # Last resort: count per-test status lines.
    return sum(
        1 for line in output_lines
        if ' PASSED ' in line or ' FAILED ' in line or ' SKIPPED ' in line
    )


def _extract_failed_tests(output_lines: List[str]) -> List[str]:
    """Collect failed test ids from pytest output, de-duplicated, in order."""
    failed: List[str] = []
    for line in output_lines:
        if line.startswith("FAILED "):
            # Short summary form: "FAILED <nodeid> - <message>"
            test_id = line[len("FAILED "):].split(" - ", 1)[0].strip()
        elif ' FAILED ' in line:
            # Progress-line form: "<nodeid> FAILED [ 50%]"
            test_id = line.split(' FAILED ')[0].strip()
        else:
            continue
        # The same test can appear in both forms; report it once.
        if test_id and test_id not in failed:
            failed.append(test_id)
    return failed
def run_multiple_iterations(
    test_files: List[Path],
    iterations: int,
    verbose: bool = False,
    shuffle_within_file: bool = False
) -> bool:
    """Run multiple randomized test iterations to find flaky tests.

    Each iteration draws a fresh seed in [1, 1000000] and delegates to
    ``run_randomized_tests``. Afterwards a summary of failures, the seeds that
    produced them, and timing statistics is printed.

    Args:
        test_files: Test files to run in every iteration.
        iterations: Number of iterations; must be at least 1.
        verbose: Forwarded to ``run_randomized_tests``.
        shuffle_within_file: Forwarded to ``run_randomized_tests``.

    Returns:
        True when every iteration passed, False otherwise.

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # Without at least one run the statistics below are undefined.
        raise ValueError("iterations must be at least 1")

    print_colored(f"🔄 Running {iterations} randomized test iterations", Colors.BOLD)
    print_colored("=" * 60, Colors.HEADER)

    all_results = []
    all_failed_tests = set()

    for i in range(iterations):
        seed = random.randint(1, 1000000)
        print_colored(f"\n🎯 Iteration {i + 1}/{iterations}", Colors.HEADER)
        success, duration, test_count, failed_tests = run_randomized_tests(
            test_files, seed, verbose, shuffle_within_file
        )
        all_results.append((success, duration, test_count, failed_tests, seed))
        all_failed_tests.update(failed_tests)
        if not success:
            print_colored(f"💥 Iteration {i + 1} failed with seed {seed}", Colors.FAIL)

    # Summary
    print_colored("\n" + "=" * 60, Colors.HEADER)
    print_colored("📊 Multi-Iteration Summary", Colors.BOLD)
    print_colored("=" * 60, Colors.HEADER)

    successful_runs = sum(1 for result in all_results if result[0])
    failed_runs = iterations - successful_runs

    if failed_runs == 0:
        print_colored(f"✅ All {iterations} iterations PASSED", Colors.OKGREEN)
        print_colored("🎉 Tests appear to be robust and order-independent!", Colors.OKGREEN)
    else:
        print_colored(f"{failed_runs}/{iterations} iterations FAILED", Colors.FAIL)
        print_colored("⚠️ Potential test dependencies or flaky tests detected!", Colors.WARNING)
        if all_failed_tests:
            print_colored(f"\n🔍 Tests that failed in any iteration ({len(all_failed_tests)}):", Colors.WARNING)
            for test in sorted(all_failed_tests):
                print_colored(f" - {test}", Colors.FAIL)
        print_colored("\n🎲 Seeds that caused failures:", Colors.WARNING)
        for i, (success, _duration, _count, failed_tests, seed) in enumerate(all_results):
            if not success:
                print_colored(f" Iteration {i + 1}: seed {seed} ({len(failed_tests)} failures)", Colors.FAIL)

    # Performance stats
    durations = [result[1] for result in all_results]
    total_time = sum(durations)
    avg_time = total_time / iterations
    print_colored("\n⏱️ Performance Summary:", Colors.OKBLUE)
    print_colored(f" Total time: {total_time:.2f}s", Colors.ENDC)
    print_colored(f" Average time: {avg_time:.2f}s", Colors.ENDC)
    print_colored(f" Min time: {min(durations):.2f}s", Colors.ENDC)
    print_colored(f" Max time: {max(durations):.2f}s", Colors.ENDC)

    return failed_runs == 0


def main() -> int:
    """Execute randomized test suite.

    Parses the command line, optionally installs pytest-randomly, discovers
    the test files and runs them once or ``--repeat`` times.

    Returns:
        Process exit code: 0 when all runs passed, 1 on any failure, when the
        plugin installation fails, or when no test files are found.
    """
    parser = argparse.ArgumentParser(description="MarkiTect Randomized Test Runner")
    parser.add_argument("--seed", type=int, help="Random seed for reproducible test order")
    parser.add_argument("--repeat", type=int, default=1, help="Number of randomized iterations to run")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    parser.add_argument("--shuffle-within-file", action="store_true",
                        help="Also shuffle test methods within files (requires pytest-randomly)")
    parser.add_argument("--install-randomly", action="store_true",
                        help="Install pytest-randomly plugin for better randomization")
    args = parser.parse_args()

    # Install pytest-randomly if requested
    if args.install_randomly:
        print_colored("📦 Installing pytest-randomly for enhanced randomization...", Colors.OKBLUE)
        try:
            # "-m pip" under sys.executable installs into the interpreter that
            # will later run pytest, not whatever "pip" happens to be on PATH.
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "pytest-randomly"],
                check=True
            )
        except (subprocess.CalledProcessError, OSError):
            print_colored("❌ Failed to install pytest-randomly", Colors.FAIL)
            return 1
        print_colored("✅ pytest-randomly installed successfully", Colors.OKGREEN)

    # Get all test files
    test_files = get_all_test_files()
    if not test_files:
        print_colored("❌ No test files found in tests/ directory", Colors.FAIL)
        return 1

    # Print header
    print_colored("🎲 MarkiTect Randomized Test Runner", Colors.BOLD)
    print_colored("Executing tests in random order to identify dependencies...", Colors.OKBLUE)
    print_colored(f"Found {len(test_files)} test files", Colors.OKBLUE)
    print()

    if args.repeat > 1:
        if args.seed is not None:
            # Seed the generator that draws the per-iteration seeds so that a
            # repeated run with the same --seed replays the same sequence.
            random.seed(args.seed)
        all_passed = run_multiple_iterations(
            test_files, args.repeat, args.verbose, args.shuffle_within_file
        )
        # "is False" keeps the exit code at 0 if the callee reports nothing.
        return 1 if all_passed is False else 0

    # Choose the seed here (rather than inside the runner) so a failing run
    # can always print the exact seed needed to reproduce it.
    seed = args.seed if args.seed is not None else random.randint(1, 1000000)
    success, _execution_time, _test_count, _failed_tests = run_randomized_tests(
        test_files, seed, args.verbose, args.shuffle_within_file
    )
    if not success:
        print_colored("\n💡 Reproduction command:", Colors.WARNING)
        print_colored(f" python run_randomized_tests.py --seed {seed}", Colors.OKCYAN)
        return 1
    return 0
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())