chore: history cleanup
This commit is contained in:
299
tools/run_randomized_tests.py
Normal file
299
tools/run_randomized_tests.py
Normal file
@@ -0,0 +1,299 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MarkiTect Randomized Test Runner
|
||||
|
||||
Executes tests in randomized order to identify hidden dependencies and improve
|
||||
test robustness. This helps ensure tests are truly independent and don't rely
|
||||
on execution order or shared state.
|
||||
|
||||
Usage:
|
||||
python run_randomized_tests.py # Run all tests randomly
|
||||
python run_randomized_tests.py --seed 12345 # Use specific seed for reproducibility
|
||||
python run_randomized_tests.py --repeat 3 # Run multiple randomized iterations
|
||||
python run_randomized_tests.py --shuffle-within-file # Shuffle methods within test files too
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
# ANSI color codes for better output
|
||||
class Colors:
    """Namespace of ANSI escape sequences used to colorize terminal output.

    Each attribute is a raw escape string that is written before a message;
    ``ENDC`` is the reset sequence written after it.
    """

    # Bright foreground colors.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Attribute controls: reset everything / switch on bold.
    ENDC = '\033[0m'
    BOLD = '\033[1m'
|
||||
|
||||
def print_colored(message: str, color: str = Colors.ENDC) -> None:
    """Write *message* to stdout wrapped in an ANSI color.

    Args:
        message: Text to print.
        color: Escape sequence emitted before the text; defaults to the
            reset sequence, i.e. no coloring.
    """
    # The reset sequence always follows so later output is not tinted.
    print(color, message, Colors.ENDC, sep="")
|
||||
|
||||
def get_all_test_files() -> List[Path]:
    """Return every ``tests/test_*.py`` path, sorted.

    The ``tests`` directory is resolved relative to the current working
    directory and only its direct children are considered. Sorting gives a
    stable baseline order before any shuffling is applied.
    """
    tests_dir = Path("tests")
    return sorted(tests_dir.glob("test_*.py"))
|
||||
|
||||
# Outcome words in pytest's final summary line that stand for executed tests.
_COUNTED_OUTCOMES = frozenset({"passed", "failed", "skipped", "xfailed", "xpassed"})


def _probe_pytest_help() -> str:
    """Return the text of ``pytest --help``, or "" when it cannot be obtained."""
    try:
        probe = subprocess.run(
            [sys.executable, "-m", "pytest", "--help"],
            capture_output=True,
            text=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as exc:
        # Plugin detection is best-effort: report the problem and carry on
        # without optional plugins instead of aborting the run.
        print_colored(f"⚠️ Could not query pytest options: {exc}", Colors.WARNING)
        return ""
    return probe.stdout


def _randomization_args(help_text: str, seed: int) -> Tuple[List[str], List[str]]:
    """Return (extra pytest arguments, detected plugin names) for shuffle plugins."""
    extra_args: List[str] = []
    plugins: List[str] = []
    # Each plugin has its own flag names; passing one plugin's flags to the
    # other makes pytest abort with "unrecognized arguments".
    if "--random-order-seed" in help_text:
        extra_args.extend(["--random-order", f"--random-order-seed={seed}"])
        plugins.append("pytest-random-order")
    if "--randomly-seed" in help_text:
        extra_args.append(f"--randomly-seed={seed}")
        plugins.append("pytest-randomly")
    return extra_args, plugins


def _count_tests(output_lines: List[str]) -> int:
    """Return the number of tests reported in pytest's output, or 0 if unknown."""
    # 1. "collected N items" header (printed at normal and verbose levels).
    for line in output_lines:
        if 'collected' in line and 'item' in line:
            words = line.split()
            try:
                collected = int(words[words.index('collected') + 1])
            except (ValueError, IndexError):
                continue
            if collected:
                return collected
            break

    # 2. Final summary such as "1 failed, 2 passed in 0.12s". This is the only
    #    count available in -q mode. Scan from the end so the real summary
    #    wins over look-alike text printed by the tests themselves.
    for line in reversed(output_lines):
        words = line.replace(",", " ").split()
        total = sum(
            int(number)
            for number, label in zip(words, words[1:])
            if number.isdecimal() and label in _COUNTED_OUTCOMES
        )
        if total:
            return total

    # 3. Last resort: count per-test status lines from verbose output.
    return sum(
        1
        for line in output_lines
        if ' PASSED ' in line or ' FAILED ' in line or ' SKIPPED ' in line
    )


def _collect_failed_tests(output_lines: List[str]) -> List[str]:
    """Return the unique failed test ids found in pytest's output, in order."""
    failed: List[str] = []
    seen = set()
    for line in output_lines:
        if line.startswith('FAILED '):
            # Short summary form: "FAILED path::test - message".
            name = line[len('FAILED '):].split(' - ', 1)[0].strip()
        elif ' FAILED ' in line:
            # Verbose progress form: "path::test FAILED   [ 50%]".
            name = line.split(' FAILED ')[0].strip()
        else:
            continue
        # Verbose runs print both forms for the same test; keep one entry.
        if name and name not in seen:
            seen.add(name)
            failed.append(name)
    return failed


def run_randomized_tests(
    test_files: List[Path],
    seed: Optional[int] = None,
    verbose: bool = False,
    shuffle_within_file: bool = False
) -> Tuple[bool, float, int, List[str]]:
    """
    Run the given test files through pytest in a seeded random order.

    The file order is shuffled with ``seed``. If pytest-random-order or
    pytest-randomly is detected, the same seed is also passed to it.
    Coverage options are added when ``pytest --help`` advertises ``--cov``.

    Args:
        test_files: Test file paths to run.
        seed: Seed for the shuffle; a random one in [1, 1000000] is drawn
            when omitted.
        verbose: Run pytest with ``-v`` (otherwise ``-q``) and always print
            the captured output.
        shuffle_within_file: Only controls whether a warning is printed when
            no shuffle plugin is found; plugin flags are added whenever a
            plugin is detected.

    Returns:
        Tuple of (success, execution_time, test_count, failed_tests)
    """
    if seed is None:
        seed = random.randint(1, 1000000)

    print_colored(f"🎲 Randomizing tests with seed: {seed}", Colors.HEADER)
    print_colored("=" * 60, Colors.HEADER)

    # A private generator gives the same order per seed as seeding the module
    # RNG, without changing global random state for the caller.
    rng = random.Random(seed)
    randomized_files = list(test_files)
    rng.shuffle(randomized_files)

    print_colored("📁 Test files in randomized order:", Colors.OKBLUE)
    for i, test_file in enumerate(randomized_files, 1):
        print_colored(f" {i:2d}. {test_file.name}", Colors.ENDC)
    print()

    start_time = time.time()

    # sys.executable keeps pytest on the interpreter running this script
    # rather than whatever "python" happens to be first on PATH.
    cmd = [
        sys.executable, "-m", "pytest",
        "--tb=short",
        "--durations=10",
        "-o", "addopts=",  # Override addopts to avoid config conflicts
    ]

    # One probe answers both optional-plugin questions below.
    help_text = _probe_pytest_help()

    extra_args, plugins = _randomization_args(help_text, seed)
    if plugins:
        cmd.extend(extra_args)
        print_colored(
            f"✅ Using {' + '.join(plugins)} for within-file randomization",
            Colors.OKGREEN,
        )
    elif shuffle_within_file:
        print_colored("⚠️ pytest-randomly not available - file order only", Colors.WARNING)

    # Files are passed explicitly so pytest receives them in shuffled order.
    cmd.extend(str(test_file) for test_file in randomized_files)
    cmd.append("-v" if verbose else "-q")

    if "--cov" in help_text:
        cmd.extend(["--cov=.", "--cov-report=term-missing"])

    # Execute tests with proper environment
    env = os.environ.copy()
    env['PYTHONPATH'] = 'src'  # replaces any inherited PYTHONPATH
    env['PYTEST_RANDOM_SEED'] = str(seed)  # For custom seed handling

    print_colored(f"🧪 Executing {len(randomized_files)} test files...", Colors.HEADER)
    result = subprocess.run(cmd, capture_output=True, text=True, env=env)
    execution_time = time.time() - start_time

    output_lines = result.stdout.split('\n')
    test_count = _count_tests(output_lines)
    failed_tests = _collect_failed_tests(output_lines)

    success = result.returncode == 0

    if success:
        print_colored(f"✅ Randomized tests: {test_count} tests PASSED in {execution_time:.2f}s", Colors.OKGREEN)
        print_colored(f"🎲 Seed: {seed} (use this seed to reproduce exact order)", Colors.OKCYAN)
    else:
        print_colored(f"❌ Randomized tests: FAILED in {execution_time:.2f}s", Colors.FAIL)
        print_colored(f"🎲 Seed: {seed} (use this seed to reproduce failure)", Colors.WARNING)
        if failed_tests:
            print_colored(f"💥 Failed tests ({len(failed_tests)}):", Colors.FAIL)
            for test in failed_tests[:10]:  # Show first 10 failures
                print_colored(f" - {test}", Colors.FAIL)
            if len(failed_tests) > 10:
                print_colored(f" ... and {len(failed_tests) - 10} more", Colors.FAIL)

    if verbose or not success:
        print_colored("📋 Full output:", Colors.WARNING)
        print(result.stdout)
        if result.stderr:
            print_colored("📋 Error output:", Colors.WARNING)
            print(result.stderr)

    return success, execution_time, test_count, failed_tests
|
||||
|
||||
def run_multiple_iterations(
    test_files: List[Path],
    iterations: int,
    verbose: bool = False,
    shuffle_within_file: bool = False
) -> bool:
    """Run several randomized test iterations to find flaky tests.

    Each iteration calls ``run_randomized_tests`` with a freshly drawn seed.
    A summary of failing iterations, their seeds and timing statistics is
    printed afterwards.

    Args:
        test_files: Test file paths handed to every iteration.
        iterations: Number of iterations to run; must be at least 1.
        verbose: Forwarded to ``run_randomized_tests``.
        shuffle_within_file: Forwarded to ``run_randomized_tests``.

    Returns:
        True when every iteration succeeded, False otherwise.

    Raises:
        ValueError: If ``iterations`` is smaller than 1.
    """
    # Reject this up front: with no iterations the summary would report
    # success and then fail while computing the timing statistics.
    if iterations < 1:
        raise ValueError(f"iterations must be at least 1, got {iterations}")

    print_colored(f"🔄 Running {iterations} randomized test iterations", Colors.BOLD)
    print_colored("=" * 60, Colors.HEADER)

    # Dedicated generator: iteration seeds stay independent even if the
    # module-level RNG is reseeded while an iteration runs.
    seed_source = random.Random()

    all_results = []
    all_failed_tests = set()

    for i in range(iterations):
        seed = seed_source.randint(1, 1000000)

        print_colored(f"\n🎯 Iteration {i + 1}/{iterations}", Colors.HEADER)
        success, duration, test_count, failed_tests = run_randomized_tests(
            test_files, seed, verbose, shuffle_within_file
        )

        all_results.append((success, duration, test_count, failed_tests, seed))
        all_failed_tests.update(failed_tests)

        if not success:
            print_colored(f"💥 Iteration {i + 1} failed with seed {seed}", Colors.FAIL)

    # Summary
    print_colored("\n" + "=" * 60, Colors.HEADER)
    print_colored("📊 Multi-Iteration Summary", Colors.BOLD)
    print_colored("=" * 60, Colors.HEADER)

    successful_runs = sum(1 for success, *_ in all_results if success)
    failed_runs = iterations - successful_runs

    if failed_runs == 0:
        print_colored(f"✅ All {iterations} iterations PASSED", Colors.OKGREEN)
        print_colored("🎉 Tests appear to be robust and order-independent!", Colors.OKGREEN)
    else:
        print_colored(f"❌ {failed_runs}/{iterations} iterations FAILED", Colors.FAIL)
        print_colored("⚠️ Potential test dependencies or flaky tests detected!", Colors.WARNING)

        if all_failed_tests:
            print_colored(f"\n🔍 Tests that failed in any iteration ({len(all_failed_tests)}):", Colors.WARNING)
            for test in sorted(all_failed_tests):
                print_colored(f" - {test}", Colors.FAIL)

        print_colored("\n🎲 Seeds that caused failures:", Colors.WARNING)
        for i, (success, _duration, _count, failed_tests, seed) in enumerate(all_results):
            if not success:
                print_colored(f" Iteration {i + 1}: seed {seed} ({len(failed_tests)} failures)", Colors.FAIL)

    # Performance stats
    durations = [duration for _success, duration, *_ in all_results]
    total_time = sum(durations)
    avg_time = total_time / iterations
    min_time = min(durations)
    max_time = max(durations)

    print_colored("\n⏱️ Performance Summary:", Colors.OKBLUE)
    print_colored(f" Total time: {total_time:.2f}s", Colors.ENDC)
    print_colored(f" Average time: {avg_time:.2f}s", Colors.ENDC)
    print_colored(f" Min time: {min_time:.2f}s", Colors.ENDC)
    print_colored(f" Max time: {max_time:.2f}s", Colors.ENDC)

    return failed_runs == 0
|
||||
|
||||
def main() -> int:
    """Execute the randomized test suite from the command line.

    Parses the CLI options, optionally installs pytest-randomly, discovers
    the test files and runs either one randomized pass or ``--repeat``
    passes. ``--seed`` is applied to single runs only.

    Returns:
        Process exit status: 0 on success, 1 when installation fails, no
        test files are found, or the tests fail.
    """
    parser = argparse.ArgumentParser(description="MarkiTect Randomized Test Runner")
    parser.add_argument("--seed", type=int, help="Random seed for reproducible test order")
    parser.add_argument("--repeat", type=int, default=1, help="Number of randomized iterations to run")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    parser.add_argument("--shuffle-within-file", action="store_true",
                        help="Also shuffle test methods within files (requires pytest-randomly)")
    parser.add_argument("--install-randomly", action="store_true",
                        help="Install pytest-randomly plugin for better randomization")

    args = parser.parse_args()

    # Install pytest-randomly if requested
    if args.install_randomly:
        print_colored("📦 Installing pytest-randomly for enhanced randomization...", Colors.OKBLUE)
        try:
            # "-m pip" on the running interpreter installs into the same
            # environment that will later run pytest.
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "pytest-randomly"],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError):
            print_colored("❌ Failed to install pytest-randomly", Colors.FAIL)
            return 1
        print_colored("✅ pytest-randomly installed successfully", Colors.OKGREEN)

    # Get all test files
    test_files = get_all_test_files()

    if not test_files:
        print_colored("❌ No test files found in tests/ directory", Colors.FAIL)
        return 1

    # Print header
    print_colored("🎲 MarkiTect Randomized Test Runner", Colors.BOLD)
    print_colored("Executing tests in random order to identify dependencies...", Colors.OKBLUE)
    print_colored(f"Found {len(test_files)} test files", Colors.OKBLUE)
    print()

    if args.repeat > 1:
        all_passed = run_multiple_iterations(
            test_files, args.repeat, args.verbose, args.shuffle_within_file
        )
        # Only an explicit False counts as failure, so an implementation that
        # returns nothing keeps the previous exit status of 0.
        if all_passed is False:
            return 1
    else:
        # Choose the seed here so the reproduction hint can always name it.
        # "is None" keeps an explicit "--seed 0" from being discarded.
        seed = args.seed if args.seed is not None else random.randint(1, 1000000)

        success, execution_time, test_count, failed_tests = run_randomized_tests(
            test_files, seed, args.verbose, args.shuffle_within_file
        )

        if not success:
            print_colored("\n💡 Reproduction command:", Colors.WARNING)
            print_colored(f" python run_randomized_tests.py --seed {seed}", Colors.OKCYAN)
            return 1

    return 0
|
||||
|
||||
# Script entry point: the status returned by main() becomes the exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user