generated from coulomb/repo-seed
Add bridge maintenance cleanup to detect reverse tunnels whose remote port is bound but no longer forwards (zombie sshd sessions), kill the stale listeners on the remote host, and optionally restart the tunnel. Includes install-cron/uninstall-cron/show-cron helpers and README notes for the actcore-state-hub-bridge failure mode we hit on railiance01.
132 lines
4.4 KiB
Python
132 lines
4.4 KiB
Python
"""Tests for stale SSH forward cleanup."""
|
|
from __future__ import annotations
|
|
|
|
import textwrap
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
from typer.testing import CliRunner
|
|
|
|
from bridge.cleanup import (
|
|
CleanupAction,
|
|
CleanupReport,
|
|
build_cron_line,
|
|
cleanup_all_tunnels,
|
|
remote_forward_health_url,
|
|
should_cleanup_tunnel,
|
|
)
|
|
from bridge.cli import app
|
|
from bridge.config import load_config
|
|
from bridge.models import HealthCheckConfig, TunnelConfig
|
|
from bridge.state import StateManager
|
|
|
|
|
|
def _tunnel(**overrides) -> TunnelConfig:
    """Build a ``TunnelConfig`` fixture for the tests in this module.

    A fixed set of default fields is used; any keyword passed in
    ``overrides`` replaces the default of the same name, or is added when
    no such default exists, before ``TunnelConfig`` is constructed.
    """
    health = HealthCheckConfig(
        url="http://127.0.0.1:8000/state/health",
        timeout_seconds=5,
    )
    defaults = {
        "name": "state-hub-railiance01",
        "host": "92.205.62.239",
        "remote_port": 18000,
        "local_port": 8000,
        "ssh_user": "tegwick",
        "ssh_key": "~/.ssh/id_ops",
        "actor": "agt-claude-railiance01",
        "health_check": health,
    }
    # Later keys win, so caller-supplied overrides take precedence.
    return TunnelConfig(**{**defaults, **overrides})
|
|
|
|
|
|
class TestRemoteForwardHealthUrl:
    """Checks for ``remote_forward_health_url``."""

    def test_maps_local_port_to_remote(self):
        """The default fixture's health URL is reported on port 18000."""
        expected = "http://127.0.0.1:18000/state/health"
        assert remote_forward_health_url(_tunnel()) == expected

    def test_returns_none_for_local_tunnel(self):
        """A tunnel built with ``direction="local"`` yields ``None``."""
        local_cfg = _tunnel(direction="local")
        assert remote_forward_health_url(local_cfg) is None
|
|
|
|
|
|
class TestShouldCleanupTunnel:
    """Checks for ``should_cleanup_tunnel`` with its probes patched out."""

    def test_skips_healthy_remote_forward(self, tmp_path):
        """No cleanup is requested when the remote port listens and probes ok."""
        tunnel_cfg = _tunnel()
        manager = StateManager(state_dir=tmp_path)

        listening = patch("bridge.cleanup.remote_port_listening", return_value=True)
        probe = patch("bridge.cleanup.probe_remote_forward", return_value=(True, "ok"))

        with listening, probe:
            needed, _ = should_cleanup_tunnel(tunnel_cfg, manager)

        assert needed is False

    def test_detects_stale_forward_when_local_ok_remote_fails(self, tmp_path):
        """Cleanup is requested when the remote probe fails but local is healthy."""
        tunnel_cfg = _tunnel()
        manager = StateManager(state_dir=tmp_path)

        listening = patch("bridge.cleanup.remote_port_listening", return_value=True)
        probe = patch(
            "bridge.cleanup.probe_remote_forward",
            return_value=(False, "timeout"),
        )
        local_ok = patch("bridge.cleanup.local_service_healthy", return_value=True)
        tunnel_status = patch(
            "bridge.cleanup.check_tunnel",
            return_value=MagicMock(ssh_process="ok", remote_port="listening"),
        )

        # Patchers are entered in the same order the original listed them.
        with listening, probe, local_ok, tunnel_status:
            needed, reason = should_cleanup_tunnel(tunnel_cfg, manager)

        assert needed is True
        assert "stale forward" in reason
|
|
|
|
|
|
class TestCleanupAllTunnels:
    """Checks for ``cleanup_all_tunnels`` driven by a config file on disk."""

    def test_reports_cleaned_tunnel(self, tmp_path, monkeypatch):
        """A tunnel whose cleanup returns "cleaned" is counted in the report."""
        config_path = tmp_path / "tunnels.yaml"
        monkeypatch.setenv("BRIDGE_CONFIG", str(config_path))

        # NOTE(review): the YAML nesting below was reconstructed from a
        # whitespace-stripped copy of this file - confirm against the schema
        # that ``load_config`` expects.
        yaml_body = textwrap.dedent(
            """\
            tunnels:
              state-hub-railiance01:
                host: 92.205.62.239
                remote_port: 18000
                local_port: 8000
                ssh_user: tegwick
                ssh_key: ~/.ssh/id_ops
                actor: agt-claude-railiance01
                health_check:
                  url: http://127.0.0.1:8000/state/health
            actors:
              agt-claude-railiance01:
                class: agt
            """
        )
        config_path.write_text(yaml_body)

        loaded = load_config()
        manager = StateManager(state_dir=tmp_path / "state")
        canned = CleanupAction("state-hub-railiance01", "cleaned", "cleared")

        # Replace the per-tunnel cleanup so no real SSH work is attempted.
        with patch("bridge.cleanup.cleanup_tunnel", return_value=canned):
            report = cleanup_all_tunnels(loaded, manager, restart=False)

        assert report.cleaned_count == 1
        assert report.actions[0].action == "cleaned"
|
|
|
|
|
|
class TestMaintenanceCli:
    """CLI-level checks for the ``maintenance`` sub-commands of ``app``."""

    def test_cleanup_help(self):
        """``maintenance cleanup --help`` exits 0 and mentions "restart"."""
        outcome = CliRunner().invoke(app, ["maintenance", "cleanup", "--help"])

        assert outcome.exit_code == 0
        assert "restart" in outcome.output.lower()

    def test_show_cron_prints_template_when_not_installed(self):
        """``show-cron`` prints the schedule when no cron entry is reported."""
        cli = CliRunner()
        no_entry = patch("bridge.cli.read_installed_cron", return_value=None)

        with no_entry:
            outcome = cli.invoke(app, ["maintenance", "show-cron"])

        assert outcome.exit_code == 0
        assert "0 3 * * *" in outcome.output
|
|
|
|
|
|
def test_build_cron_line_contains_marker():
    """The generated cron line carries the schedule, command and marker text."""
    cron_line = build_cron_line()
    required_fragments = (
        "0 3 * * *",
        "maintenance cleanup --restart",
        "ops-bridge: maintenance cleanup",
    )
    for fragment in required_fragments:
        assert fragment in cron_line