Files
ops-bridge/tests/test_diagnostics.py
tegwick a55c685f89 feat(diagnostics): end-to-end tunnel check, stale state detection, MCP extensions
- diagnostics.py: TunnelCheckResult with SSH process liveness, port
  probe, and optional API health check; check_tunnel / check_all_tunnels
- cli.py: bridge status shows LIVE column and [STALE] marker when state
  says connected but PID is dead; bridge check wired to diagnostics
- state.py: read_raw_pid helper; _pid_alive exported for reuse
- capabilities.py: capabilities registry stubs
- mcp_server/server.py: expose check_tunnel and tunnel capabilities
  over MCP
- SCOPE.md: rapid orientation document
- workplans/OPS-WP-0001-diagnostics.md: workplan backing this feature
- tests: 207 passing (test_cli, test_mcp, test_diagnostics)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 15:07:47 +01:00

178 lines
6.7 KiB
Python

"""Tests for bridge.diagnostics — check_tunnel() logic."""
from __future__ import annotations
import subprocess
from unittest.mock import MagicMock, patch
import pytest
from bridge.diagnostics import TunnelCheckResult, check_all_tunnels, check_tunnel
from bridge.models import BridgeState, TunnelConfig
from bridge.state import StateManager
@pytest.fixture
def tcfg():
    """Provide the TunnelConfig shared by the check_tunnel tests.

    The tunnel is named ``test-tunnel``; no ``health_check`` argument is
    passed to the constructor.
    """
    settings = {
        "name": "test-tunnel",
        "host": "coulombcore.local",
        "remote_port": 18000,
        "local_port": 8000,
        "ssh_user": "ubuntu",
        "ssh_key": "~/.ssh/id_ops",
        "actor": "operator.bernd",
    }
    return TunnelConfig(**settings)
@pytest.fixture
def state_mgr(tmp_path):
    """Provide a StateManager backed by a new ``state`` directory under tmp_path."""
    state_dir = tmp_path / "state"
    state_dir.mkdir()
    return StateManager(state_dir=state_dir)
class TestCheckTunnel:
    """Unit tests for check_tunnel().

    The SSH port probe (``bridge.diagnostics.subprocess.run``), the PID
    liveness helper (``bridge.diagnostics._pid_alive``) and the HTTP client
    call (``bridge.diagnostics.httpx.get``) are patched where a test needs
    them.
    """

    @staticmethod
    def _probe_reply(stdout, returncode):
        """Return a mock standing in for the result of the patched subprocess.run."""
        return MagicMock(stdout=stdout, stderr="", returncode=returncode)

    def test_no_pid(self, tcfg, state_mgr):
        """No PID file → ssh_process='no_pid', pid=None, ok=False."""
        closed = self._probe_reply("closed\n", 1)
        with patch("bridge.diagnostics.subprocess.run", return_value=closed):
            outcome = check_tunnel(tcfg, state_mgr)

        assert outcome.ssh_process == "no_pid"
        assert outcome.pid is None
        assert outcome.stale_state is False
        assert outcome.ok is False

    def test_pid_dead(self, tcfg, state_mgr):
        """Dead PID while state is CONNECTED → ssh_process='dead', stale_state=True."""
        state_mgr.write_pid("test-tunnel", 99999)
        state_mgr.write_state("test-tunnel", BridgeState.CONNECTED)
        closed = self._probe_reply("closed\n", 1)

        with patch("bridge.diagnostics._pid_alive", return_value=False):
            with patch("bridge.diagnostics.subprocess.run", return_value=closed):
                outcome = check_tunnel(tcfg, state_mgr)

        assert outcome.ssh_process == "dead"
        assert outcome.stale_state is True
        assert outcome.ok is False

    def test_pid_alive_port_listening(self, tcfg, state_mgr):
        """Alive PID and a probe answering 'ok' → remote_port='listening', ok=True."""
        state_mgr.write_pid("test-tunnel", 12345)
        listening = self._probe_reply("ok\n", 0)

        with patch("bridge.diagnostics._pid_alive", return_value=True):
            with patch("bridge.diagnostics.subprocess.run", return_value=listening):
                outcome = check_tunnel(tcfg, state_mgr)

        assert outcome.ssh_process == "ok"
        assert outcome.pid == 12345
        assert outcome.remote_port == "listening"
        assert outcome.ok is True

    def test_pid_alive_port_closed(self, tcfg, state_mgr):
        """Alive PID and a probe answering 'closed' → remote_port='closed', ok=False."""
        state_mgr.write_pid("test-tunnel", 12345)
        closed = self._probe_reply("closed\n", 1)

        with patch("bridge.diagnostics._pid_alive", return_value=True):
            with patch("bridge.diagnostics.subprocess.run", return_value=closed):
                outcome = check_tunnel(tcfg, state_mgr)

        assert outcome.ssh_process == "ok"
        assert outcome.remote_port == "closed"
        assert outcome.ok is False

    def test_ssh_timeout(self, tcfg, state_mgr):
        """Probe raising TimeoutExpired → remote_port='error:timeout', ok=False."""
        state_mgr.write_pid("test-tunnel", 12345)
        timed_out = subprocess.TimeoutExpired(cmd=["ssh"], timeout=10)

        with patch("bridge.diagnostics._pid_alive", return_value=True):
            with patch("bridge.diagnostics.subprocess.run", side_effect=timed_out):
                outcome = check_tunnel(tcfg, state_mgr)

        assert outcome.remote_port == "error:timeout"
        assert outcome.ok is False

    def test_stale_state_not_flagged_when_stopped(self, tcfg, state_mgr):
        """Neither state nor PID written → stale_state is False.

        NOTE(review): the test relies on the state manager's default state
        being a non-connected one (presumably "stopped") — confirm.
        """
        closed = self._probe_reply("closed\n", 1)
        with patch("bridge.diagnostics.subprocess.run", return_value=closed):
            outcome = check_tunnel(tcfg, state_mgr)

        assert outcome.stale_state is False

    def test_local_api_ok(self, tcfg, state_mgr, tmp_path):
        """With health_check configured, a successful response sets local_api='ok'."""
        from bridge.models import HealthCheckConfig

        health = HealthCheckConfig(url="http://127.0.0.1:8000/health")
        tcfg_with_health = TunnelConfig(
            name="test-tunnel",
            host="coulombcore.local",
            remote_port=18000,
            local_port=8000,
            ssh_user="ubuntu",
            ssh_key="~/.ssh/id_ops",
            actor="operator.bernd",
            health_check=health,
        )
        state_mgr.write_pid("test-tunnel", 12345)

        healthy_response = MagicMock(is_success=True)
        listening = self._probe_reply("ok\n", 0)

        with patch("bridge.diagnostics._pid_alive", return_value=True):
            with patch("bridge.diagnostics.subprocess.run", return_value=listening):
                with patch("bridge.diagnostics.httpx.get", return_value=healthy_response):
                    outcome = check_tunnel(tcfg_with_health, state_mgr)

        assert outcome.local_api == "ok"
        assert outcome.latency_ms is not None
class TestCheckAllTunnels:
    """Unit tests for check_all_tunnels() using a config loaded from a temp YAML file."""

    def test_check_all_iterates_tunnels(self, tmp_path):
        """check_all_tunnels returns one result per tunnel in cfg.

        A two-tunnel config (``t1``, ``t2``) is written to ``tmp_path`` and
        loaded via the ``BRIDGE_CONFIG`` environment variable; the SSH probe
        is mocked to answer "closed" for every tunnel.
        """
        import os
        import textwrap

        from bridge.config import load_config

        cfg_file = tmp_path / "tunnels.yaml"
        cfg_file.write_text(textwrap.dedent("""\
            tunnels:
              t1:
                host: h1.local
                remote_port: 18001
                local_port: 8001
                ssh_user: ubuntu
                ssh_key: ~/.ssh/id_ops
                actor: operator.bernd
              t2:
                host: h2.local
                remote_port: 18002
                local_port: 8002
                ssh_user: ubuntu
                ssh_key: ~/.ssh/id_ops
                actor: operator.bernd
            actors:
              operator.bernd:
                class: human
                description: Bernd
        """))

        # patch.dict puts os.environ back exactly as it was on exit, so a
        # BRIDGE_CONFIG value that was already set in the developer's/CI
        # environment survives this test (a bare set + del would drop it).
        with patch.dict(os.environ, {"BRIDGE_CONFIG": str(cfg_file)}):
            cfg = load_config()

        state_dir = tmp_path / "state"
        state_dir.mkdir()
        state_mgr = StateManager(state_dir=state_dir)

        closed = MagicMock(stdout="closed\n", stderr="", returncode=1)
        with patch("bridge.diagnostics.subprocess.run", return_value=closed):
            results = check_all_tunnels(cfg, state_mgr)

        assert len(results) == 2
        assert {r.tunnel for r in results} == {"t1", "t2"}