Files
ops-bridge/tests/test_integration.py
tegwick 365c0d611a feat(BRIDGE-WP-0003): MCP server, /bridge-status skill, cross-mode coverage enforcement
Implements the full BRIDGE-WP-0003 workplan: 188 tests passing, 0 lint errors.

## What's added

**Capability registry** (`src/bridge/capabilities.py`):
- 10 capabilities with required_access_modes (cli/mcp/skill)
- Single source of truth for what OpsBridge does and where

**MCP server** (`src/bridge/mcp_server/server.py`):
- 10 FastMCP tools: bridge_up/down/restart/status/logs + 5 catalog_* tools
- 3 resources: bridge://status, catalog://domains, catalog://targets
- `.mcp.json` for project-scope auto-registration
- `scripts/register_mcp.py` for user-scope machine-global registration

**Skill** (`~/.claude/plugins/ops-bridge/bridge-status.md`):
- /bridge-status: health table with emoji indicators + remediation advice

**Cross-mode test coverage enforcement**:
- `tests/conftest.py`: capability/access_mode marks + collect_capability_coverage()
- `tests/test_mcp.py`: 31 FastMCP in-process client tests (Client(mcp) pattern)
- `tests/test_skill.py`: static skill lint against capability registry
- `tests/test_coverage_completeness.py`: meta-test that fails if any required
  (capability × mode) pair lacks a test; it also validates that CLI commands
  and MCP tools are registered in the capability registry

**ADR** (`architecture/adr-001-cross-mode-capability-registry.md`):
- Documents the registry pattern and FastMCP 3.x testing approach

Key implementation note: with FastMCP 3.x, in-process call results are returned
as a JSON string in result.content[0].text, not directly in result.data.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 11:33:16 +01:00

214 lines
6.8 KiB
Python

"""Integration tests for OpsBridge."""
import textwrap
from unittest.mock import MagicMock, patch
import pytest
from bridge.config import load_config
from bridge.manager import TunnelManager
from bridge.models import BridgeState, ReconnectPolicy, TunnelConfig
from bridge.state import StateManager
# Smallest tunnels.yaml the tests need: one tunnel ("local-test") with a
# reconnect policy, plus the actor that tunnel references.  The nesting is
# significant YAML structure -- the tests read cfg.tunnels["local-test"].host
# and cfg.tunnels["local-test"].reconnect.max_attempts -- so the per-tunnel
# keys must sit under "local-test" and the policy keys under "reconnect".
# textwrap.dedent strips the common four-space margin used for readability.
MINIMAL_CONFIG = textwrap.dedent("""\
    tunnels:
      local-test:
        host: 127.0.0.1
        remote_port: 19000
        local_port: 8000
        ssh_user: testuser
        ssh_key: ~/.ssh/id_rsa
        actor: operator.bernd
        reconnect:
          max_attempts: 2
          backoff_initial: 1
          backoff_max: 2
    actors:
      operator.bernd:
        class: human
        description: Bernd
""")
@pytest.fixture
def config_file(tmp_path):
    """Write MINIMAL_CONFIG to ``tunnels.yaml`` under tmp_path and return its path."""
    yaml_path = tmp_path / "tunnels.yaml"
    yaml_path.write_text(MINIMAL_CONFIG)
    return yaml_path
@pytest.fixture
def state_dir(tmp_path):
    """Return the ``bridge`` path under tmp_path; the fixture does not create it."""
    bridge_dir = tmp_path / "bridge"
    return bridge_dir
@pytest.fixture
def tunnel_cfg():
    """Build a TunnelConfig carrying the same values as MINIMAL_CONFIG's tunnel."""
    policy = ReconnectPolicy(max_attempts=2, backoff_initial=1, backoff_max=2)
    return TunnelConfig(
        name="local-test",
        host="127.0.0.1",
        remote_port=19000,
        local_port=8000,
        ssh_user="testuser",
        ssh_key="~/.ssh/id_rsa",
        actor="operator.bernd",
        reconnect=policy,
    )
class TestConfigRoundtrip:
    """Check that a config file on disk is loaded into the expected model."""

    def test_load_config_from_file(self, config_file, monkeypatch):
        """load_config() reads the file named by the BRIDGE_CONFIG env variable."""
        monkeypatch.setenv("BRIDGE_CONFIG", str(config_file))

        loaded = load_config()

        assert "local-test" in loaded.tunnels
        tunnel = loaded.tunnels["local-test"]
        assert tunnel.host == "127.0.0.1"
        # The nested reconnect policy must survive parsing as well.
        assert tunnel.reconnect.max_attempts == 2
        assert tunnel.reconnect.backoff_initial == 1
class TestStateRoundtrip:
    """Check that state written to state_dir is visible to later manager instances."""

    def test_state_persists_across_manager_instances(self, state_dir, tunnel_cfg):
        """State written through one manager is read back by a second one."""
        writer = TunnelManager(tunnel_cfg, state_dir=state_dir)
        writer._state.write_state(tunnel_cfg.name, BridgeState.CONNECTED)

        reader = TunnelManager(tunnel_cfg, state_dir=state_dir)

        assert reader.get_state() == BridgeState.CONNECTED

    def test_stale_pid_cleanup(self, state_dir, tunnel_cfg):
        """A recorded pid with no live process must not count as running."""
        store = StateManager(state_dir=state_dir)
        # 999999 is assumed not to belong to a live process (not strictly
        # guaranteed on systems configured with a larger pid range).
        store.write_pid(tunnel_cfg.name, 999999)
        store.write_state(tunnel_cfg.name, BridgeState.CONNECTED)

        # Even though the stored state says CONNECTED, the dead pid wins.
        manager = TunnelManager(tunnel_cfg, state_dir=state_dir)
        assert not manager.is_running()
class TestReconnectLoop:
    """Drive ``TunnelManager._run_loop`` with SSH processes that exit at once."""

    @staticmethod
    def _exited_process():
        """Return a process double whose ``poll()`` and ``returncode`` are both 1."""
        return MagicMock(returncode=1, **{"poll.return_value": 1})

    def test_reconnect_loop_gives_up_after_max_attempts(self, state_dir, tunnel_cfg):
        """Manager should set FAILED state after exhausting max_attempts."""
        manager = TunnelManager(tunnel_cfg, state_dir=state_dir)
        spawned = 0

        def fake_popen(cmd, **kwargs):
            # Every spawn yields an already-exited process and is counted.
            nonlocal spawned
            process = self._exited_process()
            spawned += 1
            return process

        with patch("subprocess.Popen", side_effect=fake_popen):
            with patch("time.sleep"):  # skip sleeps for speed
                manager._run_loop()

        assert spawned >= 1
        assert manager.get_state() == BridgeState.FAILED

    def test_reconnect_logs_events(self, state_dir, tunnel_cfg):
        """Audit log should contain reconnect events."""
        manager = TunnelManager(tunnel_cfg, state_dir=state_dir)

        def fake_popen(cmd, **kwargs):
            return self._exited_process()

        with patch("subprocess.Popen", side_effect=fake_popen):
            with patch("time.sleep"):
                manager._run_loop()

        logged = [entry["event"] for entry in manager._audit.read_events(tunnel_cfg.name)]
        # Any one of the lifecycle events is enough to pass.
        accepted = ("bridge_started", "bridge_reconnecting", "bridge_disconnected")
        assert any(name in logged for name in accepted)
class TestHealthCheckDegradedPath:
    """Exercise the run loop when the tunnel's health check reports failure."""

    def test_degraded_state_on_health_failure(self, state_dir):
        """Health check failure sets state to DEGRADED.

        The state itself is not asserted directly; the test checks the audit
        log for a ``health_check_failed`` or ``bridge_disconnected`` event
        after the run loop returns.
        """
        from itertools import chain, repeat

        from bridge.health import HealthResult

        hc_cfg = MagicMock()
        hc_cfg.url = "http://127.0.0.1:19001/health"
        hc_cfg.interval_seconds = 0
        hc_cfg.timeout_seconds = 1
        tunnel_cfg = TunnelConfig(
            name="hc-test",
            host="127.0.0.1",
            remote_port=19001,
            local_port=8001,
            ssh_user="u",
            ssh_key="k",
            actor="operator.bernd",
            reconnect=ReconnectPolicy(max_attempts=1, backoff_initial=1, backoff_max=1),
            health_check=hc_cfg,
        )
        mgr = TunnelManager(tunnel_cfg, state_dir=state_dir)
        proc_call_count = [0]

        def fake_popen(cmd, **kwargs):
            proc = MagicMock()
            proc.returncode = 1
            proc_call_count[0] += 1
            if proc_call_count[0] == 1:
                # First process: poll() reports "alive" (None) exactly once so
                # one health-check cycle can run, then reports exit code 1 on
                # every later call.  The iterator is unbounded, so the mock
                # cannot raise StopIteration however often it is polled.
                proc.poll.side_effect = chain([None], repeat(1))
            else:
                # Any later process is dead from the start.
                proc.poll.return_value = 1
            return proc

        failed_result = HealthResult(ok=False, error="connection refused")

        with patch("subprocess.Popen", side_effect=fake_popen), \
                patch("time.sleep"), \
                patch("bridge.manager.HealthChecker") as mock_hc_cls:
            mock_checker = MagicMock()
            mock_checker.check = MagicMock(side_effect=lambda: failed_result)
            mock_hc_cls.return_value = mock_checker
            # asyncio.run is replaced so whatever the manager passes to it
            # resolves straight to the failing result.
            with patch("asyncio.run", side_effect=lambda coro: failed_result):
                mgr._run_loop()

        # Should have set degraded at some point -- check audit log
        events = mgr._audit.read_events("hc-test")
        event_types = [e["event"] for e in events]
        assert "health_check_failed" in event_types or "bridge_disconnected" in event_types
class TestAuditTrail:
    """Check the shape of audit events produced by one run of the manager loop."""

    def test_full_lifecycle_logged(self, state_dir, tunnel_cfg):
        """A start + immediate-exit SSH produces at minimum started + disconnected events."""
        manager = TunnelManager(tunnel_cfg, state_dir=state_dir)

        def fake_popen(cmd, **kwargs):
            # The process double has already exited with code 1.
            return MagicMock(returncode=1, **{"poll.return_value": 1})

        with patch("subprocess.Popen", side_effect=fake_popen):
            with patch("time.sleep"):
                manager._run_loop()

        entries = manager._audit.read_events(tunnel_cfg.name)
        assert len(entries) >= 2

        # Each event has required fields
        for entry in entries:
            for field in ("timestamp", "tunnel", "actor", "event"):
                assert field in entry