feat: implement OpsBridge CLI (BRIDGE-WP-0001)

Full TDD implementation of the `bridge` CLI tool covering all phases
from BRIDGE-WP-0001: project scaffolding, config loading, state
management, audit logging, health checks, tunnel lifecycle manager, and
all CLI commands (up/down/restart/status/logs). 77 tests, all green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-12 01:40:08 +00:00
parent 2c7c440ea7
commit a7eaf59ced
18 changed files with 1803 additions and 0 deletions

219
tests/test_integration.py Normal file
View File

@@ -0,0 +1,219 @@
"""Integration tests for OpsBridge."""
import json
import os
import textwrap
import time
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from bridge.config import load_config
from bridge.manager import TunnelManager
from bridge.models import BridgeState, ReconnectPolicy, TunnelConfig
from bridge.state import StateManager
MINIMAL_CONFIG = textwrap.dedent("""\
tunnels:
local-test:
host: 127.0.0.1
remote_port: 19000
local_port: 8000
ssh_user: testuser
ssh_key: ~/.ssh/id_rsa
actor: operator.bernd
reconnect:
max_attempts: 2
backoff_initial: 1
backoff_max: 2
actors:
operator.bernd:
class: human
description: Bernd
""")
@pytest.fixture
def config_file(tmp_path):
f = tmp_path / "tunnels.yaml"
f.write_text(MINIMAL_CONFIG)
return f
@pytest.fixture
def state_dir(tmp_path):
return tmp_path / "bridge"
@pytest.fixture
def tunnel_cfg():
return TunnelConfig(
name="local-test",
host="127.0.0.1",
remote_port=19000,
local_port=8000,
ssh_user="testuser",
ssh_key="~/.ssh/id_rsa",
actor="operator.bernd",
reconnect=ReconnectPolicy(max_attempts=2, backoff_initial=1, backoff_max=2),
)
class TestConfigRoundtrip:
def test_load_config_from_file(self, config_file, monkeypatch):
monkeypatch.setenv("BRIDGE_CONFIG", str(config_file))
cfg = load_config()
assert "local-test" in cfg.tunnels
t = cfg.tunnels["local-test"]
assert t.host == "127.0.0.1"
assert t.reconnect.max_attempts == 2
assert t.reconnect.backoff_initial == 1
class TestStateRoundtrip:
def test_state_persists_across_manager_instances(self, state_dir, tunnel_cfg):
mgr1 = TunnelManager(tunnel_cfg, state_dir=state_dir)
mgr1._state.write_state(tunnel_cfg.name, BridgeState.CONNECTED)
mgr2 = TunnelManager(tunnel_cfg, state_dir=state_dir)
assert mgr2.get_state() == BridgeState.CONNECTED
def test_stale_pid_cleanup(self, state_dir, tunnel_cfg):
sm = StateManager(state_dir=state_dir)
sm.write_pid(tunnel_cfg.name, 999999) # guaranteed not alive
sm.write_state(tunnel_cfg.name, BridgeState.CONNECTED)
# is_running should return False for dead pid
mgr = TunnelManager(tunnel_cfg, state_dir=state_dir)
assert not mgr.is_running()
class TestReconnectLoop:
def test_reconnect_loop_gives_up_after_max_attempts(self, state_dir, tunnel_cfg):
"""Manager should set FAILED state after exhausting max_attempts."""
mgr = TunnelManager(tunnel_cfg, state_dir=state_dir)
attempt_count = [0]
def fake_popen(cmd, **kwargs):
proc = MagicMock()
proc.poll.return_value = 1 # immediately "dead"
proc.returncode = 1
attempt_count[0] += 1
return proc
with patch("subprocess.Popen", side_effect=fake_popen), \
patch("time.sleep"): # skip sleeps for speed
mgr._run_loop()
assert attempt_count[0] >= 1
assert mgr.get_state() == BridgeState.FAILED
def test_reconnect_logs_events(self, state_dir, tunnel_cfg):
"""Audit log should contain reconnect events."""
mgr = TunnelManager(tunnel_cfg, state_dir=state_dir)
def fake_popen(cmd, **kwargs):
proc = MagicMock()
proc.poll.return_value = 1
proc.returncode = 1
return proc
with patch("subprocess.Popen", side_effect=fake_popen), \
patch("time.sleep"):
mgr._run_loop()
events = mgr._audit.read_events(tunnel_cfg.name)
event_types = [e["event"] for e in events]
assert "bridge_started" in event_types or "bridge_reconnecting" in event_types or "bridge_disconnected" in event_types
class TestHealthCheckDegradedPath:
def test_degraded_state_on_health_failure(self, state_dir):
"""Health check failure sets state to DEGRADED."""
from bridge.health import HealthChecker, HealthResult
hc_cfg = MagicMock()
hc_cfg.url = "http://127.0.0.1:19001/health"
hc_cfg.interval_seconds = 0
hc_cfg.timeout_seconds = 1
tunnel_cfg = TunnelConfig(
name="hc-test",
host="127.0.0.1",
remote_port=19001,
local_port=8001,
ssh_user="u",
ssh_key="k",
actor="operator.bernd",
reconnect=ReconnectPolicy(max_attempts=1, backoff_initial=1, backoff_max=1),
health_check=hc_cfg,
)
mgr = TunnelManager(tunnel_cfg, state_dir=state_dir)
proc_call_count = [0]
def fake_popen(cmd, **kwargs):
proc = MagicMock()
# First call: "alive" for 1 health check cycle then dies
proc_call_count[0] += 1
if proc_call_count[0] == 1:
# Poll returns None (alive) once then dies
poll_calls = [None, 1]
proc.poll.side_effect = poll_calls + [1] * 100
proc.returncode = 1
else:
proc.poll.return_value = 1
proc.returncode = 1
return proc
failed_result = HealthResult(ok=False, error="connection refused")
recovered_result = HealthResult(ok=True, status_code=200)
import asyncio
async def fake_check_failing():
return failed_result
with patch("subprocess.Popen", side_effect=fake_popen), \
patch("time.sleep"), \
patch("bridge.manager.HealthChecker") as mock_hc_cls:
mock_checker = MagicMock()
mock_checker.check = MagicMock(side_effect=lambda: failed_result)
# Use asyncio.run compatibility
mock_hc_cls.return_value = mock_checker
with patch("asyncio.run", side_effect=lambda coro: failed_result):
mgr._run_loop()
# Should have set degraded at some point — check audit log
events = mgr._audit.read_events("hc-test")
event_types = [e["event"] for e in events]
assert "health_check_failed" in event_types or "bridge_disconnected" in event_types
class TestAuditTrail:
def test_full_lifecycle_logged(self, state_dir, tunnel_cfg):
"""A start + immediate-exit SSH produces at minimum started + disconnected events."""
mgr = TunnelManager(tunnel_cfg, state_dir=state_dir)
def fake_popen(cmd, **kwargs):
proc = MagicMock()
proc.poll.return_value = 1
proc.returncode = 1
return proc
with patch("subprocess.Popen", side_effect=fake_popen), \
patch("time.sleep"):
mgr._run_loop()
events = mgr._audit.read_events(tunnel_cfg.name)
assert len(events) >= 2
# Each event has required fields
for e in events:
assert "timestamp" in e
assert "tunnel" in e
assert "actor" in e
assert "event" in e