generated from coulomb/repo-seed
feat(diagnostics): end-to-end tunnel check, stale state detection, MCP extensions
- diagnostics.py: TunnelCheckResult with SSH process liveness, port probe, and optional API health check; check_tunnel / check_all_tunnels
- cli.py: bridge status shows LIVE column and [STALE] marker when state says connected but PID is dead; bridge check wired to diagnostics
- state.py: read_raw_pid helper; _pid_alive exported for reuse
- capabilities.py: capabilities registry stubs
- mcp_server/server.py: expose check_tunnel and tunnel capabilities over MCP
- SCOPE.md: rapid orientation document
- workplans/OPS-WP-0001-diagnostics.md: workplan backing this feature
- tests: 207 passing (test_cli, test_mcp, test_diagnostics)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""CLI for OpsBridge — bridge command."""
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
@@ -10,8 +11,9 @@ import typer
|
||||
|
||||
from bridge.audit import AuditLogger
|
||||
from bridge.config import ConfigError, load_config
|
||||
from bridge.diagnostics import check_all_tunnels, check_tunnel
|
||||
from bridge.manager import TunnelManager
|
||||
from bridge.state import StateManager
|
||||
from bridge.state import StateManager, _pid_alive
|
||||
|
||||
app = typer.Typer(
|
||||
name="bridge",
|
||||
@@ -175,13 +177,20 @@ def status(
|
||||
rows = []
|
||||
for name, tcfg in cfg.tunnels.items():
|
||||
state = state_mgr.read_state(name)
|
||||
pid = state_mgr.read_pid(name)
|
||||
raw_pid = state_mgr.read_raw_pid(name)
|
||||
pid_alive_val = _pid_alive(raw_pid) if raw_pid is not None else None
|
||||
stale = (
|
||||
state.value in ("connected", "degraded")
|
||||
and pid_alive_val is not True
|
||||
)
|
||||
rows.append({
|
||||
"tunnel": name,
|
||||
"state": state.value,
|
||||
"actor": tcfg.actor,
|
||||
"host": tcfg.host,
|
||||
"pid": pid,
|
||||
"pid": raw_pid,
|
||||
"pid_alive": pid_alive_val,
|
||||
"stale": stale,
|
||||
"uptime": None,
|
||||
"health": None,
|
||||
})
|
||||
@@ -196,10 +205,29 @@ def _print_status_table(rows):
|
||||
if not rows:
|
||||
typer.echo("No tunnels configured.")
|
||||
return
|
||||
headers = ["TUNNEL", "STATE", "ACTOR", "HOST", "PID"]
|
||||
|
||||
def _state_display(row):
|
||||
s = row["state"]
|
||||
if row.get("stale"):
|
||||
s += " [STALE]"
|
||||
return s
|
||||
|
||||
def _live_display(row):
|
||||
alive = row.get("pid_alive")
|
||||
if alive is True:
|
||||
return "yes"
|
||||
elif alive is False:
|
||||
return "no"
|
||||
return "\u2014"
|
||||
|
||||
headers = ["TUNNEL", "STATE", "ACTOR", "HOST", "PID", "LIVE"]
|
||||
col_widths = [
|
||||
max(len(h), max((len(str(r.get(h.lower(), "") or "")) for r in rows), default=0))
|
||||
for h in headers
|
||||
max(len("TUNNEL"), max((len(row["tunnel"]) for row in rows), default=0)),
|
||||
max(len("STATE"), max((len(_state_display(row)) for row in rows), default=0)),
|
||||
max(len("ACTOR"), max((len(str(row.get("actor", "") or "")) for row in rows), default=0)),
|
||||
max(len("HOST"), max((len(str(row.get("host", "") or "")) for row in rows), default=0)),
|
||||
max(len("PID"), max((len(str(row["pid"] or "")) for row in rows), default=0)),
|
||||
max(len("LIVE"), max((len(_live_display(row)) for row in rows), default=0)),
|
||||
]
|
||||
|
||||
def _fmt_row(vals):
|
||||
@@ -210,10 +238,11 @@ def _print_status_table(rows):
|
||||
for row in rows:
|
||||
typer.echo(_fmt_row([
|
||||
row["tunnel"],
|
||||
row["state"],
|
||||
_state_display(row),
|
||||
row["actor"],
|
||||
row["host"],
|
||||
str(row["pid"] or ""),
|
||||
_live_display(row),
|
||||
]))
|
||||
|
||||
|
||||
@@ -272,6 +301,62 @@ def logs(
|
||||
pass
|
||||
|
||||
|
||||
@app.command()
def check(
    tunnel: Optional[str] = typer.Argument(None, help="Tunnel name (omit for all inline)"),
    as_json: bool = typer.Option(False, "--json", help="Output as JSON"),
):
    """End-to-end diagnostics: verify SSH PID alive and remote port listening."""
    # NOTE: the docstring above doubles as the command's help text, so it is
    # kept verbatim; implementation notes live in comments instead.
    cfg = _load_or_exit()
    state_mgr = StateManager(state_dir=_state_dir())

    # A falsy tunnel argument (omitted or empty) means "check every tunnel";
    # otherwise a single named tunnel is resolved and checked.
    if not tunnel:
        results = check_all_tunnels(cfg, state_mgr)
    else:
        results = [check_tunnel(_resolve_tunnel(cfg, tunnel), state_mgr)]

    if not as_json:
        _print_check_table(results)
    else:
        # ``ok`` is added explicitly on top of the dataclass fields.
        payload = [{**dataclasses.asdict(res), "ok": res.ok} for res in results]
        typer.echo(json.dumps(payload, indent=2))

    # Exit with status 1 as soon as any result is not ok.
    if not all(res.ok for res in results):
        raise typer.Exit(1)
|
||||
|
||||
|
||||
def _print_check_table(results):
    """Print tunnel check results as a left-aligned text table.

    Emits ``"No tunnels configured."`` and returns when *results* is empty.
    Otherwise prints a header row, a dashed rule, and one row per result with
    the columns TUNNEL, SSH, PID, PORT, API and OK.  Each column is as wide
    as its header or its longest cell, whichever is larger.
    """
    if not results:
        typer.echo("No tunnels configured.")
        return

    headers = ["TUNNEL", "SSH", "PID", "PORT", "API", "OK"]
    table = [
        [
            res.tunnel,
            res.ssh_process,
            str(res.pid or ""),
            res.remote_port,
            # A falsy local_api value is rendered as an em dash.
            res.local_api or "\u2014",
            "yes" if res.ok else "no",
        ]
        for res in results
    ]

    widths = []
    for idx, title in enumerate(headers):
        widest_cell = max((len(cells[idx]) for cells in table), default=0)
        widths.append(max(len(title), widest_cell))

    def _render(cells):
        return " ".join(str(cell).ljust(width) for cell, width in zip(cells, widths))

    typer.echo(_render(headers))
    typer.echo(_render(["-" * width for width in widths]))
    for cells in table:
        typer.echo(_render(cells))
|
||||
|
||||
|
||||
# ─── targets commands ─────────────────────────────────────────────────────────
|
||||
|
||||
@targets_app.callback(invoke_without_command=True)
|
||||
|
||||
Reference in New Issue
Block a user