Skip to content

Commit 99cd14e

Browse files
author
Paul C
committed
v22.7.1: Predictive Inbox — multi-cluster fixes + Run buttons + clearer cards
Three fixes/improvements on the v22.7.0 Predictive Inbox surface, all driven by feedback from operators running multi-cluster setups. 1. Multi-cluster filter no longer empties the list The cluster aggregator was indexing nodes by `peer.id` (the locally-assigned cluster key) but each peer's predictive orchestrator stamps proposals with `state.node_id` (the peer's self_id from /etc/wolfstack/node_id). The frontend's `clusterByNode[scope.node_id]` lookup therefore failed for every peer's proposal, defaulted to "WolfStack", and the cluster filter excluded everything when the operator narrowed to anything else. Fix: build_cluster_response now uses `peer.self_id` (with `peer.id` as fallback when self_id hasn't been reported yet) as the canonical NodeAggregateStatus.node_id, matching the same id the proposals already carry. 2. Card clarity — every finding shows runtime + server + resource prominently. New `predictiveRuntimeBadge()` maps each finding_type to a coloured runtime badge (DOCKER 🐳, LXC 📦, VM 💻, CERT 🔒, BACKUP 💾, SSH 🔑, NETWORK 🌐, SERVICE ⚙️, HOST 🖥️). Card header now reads cluster · 🖥️ hostname · `resource_id`, so a Docker restart-loop finding shows up as DOCKER badge + the host's hostname + the container name without having to read the title. 3. Run buttons that drop the operator straight into a terminal - New endpoint: `GET /api/proposals/{id}/command/{idx}` returns the analyzer's recommended command + resolved console target (type+name) + remote node_id when the finding is on a peer. Auth-gated; the command comes from the proposal store on disk, never from URL parameters, so there's no shell-injection surface from a crafted link. - New ▶ Run button per command, alongside the existing 📋 Copy. 
Clicking opens `console.html` connected to the right place: * `docker:<name>` → docker exec into the container * `lxc:<name>` → lxc-attach * `vm:<name>:...` → VM serial console (qm/virsh/socat) * everything else (host mounts, certs, backups, sshd) → host shell on the node the finding lives on. Cross-node findings open via the existing remote-console proxy (`?node_id=...`), so operators on cluster Node A can still run commands surfaced from Node B without first navigating there. - Comment lines (commands starting with `#`) only get Copy, not Run — running a comment would be a no-op. - The console reuses the existing AI-action-id stuffing pattern: fetch the command after the WebSocket connects, write it to the PTY's stdin, and leave the operator at the interactive prompt for follow-up. All 157 tests still pass. Code-reviewer reviews from the v22.7.0 deltas remain in effect.
1 parent fc119f5 commit 99cd14e

4 files changed

Lines changed: 228 additions & 24 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "wolfstack"
3-
version = "22.7.0"
3+
version = "22.7.1"
44
edition = "2024"
55
authors = ["Wolf Software Systems Ltd"]
66
description = "Server management platform for the Wolf software suite"

src/api/mod.rs

Lines changed: 111 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17323,23 +17323,32 @@ async fn build_cluster_response(
1732317323
// collect the JoinHandles then await them with `tokio::join!` /
1732417324
// `futures::join_all`-style sequence. Using FuturesUnordered
1732517325
// would be tidier; for a small N this is fine.
17326-
// Keep `(id, hostname, cluster)` alongside each JoinHandle so
17327-
// that even if a task panics we still know which peer it was
17328-
// and which cluster it belongs to. Without these, a panicked
17329-
// task's `JoinError` carries no peer-level attribution and
17330-
// operators get a useless "<panicked>" entry in the warning
17331-
// banner with no cluster context.
17326+
// Each handle carries (node_id, hostname, cluster) so panics
17327+
// still identify the peer. The `node_id` here MUST match what
17328+
// the peer's own predictive orchestrator stamps onto
17329+
// `Proposal.scope.node_id` — otherwise the cluster filter in
17330+
// the Inbox UI maps the proposal to the wrong cluster and the
17331+
// list goes empty when the operator picks a non-default
17332+
// cluster.
17333+
//
17334+
// Each peer's orchestrator uses the local `state.node_id`,
17335+
// which is the value of `/etc/wolfstack/node_id` (the peer's
17336+
// *self_id*). Locally we know that as `peer.self_id` (Option),
17337+
// populated when the peer first reports. Falls back to
17338+
// `peer.id` (the locally-assigned cluster key) for peers that
17339+
// haven't reported a self_id yet.
1733217340
let secret = state.cluster_secret.clone();
1733317341
let mut handles: Vec<(String, String, String, tokio::task::JoinHandle<Result<Vec<crate::predictive::Proposal>, String>>)> =
1733417342
Vec::with_capacity(peers.len());
1733517343
for peer in &peers {
1733617344
let urls = build_node_urls(&peer.address, peer.port, "/api/proposals");
1733717345
let secret = secret.clone();
1733817346
let cluster = resolve_cluster_label(peer);
17347+
let canonical_id = peer.self_id.clone().unwrap_or_else(|| peer.id.clone());
1733917348
let handle = tokio::spawn(async move {
1734017349
fetch_peer_proposals(&urls, &secret).await
1734117350
});
17342-
handles.push((peer.id.clone(), peer.hostname.clone(), cluster, handle));
17351+
handles.push((canonical_id, peer.hostname.clone(), cluster, handle));
1734317352
}
1734417353

1734517354
let mut all_proposals = self_proposals;
@@ -17589,6 +17598,100 @@ pub async fn predictive_proposal_approve(
1758917598
}
1759017599
}
1759117600

17601+
/// GET /api/proposals/{id}/command/{idx} — return one of a
17602+
/// proposal's `Manual` remediation commands plus the resolved
17603+
/// console target (type + name, plus remote `node_id` when the
17604+
/// finding lives on a peer). Used by the Inbox UI's Run button:
17605+
/// the dashboard opens `console.html` with these values, the page
17606+
/// fetches the command via this endpoint, and writes it to the
17607+
/// PTY's stdin so the operator drops into an interactive session
17608+
/// with the suggested command pre-run.
17609+
///
17610+
/// Auth-gated. The command itself comes from the analyzer's
17611+
/// proposal store on disk (never user-supplied), so URL injection
17612+
/// can't smuggle in arbitrary shell — the command index is bounded
17613+
/// by the proposal's recorded list.
17614+
pub async fn predictive_proposal_command(
17615+
req: HttpRequest,
17616+
state: web::Data<AppState>,
17617+
path: web::Path<(String, usize)>,
17618+
) -> HttpResponse {
17619+
if let Err(resp) = require_auth(&req, &state) { return resp; }
17620+
let (id, idx) = path.into_inner();
17621+
17622+
let store = match state.predictive_proposals.read() {
17623+
Ok(g) => g,
17624+
Err(e) => e.into_inner(),
17625+
};
17626+
let Some(p) = store.get(&id) else {
17627+
return HttpResponse::NotFound().json(serde_json::json!({
17628+
"error": "proposal not found",
17629+
}));
17630+
};
17631+
let cmds = match &p.remediation {
17632+
crate::predictive::RemediationPlan::Manual { commands, .. } => commands,
17633+
_ => return HttpResponse::BadRequest().json(serde_json::json!({
17634+
"error": "proposal has no Manual remediation commands",
17635+
})),
17636+
};
17637+
let Some(command) = cmds.get(idx) else {
17638+
return HttpResponse::NotFound().json(serde_json::json!({
17639+
"error": "command index out of range",
17640+
}));
17641+
};
17642+
17643+
// Resolve the console target from scope.resource_id. The format
17644+
// mirrors the resource-id conventions each analyzer uses, so a
17645+
// finding on `docker:postgres` opens a docker exec session into
17646+
// postgres; `lxc:web` → lxc-attach; `vm:opnsense:...` → VM
17647+
// serial console; everything else → host shell on the node the
17648+
// finding lives on.
17649+
let (console_type, console_name) = resolve_console_target(p);
17650+
17651+
// Cross-node: if the finding is on a peer (scope.node_id !=
17652+
// this server's node_id), surface the peer's locally-assigned
17653+
// cluster-key id so console.html can use the existing
17654+
// remote-console proxy. Console.html accepts both peer.id and
17655+
// peer.self_id via `cluster.get_node`'s fallback scan.
17656+
let remote_node_id: Option<String> = if p.scope.node_id != state.node_id {
17657+
Some(p.scope.node_id.clone())
17658+
} else {
17659+
None
17660+
};
17661+
17662+
HttpResponse::Ok().json(serde_json::json!({
17663+
"command": command,
17664+
"console_type": console_type,
17665+
"console_name": console_name,
17666+
"remote_node_id": remote_node_id,
17667+
"title": p.title,
17668+
}))
17669+
}
17670+
17671+
/// Map a proposal's `scope.resource_id` to a `(console_type,
17672+
/// console_name)` pair compatible with the existing `/ws/console/
17673+
/// {type}/{name}` route + `console.html?type=...&name=...` URL.
17674+
/// Conventions match what each analyzer writes into resource_id.
17675+
fn resolve_console_target(p: &crate::predictive::Proposal) -> (String, String) {
17676+
let rid = p.scope.resource_id.as_deref().unwrap_or("");
17677+
if let Some(name) = rid.strip_prefix("docker:") {
17678+
return ("docker".into(), name.to_string());
17679+
}
17680+
if let Some(name) = rid.strip_prefix("lxc:") {
17681+
return ("lxc".into(), name.to_string());
17682+
}
17683+
if let Some(rest) = rid.strip_prefix("vm:") {
17684+
// vm scope is `vm:<name>:<disk_path>` — take the name only.
17685+
let name = rest.split(':').next().unwrap_or(rest);
17686+
return ("vm".into(), name.to_string());
17687+
}
17688+
// host shell for everything else: filesystem mounts (`/var/log`),
17689+
// certs (`letsencrypt:...`, `wolfstack-tls:...`), backup
17690+
// schedules (`backup:<id>`), node-level findings (`host`,
17691+
// `sshd`), and per-unit findings (`my-unit.service`).
17692+
("host".into(), "host".into())
17693+
}
17694+
1759217695
/// POST /api/proposals/run-now — synchronously run one orchestrator
1759317696
/// tick instead of waiting for the next 5-min cadence. Useful right
1759417697
/// after the operator clears a finding to refresh the inbox.
@@ -22053,6 +22156,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
2205322156
.route("/api/proposals/history", web::get().to(predictive_proposals_history))
2205422157
.route("/api/proposals/run-now", web::post().to(predictive_proposals_run_now))
2205522158
.route("/api/proposals/{id}", web::get().to(predictive_proposal_get))
22159+
.route("/api/proposals/{id}/command/{idx}", web::get().to(predictive_proposal_command))
2205622160
.route("/api/proposals/{id}/snooze", web::post().to(predictive_proposal_snooze))
2205722161
.route("/api/proposals/{id}/dismiss", web::post().to(predictive_proposal_dismiss))
2205822162
.route("/api/proposals/{id}/approve", web::post().to(predictive_proposal_approve))

web/console.html

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,32 @@ <h1 id="title">💻 WolfStack Console</h1>
193193
term.writeln('\x1b[31m⚠ Failed to load AI action: ' + e.message + '\x1b[0m\r\n');
194194
});
195195
}
196+
197+
// Same pattern for predictive Inbox "Run" button:
198+
// ?proposal_id=<id>&cmd_idx=<n> fetches the proposal's
199+
// remediation command from the server (auth-gated;
200+
// command comes from the proposal store on disk, not
201+
// from URL params, so there's no injection surface)
202+
// and writes it to the PTY's stdin.
203+
const proposalId = params.get('proposal_id');
204+
const cmdIdx = params.get('cmd_idx');
205+
if (proposalId && cmdIdx && /^[A-Za-z0-9\-]+$/.test(proposalId) && /^\d+$/.test(cmdIdx)) {
206+
fetch('/api/proposals/' + encodeURIComponent(proposalId) + '/command/' + encodeURIComponent(cmdIdx))
207+
.then(r => {
208+
if (!r.ok) throw new Error('Proposal command not found');
209+
return r.json();
210+
})
211+
.then(data => {
212+
if (data.command) {
213+
term.writeln('\x1b[35m🔮 Predictive Inbox: ' + (data.title || 'remediation') + '\x1b[0m');
214+
term.writeln('\x1b[36m$ ' + data.command + '\x1b[0m\r\n');
215+
setTimeout(() => { ws.send(data.command + '\n'); }, 800);
216+
}
217+
})
218+
.catch(e => {
219+
term.writeln('\x1b[31m⚠ Failed to load predictive command: ' + e.message + '\x1b[0m\r\n');
220+
});
221+
}
196222
};
197223

198224
ws.onmessage = (event) => {

0 commit comments

Comments
 (0)