Skip to content

Commit 2280fb0

Browse files
authored
Reduce allocations in sorting and snapshot paths (#222)
* Reduce allocations in sorting and snapshot paths
1 parent 14b305b commit 2280fb0

6 files changed

Lines changed: 385 additions & 113 deletions

File tree

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,10 @@ harness = false
191191
name = "snapshot"
192192
harness = false
193193

194+
[[bench]]
195+
name = "rate_tracker"
196+
harness = false
197+
194198
[profile.release]
195199
lto = "thin"
196200
codegen-units = 1

benches/rate_tracker.rs

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
2+
use rustnet_monitor::network::types::*;
3+
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
4+
5+
/// Create a Connection with a RateTracker filled to `n_samples` entries.
6+
fn make_connection_with_samples(n_samples: usize) -> Connection {
7+
let local = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 100)), 54321);
8+
let remote = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(93, 184, 216, 34)), 443);
9+
let mut conn = Connection::new(
10+
Protocol::Tcp,
11+
local,
12+
remote,
13+
ProtocolState::Tcp(TcpState::Established),
14+
);
15+
16+
for i in 0..n_samples {
17+
conn.bytes_sent += 100;
18+
conn.bytes_received += 200;
19+
conn.rate_tracker
20+
.update(conn.bytes_sent, conn.bytes_received);
21+
// Sprinkle in some pruning to keep the tracker realistic
22+
if i % 500 == 0 {
23+
conn.rate_tracker.prune();
24+
}
25+
}
26+
conn
27+
}
28+
29+
/// Benchmark the per-packet `update()` call on RateTracker.
30+
/// This is the hot path — called for every packet received.
31+
/// The Arc<VecDeque> change adds an `Arc::make_mut` atomic check here.
32+
fn bench_rate_update(c: &mut Criterion) {
33+
let mut group = c.benchmark_group("rate_tracker_update");
34+
35+
for n_samples in [0, 100, 1000, 5000] {
36+
// Unique owner: simulates the normal packet-processing path where
37+
// no snapshot clone is holding a shared reference.
38+
group.bench_with_input(
39+
BenchmarkId::new("unique_owner", n_samples),
40+
&n_samples,
41+
|b, &n| {
42+
let mut conn = make_connection_with_samples(n);
43+
let mut bytes_sent = conn.bytes_sent;
44+
let mut bytes_recv = conn.bytes_received;
45+
b.iter(|| {
46+
bytes_sent += 100;
47+
bytes_recv += 200;
48+
conn.rate_tracker.update(bytes_sent, bytes_recv);
49+
});
50+
},
51+
);
52+
53+
// Shared owner: simulates the case right after a snapshot clone,
54+
// where two Arcs point to the same VecDeque. The first `update()`
55+
// after a clone triggers a full VecDeque copy via Arc::make_mut.
56+
group.bench_with_input(
57+
BenchmarkId::new("after_snapshot_clone", n_samples),
58+
&n_samples,
59+
|b, &n| {
60+
b.iter_batched(
61+
|| {
62+
let conn = make_connection_with_samples(n);
63+
let _snapshot = conn.clone(); // create shared Arc
64+
conn
65+
},
66+
|mut conn| {
67+
conn.bytes_sent += 100;
68+
conn.bytes_received += 200;
69+
conn.rate_tracker
70+
.update(conn.bytes_sent, conn.bytes_received);
71+
},
72+
criterion::BatchSize::SmallInput,
73+
);
74+
},
75+
);
76+
}
77+
78+
group.finish();
79+
}
80+
81+
/// Benchmark `refresh_rates()` (prune + rate calculation + smoothing).
82+
/// Called once per second per connection from the refresh loop.
83+
fn bench_refresh_rates(c: &mut Criterion) {
84+
let mut group = c.benchmark_group("refresh_rates");
85+
86+
for n_samples in [0, 100, 1000, 5000] {
87+
group.bench_with_input(
88+
BenchmarkId::new("unique_owner", n_samples),
89+
&n_samples,
90+
|b, &n| {
91+
let mut conn = make_connection_with_samples(n);
92+
b.iter(|| {
93+
conn.refresh_rates();
94+
});
95+
},
96+
);
97+
}
98+
99+
group.finish();
100+
}
101+
102+
/// Benchmark Connection::clone() to measure the impact of Arc<VecDeque>
103+
/// vs a plain VecDeque. With Arc, clone is O(1) for the samples field
104+
/// (just a refcount bump). Without Arc, it's O(n_samples).
105+
fn bench_connection_clone(c: &mut Criterion) {
106+
let mut group = c.benchmark_group("connection_clone");
107+
108+
for n_samples in [0, 100, 1000, 5000, 10000] {
109+
let conn = make_connection_with_samples(n_samples);
110+
group.bench_with_input(BenchmarkId::new("clone", n_samples), &conn, |b, conn| {
111+
b.iter(|| conn.clone());
112+
});
113+
}
114+
115+
group.finish();
116+
}
117+
118+
/// Benchmark the snapshot-then-mutate cycle that happens in practice:
119+
/// 1. Clone N connections for a UI snapshot
120+
/// 2. Then update each original connection with a new packet
121+
///
122+
/// This measures the real-world cost: cheap clone (Arc refcount) followed
123+
/// by a CoW deep-copy on first mutation.
124+
fn bench_snapshot_then_update(c: &mut Criterion) {
125+
let mut group = c.benchmark_group("snapshot_then_update");
126+
127+
for n_conns in [100, 1000, 5000] {
128+
let connections: Vec<Connection> = (0..n_conns)
129+
.map(|_| make_connection_with_samples(100))
130+
.collect();
131+
132+
group.bench_with_input(
133+
BenchmarkId::new("clone_all_then_update_all", n_conns),
134+
&connections,
135+
|b, connections| {
136+
b.iter_batched(
137+
|| connections.clone(),
138+
|mut conns| {
139+
// Step 1: snapshot clone (simulates UI snapshot)
140+
let _snapshot: Vec<Connection> = conns.iter().map(|c| c.clone()).collect();
141+
// Step 2: mutate originals (simulates incoming packets)
142+
for conn in &mut conns {
143+
conn.bytes_sent += 100;
144+
conn.bytes_received += 200;
145+
conn.rate_tracker
146+
.update(conn.bytes_sent, conn.bytes_received);
147+
}
148+
},
149+
criterion::BatchSize::LargeInput,
150+
);
151+
},
152+
);
153+
}
154+
155+
group.finish();
156+
}
157+
158+
criterion_group!(
159+
benches,
160+
bench_rate_update,
161+
bench_refresh_rates,
162+
bench_connection_clone,
163+
bench_snapshot_then_update,
164+
);
165+
criterion_main!(benches);

src/app.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,10 +1338,14 @@ impl App {
13381338
break;
13391339
}
13401340

1341-
// Refresh rates for all connections
1342-
// This ensures rates decay to zero for idle connections
1341+
// Refresh rates for connections that may still have non-zero rates.
1342+
// Skip connections idle >30s whose rates are already zero.
13431343
for mut entry in connections.iter_mut() {
1344-
entry.value_mut().refresh_rates();
1344+
let conn = entry.value_mut();
1345+
let idle_secs = conn.last_activity.elapsed().unwrap_or_default().as_secs();
1346+
if idle_secs <= 30 || conn.has_nonzero_rates() {
1347+
conn.refresh_rates();
1348+
}
13451349
}
13461350

13471351
// Run every 1 second to balance responsiveness with performance

src/main.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -466,21 +466,21 @@ fn sort_connections(
466466
a_process.cmp(b_process)
467467
}
468468

469-
SortColumn::LocalAddress => a.local_addr.to_string().cmp(&b.local_addr.to_string()),
470-
471-
SortColumn::RemoteAddress => a.remote_addr.to_string().cmp(&b.remote_addr.to_string()),
469+
SortColumn::LocalAddress => a
470+
.local_addr
471+
.ip()
472+
.cmp(&b.local_addr.ip())
473+
.then_with(|| a.local_addr.port().cmp(&b.local_addr.port())),
474+
475+
SortColumn::RemoteAddress => a
476+
.remote_addr
477+
.ip()
478+
.cmp(&b.remote_addr.ip())
479+
.then_with(|| a.remote_addr.port().cmp(&b.remote_addr.port())),
472480

473481
SortColumn::Application => {
474-
let a_app = a
475-
.dpi_info
476-
.as_ref()
477-
.map(|dpi| dpi.application.to_string())
478-
.unwrap_or_default();
479-
let b_app = b
480-
.dpi_info
481-
.as_ref()
482-
.map(|dpi| dpi.application.to_string())
483-
.unwrap_or_default();
482+
let a_app = a.dpi_info.as_ref().map(|dpi| dpi.application.sort_key());
483+
let b_app = b.dpi_info.as_ref().map(|dpi| dpi.application.sort_key());
484484
a_app.cmp(&b_app)
485485
}
486486

@@ -490,7 +490,7 @@ fn sort_connections(
490490
a_service.cmp(b_service)
491491
}
492492

493-
SortColumn::State => a.state().cmp(&b.state()),
493+
SortColumn::State => Ord::cmp(&a.state(), &b.state()),
494494

495495
SortColumn::Location => {
496496
let a_loc = a
@@ -506,7 +506,7 @@ fn sort_connections(
506506
a_loc.cmp(b_loc)
507507
}
508508

509-
SortColumn::Protocol => a.protocol.to_string().cmp(&b.protocol.to_string()),
509+
SortColumn::Protocol => a.protocol.cmp(&b.protocol),
510510
};
511511

512512
if ascending {

0 commit comments

Comments
 (0)