Skip to content

Commit b12b7d3

Browse files
authored
fix(linux): resolve eBPF thread names to main process names (#87)
Use a periodically refreshed procfs PID cache to resolve thread names (e.g. 'Socket Thread') to main process names (e.g. 'firefox'). Fall back to the eBPF name for short-lived processes that have already exited.
1 parent c31909b commit b12b7d3

2 files changed

Lines changed: 62 additions & 19 deletions

File tree

src/network/platform/linux/enhanced.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,18 +166,30 @@ mod ebpf_enhanced {
166166
is_tcp,
167167
) {
168168
Some(process_info) => {
169+
// Try to resolve the correct main process name using the PID.
170+
// eBPF captures thread names (e.g., "Socket Thread"), but we want
171+
// the main process name (e.g., "firefox"). The procfs cache maps
172+
// PIDs to main process names from /proc/<pid>/comm.
173+
// For short-lived processes (like curl), the PID won't be in the
174+
// cache (process already exited), so we fall back to the eBPF name.
175+
let resolved_name = self
176+
.procfs_lookup
177+
.get_process_name_by_pid(process_info.pid)
178+
.unwrap_or_else(|| process_info.comm.clone());
179+
169180
debug!(
170-
"eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, Comm: {}, Age: {}ns",
181+
"eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, eBPF comm: {}, Resolved: {}, Age: {}ns",
171182
conn.local_addr.ip(),
172183
conn.local_addr.port(),
173184
conn.remote_addr.ip(),
174185
conn.remote_addr.port(),
175186
process_info.pid,
176187
process_info.uid,
177188
process_info.comm,
189+
resolved_name,
178190
process_info.timestamp
179191
);
180-
Some((process_info.pid, process_info.comm))
192+
Some((process_info.pid, resolved_name))
181193
}
182194
None => {
183195
debug!(

src/network/platform/linux/process.rs

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,20 @@ use std::collections::HashMap;
77
use std::fs;
88
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
99
use std::sync::RwLock;
10-
use std::time::{Duration, Instant};
10+
use std::time::Instant;
11+
12+
/// Map of socket inode to (PID, process name)
13+
type InodeProcessMap = HashMap<u64, (u32, String)>;
14+
/// Map of PID to process name
15+
type PidNameMap = HashMap<u32, String>;
16+
/// Map of connection key to (PID, process name)
17+
type ConnectionProcessMap = HashMap<ConnectionKey, (u32, String)>;
1118

1219
pub struct LinuxProcessLookup {
1320
// Cache: ConnectionKey -> (pid, process_name)
1421
cache: RwLock<ProcessCache>,
22+
// Cache: PID -> process_name (for resolving eBPF thread names to main process names)
23+
pid_names: RwLock<HashMap<u32, String>>,
1524
}
1625

1726
struct ProcessCache {
@@ -21,20 +30,36 @@ struct ProcessCache {
2130

2231
impl LinuxProcessLookup {
2332
pub fn new() -> Result<Self> {
33+
// Populate the cache immediately so early connections have process names available.
34+
// This ensures the PID→name cache is ready before packet capture starts.
35+
let (process_map, pid_names) = Self::build_process_map()?;
36+
2437
Ok(Self {
2538
cache: RwLock::new(ProcessCache {
26-
lookup: HashMap::new(),
27-
last_refresh: Instant::now() - Duration::from_secs(3600),
39+
lookup: process_map,
40+
last_refresh: Instant::now(),
2841
}),
42+
pid_names: RwLock::new(pid_names),
2943
})
3044
}
3145

32-
/// Build connection -> process mapping
33-
fn build_process_map() -> Result<HashMap<ConnectionKey, (u32, String)>> {
46+
/// Get process name by PID from the cached procfs scan.
47+
/// Returns None if PID not found (process may have exited or not yet scanned).
48+
pub fn get_process_name_by_pid(&self, pid: u32) -> Option<String> {
49+
self.pid_names
50+
.read()
51+
.expect("pid_names lock poisoned")
52+
.get(&pid)
53+
.cloned()
54+
}
55+
56+
/// Build connection -> process mapping and PID -> name mapping
57+
fn build_process_map() -> Result<(ConnectionProcessMap, PidNameMap)>
58+
{
3459
let mut process_map = HashMap::new();
3560

36-
// First, build inode -> process mapping
37-
let inode_to_process = Self::build_inode_map()?;
61+
// First, build inode -> process mapping and PID -> name mapping
62+
let (inode_to_process, pid_names) = Self::build_inode_map()?;
3863

3964
// Then, parse network files to map connections -> inodes -> processes
4065
Self::parse_and_map(
@@ -62,12 +87,13 @@ impl LinuxProcessLookup {
6287
&mut process_map,
6388
)?;
6489

65-
Ok(process_map)
90+
Ok((process_map, pid_names))
6691
}
6792

68-
/// Build inode -> (pid, process_name) mapping
69-
fn build_inode_map() -> Result<HashMap<u64, (u32, String)>> {
93+
/// Build inode -> (pid, process_name) mapping and PID -> process_name mapping
94+
fn build_inode_map() -> Result<(InodeProcessMap, PidNameMap)> {
7095
let mut inode_map = HashMap::new();
96+
let mut pid_names = HashMap::new();
7197

7298
for entry in fs::read_dir("/proc")? {
7399
let entry = entry?;
@@ -87,7 +113,10 @@ impl LinuxProcessLookup {
87113
.trim()
88114
.to_string();
89115

90-
// Check file descriptors
116+
// Store PID -> name mapping for all processes
117+
pid_names.insert(pid, process_name.clone());
118+
119+
// Check file descriptors for socket inodes
91120
let fd_dir = path.join("fd");
92121
if let Ok(fd_entries) = fs::read_dir(&fd_dir) {
93122
for fd_entry in fd_entries.flatten() {
@@ -102,15 +131,15 @@ impl LinuxProcessLookup {
102131
}
103132
}
104133

105-
Ok(inode_map)
134+
Ok((inode_map, pid_names))
106135
}
107136

108137
/// Parse /proc/net file and map connections to processes
109138
fn parse_and_map(
110139
path: &str,
111140
protocol: Protocol,
112-
inode_map: &HashMap<u64, (u32, String)>,
113-
result: &mut HashMap<ConnectionKey, (u32, String)>,
141+
inode_map: &InodeProcessMap,
142+
result: &mut ConnectionProcessMap,
114143
) -> Result<()> {
115144
let content = match fs::read_to_string(path) {
116145
Ok(c) => c,
@@ -201,17 +230,19 @@ impl ProcessLookup for LinuxProcessLookup {
201230
// The enrichment thread (app.rs:495-500) handles periodic refresh every 5 seconds.
202231
// IMPORTANT: Do NOT refresh here as it caused high CPU usage when called for every
203232
// connection without process info (flamegraph showed this was the main bottleneck).
204-
let cache = self.cache.read().unwrap();
233+
let cache = self.cache.read().expect("process cache lock poisoned");
205234
cache.lookup.get(&key).cloned()
206235
}
207236

208237
fn refresh(&self) -> Result<()> {
209-
let process_map = Self::build_process_map()?;
238+
let (process_map, pid_names) = Self::build_process_map()?;
210239

211-
let mut cache = self.cache.write().unwrap();
240+
let mut cache = self.cache.write().expect("process cache lock poisoned");
212241
cache.lookup = process_map;
213242
cache.last_refresh = Instant::now();
214243

244+
*self.pid_names.write().expect("pid_names lock poisoned") = pid_names;
245+
215246
Ok(())
216247
}
217248

0 commit comments

Comments
 (0)