Skip to content

Commit f03530f

Browse files
committed
refactor(hardlink): introduce DeviceNumber newtype, reorder ino before dev
- Add DeviceNumber in src/device_number.rs, mirroring InodeNumber - Reorder InodeKey fields to ino-first for faster derived PartialEq (ino has far more entropy than dev, so comparisons short-circuit earlier) - Use DeviceNumber instead of raw u64 in InodeKey, ReflectionEntry, and all public/internal APIs - Add dev to ConversionError::DuplicatedInode for precise error messages - Add ConversionError::duplicated_inode(InodeKey) constructor - Update sort keys to (ino, dev) order throughout https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok
1 parent b678164 commit f03530f

8 files changed

Lines changed: 85 additions & 48 deletions

File tree

src/device_number.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
use derive_more::{Display, From, Into, LowerHex, Octal, UpperHex};
2+
3+
#[cfg(feature = "json")]
4+
use serde::{Deserialize, Serialize};
5+
6+
/// The device number of a filesystem.
7+
#[derive(
8+
Debug, Display, LowerHex, UpperHex, Octal, Clone, Copy, PartialEq, Eq, Hash, From, Into,
9+
)]
10+
#[cfg_attr(feature = "json", derive(Deserialize, Serialize))]
11+
pub struct DeviceNumber(u64);
12+
13+
/// POSIX-exclusive functions.
14+
#[cfg(unix)]
15+
impl DeviceNumber {
16+
/// Get device number of a [`std::fs::Metadata`].
17+
#[inline]
18+
pub fn get(stats: &std::fs::Metadata) -> Self {
19+
use pipe_trait::Pipe;
20+
use std::os::unix::fs::MetadataExt;
21+
stats.dev().pipe(DeviceNumber)
22+
}
23+
}

src/hardlink/aware.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use super::{
44
};
55
use crate::{
66
data_tree::DataTree,
7+
device_number::DeviceNumber,
78
inode::InodeNumber,
89
os_string_display::OsStringDisplay,
910
reporter::{event::HardlinkDetection, Event, Reporter},
@@ -82,7 +83,7 @@ where
8283
}));
8384

8485
let ino = InodeNumber::get(stats);
85-
let dev = stats.dev();
86+
let dev = DeviceNumber::get(stats);
8687
self.record
8788
.add(ino, dev, size, links, path)
8889
.map_err(ReportHardlinksError::AddToRecord)

src/hardlink/hardlink_list.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ pub use summary::Summary;
99
pub use Reflection as HardlinkListReflection;
1010
pub use Summary as SharedLinkSummary;
1111

12-
use crate::{hardlink::LinkPathList, inode::InodeNumber, size};
12+
use crate::{device_number::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber, size};
1313
use dashmap::DashMap;
1414
use derive_more::{Display, Error};
1515
use smart_default::SmartDefault;
@@ -28,10 +28,10 @@ use std::path::Path;
2828
/// for the same reason.
2929
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
3030
struct InodeKey {
31-
/// Device number of the filesystem the inode belongs to.
32-
dev: u64,
3331
/// Inode number within the device.
3432
ino: InodeNumber,
33+
/// Device number of the filesystem the inode belongs to.
34+
dev: DeviceNumber,
3535
}
3636

3737
/// Map value in [`HardlinkList`].
@@ -126,12 +126,12 @@ where
126126
pub(crate) fn add(
127127
&self,
128128
ino: InodeNumber,
129-
dev: u64,
129+
dev: DeviceNumber,
130130
size: Size,
131131
links: u64,
132132
path: &Path,
133133
) -> Result<(), AddError<Size>> {
134-
let key = InodeKey { dev, ino };
134+
let key = InodeKey { ino, dev };
135135
let mut assertions = Ok(());
136136
self.0
137137
.entry(key)

src/hardlink/hardlink_list/iter.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::{HardlinkList, InodeKey, Value};
2-
use crate::{hardlink::LinkPathList, inode::InodeNumber};
2+
use crate::{device_number::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber};
33
use dashmap::{iter::Iter as DashIter, mapref::multiple::RefMulti};
44
use pipe_trait::Pipe;
55

@@ -30,18 +30,18 @@ impl<'a, Size> Iterator for Iter<'a, Size> {
3030
}
3131

3232
impl<'a, Size> Item<'a, Size> {
33-
/// The device number of the filesystem the inode belongs to.
34-
#[inline]
35-
pub fn dev(&self) -> u64 {
36-
self.0.key().dev
37-
}
38-
3933
/// The inode number of the file.
4034
#[inline]
4135
pub fn ino(&self) -> InodeNumber {
4236
self.0.key().ino
4337
}
4438

39+
/// The device number of the filesystem the inode belongs to.
40+
#[inline]
41+
pub fn dev(&self) -> DeviceNumber {
42+
self.0.key().dev
43+
}
44+
4545
/// Size of the file.
4646
#[inline]
4747
pub fn size(&self) -> &Size {

src/hardlink/hardlink_list/reflection.rs

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::{HardlinkList, InodeKey, Value};
2-
use crate::{hardlink::LinkPathListReflection, inode::InodeNumber};
2+
use crate::{device_number::DeviceNumber, hardlink::LinkPathListReflection, inode::InodeNumber};
33
use dashmap::DashMap;
44
use derive_more::{Display, Error, Into, IntoIterator};
55
use into_sorted::IntoSortedUnstable;
@@ -48,10 +48,10 @@ impl<Size> Reflection<Size> {
4848
#[derive(Debug, Clone, PartialEq, Eq)]
4949
#[cfg_attr(feature = "json", derive(Deserialize, Serialize))]
5050
pub struct ReflectionEntry<Size> {
51-
/// Device number of the filesystem the inode belongs to.
52-
pub dev: u64,
5351
/// The inode number of the file.
5452
pub ino: InodeNumber,
53+
/// Device number of the filesystem the inode belongs to.
54+
pub dev: DeviceNumber,
5555
/// Size of the file.
5656
pub size: Size,
5757
/// Total number of links of the file, both listed (in [`Self::paths`]) and unlisted.
@@ -63,11 +63,11 @@ pub struct ReflectionEntry<Size> {
6363
impl<Size> ReflectionEntry<Size> {
6464
/// Create a new entry.
6565
#[inline]
66-
fn new(InodeKey { dev, ino }: InodeKey, Value { size, links, paths }: Value<Size>) -> Self {
66+
fn new(InodeKey { ino, dev }: InodeKey, Value { size, links, paths }: Value<Size>) -> Self {
6767
let paths = paths.into();
6868
ReflectionEntry {
69-
dev,
7069
ino,
70+
dev,
7171
size,
7272
links,
7373
paths,
@@ -78,21 +78,21 @@ impl<Size> ReflectionEntry<Size> {
7878
#[inline]
7979
fn dissolve(self) -> (InodeKey, Value<Size>) {
8080
let ReflectionEntry {
81-
dev,
8281
ino,
82+
dev,
8383
size,
8484
links,
8585
paths,
8686
} = self;
8787
let paths = paths.into();
88-
(InodeKey { dev, ino }, Value { size, links, paths })
88+
(InodeKey { ino, dev }, Value { size, links, paths })
8989
}
9090
}
9191

9292
impl<Size> From<Vec<ReflectionEntry<Size>>> for Reflection<Size> {
9393
/// Sort the list by `(inode, device)`, then create the reflection.
9494
fn from(list: Vec<ReflectionEntry<Size>>) -> Self {
95-
list.into_sorted_unstable_by_key(|entry| (u64::from(entry.ino), entry.dev))
95+
list.into_sorted_unstable_by_key(|entry| (u64::from(entry.ino), u64::from(entry.dev)))
9696
.pipe(Reflection)
9797
}
9898
}
@@ -111,9 +111,20 @@ impl<Size> From<HardlinkList<Size>> for Reflection<Size> {
111111
#[derive(Debug, Display, Error, Clone, Copy, PartialEq, Eq)]
112112
#[non_exhaustive]
113113
pub enum ConversionError {
114-
/// When the source has duplicated inode numbers.
115-
#[display("Inode number {_0} is duplicated")]
116-
DuplicatedInode(#[error(not(source))] InodeNumber),
114+
/// When the source has a duplicated `(inode, device)` pair.
115+
#[display("Inode {ino} on device {dev} is duplicated")]
116+
DuplicatedInode {
117+
#[error(not(source))]
118+
ino: InodeNumber,
119+
#[error(not(source))]
120+
dev: DeviceNumber,
121+
},
122+
}
123+
124+
impl ConversionError {
125+
fn duplicated_inode(InodeKey { ino, dev }: InodeKey) -> Self {
126+
ConversionError::DuplicatedInode { ino, dev }
127+
}
117128
}
118129

119130
impl<Size> TryFrom<Reflection<Size>> for HardlinkList<Size> {
@@ -124,7 +135,7 @@ impl<Size> TryFrom<Reflection<Size>> for HardlinkList<Size> {
124135
for entry in entries {
125136
let (key, value) = entry.dissolve();
126137
if map.insert(key, value).is_some() {
127-
return key.ino.pipe(ConversionError::DuplicatedInode).pipe(Err);
138+
return key.pipe(ConversionError::duplicated_inode).pipe(Err);
128139
}
129140
}
130141

src/hardlink/hardlink_list/test.rs

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,21 @@ use pipe_trait::Pipe;
44
use pretty_assertions::{assert_eq, assert_ne};
55

66
const TABLE: &[(u64, u64, u64, u64, &str)] = &[
7-
// dev, ino, size, links, path
8-
(0, 241, 3652, 1, "a"),
9-
(0, 569, 2210, 1, "b"),
10-
(0, 110, 2350, 3, "c"),
11-
(0, 110, 2350, 3, "c1"),
12-
(0, 778, 1110, 1, "d"),
13-
(0, 274, 6060, 2, "e"),
14-
(0, 274, 6060, 2, "e1"),
15-
(0, 883, 4530, 1, "f"),
7+
// ino, dev, size, links, path
8+
(241, 0, 3652, 1, "a"),
9+
(569, 0, 2210, 1, "b"),
10+
(110, 0, 2350, 3, "c"),
11+
(110, 0, 2350, 3, "c1"),
12+
(778, 0, 1110, 1, "d"),
13+
(274, 0, 6060, 2, "e"),
14+
(274, 0, 6060, 2, "e1"),
15+
(883, 0, 4530, 1, "f"),
1616
];
1717

1818
fn add<const ROW: usize>(list: HardlinkList<Bytes>) -> HardlinkList<Bytes> {
1919
let values = TABLE[ROW];
20-
let (dev, ino, size, links, path) = values;
21-
if let Err(error) = list.add(ino.into(), dev, size.into(), links, path.as_ref()) {
20+
let (ino, dev, size, links, path) = values;
21+
if let Err(error) = list.add(ino.into(), dev.into(), size.into(), links, path.as_ref()) {
2222
panic!("Failed to add {values:?} (index: {ROW}) to the list: {error}");
2323
}
2424
list
@@ -120,10 +120,10 @@ fn insertion_difference_cause_inequality() {
120120
#[test]
121121
fn detect_size_change() {
122122
let list = HardlinkList::<Bytes>::new();
123-
list.add(123.into(), 0, 100.into(), 1, "a".as_ref())
123+
list.add(123.into(), 0.into(), 100.into(), 1, "a".as_ref())
124124
.expect("add the first path");
125125
let actual = list
126-
.add(123.into(), 0, 110.into(), 1, "b".as_ref())
126+
.add(123.into(), 0.into(), 110.into(), 1, "b".as_ref())
127127
.expect_err("add the second path");
128128
let expected = AddError::SizeConflict(SizeConflictError {
129129
ino: 123.into(),
@@ -136,10 +136,10 @@ fn detect_size_change() {
136136
#[test]
137137
fn detect_number_of_links_change() {
138138
let list = HardlinkList::<Bytes>::new();
139-
list.add(123.into(), 0, 100.into(), 1, "a".as_ref())
139+
list.add(123.into(), 0.into(), 100.into(), 1, "a".as_ref())
140140
.expect("add the first path");
141141
let actual = list
142-
.add(123.into(), 0, 100.into(), 2, "b".as_ref())
142+
.add(123.into(), 0.into(), 100.into(), 2, "b".as_ref())
143143
.expect_err("add the second path");
144144
let expected = AddError::NumberOfLinksConflict(NumberOfLinksConflictError {
145145
ino: 123.into(),
@@ -159,29 +159,29 @@ fn same_ino_on_different_devices_are_treated_separately() {
159159
let list = HardlinkList::<Bytes>::new();
160160

161161
// dev=1, ino=100 — first filesystem
162-
list.add(100.into(), 1, 50.into(), 2, "dev1/file_a".as_ref())
162+
list.add(100.into(), 1.into(), 50.into(), 2, "dev1/file_a".as_ref())
163163
.expect("add dev1/file_a");
164-
list.add(100.into(), 1, 50.into(), 2, "dev1/file_b".as_ref())
164+
list.add(100.into(), 1.into(), 50.into(), 2, "dev1/file_b".as_ref())
165165
.expect("add dev1/file_b (same dev+ino → same inode group)");
166166

167167
// dev=2, ino=100 — second filesystem, coincidentally same inode number
168-
list.add(100.into(), 2, 80.into(), 2, "dev2/file_c".as_ref())
168+
list.add(100.into(), 2.into(), 80.into(), 2, "dev2/file_c".as_ref())
169169
.expect("add dev2/file_c (different dev → separate inode group)");
170-
list.add(100.into(), 2, 80.into(), 2, "dev2/file_d".as_ref())
170+
list.add(100.into(), 2.into(), 80.into(), 2, "dev2/file_d".as_ref())
171171
.expect("add dev2/file_d (same dev+ino → same inode group as file_c)");
172172

173173
// Each device should produce its own entry, so the list should have 2 entries.
174-
assert_eq!(list.len(), 2, "expected one entry per (dev, ino) pair");
174+
assert_eq!(list.len(), 2, "expected one entry per (ino, dev) pair");
175175

176176
let reflection = list.into_reflection();
177177
assert_eq!(reflection.len(), 2);
178178

179179
// Sorted by (ino, dev), so dev=1 comes first.
180180
let entries: Vec<_> = reflection.iter().collect();
181-
assert_eq!(entries[0].dev, 1);
181+
assert_eq!(entries[0].dev, 1.into());
182182
assert_eq!(entries[0].ino, 100.into());
183183
assert_eq!(entries[0].paths.len(), 2);
184-
assert_eq!(entries[1].dev, 2);
184+
assert_eq!(entries[1].dev, 2.into());
185185
assert_eq!(entries[1].ino, 100.into());
186186
assert_eq!(entries[1].paths.len(), 2);
187187
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ pub use clap_utilities;
4141
pub mod bytes_format;
4242
pub mod data_tree;
4343
pub mod device;
44+
pub mod device_number;
4445
pub mod fs_tree_builder;
4546
pub mod get_size;
4647
pub mod hardlink;

tests/_utils.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -586,9 +586,10 @@ pub fn read_inode_number(path: &Path) -> u64 {
586586

587587
/// Read [dev](std::os::unix::fs::MetadataExt::dev) of a path.
588588
#[cfg(unix)]
589-
pub fn read_device_number(path: &Path) -> u64 {
589+
pub fn read_device_number(path: &Path) -> parallel_disk_usage::device_number::DeviceNumber {
590590
use std::os::unix::fs::MetadataExt;
591591
path.pipe(symlink_metadata)
592592
.unwrap_or_else(|error| panic!("Can't read metadata at {path:?}: {error}"))
593593
.dev()
594+
.into()
594595
}

0 commit comments

Comments
 (0)