diff --git a/Cargo.toml b/Cargo.toml index 95794a73..ea38774d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ include = [ "/tests", "/Cargo.toml", "/README.md", + "/USAGE.md", "/LICENSE", ] @@ -42,6 +43,11 @@ name = "pdu-completions" path = "cli/completions.rs" required-features = ["cli-completions"] +[[bin]] +name = "pdu-usage-md" +path = "cli/usage_md.rs" +required-features = ["cli"] + [features] default = ["cli"] json = ["serde/derive", "serde_json"] diff --git a/README.md b/README.md index 3e62d04f..90ba7f0a 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,10 @@ The benchmark was generated by [a GitHub Workflow](https://github.com/KSXGitHub/ * Do not differentiate filesystem: Mounted folders are counted as normal folders. * The runtime is optimized at the expense of binary size. +## Usage + +See [USAGE.md](./USAGE.md) for the full help text. + ## Development ### Prerequisites diff --git a/USAGE.md b/USAGE.md new file mode 100644 index 00000000..98d1f36c --- /dev/null +++ b/USAGE.md @@ -0,0 +1,208 @@ +# Usage + +```sh +pdu [OPTIONS] [FILES]... +``` + +## Arguments + +* `[FILES]...`: List of files and/or directories. + +## Options + + +### `--json-input` + +Read JSON data from stdin. + + +### `--json-output` + +Print JSON data instead of an ASCII chart. + + +### `--bytes-format` + +* _Aliases:_ `-b`. +* _Default:_ `metric`. +* _Choices:_ + - `plain`: Display plain number of bytes without units + - `metric`: Use metric scale, i.e. 1K = 1000B, 1M = 1000K, and so on + - `binary`: Use binary scale, i.e. 1K = 1024B, 1M = 1024K, and so on + +How to display the numbers of bytes. + + +### `--deduplicate-hardlinks` + +* _Aliases:_ `-H`, `--detect-links`, `--dedupe-links`. + +Detect and subtract the sizes of hardlinks from their parent directory totals. + + +### `--top-down` + +Print the tree top-down instead of bottom-up. + + +### `--align-right` + +Set the root of the bars to the right. + + +### `--quantity` + +* _Aliases:_ `-q`. +* _Default:_ `block-size`. 
+* _Choices:_ + - `apparent-size`: Measure apparent sizes + - `block-size`: Measure block sizes (block-count * 512B) + - `block-count`: Count numbers of blocks + +Aspect of the files/directories to be measured. + + +### `--max-depth` + +* _Aliases:_ `-d`, `--depth`. +* _Default:_ `10`. + +Maximum depth to display the data. Could be either "inf" or a positive integer. + + +### `--total-width` + +* _Aliases:_ `-w`, `--width`. + +Width of the visualization. + + +### `--column-width` + +Maximum widths of the tree column and width of the bar column. + + +### `--min-ratio` + +* _Aliases:_ `-m`. +* _Default:_ `0.01`. + +Minimal size proportion required to appear. + + +### `--no-sort` + +Do not sort the branches in the tree. + + +### `--silent-errors` + +* _Aliases:_ `-s`, `--no-errors`. + +Prevent filesystem error messages from appearing in stderr. + + +### `--progress` + +* _Aliases:_ `-p`. + +Report progress being made at the expense of performance. + + +### `--threads` + +* _Default:_ `auto`. + +Set the maximum number of threads to spawn. Could be either "auto", "max", or a positive integer. + + +### `--omit-json-shared-details` + +Do not output `.shared.details` in the JSON output. + + +### `--omit-json-shared-summary` + +Do not output `.shared.summary` in the JSON output. + + +### `--help` + +* _Aliases:_ `-h`. + +Print help. + + +### `--version` + +* _Aliases:_ `-V`. + +Print version. 
+ +## Examples + +### Show disk usage chart of current working directory + +```sh +pdu +``` + +### Show disk usage chart of a single file or directory + +```sh +pdu path/to/file/or/directory +``` + +### Compare disk usages of multiple files and/or directories + +```sh +pdu file.txt dir/ +``` + +### Show chart in apparent sizes instead of block sizes + +```sh +pdu --quantity=apparent-size +``` + +### Detect and subtract the sizes of hardlinks from their parent nodes + +```sh +pdu --deduplicate-hardlinks +``` + +### Show sizes in plain numbers instead of metric units + +```sh +pdu --bytes-format=plain +``` + +### Show sizes in base 2¹⁰ units (binary) instead of base 10³ units (metric) + +```sh +pdu --bytes-format=binary +``` + +### Show disk usage chart of all entries regardless of size + +```sh +pdu --min-ratio=0 +``` + +### Only show disk usage chart of entries whose size is at least 5% of total + +```sh +pdu --min-ratio=0.05 +``` + +### Show disk usage data as JSON instead of chart + +```sh +pdu --min-ratio=0 --max-depth=inf --json-output | jq +``` + +### Visualize existing JSON representation of disk usage data + +```sh +pdu --json-input < disk-usage.json +``` diff --git a/cli/usage_md.rs b/cli/usage_md.rs new file mode 100644 index 00000000..e1bafbe1 --- /dev/null +++ b/cli/usage_md.rs @@ -0,0 +1,5 @@ +use parallel_disk_usage::usage_md::render_usage_md; + +fn main() { + println!("{}", render_usage_md().trim_end()); +} diff --git a/exports/long.help b/exports/long.help new file mode 100644 index 00000000..efe31299 --- /dev/null +++ b/exports/long.help @@ -0,0 +1,129 @@ +Summarize disk usage of the set of files, recursively for directories. + +Copyright: Apache-2.0 © 2021 Hoàng Văn Khải +Sponsor: https://github.com/sponsors/KSXGitHub + +Usage: pdu [OPTIONS] [FILES]... + +Arguments: + [FILES]... 
+ List of files and/or directories + +Options: + --json-input + Read JSON data from stdin + + --json-output + Print JSON data instead of an ASCII chart + + -b, --bytes-format + How to display the numbers of bytes + + Possible values: + - plain: Display plain number of bytes without units + - metric: Use metric scale, i.e. 1K = 1000B, 1M = 1000K, and so on + - binary: Use binary scale, i.e. 1K = 1024B, 1M = 1024K, and so on + + [default: metric] + + -H, --deduplicate-hardlinks + Detect and subtract the sizes of hardlinks from their parent directory totals + + [aliases: --detect-links, --dedupe-links] + + --top-down + Print the tree top-down instead of bottom-up + + --align-right + Set the root of the bars to the right + + -q, --quantity + Aspect of the files/directories to be measured + + Possible values: + - apparent-size: Measure apparent sizes + - block-size: Measure block sizes (block-count * 512B) + - block-count: Count numbers of blocks + + [default: block-size] + + -d, --max-depth + Maximum depth to display the data. Could be either "inf" or a positive integer + + [default: 10] + [aliases: --depth] + + -w, --total-width + Width of the visualization + + [aliases: --width] + + --column-width + Maximum widths of the tree column and width of the bar column + + -m, --min-ratio + Minimal size proportion required to appear + + [default: 0.01] + + --no-sort + Do not sort the branches in the tree + + -s, --silent-errors + Prevent filesystem error messages from appearing in stderr + + [aliases: --no-errors] + + -p, --progress + Report progress being made at the expense of performance + + --threads + Set the maximum number of threads to spawn. 
Could be either "auto", "max", or a positive integer + + [default: auto] + + --omit-json-shared-details + Do not output `.shared.details` in the JSON output + + --omit-json-shared-summary + Do not output `.shared.summary` in the JSON output + + -h, --help + Print help (see a summary with '-h') + + -V, --version + Print version + +Examples: + Show disk usage chart of current working directory + $ pdu + + Show disk usage chart of a single file or directory + $ pdu path/to/file/or/directory + + Compare disk usages of multiple files and/or directories + $ pdu file.txt dir/ + + Show chart in apparent sizes instead of block sizes + $ pdu --quantity=apparent-size + + Detect and subtract the sizes of hardlinks from their parent nodes + $ pdu --deduplicate-hardlinks + + Show sizes in plain numbers instead of metric units + $ pdu --bytes-format=plain + + Show sizes in base 2¹⁰ units (binary) instead of base 10³ units (metric) + $ pdu --bytes-format=binary + + Show disk usage chart of all entries regardless of size + $ pdu --min-ratio=0 + + Only show disk usage chart of entries whose size is at least 5% of total + $ pdu --min-ratio=0.05 + + Show disk usage data as JSON instead of chart + $ pdu --min-ratio=0 --max-depth=inf --json-output | jq + + Visualize existing JSON representation of disk usage data + $ pdu --json-input < disk-usage.json diff --git a/exports/short.help b/exports/short.help new file mode 100644 index 00000000..1835edbc --- /dev/null +++ b/exports/short.help @@ -0,0 +1,59 @@ +Summarize disk usage of the set of files, recursively for directories. + +Usage: pdu [OPTIONS] [FILES]... + +Arguments: + [FILES]... 
List of files and/or directories + +Options: + --json-input + Read JSON data from stdin + --json-output + Print JSON data instead of an ASCII chart + -b, --bytes-format + How to display the numbers of bytes [default: metric] [possible values: plain, metric, binary] + -H, --deduplicate-hardlinks + Detect and subtract the sizes of hardlinks from their parent directory totals [aliases: --detect-links, --dedupe-links] + --top-down + Print the tree top-down instead of bottom-up + --align-right + Set the root of the bars to the right + -q, --quantity + Aspect of the files/directories to be measured [default: block-size] [possible values: apparent-size, block-size, block-count] + -d, --max-depth + Maximum depth to display the data. Could be either "inf" or a positive integer [default: 10] [aliases: --depth] + -w, --total-width + Width of the visualization [aliases: --width] + --column-width + Maximum widths of the tree column and width of the bar column + -m, --min-ratio + Minimal size proportion required to appear [default: 0.01] + --no-sort + Do not sort the branches in the tree + -s, --silent-errors + Prevent filesystem error messages from appearing in stderr [aliases: --no-errors] + -p, --progress + Report progress being made at the expense of performance + --threads + Set the maximum number of threads to spawn. 
Could be either "auto", "max", or a positive integer [default: auto] + --omit-json-shared-details + Do not output `.shared.details` in the JSON output + --omit-json-shared-summary + Do not output `.shared.summary` in the JSON output + -h, --help + Print help (see more with '--help') + -V, --version + Print version + +Examples: + $ pdu + $ pdu path/to/file/or/directory + $ pdu file.txt dir/ + $ pdu --quantity=apparent-size + $ pdu --deduplicate-hardlinks + $ pdu --bytes-format=plain + $ pdu --bytes-format=binary + $ pdu --min-ratio=0 + $ pdu --min-ratio=0.05 + $ pdu --min-ratio=0 --max-depth=inf --json-output | jq + $ pdu --json-input < disk-usage.json diff --git a/generate-completions.sh b/generate-completions.sh index e1d93592..e031cb1f 100755 --- a/generate-completions.sh +++ b/generate-completions.sh @@ -13,3 +13,7 @@ gen fish completion.fish gen zsh completion.zsh gen powershell completion.ps1 gen elvish completion.elv + +./run.sh pdu --help | sed 's/[[:space:]]*$//' > exports/long.help +./run.sh pdu -h | sed 's/[[:space:]]*$//' > exports/short.help +./run.sh pdu-usage-md > USAGE.md diff --git a/src/lib.rs b/src/lib.rs index f032fa4a..23765add 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,6 +16,8 @@ pub mod app; pub mod args; #[cfg(feature = "cli")] pub mod runtime_error; +#[cfg(feature = "cli")] +pub mod usage_md; /// The main program. #[cfg(feature = "cli")] diff --git a/src/usage_md.rs b/src/usage_md.rs new file mode 100644 index 00000000..33a3fa0b --- /dev/null +++ b/src/usage_md.rs @@ -0,0 +1,234 @@ +use crate::args::Args; +use clap::builder::PossibleValue; +use clap::{Arg, ArgAction, Command, CommandFactory}; +use itertools::Itertools; +use std::borrow::Cow; + +/// Renders a Markdown reference page for `pdu`'s CLI. 
+pub fn render_usage_md() -> String {
+    let mut command: Command = Args::command();
+    let mut out = String::new();
+
+    // Keep only the synopsis that follows the `Usage:` label.
+    let usage = command.render_usage().to_string();
+    if let Some(usage) = usage.strip_prefix("Usage:") {
+        out.push_str("# Usage\n\n```sh\n");
+        out.push_str(usage.trim());
+        out.push_str("\n```\n\n");
+    }
+
+    // Positional arguments. The heading is written lazily so that it is omitted when
+    // every positional argument is hidden.
+    let mut arguments_heading_written = false;
+    for arg in command.get_arguments() {
+        if !arg.is_positional() || arg.is_hide_set() || arg.is_hide_long_help_set() {
+            continue;
+        }
+        if !arguments_heading_written {
+            arguments_heading_written = true;
+            out.push_str("## Arguments\n\n");
+        }
+        render_argument(&mut out, arg);
+    }
+    if arguments_heading_written {
+        out.push('\n');
+    }
+
+    // Named options, filtered by the same visibility flags.
+    let mut options_heading_written = false;
+    for arg in command.get_arguments() {
+        if arg.is_positional() || arg.is_hide_set() || arg.is_hide_long_help_set() {
+            continue;
+        }
+        if !options_heading_written {
+            options_heading_written = true;
+            out.push_str("## Options\n\n");
+        }
+        render_option(&mut out, arg);
+    }
+
+    // Everything after the `Examples:` line of the long after-help text becomes the
+    // "Examples" section; without such a line the section is left out.
+    if let Some(after_help) = command.get_after_long_help() {
+        let text = after_help.to_string();
+        let mut lines_iter = text.lines();
+        // `any` consumes the iterator up to and including the matching line, so only the
+        // example entries remain for `render_examples_section`.
+        let has_examples = lines_iter.by_ref().any(|line| line.trim() == "Examples:");
+        if has_examples {
+            out.push_str("## Examples\n\n");
+            render_examples_section(&mut out, lines_iter);
+        }
+    }
+
+    out
+}
+
+/// Appends one Markdown bullet (display name in a code span, then the description) for a
+/// positional argument.
+///
+/// The display name is the first value name (falling back to the argument id), wrapped in
+/// `<…>` when the argument is required and `[…]` otherwise, with a trailing `...` when more
+/// than one value is accepted.
+fn render_argument(out: &mut String, arg: &Arg) {
+    let name = arg
+        .get_value_names()
+        .and_then(|names| names.first())
+        .map(|n| n.as_str())
+        .unwrap_or_else(|| arg.get_id().as_str());
+    let is_multiple = arg
+        .get_num_args()
+        .map(|r| r.max_values() > 1)
+        .unwrap_or(false);
+    let display_name = if arg.is_required_set() {
+        if is_multiple {
+            format!("<{name}>...")
+        } else {
+            format!("<{name}>")
+        }
+    } else if is_multiple {
+        format!("[{name}]...")
+    } else {
+        format!("[{name}]")
+    };
+    let desc = get_help_text(arg);
+    let desc = ensure_ends_with_punctuation(&desc);
+    out.push_str(&format!("* `{display_name}`: {desc}\n"));
+}
+
+/// Appends the section for one named option: the anchor line, a `###` heading with the long
+/// name, the aliases/default/choices bullets, and finally the description.
+///
+/// Options without a long name are skipped entirely.
+fn render_option(out: &mut String, arg: &Arg) {
+    let Some(primary_long) = arg.get_long() else {
+        return;
+    };
+
+    write_option_anchors(out, arg, primary_long);
+    out.push_str(&format!("### `--{primary_long}`\n\n"));
+
+    let aliases = collect_option_display_aliases(arg);
+    let default_values = collect_option_default_values(arg);
+    let possible_values = collect_option_possible_values(arg);
+
+    let has_metadata =
+        !aliases.is_empty() || !default_values.is_empty() || !possible_values.is_empty();
+
+    if !aliases.is_empty() {
+        let aliases_str = aliases.iter().map(|alias| format!("`{alias}`")).join(", ");
+        out.push_str(&format!("* _Aliases:_ {aliases_str}.\n"));
+    }
+    if !default_values.is_empty() {
+        let default_values_str = default_values.join(", ");
+        out.push_str(&format!("* _Default:_ `{default_values_str}`.\n"));
+    }
+    if !possible_values.is_empty() {
+        out.push_str("* _Choices:_\n");
+        for possible_value in &possible_values {
+            let name = possible_value.get_name();
+            if let Some(help) = possible_value.get_help() {
+                out.push_str(&format!(" - `{name}`: {help}\n"));
+            } else {
+                out.push_str(&format!(" - `{name}`\n"));
+            }
+        }
+    }
+
+    // A blank line separates the metadata bullets from the description.
+    if has_metadata {
+        out.push('\n');
+    }
+
+    write_option_description(out, arg);
+}
+
+/// Calls the anchor writer once for every name the option answers to, then ends the line.
+///
+/// Order: the short flag, the primary long name, each visible alias, each visible short
+/// alias. Short names use the id `option-` followed by the character.
+fn write_option_anchors(out: &mut String, arg: &Arg, primary_long: &str) {
+    let append_anchor = |out: &mut String, id: &str| {
+        // NOTE(review): as shown here the literal is empty and `id` is never used, so nothing
+        // is emitted per anchor. The markup that should embed `id` appears to have been
+        // lost; restore it and regenerate USAGE.md once the intended markup is confirmed.
+        out.push_str(&format!(r#""#));
+    };
+    let append_anchor_for_short = |out: &mut String, short: char| {
+        append_anchor(out, &format!("option-{short}"));
+    };
+    if let Some(short) = arg.get_short() {
+        append_anchor_for_short(out, short);
+    }
+    append_anchor(out, primary_long);
+    for alias in arg.get_visible_aliases().unwrap_or_default() {
+        append_anchor(out, alias);
+    }
+    for short in arg.get_visible_short_aliases().unwrap_or_default() {
+        append_anchor_for_short(out, short);
+    }
+    out.push('\n');
+}
+
+/// Lists the alternative spellings shown under "Aliases": the short flag first, then the
+/// visible long aliases, then the visible short aliases.
+fn collect_option_display_aliases(arg: &Arg) -> Vec<String> {
+    let mut aliases = Vec::<String>::new();
+    if let Some(short) = arg.get_short() {
+        aliases.push(format!("-{short}"));
+    }
+    for alias in arg.get_visible_aliases().unwrap_or_default() {
+        aliases.push(format!("--{alias}"));
+    }
+    for alias in arg.get_visible_short_aliases().unwrap_or_default() {
+        aliases.push(format!("-{alias}"));
+    }
+    aliases
+}
+
+/// Returns the default values to display, lossily converted to text.
+///
+/// The result is empty when the argument hides its default value or when it is a
+/// non-positional `SetTrue` flag.
+fn collect_option_default_values(arg: &Arg) -> Vec<Cow<'_, str>> {
+    if arg.is_hide_default_value_set() {
+        return Vec::new();
+    }
+    if !arg.is_positional() && matches!(arg.get_action(), ArgAction::SetTrue) {
+        return Vec::new();
+    }
+    arg.get_default_values()
+        .iter()
+        .map(|value| value.to_string_lossy())
+        .collect()
+}
+
+/// Returns the possible values to display, skipping hidden ones.
+///
+/// The result is empty when the argument hides its possible values altogether.
+fn collect_option_possible_values(arg: &Arg) -> Vec<PossibleValue> {
+    if arg.is_hide_possible_values_set() {
+        return Vec::new();
+    }
+    arg.get_possible_values()
+        .into_iter()
+        .filter(|possible_value| !possible_value.is_hide_set())
+        .collect()
+}
+
+/// Appends the option's help text as a punctuated paragraph, or a single newline when the
+/// option has no help text.
+fn write_option_description(out: &mut String, arg: &Arg) {
+    let description = get_help_text(arg);
+    if !description.is_empty() {
+        let description = ensure_ends_with_punctuation(&description);
+        out.push_str(&format!("{description}\n\n"));
+    } else {
+        out.push('\n');
+    }
+}
+
+/// Returns the text describing `arg`: the long help when present, otherwise the short help,
+/// otherwise an empty string.
+///
+/// The non-positional argument with id `help` always yields the fixed text "Print help".
+fn get_help_text(arg: &Arg) -> Cow<'static, str> {
+    if !arg.is_positional() && arg.get_id() == "help" {
+        return Cow::Borrowed("Print help");
+    }
+    match (arg.get_help(), arg.get_long_help()) {
+        (None, None) => Cow::Borrowed(""),
+        (Some(help), None) | (_, Some(help)) => Cow::Owned(help.to_string()),
+    }
+}
+
+/// Converts example lines into Markdown.
+///
+/// Blank lines are dropped, a line starting with `$` becomes a `sh` code block holding the
+/// command, and any other line becomes a `###` heading.
+fn render_examples_section<'a>(out: &mut String, lines: impl Iterator<Item = &'a str>) {
+    for line in lines {
+        let line = line.trim();
+
+        if line.is_empty() {
+            continue;
+        }
+
+        if let Some(command) = line.strip_prefix('$') {
+            let command = command.trim();
+            out.push_str(&format!("```sh\n{command}\n```\n\n"));
+            continue;
+        }
+
+        out.push_str(&format!("### {line}\n\n"));
+    }
+}
+
+/// Appends a period to `line` unless it is empty or already ends with `.`, `!` or `?`.
+///
+/// The input is returned borrowed when no change is needed.
+fn ensure_ends_with_punctuation(line: &str) -> Cow<'_, str> {
+    if line.is_empty() || line.ends_with(['.', '!', '?']) {
+        Cow::Borrowed(line)
+    } else {
+        Cow::Owned(format!("{line}."))
+    }
+}
diff --git a/tests/sync_help.rs b/tests/sync_help.rs
new file mode 100644
index 00000000..38dcde0e
--- /dev/null
+++ b/tests/sync_help.rs
@@ -0,0 +1,40 @@
+//! The following tests check whether the help text files are outdated.
+//!
+//! If the tests fail, run `./generate-completions.sh` on the root of the repo to update the help files.
+
+// Since the CLI in Windows looks a little different, and I am way too lazy to make two versions
+// of help files, the following tests only run in UNIX-like environments.
+#![cfg(unix)]
+#![cfg(feature = "cli")]
+
+use clap::CommandFactory;
+use itertools::Itertools;
+use parallel_disk_usage::args::Args;
+
+macro_rules! check {
+    ($name:ident: $render_help:ident => $path:literal) => {
+        #[test]
+        fn $name() {
+            eprintln!(
+                "check!({name}: {method} => {path});",
+                name = stringify!($name),
+                method = stringify!($render_help),
+                path = $path,
+            );
+            let received = Args::command()
+                .$render_help()
+                .to_string()
+                .lines()
+                .map(str::trim_end)
+                .join("\n");
+            let expected = include_str!($path);
+            assert!(
+                received.trim_end() == expected.trim_end(),
+                "help text is outdated, run ./generate-completions.sh to update it",
+            );
+        }
+    };
+}
+
+check!(long: render_long_help => "../exports/long.help");
+check!(short: render_help => "../exports/short.help");
diff --git a/tests/sync_usage_md.rs b/tests/sync_usage_md.rs
new file mode 100644
index 00000000..5341ccc0
--- /dev/null
+++ b/tests/sync_usage_md.rs
@@ -0,0 +1,20 @@
+//! The following test checks whether `USAGE.md` is outdated.
+//!
+//! If the test fails, run `./generate-completions.sh` at the root of the repo to update `USAGE.md`.
+
+// Gated like the help-file sync tests: the CLI looks a little different on Windows, so this
+// check only runs in UNIX-like environments.
+#![cfg(unix)]
+#![cfg(feature = "cli")]
+
+use parallel_disk_usage::usage_md::render_usage_md;
+
+#[test]
+fn usage_md() {
+    let committed = include_str!("../USAGE.md");
+    let rendered = render_usage_md();
+    assert!(
+        rendered.trim_end() == committed.trim_end(),
+        "USAGE.md is outdated, run ./generate-completions.sh to update it",
+    );
+}