Skip to content

Commit d6b16bc

Browse files
mre and thomas-zahner authored
Implement --files-from Option (#1836)
* Implement `--files-from` Option

  This allows providing a list of inputs from a file or `stdin`.

  ## How it works

  1. Create an `inputs.txt` file with all the inputs (files) you'd like to check for broken links.
  2. Pass that to `lychee` with

     ```sh
     lychee --files-from inputs.txt
     ```

  lychee will then use the files from `inputs.txt` as inputs.
  You can still provide custom inputs as well with

  ```sh
  lychee --files-from inputs.txt another_input.md
  ```

  The inputs file can contain comments, such as

  ```text
  # Ignore this line
  some_input.txt
  # Ignore this line as well
  ```

  Closes: #662

* Apply suggestion from @thomas-zahner

  Co-authored-by: Thomas Zahner <thomas.zahner@protonmail.ch>

---------

Co-authored-by: Thomas Zahner <thomas.zahner@protonmail.ch>
1 parent da89fd6 commit d6b16bc

File tree

6 files changed

+292
-6
lines changed

6 files changed

+292
-6
lines changed

README.md

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -324,13 +324,29 @@ A fast, async link checker
324324
325325
Finds broken URLs and mail addresses inside Markdown, HTML, `reStructuredText`, websites and more!
326326
327-
Usage: lychee [OPTIONS] <inputs>...
327+
Usage: lychee [OPTIONS] [inputs]...
328328
329329
Arguments:
330-
<inputs>...
331-
The inputs (where to get links to check from). These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`), remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`). NOTE: Use `--` to separate inputs from options that allow multiple arguments
330+
[inputs]...
331+
The inputs (where to get links to check from). These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`), remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`). Alternatively, use `--files-from` to read inputs from a file. NOTE: Use `--` to separate inputs from options that allow multiple arguments
332332
333333
Options:
334+
--files-from <PATH>
335+
Read input filenames from the given file or stdin (if path is '-').
336+
337+
This is useful when you have a large number of inputs that would be
338+
cumbersome to specify on the command line directly.
339+
340+
Examples:
341+
lychee --files-from list.txt
342+
find . -name '*.md' | lychee --files-from -
343+
echo 'README.md' | lychee --files-from -
344+
345+
File Format:
346+
Each line should contain one input (file path, URL, or glob pattern).
347+
Lines starting with '#' are treated as comments and ignored.
348+
Empty lines are also ignored.
349+
334350
-c, --config <CONFIG_FILE>
335351
Configuration file to use
336352

lychee-bin/src/files_from.rs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
//! File list reading functionality for --files-from option
2+
//!
3+
//! This module provides the `FilesFrom` struct which handles reading input file
4+
//! lists from any reader, with support for comments and empty line filtering.
5+
6+
use anyhow::{Context, Result};
7+
use std::io::{BufRead, BufReader, Read};
8+
use std::path::Path;
9+
10+
/// Prefix that marks a line of a `--files-from` list as a comment to be skipped
const COMMENT_MARKER: &str = "#";
12+
13+
/// The input entries collected from a `--files-from` list.
///
/// Values are built with `FilesFrom::from_reader` or through the
/// `TryFrom<&Path>` conversion.
#[derive(Debug, Clone)]
pub(crate) struct FilesFrom {
    /// Entries that survived comment / blank-line filtering, in the order
    /// they were read
    pub(crate) inputs: Vec<String>,
}
19+
20+
impl FilesFrom {
21+
/// Create `FilesFrom` from any reader
22+
pub(crate) fn from_reader<R: Read>(reader: R) -> Result<Self> {
23+
let buf_reader = BufReader::new(reader);
24+
let lines: Vec<String> = buf_reader
25+
.lines()
26+
.collect::<Result<Vec<_>, _>>()
27+
.context("Cannot read lines from reader")?;
28+
29+
let inputs = Self::filter_lines(lines);
30+
Ok(FilesFrom { inputs })
31+
}
32+
33+
/// Filter out comments and empty lines from input
34+
fn filter_lines(lines: Vec<String>) -> Vec<String> {
35+
lines
36+
.into_iter()
37+
.filter(|line| {
38+
let line = line.trim();
39+
!line.is_empty() && !line.starts_with(COMMENT_MARKER)
40+
})
41+
.collect()
42+
}
43+
}
44+
45+
impl TryFrom<&Path> for FilesFrom {
46+
type Error = anyhow::Error;
47+
48+
fn try_from(path: &Path) -> Result<Self, Self::Error> {
49+
if path == Path::new("-") {
50+
Self::from_reader(std::io::stdin())
51+
} else {
52+
let file = std::fs::File::open(path)
53+
.with_context(|| format!("Cannot open --files-from file: {}", path.display()))?;
54+
Self::from_reader(file)
55+
}
56+
}
57+
}
58+
59+
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Cursor;
    use tempfile::tempdir;

    /// Empty lines, whitespace-only lines and comments (indented or not) are
    /// removed; the remaining lines are kept in order.
    #[test]
    fn test_filter_lines() {
        let lines: Vec<String> = [
            "file1.md",
            "",
            "# This is a comment",
            "file2.md",
            " ",
            " # Another comment",
            "file3.md",
        ]
        .iter()
        .map(|line| (*line).to_string())
        .collect();

        let kept = FilesFrom::filter_lines(lines);
        assert_eq!(kept, vec!["file1.md", "file2.md", "file3.md"]);
    }

    /// Reading from an in-memory reader yields only the real entries.
    #[test]
    fn test_from_reader() -> Result<()> {
        let list = Cursor::new("# Comment\nfile1.md\n\nfile2.md\n# Another comment\nfile3.md\n");

        let FilesFrom { inputs } = FilesFrom::from_reader(list)?;
        assert_eq!(inputs, vec!["file1.md", "file2.md", "file3.md"]);

        Ok(())
    }

    /// A list made up solely of comments and blank lines yields no inputs.
    #[test]
    fn test_from_reader_empty() -> Result<()> {
        let list = Cursor::new("# Only comments\n\n# More comments\n \n");

        let FilesFrom { inputs } = FilesFrom::from_reader(list)?;
        assert_eq!(inputs, Vec::<String>::new());

        Ok(())
    }

    /// The `TryFrom<&Path>` conversion reads and filters a file on disk.
    #[test]
    fn test_try_from_file() -> Result<()> {
        let scratch = tempdir()?;
        let list_path = scratch.path().join("files.txt");
        let contents = "# Comment\nfile1.md\n\nfile2.md\n# Another comment\nfile3.md\n";
        fs::write(&list_path, contents)?;

        let FilesFrom { inputs } = FilesFrom::try_from(list_path.as_path())?;
        assert_eq!(inputs, vec!["file1.md", "file2.md", "file3.md"]);

        Ok(())
    }

    /// A path that cannot be opened is reported with the dedicated context
    /// message.
    #[test]
    fn test_try_from_nonexistent_file() {
        let err = FilesFrom::try_from(Path::new("/nonexistent/file.txt"))
            .expect_err("opening a nonexistent file must fail");

        assert!(err.to_string().contains("Cannot open --files-from file"));
    }
}

lychee-bin/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ use lychee_lib::CookieJar;
8484
mod cache;
8585
mod client;
8686
mod commands;
87+
mod files_from;
8788
mod formatters;
8889
mod options;
8990
mod parse;

lychee-bin/src/options.rs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::files_from::FilesFrom;
12
use crate::parse::parse_base;
23
use crate::verbosity::Verbosity;
34
use anyhow::{Context, Error, Result, anyhow};
@@ -312,10 +313,32 @@ pub(crate) struct LycheeOptions {
312313
/// The inputs (where to get links to check from).
313314
/// These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`),
314315
/// remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`).
316+
/// Alternatively, use `--files-from` to read inputs from a file.
315317
/// NOTE: Use `--` to separate inputs from options that allow multiple arguments.
316-
#[arg(name = "inputs", required = true)]
318+
#[arg(name = "inputs", required_unless_present = "files_from")]
317319
raw_inputs: Vec<String>,
318320

321+
/// Read input filenames from the given file or stdin (if path is '-').
322+
#[arg(
323+
long = "files-from",
324+
value_name = "PATH",
325+
long_help = "Read input filenames from the given file or stdin (if path is '-').
326+
327+
This is useful when you have a large number of inputs that would be
328+
cumbersome to specify on the command line directly.
329+
330+
Examples:
331+
lychee --files-from list.txt
332+
find . -name '*.md' | lychee --files-from -
333+
echo 'README.md' | lychee --files-from -
334+
335+
File Format:
336+
Each line should contain one input (file path, URL, or glob pattern).
337+
Lines starting with '#' are treated as comments and ignored.
338+
Empty lines are also ignored."
339+
)]
340+
files_from: Option<PathBuf>,
341+
319342
/// Configuration file to use
320343
#[arg(short, long = "config")]
321344
#[arg(help = HELP_MSG_CONFIG_FILE)]
@@ -331,7 +354,16 @@ impl LycheeOptions {
331354
// accept a `Vec<Input>` in `LycheeOptions` and do the conversion there, but
332355
// we wouldn't get access to `glob_ignore_case`.
333356
pub(crate) fn inputs(&self) -> Result<HashSet<Input>> {
334-
self.raw_inputs
357+
let mut all_inputs = self.raw_inputs.clone();
358+
359+
// If --files-from is specified, read inputs from the file
360+
if let Some(files_from_path) = &self.files_from {
361+
let files_from = FilesFrom::try_from(files_from_path.as_path())
362+
.context("Cannot read inputs from --files-from")?;
363+
all_inputs.extend(files_from.inputs);
364+
}
365+
366+
all_inputs
335367
.iter()
336368
.map(|raw_input| Input::new(raw_input, None, self.config.glob_ignore_case))
337369
.collect::<Result<_, _>>()

lychee-bin/tests/cli.rs

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2739,4 +2739,110 @@ mod cli {
27392739
.stderr("") // Ensure stderr is empty
27402740
.stdout(contains("https://example.com/sitemap.xml"));
27412741
}
2742+
2743+
/// `--files-from <file>` picks up the path listed in the file, as shown by
/// `--dump-inputs`.
#[test]
fn test_files_from_file() -> Result<()> {
    let temp_dir = tempfile::tempdir()?;
    let list_path = temp_dir.path().join("files.txt");
    let markdown_path = temp_dir.path().join("test.md");
    let listed = markdown_path.to_string_lossy();

    // The list file contains exactly one entry: the markdown file's path.
    fs::write(&markdown_path, "# Test\n[link](https://example.com)")?;
    fs::write(&list_path, listed.as_ref())?;

    main_command()
        .arg("--files-from")
        .arg(&list_path)
        .arg("--dump-inputs")
        .assert()
        .success()
        .stdout(contains(listed.as_ref()));

    Ok(())
}
2763+
2764+
/// `--files-from -` reads the list of inputs from standard input.
#[test]
fn test_files_from_stdin() -> Result<()> {
    let temp_dir = tempfile::tempdir()?;
    let markdown_path = temp_dir.path().join("test.md");
    let listed = markdown_path.to_string_lossy();

    fs::write(&markdown_path, "# Test\n[link](https://example.com)")?;

    // The markdown path is piped in on stdin instead of being read from a file.
    main_command()
        .arg("--files-from")
        .arg("-")
        .arg("--dump-inputs")
        .write_stdin(listed.as_ref())
        .assert()
        .success()
        .stdout(contains(listed.as_ref()));

    Ok(())
}
2783+
2784+
/// A list file containing comment lines and an empty line still yields the
/// real entry.
#[test]
fn test_files_from_with_comments_and_empty_lines() -> Result<()> {
    let temp_dir = tempfile::tempdir()?;
    let list_path = temp_dir.path().join("files.txt");
    let markdown_path = temp_dir.path().join("test.md");

    fs::write(&markdown_path, "# Test\n[link](https://example.com)")?;

    // Surround the single real entry with comments and a blank line.
    let list_contents = format!(
        "# Comment line\n\n{}\n# Another comment\n",
        markdown_path.display()
    );
    fs::write(&list_path, list_contents)?;

    main_command()
        .arg("--files-from")
        .arg(&list_path)
        .arg("--dump-inputs")
        .assert()
        .success()
        .stdout(contains(markdown_path.to_string_lossy().as_ref()));

    Ok(())
}
2810+
2811+
/// Inputs from `--files-from` and inputs given as positional arguments are
/// both reported by `--dump-inputs`.
#[test]
fn test_files_from_combined_with_regular_inputs() -> Result<()> {
    let temp_dir = tempfile::tempdir()?;
    let list_path = temp_dir.path().join("files.txt");
    let from_list = temp_dir.path().join("test1.md");
    let from_args = temp_dir.path().join("test2.md");

    fs::write(&from_list, "# Test 1")?;
    fs::write(&from_args, "# Test 2")?;
    // Only the first file is listed; the second is passed on the command line.
    fs::write(&list_path, from_list.to_string_lossy().as_ref())?;

    main_command()
        .arg("--files-from")
        .arg(&list_path)
        .arg(&from_args)
        .arg("--dump-inputs")
        .assert()
        .success()
        .stdout(contains(from_list.to_string_lossy().as_ref()))
        .stdout(contains(from_args.to_string_lossy().as_ref()));

    Ok(())
}
2835+
2836+
/// Pointing `--files-from` at a missing file fails and reports the
/// "Cannot open --files-from file" message on stderr.
#[test]
fn test_files_from_nonexistent_file_error() -> Result<()> {
    main_command()
        .arg("--files-from")
        .arg("/nonexistent/file.txt")
        .arg("--dump-inputs")
        .assert()
        .failure()
        .stderr(contains("Cannot open --files-from file"));

    Ok(())
}
27422848
}

lychee-bin/tests/usage.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ mod readme {
55
use assert_cmd::Command;
66
use pretty_assertions::assert_eq;
77

8-
const USAGE_STRING: &str = "Usage: lychee [OPTIONS] <inputs>...\n";
8+
const USAGE_STRING: &str = "Usage: lychee [OPTIONS] [inputs]...\n";
99

1010
fn main_command() -> Command {
1111
// this gets the "main" binary name (e.g. `lychee`)

0 commit comments

Comments
 (0)