Skip to content
Merged

Mailify #1985

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,092 changes: 132 additions & 960 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions fixtures/TEST_EMAIL.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
https://endler.dev
test@example.com
https://example.com
octocat+github@github.com
mailto:test2@example.com
mailto:info@wikipedia.org
4 changes: 3 additions & 1 deletion lychee-bin/src/formatters/response/color.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ impl ColorFormatter {
Status::Excluded
| Status::Unsupported(_)
| Status::Cached(CacheStatus::Excluded | CacheStatus::Unsupported) => &DIM,
Status::UnknownStatusCode(_) | Status::Timeout(_) => &YELLOW,
Status::UnknownStatusCode(_) | Status::UnknownMailStatus(_) | Status::Timeout(_) => {
&YELLOW
}
Status::Error(_) | Status::RequestError(_) | Status::Cached(CacheStatus::Error(_)) => {
&PINK
}
Expand Down
4 changes: 3 additions & 1 deletion lychee-bin/src/formatters/response/emoji.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ impl EmojiFormatter {
Status::Unsupported(_)
| Status::Cached(CacheStatus::Excluded | CacheStatus::Unsupported) => "🚫",
Status::Redirected(_, _) => "↪️",
Status::UnknownStatusCode(_) | Status::Timeout(_) => "⚠️",
Status::UnknownStatusCode(_) | Status::UnknownMailStatus(_) | Status::Timeout(_) => {
"⚠️"
}
Status::Error(_) | Status::RequestError(_) | Status::Cached(CacheStatus::Error(_)) => {
"❌"
}
Expand Down
4 changes: 2 additions & 2 deletions lychee-bin/src/formatters/stats/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub(crate) struct ResponseStats {
pub(crate) total: usize,
/// Number of successful responses
pub(crate) successful: usize,
/// Number of responses with an unknown status code
/// Number of responses with an unknown status
pub(crate) unknown: usize,
/// Number of responses, which lychee does not support right now
pub(crate) unsupported: usize,
Expand Down Expand Up @@ -72,7 +72,7 @@ impl ResponseStats {
match status {
Status::Ok(_) => self.successful += 1,
Status::Error(_) | Status::RequestError(_) => self.errors += 1,
Status::UnknownStatusCode(_) => self.unknown += 1,
Status::UnknownStatusCode(_) | Status::UnknownMailStatus(_) => self.unknown += 1,
Status::Timeout(_) => self.timeouts += 1,
Status::Redirected(_, _) => self.redirects += 1,
Status::Excluded => self.excludes += 1,
Expand Down
25 changes: 19 additions & 6 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,12 +294,25 @@ mod cli {

#[test]
fn test_email() -> Result<()> {
cargo_bin_cmd!()
.write_stdin("test@example.com idiomatic-rust-doesnt-exist-man@wikipedia.org")
.arg("--include-mail")
.arg("-")
.assert()
.code(2)
.stdout(contains(
"Unreachable mail address mailto:test@example.com: No MX records found for domain",
))
.stdout(contains(
"Unreachable mail address mailto:idiomatic-rust-doesnt-exist-man@wikipedia.org: Mail server rejects the address",
))
.stdout(contains("2 Errors"));

test_json_output!(
"TEST_EMAIL.md",
MockResponseStats {
total: 5,
excludes: 0,
successful: 5,
total: 3,
successful: 3,
..MockResponseStats::default()
},
"--include-mail"
Expand All @@ -311,9 +324,9 @@ mod cli {
test_json_output!(
"TEST_EMAIL.md",
MockResponseStats {
total: 5,
excludes: 3,
successful: 2,
total: 3,
excludes: 2,
successful: 1,
..MockResponseStats::default()
}
)
Expand Down
6 changes: 3 additions & 3 deletions lychee-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ rust-version = "1.88.0"
async-stream = "0.3.6"
async-trait = "0.1.88"
cached = "0.56.0"
check-if-email-exists = { version = "0.9.1", optional = true }
cookie_store = "0.22.0"
dashmap = { version = "6.1.0" }
email_address = "0.2.9"
Expand All @@ -34,6 +33,7 @@ ignore = "0.4.25"
ip_network = "0.4.1"
linkify = "0.10.0"
log = "0.4.28"
mailify-lib = { version = "0.2.0", optional = true }
octocrab = "0.49.5"
openssl-sys = { version = "0.9.111", optional = true }
path-clean = "1.0.1"
Expand Down Expand Up @@ -84,8 +84,8 @@ wiremock = "0.6.5"

[features]

# Enable checking email addresses. Requires the native-tls feature.
email-check = ["check-if-email-exists"]
# Enable checking email addresses.
email-check = ["mailify-lib"]

# Use platform-native TLS.
native-tls = ["openssl-sys", "reqwest/native-tls"]
Expand Down
69 changes: 42 additions & 27 deletions lychee-lib/src/checker/mail.rs
Original file line number Diff line number Diff line change
@@ -1,57 +1,72 @@
#[cfg(all(feature = "email-check", feature = "native-tls"))]
use http::StatusCode;

#[cfg(all(feature = "email-check", feature = "native-tls"))]
use crate::ErrorKind;

use crate::{Status, Uri};
use std::time::Duration;

#[cfg(all(feature = "email-check", feature = "native-tls"))]
use check_if_email_exists::{CheckEmailInput, Reachable, check_email};

#[cfg(all(feature = "email-check", feature = "native-tls"))]
use crate::types::mail;
#[cfg(feature = "email-check")]
use mailify_lib::{Client, Config};

/// A utility for checking the validity of email addresses.
///
/// `MailChecker` is responsible for validating email addresses,
/// optionally performing reachability checks when the appropriate
/// features are enabled.
#[derive(Debug, Clone)]
pub(crate) struct MailChecker {}
pub(crate) struct MailChecker {
#[cfg(feature = "email-check")]
client: Client,
}

#[cfg(not(feature = "email-check"))]
impl MailChecker {
/// Creates a new `MailChecker`.
pub(crate) const fn new() -> Self {
pub(crate) const fn new(_timeout: Option<Duration>) -> Self {
Self {}
}

/// Ignore the mail check if the `email-check` feature is not enabled.
#[allow(
clippy::unused_async,
reason = "Match the signature of the function with the email-check feature"
)]
pub(crate) async fn check_mail(&self, _uri: &Uri) -> Status {
Status::Excluded
}
}

#[cfg(feature = "email-check")]
impl MailChecker {
/// Creates a new `MailChecker`.
pub(crate) fn new(timeout: Option<Duration>) -> Self {
Self {
client: Client::new(Config {
timeout,
..Default::default()
}),
}
}

/// Check a mail address, or equivalently a `mailto` URI.
///
/// URIs may contain query parameters (e.g. `contact@example.com?subject="Hello"`),
/// which are ignored by this check. They are not part of the mail address
/// and instead passed to a mail client.
#[cfg(all(feature = "email-check", feature = "native-tls"))]
pub(crate) async fn check_mail(&self, uri: &Uri) -> Status {
self.perform_email_check(uri).await
}

/// Ignore the mail check if the `email-check` and `native-tls` features are not enabled.
#[cfg(not(all(feature = "email-check", feature = "native-tls")))]
pub(crate) async fn check_mail(&self, _uri: &Uri) -> Status {
Status::Excluded
}

#[cfg(all(feature = "email-check", feature = "native-tls"))]
async fn perform_email_check(&self, uri: &Uri) -> Status {
use crate::ErrorKind;
use http::StatusCode;
use mailify_lib::CheckResult;

let address = uri.url.path().to_string();
let input = CheckEmailInput::new(address);
let result = &(check_email(&input).await);
let result = self.client.check(&address).await;

if let Reachable::Invalid = result.is_reachable {
ErrorKind::UnreachableEmailAddress(uri.clone(), mail::error_from_output(result)).into()
} else {
Status::Ok(StatusCode::OK)
match result {
CheckResult::Success => Status::Ok(StatusCode::OK),
CheckResult::Uncertain(reason) => Status::UnknownMailStatus(reason.to_string()),
CheckResult::Failure(reason) => {
ErrorKind::UnreachableEmailAddress(uri.clone(), reason.to_string()).into()
}
}
}
}
12 changes: 6 additions & 6 deletions lychee-lib/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"

// Constants currently not configurable by the user.
/// A timeout for only the connect phase of a [`Client`].
const CONNECT_TIMEOUT: u64 = 10;
const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
/// TCP keepalive.
///
/// See <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html> for more
/// information.
const TCP_KEEPALIVE: u64 = 60;
const TCP_KEEPALIVE: Duration = Duration::from_secs(60);

/// Builder for [`Client`].
///
Expand Down Expand Up @@ -261,7 +261,7 @@ pub struct ClientBuilder {
#[builder(default = DEFAULT_ACCEPTED_STATUS_CODES.clone())]
accepted: HashSet<StatusCode>,

/// Response timeout per request in seconds.
/// Response timeout per request.
timeout: Option<Duration>,

/// Base for resolving paths.
Expand Down Expand Up @@ -395,7 +395,7 @@ impl ClientBuilder {
Ok(Client {
remaps: self.remaps,
filter,
email_checker: MailChecker::new(),
email_checker: MailChecker::new(self.timeout),
website_checker,
file_checker: FileChecker::new(
self.base,
Expand Down Expand Up @@ -430,8 +430,8 @@ impl ClientBuilder {
.gzip(true)
.default_headers(self.default_headers()?)
.danger_accept_invalid_certs(self.allow_insecure)
.connect_timeout(Duration::from_secs(CONNECT_TIMEOUT))
.tcp_keepalive(Duration::from_secs(TCP_KEEPALIVE))
.connect_timeout(CONNECT_TIMEOUT)
.tcp_keepalive(TCP_KEEPALIVE)
.redirect(redirect_policy(
redirect_history.clone(),
self.max_redirects,
Expand Down
18 changes: 9 additions & 9 deletions lychee-lib/src/retry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,18 +104,18 @@ impl RetryExt for ErrorKind {
}

impl RetryExt for Status {
#[allow(clippy::match_same_arms)]
fn should_retry(&self) -> bool {
match self {
Status::Ok(_) => false,
Status::Error(err) => err.should_retry(),
Status::RequestError(_) => false,
Status::Timeout(_) => true,
Status::Redirected(_, _) => false,
Status::UnknownStatusCode(_) => false,
Status::Excluded => false,
Status::Unsupported(_) => false,
Status::Cached(_) => false,
Status::Error(err) => err.should_retry(),
Status::Ok(_)
| Status::RequestError(_)
| Status::Redirected(_, _)
| Status::UnknownStatusCode(_)
| Status::UnknownMailStatus(_)
| Status::Excluded
| Status::Unsupported(_)
| Status::Cached(_) => false,
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion lychee-lib/src/types/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ impl From<&Status> for CacheStatus {
}
_ => Self::Error(None),
},
Status::RequestError(_) => Self::Error(None),
Status::RequestError(_) | Status::UnknownMailStatus(_) => Self::Error(None),
}
}
}
Expand Down
6 changes: 2 additions & 4 deletions lychee-lib/src/types/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ pub enum ErrorKind {
InvalidUrlFromPath(PathBuf),

/// The given mail address is unreachable
#[error("Unreachable mail address: {0}: {1}")]
#[error("Unreachable mail address {0}")]
UnreachableEmailAddress(Uri, String),

/// The given header could not be parsed.
Expand Down Expand Up @@ -277,9 +277,7 @@ impl ErrorKind {
"Cannot convert path to URL: '{}'. Check path format",
path_buf.display()
)),
ErrorKind::UnreachableEmailAddress(uri, reason) => {
Some(format!("Email address unreachable: '{uri}'. {reason}",))
}
ErrorKind::UnreachableEmailAddress(_uri, reason) => Some(reason.clone()),
ErrorKind::InvalidHeader(invalid_header_value) => Some(format!(
"Invalid HTTP header: {invalid_header_value}. Check header format",
)),
Expand Down
21 changes: 0 additions & 21 deletions lychee-lib/src/types/mail.rs

This file was deleted.

1 change: 0 additions & 1 deletion lychee-lib/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ mod cookies;
mod error;
mod file;
mod input;
pub(crate) mod mail;
mod preprocessor;
pub(crate) mod redirect_history;
mod request;
Expand Down
9 changes: 8 additions & 1 deletion lychee-lib/src/types/status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ pub enum Status {
Redirected(StatusCode, Redirects),
/// The given status code is not known by lychee
UnknownStatusCode(StatusCode),
/// The reachability of the given mail address could not be reliably
/// determined. This normally happens due to restrictive measures by
/// mail servers (blocklisting) or your ISP (port filtering).
UnknownMailStatus(String),
/// Resource was excluded from checking
Excluded,
/// The request type is currently not supported,
Expand All @@ -50,6 +54,7 @@ impl Display for Status {
Status::Ok(code) => write!(f, "{code}"),
Status::Redirected(_, _) => write!(f, "Redirect"),
Status::UnknownStatusCode(code) => write!(f, "Unknown status ({code})"),
Status::UnknownMailStatus(_) => write!(f, "Unknown mail status"),
Status::Timeout(Some(code)) => write!(f, "Timeout ({code})"),
Status::Timeout(None) => f.write_str("Timeout"),
Status::Unsupported(e) => write!(f, "Unsupported: {e}"),
Expand Down Expand Up @@ -159,6 +164,7 @@ impl Status {
Status::RequestError(e) => e.error().details(),
Status::Timeout(_) => None,
Status::UnknownStatusCode(_) => None,
Status::UnknownMailStatus(reason) => Some(reason.clone()),
Status::Unsupported(_) => None,
Status::Cached(_) => None,
Status::Excluded => None,
Expand Down Expand Up @@ -218,7 +224,7 @@ impl Status {
match self {
Status::Ok(_) => ICON_OK,
Status::Redirected(_, _) => ICON_REDIRECTED,
Status::UnknownStatusCode(_) => ICON_UNKNOWN,
Status::UnknownStatusCode(_) | Status::UnknownMailStatus(_) => ICON_UNKNOWN,
Status::Excluded => ICON_EXCLUDED,
Status::Error(_) | Status::RequestError(_) => ICON_ERROR,
Status::Timeout(_) => ICON_TIMEOUT,
Expand Down Expand Up @@ -254,6 +260,7 @@ impl Status {
Status::Ok(code) | Status::Redirected(code, _) | Status::UnknownStatusCode(code) => {
code.as_u16().to_string()
}
Status::UnknownMailStatus(_) => "UNKNOWN".to_string(),
Status::Excluded => "EXCLUDED".to_string(),
Status::Error(e) => match e {
ErrorKind::RejectedStatusCode(code) => code.as_u16().to_string(),
Expand Down