From 8d98e095525ce76c7bfce5cdbc59af09d7084bc8 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 11 Sep 2025 14:20:01 +0300 Subject: [PATCH 1/3] fix: Add AWS environment variable checks for S3 tests --- datafusion-cli/src/catalog.rs | 15 +++++++++++++++ datafusion-cli/src/exec.rs | 13 +++++++++++++ datafusion-cli/src/object_storage.rs | 22 ++++++++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/datafusion-cli/src/catalog.rs b/datafusion-cli/src/catalog.rs index fd83b52de299..20d62eabc390 100644 --- a/datafusion-cli/src/catalog.rs +++ b/datafusion-cli/src/catalog.rs @@ -230,6 +230,8 @@ pub fn substitute_tilde(cur: String) -> String { } #[cfg(test)] mod tests { + use std::{env, vec}; + use super::*; use datafusion::catalog::SchemaProvider; @@ -284,6 +286,19 @@ mod tests { #[tokio::test] async fn query_s3_location_test() -> Result<()> { + let aws_envs = vec![ + "AWS_ENDPOINT", + "AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY", + "AWS_ALLOW_HTTP", + ]; + for aws_env in aws_envs { + if env::var(aws_env).is_err() { + eprint!("aws envs not set, skipping s3 test"); + return Ok(()); + } + } + let bucket = "examples3bucket"; let location = format!("s3://{bucket}/file.parquet"); diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index eb7174dbbd6f..d079a88a6440 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -582,6 +582,19 @@ mod tests { } #[tokio::test] async fn copy_to_external_object_store_test() -> Result<()> { + let aws_envs = vec![ + "AWS_ENDPOINT", + "AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY", + "AWS_ALLOW_HTTP", + ]; + for aws_env in aws_envs { + if std::env::var(aws_env).is_err() { + eprint!("aws envs not set, skipping s3 test"); + return Ok(()); + } + } + let locations = vec![ "s3://bucket/path/file.parquet", "oss://bucket/path/file.parquet", diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index de33e11fe010..d6d0b4627b79 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -579,6 +579,9 @@ mod tests { #[tokio::test] async fn s3_object_store_builder_default() -> Result<()> { + + check_aws_envs().await?; + let location = "s3://bucket/path/FAKE/file.parquet"; // Set it to a non-existent file to avoid reading the default configuration file std::env::set_var("AWS_CONFIG_FILE", "data/aws.config"); @@ -733,6 +736,8 @@ mod tests { #[tokio::test] async fn s3_object_store_builder_resolves_region_when_none_provided() -> Result<()> { + check_aws_envs().await?; + let expected_region = "eu-central-1"; let location = "s3://test-bucket/path/file.parquet"; // Set it to a non-existent file to avoid reading the default configuration file @@ -759,6 +764,8 @@ mod tests { #[tokio::test] async fn s3_object_store_builder_overrides_region_when_resolve_region_enabled( ) -> Result<()> { + check_aws_envs().await?; + let original_region = "us-east-1"; let expected_region = "eu-central-1"; // This should be the auto-detected region let location = "s3://test-bucket/path/file.parquet"; @@ -860,4 +867,19 @@ mod tests { .unwrap(); table_options } + + async fn check_aws_envs() -> Result<()> { + let aws_envs = [ + "AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY", + "AWS_REGION", + ]; + for aws_env in aws_envs { + if std::env::var(aws_env).is_err() { + eprint!("aws envs not set, skipping s3 test"); + return Ok(()); + } + } + Ok(()) + } } From c0a75d204cf6512c7a7b708f8e4b6d7281d54e40 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 11 Sep 2025 14:24:34 +0300 Subject: [PATCH 2/3] chore: fmt --- datafusion-cli/src/object_storage.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index d6d0b4627b79..efff89c6f290 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -579,7 +579,6 @@ mod tests { #[tokio::test] async fn s3_object_store_builder_default() -> Result<()> { - check_aws_envs().await?; let location = "s3://bucket/path/FAKE/file.parquet"; @@ -869,11 +868,7 @@ mod tests { } async fn check_aws_envs() -> Result<()> { - let aws_envs = [ - "AWS_ACCESS_KEY_ID", - "AWS_SECRET_ACCESS_KEY", - "AWS_REGION", - ]; + let aws_envs = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION"]; for aws_env in aws_envs { if std::env::var(aws_env).is_err() { eprint!("aws envs not set, skipping s3 test"); From 7a58221710f5bd9d9cb595d25663293d2e56cdf5 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 13 Sep 2025 11:16:18 +0300 Subject: [PATCH 3/3] fix: enhance AWS environment checks in S3 tests to skip if not set --- datafusion-cli/src/object_storage.rs | 33 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index efff89c6f290..a7c5c970f23a 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -579,7 +579,11 @@ mod tests { #[tokio::test] async fn s3_object_store_builder_default() -> Result<()> { - check_aws_envs().await?; + if let Err(DataFusionError::Execution(e)) = check_aws_envs().await { + // Skip test if AWS envs are not set + eprintln!("{e}"); + return Ok(()); + } let location = "s3://bucket/path/FAKE/file.parquet"; // Set it to a non-existent file to avoid reading the default configuration file @@ -735,8 +739,11 @@ mod tests { #[tokio::test] async fn s3_object_store_builder_resolves_region_when_none_provided() -> Result<()> { - check_aws_envs().await?; - + if let Err(DataFusionError::Execution(e)) = check_aws_envs().await { + // Skip test if AWS envs are not set + eprintln!("{e}"); + return Ok(()); + } let expected_region = "eu-central-1"; let location = "s3://test-bucket/path/file.parquet"; // Set it to a non-existent file to avoid reading the default configuration file @@ -763,7 +770,11 @@ mod tests { #[tokio::test] async fn s3_object_store_builder_overrides_region_when_resolve_region_enabled( ) -> Result<()> { - check_aws_envs().await?; + if let Err(DataFusionError::Execution(e)) = check_aws_envs().await { + // Skip test if AWS envs are not set + eprintln!("{e}"); + return Ok(()); + } let original_region = "us-east-1"; let expected_region = "eu-central-1"; // This should be the auto-detected region @@ -868,12 +879,16 @@ mod tests { } async fn check_aws_envs() -> Result<()> { - let aws_envs = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION"]; + let aws_envs = [ + "AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY", + "AWS_REGION", + "AWS_ALLOW_HTTP", + ]; for aws_env in aws_envs { - if std::env::var(aws_env).is_err() { - eprint!("aws envs not set, skipping s3 test"); - return Ok(()); - } + std::env::var(aws_env).map_err(|_| { + exec_datafusion_err!("aws envs not set, skipping s3 tests") + })?; } Ok(()) }