From aab8b3d8e75983bdec73138a99c35b3a783036fc Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 3 Sep 2021 13:48:52 -0700 Subject: [PATCH 01/27] wip --- pom.xml | 1 + sql/hive-shaded/pom.xml | 144 ++++++++++++++++++++++++++++++++++++++++ sql/hive/pom.xml | 51 ++------------ 3 files changed, 151 insertions(+), 45 deletions(-) create mode 100644 sql/hive-shaded/pom.xml diff --git a/pom.xml b/pom.xml index 2cece056b1c8..4d0da44823d1 100644 --- a/pom.xml +++ b/pom.xml @@ -96,6 +96,7 @@ streaming sql/catalyst sql/core + sql/hive-shaded sql/hive assembly examples diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml new file mode 100644 index 000000000000..221f7ae725bd --- /dev/null +++ b/sql/hive-shaded/pom.xml @@ -0,0 +1,144 @@ + + + + + 4.0.0 + + org.apache.spark + spark-parent_2.12 + 3.3.0-SNAPSHOT + ../../pom.xml + + + spark-hive-shaded_2.12 + jar + Spark Project Hive Shaded + http://spark.apache.org/ + + hive-shaded + + + + + ${hive.group} + hive-common + ${hive.common.scope} + + + ${hive.group} + hive-exec + ${hive.classifier} + + + ${hive.group} + hive-metastore + + + ${hive.group} + hive-serde + ${hive.serde.scope} + + + ${hive.group} + hive-shims + ${hive.shims.scope} + + + org.apache.hive + hive-llap-common + ${hive.llap.scope} + + + org.apache.hive + hive-llap-client + ${hive.llap.scope} + + + + + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes + + + org.apache.maven.plugins + maven-shade-plugin + + false + + + *:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + package + + shade + + + + + + reference.conf + + + log4j.properties + + + + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + enforce-versions + + enforce + + + + + + *:hive-cli + + + + + + + + + + diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 86cd1eddde33..09391457c102 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -40,6 +40,11 @@ spark-core_${scala.binary.version} ${project.version} + + org.apache.spark + spark-hive-shaded_${scala.binary.version} + ${project.version} + org.apache.spark spark-core_${scala.binary.version} @@ -85,51 +90,7 @@ scala-parallel-collections_${scala.binary.version} --> - - - ${hive.group} - hive-common - ${hive.common.scope} - - - ${hive.group} - hive-exec - ${hive.classifier} - - - ${hive.group} - hive-metastore - - - ${hive.group} - hive-serde - ${hive.serde.scope} - - - ${hive.group} - hive-shims - ${hive.shims.scope} - - - org.apache.hive - hive-llap-common - ${hive.llap.scope} - - - org.apache.hive - hive-llap-client - ${hive.llap.scope} - + org.apache.avro From 8232114ee99edd2bdda91b57ae65b131faac970a Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 3 Sep 2021 16:46:40 -0700 Subject: [PATCH 02/27] fix SBT build --- project/SparkBuild.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b1531a601ca0..d3bba8497d10 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -43,8 +43,8 @@ object BuildCommons { private val buildLocation = file(".").getAbsoluteFile.getParentFile - val sqlProjects@Seq(catalyst, sql, hive, hiveThriftServer, tokenProviderKafka010, sqlKafka010, avro) = Seq( - "catalyst", "sql", "hive", "hive-thriftserver", "token-provider-kafka-0-10", "sql-kafka-0-10", "avro" + val sqlProjects@Seq(catalyst, sql, hiveShaded, hive, hiveThriftServer, tokenProviderKafka010, sqlKafka010, avro) = Seq( + "catalyst", "sql", "hive-shaded", "hive", "hive-thriftserver", "token-provider-kafka-0-10", "sql-kafka-0-10", "avro" ).map(ProjectRef(buildLocation, _)) val streamingProjects@Seq(streaming, streamingKafka010) = From 1fe61ecbfd4b13d8c08789d80ac9dd390b29e0d6 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 3 Sep 2021 17:17:28 -0700 Subject: [PATCH 03/27] shade guava --- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- project/SparkBuild.scala | 4 ++-- sql/hive-shaded/pom.xml | 6 ++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 41c8a24c8fff..c753b09184f8 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -55,7 +55,7 @@ derby/10.14.2.0//derby-10.14.2.0.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar -gson/2.2.4//gson-2.2.4.jar +gson/2.8.6//gson-2.8.6.jar guava/14.0.1//guava-14.0.1.jar hadoop-client-api/3.3.1//hadoop-client-api-3.3.1.jar hadoop-client-runtime/3.3.1//hadoop-client-runtime-3.3.1.jar diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index d3bba8497d10..916b4c87e13f 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -990,10 +990,10 @@ object Unidoc { (ScalaUnidoc / unidoc / unidocProjectFilter) := inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes, - yarn, tags, streamingKafka010, sqlKafka010), + yarn, tags, streamingKafka010, sqlKafka010, hiveShaded), (JavaUnidoc / unidoc / unidocProjectFilter) := inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes, - yarn, tags, streamingKafka010, sqlKafka010), + yarn, tags, streamingKafka010, sqlKafka010, hiveShaded), (ScalaUnidoc / unidoc / unidocAllClasspaths) := { ignoreClasspaths((ScalaUnidoc / unidoc / unidocAllClasspaths).value) diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 221f7ae725bd..1e64cde6ec77 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -95,6 +95,12 @@ + + + com.google.common + ${spark.shade.packageName}.com.google.common + + From 4b1b7c9413ab92377587c53df61f46e78f3157e3 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 4 Sep 2021 16:39:04 -0700 Subject: [PATCH 04/27] exclude hiveShaded from mima --- project/SparkBuild.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 916b4c87e13f..8768ff28d0e1 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -366,8 +366,8 @@ object SparkBuild extends PomBuild { val mimaProjects = allProjects.filterNot { x => Seq( - spark, hive, hiveThriftServer, catalyst, repl, networkCommon, networkShuffle, networkYarn, - unsafe, tags, tokenProviderKafka010, sqlKafka010, kvstore, avro + spark, hiveShaded, hive, hiveThriftServer, catalyst, repl, networkCommon, networkShuffle, + networkYarn, unsafe, tags, tokenProviderKafka010, sqlKafka010, kvstore, avro ).contains(x) } From 0e3c47edeb08d7263da12a6d5d32e33387d872b7 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 4 Sep 2021 21:46:09 -0700 Subject: [PATCH 05/27] upgrade Guava to 27.0-jre --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 8 +++++++- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 8 +++++++- pom.xml | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index d8f3e2b36403..80c9e32402fc 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -6,6 +6,7 @@ ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar +animal-sniffer-annotations/1.17//animal-sniffer-annotations-1.17.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8//antlr4-runtime-4.8.jar @@ -31,6 +32,7 @@ bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar breeze_2.12/1.2//breeze_2.12-1.2.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar +checker-qual/2.5.2//checker-qual-2.5.2.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar @@ -62,10 +64,12 @@ datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar +error_prone_annotations/2.2.0//error_prone_annotations-2.2.0.jar +failureaccess/1.0//failureaccess-1.0.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar gson/2.2.4//gson-2.2.4.jar -guava/14.0.1//guava-14.0.1.jar +guava/27.0-jre//guava-27.0-jre.jar guice-servlet/3.0//guice-servlet-3.0.jar guice/3.0//guice-3.0.jar hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar @@ -106,6 +110,7 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar +j2objc-annotations/1.1//j2objc-annotations-1.1.jar jackson-annotations/2.12.5//jackson-annotations-2.12.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.12.5//jackson-core-2.12.5.jar @@ -179,6 +184,7 @@ lapack/2.2.0//lapack-2.2.0.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar +listenablefuture/9999.0-empty-to-avoid-conflict-with-guava//listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar log4j/1.2.17//log4j-1.2.17.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index c753b09184f8..3fe3d8e30f2e 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -6,6 +6,7 @@ ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar +animal-sniffer-annotations/1.17//animal-sniffer-annotations-1.17.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8//antlr4-runtime-4.8.jar @@ -26,6 +27,7 @@ bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar breeze_2.12/1.2//breeze_2.12-1.2.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar +checker-qual/2.5.2//checker-qual-2.5.2.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-cli/1.2//commons-cli-1.2.jar @@ -53,10 +55,12 @@ datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar +error_prone_annotations/2.2.0//error_prone_annotations-2.2.0.jar +failureaccess/1.0//failureaccess-1.0.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar gson/2.8.6//gson-2.8.6.jar -guava/14.0.1//guava-14.0.1.jar +guava/27.0-jre//guava-27.0-jre.jar hadoop-client-api/3.3.1//hadoop-client-api-3.3.1.jar hadoop-client-runtime/3.3.1//hadoop-client-runtime-3.3.1.jar hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar @@ -84,6 +88,7 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar +j2objc-annotations/1.1//j2objc-annotations-1.1.jar jackson-annotations/2.12.5//jackson-annotations-2.12.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.12.5//jackson-core-2.12.5.jar @@ -149,6 +154,7 @@ lapack/2.2.0//lapack-2.2.0.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar +listenablefuture/9999.0-empty-to-avoid-conflict-with-guava//listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar log4j/1.2.17//log4j-1.2.17.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar diff --git a/pom.xml b/pom.xml index 4d0da44823d1..fad4d744415f 100644 --- a/pom.xml +++ b/pom.xml @@ -182,7 +182,7 @@ 2.11.1 4.1.17 - 14.0.1 + 27.0-jre 3.0.16 2.34 2.10.10 From efecbeac1c4d70fbe03b163a15f7d212e2ec563b Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 7 Sep 2021 14:17:34 -0700 Subject: [PATCH 06/27] wip --- assembly/pom.xml | 2 +- build/mvn | 2 +- sql/hive-shaded/pom.xml | 56 ++++++++++++++++++++++------------- sql/hive-thriftserver/pom.xml | 21 ++++--------- sql/hive/pom.xml | 21 ------------- 5 files changed, 42 insertions(+), 60 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 74c2f44121fc..2f9cb872b494 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -78,7 +78,7 @@ com.google.guava diff --git a/build/mvn b/build/mvn index bf12347c0fb8..3ee45682ea37 100755 --- a/build/mvn +++ b/build/mvn @@ -29,7 +29,7 @@ _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # Preserve the calling directory _CALLING_DIR="$(pwd)" # Options used during compilation -_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Xss128m" +_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Xss512m" # Installs any application tarball given a URL, the expected tarball name, # and, optionally, a checkable binary path to determine if the binary has diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 1e64cde6ec77..2af579b8a1b5 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -39,38 +39,73 @@ ${hive.group} hive-common ${hive.common.scope} + true ${hive.group} hive-exec ${hive.classifier} + true ${hive.group} hive-metastore + true ${hive.group} hive-serde ${hive.serde.scope} + true ${hive.group} hive-shims ${hive.shims.scope} + true org.apache.hive hive-llap-common ${hive.llap.scope} + true org.apache.hive hive-llap-client ${hive.llap.scope} + true + + + hive-thriftserver + + + ${hive.group} + hive-cli + true + + + ${hive.group} + hive-jdbc + true + + + ${hive.group} + hive-beeline + true + + + ${hive.group} + hive-service-rpc + true + + + + + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes @@ -124,27 +159,6 @@ - - org.apache.maven.plugins - maven-enforcer-plugin - - - enforce-versions - - enforce - - - - - - *:hive-cli - - - - - - - diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 764712fcbb48..38632a1d8295 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -40,6 +40,11 @@ spark-hive_${scala.binary.version} ${project.version} + + org.apache.spark + spark-hive-shaded_${scala.binary.version} + ${project.version} + org.apache.spark spark-core_${scala.binary.version} @@ -71,22 +76,6 @@ com.google.guava guava - - ${hive.group} - hive-cli - - - ${hive.group} - hive-jdbc - - - ${hive.group} - hive-beeline - - - ${hive.group} - hive-service-rpc - org.eclipse.jetty jetty-server diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 09391457c102..75c616ec72eb 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -198,27 +198,6 @@ -da -Xmx4g -XX:ReservedCodeCacheSize=${CodeCacheSize} -Dio.netty.tryReflectionSetAccessible=true - - org.apache.maven.plugins - maven-enforcer-plugin - - - enforce-versions - - enforce - - - - - - *:hive-cli - - - - - - - From 30a3ee16a6836ff3fbdafeaa9c7553c4c949d8b3 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 7 Sep 2021 19:24:16 -0700 Subject: [PATCH 07/27] use 30.1.1-jre instead --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 44 +++---------------------- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 44 +++---------------------- pom.xml | 2 +- 3 files changed, 11 insertions(+), 79 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 80c9e32402fc..1a9200ae3f95 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -1,14 +1,10 @@ -HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar -ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar -animal-sniffer-annotations/1.17//animal-sniffer-annotations-1.17.jar annotations/17.0.0//annotations-17.0.0.jar -antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8//antlr4-runtime-4.8.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar aopalliance/1.0//aopalliance-1.0.jar @@ -28,11 +24,10 @@ avro-ipc/1.10.2//avro-ipc-1.10.2.jar avro-mapred/1.10.2//avro-mapred-1.10.2.jar avro/1.10.2//avro-1.10.2.jar blas/2.2.0//blas-2.2.0.jar -bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar breeze_2.12/1.2//breeze_2.12-1.2.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar -checker-qual/2.5.2//checker-qual-2.5.2.jar +checker-qual/3.8.0//checker-qual-3.8.0.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar @@ -43,7 +38,6 @@ commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.21//commons-compress-1.21.jar commons-configuration/1.6//commons-configuration-1.6.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar -commons-dbcp/1.4//commons-dbcp-1.4.jar commons-digester/1.8//commons-digester-1.8.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.4//commons-io-2.4.jar @@ -52,24 +46,20 @@ commons-lang3/3.12.0//commons-lang3-3.12.0.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.4.1//commons-math3-3.4.1.jar commons-net/3.1//commons-net-3.1.jar -commons-pool/1.5.4//commons-pool-1.5.4.jar commons-text/1.6//commons-text-1.6.jar compress-lzf/1.0.3//compress-lzf-1.0.3.jar core/1.1.2//core-1.1.2.jar curator-client/2.7.1//curator-client-2.7.1.jar curator-framework/2.7.1//curator-framework-2.7.1.jar curator-recipes/2.7.1//curator-recipes-2.7.1.jar -datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar -datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar -dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar -error_prone_annotations/2.2.0//error_prone_annotations-2.2.0.jar -failureaccess/1.0//failureaccess-1.0.jar +error_prone_annotations/2.5.1//error_prone_annotations-2.5.1.jar +failureaccess/1.0.1//failureaccess-1.0.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar gson/2.2.4//gson-2.2.4.jar -guava/27.0-jre//guava-27.0-jre.jar +guava/30.1.1-jre//guava-30.1.1-jre.jar guice-servlet/3.0//guice-servlet-3.0.jar guice/3.0//guice-3.0.jar hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar @@ -87,21 +77,7 @@ hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar -hive-beeline/2.3.9//hive-beeline-2.3.9.jar -hive-cli/2.3.9//hive-cli-2.3.9.jar -hive-common/2.3.9//hive-common-2.3.9.jar -hive-exec/2.3.9/core/hive-exec-2.3.9-core.jar -hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar -hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar -hive-metastore/2.3.9//hive-metastore-2.3.9.jar -hive-serde/2.3.9//hive-serde-2.3.9.jar -hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar -hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar -hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar -hive-shims/2.3.9//hive-shims-2.3.9.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar @@ -110,7 +86,7 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -j2objc-annotations/1.1//j2objc-annotations-1.1.jar +j2objc-annotations/1.3//j2objc-annotations-1.3.jar jackson-annotations/2.12.5//jackson-annotations-2.12.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.12.5//jackson-core-2.12.5.jar @@ -130,12 +106,9 @@ jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.inject/1//javax.inject-1.jar -javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar -javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar -jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.34//jersey-client-2.34.jar jersey-common/2.34//jersey-common-2.34.jar jersey-container-servlet-core/2.34//jersey-container-servlet-core-2.34.jar @@ -145,18 +118,15 @@ jersey-server/2.34//jersey-server-2.34.jar jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar jetty-util/6.1.26//jetty-util-6.1.26.jar jetty/6.1.26//jetty-6.1.26.jar -jline/2.14.6//jline-2.14.6.jar joda-time/2.10.10//joda-time-2.10.10.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar -json/1.8//json-1.8.jar json4s-ast_2.12/3.7.0-M11//json4s-ast_2.12-3.7.0-M11.jar json4s-core_2.12/3.7.0-M11//json4s-core_2.12-3.7.0-M11.jar json4s-jackson_2.12/3.7.0-M11//json4s-jackson_2.12-3.7.0-M11.jar json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar -jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/5.7.3//kubernetes-client-5.7.3.jar @@ -233,14 +203,10 @@ spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar spire_2.12/0.17.0//spire_2.12-0.17.0.jar -stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar -super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar tink/1.6.0//tink-1.6.0.jar -transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar -velocity/1.5//velocity-1.5.jar xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar xercesImpl/2.12.0//xercesImpl-2.12.0.jar xml-apis/1.4.01//xml-apis-1.4.01.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 3fe3d8e30f2e..e13cd2ee68cb 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -1,14 +1,10 @@ -HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar -ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar -animal-sniffer-annotations/1.17//animal-sniffer-annotations-1.17.jar annotations/17.0.0//annotations-17.0.0.jar -antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8//antlr4-runtime-4.8.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar arpack/2.2.0//arpack-2.2.0.jar @@ -23,11 +19,10 @@ avro-ipc/1.10.2//avro-ipc-1.10.2.jar avro-mapred/1.10.2//avro-mapred-1.10.2.jar avro/1.10.2//avro-1.10.2.jar blas/2.2.0//blas-2.2.0.jar -bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar breeze_2.12/1.2//breeze_2.12-1.2.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar -checker-qual/2.5.2//checker-qual-2.5.2.jar +checker-qual/3.8.0//checker-qual-3.8.0.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-cli/1.2//commons-cli-1.2.jar @@ -36,50 +31,31 @@ commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.21//commons-compress-1.21.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar -commons-dbcp/1.4//commons-dbcp-1.4.jar commons-io/2.11.0//commons-io-2.11.0.jar commons-lang/2.6//commons-lang-2.6.jar commons-lang3/3.12.0//commons-lang3-3.12.0.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.4.1//commons-math3-3.4.1.jar commons-net/3.1//commons-net-3.1.jar -commons-pool/1.5.4//commons-pool-1.5.4.jar commons-text/1.6//commons-text-1.6.jar compress-lzf/1.0.3//compress-lzf-1.0.3.jar core/1.1.2//core-1.1.2.jar curator-client/2.13.0//curator-client-2.13.0.jar curator-framework/2.13.0//curator-framework-2.13.0.jar curator-recipes/2.13.0//curator-recipes-2.13.0.jar -datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar -datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar -dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar -error_prone_annotations/2.2.0//error_prone_annotations-2.2.0.jar -failureaccess/1.0//failureaccess-1.0.jar +error_prone_annotations/2.5.1//error_prone_annotations-2.5.1.jar +failureaccess/1.0.1//failureaccess-1.0.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar gson/2.8.6//gson-2.8.6.jar -guava/27.0-jre//guava-27.0-jre.jar +guava/30.1.1-jre//guava-30.1.1-jre.jar hadoop-client-api/3.3.1//hadoop-client-api-3.3.1.jar hadoop-client-runtime/3.3.1//hadoop-client-runtime-3.3.1.jar hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar hadoop-yarn-server-web-proxy/3.3.1//hadoop-yarn-server-web-proxy-3.3.1.jar -hive-beeline/2.3.9//hive-beeline-2.3.9.jar -hive-cli/2.3.9//hive-cli-2.3.9.jar -hive-common/2.3.9//hive-common-2.3.9.jar -hive-exec/2.3.9/core/hive-exec-2.3.9-core.jar -hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar -hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar -hive-metastore/2.3.9//hive-metastore-2.3.9.jar -hive-serde/2.3.9//hive-serde-2.3.9.jar -hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar -hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar -hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar -hive-shims/2.3.9//hive-shims-2.3.9.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar @@ -88,7 +64,7 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -j2objc-annotations/1.1//j2objc-annotations-1.1.jar +j2objc-annotations/1.3//j2objc-annotations-1.3.jar jackson-annotations/2.12.5//jackson-annotations-2.12.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.12.5//jackson-core-2.12.5.jar @@ -105,28 +81,22 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar -javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar -javolution/5.5.1//javolution-5.5.1.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar -jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.34//jersey-client-2.34.jar jersey-common/2.34//jersey-common-2.34.jar jersey-container-servlet-core/2.34//jersey-container-servlet-core-2.34.jar jersey-container-servlet/2.34//jersey-container-servlet-2.34.jar jersey-hk2/2.34//jersey-hk2-2.34.jar jersey-server/2.34//jersey-server-2.34.jar -jline/2.14.6//jline-2.14.6.jar joda-time/2.10.10//joda-time-2.10.10.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar -json/1.8//json-1.8.jar json4s-ast_2.12/3.7.0-M11//json4s-ast_2.12-3.7.0-M11.jar json4s-core_2.12/3.7.0-M11//json4s-core_2.12-3.7.0-M11.jar json4s-jackson_2.12/3.7.0-M11//json4s-jackson_2.12-3.7.0-M11.jar json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsr305/3.0.0//jsr305-3.0.0.jar -jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/5.7.3//kubernetes-client-5.7.3.jar @@ -203,14 +173,10 @@ spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar spire_2.12/0.17.0//spire_2.12-0.17.0.jar -stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar -super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar tink/1.6.0//tink-1.6.0.jar -transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar -velocity/1.5//velocity-1.5.jar xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar xz/1.8//xz-1.8.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar diff --git a/pom.xml b/pom.xml index fad4d744415f..9890feee541c 100644 --- a/pom.xml +++ b/pom.xml @@ -182,7 +182,7 @@ 2.11.1 4.1.17 - 27.0-jre + 30.1.1-jre 3.0.16 2.34 2.10.10 From 835efed74bf014ca47c0cdd99da962bf95a13a40 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 7 Sep 2021 22:04:32 -0700 Subject: [PATCH 08/27] fix config --- sql/hive-shaded/pom.xml | 46 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 2af579b8a1b5..48579c17c896 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -113,30 +113,6 @@ org.apache.maven.plugins maven-shade-plugin - - false - - - *:* - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - com.google.common - ${spark.shade.packageName}.com.google.common - - - package @@ -144,6 +120,28 @@ shade + false + + + *:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + com.google.common + org.apache.spark.com.google.common + + From 005862ce71d3e5b78ed1e6439205c0d88bddf1a1 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 8 Sep 2021 10:23:51 -0700 Subject: [PATCH 09/27] revert changes on Xss --- build/mvn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/mvn b/build/mvn index 3ee45682ea37..bf12347c0fb8 100755 --- a/build/mvn +++ b/build/mvn @@ -29,7 +29,7 @@ _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # Preserve the calling directory _CALLING_DIR="$(pwd)" # Options used during compilation -_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Xss512m" +_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Xss128m" # Installs any application tarball given a URL, the expected tarball name, # and, optionally, a checkable binary path to determine if the binary has From b91f8ebf88b5448219ded8554e654f834fc9f795 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 10 Sep 2021 17:05:10 -0700 Subject: [PATCH 10/27] make parent's shade plugin not inheritable --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 9890feee541c..c5864fe54bd4 100644 --- a/pom.xml +++ b/pom.xml @@ -3010,6 +3010,7 @@ org.apache.maven.plugins maven-shade-plugin + false false From 02bf93fbb1f8d88b1a8478baadc62aaeacbb77c7 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 11 Sep 2021 11:42:16 -0700 Subject: [PATCH 11/27] change shade name --- sql/hive-shaded/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 48579c17c896..be6d360862f6 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -139,7 +139,7 @@ com.google.common - org.apache.spark.com.google.common + ${spark.shade.packageName}.guava From 5f63baba1ea37f010903d4bc3f4d7488379762b6 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 11 Sep 2021 12:05:54 -0700 Subject: [PATCH 12/27] shade more --- sql/hive-shaded/pom.xml | 8 ++++++++ sql/hive/pom.xml | 1 - 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index be6d360862f6..fcbf7e9c4cec 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -141,6 +141,14 @@ com.google.common ${spark.shade.packageName}.guava + + com.google.protobuf + ${spark.shade.packageName}.com.google.protobuf + + + org.apache.commons + ${spark.shade.packageName}.org.apache.commons + diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 75c616ec72eb..1f92b8bc5d4b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -90,7 +90,6 @@ scala-parallel-collections_${scala.binary.version} --> - org.apache.avro From 170a8c43dee66f29d2f96edc92543bb89789f218 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 14 Sep 2021 15:07:26 -0700 Subject: [PATCH 13/27] properly handle plugin inheritance --- pom.xml | 1 - sql/hive-shaded/pom.xml | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index c5864fe54bd4..9890feee541c 100644 --- a/pom.xml +++ b/pom.xml @@ -3010,7 +3010,6 @@ org.apache.maven.plugins maven-shade-plugin - false false diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index fcbf7e9c4cec..5e85d70d44ab 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -136,18 +136,18 @@ - + com.google.common - ${spark.shade.packageName}.guava + ${spark.shade.packageName}.hive.guava com.google.protobuf - ${spark.shade.packageName}.com.google.protobuf + ${spark.shade.packageName}.hive.com.google.protobuf org.apache.commons - ${spark.shade.packageName}.org.apache.commons + ${spark.shade.packageName}.hive.org.apache.commons From 3294e9833d6ddf211c027d205bb35eaabb515534 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 14 Sep 2021 15:09:27 -0700 Subject: [PATCH 14/27] remove guava from assembly --- assembly/pom.xml | 11 ----------- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 6 ------ dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 6 ------ 3 files changed, 23 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 2f9cb872b494..f5b6ecebd81e 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -74,17 +74,6 @@ spark-repl_${scala.binary.version} ${project.version} - - - - com.google.guava - guava - ${hadoop.deps.scope} - diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 1a9200ae3f95..2ca4f174d864 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -27,7 +27,6 @@ blas/2.2.0//blas-2.2.0.jar breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar breeze_2.12/1.2//breeze_2.12-1.2.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar -checker-qual/3.8.0//checker-qual-3.8.0.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar @@ -54,12 +53,9 @@ curator-framework/2.7.1//curator-framework-2.7.1.jar curator-recipes/2.7.1//curator-recipes-2.7.1.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar derby/10.14.2.0//derby-10.14.2.0.jar -error_prone_annotations/2.5.1//error_prone_annotations-2.5.1.jar -failureaccess/1.0.1//failureaccess-1.0.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar gson/2.2.4//gson-2.2.4.jar -guava/30.1.1-jre//guava-30.1.1-jre.jar guice-servlet/3.0//guice-servlet-3.0.jar guice/3.0//guice-3.0.jar hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar @@ -86,7 +82,6 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -j2objc-annotations/1.3//j2objc-annotations-1.3.jar jackson-annotations/2.12.5//jackson-annotations-2.12.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.12.5//jackson-core-2.12.5.jar @@ -154,7 +149,6 @@ lapack/2.2.0//lapack-2.2.0.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar -listenablefuture/9999.0-empty-to-avoid-conflict-with-guava//listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar log4j/1.2.17//log4j-1.2.17.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index e13cd2ee68cb..90497ddf5d0a 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -22,7 +22,6 @@ blas/2.2.0//blas-2.2.0.jar breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar breeze_2.12/1.2//breeze_2.12-1.2.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar -checker-qual/3.8.0//checker-qual-3.8.0.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-cli/1.2//commons-cli-1.2.jar @@ -45,12 +44,9 @@ curator-framework/2.13.0//curator-framework-2.13.0.jar curator-recipes/2.13.0//curator-recipes-2.13.0.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar derby/10.14.2.0//derby-10.14.2.0.jar -error_prone_annotations/2.5.1//error_prone_annotations-2.5.1.jar -failureaccess/1.0.1//failureaccess-1.0.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar gson/2.8.6//gson-2.8.6.jar -guava/30.1.1-jre//guava-30.1.1-jre.jar hadoop-client-api/3.3.1//hadoop-client-api-3.3.1.jar hadoop-client-runtime/3.3.1//hadoop-client-runtime-3.3.1.jar hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar @@ -64,7 +60,6 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -j2objc-annotations/1.3//j2objc-annotations-1.3.jar jackson-annotations/2.12.5//jackson-annotations-2.12.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.12.5//jackson-core-2.12.5.jar @@ -124,7 +119,6 @@ lapack/2.2.0//lapack-2.2.0.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar -listenablefuture/9999.0-empty-to-avoid-conflict-with-guava//listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar log4j/1.2.17//log4j-1.2.17.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar From 4001feca42baa198bce53693e281ddaa08e06c86 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 14 Sep 2021 22:30:07 -0700 Subject: [PATCH 15/27] include guava --- sql/hive-shaded/pom.xml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 5e85d70d44ab..80af7ba7205d 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -76,6 +76,13 @@ ${hive.llap.scope} true + + com.google.guava + guava + 14.0.1 + compile + true + @@ -121,7 +128,7 @@ false - + *:* From 32e4a757a51559f175a25c7c19723cf699673033 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 15 Sep 2021 14:47:15 -0700 Subject: [PATCH 16/27] Revert "remove guava from assembly" This reverts commit 46243255362f4240a152f0295dfa26dbc22c0284. --- assembly/pom.xml | 11 +++++++++++ dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 6 ++++++ dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 6 ++++++ 3 files changed, 23 insertions(+) diff --git a/assembly/pom.xml b/assembly/pom.xml index f5b6ecebd81e..2f9cb872b494 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -74,6 +74,17 @@ spark-repl_${scala.binary.version} ${project.version} + + + + com.google.guava + guava + ${hadoop.deps.scope} + diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 2ca4f174d864..1a9200ae3f95 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -27,6 +27,7 @@ blas/2.2.0//blas-2.2.0.jar breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar breeze_2.12/1.2//breeze_2.12-1.2.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar +checker-qual/3.8.0//checker-qual-3.8.0.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar @@ -53,9 +54,12 @@ curator-framework/2.7.1//curator-framework-2.7.1.jar curator-recipes/2.7.1//curator-recipes-2.7.1.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar derby/10.14.2.0//derby-10.14.2.0.jar +error_prone_annotations/2.5.1//error_prone_annotations-2.5.1.jar +failureaccess/1.0.1//failureaccess-1.0.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar gson/2.2.4//gson-2.2.4.jar +guava/30.1.1-jre//guava-30.1.1-jre.jar guice-servlet/3.0//guice-servlet-3.0.jar guice/3.0//guice-3.0.jar hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar @@ -82,6 +86,7 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar +j2objc-annotations/1.3//j2objc-annotations-1.3.jar jackson-annotations/2.12.5//jackson-annotations-2.12.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.12.5//jackson-core-2.12.5.jar @@ -149,6 +154,7 @@ lapack/2.2.0//lapack-2.2.0.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar +listenablefuture/9999.0-empty-to-avoid-conflict-with-guava//listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar log4j/1.2.17//log4j-1.2.17.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 90497ddf5d0a..e13cd2ee68cb 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -22,6 +22,7 @@ blas/2.2.0//blas-2.2.0.jar breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar breeze_2.12/1.2//breeze_2.12-1.2.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar +checker-qual/3.8.0//checker-qual-3.8.0.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-cli/1.2//commons-cli-1.2.jar @@ -44,9 +45,12 @@ curator-framework/2.13.0//curator-framework-2.13.0.jar curator-recipes/2.13.0//curator-recipes-2.13.0.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar derby/10.14.2.0//derby-10.14.2.0.jar +error_prone_annotations/2.5.1//error_prone_annotations-2.5.1.jar +failureaccess/1.0.1//failureaccess-1.0.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar gson/2.8.6//gson-2.8.6.jar +guava/30.1.1-jre//guava-30.1.1-jre.jar hadoop-client-api/3.3.1//hadoop-client-api-3.3.1.jar hadoop-client-runtime/3.3.1//hadoop-client-runtime-3.3.1.jar hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar @@ -60,6 +64,7 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar +j2objc-annotations/1.3//j2objc-annotations-1.3.jar jackson-annotations/2.12.5//jackson-annotations-2.12.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.12.5//jackson-core-2.12.5.jar @@ -119,6 +124,7 @@ lapack/2.2.0//lapack-2.2.0.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar +listenablefuture/9999.0-empty-to-avoid-conflict-with-guava//listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar log4j/1.2.17//log4j-1.2.17.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar From ffd9f2de533c797dbe9221096ab52f0c15dac7f8 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 16 Sep 2021 12:55:17 -0700 Subject: [PATCH 17/27] exclude transitive dependencies from Hive which are already imported by Spark --- pom.xml | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9890feee541c..38a66869dd1c 100644 --- a/pom.xml +++ b/pom.xml @@ -182,6 +182,9 @@ 2.11.1 4.1.17 + 4.2.4 + 4.1.19 + 3.2.0-m3 30.1.1-jre 3.0.16 2.34 @@ -198,6 +201,7 @@ 1.2 1.60 1.6.0 + 2.14.6 + + + org.apache.commons + commons-lang3 + + + joda-time + joda-time + + + io.dropwizard.metrics + metrics-core + + + io.dropwizard.metrics + metrics-jvm + + + io.dropwizard.metrics + metrics-json + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + + jline + jline + @@ -1933,6 +1978,23 @@ org.pentaho pentaho-aggdesigner-algorithm + + + org.datanucleus + datanucleus-core + + + commons-io + commons-io + + + oro + oro + + + org.apache.ivy + ivy + @@ -2001,6 +2063,11 @@ org.codehaus.groovy groovy-all + + + org.apache.httpcomponents + httpclient + @@ -2053,6 +2120,31 @@ * + + + org.apache.derby + derby + + + commons-pool + commons-pool + + + org.datanucleus + datanucleus-core + + + org.datanucleus + datanucleus-api-jdo + + + org.datanucleus + datanucleus-rdbms + + + org.datanucleus + javax.jdo + @@ -2236,6 +2328,10 @@ org.slf4j slf4j-api + + org.apache.commons + commons-lang3 + @@ -2377,6 +2473,21 @@ datanucleus-core ${datanucleus-core.version} + + org.datanucleus + datanucleus-api-jdo + ${datanucleus-api-jdo.version} + + + org.datanucleus + datanucleus-rdbms + ${datanucleus-rdbms.version} + + + org.datanucleus + javax.jdo + ${datanucleus-javax.jdo-version} + org.apache.thrift libthrift From 4289065f2c490f57197cee10c354353b30bfaf1c Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 16 Sep 2021 13:22:19 -0700 Subject: [PATCH 18/27] relocate more artifacts --- sql/hive-shaded/pom.xml | 36 ++++++++++++++++++++++++++++++++++++ sql/hive/pom.xml | 16 ++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 80af7ba7205d..71015497e134 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -152,10 +152,46 @@ com.google.protobuf ${spark.shade.packageName}.hive.com.google.protobuf + + com.google.gson + ${spark.shade.packageName}.hive.com.google.gson + + + com.github.joshelser + ${spark.shade.packageName}.hive.com.github.joshelser + + + com.jolbox.bonecp + ${spark.shade.packageName}.hive.com.jolbox.bonecp + + + com.zaxxer.hikari + ${spark.shade.packageName}.hive.com.zaxxer.hikari + + + au.com.bytecode + ${spark.shade.packageName}.hive.au.com.bytecode + org.apache.commons ${spark.shade.packageName}.hive.org.apache.commons + + org.antlr.runtime + ${spark.shade.packageName}.hive.org.antlr.runtime + + + org.apache.velocity + ${spark.shade.packageName}.hive.org.apache.velocity + + + org.stringtemplate.v4 + ${spark.shade.packageName}.hive.org.stringtemplate.v4 + + + javolution + ${spark.shade.packageName}.hive.javolution + diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 1f92b8bc5d4b..dd625d52bde3 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -128,6 +128,18 @@ org.datanucleus datanucleus-core + + org.datanucleus + datanucleus-api-jdo + + + org.datanucleus + datanucleus-rdbms + + + org.datanucleus + javax.jdo + org.apache.hadoop ${hadoop-client-runtime.artifact} @@ -145,6 +157,10 @@ org.apache.derby derby + + jline + jline + org.scala-lang scala-compiler From b0aba1fd53a69aec5982c2db2647785219724958 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 16 Sep 2021 14:00:01 -0700 Subject: [PATCH 19/27] comments --- assembly/pom.xml | 4 ++++ sql/hive-shaded/pom.xml | 3 +++ 2 files changed, 7 insertions(+) diff --git a/assembly/pom.xml b/assembly/pom.xml index 2f9cb872b494..3cc1ff200153 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -79,6 +79,10 @@ Because we don't shade dependencies anymore, we need to restore Guava to compile scope so that the libraries Spark depend on have it available. We'll package the version that Spark uses which is not the same as Hadoop dependencies, but works. + + TODO: consider removing this. It's only required by a few libraries such as + curator-client. See https://cwiki.apache.org/confluence/display/CURATOR/TN13) on why + curator imposes a hard dependency on Guava. --> com.google.guava diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 71015497e134..87631e5caa70 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -34,6 +34,8 @@ hive-shaded + ${hive.group} @@ -76,6 +78,7 @@ ${hive.llap.scope} true + com.google.guava guava From 883f99f4af96d56d1cde229a1a63110f677b8680 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 16 Sep 2021 16:29:40 -0700 Subject: [PATCH 20/27] fix manifest --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 5 +++++ dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 1a9200ae3f95..5d4ea9e929f5 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -52,7 +52,9 @@ core/1.1.2//core-1.1.2.jar curator-client/2.7.1//curator-client-2.7.1.jar curator-framework/2.7.1//curator-framework-2.7.1.jar curator-recipes/2.7.1//curator-recipes-2.7.1.jar +datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar +datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar error_prone_annotations/2.5.1//error_prone_annotations-2.5.1.jar failureaccess/1.0.1//failureaccess-1.0.1.jar @@ -106,6 +108,7 @@ jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.inject/1//javax.inject-1.jar +javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar @@ -118,6 +121,7 @@ jersey-server/2.34//jersey-server-2.34.jar jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar jetty-util/6.1.26//jetty-util-6.1.26.jar jetty/6.1.26//jetty-6.1.26.jar +jline/2.14.6//jline-2.14.6.jar joda-time/2.10.10//joda-time-2.10.10.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar @@ -206,6 +210,7 @@ spire_2.12/0.17.0//spire_2.12-0.17.0.jar stream/2.9.6//stream-2.9.6.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar tink/1.6.0//tink-1.6.0.jar +transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar xercesImpl/2.12.0//xercesImpl-2.12.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index e13cd2ee68cb..408a57455076 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -43,7 +43,9 @@ core/1.1.2//core-1.1.2.jar curator-client/2.13.0//curator-client-2.13.0.jar curator-framework/2.13.0//curator-framework-2.13.0.jar curator-recipes/2.13.0//curator-recipes-2.13.0.jar +datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar +datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar error_prone_annotations/2.5.1//error_prone_annotations-2.5.1.jar failureaccess/1.0.1//failureaccess-1.0.1.jar @@ -81,6 +83,7 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar +javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jersey-client/2.34//jersey-client-2.34.jar @@ -89,6 +92,7 @@ jersey-container-servlet-core/2.34//jersey-container-servlet-core-2.34.jar jersey-container-servlet/2.34//jersey-container-servlet-2.34.jar jersey-hk2/2.34//jersey-hk2-2.34.jar jersey-server/2.34//jersey-server-2.34.jar +jline/2.14.6//jline-2.14.6.jar joda-time/2.10.10//joda-time-2.10.10.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar @@ -176,6 +180,7 @@ spire_2.12/0.17.0//spire_2.12-0.17.0.jar stream/2.9.6//stream-2.9.6.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar tink/1.6.0//tink-1.6.0.jar +transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar xz/1.8//xz-1.8.jar From 0fbd2969e4ee08e33fea3d6f254b4b892a982dd3 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 21 Sep 2021 17:35:08 -0700 Subject: [PATCH 21/27] add source --- sql/hive-shaded/pom.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 87631e5caa70..6e968f53eb21 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -130,6 +130,8 @@ shade + true + true false From f317b67d08876e11022bc74605cdafe76c58e991 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 22 Sep 2021 10:33:41 -0700 Subject: [PATCH 22/27] make hive-sharded module to respect hive.deps.scope --- assembly/pom.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/assembly/pom.xml b/assembly/pom.xml index 3cc1ff200153..21f9ba5fd456 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -169,6 +169,13 @@ hive + + + org.apache.spark + spark-hive-shaded_${scala.binary.version} + ${project.version} + ${hive.deps.scope} + org.apache.spark spark-hive_${scala.binary.version} @@ -178,7 +185,14 @@ hive-thriftserver + + + org.apache.spark + spark-hive-shaded_${scala.binary.version} + ${project.version} + ${hive.deps.scope} + org.apache.spark spark-hive-thriftserver_${scala.binary.version} From 2862cc906a567a8b128cab4cbfa0e931f34f8b25 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 22 Sep 2021 11:09:59 -0700 Subject: [PATCH 23/27] add build-helper-maven-plugin for the shaded module --- sql/hive-shaded/pom.xml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 6e968f53eb21..e9baa0de3dea 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -213,6 +213,30 @@ + + + org.codehaus.mojo + build-helper-maven-plugin + + + compile + package + + attach-artifact + + + + + ${basedir}/target/${project.artifactId}-${project.version}.jar + jar + optional + + + + + + From a490c79821df815da7aab72acf0f172c9adf1d59 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 22 Sep 2021 13:09:31 -0700 Subject: [PATCH 24/27] exclude more artifacts --- pom.xml | 37 ++++++++++++++++++++++++++++++++++- sql/hive-shaded/pom.xml | 10 ++++++++++ sql/hive-thriftserver/pom.xml | 4 ++++ sql/hive/pom.xml | 8 ++++++++ 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 38a66869dd1c..ccf9a0f15f97 100644 --- a/pom.xml +++ b/pom.xml @@ -185,6 +185,8 @@ 4.2.4 4.1.19 3.2.0-m3 + 3.0.1 + 1.0.1 30.1.1-jre 3.0.16 2.34 @@ -1660,6 +1662,10 @@ commons-io commons-io + + jline + jline + @@ -1716,6 +1722,10 @@ commons-logging commons-logging + + jline + jline + @@ -1995,6 +2005,10 @@ org.apache.ivy ivy + + stax + stax-api + @@ -2068,6 +2082,10 @@ org.apache.httpcomponents httpclient + + org.apache.httpcomponents + httpcore + @@ -2145,6 +2163,10 @@ org.datanucleus javax.jdo + + javax.jdo + jdo-api + @@ -2339,7 +2361,6 @@ org.apache.hive hive-llap-client ${hive23.version} - test org.apache.hive @@ -2365,6 +2386,10 @@ org.apache.zookeeper zookeeper + + org.apache.commons + commons-lang3 + org.slf4j slf4j-api @@ -2488,6 +2513,16 @@ javax.jdo ${datanucleus-javax.jdo-version} + + javax.jdo + jdo-api + ${javax.jdo.version} + + + stax + stax-api + ${stax-api.version} + org.apache.thrift libthrift diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index e9baa0de3dea..3b146111e2e3 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -145,6 +145,12 @@ META-INF/*.SF META-INF/*.DSA META-INF/*.RSA + parquet-logging.properties + hive-log4j2.properties + hive-exec-log4j2.properties + beeline-log4j2.properties + tez-container-log4j2.properties + bonecp-default-config.xml @@ -197,6 +203,10 @@ javolution ${spark.shade.packageName}.hive.javolution + + org.supercsv + ${spark.shade.packageName}.hive.org.supercsv + diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 38632a1d8295..f90e2f7f57f8 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -86,6 +86,10 @@ jetty-servlet provided + + org.apache.httpcomponents + httpcore + org.seleniumhq.selenium diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index dd625d52bde3..c72135d56ee9 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -140,6 +140,14 @@ org.datanucleus javax.jdo + + javax.jdo + jdo-api + + + stax + stax-api + org.apache.hadoop ${hadoop-client-runtime.artifact} From 05e641be9f0dd54d74614872816347f6ed63e56f Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 22 Sep 2021 13:58:04 -0700 Subject: [PATCH 25/27] update manifests --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 3 +++ dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 3 +++ 2 files changed, 6 insertions(+) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 5d4ea9e929f5..cdc394a893ff 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -112,6 +112,7 @@ javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar +jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.34//jersey-client-2.34.jar jersey-common/2.34//jersey-common-2.34.jar jersey-container-servlet-core/2.34//jersey-container-servlet-core-2.34.jar @@ -131,6 +132,7 @@ json4s-jackson_2.12/3.7.0-M11//json4s-jackson_2.12-3.7.0-M11.jar json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar +jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/5.7.3//kubernetes-client-5.7.3.jar @@ -207,6 +209,7 @@ spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar spire_2.12/0.17.0//spire_2.12-0.17.0.jar +stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar tink/1.6.0//tink-1.6.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 408a57455076..76b6ff52f42c 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -86,6 +86,7 @@ javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar +jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.34//jersey-client-2.34.jar jersey-common/2.34//jersey-common-2.34.jar jersey-container-servlet-core/2.34//jersey-container-servlet-core-2.34.jar @@ -101,6 +102,7 @@ json4s-core_2.12/3.7.0-M11//json4s-core_2.12-3.7.0-M11.jar json4s-jackson_2.12/3.7.0-M11//json4s-jackson_2.12-3.7.0-M11.jar json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsr305/3.0.0//jsr305-3.0.0.jar +jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/5.7.3//kubernetes-client-5.7.3.jar @@ -177,6 +179,7 @@ spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar spire_2.12/0.17.0//spire_2.12-0.17.0.jar +stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar tink/1.6.0//tink-1.6.0.jar From 6b57ccc43762bcc6cb6a8f74c4d98db51a8a2e80 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 22 Sep 2021 14:16:55 -0700 Subject: [PATCH 26/27] license and variable --- LICENSE-binary | 5 +++++ assembly/pom.xml | 2 +- pom.xml | 1 + sql/hive-shaded/pom.xml | 2 +- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 32c7fc315d1a..dfbb58736c07 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -316,10 +316,14 @@ com.fasterxml.jackson.module:jackson-module-scala_2.12 com.github.mifmif:generex com.google.code.findbugs:jsr305 com.google.code.gson:gson +com.google.errorprone:error_prone_annotations com.google.flatbuffers:flatbuffers-java +com.google.guava:failureaccess com.google.guava:guava +com.google.guava:listenablefuture com.google.inject:guice com.google.inject.extensions:guice-servlet +com.google.j2objc:j2objc-annotations com.twitter:parquet-hadoop-bundle commons-cli:commons-cli commons-dbcp:commons-dbcp @@ -471,6 +475,7 @@ MIT License ----------- com.microsoft.sqlserver:mssql-jdbc +org.checkerframework:checker-qual org.typelevel:spire_2.12 org.typelevel:spire-macros_2.12 org.typelevel:spire-platform_2.12 diff --git a/assembly/pom.xml b/assembly/pom.xml index 21f9ba5fd456..e3119cf56f34 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -80,7 +80,7 @@ that the libraries Spark depend on have it available. We'll package the version that Spark uses which is not the same as Hadoop dependencies, but works. - TODO: consider removing this. It's only required by a few libraries such as + TODO(SPARK-36676): consider removing this. It's only required by a few libraries such as curator-client. See https://cwiki.apache.org/confluence/display/CURATOR/TN13) on why curator imposes a hard dependency on Guava. --> diff --git a/pom.xml b/pom.xml index ccf9a0f15f97..87907e69cb35 100644 --- a/pom.xml +++ b/pom.xml @@ -188,6 +188,7 @@ 3.0.1 1.0.1 30.1.1-jre + 14.0.1 3.0.16 2.34 2.10.10 diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 3b146111e2e3..22370c9cb167 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -82,7 +82,7 @@ com.google.guava guava - 14.0.1 + ${hive.guava.version} compile true From c2ec1f8122ea9b871a1679571f0b20009be39cb7 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 24 Sep 2021 11:03:38 -0700 Subject: [PATCH 27/27] address comments --- pom.xml | 1 + project/SparkBuild.scala | 2 +- sql/hive-shaded/pom.xml | 14 -------------- .../sql/hive/client/IsolatedClientLoader.scala | 1 - 4 files changed, 2 insertions(+), 16 deletions(-) diff --git a/pom.xml b/pom.xml index 87907e69cb35..b835ab9295d3 100644 --- a/pom.xml +++ b/pom.xml @@ -187,6 +187,7 @@ 3.2.0-m3 3.0.1 1.0.1 + 30.1.1-jre 14.0.1 3.0.16 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 8768ff28d0e1..528a61ff9f27 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -667,7 +667,7 @@ object KubernetesIntegrationTests { * Overrides to work around sbt's dependency resolution being different from Maven's. */ object DependencyOverrides { - lazy val guavaVersion = sys.props.get("guava.version").getOrElse("14.0.1") + lazy val guavaVersion = sys.props.get("guava.version").getOrElse("30.1.1-jre") lazy val settings = Seq( dependencyOverrides += "com.google.guava" % "guava" % guavaVersion, dependencyOverrides += "xerces" % "xercesImpl" % "2.12.0", diff --git a/sql/hive-shaded/pom.xml b/sql/hive-shaded/pom.xml index 22370c9cb167..10be1ea95d11 100644 --- a/sql/hive-shaded/pom.xml +++ b/sql/hive-shaded/pom.xml @@ -34,49 +34,40 @@ hive-shaded - ${hive.group} hive-common ${hive.common.scope} - true ${hive.group} hive-exec ${hive.classifier} - true ${hive.group} hive-metastore - true ${hive.group} hive-serde ${hive.serde.scope} - true ${hive.group} hive-shims ${hive.shims.scope} - true org.apache.hive hive-llap-common ${hive.llap.scope} - true org.apache.hive hive-llap-client ${hive.llap.scope} - true @@ -84,7 +75,6 @@ guava ${hive.guava.version} compile - true @@ -95,22 +85,18 @@ ${hive.group} hive-cli - true ${hive.group} hive-jdbc - true ${hive.group} hive-beeline - true ${hive.group} hive-service-rpc - true diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 9aa6a09fd57a..c64af2dd29eb 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -202,7 +202,6 @@ private[hive] class IsolatedClientLoader( name.startsWith("org.apache.spark.") || isHadoopClass || name.startsWith("scala.") || - (name.startsWith("com.google") && !name.startsWith("com.google.cloud")) || name.startsWith("java.") || name.startsWith("javax.sql.") || sharedPrefixes.exists(name.startsWith)