Skip to content

Commit e24f002

Browse files
committed
Spark: Initial support for 4.1.0-rc2
1 parent d25407f commit e24f002

57 files changed

Lines changed: 176 additions & 142 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/spark-ci.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,21 @@ jobs:
7272
strategy:
7373
matrix:
7474
jvm: [11, 17, 21]
75-
spark: ['3.4', '3.5', '4.0']
75+
spark: ['3.4', '3.5', '4.0', '4.1']
7676
scala: ['2.12', '2.13']
7777
exclude:
7878
# Spark 3.5 is the first version not failing on Java 21 (https://issues.apache.org/jira/browse/SPARK-42369)
7979
# Full Java 21 support is coming in Spark 4 (https://issues.apache.org/jira/browse/SPARK-43831)
8080
- jvm: 11
8181
spark: '4.0'
82+
- jvm: 11
83+
spark: '4.1'
8284
- jvm: 21
8385
spark: '3.4'
8486
- spark: '4.0'
8587
scala: '2.12'
88+
- spark: '4.1'
89+
scala: '2.12'
8690
env:
8791
SPARK_LOCAL_IP: localhost
8892
steps:

build.gradle

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ allprojects {
118118
group = "org.apache.iceberg"
119119
version = projectVersion
120120
repositories {
121+
maven {
122+
url = "https://repository.apache.org/content/repositories/orgapachespark-1507/"
123+
}
121124
mavenCentral()
122125
mavenLocal()
123126
}

dev/stage-binaries.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
SCALA_VERSION=2.12
2222
FLINK_VERSIONS=1.20,2.0,2.1
23-
SPARK_VERSIONS=3.4,3.5,4.0
23+
SPARK_VERSIONS=3.4,3.5,4.0,4.1
2424
KAFKA_VERSIONS=3
2525

2626
./gradlew -Prelease -DscalaVersion=$SCALA_VERSION -DflinkVersions=$FLINK_VERSIONS -DsparkVersions=$SPARK_VERSIONS -DkafkaVersions=$KAFKA_VERSIONS publishApachePublicationToMavenRepository --no-parallel --no-configuration-cache

gradle.properties

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ jmhJsonOutputPath=build/reports/jmh/results.json
1818
jmhIncludeRegex=.*
1919
systemProp.defaultFlinkVersions=2.1
2020
systemProp.knownFlinkVersions=1.20,2.0,2.1
21-
systemProp.defaultSparkVersions=4.0
22-
systemProp.knownSparkVersions=3.4,3.5,4.0
21+
systemProp.defaultSparkVersions=4.1
22+
systemProp.knownSparkVersions=3.4,3.5,4.0,4.1
2323
systemProp.defaultKafkaVersions=3
2424
systemProp.knownKafkaVersions=3
2525
systemProp.defaultScalaVersion=2.12

gradle/libs.versions.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ snowflake-jdbc = "3.27.1"
8686
spark34 = "3.4.4"
8787
spark35 = "3.5.7"
8888
spark40 = "4.0.1"
89+
spark41 = "4.1.0"
8990
sqlite-jdbc = "3.51.0.0"
9091
testcontainers = "2.0.2"
9192
tez08 = { strictly = "0.8.4"} # see rich version usage explanation above

jmh.gradle

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ if (sparkVersions.contains("4.0")) {
5353
jmhProjects.add(project(":iceberg-spark:iceberg-spark-extensions-4.0_2.13"))
5454
}
5555

56+
if (sparkVersions.contains("4.1")) {
57+
jmhProjects.add(project(":iceberg-spark:iceberg-spark-4.1_2.13"))
58+
jmhProjects.add(project(":iceberg-spark:iceberg-spark-extensions-4.1_2.13"))
59+
}
60+
5661
configure(jmhProjects) {
5762
apply plugin: 'me.champeau.jmh'
5863
apply plugin: 'io.morethan.jmhreport'

settings.gradle

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,18 @@ if (sparkVersions.contains("4.0")) {
175175
project(":iceberg-spark:spark-runtime-4.0_2.13").name = "iceberg-spark-runtime-4.0_2.13"
176176
}
177177

178+
if (sparkVersions.contains("4.1")) {
179+
include ":iceberg-spark:spark-4.1_2.13"
180+
include ":iceberg-spark:spark-extensions-4.1_2.13"
181+
include ":iceberg-spark:spark-runtime-4.1_2.13"
182+
project(":iceberg-spark:spark-4.1_2.13").projectDir = file('spark/v4.1/spark')
183+
project(":iceberg-spark:spark-4.1_2.13").name = "iceberg-spark-4.1_2.13"
184+
project(":iceberg-spark:spark-extensions-4.1_2.13").projectDir = file('spark/v4.1/spark-extensions')
185+
project(":iceberg-spark:spark-extensions-4.1_2.13").name = "iceberg-spark-extensions-4.1_2.13"
186+
project(":iceberg-spark:spark-runtime-4.1_2.13").projectDir = file('spark/v4.1/spark-runtime')
187+
project(":iceberg-spark:spark-runtime-4.1_2.13").name = "iceberg-spark-runtime-4.1_2.13"
188+
}
189+
178190
if (kafkaVersions.contains("3")) {
179191
include 'kafka-connect'
180192
project(':kafka-connect').name = 'iceberg-kafka-connect'

spark/build.gradle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,7 @@ if (sparkVersions.contains("3.5")) {
3131
if (sparkVersions.contains("4.0")) {
3232
apply from: file("$projectDir/v4.0/build.gradle")
3333
}
34+
35+
if (sparkVersions.contains("4.1")) {
36+
apply from: file("$projectDir/v4.1/build.gradle")
37+
}

spark/v4.1/build.gradle

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717
* under the License.
1818
*/
1919

20-
String sparkMajorVersion = '4.0'
20+
String sparkMajorVersion = '4.1'
2121
String scalaVersion = '2.13'
2222

2323
JavaVersion javaVersion = JavaVersion.current()
2424
Boolean javaVersionSupported = javaVersion == JavaVersion.VERSION_17 || javaVersion == JavaVersion.VERSION_21
2525
if (!javaVersionSupported) {
26-
logger.warn("Skip Spark 4.0 build which requires JDK 17 or 21 but was executed with JDK " + javaVersion)
26+
logger.warn("Skip Spark 4.1 build which requires JDK 17 or 21 but was executed with JDK " + javaVersion)
2727
}
2828

2929
def sparkProjects = [
@@ -51,6 +51,14 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
5151
apply plugin: 'scala'
5252
apply plugin: 'com.github.alisiikh.scalastyle'
5353

54+
// Set target to JDK17 for Spark 4.1 to fix following error
55+
// "spark/v4.1/spark/src/main/scala/org/apache/spark/sql/stats/ThetaSketchAgg.scala:52:12: Class java.lang.Record not found"
56+
tasks.withType(ScalaCompile.class) {
57+
sourceCompatibility = "17"
58+
targetCompatibility = "17"
59+
scalaCompileOptions.additionalParameters.add("-release:17")
60+
}
61+
5462
sourceSets {
5563
main {
5664
scala.srcDirs = ['src/main/scala', 'src/main/java']
@@ -74,7 +82,7 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
7482

7583
compileOnly libs.errorprone.annotations
7684
compileOnly libs.avro.avro
77-
compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}") {
85+
compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark41.get()}") {
7886
exclude group: 'org.apache.avro', module: 'avro'
7987
exclude group: 'org.apache.arrow'
8088
exclude group: 'org.apache.parquet'
@@ -84,7 +92,8 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
8492
exclude group: 'org.roaringbitmap'
8593
}
8694

87-
compileOnly "org.apache.datafusion:comet-spark-spark${sparkMajorVersion}_2.13:${libs.versions.comet.get()}"
95+
// TODO: datafusion-comet Spark 4.1 support
96+
compileOnly "org.apache.datafusion:comet-spark-spark4.0_2.13:${libs.versions.comet.get()}"
8897

8998
implementation libs.parquet.column
9099
implementation libs.parquet.hadoop
@@ -165,7 +174,7 @@ project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVer
165174
compileOnly project(':iceberg-core')
166175
compileOnly project(':iceberg-common')
167176
compileOnly project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
168-
compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}") {
177+
compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark41.get()}") {
169178
exclude group: 'org.apache.avro', module: 'avro'
170179
exclude group: 'org.apache.arrow'
171180
exclude group: 'org.apache.parquet'
@@ -194,7 +203,8 @@ project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVer
194203
testImplementation libs.avro.avro
195204
testImplementation libs.parquet.hadoop
196205
testImplementation libs.awaitility
197-
testImplementation "org.apache.datafusion:comet-spark-spark${sparkMajorVersion}_2.13:${libs.versions.comet.get()}"
206+
// TODO: datafusion-comet Spark 4.1 support
207+
testImplementation "org.apache.datafusion:comet-spark-spark4.0_2.13:${libs.versions.comet.get()}"
198208
testImplementation(testFixtures(project(':iceberg-parquet')))
199209

200210
// Required because we remove antlr plugin dependencies from the compile configuration, see note above
@@ -267,7 +277,7 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
267277
}
268278

269279
integrationImplementation "org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}"
270-
integrationImplementation "org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}"
280+
integrationImplementation "org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark41.get()}"
271281
integrationImplementation libs.junit.jupiter
272282
integrationImplementation libs.junit.platform.launcher
273283
integrationImplementation libs.slf4j.simple

spark/v4.1/spark-extensions/src/jmh/java/org/apache/iceberg/DeleteFileIndexBenchmark.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@
5454
/**
5555
* A benchmark that evaluates the delete file index build and lookup performance.
5656
*
57-
* <p>To run this benchmark for spark-4.0: <code>
58-
* ./gradlew -DsparkVersions=4.0 :iceberg-spark:iceberg-spark-extensions-4.0_2.13:jmh
57+
* <p>To run this benchmark for spark-4.1: <code>
58+
* ./gradlew -DsparkVersions=4.1 :iceberg-spark:iceberg-spark-extensions-4.1_2.13:jmh
5959
* -PjmhIncludeRegex=DeleteFileIndexBenchmark
6060
* -PjmhOutputPath=benchmark/iceberg-delete-file-index-benchmark.txt
6161
* </code>

0 commit comments

Comments (0)