Skip to content

Commit 9b72dc4

Browse files
committed
Merge branch 'master' into SPARK-40193-merge-filters
2 parents 5c7c0c5 + 4f65413 commit 9b72dc4

2,572 files changed

Lines changed: 66679 additions & 23851 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/labeler.yml

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,6 @@
1717
# under the License.
1818
#
1919

20-
#
21-
# Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler
22-
#
23-
# Note that we currently cannot use the negation operator (i.e. `!`) for miniglob matches as they
24-
# would match any file that doesn't touch them. What's needed is the concept of `any`, which takes a
25-
# list of constraints / globs and then matches all of the constraints for either `any` of the files or
26-
# `all` of the files in the change set.
27-
#
28-
# However, `any`/`all` are not supported in a released version and testing off of the `main` branch
29-
# resulted in some other errors.
30-
#
31-
# An issue has been opened upstream requesting that a release be cut that has support for all/any:
32-
# - https://github.com/actions/labeler/issues/111
33-
#
34-
# While we wait for this issue to be handled upstream, we can remove
35-
# the negated / `!` matches for now and at least have labels again.
36-
#
3720
INFRA:
3821
- ".github/**/*"
3922
- "appveyor.yml"
@@ -45,32 +28,24 @@ INFRA:
4528
- "dev/merge_spark_pr.py"
4629
- "dev/run-tests-jenkins*"
4730
BUILD:
48-
# Can be supported when a stable release with correct all/any is released
49-
#- any: ['dev/**/*', '!dev/merge_spark_pr.py', '!dev/.rat-excludes']
50-
- "dev/**/*"
31+
- any: ['dev/**/*', '!dev/merge_spark_pr.py', '!dev/run-tests-jenkins*']
5132
- "build/**/*"
5233
- "project/**/*"
5334
- "assembly/**/*"
5435
- "**/*pom.xml"
5536
- "bin/docker-image-tool.sh"
5637
- "bin/find-spark-home*"
5738
- "scalastyle-config.xml"
58-
# These can be added in the above `any` clause (and the /dev/**/* glob removed) when
59-
# `any`/`all` support is released
60-
# - "!dev/merge_spark_pr.py"
61-
# - "!dev/run-tests-jenkins*"
62-
# - "!dev/.rat-excludes"
6339
DOCS:
6440
- "docs/**/*"
6541
- "**/README.md"
6642
- "**/CONTRIBUTING.md"
43+
- "python/docs/**/*"
6744
EXAMPLES:
6845
- "examples/**/*"
6946
- "bin/run-example*"
70-
# CORE needs to be updated when all/any are released upstream.
7147
CORE:
72-
# - any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"] # If any file matches all of the globs defined in the list started by `any`, label is applied.
73-
- "core/**/*"
48+
- any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"]
7449
- "common/kvstore/**/*"
7550
- "common/network-common/**/*"
7651
- "common/network-shuffle/**/*"
@@ -82,12 +57,8 @@ SPARK SHELL:
8257
- "repl/**/*"
8358
- "bin/spark-shell*"
8459
SQL:
85-
#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming.py"]
86-
- "**/sql/**/*"
60+
- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming*.py"]
8761
- "common/unsafe/**/*"
88-
#- "!python/pyspark/sql/avro/**/*"
89-
#- "!python/pyspark/sql/streaming/**/*"
90-
#- "!python/pyspark/sql/tests/streaming/test_streaming.py"
9162
- "bin/spark-sql*"
9263
- "bin/beeline*"
9364
- "sbin/*thriftserver*.sh"
@@ -123,7 +94,7 @@ STRUCTURED STREAMING:
12394
- "**/sql/**/streaming/**/*"
12495
- "connector/kafka-0-10-sql/**/*"
12596
- "python/pyspark/sql/streaming/**/*"
126-
- "python/pyspark/sql/tests/streaming/test_streaming.py"
97+
- "python/pyspark/sql/tests/streaming/test_streaming*.py"
12798
- "**/*streaming.R"
12899
PYTHON:
129100
- "bin/pyspark*"
@@ -148,7 +119,6 @@ DEPLOY:
148119
- "sbin/**/*"
149120
CONNECT:
150121
- "connector/connect/**/*"
151-
- "**/sql/sparkconnect/**/*"
152122
- "python/pyspark/sql/**/connect/**/*"
153123
- "python/pyspark/ml/**/connect/**/*"
154124
PROTOBUF:

.github/workflows/benchmark.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ jobs:
6565
SPARK_LOCAL_IP: localhost
6666
steps:
6767
- name: Checkout Spark repository
68-
uses: actions/checkout@v3
68+
uses: actions/checkout@v4
6969
# In order to get diff files
7070
with:
7171
fetch-depth: 0
@@ -95,7 +95,7 @@ jobs:
9595
key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
9696
- name: Checkout tpcds-kit repository
9797
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
98-
uses: actions/checkout@v3
98+
uses: actions/checkout@v4
9999
with:
100100
repository: databricks/tpcds-kit
101101
ref: 2a5078a782192ddb6efbcead8de9973d6ab4f069
@@ -105,7 +105,7 @@ jobs:
105105
run: cd tpcds-kit/tools && make OS=LINUX
106106
- name: Install Java ${{ github.event.inputs.jdk }}
107107
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
108-
uses: actions/setup-java@v3
108+
uses: actions/setup-java@v4
109109
with:
110110
distribution: zulu
111111
java-version: ${{ github.event.inputs.jdk }}
@@ -134,7 +134,7 @@ jobs:
134134
SPARK_TPCDS_DATA: ${{ github.workspace }}/tpcds-sf-1
135135
steps:
136136
- name: Checkout Spark repository
137-
uses: actions/checkout@v3
137+
uses: actions/checkout@v4
138138
# In order to get diff files
139139
with:
140140
fetch-depth: 0
@@ -157,7 +157,7 @@ jobs:
157157
restore-keys: |
158158
benchmark-coursier-${{ github.event.inputs.jdk }}
159159
- name: Install Java ${{ github.event.inputs.jdk }}
160-
uses: actions/setup-java@v3
160+
uses: actions/setup-java@v4
161161
with:
162162
distribution: zulu
163163
java-version: ${{ github.event.inputs.jdk }}
@@ -177,7 +177,7 @@ jobs:
177177
# In benchmark, we use local as master so set driver memory only. Note that GitHub Actions has 7 GB memory limit.
178178
bin/spark-submit \
179179
--driver-memory 6g --class org.apache.spark.benchmark.Benchmarks \
180-
--jars "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`" \
180+
--jars "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`,`find ~/.cache/coursier -name 'curator-test-*.jar'`" \
181181
"`find . -name 'spark-core*-SNAPSHOT-tests.jar'`" \
182182
"${{ github.event.inputs.class }}"
183183
# To keep the directory structure and file permissions, tar them

0 commit comments

Comments
 (0)