From 5d475edd7c808fabd8df0d93db568e5369947702 Mon Sep 17 00:00:00 2001
From: eschcam
Date: Wed, 30 Jul 2025 11:43:06 +0100
Subject: [PATCH] [SPARK-52844][PYTHON] Update protobuf to 5.29.5

This resolves CVE-2025-4565
---
 .github/workflows/build_and_test.yml             | 4 ++--
 .github/workflows/build_python_connect.yml       | 2 +-
 .github/workflows/build_python_connect35.yml     | 2 +-
 .github/workflows/maven_test.yml                 | 2 +-
 .github/workflows/pages.yml                      | 2 +-
 .github/workflows/python_hosted_runner_test.yml  | 2 +-
 dev/create-release/spark-rm/Dockerfile           | 2 +-
 dev/infra/Dockerfile                             | 2 +-
 dev/requirements.txt                             | 2 +-
 dev/spark-test-image/docs/Dockerfile             | 2 +-
 dev/spark-test-image/numpy-213/Dockerfile        | 2 +-
 dev/spark-test-image/python-310/Dockerfile       | 2 +-
 dev/spark-test-image/python-311/Dockerfile       | 2 +-
 dev/spark-test-image/python-312/Dockerfile       | 2 +-
 dev/spark-test-image/python-313-nogil/Dockerfile | 2 +-
 dev/spark-test-image/python-313/Dockerfile       | 2 +-
 16 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 4fad15d872835..66370a01601d0 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -362,7 +362,7 @@ jobs:
     - name: Install Python packages (Python 3.11)
       if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') || contains(matrix.modules, 'yarn')
       run: |
-        python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1'
+        python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5'
         python3.11 -m pip list
     # Run the tests.
     - name: Run tests
@@ -766,7 +766,7 @@ jobs:
         python-version: '3.11'
     - name: Install dependencies for Python CodeGen check
       run: |
-        python3.11 -m pip install 'black==23.12.1' 'protobuf==5.29.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
+        python3.11 -m pip install 'black==23.12.1' 'protobuf==5.29.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
         python3.11 -m pip list
     - name: Python CodeGen check for branch-3.5
       if: inputs.branch == 'branch-3.5'
diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml
index bf247db613dba..f2289a35a559f 100644
--- a/.github/workflows/build_python_connect.yml
+++ b/.github/workflows/build_python_connect.yml
@@ -72,7 +72,7 @@ jobs:
         python packaging/client/setup.py sdist
         cd dist
         pip install pyspark*client-*.tar.gz
-        pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
+        pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
     - name: List Python packages
       run: python -m pip list
     - name: Run tests (local)
diff --git a/.github/workflows/build_python_connect35.yml b/.github/workflows/build_python_connect35.yml
index 6c37091afcb4f..0512e33d6cbea 100644
--- a/.github/workflows/build_python_connect35.yml
+++ b/.github/workflows/build_python_connect35.yml
@@ -71,7 +71,7 @@ jobs:
         pip install 'numpy==1.25.1' 'pyarrow>=18.0.0' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'

         # Add Python deps for Spark Connect.
-        pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3'
+        pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3'

         # Add torch as a testing dependency for TorchDistributor
         pip install 'torch==2.0.1' 'torchvision==0.15.2' torcheval
diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml
index dfd61cc463892..93f6e38a8ee12 100644
--- a/.github/workflows/maven_test.yml
+++ b/.github/workflows/maven_test.yml
@@ -175,7 +175,7 @@ jobs:
     - name: Install Python packages (Python 3.11)
       if: contains(matrix.modules, 'resource-managers#yarn') || (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
       run: |
-        python3.11 -m pip install 'numpy>=1.22' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1'
+        python3.11 -m pip install 'numpy>=1.22' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5'
         python3.11 -m pip list
     # Run the tests using script command.
     # BSD's script command doesn't support -c option, and the usage is different from Linux's one.
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 8293c2429c9d1..86ef00220b373 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -63,7 +63,7 @@ jobs:
         pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
           ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.2' 'plotly>=4.8' 'docutils<0.18.0' \
           'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
-          'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+          'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
           'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
     - name: Install Ruby for documentation generation
       uses: ruby/setup-ruby@v1
diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml
index 0586c7a1fa707..8b823bbd9e42f 100644
--- a/.github/workflows/python_hosted_runner_test.yml
+++ b/.github/workflows/python_hosted_runner_test.yml
@@ -151,7 +151,7 @@ jobs:
         python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2'
         python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0'
         python${{matrix.python}} -m pip install numpy 'pyarrow>=21.0.0' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
-        python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \
+        python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \
         python${{matrix.python}} -m pip cache purge
     - name: List Python packages
       run: python${{matrix.python}} -m pip list
diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
index f80949058e952..e53ac6b439c87 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -94,7 +94,7 @@ ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library

 ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"

 # Install Python 3.10 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index 8813112127d11..873d572063118 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -96,7 +96,7 @@ RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.0' scipy coverage matp

 ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.0 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"

 # Install Python 3.10 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
diff --git a/dev/requirements.txt b/dev/requirements.txt
index de397c4578dbf..76652df744815 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -64,7 +64,7 @@ py
 grpcio>=1.67.0
 grpcio-status>=1.67.0
 googleapis-common-protos>=1.65.0
-protobuf==5.29.1
+protobuf==5.29.5

 # Spark Connect python proto generation plugin (optional)
 mypy-protobuf==3.3.0
diff --git a/dev/spark-test-image/docs/Dockerfile b/dev/spark-test-image/docs/Dockerfile
index 2068d53d6aba5..c4cd43b9eb3ba 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -90,6 +90,6 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN python3.11 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
     ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.2' 'plotly>=4.8' 'docutils<0.18.0' \
     'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
-    'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+    'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
     'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \
     && python3.11 -m pip cache purge
diff --git a/dev/spark-test-image/numpy-213/Dockerfile b/dev/spark-test-image/numpy-213/Dockerfile
index f3ce7b7091e12..af1a19c4859c8 100644
--- a/dev/spark-test-image/numpy-213/Dockerfile
+++ b/dev/spark-test-image/numpy-213/Dockerfile
@@ -70,7 +70,7 @@ RUN apt-get update && apt-get install -y \
 # Pin numpy==2.1.3
 ARG BASIC_PIP_PKGS="numpy==2.1.3 pyarrow==19.0.0 six==1.16.0 pandas==2.2.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"

 # Install Python 3.11 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile
index bb82132024552..cfc03bccdf7c1 100644
--- a/dev/spark-test-image/python-310/Dockerfile
+++ b/dev/spark-test-image/python-310/Dockerfile
@@ -65,7 +65,7 @@ RUN apt-get update && apt-get install -y \

 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"

 # Install Python 3.10 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile
index 99b9afc0259cc..962f6427de6a8 100644
--- a/dev/spark-test-image/python-311/Dockerfile
+++ b/dev/spark-test-image/python-311/Dockerfile
@@ -69,7 +69,7 @@ RUN apt-get update && apt-get install -y \

 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"

 # Install Python 3.11 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile
index 47e4120b2dadf..afa24025c46c7 100644
--- a/dev/spark-test-image/python-312/Dockerfile
+++ b/dev/spark-test-image/python-312/Dockerfile
@@ -69,7 +69,7 @@ RUN apt-get update && apt-get install -y \

 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"

 # Install Python 3.12 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
diff --git a/dev/spark-test-image/python-313-nogil/Dockerfile b/dev/spark-test-image/python-313-nogil/Dockerfile
index 2819ac3114f3a..c7d2faed010f1 100644
--- a/dev/spark-test-image/python-313-nogil/Dockerfile
+++ b/dev/spark-test-image/python-313-nogil/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \

 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"

-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"


 # Install Python 3.13 packages
diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile
index 7284747bc5da7..dcc68575c496e 100644
--- a/dev/spark-test-image/python-313/Dockerfile
+++ b/dev/spark-test-image/python-313/Dockerfile
@@ -69,7 +69,7 @@ RUN apt-get update && apt-get install -y \

 ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"

 # Install Python 3.13 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.13