diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 36732a2777e9..000000000000 --- a/.dockerignore +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Turn .dockerignore to .dockerallow by excluding everything and explicitly -# allowing specific files and directories. This enables us to quickly add -# dependency files to the docker content without scanning the whole directory. -# This setup requires to all of our docker containers have arrow's source -# as a mounted directory. 
- -# exclude everything -** - -# include explicitly -!ci/** -!c_glib/Gemfile -!dev/archery/requirements*.txt -!python/requirements*.txt -!python/manylinux1/** -!python/manylinux2010/** -!r/DESCRIPTION -!ruby/Gemfile -!ruby/red-arrow/Gemfile -!ruby/red-arrow/lib/arrow/version.rb -!ruby/red-arrow/red-arrow.gemspec -!ruby/red-arrow-cuda/Gemfile -!ruby/red-arrow-cuda/lib/arrow-cuda/version.rb -!ruby/red-arrow-cuda/red-arrow-cuda.gemspec -!ruby/red-gandiva/Gemfile -!ruby/red-gandiva/lib/gandiva/version.rb -!ruby/red-gandiva/red-gandiva.gemspec -!ruby/red-parquet/Gemfile -!ruby/red-parquet/lib/parquet/version.rb -!ruby/red-parquet/red-parquet.gemspec -!ruby/red-plasma/Gemfile -!ruby/red-plasma/lib/plasma/version.rb -!ruby/red-plasma/red-plasma.gemspec -!rust/Cargo.toml -!rust/arrow/Cargo.toml -!rust/arrow/benches -!rust/arrow-flight/Cargo.toml -!rust/parquet/Cargo.toml -!rust/parquet/build.rs -!rust/parquet_derive/Cargo.toml -!rust/parquet_derive_test/Cargo.toml -!rust/integration-testing/Cargo.toml diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat deleted file mode 100644 index 6b9309396604..000000000000 --- a/ci/appveyor-cpp-build.bat +++ /dev/null @@ -1,162 +0,0 @@ -@rem Licensed to the Apache Software Foundation (ASF) under one -@rem or more contributor license agreements. See the NOTICE file -@rem distributed with this work for additional information -@rem regarding copyright ownership. The ASF licenses this file -@rem to you under the Apache License, Version 2.0 (the -@rem "License"); you may not use this file except in compliance -@rem with the License. You may obtain a copy of the License at -@rem -@rem http://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, -@rem software distributed under the License is distributed on an -@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -@rem KIND, either express or implied. 
See the License for the -@rem specific language governing permissions and limitations -@rem under the License. - -@echo on - -git config core.symlinks true -git reset --hard - -@rem Retrieve git submodules, configure env var for Parquet unit tests -git submodule update --init || exit /B - -set ARROW_TEST_DATA=%CD%\testing\data -set PARQUET_TEST_DATA=%CD%\cpp\submodules\parquet-testing\data - -@rem -@rem In the configurations below we disable building the Arrow static library -@rem to save some time. Unfortunately this will still build the Parquet static -@rem library because of PARQUET-1420 (Thrift-generated symbols not exported in DLL). -@rem -if "%JOB%" == "Build_Debug" ( - mkdir cpp\build-debug - pushd cpp\build-debug - - cmake -G "%GENERATOR%" ^ - -DARROW_BOOST_USE_SHARED=OFF ^ - -DARROW_BUILD_EXAMPLES=ON ^ - -DARROW_BUILD_STATIC=OFF ^ - -DARROW_BUILD_TESTS=ON ^ - -DARROW_CXXFLAGS="/MP" ^ - -DARROW_ENABLE_TIMING_TESTS=OFF ^ - -DARROW_USE_PRECOMPILED_HEADERS=OFF ^ - -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ - -DCMAKE_BUILD_TYPE="Debug" ^ - -DCMAKE_UNITY_BUILD=ON ^ - .. || exit /B - - cmake --build . --config Debug || exit /B - ctest --output-on-failure -j2 || exit /B - popd - - @rem Finish Debug build successfully - exit /B 0 -) - -call activate arrow - -@rem Use Boost from Anaconda -set BOOST_ROOT=%CONDA_PREFIX%\Library -set BOOST_LIBRARYDIR=%CONDA_PREFIX%\Library\lib - -@rem The "main" C++ build script for Windows CI -@rem (i.e. 
for usual configurations) - -if "%JOB%" == "Toolchain" ( - set CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON -) else ( - @rem We're in a conda environment but don't want to use it for the dependencies - set CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=AUTO -) - -@rem Enable warnings-as-errors -set ARROW_CXXFLAGS=/WX /MP - -@rem -@rem Build and test Arrow C++ libraries (including Parquet) -@rem - -mkdir cpp\build -pushd cpp\build - -@rem XXX Without forcing CMAKE_CXX_COMPILER, CMake can re-run itself and -@rem unfortunately switch from Release to Debug mode... -@rem -@rem In release mode, disable optimizations (/Od) for faster compiling -@rem and enable runtime assertions. - -cmake -G "%GENERATOR%" %CMAKE_ARGS% ^ - -DARROW_BOOST_USE_SHARED=ON ^ - -DARROW_BUILD_EXAMPLES=ON ^ - -DARROW_BUILD_STATIC=OFF ^ - -DARROW_BUILD_TESTS=ON ^ - -DARROW_CSV=ON ^ - -DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^ - -DARROW_DATASET=ON ^ - -DARROW_ENABLE_TIMING_TESTS=OFF ^ - -DARROW_FLIGHT=%ARROW_BUILD_FLIGHT% ^ - -DARROW_GANDIVA=%ARROW_BUILD_GANDIVA% ^ - -DARROW_MIMALLOC=ON ^ - -DARROW_PARQUET=ON ^ - -DARROW_PYTHON=ON ^ - -DARROW_S3=%ARROW_S3% ^ - -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ - -DARROW_WITH_BROTLI=ON ^ - -DARROW_WITH_LZ4=ON ^ - -DARROW_WITH_SNAPPY=ON ^ - -DARROW_WITH_ZLIB=ON ^ - -DARROW_WITH_ZSTD=ON ^ - -DCMAKE_BUILD_TYPE="Release" ^ - -DCMAKE_CXX_COMPILER=clcache ^ - -DCMAKE_CXX_FLAGS_RELEASE="/MD /Od /UNDEBUG" ^ - -DCMAKE_INSTALL_PREFIX=%CONDA_PREFIX%\Library ^ - -DCMAKE_UNITY_BUILD=ON ^ - -DCMAKE_VERBOSE_MAKEFILE=OFF ^ - -DPARQUET_BUILD_EXECUTABLES=ON ^ - -DPARQUET_REQUIRE_ENCRYPTION=ON ^ - .. || exit /B -cmake --build . 
--target install --config %CONFIGURATION% || exit /B - -@rem Needed so arrow-python-test.exe works -set OLD_PYTHONHOME=%PYTHONHOME% -set PYTHONHOME=%CONDA_PREFIX% - -ctest --output-on-failure -j2 || exit /B - -set PYTHONHOME=%OLD_PYTHONHOME% -popd - -@rem -@rem Build and install pyarrow -@rem - -pushd python - -set PYARROW_BUNDLE_BOOST=OFF -set PYARROW_CMAKE_GENERATOR=%GENERATOR% -set PYARROW_CXXFLAGS=%ARROW_CXXFLAGS% -set PYARROW_PARALLEL=2 -set PYARROW_WITH_DATASET=ON -set PYARROW_WITH_FLIGHT=%ARROW_BUILD_FLIGHT% -set PYARROW_WITH_GANDIVA=%ARROW_BUILD_GANDIVA% -set PYARROW_WITH_PARQUET=ON -set PYARROW_WITH_S3=%ARROW_S3% -set PYARROW_WITH_STATIC_BOOST=ON - -set ARROW_HOME=%CONDA_PREFIX%\Library -@rem ARROW-3075; pkgconfig is broken for Parquet for now -set PARQUET_HOME=%CONDA_PREFIX%\Library - -python setup.py develop -q || exit /B - -set PYTHONDEVMODE=1 - -py.test -r sxX --durations=15 --pyargs pyarrow.tests || exit /B - -@rem -@rem Wheels are built and tested separately (see ARROW-5142). -@rem - diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat deleted file mode 100644 index 616232d202c9..000000000000 --- a/ci/appveyor-cpp-setup.bat +++ /dev/null @@ -1,101 +0,0 @@ -@rem Licensed to the Apache Software Foundation (ASF) under one -@rem or more contributor license agreements. See the NOTICE file -@rem distributed with this work for additional information -@rem regarding copyright ownership. The ASF licenses this file -@rem to you under the Apache License, Version 2.0 (the -@rem "License"); you may not use this file except in compliance -@rem with the License. You may obtain a copy of the License at -@rem -@rem http://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, -@rem software distributed under the License is distributed on an -@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -@rem KIND, either express or implied. 
See the License for the -@rem specific language governing permissions and limitations -@rem under the License. - -@echo on - -set "PATH=C:\Miniconda37-x64;C:\Miniconda37-x64\Scripts;C:\Miniconda37-x64\Library\bin;%PATH%" -set BOOST_ROOT=C:\Libraries\boost_1_67_0 -set BOOST_LIBRARYDIR=C:\Libraries\boost_1_67_0\lib64-msvc-14.0 - -@rem -@rem Avoid picking up AppVeyor-installed OpenSSL (linker errors with gRPC) -@rem XXX Perhaps there is a smarter way of solving this issue? -@rem -rd /s /q C:\OpenSSL-Win32 -rd /s /q C:\OpenSSL-Win64 -rd /s /q C:\OpenSSL-v11-Win32 -rd /s /q C:\OpenSSL-v11-Win64 -rd /s /q C:\OpenSSL-v111-Win32 -rd /s /q C:\OpenSSL-v111-Win64 - -@rem -@rem Configure miniconda -@rem -conda config --set auto_update_conda false -conda config --set show_channel_urls True -@rem Help with SSL timeouts to S3 -conda config --set remote_connect_timeout_secs 12 -conda info -a - -@rem -@rem Create conda environment for Build and Toolchain jobs -@rem -@rem Avoid Boost 1.70 because of https://github.com/boostorg/process/issues/85 - -set CONDA_PACKAGES= - -if "%ARROW_BUILD_GANDIVA%" == "ON" ( - @rem Install llvmdev in the toolchain if building gandiva.dll - set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_gandiva_win.yml -) -if "%JOB%" == "Toolchain" ( - @rem Install pre-built "toolchain" packages for faster builds - set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.yml -) -if "%JOB%" NEQ "Build_Debug" ( - @rem Arrow conda environment is only required for the Build and Toolchain jobs - conda create -n arrow -q -y -c conda-forge ^ - --file=ci\conda_env_python.yml ^ - %CONDA_PACKAGES% ^ - "cmake=3.17" ^ - "ninja" ^ - "nomkl" ^ - "pandas" ^ - "fsspec" ^ - "python=%PYTHON%" ^ - || exit /B -) - -@rem -@rem Configure compiler -@rem -if "%GENERATOR%"=="Ninja" set need_vcvarsall=1 -if defined need_vcvarsall ( - @rem Select desired compiler version - if "%APPVEYOR_BUILD_WORKER_IMAGE%" == "Visual Studio 2017" ( - call "C:\Program Files (x86)\Microsoft Visual 
Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 - ) else ( - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - ) -) - -@rem -@rem Use clcache for faster builds -@rem -pip install -q git+https://github.com/frerich/clcache.git -@rem Limit cache size to 500 MB -clcache -M 500000000 -clcache -c -clcache -s -powershell.exe -Command "Start-Process clcache-server" - -@rem -@rem Download Minio somewhere on PATH, for unit tests -@rem -if "%ARROW_S3%" == "ON" ( - appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/minio.exe -FileName C:\Windows\Minio.exe || exit /B -) diff --git a/ci/conda_env_archery.yml b/ci/conda_env_archery.yml deleted file mode 100644 index ace7a42acb02..000000000000 --- a/ci/conda_env_archery.yml +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# cli -click - -# bot, crossbow -github3.py -jinja2 -jira -pygit2 -pygithub -ruamel.yaml -setuptools_scm -toolz - -# benchmark -pandas - -# docker -python-dotenv -#ruamel.yaml - -# release -gitpython -#jinja2 -#jira -semver diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.yml deleted file mode 100644 index 390eb7dcdd5c..000000000000 --- a/ci/conda_env_cpp.yml +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -aws-sdk-cpp -benchmark=1.5.2 -boost-cpp>=1.68.0 -brotli -bzip2 -c-ares -cmake -gflags -glog -gmock>=1.10.0 -grpc-cpp>=1.27.3 -gtest=1.10.0 -libprotobuf -libutf8proc -lz4-c -make -ninja -pkg-config -python -rapidjson -re2 -snappy -thrift-cpp>=0.11.0 -zlib -zstd diff --git a/ci/conda_env_crossbow.txt b/ci/conda_env_crossbow.txt deleted file mode 100644 index 347294650ca2..000000000000 --- a/ci/conda_env_crossbow.txt +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -click -github3.py -jinja2 -jira -pygit2 -ruamel.yaml -setuptools_scm -toolz diff --git a/ci/conda_env_gandiva.yml b/ci/conda_env_gandiva.yml deleted file mode 100644 index 024b9fe74c19..000000000000 --- a/ci/conda_env_gandiva.yml +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -clang=11 -llvmdev=11 diff --git a/ci/conda_env_gandiva_win.yml b/ci/conda_env_gandiva_win.yml deleted file mode 100644 index 9098b53d1f53..000000000000 --- a/ci/conda_env_gandiva_win.yml +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# llvmdev=9 or later require Visual Studio 2017 -clangdev=8 -llvmdev=8 diff --git a/ci/conda_env_python.yml b/ci/conda_env_python.yml deleted file mode 100644 index 9124c7e84ec1..000000000000 --- a/ci/conda_env_python.yml +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# don't add pandas here, because it is not a mandatory test dependency -boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture -cffi -cython -cloudpickle -fsspec -hypothesis -numpy>=1.16.6 -pytest -pytest-faulthandler -pytest-lazy-fixture -pytz -s3fs>=0.4 -setuptools -setuptools_scm diff --git a/ci/conda_env_r.yml b/ci/conda_env_r.yml deleted file mode 100644 index 03d5f3b625c8..000000000000 --- a/ci/conda_env_r.yml +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -r-assertthat -r-base -r-bit64 -r-dplyr -r-purrr -r-r6 -r-cpp11 -r-rlang -r-tidyselect -r-vctrs -# Test/"Suggests" dependencies -pandoc -r-covr -r-hms -r-lubridate -r-rcmdcheck -r-reticulate -r-rmarkdown -r-testthat -r-tibble diff --git a/ci/conda_env_sphinx.yml b/ci/conda_env_sphinx.yml deleted file mode 100644 index 49388e2b437f..000000000000 --- a/ci/conda_env_sphinx.yml +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Requirements for building the documentation -breathe -doxygen -ipython -# Pinned per ARROW-9693 -sphinx=3.1.2 -pydata-sphinx-theme diff --git a/ci/conda_env_unix.yml b/ci/conda_env_unix.yml deleted file mode 100644 index 1973238adff5..000000000000 --- a/ci/conda_env_unix.yml +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# conda package dependencies specific to Unix-like environments (Linux and macOS) - -autoconf -ccache -orc -pkg-config diff --git a/ci/detect-changes.py b/ci/detect-changes.py deleted file mode 100644 index c32f6e040dd7..000000000000 --- a/ci/detect-changes.py +++ /dev/null @@ -1,365 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from __future__ import print_function - -import functools -import os -import pprint -import re -import sys -import subprocess - - -perr = functools.partial(print, file=sys.stderr) - - -def dump_env_vars(prefix, pattern=None): - if pattern is not None: - match = lambda s: re.search(pattern, s) - else: - match = lambda s: True - for name in sorted(os.environ): - if name.startswith(prefix) and match(name): - perr("- {0}: {1!r}".format(name, os.environ[name])) - - -def run_cmd(cmdline): - proc = subprocess.Popen(cmdline, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = proc.communicate() - if proc.returncode != 0: - raise RuntimeError("Command {cmdline} failed with code {returncode}, " - "stderr was:\n{stderr}\n" - .format(cmdline=cmdline, returncode=proc.returncode, - stderr=err.decode())) - return out - - -def get_commit_description(commit): - """ - Return the textual description (title + body) of the given git commit. - """ - out = run_cmd(["git", "show", "--no-patch", "--pretty=format:%B", - commit]) - return out.decode('utf-8', 'ignore') - - -def list_affected_files(commit_range): - """ - Return a list of files changed by the given git commit range. - """ - perr("Getting affected files from", repr(commit_range)) - out = run_cmd(["git", "diff", "--name-only", commit_range]) - return list(filter(None, (s.strip() for s in out.decode().splitlines()))) - - -def get_travis_head_commit(): - return os.environ['TRAVIS_COMMIT'] - - -def get_travis_commit_range(): - if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request': - # TRAVIS_COMMIT_RANGE is too pessimistic for PRs, as it may contain - # unrelated changes. Instead, use the same strategy as on AppVeyor - # below. 
- run_cmd(["git", "fetch", "-q", "origin", - "+refs/heads/{0}".format(os.environ['TRAVIS_BRANCH'])]) - merge_base = run_cmd(["git", "merge-base", - "HEAD", "FETCH_HEAD"]).decode().strip() - return "{0}..HEAD".format(merge_base) - else: - cr = os.environ['TRAVIS_COMMIT_RANGE'] - # See - # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122 - return cr.replace('...', '..') - - -def get_travis_commit_description(): - # Prefer this to get_commit_description(get_travis_head_commit()), - # as rebasing or other repository events may make TRAVIS_COMMIT invalid - # at the time we inspect it - return os.environ['TRAVIS_COMMIT_MESSAGE'] - - -def list_travis_affected_files(): - """ - Return a list of files affected in the current Travis build. - """ - commit_range = get_travis_commit_range() - try: - return list_affected_files(commit_range) - except RuntimeError: - # TRAVIS_COMMIT_RANGE can contain invalid revisions when - # building a branch (not a PR) after rebasing: - # https://github.com/travis-ci/travis-ci/issues/2668 - if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request': - raise - # If it's a rebase, it's probably enough to use the last commit only - commit_range = '{0}^..'.format(get_travis_head_commit()) - return list_affected_files(commit_range) - - -def list_appveyor_affected_files(): - """ - Return a list of files affected in the current AppVeyor build. - This only works for PR builds. - """ - # Re-fetch PR base branch (e.g. 
origin/master), pointing FETCH_HEAD to it - run_cmd(["git", "fetch", "-q", "origin", - "+refs/heads/{0}".format(os.environ['APPVEYOR_REPO_BRANCH'])]) - # Compute base changeset between FETCH_HEAD (PR base) and HEAD (PR head) - merge_base = run_cmd(["git", "merge-base", - "HEAD", "FETCH_HEAD"]).decode().strip() - # Compute changes files between base changeset and HEAD - return list_affected_files("{0}..HEAD".format(merge_base)) - - -def list_github_actions_affected_files(): - """ - Return a list of files affected in the current GitHub Actions build. - """ - # GitHub Actions checkout `refs/remotes/pull/$PR/merge` where `HEAD` points - # to the merge commit while `HEAD^` points to the commit before. Hence, - # `..HEAD^` points to all commit between master and the PR. - return list_affected_files("HEAD^..") - - -LANGUAGE_TOPICS = ['c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python', - 'r', 'ruby', 'rust', 'csharp'] - -ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'dev'] - - -AFFECTED_DEPENDENCIES = { - 'java': ['integration', 'python'], - 'js': ['integration'], - 'ci': ALL_TOPICS, - 'cpp': ['python', 'c_glib', 'r', 'ruby', 'integration'], - 'format': LANGUAGE_TOPICS, - 'go': ['integration'], - '.travis.yml': ALL_TOPICS, - 'appveyor.yml': ALL_TOPICS, - # In theory, it should ignore CONTRIBUTING.md and ISSUE_TEMPLATE.md, but in - # practice it's going to be CI - '.github': ALL_TOPICS, - 'c_glib': ['ruby'] -} - -COMPONENTS = {'cpp', 'java', 'c_glib', 'r', 'ruby', 'integration', 'js', - 'rust', 'csharp', 'go', 'docs', 'python', 'dev'} - - -def get_affected_topics(affected_files): - """ - Return a dict of topics affected by the given files. - Each dict value is True if affected, False otherwise. 
- """ - affected = dict.fromkeys(ALL_TOPICS, False) - - for path in affected_files: - parts = [] - head = path - while head: - head, tail = os.path.split(head) - parts.append(tail) - parts.reverse() - assert parts - p = parts[0] - fn = parts[-1] - if fn.startswith('README'): - continue - - if p in COMPONENTS: - affected[p] = True - - _path_already_affected = {} - - def _affect_dependencies(component): - if component in _path_already_affected: - # For circular dependencies, terminate - return - for topic in AFFECTED_DEPENDENCIES.get(component, ()): - affected[topic] = True - _affect_dependencies(topic) - _path_already_affected[topic] = True - - _affect_dependencies(p) - - return affected - - -def make_env_for_topics(affected): - return {'ARROW_CI_{0}_AFFECTED'.format(k.upper()): '1' if v else '0' - for k, v in affected.items()} - - -def get_unix_shell_eval(env): - """ - Return a shell-evalable string to setup some environment variables. - """ - return "; ".join(("export {0}='{1}'".format(k, v) - for k, v in env.items())) - - -def get_windows_shell_eval(env): - """ - Return a shell-evalable string to setup some environment variables. 
- """ - return "\n".join(('set "{0}={1}"'.format(k, v) - for k, v in env.items())) - - -def run_from_travis(): - perr("Environment variables (excerpt):") - dump_env_vars('TRAVIS_', '(BRANCH|COMMIT|PULL)') - if (os.environ['TRAVIS_REPO_SLUG'] == 'apache/arrow' and - os.environ['TRAVIS_BRANCH'] == 'master' and - os.environ['TRAVIS_EVENT_TYPE'] != 'pull_request'): - # Never skip anything on master builds in the official repository - affected = dict.fromkeys(ALL_TOPICS, True) - else: - desc = get_travis_commit_description() - if '[skip travis]' in desc: - # Skip everything - affected = dict.fromkeys(ALL_TOPICS, False) - elif '[force ci]' in desc or '[force travis]' in desc: - # Test everything - affected = dict.fromkeys(ALL_TOPICS, True) - else: - # Test affected topics - affected_files = list_travis_affected_files() - perr("Affected files:", affected_files) - affected = get_affected_topics(affected_files) - assert set(affected) <= set(ALL_TOPICS), affected - - perr("Affected topics:") - perr(pprint.pformat(affected)) - return get_unix_shell_eval(make_env_for_topics(affected)) - - -def run_from_appveyor(): - perr("Environment variables (excerpt):") - dump_env_vars('APPVEYOR_', '(PULL|REPO)') - if not os.environ.get('APPVEYOR_PULL_REQUEST_HEAD_COMMIT'): - # Not a PR build, test everything - affected = dict.fromkeys(ALL_TOPICS, True) - else: - affected_files = list_appveyor_affected_files() - perr("Affected files:", affected_files) - affected = get_affected_topics(affected_files) - assert set(affected) <= set(ALL_TOPICS), affected - - perr("Affected topics:") - perr(pprint.pformat(affected)) - return get_windows_shell_eval(make_env_for_topics(affected)) - - -def run_from_github(): - perr("Environment variables (excerpt):") - dump_env_vars('GITHUB_', '(REPOSITORY|ACTOR|SHA|REF|HEAD_REF|BASE_REF|EVENT_NAME)') - if os.environ['GITHUB_EVENT_NAME'] != 'pull_request': - # Not a PR build, test everything - affected = dict.fromkeys(ALL_TOPICS, True) - else: - affected_files = 
list_github_actions_affected_files() - perr("Affected files:", affected_files) - affected = get_affected_topics(affected_files) - assert set(affected) <= set(ALL_TOPICS), affected - - perr("Affected topics:") - perr(pprint.pformat(affected)) - return get_unix_shell_eval(make_env_for_topics(affected)) - - -def test_get_affected_topics(): - affected_topics = get_affected_topics(['cpp/CMakeLists.txt']) - assert affected_topics == { - 'c_glib': True, - 'cpp': True, - 'docs': False, - 'go': False, - 'java': False, - 'js': False, - 'python': True, - 'r': True, - 'ruby': True, - 'rust': False, - 'csharp': False, - 'integration': True, - 'dev': False - } - - affected_topics = get_affected_topics(['format/Schema.fbs']) - assert affected_topics == { - 'c_glib': True, - 'cpp': True, - 'docs': True, - 'go': True, - 'java': True, - 'js': True, - 'python': True, - 'r': True, - 'ruby': True, - 'rust': True, - 'csharp': True, - 'integration': True, - 'dev': False - } - - affected_topics = get_affected_topics(['.github/workflows']) - assert affected_topics == { - 'c_glib': True, - 'cpp': True, - 'docs': True, - 'go': True, - 'java': True, - 'js': True, - 'python': True, - 'r': True, - 'ruby': True, - 'rust': True, - 'csharp': True, - 'integration': True, - 'dev': True, - } - - -if __name__ == "__main__": - # This script should have its output evaluated by a shell, - # e.g. 
"eval `python ci/detect-changes.py`" - if os.environ.get('TRAVIS'): - try: - print(run_from_travis()) - except Exception: - # Make sure the enclosing eval will return an error - print("exit 1") - raise - elif os.environ.get('APPVEYOR'): - try: - print(run_from_appveyor()) - except Exception: - print("exit 1") - raise - elif os.environ.get('GITHUB_WORKFLOW'): - try: - print(run_from_github()) - except Exception: - print("exit 1") - raise - else: - sys.exit("Script must be run under Travis-CI, AppVeyor or GitHub Actions") diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile deleted file mode 100644 index 1a5b87ef7296..000000000000 --- a/ci/docker/conda-cpp.dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch -FROM ${repo}:${arch}-conda - -# install the required conda packages into the test environment -COPY ci/conda_env_cpp.yml \ - ci/conda_env_gandiva.yml \ - /arrow/ci/ -RUN conda install \ - --file arrow/ci/conda_env_cpp.yml \ - --file arrow/ci/conda_env_gandiva.yml \ - compilers \ - doxygen \ - gdb \ - valgrind && \ - conda clean --all - -ENV ARROW_BUILD_TESTS=ON \ - ARROW_DATASET=ON \ - ARROW_DEPENDENCY_SOURCE=CONDA \ - ARROW_FLIGHT=ON \ - ARROW_GANDIVA=ON \ - ARROW_HOME=$CONDA_PREFIX \ - ARROW_ORC=ON \ - ARROW_PARQUET=ON \ - ARROW_PLASMA=ON \ - ARROW_S3=ON \ - ARROW_USE_CCACHE=ON \ - ARROW_WITH_BROTLI=ON \ - ARROW_WITH_BZ2=ON \ - ARROW_WITH_LZ4=ON \ - ARROW_WITH_SNAPPY=ON \ - ARROW_WITH_ZLIB=ON \ - ARROW_WITH_ZSTD=ON \ - PARQUET_BUILD_EXAMPLES=ON \ - PARQUET_BUILD_EXECUTABLES=ON \ - PARQUET_HOME=$CONDA_PREFIX diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile deleted file mode 100644 index 1f2c9ac5da21..000000000000 --- a/ci/docker/conda-integration.dockerfile +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch=amd64 -FROM ${repo}:${arch}-conda-cpp - -ARG arch=amd64 -ARG maven=3.5 -ARG node=14 -ARG jdk=8 -ARG go=1.15 - -# Install Archery and integration dependencies -COPY ci/conda_env_archery.yml /arrow/ci/ -RUN conda install -q \ - --file arrow/ci/conda_env_cpp.yml \ - --file arrow/ci/conda_env_archery.yml \ - numpy \ - compilers \ - maven=${maven} \ - nodejs=${node} \ - yarn \ - openjdk=${jdk} && \ - conda clean --all --force-pkgs-dirs - -# Install Rust with only the needed components -# (rustfmt is needed for tonic-build to compile the protobuf definitions) -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --profile=minimal -y && \ - $HOME/.cargo/bin/rustup component add rustfmt - -ENV GOROOT=/opt/go \ - GOBIN=/opt/go/bin \ - GOPATH=/go \ - PATH=/opt/go/bin:$PATH -RUN wget -nv -O - https://dl.google.com/go/go${go}.linux-${arch}.tar.gz | tar -xzf - -C /opt - -ENV ARROW_BUILD_INTEGRATION=ON \ - ARROW_BUILD_STATIC=OFF \ - ARROW_BUILD_TESTS=OFF \ - ARROW_COMPUTE=OFF \ - ARROW_CSV=OFF \ - ARROW_DATASET=OFF \ - ARROW_FILESYSTEM=OFF \ - ARROW_FLIGHT=ON \ - ARROW_GANDIVA=OFF \ - ARROW_HDFS=OFF \ - ARROW_JEMALLOC=OFF \ - ARROW_JSON=OFF \ - ARROW_ORC=OFF \ - ARROW_PARQUET=OFF \ - ARROW_PLASMA=OFF \ - ARROW_S3=OFF \ - ARROW_USE_GLOG=OFF \ - CMAKE_UNITY_BUILD=ON diff --git a/ci/docker/conda-python-dask.dockerfile b/ci/docker/conda-python-dask.dockerfile deleted file mode 100644 index cd59a5538a9a..000000000000 --- a/ci/docker/conda-python-dask.dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG repo -ARG arch=amd64 -ARG python=3.6 -FROM ${repo}:${arch}-conda-python-${python} - -ARG dask=latest -COPY ci/scripts/install_dask.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_dask.sh ${dask} \ No newline at end of file diff --git a/ci/docker/conda-python-hdfs.dockerfile b/ci/docker/conda-python-hdfs.dockerfile deleted file mode 100644 index f6ffc71ce625..000000000000 --- a/ci/docker/conda-python-hdfs.dockerfile +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch=amd64 -ARG python=3.6 -FROM ${repo}:${arch}-conda-python-${python} - -ARG jdk=8 -ARG maven=3.5 -RUN conda install -q \ - maven=${maven} \ - openjdk=${jdk} \ - pandas && \ - conda clean --all - -# installing libhdfs (JNI) -ARG hdfs=3.2.1 -ENV HADOOP_HOME=/opt/hadoop-${hdfs} \ - HADOOP_OPTS=-Djava.library.path=/opt/hadoop-${hdfs}/lib/native \ - PATH=$PATH:/opt/hadoop-${hdfs}/bin:/opt/hadoop-${hdfs}/sbin -COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/util_download_apache.sh \ - "hadoop/common/hadoop-${hdfs}/hadoop-${hdfs}.tar.gz" /opt - -COPY ci/etc/hdfs-site.xml $HADOOP_HOME/etc/hadoop/ - -# build cpp with tests -ENV CC=gcc \ - CXX=g++ \ - ARROW_FLIGHT=OFF \ - ARROW_GANDIVA=OFF \ - ARROW_PLASMA=OFF \ - ARROW_PARQUET=ON \ - ARROW_ORC=OFF \ - ARROW_HDFS=ON \ - ARROW_PYTHON=ON \ - ARROW_BUILD_TESTS=ON diff --git a/ci/docker/conda-python-jpype.dockerfile b/ci/docker/conda-python-jpype.dockerfile deleted file mode 100644 index f77ef9bf66b4..000000000000 --- a/ci/docker/conda-python-jpype.dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch=amd64 -ARG python=3.6 -FROM ${repo}:${arch}-conda-python-${python} - -ARG jdk=11 -ARG maven=3.6 -RUN conda install -q \ - maven=${maven} \ - openjdk=${jdk} \ - jpype1 && \ - conda clean --all diff --git a/ci/docker/conda-python-kartothek.dockerfile b/ci/docker/conda-python-kartothek.dockerfile deleted file mode 100644 index d523161822c0..000000000000 --- a/ci/docker/conda-python-kartothek.dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch=amd64 -ARG python=3.6 -FROM ${repo}:${arch}-conda-python-${python} - -# install kartothek dependencies from conda-forge -RUN conda install -c conda-forge -q \ - attrs \ - click \ - cloudpickle \ - dask \ - decorator \ - freezegun \ - msgpack-python \ - prompt-toolkit \ - pytest-mock \ - pytest-xdist \ - pyyaml \ - simplejson \ - simplekv \ - storefact \ - toolz \ - urlquote \ - zstandard && \ - conda clean --all - -ARG kartothek=latest -COPY ci/scripts/install_kartothek.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_kartothek.sh ${kartothek} /kartothek diff --git a/ci/docker/conda-python-pandas.dockerfile b/ci/docker/conda-python-pandas.dockerfile deleted file mode 100644 index 303cc80e48a0..000000000000 --- a/ci/docker/conda-python-pandas.dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch=amd64 -ARG python=3.6 -FROM ${repo}:${arch}-conda-python-${python} - -ARG pandas=latest -ARG numpy=latest -COPY ci/scripts/install_pandas.sh /arrow/ci/scripts/ -RUN conda uninstall -q -y numpy && \ - /arrow/ci/scripts/install_pandas.sh ${pandas} ${numpy} diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile deleted file mode 100644 index a2af2ac135c9..000000000000 --- a/ci/docker/conda-python-spark.dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch=amd64 -ARG python=3.6 -FROM ${repo}:${arch}-conda-python-${python} - -ARG jdk=8 -ARG maven=3.5 - -RUN conda install -q \ - openjdk=${jdk} \ - maven=${maven} \ - pandas && \ - conda clean --all - -# installing specific version of spark -ARG spark=master -COPY ci/scripts/install_spark.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark - -# build cpp with tests -ENV CC=gcc \ - CXX=g++ \ - ARROW_PYTHON=ON \ - ARROW_HDFS=ON \ - ARROW_BUILD_TESTS=OFF \ - SPARK_VERSION=${spark} diff --git a/ci/docker/conda-python-turbodbc.dockerfile b/ci/docker/conda-python-turbodbc.dockerfile deleted file mode 100644 index ff7fdf6e1d0b..000000000000 --- a/ci/docker/conda-python-turbodbc.dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch=amd64 -ARG python=3.6 -FROM ${repo}:${arch}-conda-python-${python} - -RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - odbc-postgresql \ - postgresql \ - sudo && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# install turbodbc dependencies from conda-forge -RUN conda install -c conda-forge -q\ - pybind11 \ - pytest-cov \ - mock \ - unixodbc && \ - conda clean --all - -RUN service postgresql start && \ - sudo -u postgres psql -U postgres -c \ - "CREATE DATABASE test_db;" && \ - sudo -u postgres psql -U postgres -c \ - "ALTER USER postgres WITH PASSWORD 'password';" - -ARG turbodbc=latest -COPY ci/scripts/install_turbodbc.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_turbodbc.sh ${turbodbc} /turbodbc - -ENV TURBODBC_TEST_CONFIGURATION_FILES "query_fixtures_postgresql.json" diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile deleted file mode 100644 index a7e76974825d..000000000000 --- a/ci/docker/conda-python.dockerfile +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch -FROM ${repo}:${arch}-conda-cpp - -# install python specific packages -ARG python=3.6 -COPY ci/conda_env_python.yml /arrow/ci/ -RUN conda install -q \ - --file arrow/ci/conda_env_python.yml \ - $([ "$python" == "3.6" -o "$python" == "3.7" ] && echo "pickle5") \ - python=${python} \ - nomkl && \ - conda clean --all - -ENV ARROW_PYTHON=ON \ - ARROW_BUILD_STATIC=OFF \ - ARROW_BUILD_TESTS=OFF \ - ARROW_BUILD_UTILITIES=OFF \ - ARROW_TENSORFLOW=ON \ - ARROW_USE_GLOG=OFF diff --git a/ci/docker/conda.dockerfile b/ci/docker/conda.dockerfile deleted file mode 100644 index 94de009904a4..000000000000 --- a/ci/docker/conda.dockerfile +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG arch=amd64 -FROM ${arch}/ubuntu:18.04 - -# arch is unset after the FROM statement, so need to define it again -ARG arch=amd64 -ARG prefix=/opt/conda - -# install build essentials -RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get update -y -q && \ - apt-get install -y -q wget tzdata libc6-dbg \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -ENV PATH=${prefix}/bin:$PATH -# install conda and minio -COPY ci/scripts/install_conda.sh \ - ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix} -RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix} - -# create a conda environment -ADD ci/conda_env_unix.yml /arrow/ci/ -RUN conda create -n arrow --file arrow/ci/conda_env_unix.yml git && \ - conda clean --all - -# activate the created environment by default -RUN echo "conda activate arrow" >> ~/.profile -ENV CONDA_PREFIX=${prefix}/envs/arrow - -# use login shell to activate arrow environment un the RUN commands -SHELL [ "/bin/bash", "-c", "-l" ] - -# use login shell when running the container -ENTRYPOINT [ "/bin/bash", "-c", "-l" ] diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile deleted file mode 100644 index 83f8ce529cb8..000000000000 --- a/ci/docker/debian-10-cpp.dockerfile +++ /dev/null @@ -1,104 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG arch=amd64 -FROM ${arch}/debian:10 -ARG arch - -ENV DEBIAN_FRONTEND noninteractive - -RUN \ - echo "deb http://deb.debian.org/debian buster-backports main" > \ - /etc/apt/sources.list.d/backports.list - -ARG llvm -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - gnupg \ - wget && \ - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - echo "deb https://apt.llvm.org/buster/ llvm-toolchain-buster-${llvm} main" > \ - /etc/apt/sources.list.d/llvm.list && \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - autoconf \ - ccache \ - clang-${llvm} \ - cmake \ - g++ \ - gcc \ - gdb \ - git \ - libbenchmark-dev \ - libboost-all-dev \ - libbrotli-dev \ - libbz2-dev \ - libc-ares-dev \ - libcurl4-openssl-dev \ - libgflags-dev \ - libgmock-dev \ - libgoogle-glog-dev \ - liblz4-dev \ - libre2-dev \ - libsnappy-dev \ - libssl-dev \ - libthrift-dev \ - libutf8proc-dev \ - libzstd-dev \ - llvm-${llvm}-dev \ - make \ - ninja-build \ - pkg-config \ - protobuf-compiler \ - rapidjson-dev \ - tzdata \ - zlib1g-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local - -ENV ARROW_BUILD_TESTS=ON \ - ARROW_DEPENDENCY_SOURCE=SYSTEM \ - ARROW_DATASET=ON \ - ARROW_FLIGHT=ON \ - ARROW_GANDIVA=ON \ - ARROW_HOME=/usr/local \ - ARROW_ORC=ON \ - ARROW_PARQUET=ON \ - ARROW_PLASMA=ON \ - ARROW_S3=ON \ - ARROW_USE_CCACHE=ON \ 
- ARROW_WITH_BROTLI=ON \ - ARROW_WITH_BZ2=ON \ - ARROW_WITH_LZ4=ON \ - ARROW_WITH_SNAPPY=ON \ - ARROW_WITH_ZLIB=ON \ - ARROW_WITH_ZSTD=ON \ - AWSSDK_SOURCE=BUNDLED \ - cares_SOURCE=BUNDLED \ - CC=gcc \ - CXX=g++ \ - gRPC_SOURCE=BUNDLED \ - GTest_SOURCE=BUNDLED \ - ORC_SOURCE=BUNDLED \ - PATH=/usr/lib/ccache/:$PATH \ - Protobuf_SOURCE=BUNDLED diff --git a/ci/docker/debian-10-go.dockerfile b/ci/docker/debian-10-go.dockerfile deleted file mode 100644 index 199f09e24fcb..000000000000 --- a/ci/docker/debian-10-go.dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG arch=amd64 -ARG go=1.15 -FROM ${arch}/golang:${go} - -# TODO(kszucs): -# 1. add the files required to install the dependencies to .dockerignore -# 2. copy these files to their appropriate path -# 3. download and compile the dependencies diff --git a/ci/docker/debian-10-js.dockerfile b/ci/docker/debian-10-js.dockerfile deleted file mode 100644 index 5bb31f2e32ef..000000000000 --- a/ci/docker/debian-10-js.dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG arch=amd64 -ARG node=14 -FROM ${arch}/node:${node} - -ENV NODE_NO_WARNINGS=1 - -# TODO(kszucs): -# 1. add the files required to install the dependencies to .dockerignore -# 2. copy these files to their appropriate path -# 3. download and compile the dependencies diff --git a/ci/docker/debian-9-java.dockerfile b/ci/docker/debian-9-java.dockerfile deleted file mode 100644 index 2cc36e3eafba..000000000000 --- a/ci/docker/debian-9-java.dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG arch=amd64 -ARG jdk=8 -ARG maven=3.5.4 -FROM ${arch}/maven:${maven}-jdk-${jdk} - -ENV ARROW_JAVA_SHADE_FLATBUFS=ON - -# TODO(kszucs): -# 1. add the files required to install the dependencies to .dockerignore -# 2. copy these files to their appropriate path -# 3. download and compile the dependencies diff --git a/ci/docker/fedora-33-cpp.dockerfile b/ci/docker/fedora-33-cpp.dockerfile deleted file mode 100644 index 9dde6999510a..000000000000 --- a/ci/docker/fedora-33-cpp.dockerfile +++ /dev/null @@ -1,92 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG arch -FROM ${arch}/fedora:33 -ARG arch - -# install dependencies -RUN dnf update -y && \ - dnf install -y \ - autoconf \ - boost-devel \ - brotli-devel \ - bzip2-devel \ - c-ares-devel \ - ccache \ - clang-devel \ - cmake \ - curl-devel \ - flatbuffers-devel \ - gcc \ - gcc-c++ \ - gflags-devel \ - git \ - glog-devel \ - gmock-devel \ - google-benchmark-devel \ - grpc-devel \ - grpc-plugins \ - gtest-devel \ - java-latest-openjdk-devel \ - java-latest-openjdk-headless \ - libzstd-devel \ - llvm-devel \ - llvm-static \ - lz4-devel \ - make \ - ninja-build \ - openssl-devel \ - protobuf-devel \ - python \ - rapidjson-devel \ - re2-devel \ - snappy-devel \ - thrift-devel \ - utf8proc-devel \ - wget \ - which \ - zlib-devel - -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local - -ENV ARROW_BUILD_TESTS=ON \ - ARROW_DEPENDENCY_SOURCE=SYSTEM \ - ARROW_DATASET=ON \ - ARROW_FLIGHT=ON \ - ARROW_GANDIVA_JAVA=ON \ - ARROW_GANDIVA=ON \ - ARROW_HOME=/usr/local \ - ARROW_ORC=ON \ - ARROW_PARQUET=ON \ - ARROW_S3=ON \ - ARROW_USE_CCACHE=ON \ - ARROW_WITH_BROTLI=ON \ - ARROW_WITH_BZ2=ON \ - ARROW_WITH_LZ4=ON \ - ARROW_WITH_SNAPPY=ON \ - ARROW_WITH_ZLIB=ON \ - ARROW_WITH_ZSTD=ON \ - AWSSDK_SOURCE=BUNDLED \ - CC=gcc \ - CXX=g++ \ - ORC_SOURCE=BUNDLED \ - PARQUET_BUILD_EXECUTABLES=ON \ - PARQUET_BUILD_EXAMPLES=ON \ - PATH=/usr/lib/ccache/:$PATH diff --git a/ci/docker/linux-apt-c-glib.dockerfile b/ci/docker/linux-apt-c-glib.dockerfile deleted file mode 100644 index 12c6e23a00d8..000000000000 --- a/ci/docker/linux-apt-c-glib.dockerfile +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base -FROM ${base} - -RUN apt-get update -y -q && \ - apt-get install -y -q \ - python3 \ - python3-pip \ - gtk-doc-tools \ - libgirepository1.0-dev \ - libglib2.0-doc \ - lsb-release \ - luarocks \ - pkg-config \ - ruby-dev && \ - if [ "$(lsb_release --codename --short)" = "xenial" ]; then \ - apt-get install -y -q --no-install-recommends -t xenial-backports \ - ninja-build; \ - fi && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN luarocks install lgi - -# pip on Ubuntu 20.04 may be buggy: -# -# Collecting meson -# Downloading meson-0.53.2.tar.gz (1.6 MB) -# Installing build dependencies: started -# Installing build dependencies: finished with status 'done' -# Getting requirements to build wheel: started -# Getting requirements to build wheel: finished with status 'error' -# ERROR: Command errored out with exit status 1: -# command: /usr/bin/python3 /usr/share/python-wheels/pep517-0.7.0-py2.py3-none-any.whl/pep517/_in_process.py get_requires_for_build_wheel /tmp/tmpsk4jveay -# cwd: /tmp/pip-install-jn79a_kh/meson -# Complete output (1 lines): -# /usr/bin/python3: can't find '__main__' module in '/usr/share/python-wheels/pep517-0.7.0-py2.py3-none-any.whl/pep517/_in_process.py' -# ---------------------------------------- -# ERROR: Command errored out with exit status 1: /usr/bin/python3 /usr/share/python-wheels/pep517-0.7.0-py2.py3-none-any.whl/pep517/_in_process.py 
get_requires_for_build_wheel /tmp/tmpsk4jveay Check the logs for full command output. -RUN (python3 -m pip install meson || \ - python3 -m pip install --no-use-pep517 meson) && \ - gem install --no-document bundler - -COPY c_glib/Gemfile /arrow/c_glib/ -RUN bundle install --gemfile /arrow/c_glib/Gemfile - -ENV ARROW_BUILD_TESTS=OFF \ - ARROW_BUILD_UTILITIES=OFF \ - ARROW_INSTALL_NAME_RPATH=OFF diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile deleted file mode 100644 index 20cb889f28d8..000000000000 --- a/ci/docker/linux-apt-docs.dockerfile +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG base -FROM ${base} - -ARG r=4.0 -ARG jdk=8 - -# See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/ -RUN apt-get update -y && \ - apt-get install -y \ - dirmngr \ - apt-transport-https \ - software-properties-common && \ - apt-key adv \ - --keyserver keyserver.ubuntu.com \ - --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ - add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran40/' && \ - apt-get install -y --no-install-recommends \ - autoconf-archive \ - automake \ - curl \ - doxygen \ - gobject-introspection \ - gtk-doc-tools \ - libcurl4-openssl-dev \ - libfontconfig1-dev \ - libfribidi-dev \ - libgirepository1.0-dev \ - libglib2.0-doc \ - libharfbuzz-dev \ - libtiff-dev \ - libtool \ - libxml2-dev \ - ninja-build \ - nvidia-cuda-toolkit \ - openjdk-${jdk}-jdk-headless \ - pandoc \ - r-base=${r}* \ - rsync \ - ruby-dev \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ENV JAVA_HOME=/usr/lib/jvm/java-${jdk}-openjdk-amd64 - -ARG maven=3.5.4 -COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/util_download_apache.sh \ - "maven/maven-3/${maven}/binaries/apache-maven-${maven}-bin.tar.gz" /opt -ENV PATH=/opt/apache-maven-${maven}/bin:$PATH -RUN mvn -version - -ARG node=14 -RUN wget -q -O - https://deb.nodesource.com/setup_${node}.x | bash - && \ - apt-get install -y nodejs && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - npm install -g yarn - -RUN pip install \ - meson \ - breathe \ - ipython \ - sphinx \ - pydata-sphinx-theme - -COPY c_glib/Gemfile /arrow/c_glib/ -RUN gem install --no-document bundler && \ - bundle install --gemfile /arrow/c_glib/Gemfile - -# Ensure parallel R package installation, set CRAN repo mirror, -# and use pre-built binaries where possible -COPY ci/etc/rprofile /arrow/ci/etc/ -RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site -# Also ensure parallel compilation of C/C++ code -RUN echo 
"MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Makeconf - -COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ -COPY r/DESCRIPTION /arrow/r/ -RUN /arrow/ci/scripts/r_deps.sh /arrow && \ - R -e "install.packages('pkgdown')" - -ENV ARROW_FLIGHT=ON \ - ARROW_PYTHON=ON \ - ARROW_S3=ON \ - ARROW_BUILD_STATIC=OFF \ - ARROW_BUILD_TESTS=OFF \ - ARROW_BUILD_UTILITIES=OFF \ - ARROW_USE_GLOG=OFF \ - CMAKE_UNITY_BUILD=ON \ diff --git a/ci/docker/linux-apt-jni.dockerfile b/ci/docker/linux-apt-jni.dockerfile deleted file mode 100644 index 1abbf05af3bc..000000000000 --- a/ci/docker/linux-apt-jni.dockerfile +++ /dev/null @@ -1,86 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG base -FROM ${base} - -# pipefail is enabled for proper error detection in the `wget | apt-key add` -# step -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -ENV DEBIAN_FRONTEND noninteractive - -ARG llvm -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - apt-transport-https \ - lsb-release \ - software-properties-common \ - wget && \ - code_name=$(lsb_release --codename --short) && \ - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - apt-add-repository -y \ - "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" && \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - ca-certificates \ - ccache \ - clang-${llvm} \ - cmake \ - git \ - g++ \ - gcc \ - libboost-all-dev \ - libgflags-dev \ - libgoogle-glog-dev \ - libgtest-dev \ - liblz4-dev \ - libre2-dev \ - libsnappy-dev \ - libssl-dev \ - llvm-${llvm}-dev \ - make \ - ninja-build \ - pkg-config \ - protobuf-compiler \ - rapidjson-dev \ - tzdata \ - zlib1g-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ARG cmake=3.11.4 -RUN wget -nv -O - https://github.com/Kitware/CMake/releases/download/v${cmake}/cmake-${cmake}-Linux-x86_64.tar.gz | tar -xzf - -C /opt -ENV PATH=/opt/cmake-${cmake}-Linux-x86_64/bin:$PATH - -ENV ARROW_BUILD_TESTS=OFF \ - ARROW_DATASET=ON \ - ARROW_FLIGHT=OFF \ - ARROW_GANDIVA_JAVA=ON \ - ARROW_GANDIVA=ON \ - ARROW_HOME=/usr/local \ - ARROW_JNI=ON \ - ARROW_ORC=ON \ - ARROW_PARQUET=ON \ - ARROW_PLASMA_JAVA_CLIENT=ON \ - ARROW_PLASMA=ON \ - ARROW_USE_CCACHE=ON \ - CC=gcc \ - CXX=g++ \ - ORC_SOURCE=BUNDLED \ - PATH=/usr/lib/ccache/:$PATH \ - Protobuf_SOURCE=BUNDLED diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile deleted file mode 100644 index 66538919c499..000000000000 --- a/ci/docker/linux-apt-lint.dockerfile +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license 
agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base -FROM hadolint/hadolint:v1.17.2 AS hadolint -FROM ${base} - -ARG clang_tools -RUN apt-get update && \ - apt-get install -y -q \ - clang-${clang_tools} \ - clang-format-${clang_tools} \ - clang-tidy-${clang_tools} \ - clang-tools-${clang_tools} \ - cmake \ - curl \ - libclang-${clang_tools}-dev \ - llvm-${clang_tools}-dev \ - openjdk-11-jdk-headless \ - python3 \ - python3-dev \ - python3-pip \ - ruby \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Docker linter -COPY --from=hadolint /bin/hadolint /usr/bin/hadolint - -# IWYU -COPY ci/scripts/install_iwyu.sh /arrow/ci/scripts/ -RUN arrow/ci/scripts/install_iwyu.sh /tmp/iwyu /usr/local ${clang_tools} - -# Rust linter -ARG rust=nightly-2021-03-24 -RUN curl https://sh.rustup.rs -sSf | \ - sh -s -- --default-toolchain stable -y -ENV PATH /root/.cargo/bin:$PATH -RUN rustup install ${rust} && \ - rustup default ${rust} && \ - rustup component add rustfmt - -# Use python3 by default in scripts -RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ - ln -s /usr/bin/pip3 /usr/local/bin/pip - -COPY dev/archery/requirements.txt \ - dev/archery/requirements-lint.txt \ - /arrow/dev/archery/ -RUN pip install \ - -r arrow/dev/archery/requirements.txt \ - -r arrow/dev/archery/requirements-lint.txt - -ENV LC_ALL=C.UTF-8 
\ - LANG=C.UTF-8 diff --git a/ci/docker/linux-apt-python-3.dockerfile b/ci/docker/linux-apt-python-3.dockerfile deleted file mode 100644 index 753ba0d3aea4..000000000000 --- a/ci/docker/linux-apt-python-3.dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base -FROM ${base} - -RUN apt-get update -y -q && \ - apt-get install -y -q \ - python3 \ - python3-pip \ - python3-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ - ln -s /usr/bin/pip3 /usr/local/bin/pip - -RUN pip install -U pip setuptools - -COPY python/requirements-build.txt \ - python/requirements-test.txt \ - /arrow/python/ - -RUN pip install \ - -r arrow/python/requirements-build.txt \ - -r arrow/python/requirements-test.txt - -ENV ARROW_PYTHON=ON \ - ARROW_BUILD_STATIC=OFF \ - ARROW_BUILD_TESTS=OFF \ - ARROW_BUILD_UTILITIES=OFF \ - ARROW_USE_GLOG=OFF \ diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile deleted file mode 100644 index f47044e334b9..000000000000 --- a/ci/docker/linux-apt-r.dockerfile +++ /dev/null @@ -1,100 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base -FROM ${base} -ARG arch - -# Build R -# [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04 -# [2] https://linuxize.com/post/how-to-install-r-on-ubuntu-18-04/#installing-r-packages-from-cran -ARG r=3.6 -RUN apt-get update -y && \ - apt-get install -y \ - dirmngr \ - apt-transport-https \ - software-properties-common && \ - apt-key adv \ - --keyserver keyserver.ubuntu.com \ - --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ - # NOTE: R 3.5 and 3.6 are available in the repos with -cran35 suffix - # for trusty, xenial, bionic, and eoan (as of May 2020) - # -cran40 has 4.0 versions for bionic and focal - # R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial - # TODO: make sure OS version and R version are valid together and conditionally set repo suffix - # This is a hack to turn 3.6 into 35 and 4.0 into 40: - add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . 
| tr 6 5)'/' && \ - apt-get install -y \ - r-base=${r}* \ - # system libs needed by core R packages - libxml2-dev \ - libgit2-dev \ - libssl-dev \ - # install clang to mirror what was done on Travis - clang \ - clang-format \ - clang-tidy \ - # R CMD CHECK --as-cran needs pdflatex to build the package manual - texlive-latex-base \ - # Need locales so we can set UTF-8 - locales \ - # Need Python to check py-to-r bridge - python3 \ - python3-pip \ - python3-dev && \ - locale-gen en_US.UTF-8 && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Ensure parallel R package installation, set CRAN repo mirror, -# and use pre-built binaries where possible -COPY ci/etc/rprofile /arrow/ci/etc/ -RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site -# Also ensure parallel compilation of C/C++ code -RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Makeconf - -COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ -COPY r/DESCRIPTION /arrow/r/ -RUN /arrow/ci/scripts/r_deps.sh /arrow - -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local - -# Set up Python 3 and its dependencies -RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ - ln -s /usr/bin/pip3 /usr/local/bin/pip - -COPY python/requirements-build.txt /arrow/python/ -RUN pip install -r arrow/python/requirements-build.txt - -ENV \ - ARROW_BUILD_STATIC=OFF \ - ARROW_BUILD_TESTS=OFF \ - ARROW_BUILD_UTILITIES=OFF \ - ARROW_DEPENDENCY_SOURCE=SYSTEM \ - ARROW_FLIGHT=OFF \ - ARROW_GANDIVA=OFF \ - ARROW_NO_DEPRECATED_API=ON \ - ARROW_ORC=OFF \ - ARROW_PARQUET=ON \ - ARROW_PLASMA=OFF \ - ARROW_PYTHON=ON \ - ARROW_S3=ON \ - ARROW_USE_CCACHE=ON \ - ARROW_USE_GLOG=OFF \ - LC_ALL=en_US.UTF-8 diff --git a/ci/docker/linux-apt-ruby.dockerfile b/ci/docker/linux-apt-ruby.dockerfile deleted file mode 100644 index 58fd65bd57a7..000000000000 --- a/ci/docker/linux-apt-ruby.dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to 
the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# depends on a C GLib image -ARG base -FROM ${base} - -COPY ruby/ /arrow/ruby/ -RUN bundle install --gemfile /arrow/ruby/Gemfile -RUN \ - for package in /arrow/ruby/*; do \ - bundle install --gemfile ${package}/Gemfile; \ - done diff --git a/ci/docker/linux-dnf-python-3.dockerfile b/ci/docker/linux-dnf-python-3.dockerfile deleted file mode 100644 index 8c3c5c701339..000000000000 --- a/ci/docker/linux-dnf-python-3.dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base -FROM ${base} - -RUN dnf install -y \ - python3 \ - python3-pip \ - python3-devel - -RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ - ln -s /usr/bin/pip3 /usr/local/bin/pip - -COPY python/requirements-build.txt \ - python/requirements-test.txt \ - /arrow/python/ - -RUN pip install \ - -r arrow/python/requirements-build.txt \ - -r arrow/python/requirements-test.txt - -ENV ARROW_PYTHON=ON \ - ARROW_BUILD_STATIC=OFF \ - ARROW_BUILD_TESTS=OFF \ - ARROW_BUILD_UTILITIES=OFF \ - ARROW_USE_GLOG=OFF \ diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile deleted file mode 100644 index ac414829d42b..000000000000 --- a/ci/docker/linux-r.dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# General purpose Dockerfile to take a Docker image containing R -# and install Arrow R package dependencies - -ARG base -FROM ${base} - -ARG r_bin=R -ENV R_BIN=${r_bin} - -ARG r_dev=FALSE -ENV ARROW_R_DEV=${r_dev} - -ARG devtoolset_version=-1 -ENV DEVTOOLSET_VERSION=${devtoolset_version} - -# Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile) -ENV PATH "${RPREFIX}/bin:${PATH}" - -# Patch up some of the docker images -COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/ -COPY ci/etc/rprofile /arrow/ci/etc/ -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/r_docker_configure.sh - -COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ -COPY r/DESCRIPTION /arrow/r/ -RUN /arrow/ci/scripts/r_deps.sh /arrow diff --git a/ci/docker/python-sdist.dockerfile b/ci/docker/python-sdist.dockerfile deleted file mode 100644 index 853b532ab5e9..000000000000 --- a/ci/docker/python-sdist.dockerfile +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -FROM amd64/ubuntu:20.04 - -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -RUN echo "debconf debconf/frontend select Noninteractive" | \ - debconf-set-selections - -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - git \ - python3-pip && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -COPY python/requirements-build.txt \ - /arrow/python/requirements-build.txt -RUN pip3 install --requirement /arrow/python/requirements-build.txt - -ENV PYTHON=/usr/bin/python3 diff --git a/ci/docker/python-wheel-manylinux-201x.dockerfile b/ci/docker/python-wheel-manylinux-201x.dockerfile deleted file mode 100644 index 19246a46764b..000000000000 --- a/ci/docker/python-wheel-manylinux-201x.dockerfile +++ /dev/null @@ -1,110 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG base -FROM ${base} - -ARG arch_alias -ARG arch_short_alias - -RUN yum install -y git flex curl autoconf zip wget - -# Install CMake -ARG cmake=3.19.3 -RUN wget -q https://github.com/Kitware/CMake/releases/download/v${cmake}/cmake-${cmake}-Linux-${arch_alias}.tar.gz -O - | \ - tar -xzf - --directory /usr/local --strip-components=1 - -# Install Ninja -ARG ninja=1.10.2 -RUN mkdir /tmp/ninja && \ - wget -q https://github.com/ninja-build/ninja/archive/v${ninja}.tar.gz -O - | \ - tar -xzf - --directory /tmp/ninja --strip-components=1 && \ - cd /tmp/ninja && \ - ./configure.py --bootstrap && \ - mv ninja /usr/local/bin && \ - rm -rf /tmp/ninja - -# Install ccache -ARG ccache=4.1 -RUN mkdir /tmp/ccache && \ - wget -q https://github.com/ccache/ccache/archive/v${ccache}.tar.gz -O - | \ - tar -xzf - --directory /tmp/ccache --strip-components=1 && \ - cd /tmp/ccache && \ - mkdir build && \ - cd build && \ - cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DZSTD_FROM_INTERNET=ON .. && \ - ninja install && \ - rm -rf /tmp/ccache - -# Install vcpkg -ARG vcpkg -RUN git clone https://github.com/microsoft/vcpkg /opt/vcpkg && \ - git -C /opt/vcpkg checkout ${vcpkg} && \ - /opt/vcpkg/bootstrap-vcpkg.sh -useSystemBinaries -disableMetrics && \ - ln -s /opt/vcpkg/vcpkg /usr/bin/vcpkg - -# Patch ports files as needed -COPY ci/vcpkg arrow/ci/vcpkg -RUN cd /opt/vcpkg && git apply --ignore-whitespace /arrow/ci/vcpkg/ports.patch - -ARG build_type=release -ENV CMAKE_BUILD_TYPE=${build_type} \ - VCPKG_FORCE_SYSTEM_BINARIES=1 \ - VCPKG_OVERLAY_TRIPLETS=/arrow/ci/vcpkg \ - VCPKG_DEFAULT_TRIPLET=${arch_short_alias}-linux-static-${build_type} \ - VCPKG_FEATURE_FLAGS=-manifests - -# Need to install the boost-build prior installing the boost packages, otherwise -# vcpkg will raise an error. 
-# TODO(kszucs): factor out the package enumeration to a text file and reuse it -# from the windows image and potentially in a future macos wheel build -RUN vcpkg install --clean-after-build \ - boost-build:${arch_short_alias}-linux && \ - vcpkg install --clean-after-build \ - abseil \ - aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \ - boost-filesystem \ - brotli \ - bzip2 \ - c-ares \ - curl \ - flatbuffers \ - gflags \ - glog \ - grpc \ - lz4 \ - openssl \ - orc \ - protobuf \ - rapidjson \ - re2 \ - snappy \ - thrift \ - utf8proc \ - zlib \ - zstd - -ARG python=3.6 -ENV PYTHON_VERSION=${python} -RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-*) && \ - echo "export PATH=$PYTHON_ROOT/bin:\$PATH" >> /etc/profile.d/python.sh - -SHELL ["/bin/bash", "-i", "-c"] -ENTRYPOINT ["/bin/bash", "-i", "-c"] - -COPY python/requirements-wheel-build.txt /arrow/python/ -RUN pip install -r /arrow/python/requirements-wheel-build.txt diff --git a/ci/docker/python-wheel-manylinux-test.dockerfile b/ci/docker/python-wheel-manylinux-test.dockerfile deleted file mode 100644 index 55c27d1d7bbd..000000000000 --- a/ci/docker/python-wheel-manylinux-test.dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG arch -ARG python -FROM ${arch}/python:${python} - -# RUN pip install --upgrade pip - -# pandas doesn't provide wheel for aarch64 yet, so cache the compiled -# test dependencies in a docker image -COPY python/requirements-wheel-test.txt /arrow/python/ -RUN pip install -r /arrow/python/requirements-wheel-test.txt diff --git a/ci/docker/python-wheel-windows-vs2017.dockerfile b/ci/docker/python-wheel-windows-vs2017.dockerfile deleted file mode 100644 index 0f66a20396eb..000000000000 --- a/ci/docker/python-wheel-windows-vs2017.dockerfile +++ /dev/null @@ -1,99 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# based on mcr.microsoft.com/windows/servercore:ltsc2019 -# contains choco and vs2017 preinstalled -FROM abrarov/msvc-2017:2.10.0 - -# Install CMake and Ninja -RUN choco install --no-progress -r -y cmake --installargs 'ADD_CMAKE_TO_PATH=System' && \ - choco install --no-progress -r -y gzip wget ninja - -# Add unix tools to path -RUN setx path "%path%;C:\Program Files\Git\usr\bin" - -# Install vcpkg -ARG vcpkg -RUN git clone https://github.com/Microsoft/vcpkg && \ - git -C vcpkg checkout %vcpkg% && \ - vcpkg\bootstrap-vcpkg.bat -disableMetrics -win64 && \ - setx PATH "%PATH%;C:\vcpkg" - -# Patch ports files as needed -COPY ci/vcpkg arrow/ci/vcpkg -RUN cd vcpkg && git apply --ignore-whitespace C:/arrow/ci/vcpkg/ports.patch - -# Configure vcpkg and install dependencies -# NOTE: use windows batch environment notation for build arguments in RUN -# statements but bash notation in ENV statements -# VCPKG_FORCE_SYSTEM_BINARIES=1 spare around ~750MB of image size if the system -# cmake's and ninja's versions are recent enough -COPY ci/vcpkg arrow/ci/vcpkg -ARG build_type=release -ENV CMAKE_BUILD_TYPE=${build_type} \ - VCPKG_OVERLAY_TRIPLETS=C:\\arrow\\ci\\vcpkg \ - VCPKG_DEFAULT_TRIPLET=x64-windows-static-md-${build_type} \ - VCPKG_FEATURE_FLAGS=-manifests -RUN vcpkg install --clean-after-build \ - abseil \ - aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \ - boost-filesystem \ - boost-multiprecision \ - boost-system \ - brotli \ - bzip2 \ - c-ares \ - curl \ - flatbuffers \ - gflags \ - glog \ - grpc \ - lz4 \ - openssl \ - orc \ - protobuf \ - rapidjson \ - re2 \ - snappy \ - thrift \ - utf8proc \ - zlib \ - zstd - -# Remove previous installations of python from the base image -RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \ - rm -rf Python* - -# Define the full version number otherwise choco falls back to patch number 0 (3.7 => 3.7.0) -ARG python=3.6 -RUN (if "%python%"=="3.6" setx PYTHON_VERSION 3.6.8) & 
\ - (if "%python%"=="3.7" setx PYTHON_VERSION 3.7.4) & \ - (if "%python%"=="3.8" setx PYTHON_VERSION 3.8.6) & \ - (if "%python%"=="3.9" setx PYTHON_VERSION 3.9.1) -RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION% -RUN python -m pip install -U pip - -COPY python/requirements-wheel-build.txt arrow/python/ -RUN pip install -r arrow/python/requirements-wheel-build.txt - -# TODO(kszucs): set clcache as the compiler -ENV CLCACHE_DIR="C:\clcache" -RUN pip install clcache - -# For debugging purposes -# RUN wget --no-check-certificate https://github.com/lucasg/Dependencies/releases/download/v1.10/Dependencies_x64_Release.zip -# RUN unzip Dependencies_x64_Release.zip -d Dependencies && setx path "%path%;C:\Depencencies" diff --git a/ci/docker/ubuntu-18.04-cpp.dockerfile b/ci/docker/ubuntu-18.04-cpp.dockerfile deleted file mode 100644 index 4b855b526108..000000000000 --- a/ci/docker/ubuntu-18.04-cpp.dockerfile +++ /dev/null @@ -1,128 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG base=amd64/ubuntu:18.04 -FROM ${base} - -# pipefail is enabled for proper error detection in the `wget | apt-key add` -# step -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -ENV DEBIAN_FRONTEND=noninteractive - -# Installs LLVM toolchain, for Gandiva and testing other compilers -# -# Note that this is installed before the base packages to improve iteration -# while debugging package list with docker build. -ARG clang_tools -ARG llvm -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - gnupg \ - wget && \ - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - echo "deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-${llvm} main" > \ - /etc/apt/sources.list.d/llvm.list && \ - if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -ge 10 ]; then \ - echo "deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-${clang_tools} main" > \ - /etc/apt/sources.list.d/clang-tools.list; \ - fi && \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - clang-${clang_tools} \ - clang-${llvm} \ - clang-format-${clang_tools} \ - clang-tidy-${clang_tools} \ - llvm-${llvm}-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -# Installs C++ toolchain and dependencies -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - autoconf \ - ca-certificates \ - ccache \ - cmake \ - g++ \ - gcc \ - gdb \ - git \ - libbenchmark-dev \ - libboost-filesystem-dev \ - libboost-system-dev \ - libbrotli-dev \ - libbz2-dev \ - libcurl4-openssl-dev \ - libgflags-dev \ - libgoogle-glog-dev \ - liblz4-dev \ - libprotobuf-dev \ - libprotoc-dev \ - libre2-dev \ - libsnappy-dev \ - libssl-dev \ - libutf8proc-dev \ - libzstd-dev \ - ninja-build \ - pkg-config \ - protobuf-compiler \ - rapidjson-dev \ - tzdata && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -# Prioritize system packages and local installation -# The following 
dependencies will be downloaded due to missing/invalid packages -# provided by the distribution: -# - libc-ares-dev does not install CMake config files -# - flatbuffer is not packaged -# - libgtest-dev only provide sources -# - libprotobuf-dev only provide sources -# - thrift is too old -# - s3 tests would require boost-asio that is included since Boost 1.66.0 -ENV ARROW_BUILD_TESTS=ON \ - ARROW_DEPENDENCY_SOURCE=SYSTEM \ - ARROW_DATASET=ON \ - ARROW_FLIGHT=OFF \ - ARROW_GANDIVA=ON \ - ARROW_HDFS=ON \ - ARROW_HOME=/usr/local \ - ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ - ARROW_ORC=ON \ - ARROW_PARQUET=ON \ - ARROW_PLASMA=ON \ - ARROW_USE_ASAN=OFF \ - ARROW_USE_CCACHE=ON \ - ARROW_USE_TSAN=OFF \ - ARROW_USE_UBSAN=OFF \ - ARROW_WITH_BROTLI=ON \ - ARROW_WITH_BZ2=ON \ - ARROW_WITH_LZ4=ON \ - ARROW_WITH_SNAPPY=ON \ - ARROW_WITH_ZLIB=ON \ - ARROW_WITH_ZSTD=ON \ - AWSSDK_SOURCE=BUNDLED \ - GTest_SOURCE=BUNDLED \ - ORC_SOURCE=BUNDLED \ - PARQUET_BUILD_EXECUTABLES=ON \ - PARQUET_BUILD_EXAMPLES=ON \ - PATH=/usr/lib/ccache/:$PATH \ - Thrift_SOURCE=BUNDLED diff --git a/ci/docker/ubuntu-18.04-csharp.dockerfile b/ci/docker/ubuntu-18.04-csharp.dockerfile deleted file mode 100644 index 624ce259d5ae..000000000000 --- a/ci/docker/ubuntu-18.04-csharp.dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG platform=bionic -ARG dotnet=3.1 -FROM mcr.microsoft.com/dotnet/core/sdk:${dotnet}-${platform} - -RUN dotnet tool install --tool-path /usr/local/bin sourcelink diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile deleted file mode 100644 index 3a37ace13811..000000000000 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ /dev/null @@ -1,135 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base=amd64/ubuntu:20.04 -FROM ${base} -ARG arch - -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -RUN echo "debconf debconf/frontend select Noninteractive" | \ - debconf-set-selections - -# Installs LLVM toolchain, for Gandiva and testing other compilers -# -# Note that this is installed before the base packages to improve iteration -# while debugging package list with docker build. 
-ARG clang_tools -ARG llvm -RUN if [ "${llvm}" -gt "10" ]; then \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - gnupg \ - wget && \ - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - echo "deb https://apt.llvm.org/focal/ llvm-toolchain-focal-${llvm} main" > \ - /etc/apt/sources.list.d/llvm.list && \ - if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \ - echo "deb https://apt.llvm.org/focal/ llvm-toolchain-focal-${clang_tools} main" > \ - /etc/apt/sources.list.d/clang-tools.list; \ - fi \ - fi && \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - clang-${clang_tools} \ - clang-${llvm} \ - clang-format-${clang_tools} \ - clang-tidy-${clang_tools} \ - llvm-${llvm}-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -# Installs C++ toolchain and dependencies -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - autoconf \ - ca-certificates \ - ccache \ - cmake \ - g++ \ - gcc \ - gdb \ - git \ - libbenchmark-dev \ - libboost-filesystem-dev \ - libboost-system-dev \ - libbrotli-dev \ - libbz2-dev \ - libgflags-dev \ - libcurl4-openssl-dev \ - libgoogle-glog-dev \ - liblz4-dev \ - libprotobuf-dev \ - libprotoc-dev \ - libre2-dev \ - libsnappy-dev \ - libssl-dev \ - libthrift-dev \ - libutf8proc-dev \ - libzstd-dev \ - make \ - ninja-build \ - pkg-config \ - protobuf-compiler \ - rapidjson-dev \ - tzdata \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local - -# Prioritize system packages and local installation -# The following dependencies will be downloaded due to missing/invalid packages -# provided by the distribution: -# - libc-ares-dev does not install CMake config files -# - flatbuffer is not packaged -# - libgtest-dev only provide 
sources -# - libprotobuf-dev only provide sources -ENV ARROW_BUILD_TESTS=ON \ - ARROW_DEPENDENCY_SOURCE=SYSTEM \ - ARROW_DATASET=ON \ - ARROW_FLIGHT=OFF \ - ARROW_GANDIVA=ON \ - ARROW_HDFS=ON \ - ARROW_HOME=/usr/local \ - ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ - ARROW_ORC=ON \ - ARROW_PARQUET=ON \ - ARROW_PLASMA=ON \ - ARROW_S3=ON \ - ARROW_USE_ASAN=OFF \ - ARROW_USE_CCACHE=ON \ - ARROW_USE_UBSAN=OFF \ - ARROW_WITH_BROTLI=ON \ - ARROW_WITH_BZ2=ON \ - ARROW_WITH_LZ4=ON \ - ARROW_WITH_SNAPPY=ON \ - ARROW_WITH_ZLIB=ON \ - ARROW_WITH_ZSTD=ON \ - AWSSDK_SOURCE=BUNDLED \ - GTest_SOURCE=BUNDLED \ - ORC_SOURCE=BUNDLED \ - PARQUET_BUILD_EXAMPLES=ON \ - PARQUET_BUILD_EXECUTABLES=ON \ - PATH=/usr/lib/ccache/:$PATH \ - PYTHON=python3 diff --git a/ci/docker/ubuntu-20.10-cpp.dockerfile b/ci/docker/ubuntu-20.10-cpp.dockerfile deleted file mode 100644 index 80eb072e7ed4..000000000000 --- a/ci/docker/ubuntu-20.10-cpp.dockerfile +++ /dev/null @@ -1,137 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG base=amd64/ubuntu:20.10 -FROM ${base} -ARG arch - -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -RUN echo "debconf debconf/frontend select Noninteractive" | \ - debconf-set-selections - -# Installs LLVM toolchain, for Gandiva and testing other compilers -# -# Note that this is installed before the base packages to improve iteration -# while debugging package list with docker build. -ARG clang_tools -ARG llvm -RUN if [ "${llvm}" -gt "10" ]; then \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - gnupg \ - wget && \ - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${llvm} main" > \ - /etc/apt/sources.list.d/llvm.list && \ - if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \ - echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${clang_tools} main" > \ - /etc/apt/sources.list.d/clang-tools.list; \ - fi \ - fi && \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - clang-${clang_tools} \ - clang-${llvm} \ - clang-format-${clang_tools} \ - clang-tidy-${clang_tools} \ - llvm-${llvm}-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -# Installs C++ toolchain and dependencies -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - autoconf \ - ca-certificates \ - ccache \ - cmake \ - g++ \ - gcc \ - gdb \ - git \ - libbenchmark-dev \ - libboost-filesystem-dev \ - libboost-system-dev \ - libbrotli-dev \ - libbz2-dev \ - libgflags-dev \ - libcurl4-openssl-dev \ - libgoogle-glog-dev \ - libgrpc++-dev \ - liblz4-dev \ - libprotobuf-dev \ - libprotoc-dev \ - libre2-dev \ - libsnappy-dev \ - libssl-dev \ - libthrift-dev \ - libutf8proc-dev \ - libzstd-dev \ - make \ - ninja-build \ - pkg-config \ - protobuf-compiler \ - protobuf-compiler-grpc \ - rapidjson-dev \ - tzdata \ - wget && \ - apt-get clean && \ - rm -rf 
/var/lib/apt/lists* - -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local - -# Prioritize system packages and local installation -# The following dependencies will be downloaded due to missing/invalid packages -# provided by the distribution: -# - libc-ares-dev does not install CMake config files -# - flatbuffer is not packaged -# - libgtest-dev only provide sources -# - libprotobuf-dev only provide sources -ENV ARROW_BUILD_TESTS=ON \ - ARROW_DEPENDENCY_SOURCE=SYSTEM \ - ARROW_DATASET=ON \ - ARROW_FLIGHT=OFF \ - ARROW_GANDIVA=ON \ - ARROW_HDFS=ON \ - ARROW_HOME=/usr/local \ - ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ - ARROW_ORC=ON \ - ARROW_PARQUET=ON \ - ARROW_PLASMA=ON \ - ARROW_S3=ON \ - ARROW_USE_ASAN=OFF \ - ARROW_USE_CCACHE=ON \ - ARROW_USE_UBSAN=OFF \ - ARROW_WITH_BROTLI=ON \ - ARROW_WITH_BZ2=ON \ - ARROW_WITH_LZ4=ON \ - ARROW_WITH_SNAPPY=ON \ - ARROW_WITH_ZLIB=ON \ - ARROW_WITH_ZSTD=ON \ - AWSSDK_SOURCE=BUNDLED \ - GTest_SOURCE=BUNDLED \ - ORC_SOURCE=BUNDLED \ - PARQUET_BUILD_EXAMPLES=ON \ - PARQUET_BUILD_EXECUTABLES=ON \ - PATH=/usr/lib/ccache/:$PATH \ - PYTHON=python3 diff --git a/ci/etc/hdfs-site.xml b/ci/etc/hdfs-site.xml deleted file mode 100644 index 97214337f5e2..000000000000 --- a/ci/etc/hdfs-site.xml +++ /dev/null @@ -1,52 +0,0 @@ - - - - - - - - - dfs.replication - 2 - - - dfs.datanode.data.dir - file:///data/dfs/data - - - dfs.namenode.name.dir - file:///data/dfs/name - - - dfs.namenode.checkpoint.dir - file:///data/dfs/namesecondary - - - dfs.namenode.datanode.registration.ip-hostname-check - false - - - dfs.default.replica - 1 - - - dfs.support.append - true - - - dfs.client.block.write.replace-datanode-on-failure.enable - false - - diff --git a/ci/etc/rprofile b/ci/etc/rprofile deleted file mode 100644 index 229a0101a252..000000000000 --- a/ci/etc/rprofile +++ /dev/null @@ -1,53 +0,0 @@ - local({ - .pick_cran <- function() { - # Return a CRAN repo 
URL, preferring RSPM binaries if available for this OS - rspm_template <- "https://packagemanager.rstudio.com/cran/__linux__/%s/latest" - supported_os <- c("focal", "xenial", "bionic", "centos7", "centos8", "opensuse42", "opensuse15", "opensuse152") - - if (nzchar(Sys.which("lsb_release"))) { - os <- tolower(system("lsb_release -cs", intern = TRUE)) - if (os %in% supported_os) { - return(sprintf(rspm_template, os)) - } - } - if (file.exists("/etc/os-release")) { - os_release <- readLines("/etc/os-release") - vals <- sub("^.*=(.*)$", "\\1", os_release) - os <- intersect(vals, supported_os) - if (length(os)) { - # e.g. "bionic" - return(sprintf(rspm_template, os)) - } else { - names(vals) <- sub("^(.*)=.*$", "\\1", os_release) - if (vals["ID"] == "opensuse") { - version <- sub('^"?([0-9]+).*"?.*$', "\\1", vals["VERSION_ID"]) - os <- paste0("opensuse", version) - if (os %in% supported_os) { - return(sprintf(rspm_template, os)) - } - } - } - } - if (file.exists("/etc/system-release")) { - # Something like "CentOS Linux release 7.7.1908 (Core)" - system_release <- tolower(utils::head(readLines("/etc/system-release"), 1)) - # Extract from that the distro and the major version number - os <- sub("^([a-z]+) .* ([0-9]+).*$", "\\1\\2", system_release) - if (os %in% supported_os) { - return(sprintf(rspm_template, os)) - } - } - - return("https://cloud.r-project.org") - } - - options( - Ncpus = parallel::detectCores(), - repos = tryCatch(.pick_cran(), error = function(e) "https://cloud.r-project.org"), - HTTPUserAgent = sprintf( - 'R/%s R (%s)', - getRversion(), - paste(getRversion(), R.version$platform, R.version$arch, R.version$os) - ) - ) -}) diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD deleted file mode 100644 index c5b55eef42ae..000000000000 --- a/ci/scripts/PKGBUILD +++ /dev/null @@ -1,134 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -_realname=arrow -pkgbase=mingw-w64-${_realname} -pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=3.0.0.9000 -pkgrel=8000 -pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" -arch=("any") -url="https://arrow.apache.org/" -license=("Apache-2.0") -depends=("${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp" - "${MINGW_PACKAGE_PREFIX}-libutf8proc" - "${MINGW_PACKAGE_PREFIX}-re2" - "${MINGW_PACKAGE_PREFIX}-thrift" - "${MINGW_PACKAGE_PREFIX}-snappy" - "${MINGW_PACKAGE_PREFIX}-zlib" - "${MINGW_PACKAGE_PREFIX}-lz4" - "${MINGW_PACKAGE_PREFIX}-zstd") -makedepends=("${MINGW_PACKAGE_PREFIX}-ccache" - "${MINGW_PACKAGE_PREFIX}-cmake" - "${MINGW_PACKAGE_PREFIX}-gcc") -options=("staticlibs" "strip" "!buildflags") - -# For installing from a local checkout, set source_dir to . 
and don't include -# a "source" param below -source_dir="$ARROW_HOME" -# else -# source_dir=apache-${_realname}-${pkgver} - -# For released version: -#source=("https://archive.apache.org/dist/arrow/arrow-${pkgver}/apache-arrow-${pkgver}.tar.gz") -#sha256sums=("ac2a77dd9168e9892e432c474611e86ded0be6dfe15f689c948751d37f81391a") -# For github dev version: -# Append `#commit=54b1b2f688e5e84b4c664b1e12a95f93b94ab2f3` to the URL to select a revision -# source=("${source_dir}"::"git+https://github.com/apache/arrow") -# sha256sums=("SKIP") -# source_dir="${APPVEYOR_BUILD_FOLDER}/${source_dir}" - -cpp_build_dir=build-${CARCH}-cpp - -pkgver() { - # The only purpose of this here is to cause the job to error if the - # version in pkgver is different from what is in r/DESCRIPTION - grep Version "${source_dir}/r/DESCRIPTION" | cut -d " " -f 2 -} - -build() { - ARROW_CPP_DIR="${source_dir}/cpp" - [[ -d ${cpp_build_dir} ]] && rm -rf ${cpp_build_dir} - mkdir -p ${cpp_build_dir} - pushd ${cpp_build_dir} - - # The Rtools libutf8proc is a static lib, but Findutf8proc.cmake doesn't - # set the appropriate compiler definition. 
- export CPPFLAGS="-DUTF8PROC_STATIC" - - # This is the difference between rtools-packages and rtools-backports - # Remove this when submitting to rtools-packages - if [ "$RTOOLS_VERSION" = "35" ]; then - export CC="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/gcc" - export CXX="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/g++" - export PATH="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin:$PATH" - export CPPFLAGS="${CPPFLAGS} -I${MINGW_PREFIX}/include" - export LIBS="-L${MINGW_PREFIX}/libs" - export ARROW_S3=OFF - export ARROW_WITH_RE2=OFF - else - export ARROW_S3=ON - export ARROW_WITH_RE2=ON - fi - - MSYS2_ARG_CONV_EXCL="-DCMAKE_INSTALL_PREFIX=" \ - ${MINGW_PREFIX}/bin/cmake.exe \ - ${ARROW_CPP_DIR} \ - -G "MSYS Makefiles" \ - -DARROW_BUILD_SHARED=OFF \ - -DARROW_BUILD_STATIC=ON \ - -DARROW_BUILD_UTILITIES=OFF \ - -DARROW_COMPUTE=ON \ - -DARROW_CSV=ON \ - -DARROW_DATASET=ON \ - -DARROW_FILESYSTEM=ON \ - -DARROW_HDFS=OFF \ - -DARROW_JEMALLOC=OFF \ - -DARROW_JSON=ON \ - -DARROW_LZ4_USE_SHARED=OFF \ - -DARROW_MIMALLOC=ON \ - -DARROW_PACKAGE_PREFIX="${MINGW_PREFIX}" \ - -DARROW_PARQUET=ON \ - -DARROW_S3="${ARROW_S3}" \ - -DARROW_SNAPPY_USE_SHARED=OFF \ - -DARROW_USE_GLOG=OFF \ - -DARROW_WITH_LZ4=ON \ - -DARROW_WITH_RE2="${ARROW_WITH_RE2}" \ - -DARROW_WITH_SNAPPY=ON \ - -DARROW_WITH_ZLIB=ON \ - -DARROW_WITH_ZSTD=ON \ - -DARROW_ZSTD_USE_SHARED=OFF \ - -DARROW_CXXFLAGS="${CPPFLAGS}" \ - -DCMAKE_BUILD_TYPE="release" \ - -DCMAKE_INSTALL_PREFIX=${MINGW_PREFIX} \ - -DCMAKE_UNITY_BUILD=ON \ - -DCMAKE_VERBOSE_MAKEFILE=ON - - make -j3 - popd -} - -package() { - make -C ${cpp_build_dir} DESTDIR="${pkgdir}" install - - local PREFIX_DEPS=$(cygpath -am ${MINGW_PREFIX}) - pushd "${pkgdir}${MINGW_PREFIX}/lib/pkgconfig" - for pc in *.pc; do - sed -s "s|${PREFIX_DEPS}|${MINGW_PREFIX}|g" -i $pc - done - popd -} diff --git a/ci/scripts/c_glib_build.sh b/ci/scripts/c_glib_build.sh deleted file mode 100755 index ce3cea18e715..000000000000 --- a/ci/scripts/c_glib_build.sh +++ /dev/null @@ -1,45 +0,0 
@@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/c_glib -build_dir=${2}/c_glib -: ${ARROW_GLIB_GTK_DOC:=false} -: ${ARROW_GLIB_DEVELOPMENT_MODE:=false} - -export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig - -export CFLAGS="-DARROW_NO_DEPRECATED_API" -export CXXFLAGS="-DARROW_NO_DEPRECATED_API" - -mkdir -p ${build_dir} - -# Build with Meson -meson --prefix=$ARROW_HOME \ - --libdir=lib \ - -Ddevelopment_mode=${ARROW_GLIB_DEVELOPMENT_MODE} \ - -Dgtk_doc=${ARROW_GLIB_GTK_DOC} \ - ${build_dir} \ - ${source_dir} - -pushd ${build_dir} -ninja -ninja install -popd diff --git a/ci/scripts/c_glib_test.sh b/ci/scripts/c_glib_test.sh deleted file mode 100755 index 25c54138ed65..000000000000 --- a/ci/scripts/c_glib_test.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/c_glib -build_dir=${2}/c_glib - -export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} -export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig -export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 - -pushd ${source_dir} - -ruby test/run-test.rb - -if [[ "$(uname -s)" == "Linux" ]]; then - # TODO(kszucs): on osx it fails to load 'lgi.corelgilua51' despite that lgi - # was installed by luarocks - pushd example/lua - lua write-batch.lua - lua read-batch.lua - lua write-stream.lua - lua read-stream.lua - popd -fi - -popd - -pushd ${build_dir} -example/extension-type -popd diff --git a/ci/scripts/ccache_setup.sh b/ci/scripts/ccache_setup.sh deleted file mode 100755 index f77fbb373647..000000000000 --- a/ci/scripts/ccache_setup.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -set -eux - -echo "ARROW_USE_CCACHE=ON" >> $GITHUB_ENV -echo "CCACHE_COMPILERCHECK=content" >> $GITHUB_ENV -echo "CCACHE_COMPRESS=1" >> $GITHUB_ENV -echo "CCACHE_COMPRESSLEVEL=6" >> $GITHUB_ENV -echo "CCACHE_MAXSIZE=500M" >> $GITHUB_ENV diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh deleted file mode 100755 index 8a1e4f32f3a9..000000000000 --- a/ci/scripts/cpp_build.sh +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/cpp -build_dir=${2}/cpp -with_docs=${3:-false} - -: ${ARROW_USE_CCACHE:=OFF} - -# TODO(kszucs): consider to move these to CMake -if [ ! 
-z "${CONDA_PREFIX}" ]; then - echo -e "===\n=== Conda environment for build\n===" - conda list - - export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_AR=${AR} -DCMAKE_RANLIB=${RANLIB}" - export ARROW_GANDIVA_PC_CXX_FLAGS=$(echo | ${CXX} -E -Wp,-v -xc++ - 2>&1 | grep '^ ' | awk '{print "-isystem;" substr($1, 1)}' | tr '\n' ';') -elif [ -x "$(command -v xcrun)" ]; then - export ARROW_GANDIVA_PC_CXX_FLAGS="-isysroot;$(xcrun --show-sdk-path)" -fi - -if [ "${ARROW_USE_CCACHE}" == "ON" ]; then - echo -e "===\n=== ccache statistics before build\n===" - ccache -s -fi - -mkdir -p ${build_dir} -pushd ${build_dir} - -cmake -G "${CMAKE_GENERATOR:-Ninja}" \ - -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \ - -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ - -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \ - -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ - -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ - -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ - -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ - -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ - -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \ - -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \ - -DARROW_CSV=${ARROW_CSV:-ON} \ - -DARROW_CUDA=${ARROW_CUDA:-OFF} \ - -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \ - -DARROW_DATASET=${ARROW_DATASET:-ON} \ - -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ - -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \ - -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \ - -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \ - -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \ - -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \ - -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA:-OFF} \ - -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \ - -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \ - -DARROW_HDFS=${ARROW_HDFS:-ON} \ - -DARROW_HIVESERVER2=${ARROW_HIVESERVER2:-OFF} \ - -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ - 
-DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ - -DARROW_JNI=${ARROW_JNI:-OFF} \ - -DARROW_JSON=${ARROW_JSON:-ON} \ - -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ - -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ - -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \ - -DARROW_ORC=${ARROW_ORC:-OFF} \ - -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ - -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT:-OFF} \ - -DARROW_PLASMA=${ARROW_PLASMA:-OFF} \ - -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \ - -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ - -DARROW_S3=${ARROW_S3:-OFF} \ - -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ - -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \ - -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \ - -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \ - -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \ - -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \ - -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \ - -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \ - -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \ - -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \ - -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \ - -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \ - -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \ - -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \ - -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \ - -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \ - -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \ - -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \ - -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \ - -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \ - -DBOOST_SOURCE=${BOOST_SOURCE:-} \ - -DBrotli_SOURCE=${Brotli_SOURCE:-} \ - -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \ - -Dc-ares_SOURCE=${cares_SOURCE:-} \ - -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \ - -DCMAKE_C_FLAGS="${CFLAGS:-}" \ - -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ - -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ - 
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ - -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ - -Dgflags_SOURCE=${gflags_SOURCE:-} \ - -DgRPC_SOURCE=${gRPC_SOURCE:-} \ - -DGTest_SOURCE=${GTest_SOURCE:-} \ - -DLz4_SOURCE=${Lz4_SOURCE:-} \ - -DORC_SOURCE=${ORC_SOURCE:-} \ - -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \ - -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \ - -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \ - -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \ - -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \ - -Dre2_SOURCE=${re2_SOURCE:-} \ - -DSnappy_SOURCE=${Snappy_SOURCE:-} \ - -DThrift_SOURCE=${Thrift_SOURCE:-} \ - -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \ - -Dzstd_SOURCE=${zstd_SOURCE:-} \ - ${CMAKE_ARGS} \ - ${source_dir} - -if [ ! -z "${CPP_MAKE_PARALLELISM}" ]; then - time cmake --build . --target install -- -j${CPP_MAKE_PARALLELISM} -else - time cmake --build . --target install -fi - -popd - -if [ -x "$(command -v ldconfig)" ]; then - ldconfig -fi - -if [ "${ARROW_USE_CCACHE}" == "ON" ]; then - echo -e "===\n=== ccache statistics after build\n===" - ccache -s -fi - -if [ "${with_docs}" == "true" ]; then - pushd ${source_dir}/apidoc - doxygen - popd -fi diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh deleted file mode 100755 index 1bf0a3b88940..000000000000 --- a/ci/scripts/cpp_test.sh +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -arrow_dir=${1} -source_dir=${1}/cpp -build_dir=${2}/cpp -binary_output_dir=${build_dir}/${ARROW_BUILD_TYPE:-debug} - -export ARROW_TEST_DATA=${arrow_dir}/testing/data -export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data -export LD_LIBRARY_PATH=${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}:${LD_LIBRARY_PATH} - -# By default, aws-sdk tries to contact a non-existing local ip host -# to retrieve metadata. Disable this so that S3FileSystem tests run faster. -export AWS_EC2_METADATA_DISABLED=TRUE - -ctest_options=() -case "$(uname)" in - Linux) - n_jobs=$(nproc) - ;; - Darwin) - n_jobs=$(sysctl -n hw.ncpu) - ;; - MINGW*) - n_jobs=${NUMBER_OF_PROCESSORS:-1} - # TODO: Enable these crashed tests. 
- # https://issues.apache.org/jira/browse/ARROW-9072 - exclude_tests="gandiva-internals-test" - exclude_tests="${exclude_tests}|gandiva-projector-test" - exclude_tests="${exclude_tests}|gandiva-utf8-test" - if [ "${MSYSTEM}" = "MINGW32" ]; then - exclude_tests="${exclude_tests}|gandiva-projector-test" - exclude_tests="${exclude_tests}|gandiva-binary-test" - exclude_tests="${exclude_tests}|gandiva-boolean-expr-test" - exclude_tests="${exclude_tests}|gandiva-date-time-test" - exclude_tests="${exclude_tests}|gandiva-decimal-single-test" - exclude_tests="${exclude_tests}|gandiva-decimal-test" - exclude_tests="${exclude_tests}|gandiva-filter-project-test" - exclude_tests="${exclude_tests}|gandiva-filter-test" - exclude_tests="${exclude_tests}|gandiva-hash-test" - exclude_tests="${exclude_tests}|gandiva-if-expr-test" - exclude_tests="${exclude_tests}|gandiva-in-expr-test" - exclude_tests="${exclude_tests}|gandiva-literal-test" - exclude_tests="${exclude_tests}|gandiva-null-validity-test" - fi - ctest_options+=(--exclude-regex "${exclude_tests}") - ;; - *) - n_jobs=${NPROC:-1} - ;; -esac - -pushd ${build_dir} - -if ! 
which python > /dev/null 2>&1; then - export PYTHON=python3 -fi -ctest \ - --label-regex unittest \ - --output-on-failure \ - --parallel ${n_jobs} \ - --timeout 300 \ - "${ctest_options[@]}" - -if [ "${ARROW_FUZZING}" == "ON" ]; then - # Fuzzing regression tests - ${binary_output_dir}/arrow-ipc-stream-fuzz ${ARROW_TEST_DATA}/arrow-ipc-stream/crash-* - ${binary_output_dir}/arrow-ipc-stream-fuzz ${ARROW_TEST_DATA}/arrow-ipc-stream/*-testcase-* - ${binary_output_dir}/arrow-ipc-file-fuzz ${ARROW_TEST_DATA}/arrow-ipc-file/*-testcase-* - ${binary_output_dir}/arrow-ipc-tensor-stream-fuzz ${ARROW_TEST_DATA}/arrow-ipc-tensor-stream/*-testcase-* - if [ "${ARROW_PARQUET}" == "ON" ]; then - ${binary_output_dir}/parquet-arrow-fuzz ${ARROW_TEST_DATA}/parquet/fuzzing/*-testcase-* - fi -fi - -popd diff --git a/ci/scripts/csharp_build.sh b/ci/scripts/csharp_build.sh deleted file mode 100755 index 5a3976794874..000000000000 --- a/ci/scripts/csharp_build.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -source_dir=${1}/csharp - -pushd ${source_dir} -dotnet build -popd diff --git a/ci/scripts/csharp_pack.sh b/ci/scripts/csharp_pack.sh deleted file mode 100755 index e9dfc664ec55..000000000000 --- a/ci/scripts/csharp_pack.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -eux - -source_dir=${1}/csharp - -pushd ${source_dir} -dotnet pack -c Release -popd diff --git a/ci/scripts/csharp_test.sh b/ci/scripts/csharp_test.sh deleted file mode 100755 index 9e4e35dd40d1..000000000000 --- a/ci/scripts/csharp_test.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/csharp - -pushd ${source_dir} -dotnet test -for pdb in artifacts/Apache.Arrow/*/*/Apache.Arrow.pdb; do - sourcelink test ${pdb} -done -popd diff --git a/ci/scripts/docs_build.sh b/ci/scripts/docs_build.sh deleted file mode 100755 index e6ee768ee876..000000000000 --- a/ci/scripts/docs_build.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -set -ex - -arrow_dir=${1} -build_dir=${2}/docs - -export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} -export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig:${PKG_CONFIG_PATH} -export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 -export CFLAGS="-DARROW_NO_DEPRECATED_API" -export CXXFLAGS="-DARROW_NO_DEPRECATED_API" - -ncpus=$(python3 -c "import os; print(os.cpu_count())") - -# Sphinx docs -sphinx-build -b html -j ${ncpus} ${arrow_dir}/docs/source ${build_dir} - -# C++ - original doxygen -# rsync -a ${arrow_dir}/cpp/apidoc/ ${build_dir}/cpp - -# R -rsync -a ${arrow_dir}/r/docs/ ${build_dir}/r - -# C GLib -rsync -a ${ARROW_HOME}/share/gtk-doc/html/ ${build_dir}/c_glib - -# Java -rsync -a ${arrow_dir}/java/target/site/apidocs/ ${build_dir}/java/reference - -# Javascript -rsync -a ${arrow_dir}/js/doc/ ${build_dir}/js diff --git a/ci/scripts/go_build.sh b/ci/scripts/go_build.sh deleted file mode 100755 index 7093be4d2386..000000000000 --- a/ci/scripts/go_build.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/go - -pushd ${source_dir}/arrow - -go get -d -t -v ./... -go install -v ./... 
- -popd - -pushd ${source_dir}/parquet - -go get -d -t -v ./... -go install -v ./... - -popd diff --git a/ci/scripts/go_test.sh b/ci/scripts/go_test.sh deleted file mode 100755 index 7dd873df3e1b..000000000000 --- a/ci/scripts/go_test.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/go - -pushd ${source_dir}/arrow - -for d in $(go list ./... | grep -v vendor); do - go test $d -done - -popd - -pushd ${source_dir}/parquet - -for d in $(go list ./... | grep -v vendor); do - go test $d -done - -popd diff --git a/ci/scripts/install_conda.sh b/ci/scripts/install_conda.sh deleted file mode 100755 index f4d313b63dfb..000000000000 --- a/ci/scripts/install_conda.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -declare -A archs -archs=([amd64]=x86_64 - [arm32v7]=armv7l - [ppc64le]=ppc64le - [i386]=x86) - -declare -A platforms -platforms=([windows]=Windows - [macos]=MacOSX - [linux]=Linux) - -if [ "$#" -ne 4 ]; then - echo "Usage: $0 " - exit 1 -elif [[ -z ${archs[$1]} ]]; then - echo "Unexpected architecture: ${1}" - exit 1 -elif [[ -z ${platforms[$2]} ]]; then - echo "Unexpected platform: ${2}" - exit 1 -fi - -arch=${archs[$1]} -platform=${platforms[$2]} -version=$3 -prefix=$4 - -echo "Downloading Miniconda installer..." -wget -nv https://repo.continuum.io/miniconda/Miniconda3-${version}-${platform}-${arch}.sh -O /tmp/miniconda.sh -bash /tmp/miniconda.sh -b -p ${prefix} -rm /tmp/miniconda.sh - -# Like "conda init", but for POSIX sh rather than bash -ln -s ${prefix}/etc/profile.d/conda.sh /etc/profile.d/conda.sh - -# Configure -source /etc/profile.d/conda.sh -conda config --add channels conda-forge -conda config --set channel_priority strict -conda config --set show_channel_urls True -conda config --set remote_connect_timeout_secs 12 - -# Update and clean -conda update --all -y -conda clean --all -y diff --git a/ci/scripts/install_dask.sh b/ci/scripts/install_dask.sh deleted file mode 100755 index 954ce3249d9d..000000000000 --- a/ci/scripts/install_dask.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -if [ "$#" -ne 1 ]; then - echo "Usage: $0 " - exit 1 -fi - -dask=$1 - -if [ "${dask}" = "master" ]; then - pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] -elif [ "${dask}" = "latest" ]; then - conda install -q dask -else - conda install -q dask=${dask} -fi -conda clean --all diff --git a/ci/scripts/install_iwyu.sh b/ci/scripts/install_iwyu.sh deleted file mode 100755 index 3cd2cbc95fe6..000000000000 --- a/ci/scripts/install_iwyu.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -set -eu - -source_dir=${1:-/tmp/iwyu} -install_prefix=${2:-/usr/local} -clang_tools_version=${3:-8} - -iwyu_branch_name="clang_${clang_tools_version}" -if [ ${clang_tools_version} -lt 10 ]; then - iwyu_branch_name="${iwyu_branch_name}.0" -fi - -git clone --single-branch --branch ${iwyu_branch_name} \ - https://github.com/include-what-you-use/include-what-you-use.git ${source_dir} - -mkdir -p ${source_dir}/build -pushd ${source_dir}/build - -# Build IWYU for current Clang -export CC=clang-${clang_tools_version} -export CXX=clang++-${clang_tools_version} - -cmake -DCMAKE_PREFIX_PATH=/usr/lib/llvm-${clang_tools_version} \ - -DCMAKE_INSTALL_PREFIX=${install_prefix} \ - ${source_dir} -make -j4 -make install - -popd - -rm -rf ${source_dir} diff --git a/ci/scripts/install_kartothek.sh b/ci/scripts/install_kartothek.sh deleted file mode 100755 index 4d88943b6a9e..000000000000 --- a/ci/scripts/install_kartothek.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -e - -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - exit 1 -fi - -karthothek=$1 -target=$2 - -git clone --recurse-submodules https://github.com/JDASoftwareGroup/kartothek "${target}" -if [ "${kartothek}" = "master" ]; then - git -C "${target}" checkout master; -elif [ "${kartothek}" = "latest" ]; then - git -C "${target}" checkout $(git describe --tags); -else - git -C "${target}" checkout ${kartothek}; -fi - -pushd "${target}" -pip install --no-deps . -popd diff --git a/ci/scripts/install_minio.sh b/ci/scripts/install_minio.sh deleted file mode 100755 index 42f7ce040e04..000000000000 --- a/ci/scripts/install_minio.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -e - -declare -A archs -archs=([amd64]=amd64 - [arm64v8]=arm64 - [arm32v7]=arm - [s390x]=s390x) - -declare -A platforms -platforms=([linux]=linux - [macos]=darwin) - -arch=${archs[$1]} -platform=${platforms[$2]} -version=$3 -prefix=$4 - -if [ "$#" -ne 4 ]; then - echo "Usage: $0 " - exit 1 -elif [[ -z ${arch} ]]; then - echo "Unexpected architecture: ${1}" - exit 1 -elif [[ -z ${platform} ]]; then - echo "Unexpected platform: ${2}" - exit 1 -elif [[ ${version} != "latest" ]]; then - echo "Cannot fetch specific versions of minio, only latest is supported." - exit 1 -fi - -wget -nv -P ${prefix}/bin https://dl.min.io/server/minio/release/${platform}-${arch}/minio -chmod +x ${prefix}/bin/minio diff --git a/ci/scripts/install_osx_sdk.sh b/ci/scripts/install_osx_sdk.sh deleted file mode 100755 index 896d084e0b9d..000000000000 --- a/ci/scripts/install_osx_sdk.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -if [ ${using_homebrew} != "yes" ]; then - export MACOSX_DEPLOYMENT_TARGET="10.9" - export CONDA_BUILD_SYSROOT="$(xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk" - - if [[ ! 
-d ${CONDA_BUILD_SYSROOT} || "$OSX_FORCE_SDK_DOWNLOAD" == "1" ]]; then - echo "downloading ${macosx_deployment_target} sdk" - curl -L -O https://github.com/phracker/MacOSX-SDKs/releases/download/10.13/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz - tar -xf MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz -C "$(dirname "$CONDA_BUILD_SYSROOT")" - # set minimum sdk version to our target - plutil -replace MinimumSDKVersion -string ${MACOSX_DEPLOYMENT_TARGET} $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist - plutil -replace DTSDKName -string macosx${MACOSX_DEPLOYMENT_TARGET}internal $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist - fi - - if [ -d "${CONDA_BUILD_SYSROOT}" ]; then - echo "Found CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}" - else - echo "Missing CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}" - exit 1 - fi -fi diff --git a/ci/scripts/install_pandas.sh b/ci/scripts/install_pandas.sh deleted file mode 100755 index 5aca65f825a5..000000000000 --- a/ci/scripts/install_pandas.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -e - -if [ "$#" -lt 1 ]; then - echo "Usage: $0 " - exit 1 -fi - -pandas=$1 -numpy=${2:-"latest"} - -if [ "${numpy}" = "nightly" ]; then - pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy -elif [ "${numpy}" = "latest" ]; then - pip install numpy -else - pip install numpy==${numpy} -fi - -if [ "${pandas}" = "master" ]; then - pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation -elif [ "${pandas}" = "nightly" ]; then - pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas -elif [ "${pandas}" = "latest" ]; then - pip install pandas -else - pip install pandas==${pandas} -fi diff --git a/ci/scripts/install_spark.sh b/ci/scripts/install_spark.sh deleted file mode 100755 index 936313fd809c..000000000000 --- a/ci/scripts/install_spark.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -e - -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - exit 1 -fi - -spark=$1 -target=$2 - -git clone https://github.com/apache/spark "${target}" -git -C "${target}" checkout "${spark}" diff --git a/ci/scripts/install_turbodbc.sh b/ci/scripts/install_turbodbc.sh deleted file mode 100755 index a71520bebf48..000000000000 --- a/ci/scripts/install_turbodbc.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - exit 1 -fi - -turbodbc=$1 -target=$2 - -git clone --recurse-submodules https://github.com/blue-yonder/turbodbc "${target}" -if [ "${turbodbc}" = "master" ]; then - git -C "${target}" checkout master; -elif [ "${turbodbc}" = "latest" ]; then - git -C "${target}" checkout $(git describe --tags); -else - git -C "${target}" checkout ${turbodbc}; -fi diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh deleted file mode 100755 index 5d2e71916ed3..000000000000 --- a/ci/scripts/integration_arrow.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -arrow_dir=${1} -source_dir=${1}/cpp -build_dir=${2}/cpp - -gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration - -pip install -e $arrow_dir/dev/archery - -archery integration --with-all --run-flight \ - --gold-dirs=$gold_dir/0.14.1 \ - --gold-dirs=$gold_dir/0.17.1 \ - --gold-dirs=$gold_dir/1.0.0-bigendian \ - --gold-dirs=$gold_dir/1.0.0-littleendian \ - --gold-dirs=$gold_dir/2.0.0-compression \ - --gold-dirs=$gold_dir/4.0.0-shareddict \ diff --git a/ci/scripts/integration_dask.sh b/ci/scripts/integration_dask.sh deleted file mode 100755 index a73592877588..000000000000 --- a/ci/scripts/integration_dask.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -# check that optional pyarrow modules are available -# because pytest would just skip the dask tests -python -c "import pyarrow.orc" -python -c "import pyarrow.parquet" - -# check that dask.dataframe is correctly installed -python -c "import dask.dataframe" - -# TODO(kszucs): the following tests are also uses pyarrow -# pytest -sv --pyargs dask.bytes.tests.test_s3 -# pytest -sv --pyargs dask.bytes.tests.test_hdfs -# pytest -sv --pyargs dask.bytes.tests.test_local - -# skip failing pickle test, see https://github.com/dask/dask/issues/6374 -pytest -v --pyargs dask.dataframe.tests.test_dataframe -k "not test_dataframe_picklable" -pytest -v --pyargs dask.dataframe.io.tests.test_orc -# skip failing parquet tests, see https://github.com/dask/dask/issues/6243 -# test_illegal_column_name can be removed once next dask release is out -# (https://github.com/dask/dask/pull/6378) -pytest -v --pyargs dask.dataframe.io.tests.test_parquet \ - -k "not test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_fails_by_default and not test_timeseries_nulls_in_schema and not test_illegal_column_name" diff --git a/ci/scripts/integration_hdfs.sh b/ci/scripts/integration_hdfs.sh deleted file mode 100755 index c95449379c32..000000000000 --- a/ci/scripts/integration_hdfs.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -source_dir=${1}/cpp -build_dir=${2}/cpp - -export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath --glob) -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop -export LIBHDFS3_CONF=$HADOOP_CONF_DIR/hdfs-site.xml -export ARROW_LIBHDFS3_DIR=$CONDA_PREFIX/lib - -libhdfs_dir=$HADOOP_HOME/lib/native -hadoop_home=$HADOOP_HOME - -function use_hadoop_home() { - unset ARROW_LIBHDFS_DIR - export HADOOP_HOME=$hadoop_home -} - -function use_libhdfs_dir() { - unset HADOOP_HOME - export ARROW_LIBHDFS_DIR=$libhdfs_dir -} - -# execute cpp tests -export ARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON -pushd ${build_dir} - -debug/arrow-io-hdfs-test -debug/arrow-hdfs-test - -use_libhdfs_dir -debug/arrow-io-hdfs-test -debug/arrow-hdfs-test -use_hadoop_home - -popd - -# cannot use --pyargs with custom arguments like --hdfs or --only-hdfs, because -# pytest ignores them, see https://github.com/pytest-dev/pytest/issues/3517 -export PYARROW_TEST_HDFS=ON - -export PYARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON - -pytest -vs --pyargs pyarrow.tests.test_fs -pytest -vs --pyargs pyarrow.tests.test_hdfs - -use_libhdfs_dir -pytest -vs --pyargs pyarrow.tests.test_fs -pytest -vs --pyargs pyarrow.tests.test_hdfs -use_hadoop_home diff --git a/ci/scripts/integration_hiveserver2.sh b/ci/scripts/integration_hiveserver2.sh deleted file mode 100755 index 36fba5ca8d8a..000000000000 --- a/ci/scripts/integration_hiveserver2.sh +++ /dev/null @@ -1,32 +0,0 
@@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -e - -arrow_dir=${1} -source_dir=${1}/cpp -build_dir=${2}/cpp - -${arrow_dir}/ci/scripts/util_wait_for_it.sh impala:21050 -t 300 -s -- echo "impala is up" - -pushd ${build_dir} - -# ninja hiveserver2-test -debug/hiveserver2-test - -popd diff --git a/ci/scripts/integration_kartothek.sh b/ci/scripts/integration_kartothek.sh deleted file mode 100755 index 379569b9c996..000000000000 --- a/ci/scripts/integration_kartothek.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -# check that optional pyarrow modules are available -# because pytest would just skip the pyarrow tests -python -c "import pyarrow.parquet" - -# check that kartothek is correctly installed -python -c "import kartothek" - -pushd /kartothek -# See ARROW-12314, test_load_dataframes_columns_raises_missing skipped because of changed error message -pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing" diff --git a/ci/scripts/integration_spark.sh b/ci/scripts/integration_spark.sh deleted file mode 100755 index a53a62971caf..000000000000 --- a/ci/scripts/integration_spark.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# exit on any error -set -eu - -source_dir=${1} -spark_dir=${2} - -# Test Spark with latest PyArrow only, don't build with latest Arrow Java -test_pyarrow_only=${3:-false} - -# Spark branch to checkout -spark_version=${SPARK_VERSION:-master} - -# Use old behavior that always dropped tiemzones. -export PYARROW_IGNORE_TIMEZONE=1 - -if [ "${SPARK_VERSION:0:2}" == "2." 
]; then - # https://github.com/apache/spark/blob/master/docs/sql-pyspark-pandas-with-arrow.md#compatibility-setting-for-pyarrow--0150-and-spark-23x-24x - export ARROW_PRE_0_15_IPC_FORMAT=1 -fi - -# Get Arrow Java version -pushd ${source_dir}/java - arrow_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'` -popd - -export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=warn" -export MAVEN_OPTS="${MAVEN_OPTS} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" - -pushd ${spark_dir} - - if [ "${test_pyarrow_only}" == "true" ]; then - echo "Building Spark ${SPARK_VERSION} to test pyarrow only" - - # Build Spark only - build/mvn -B -DskipTests package - - else - - # Update Spark pom with the Arrow version just installed and build Spark, need package phase for pyspark - echo "Building Spark ${SPARK_VERSION} with Arrow ${arrow_version}" - mvn versions:set-property -Dproperty=arrow.version -DnewVersion=${arrow_version} - - # Build Spark with new Arrow Java - build/mvn -B -DskipTests package -pl sql/core -pl assembly -am - - spark_scala_tests=( - "org.apache.spark.sql.execution.arrow" - "org.apache.spark.sql.execution.vectorized.ColumnarBatchSuite" - "org.apache.spark.sql.execution.vectorized.ArrowColumnVectorSuite") - - (echo "Testing Spark:"; IFS=$'\n'; echo "${spark_scala_tests[*]}") - - # TODO: should be able to only build spark-sql tests with adding "-pl sql/core" but not currently working - build/mvn -B -Dtest=none -DwildcardSuites=$(IFS=,; echo "${spark_scala_tests[*]}") test - fi - - # Run pyarrow related Python tests only - spark_python_tests=( - "pyspark.sql.tests.test_arrow" - "pyspark.sql.tests.test_pandas_map" - "pyspark.sql.tests.test_pandas_cogrouped_map" - "pyspark.sql.tests.test_pandas_grouped_map" - "pyspark.sql.tests.test_pandas_udf" - "pyspark.sql.tests.test_pandas_udf_scalar" - 
"pyspark.sql.tests.test_pandas_udf_grouped_agg" - "pyspark.sql.tests.test_pandas_udf_window") - - (echo "Testing PySpark:"; IFS=$'\n'; echo "${spark_python_tests[*]}") - python/run-tests --testnames "$(IFS=,; echo "${spark_python_tests[*]}")" --python-executables python -popd diff --git a/ci/scripts/integration_turbodbc.sh b/ci/scripts/integration_turbodbc.sh deleted file mode 100755 index f56074358a6e..000000000000 --- a/ci/scripts/integration_turbodbc.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -e - -source_dir=${1} -build_dir=${2}/turbodbc - -# check that optional pyarrow modules are available -# because pytest would just skip the pyarrow tests -python -c "import pyarrow.orc" -python -c "import pyarrow.parquet" - -mkdir -p ${build_dir} -pushd ${build_dir} - -cmake -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \ - -DCMAKE_CXX_FLAGS=${CXXFLAGS} \ - -DPYTHON_EXECUTABLE=$(which python) \ - -GNinja \ - ${source_dir} -ninja install - -# TODO(ARROW-5074) -export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}" -export ODBCSYSINI="${source_dir}/travis/odbc/" - -service postgresql start -ctest --output-on-failure - -popd diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh deleted file mode 100755 index b8a7f7ced747..000000000000 --- a/ci/scripts/java_build.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -arrow_dir=${1} -source_dir=${1}/java -cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug} -with_docs=${3:-false} - -if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then - # Since some files for s390_64 are not available at maven central, - # download pre-build files from bintray and install them explicitly - mvn_install="mvn install:install-file" - wget="wget" - bintray_base_url="https://dl.bintray.com/apache/arrow" - - bintray_dir="protoc-binary" - group="com.google.protobuf" - artifact="protoc" - ver="3.7.1" - classifier="linux-s390_64" - extension="exe" - target=${artifact}-${ver}-${classifier}.${extension} - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target} - ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target} - # protoc requires libprotoc.so.18 libprotobuf.so.18 - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/libprotoc.so.18 - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/libprotobuf.so.18 - mkdir -p ${ARROW_HOME}/lib - cp lib*.so.18 ${ARROW_HOME}/lib - export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${ARROW_HOME}/lib - - bintray_dir="protoc-gen-grpc-java-binary" - group="io.grpc" - artifact="protoc-gen-grpc-java" - ver="1.30.2" - classifier="linux-s390_64" - extension="exe" - target=${artifact}-${ver}-${classifier}.${extension} - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target} - ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target} - - bintray_dir="netty-binary" - group="io.netty" - artifact="netty-transport-native-unix-common" - ver="4.1.48.Final" - classifier="linux-s390_64" - extension="jar" - target=${artifact}-${ver}-${classifier}.${extension} - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target} - ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} 
-Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target} - artifact="netty-transport-native-epoll" - extension="jar" - target=${artifact}-${ver}-${classifier}.${extension} - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target} - ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target} -fi - -mvn="mvn -B -DskipTests -Drat.skip=true -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" -# Use `2 * ncores` threads -mvn="${mvn} -T 2C" - -pushd ${source_dir} - -${mvn} install - -if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" == "ON" ]; then - ${mvn} -Pshade-flatbuffers install -fi - -if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then - ${mvn} -Darrow.cpp.build.dir=${cpp_build_dir} -Parrow-jni install -fi - -if [ "${ARROW_PLASMA}" = "ON" ]; then - pushd ${source_dir}/plasma - ${mvn} clean install - popd -fi - -if [ "${with_docs}" == "true" ]; then - ${mvn} -Dcheckstyle.skip=true install site -fi - -popd diff --git a/ci/scripts/java_test.sh b/ci/scripts/java_test.sh deleted file mode 100755 index da9e45280ec9..000000000000 --- a/ci/scripts/java_test.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -arrow_dir=${1} -source_dir=${1}/java -cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug} - -# For JNI and Plasma tests -export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} -export PLASMA_STORE=${ARROW_HOME}/bin/plasma-store-server - -mvn="mvn -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" -# Use `2 * ncores` threads -mvn="${mvn} -T 2C" - -pushd ${source_dir} - -${mvn} test - -if [ "${ARROW_JNI}" = "ON" ]; then - ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir} -fi - -if [ "${ARROW_PLASMA}" = "ON" ]; then - pushd ${source_dir}/plasma - java -cp target/test-classes:target/classes \ - -Djava.library.path=${cpp_build_dir} \ - org.apache.arrow.plasma.PlasmaClientTest - popd -fi - -popd diff --git a/ci/scripts/js_build.sh b/ci/scripts/js_build.sh deleted file mode 100755 index 10ceb41ee658..000000000000 --- a/ci/scripts/js_build.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -source_dir=${1}/js -with_docs=${2:-false} - -pushd ${source_dir} - -yarn --frozen-lockfile -# TODO(kszucs): linting should be moved to archery -yarn lint:ci -yarn build - -if [ "${with_docs}" == "true" ]; then - yarn doc -fi - -popd diff --git a/ci/scripts/js_test.sh b/ci/scripts/js_test.sh deleted file mode 100755 index 345d6cb811e7..000000000000 --- a/ci/scripts/js_test.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/js - -pushd ${source_dir} - -yarn lint -yarn test - -popd diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh deleted file mode 100755 index cb6ca30a64ea..000000000000 --- a/ci/scripts/msys2_setup.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -eux - -target=$1 - -packages=() -case "${target}" in - cpp|c_glib|ruby) - packages+=(${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp) - packages+=(${MINGW_PACKAGE_PREFIX}-boost) - packages+=(${MINGW_PACKAGE_PREFIX}-brotli) - packages+=(${MINGW_PACKAGE_PREFIX}-ccache) - packages+=(${MINGW_PACKAGE_PREFIX}-clang) - packages+=(${MINGW_PACKAGE_PREFIX}-cmake) - packages+=(${MINGW_PACKAGE_PREFIX}-gcc) - packages+=(${MINGW_PACKAGE_PREFIX}-gflags) - packages+=(${MINGW_PACKAGE_PREFIX}-grpc) - packages+=(${MINGW_PACKAGE_PREFIX}-gtest) - packages+=(${MINGW_PACKAGE_PREFIX}-libutf8proc) - packages+=(${MINGW_PACKAGE_PREFIX}-llvm) - packages+=(${MINGW_PACKAGE_PREFIX}-lz4) - packages+=(${MINGW_PACKAGE_PREFIX}-ninja) - packages+=(${MINGW_PACKAGE_PREFIX}-polly) - packages+=(${MINGW_PACKAGE_PREFIX}-protobuf) - packages+=(${MINGW_PACKAGE_PREFIX}-python3-numpy) - packages+=(${MINGW_PACKAGE_PREFIX}-rapidjson) - packages+=(${MINGW_PACKAGE_PREFIX}-re2) - packages+=(${MINGW_PACKAGE_PREFIX}-snappy) - packages+=(${MINGW_PACKAGE_PREFIX}-thrift) - packages+=(${MINGW_PACKAGE_PREFIX}-zlib) - packages+=(${MINGW_PACKAGE_PREFIX}-zstd) - ;; -esac - -case "${target}" in - c_glib|ruby) - packages+=(${MINGW_PACKAGE_PREFIX}-gobject-introspection) - packages+=(${MINGW_PACKAGE_PREFIX}-gtk-doc) - packages+=(${MINGW_PACKAGE_PREFIX}-meson) - ;; -esac - -pacman \ - --needed \ - --noconfirm \ - --refresh \ - --sync \ - "${packages[@]}" - -"$(dirname $0)/ccache_setup.sh" -echo "CCACHE_DIR=$(cygpath --absolute --windows ccache)" >> $GITHUB_ENV diff --git a/ci/scripts/msys2_system_clean.sh 
b/ci/scripts/msys2_system_clean.sh deleted file mode 100755 index a356aee66600..000000000000 --- a/ci/scripts/msys2_system_clean.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -eux - -pacman \ - --cascade \ - --noconfirm \ - --nosave \ - --recursive \ - --remove \ - ${MINGW_PACKAGE_PREFIX}-clang-tools-extra \ - ${MINGW_PACKAGE_PREFIX}-gcc-ada \ - ${MINGW_PACKAGE_PREFIX}-gcc-fortran \ - ${MINGW_PACKAGE_PREFIX}-gcc-libgfortran \ - ${MINGW_PACKAGE_PREFIX}-gcc-objc \ - ${MINGW_PACKAGE_PREFIX}-libgccjit diff --git a/ci/scripts/msys2_system_upgrade.sh b/ci/scripts/msys2_system_upgrade.sh deleted file mode 100755 index aecd30893320..000000000000 --- a/ci/scripts/msys2_system_upgrade.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -eux - -# https://www.msys2.org/news/#2020-06-29-new-packagers -msys2_repo_base_url=https://repo.msys2.org/msys -# Mirror -msys2_repo_base_url=https://sourceforge.net/projects/msys2/files/REPOS/MSYS2 -msys2_keyring_pkg=msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz -for suffix in "" ".sig"; do - curl \ - --location \ - --remote-name \ - --show-error \ - --silent \ - ${msys2_repo_base_url}/x86_64/${msys2_keyring_pkg}${suffix} -done -pacman-key --verify ${msys2_keyring_pkg}.sig -pacman \ - --noconfirm \ - --upgrade \ - ${msys2_keyring_pkg} - - -pacman \ - --noconfirm \ - --refresh \ - --refresh \ - --sync \ - --sysupgrade \ - --sysupgrade diff --git a/ci/scripts/python_benchmark.sh b/ci/scripts/python_benchmark.sh deleted file mode 100755 index 3a35298dc044..000000000000 --- a/ci/scripts/python_benchmark.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -# Check the ASV benchmarking setup. -# Unfortunately this won't ensure that all benchmarks succeed -# (see https://github.com/airspeed-velocity/asv/issues/449) -source deactivate -conda create -y -q -n pyarrow_asv python=$PYTHON_VERSION -conda activate pyarrow_asv -pip install -q git+https://github.com/pitrou/asv.git@customize_commands - -export PYARROW_WITH_PARQUET=1 -export PYARROW_WITH_PLASMA=1 -export PYARROW_WITH_ORC=0 -export PYARROW_WITH_GANDIVA=0 - -pushd $ARROW_PYTHON_DIR -# Workaround for https://github.com/airspeed-velocity/asv/issues/631 -git fetch --depth=100 origin master:master -# Generate machine information (mandatory) -asv machine --yes -# Run benchmarks on the changeset being tested -asv run --no-pull --show-stderr --quick HEAD^! -popd # $ARROW_PYTHON_DIR diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh deleted file mode 100755 index ec6d723b2a73..000000000000 --- a/ci/scripts/python_build.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/python -build_dir=${2}/python - -if [ ! 
-z "${CONDA_PREFIX}" ]; then - echo -e "===\n=== Conda environment for build\n===" - conda list -fi - -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} -export PYARROW_WITH_S3=${ARROW_S3:-OFF} -export PYARROW_WITH_ORC=${ARROW_ORC:-OFF} -export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} -export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF} -export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF} -export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF} -export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF} -export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF} -export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF} - -export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} - -pushd ${source_dir} - -relative_build_dir=$(realpath --relative-to=. $build_dir) - -# not nice, but prevents mutating the mounted the source directory for docker -${PYTHON:-python} \ - setup.py build --build-base $build_dir \ - install --single-version-externally-managed \ - --record $relative_build_dir/record.txt - -popd diff --git a/ci/scripts/python_sdist_build.sh b/ci/scripts/python_sdist_build.sh deleted file mode 100755 index f9e9359b6f66..000000000000 --- a/ci/scripts/python_sdist_build.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -set -eux - -source_dir=${1}/python - -pushd ${source_dir} -export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-} -${PYTHON:-python} setup.py sdist -popd diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh deleted file mode 100755 index 154c1b3cf8b0..000000000000 --- a/ci/scripts/python_sdist_test.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -eux - -arrow_dir=${1} - -export ARROW_SOURCE_DIR=${arrow_dir} -export ARROW_TEST_DATA=${arrow_dir}/testing/data -export PARQUET_TEST_DATA=${arrow_dir}/parquet-testing/data - -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} -export PYARROW_WITH_S3=${ARROW_S3:-OFF} -export PYARROW_WITH_ORC=${ARROW_ORC:-OFF} -export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} -export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF} -export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF} -export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF} -export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF} -export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF} -export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF} - -# TODO: Users should not require ARROW_HOME and pkg-config to find Arrow C++. -# Related: ARROW-9171 -# unset ARROW_HOME -# apt purge -y pkg-config - -if [ -n "${PYARROW_VERSION:-}" ]; then - sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz" -else - sdist=$(ls "${arrow_dir}/python/dist/pyarrow-*.tar.gz" | sort -r | head -n1) -fi -${PYTHON:-python} -m pip install ${sdist} - -pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh deleted file mode 100755 index 5af3caf2c6d3..000000000000 --- a/ci/scripts/python_test.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -arrow_dir=${1} - -export ARROW_SOURCE_DIR=${arrow_dir} -export ARROW_TEST_DATA=${arrow_dir}/testing/data -export PARQUET_TEST_DATA=${arrow_dir}/parquet-testing/data -export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} - -# Enable some checks inside Python itself -export PYTHONDEVMODE=1 - -pytest -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh deleted file mode 100755 index 93e4939af23d..000000000000 --- a/ci/scripts/python_wheel_macos_build.sh +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -source_dir=${1} -build_dir=${2} - -echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ===" -# Clear output directories and leftovers -rm -rf ${build_dir}/install -rm -rf ${source_dir}/python/dist -rm -rf ${source_dir}/python/build -rm -rf ${source_dir}/python/repaired_wheels -rm -rf ${source_dir}/python/pyarrow/*.so -rm -rf ${source_dir}/python/pyarrow/*.so.* - -echo "=== (${PYTHON_VERSION}) Set OSX SDK and C flags ===" -# Arrow is 64-bit-only at the moment -export CFLAGS="-fPIC -arch x86_64 ${CFLAGS//-arch i386/}" -export CXXFLAGS="-fPIC -arch x86_64 ${CXXFLAGS//-arch i386} -std=c++11" -export SDKROOT="$(xcrun --show-sdk-path)" - -echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" -: ${ARROW_DATASET:=ON} -: ${ARROW_FLIGHT:=ON} -: ${ARROW_GANDIVA:=OFF} -: ${ARROW_HDFS:=ON} -: ${ARROW_JEMALLOC:=ON} -: ${ARROW_MIMALLOC:=ON} -: ${ARROW_ORC:=ON} -: ${ARROW_PARQUET:=ON} -: ${ARROW_PLASMA:=ON} -: ${ARROW_S3:=ON} -: ${ARROW_TENSORFLOW:=ON} -: ${ARROW_WITH_BROTLI:=ON} -: ${ARROW_WITH_BZ2:=ON} -: ${ARROW_WITH_LZ4:=ON} -: ${ARROW_WITH_SNAPPY:=ON} -: ${ARROW_WITH_ZLIB:=ON} -: ${ARROW_WITH_ZSTD:=ON} -: ${CMAKE_BUILD_TYPE:=release} -: ${CMAKE_GENERATOR:=Ninja} -: ${VCPKG_FEATURE_FLAGS:=-manifests} -: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-osx-static-${CMAKE_BUILD_TYPE}}} - -mkdir -p ${build_dir}/build -pushd ${build_dir}/build -cmake \ - -DARROW_BUILD_SHARED=ON \ - -DARROW_BUILD_STATIC=OFF \ - -DARROW_BUILD_TESTS=OFF \ - -DARROW_DATASET=${ARROW_DATASET} \ - -DARROW_DEPENDENCY_SOURCE="VCPKG" \ - -DARROW_DEPENDENCY_USE_SHARED=OFF \ - -DARROW_FLIGHT==${ARROW_FLIGHT} \ - -DARROW_GANDIVA=${ARROW_GANDIVA} \ - -DARROW_HDFS=${ARROW_HDFS} \ - -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ - -DARROW_MIMALLOC=${ARROW_MIMALLOC} \ - -DARROW_ORC=${ARROW_ORC} \ - -DARROW_PACKAGE_KIND="python-wheel-macos" \ - -DARROW_PARQUET=${ARROW_PARQUET} \ - -DARROW_PLASMA=${ARROW_PLASMA} \ - -DARROW_PYTHON=ON \ - -DARROW_RPATH_ORIGIN=ON \ - 
-DARROW_S3=${ARROW_S3} \ - -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \ - -DARROW_USE_CCACHE=ON \ - -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \ - -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \ - -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \ - -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \ - -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \ - -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \ - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ - -DCMAKE_INSTALL_LIBDIR=lib \ - -DCMAKE_INSTALL_PREFIX=${build_dir}/install \ - -DCMAKE_UNITY_BUILD=ON \ - -DOPENSSL_USE_STATIC_LIBS=ON \ - -DVCPKG_MANIFEST_MODE=OFF \ - -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ - -G ${CMAKE_GENERATOR} \ - ${source_dir}/cpp -cmake --build . --target install -popd - -# Check that we don't expose any unwanted symbols -# check_arrow_visibility - -echo "=== (${PYTHON_VERSION}) Building wheel ===" -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} -export PYARROW_BUNDLE_ARROW_CPP=1 -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} -export PYARROW_INSTALL_TESTS=1 -export PYARROW_WITH_DATASET=${ARROW_DATASET} -export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT} -export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA} -export PYARROW_WITH_HDFS=${ARROW_HDFS} -export PYARROW_WITH_ORC=${ARROW_ORC} -export PYARROW_WITH_PARQUET=${ARROW_PARQUET} -export PYARROW_WITH_PLASMA=${ARROW_PLASMA} -export PYARROW_WITH_S3=${ARROW_S3} -# PyArrow build configuration -export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig - -pushd ${source_dir}/python -python setup.py bdist_wheel -popd - -echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" -deps=$(delocate-listdeps ${source_dir}/python/dist/*.whl) - -if echo $deps | grep -v "^@rpath/lib\(arrow\|gandiva\|parquet\|plasma\)"; then - echo "There are non-bundled shared library dependencies." 
- exit 1 -fi diff --git a/ci/scripts/python_wheel_macos_test.sh b/ci/scripts/python_wheel_macos_test.sh deleted file mode 100755 index 6ac8576d484c..000000000000 --- a/ci/scripts/python_wheel_macos_test.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -source_dir=${1} - -: ${ARROW_S3:=ON} - -export PYARROW_TEST_CYTHON=OFF -export PYARROW_TEST_DATASET=ON -export PYARROW_TEST_GANDIVA=OFF -export PYARROW_TEST_HDFS=ON -export PYARROW_TEST_ORC=ON -export PYARROW_TEST_PANDAS=ON -export PYARROW_TEST_PARQUET=ON -export PYARROW_TEST_PLASMA=ON -export PYARROW_TEST_S3=${ARROW_S3} -export PYARROW_TEST_TENSORFLOW=ON -export PYARROW_TEST_FLIGHT=ON - -export ARROW_TEST_DATA=${source_dir}/testing/data -export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data - -# Install the built wheels -pip install ${source_dir}/python/dist/*.whl - -# Test that the modules are importable -python -c " -import pyarrow -import pyarrow._hdfs -import pyarrow.csv -import pyarrow.dataset -import pyarrow.flight -import pyarrow.fs -import pyarrow.json -import pyarrow.orc -import pyarrow.parquet -import pyarrow.plasma -" - -if [ "${PYARROW_TEST_S3}" == "ON" ]; then - python -c "import pyarrow._s3fs" -fi - -# Install testing dependencies -pip install -r ${source_dir}/python/requirements-wheel-test.txt - -# Execute unittest -pytest -r s --pyargs pyarrow diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh deleted file mode 100755 index 312e1c3b9b7c..000000000000 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -function check_arrow_visibility { - nm --demangle --dynamic /tmp/arrow-dist/lib/libarrow.so > nm_arrow.log - - # Filter out Arrow symbols and see if anything remains. - # '_init' and '_fini' symbols may or not be present, we don't care. - # (note we must ignore the grep exit status when no match is found) - grep ' T ' nm_arrow.log | grep -v -E '(arrow|\b_init\b|\b_fini\b)' | cat - > visible_symbols.log - - if [[ -f visible_symbols.log && `cat visible_symbols.log | wc -l` -eq 0 ]]; then - return 0 - else - echo "== Unexpected symbols exported by libarrow.so ==" - cat visible_symbols.log - echo "================================================" - - exit 1 - fi -} - -echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ===" -# Clear output directories and leftovers -rm -rf /tmp/arrow-build -rm -rf /arrow/python/dist -rm -rf /arrow/python/build -rm -rf /arrow/python/repaired_wheels -rm -rf /arrow/python/pyarrow/*.so -rm -rf /arrow/python/pyarrow/*.so.* - -echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" -: ${ARROW_DATASET:=ON} -: ${ARROW_FLIGHT:=ON} -: ${ARROW_GANDIVA:=OFF} -: ${ARROW_HDFS:=ON} -: ${ARROW_JEMALLOC:=ON} -: ${ARROW_MIMALLOC:=ON} -: ${ARROW_ORC:=ON} -: ${ARROW_PARQUET:=ON} -: ${ARROW_PLASMA:=ON} -: ${ARROW_S3:=ON} -: ${ARROW_TENSORFLOW:=ON} -: ${ARROW_WITH_BROTLI:=ON} -: ${ARROW_WITH_BZ2:=ON} -: ${ARROW_WITH_LZ4:=ON} -: ${ARROW_WITH_SNAPPY:=ON} -: ${ARROW_WITH_ZLIB:=ON} -: ${ARROW_WITH_ZSTD:=ON} -: ${CMAKE_BUILD_TYPE:=release} -: ${CMAKE_UNITY_BUILD:=ON} -: ${CMAKE_GENERATOR:=Ninja} -: 
${VCPKG_FEATURE_FLAGS:=-manifests} -: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}} - -mkdir /tmp/arrow-build -pushd /tmp/arrow-build -cmake \ - -DARROW_BROTLI_USE_SHARED=OFF \ - -DARROW_BUILD_SHARED=ON \ - -DARROW_BUILD_STATIC=OFF \ - -DARROW_BUILD_TESTS=OFF \ - -DARROW_DATASET=${ARROW_DATASET} \ - -DARROW_DEPENDENCY_SOURCE="VCPKG" \ - -DARROW_DEPENDENCY_USE_SHARED=OFF \ - -DARROW_FLIGHT==${ARROW_FLIGHT} \ - -DARROW_GANDIVA=${ARROW_GANDIVA} \ - -DARROW_HDFS=${ARROW_HDFS} \ - -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ - -DARROW_MIMALLOC=${ARROW_MIMALLOC} \ - -DARROW_ORC=${ARROW_ORC} \ - -DARROW_PACKAGE_KIND="python-wheel-manylinux${MANYLINUX_VERSION}" \ - -DARROW_PARQUET=${ARROW_PARQUET} \ - -DARROW_PLASMA=${ARROW_PLASMA} \ - -DARROW_PYTHON=ON \ - -DARROW_RPATH_ORIGIN=ON \ - -DARROW_S3=${ARROW_S3} \ - -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \ - -DARROW_USE_CCACHE=ON \ - -DARROW_UTF8PROC_USE_SHARED=OFF \ - -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \ - -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \ - -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \ - -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \ - -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \ - -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \ - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ - -DCMAKE_INSTALL_LIBDIR=lib \ - -DCMAKE_INSTALL_PREFIX=/tmp/arrow-dist \ - -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ - -DOPENSSL_USE_STATIC_LIBS=ON \ - -DVCPKG_MANIFEST_MODE=OFF \ - -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ - -G ${CMAKE_GENERATOR} \ - /arrow/cpp -cmake --build . 
--target install -popd - -# Check that we don't expose any unwanted symbols -check_arrow_visibility - -echo "=== (${PYTHON_VERSION}) Building wheel ===" -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} -export PYARROW_BUNDLE_ARROW_CPP=1 -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} -export PYARROW_INSTALL_TESTS=1 -export PYARROW_WITH_DATASET=${ARROW_DATASET} -export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT} -export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA} -export PYARROW_WITH_HDFS=${ARROW_HDFS} -export PYARROW_WITH_ORC=${ARROW_ORC} -export PYARROW_WITH_PARQUET=${ARROW_PARQUET} -export PYARROW_WITH_PLASMA=${ARROW_PLASMA} -export PYARROW_WITH_S3=${ARROW_S3} -# PyArrow build configuration -export PKG_CONFIG_PATH=/usr/lib/pkgconfig:/tmp/arrow-dist/lib/pkgconfig - -pushd /arrow/python -python setup.py bdist_wheel - -echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} ===" -auditwheel repair -L . dist/pyarrow-*.whl -w repaired_wheels -popd diff --git a/ci/scripts/python_wheel_manylinux_test.sh b/ci/scripts/python_wheel_manylinux_test.sh deleted file mode 100755 index 21987748f73f..000000000000 --- a/ci/scripts/python_wheel_manylinux_test.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e -set -x -set -o pipefail - -case $# in - 1) KIND="$1" - case $KIND in - imports|unittests) ;; - *) echo "Invalid argument: '${KIND}', valid options are 'imports', 'unittests'" - exit 1 - ;; - esac - ;; - *) echo "Usage: $0 imports|unittests" - exit 1 - ;; -esac - -export PYARROW_TEST_CYTHON=OFF -export PYARROW_TEST_DATASET=ON -export PYARROW_TEST_GANDIVA=OFF -export PYARROW_TEST_HDFS=ON -export PYARROW_TEST_ORC=ON -export PYARROW_TEST_PANDAS=ON -export PYARROW_TEST_PARQUET=ON -export PYARROW_TEST_PLASMA=ON -export PYARROW_TEST_S3=ON -export PYARROW_TEST_TENSORFLOW=ON -export PYARROW_TEST_FLIGHT=ON - -export ARROW_TEST_DATA=/arrow/testing/data -export PARQUET_TEST_DATA=/arrow/submodules/parquet-testing/data - -# Install the built wheels -pip install /arrow/python/repaired_wheels/*.whl - -if [ "${KIND}" == "imports" ]; then - # Test that the modules are importable - python -c " -import pyarrow -import pyarrow._hdfs -import pyarrow._s3fs -import pyarrow.csv -import pyarrow.dataset -import pyarrow.flight -import pyarrow.fs -import pyarrow.json -import pyarrow.orc -import pyarrow.parquet -import pyarrow.plasma" -elif [ "${KIND}" == "unittests" ]; then - # Execute unittest, test dependencies must be installed - pytest -r s --pyargs pyarrow -fi diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat deleted file mode 100644 index 23be7f512d6e..000000000000 --- a/ci/scripts/python_wheel_windows_build.bat +++ /dev/null @@ -1,109 +0,0 @@ -@rem Licensed to the Apache Software Foundation (ASF) under one -@rem or more contributor license agreements. See the NOTICE file -@rem distributed with this work for additional information -@rem regarding copyright ownership. 
The ASF licenses this file -@rem to you under the Apache License, Version 2.0 (the -@rem "License"); you may not use this file except in compliance -@rem with the License. You may obtain a copy of the License at -@rem -@rem http://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, -@rem software distributed under the License is distributed on an -@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -@rem KIND, either express or implied. See the License for the -@rem specific language governing permissions and limitations -@rem under the License. - -@echo on - -echo "Building windows wheel..." - -call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat" - -echo "=== (%PYTHON_VERSION%) Clear output directories and leftovers ===" -del /s /q C:\arrow-build -del /s /q C:\arrow-dist -del /s /q C:\arrow\python\dist -del /s /q C:\arrow\python\build -del /s /q C:\arrow\python\pyarrow\*.so -del /s /q C:\arrow\python\pyarrow\*.so.* - -echo "=== (%PYTHON_VERSION%) Building Arrow C++ libraries ===" -set ARROW_DATASET=ON -set ARROW_FLIGHT=ON -set ARROW_GANDIVA=OFF -set ARROW_HDFS=ON -set ARROW_ORC=OFF -set ARROW_PARQUET=ON -set ARROW_MIMALLOC=ON -set ARROW_S3=ON -set ARROW_TENSORFLOW=ON -set ARROW_WITH_BROTLI=ON -set ARROW_WITH_BZ2=ON -set ARROW_WITH_LZ4=ON -set ARROW_WITH_SNAPPY=ON -set ARROW_WITH_ZLIB=ON -set ARROW_WITH_ZSTD=ON -set CMAKE_UNITY_BUILD=ON -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 -set VCPKG_FEATURE_FLAGS=-manifests - -mkdir C:\arrow-build -pushd C:\arrow-build -cmake ^ - -DARROW_BUILD_SHARED=ON ^ - -DARROW_BUILD_STATIC=OFF ^ - -DARROW_BUILD_TESTS=OFF ^ - -DARROW_CXXFLAGS="/MP" ^ - -DARROW_DATASET=%ARROW_DATASET% ^ - -DARROW_DEPENDENCY_SOURCE=VCPKG ^ - -DARROW_DEPENDENCY_USE_SHARED=OFF ^ - -DARROW_FLIGHT=%ARROW_FLIGHT% ^ - -DARROW_GANDIVA=%ARROW_GANDIVA% ^ - -DARROW_HDFS=%ARROW_HDFS% ^ - -DARROW_MIMALLOC=%ARROW_MIMALLOC% ^ - -DARROW_ORC=%ARROW_ORC% ^ - 
-DARROW_PACKAGE_KIND="python-wheel-windows" ^ - -DARROW_PARQUET=%ARROW_PARQUET% ^ - -DARROW_PYTHON=ON ^ - -DARROW_S3=%ARROW_S3% ^ - -DARROW_TENSORFLOW=%ARROW_TENSORFLOW% ^ - -DARROW_WITH_BROTLI=%ARROW_WITH_BROTLI% ^ - -DARROW_WITH_BZ2=%ARROW_WITH_BZ2% ^ - -DARROW_WITH_LZ4=%ARROW_WITH_LZ4% ^ - -DARROW_WITH_SNAPPY=%ARROW_WITH_SNAPPY% ^ - -DARROW_WITH_ZLIB=%ARROW_WITH_ZLIB% ^ - -DARROW_WITH_ZSTD=%ARROW_WITH_ZSTD% ^ - -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^ - -DCMAKE_CXX_COMPILER=clcache ^ - -DCMAKE_INSTALL_PREFIX=C:\arrow-dist ^ - -DCMAKE_UNITY_BUILD=%CMAKE_UNITY_BUILD% ^ - -DMSVC_LINK_VERBOSE=ON ^ - -DVCPKG_MANIFEST_MODE=OFF ^ - -DVCPKG_TARGET_TRIPLET=x64-windows-static-md-%CMAKE_BUILD_TYPE% ^ - -G "%CMAKE_GENERATOR%" ^ - C:\arrow\cpp || exit /B -cmake --build . --config %CMAKE_BUILD_TYPE% --target install || exit /B -popd - -echo "=== (%PYTHON_VERSION%) Building wheel ===" -set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% -set PYARROW_BUNDLE_ARROW_CPP=ON -set PYARROW_BUNDLE_BOOST=OFF -set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% -set PYARROW_INSTALL_TESTS=ON -set PYARROW_WITH_DATASET=%ARROW_DATASET% -set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT% -set PYARROW_WITH_GANDIVA=%ARROW_GANDIVA% -set PYARROW_WITH_HDFS=%ARROW_HDFS% -set PYARROW_WITH_ORC=%ARROW_ORC% -set PYARROW_WITH_PARQUET=%ARROW_PARQUET% -set PYARROW_WITH_S3=%ARROW_S3% -set ARROW_HOME=C:\arrow-dist - -pushd C:\arrow\python -@REM bundle the msvc runtime -cp "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Redist\MSVC\14.16.27012\x64\Microsoft.VC141.CRT\msvcp140.dll" pyarrow\ -python setup.py bdist_wheel || exit /B -popd diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat deleted file mode 100755 index 8352e5862266..000000000000 --- a/ci/scripts/python_wheel_windows_test.bat +++ /dev/null @@ -1,54 +0,0 @@ -@rem Licensed to the Apache Software Foundation (ASF) under one -@rem or more contributor license agreements. 
See the NOTICE file -@rem distributed with this work for additional information -@rem regarding copyright ownership. The ASF licenses this file -@rem to you under the Apache License, Version 2.0 (the -@rem "License"); you may not use this file except in compliance -@rem with the License. You may obtain a copy of the License at -@rem -@rem http://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, -@rem software distributed under the License is distributed on an -@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -@rem KIND, either express or implied. See the License for the -@rem specific language governing permissions and limitations -@rem under the License. - -@echo on - -set PYARROW_TEST_CYTHON=OFF -set PYARROW_TEST_DATASET=ON -set PYARROW_TEST_GANDIVA=OFF -set PYARROW_TEST_HDFS=ON -set PYARROW_TEST_ORC=OFF -set PYARROW_TEST_PANDAS=ON -set PYARROW_TEST_PARQUET=ON -set PYARROW_TEST_PLASMA=OFF -set PYARROW_TEST_S3=OFF -set PYARROW_TEST_TENSORFLOW=ON -set PYARROW_TEST_FLIGHT=ON - -set ARROW_TEST_DATA=C:\arrow\testing\data -set PARQUET_TEST_DATA=C:\arrow\submodules\parquet-testing\data - -@REM Install the built wheels -python -m pip install numpy -python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B - -@REM Test that the modules are importable -python -c "import pyarrow" -python -c "import pyarrow._hdfs" -python -c "import pyarrow._s3fs" -python -c "import pyarrow.csv" -python -c "import pyarrow.dataset" -python -c "import pyarrow.flight" -python -c "import pyarrow.fs" -python -c "import pyarrow.json" -python -c "import pyarrow.parquet" - -@REM Install testing dependencies -pip install -r C:\arrow\python\requirements-wheel-test.txt - -@REM Execute unittest -pytest -r s --pyargs pyarrow diff --git a/ci/scripts/r_build.sh b/ci/scripts/r_build.sh deleted file mode 100755 index d1907a87c894..000000000000 --- a/ci/scripts/r_build.sh +++ /dev/null @@ -1,33 +0,0 @@ 
-#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -: ${R_BIN:=R} -source_dir=${1}/r -with_docs=${2:-false} - -pushd ${source_dir} - -${R_BIN} CMD INSTALL . - -if [ "${with_docs}" == "true" ]; then - ${R_BIN} -e "pkgdown::build_site(install = FALSE)" -fi - -popd \ No newline at end of file diff --git a/ci/scripts/r_deps.sh b/ci/scripts/r_deps.sh deleted file mode 100755 index 7e9d2eac7a96..000000000000 --- a/ci/scripts/r_deps.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -: ${R_BIN:=R} - -source_dir=${1}/r - -pushd ${source_dir} - -# Install R package dependencies -${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys'))" -${R_BIN} -e "remotes::install_deps(dependencies = TRUE)" - -popd diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh deleted file mode 100755 index 3e553fe9edda..000000000000 --- a/ci/scripts/r_docker_configure.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -: ${R_BIN:=R} - -# The Dockerfile should have put this file here -if [ -f "/arrow/ci/etc/rprofile" ]; then - # Ensure parallel R package installation, set CRAN repo mirror, - # and use pre-built binaries where possible - cat /arrow/ci/etc/rprofile >> $(${R_BIN} RHOME)/etc/Rprofile.site -fi - -# Ensure parallel compilation of C/C++ code -echo "MAKEFLAGS=-j$(${R_BIN} -s -e 'cat(parallel::detectCores())')" >> $(${R_BIN} RHOME)/etc/Makeconf - -# Special hacking to try to reproduce quirks on fedora-clang-devel on CRAN -# which uses a bespoke clang compiled to use libc++ -# https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang -if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then - dnf install -y libcxx-devel - sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf - rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak -fi - -# Special hacking to try to reproduce quirks on centos using non-default build -# tooling. -if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then - if [ "`which dnf`" ]; then - dnf install -y centos-release-scl - dnf install -y "devtoolset-$DEVTOOLSET_VERSION" - else - yum install -y centos-release-scl - yum install -y "devtoolset-$DEVTOOLSET_VERSION" - fi -fi - -# Install openssl for S3 support -if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then - if [ "`which dnf`" ]; then - dnf install -y libcurl-devel openssl-devel - elif [ "`which yum`" ]; then - yum install -y libcurl-devel openssl-devel - elif [ "`which zypper`" ]; then - zypper install -y libcurl-devel libopenssl-devel - else - apt-get update - apt-get install -y libcurl4-openssl-dev libssl-dev - fi - - # The Dockerfile should have put this file here - if [ -f "/arrow/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then - /arrow/ci/scripts/install_minio.sh amd64 linux latest /usr/local - fi -fi - -# Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786 
-Rscript -e 'x <- file.path(R.home("doc"), "html"); if (!file.exists(x)) {dir.create(x, recursive=TRUE); file.copy(system.file("html/R.css", package="stats"), x)}' - -if [ "`which curl`" ]; then - # We need this on R >= 4.0 - curl -L https://sourceforge.net/projects/checkbaskisms/files/2.0.0.2/checkbashisms/download > /usr/local/bin/checkbashisms - chmod 755 /usr/local/bin/checkbashisms -fi diff --git a/ci/scripts/r_pkgdown_check.sh b/ci/scripts/r_pkgdown_check.sh deleted file mode 100755 index 327480a6b348..000000000000 --- a/ci/scripts/r_pkgdown_check.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Use this script to prevent errors in the pkgdown site being rendered due to missing YAML entries - -# all .Rd files in the repo -all_rd_files=`find ./r/man -maxdepth 1 -name "*.Rd" | sed -e 's/.\/r\/man\///g' | sed -e 's/.Rd//g' | sort` - -# .Rd files to exclude from search (i.e. 
are internal) -exclusions=`grep "\keyword{internal}" -rl ./r/man --include=*.Rd | sed -e 's/.\/r\/man\///g' | sed -e 's/.Rd//g' | sort` - -# .Rd files to check against pkgdown.yml -rd_files=`echo ${exclusions[@]} ${all_rd_files[@]} | tr ' ' '\n' | sort | uniq -u` - -# pkgdown sections -pkgdown_sections=`awk '/^[^ ]/{ f=/reference:/; next } f{ if (sub(/:$/,"")) pkg=$2; else print pkg, $2 }' ./r/_pkgdown.yml | grep -v "title:" | sort` - -# get things that appear in man files that don't appear in pkgdown sections -pkgdown_missing=`echo ${pkgdown_sections[@]} ${pkgdown_sections[@]} ${rd_files[@]} | tr ' ' '\n' | sort | uniq -u` - -# if any sections are missing raise an error -if ([ ${#pkgdown_missing} -ge 1 ]); then - echo "Error! $pkgdown_missing missing from ./r/_pkgdown.yml" - exit 1 -fi diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh deleted file mode 100755 index 89963eb2dd86..000000000000 --- a/ci/scripts/r_sanitize.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -: ${R_BIN:=RDsan} - -source_dir=${1}/r - -${R_BIN} CMD INSTALL ${source_dir} -pushd ${source_dir}/tests - -export TEST_R_WITH_ARROW=TRUE -export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" -${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } - -cat testthat.out -if grep -q "runtime error" testthat.out; then - exit 1 -fi -popd diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh deleted file mode 100755 index 1f6a8721efc1..000000000000 --- a/ci/scripts/r_test.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -: ${R_BIN:=R} - -source_dir=${1}/r - -pushd ${source_dir} - -printenv - -if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then - export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} - export R_LD_LIBRARY_PATH=${LD_LIBRARY_PATH} -fi -export _R_CHECK_COMPILATION_FLAGS_KNOWN_=${ARROW_R_CXXFLAGS} -if [ "$ARROW_R_DEV" = "TRUE" ]; then - # These are used in the Arrow C++ build and are not a problem - export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2" - # Note that NOT_CRAN=true means (among other things) that optional dependencies are built - export NOT_CRAN=true -fi -: ${TEST_R_WITH_ARROW:=TRUE} -export TEST_R_WITH_ARROW=$TEST_R_WITH_ARROW - -export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE -# --run-donttest was used in R < 4.0, this is used now -export _R_CHECK_DONTTEST_EXAMPLES_=$TEST_R_WITH_ARROW -# Not all Suggested packages are needed for checking, so in case they aren't installed don't fail -export _R_CHECK_FORCE_SUGGESTS_=FALSE -export _R_CHECK_LIMIT_CORES_=FALSE -export _R_CHECK_TESTS_NLINES_=0 - -# By default, aws-sdk tries to contact a non-existing local ip host -# to retrieve metadata. Disable this so that S3FileSystem tests run faster. 
-export AWS_EC2_METADATA_DISABLED=TRUE - -# Hack so that texlive2020 doesn't pollute the home dir -export TEXMFCONFIG=/tmp/texmf-config -export TEXMFVAR=/tmp/texmf-var - -if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then - # enable the devtoolset version to use it - source /opt/rh/devtoolset-$DEVTOOLSET_VERSION/enable -fi - -# Make sure we aren't writing to the home dir (CRAN _hates_ this but there is no official check) -BEFORE=$(ls -alh ~/) - -SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true') - run_donttest <- identical(tolower(Sys.getenv('_R_CHECK_DONTTEST_EXAMPLES_', 'true')), 'true') - if (as_cran) { - rcmdcheck::rcmdcheck(args = c('--as-cran', if (run_donttest) '--run-donttest'), error_on = 'warning', check_dir = 'check', timeout = 3600) - } else { - if (nzchar(Sys.which('minio'))) { - message('Running minio for S3 tests (if build supports them)') - minio_dir <- tempfile() - dir.create(minio_dir) - pid <- sys::exec_background('minio', c('server', minio_dir)) - on.exit(tools::pskill(pid)) - } - rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', if (run_donttest) '--run-donttest'), error_on = 'warning', check_dir = 'check', timeout = 3600) - }" -echo "$SCRIPT" | ${R_BIN} --no-save - -AFTER=$(ls -alh ~/) -if [ "$NOT_CRAN" != "true" ] && [ "$BEFORE" != "$AFTER" ]; then - ls -alh ~/.cmake/packages - exit 1 -fi -popd diff --git a/ci/scripts/r_windows_build.sh b/ci/scripts/r_windows_build.sh deleted file mode 100755 index 9988dfb64946..000000000000 --- a/ci/scripts/r_windows_build.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -: ${ARROW_HOME:=$(pwd)} -# Make sure it is absolute and exported -export ARROW_HOME="$(cd "${ARROW_HOME}" && pwd)" - -if [ "$RTOOLS_VERSION" = "35" ]; then - # Use rtools-backports if building with rtools35 - curl https://raw.githubusercontent.com/r-windows/rtools-backports/master/pacman.conf > /etc/pacman.conf - # Update keys: https://www.msys2.org/news/#2020-06-29-new-packagers - msys2_repo_base_url=https://repo.msys2.org/msys - curl -OSsL "${msys2_repo_base_url}/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz" - pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz - pacman --noconfirm -Scc - pacman --noconfirm -Syy - # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5) - RWINLIB_LIB_DIR="lib-4.9.3" -else - # Uncomment L38-41 if you're testing a new rtools dependency that hasn't yet sync'd to CRAN - # curl https://raw.githubusercontent.com/r-windows/rtools-packages/master/pacman.conf > /etc/pacman.conf - # curl -OSsl "http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz" - # pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz - # pacman --noconfirm -Scc - - pacman --noconfirm -Syy - RWINLIB_LIB_DIR="lib" -fi - -cp $ARROW_HOME/ci/scripts/PKGBUILD . 
-printenv -makepkg-mingw --noconfirm --noprogressbar --skippgpcheck --nocheck --syncdeps --cleanbuild - -VERSION=$(grep Version $ARROW_HOME/r/DESCRIPTION | cut -d " " -f 2) -DST_DIR="arrow-$VERSION" - -# Collect the build artifacts and make the shape of zip file that rwinlib expects -ls -mkdir -p build -mv mingw* build -cd build - -# This may vary by system/CI provider -MSYS_LIB_DIR="/c/rtools40" - -ls $MSYS_LIB_DIR/mingw64/lib/ -ls $MSYS_LIB_DIR/mingw32/lib/ - -# Untar the two builds we made -ls *.xz | xargs -n 1 tar -xJf -mkdir -p $DST_DIR -# Grab the headers from one, either one is fine -# (if we're building twice to combine old and new toolchains, this may already exist) -if [ ! -d $DST_DIR/include ]; then - mv mingw64/include $DST_DIR -fi - -# Make the rest of the directory structure -# lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5) -mkdir -p $DST_DIR/${RWINLIB_LIB_DIR}/x64 -mkdir -p $DST_DIR/${RWINLIB_LIB_DIR}/i386 -# lib is for the new gcc 8 toolchain (Rtools 4.0) -mkdir -p $DST_DIR/lib/x64 -mkdir -p $DST_DIR/lib/i386 - -# Move the 64-bit versions of libarrow into the expected location -mv mingw64/lib/*.a $DST_DIR/${RWINLIB_LIB_DIR}/x64 -# Same for the 32-bit versions -mv mingw32/lib/*.a $DST_DIR/${RWINLIB_LIB_DIR}/i386 - -# These may be from https://dl.bintray.com/rtools/backports/ -cp $MSYS_LIB_DIR/mingw64/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/x64 -cp $MSYS_LIB_DIR/mingw32/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/i386 - -# These are from https://dl.bintray.com/rtools/mingw{32,64}/ -cp $MSYS_LIB_DIR/mingw64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64 -cp $MSYS_LIB_DIR/mingw32/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/i386 - -# Create build artifact -zip -r ${DST_DIR}.zip $DST_DIR - -# Copy that to a file name/path that does not vary by version number so we -# can easily find it in the R package tests on CI -cp ${DST_DIR}.zip ../libarrow.zip diff --git a/ci/scripts/release_test.sh 
b/ci/scripts/release_test.sh deleted file mode 100755 index ae2ab328884b..000000000000 --- a/ci/scripts/release_test.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -eux - -arrow_dir=${1} - -pushd ${arrow_dir} - -dev/release/run-test.rb - -popd diff --git a/ci/scripts/ruby_test.sh b/ci/scripts/ruby_test.sh deleted file mode 100755 index 03d20e19831f..000000000000 --- a/ci/scripts/ruby_test.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/ruby -build_dir=${2}/ruby - -export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} -export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig -export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 - -rake -f ${source_dir}/Rakefile BUILD_DIR=${build_dir} USE_BUNDLER=yes diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh deleted file mode 100755 index 8099d3024579..000000000000 --- a/ci/scripts/rust_build.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -source_dir=${1}/rust - -# This file is used to build the rust binaries needed for the -# archery integration tests. 
Testing of the rust implementation -# in normal CI is handled by github workflows - -# Disable full debug symbol generation to speed up CI build / reduce memory required -export RUSTFLAGS="-C debuginfo=1" - -export ARROW_TEST_DATA=${arrow_dir}/testing/data -export PARQUET_TEST_DATA=${arrow_dir}/parquet-testing/data - -# show activated toolchain -rustup show - -pushd ${source_dir} - -# build only the integration testing binaries -cargo build -p arrow-integration-testing - -# Remove incremental build artifacts to save space -rm -rf target/debug/deps/ target/debug/build/ - -popd diff --git a/ci/scripts/util_checkout.sh b/ci/scripts/util_checkout.sh deleted file mode 100755 index 25fe69aa13f0..000000000000 --- a/ci/scripts/util_checkout.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# this script is github actions specific to check out the submodules and tags - -# TODO(kszucs): remove it once the "submodules: recursive" feature is released -auth_header="$(git config --local --get http.https://github.com/.extraheader)" -git submodule sync --recursive -git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 - -# fetch all the tags -git fetch --depth=1 origin +refs/tags/*:refs/tags/* diff --git a/ci/scripts/util_cleanup.sh b/ci/scripts/util_cleanup.sh deleted file mode 100755 index 3a13a1a784af..000000000000 --- a/ci/scripts/util_cleanup.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# This script is Github Actions-specific to free up disk space, -# to avoid disk full errors on some builds - -if [ $RUNNER_OS = "Linux" ]; then - df -h - - # remove swap - sudo swapoff -a - sudo rm -f /swapfile - - # clean apt cache - sudo apt clean - - # remove haskell, consumes 8.6 GB - sudo rm -rf /opt/ghc - - # 1 GB - sudo rm -rf /home/linuxbrew/.linuxbrew - - # 1+ GB - sudo rm -rf /opt/hostedtoolcache/CodeQL - - # 1+ GB - sudo rm -rf /usr/share/swift - - # 12 GB, but takes a lot of time to delete - #sudo rm -rf /usr/local/lib/android - - # remove cached docker images, around 13 GB - docker rmi $(docker image ls -aq) - - # NOTE: /usr/share/dotnet is 25 GB -fi - -df -h diff --git a/ci/scripts/util_download_apache.sh b/ci/scripts/util_download_apache.sh deleted file mode 100755 index 5d37f9c0333e..000000000000 --- a/ci/scripts/util_download_apache.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -x -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - exit 1 -fi - -tarball_path=$1 -target_dir=$2 - -APACHE_MIRRORS=( - "http://www.apache.org/dyn/closer.cgi?action=download&filename=" - "https://downloads.apache.org" - "https://apache.claz.org" - "https://apache.cs.utah.edu" - "https://apache.mirrors.lucidnetworks.net" - "https://apache.osuosl.org" - "https://ftp.wayne.edu/apache" - "https://mirror.olnevhost.net/pub/apache" - "https://mirrors.gigenet.com/apache" - "https://mirrors.koehn.com/apache" - "https://mirrors.ocf.berkeley.edu/apache" - "https://mirrors.sonic.net/apache" - "https://us.mirrors.quenda.co/apache" -) - -mkdir -p "${target_dir}" - -for mirror in ${APACHE_MIRRORS[*]} -do - curl -SL "${mirror}/${tarball_path}" | tar -xzf - -C "${target_dir}" - if [ $? == 0 ]; then - exit 0 - fi -done - -exit 1 diff --git a/ci/scripts/util_wait_for_it.sh b/ci/scripts/util_wait_for_it.sh deleted file mode 100755 index 51ce816eb8c2..000000000000 --- a/ci/scripts/util_wait_for_it.sh +++ /dev/null @@ -1,199 +0,0 @@ -#!/usr/bin/env bash - -# The MIT License (MIT) -# Copyright (c) 2016 Giles Hall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal in -# the Software without restriction, including without limitation the rights to -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -# of the Software, and to permit persons to whom the Software is furnished to do -# so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# -# Use this script to test if a given TCP host/port are available - -cmdname=$(basename $0) - -echoerr() { if [[ $QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } - -usage() -{ - cat << USAGE >&2 -Usage: - $cmdname host:port [-s] [-t timeout] [-- command args] - -h HOST | --host=HOST Host or IP under test - -p PORT | --port=PORT TCP port under test - Alternatively, you specify the host and port as host:port - -s | --strict Only execute subcommand if the test succeeds - -q | --quiet Don't output any status messages - -t TIMEOUT | --timeout=TIMEOUT - Timeout in seconds, zero for no timeout - -- COMMAND ARGS Execute command with args after the test finishes -USAGE - exit 1 -} - -wait_for() -{ - if [[ $TIMEOUT -gt 0 ]]; then - echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT" - else - echoerr "$cmdname: waiting for $HOST:$PORT without a timeout" - fi - start_ts=$(date +%s) - while : - do - if [[ $ISBUSY -eq 1 ]]; then - nc -z $HOST $PORT - result=$? - else - (echo > /dev/tcp/$HOST/$PORT) >/dev/null 2>&1 - result=$? - fi - if [[ $result -eq 0 ]]; then - end_ts=$(date +%s) - echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds" - break - fi - sleep 1 - done - return $result -} - -wait_for_wrapper() -{ - # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 - if [[ $QUIET -eq 1 ]]; then - timeout $BUSYTIMEFLAG $TIMEOUT $0 --quiet --child --host=$HOST --port=$PORT --timeout=$TIMEOUT & - else - timeout $BUSYTIMEFLAG $TIMEOUT $0 --child --host=$HOST --port=$PORT --timeout=$TIMEOUT & - fi - PID=$! - trap "kill -INT -$PID" INT - wait $PID - RESULT=$? 
- if [[ $RESULT -ne 0 ]]; then - echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT" - fi - return $RESULT -} - -# process arguments -while [[ $# -gt 0 ]] -do - case "$1" in - *:* ) - hostport=(${1//:/ }) - HOST=${hostport[0]} - PORT=${hostport[1]} - shift 1 - ;; - --child) - CHILD=1 - shift 1 - ;; - -q | --quiet) - QUIET=1 - shift 1 - ;; - -s | --strict) - STRICT=1 - shift 1 - ;; - -h) - HOST="$2" - if [[ $HOST == "" ]]; then break; fi - shift 2 - ;; - --host=*) - HOST="${1#*=}" - shift 1 - ;; - -p) - PORT="$2" - if [[ $PORT == "" ]]; then break; fi - shift 2 - ;; - --port=*) - PORT="${1#*=}" - shift 1 - ;; - -t) - TIMEOUT="$2" - if [[ $TIMEOUT == "" ]]; then break; fi - shift 2 - ;; - --timeout=*) - TIMEOUT="${1#*=}" - shift 1 - ;; - --) - shift - CLI=("$@") - break - ;; - --help) - usage - ;; - *) - echoerr "Unknown argument: $1" - usage - ;; - esac -done - -if [[ "$HOST" == "" || "$PORT" == "" ]]; then - echoerr "Error: you need to provide a host and port to test." - usage -fi - -TIMEOUT=${TIMEOUT:-15} -STRICT=${STRICT:-0} -CHILD=${CHILD:-0} -QUIET=${QUIET:-0} - -# check to see if timeout is from busybox? -# check to see if timeout is from busybox? -TIMEOUT_PATH=$(realpath $(which timeout)) -if [[ $TIMEOUT_PATH =~ "busybox" ]]; then - ISBUSY=1 - BUSYTIMEFLAG="-t" -else - ISBUSY=0 - BUSYTIMEFLAG="" -fi - -if [[ $CHILD -gt 0 ]]; then - wait_for - RESULT=$? - exit $RESULT -else - if [[ $TIMEOUT -gt 0 ]]; then - wait_for_wrapper - RESULT=$? - else - wait_for - RESULT=$? 
- fi -fi - -if [[ $CLI != "" ]]; then - if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then - echoerr "$cmdname: strict mode, refusing to execute subprocess" - exit $RESULT - fi - exec "${CLI[@]}" -else - exit $RESULT -fi diff --git a/ci/vcpkg/arm64-linux-static-debug.cmake b/ci/vcpkg/arm64-linux-static-debug.cmake deleted file mode 100644 index 6fea43694cd1..000000000000 --- a/ci/vcpkg/arm64-linux-static-debug.cmake +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set(VCPKG_TARGET_ARCHITECTURE arm64) -set(VCPKG_CRT_LINKAGE dynamic) -set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_CMAKE_SYSTEM_NAME Linux) -set(VCPKG_BUILD_TYPE debug) - -if(NOT CMAKE_HOST_SYSTEM_PROCESSOR) - execute_process(COMMAND "uname" "-m" - OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() diff --git a/ci/vcpkg/arm64-linux-static-release.cmake b/ci/vcpkg/arm64-linux-static-release.cmake deleted file mode 100644 index 4012848b8496..000000000000 --- a/ci/vcpkg/arm64-linux-static-release.cmake +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set(VCPKG_TARGET_ARCHITECTURE arm64) -set(VCPKG_CRT_LINKAGE dynamic) -set(VCPKG_LIBRARY_LINKAGE static) -set(VCPKG_CMAKE_SYSTEM_NAME Linux) -set(VCPKG_BUILD_TYPE release) - -if(NOT CMAKE_HOST_SYSTEM_PROCESSOR) - execute_process(COMMAND "uname" "-m" - OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch deleted file mode 100644 index 14b9678690e0..000000000000 --- a/ci/vcpkg/ports.patch +++ /dev/null @@ -1,63 +0,0 @@ -diff --git a/ports/aws-c-common/portfile.cmake b/ports/aws-c-common/portfile.cmake -index f3704ef05..3af543058 100644 ---- a/ports/aws-c-common/portfile.cmake -+++ b/ports/aws-c-common/portfile.cmake -@@ -1,8 +1,8 @@ - vcpkg_from_github( - OUT_SOURCE_PATH SOURCE_PATH - REPO awslabs/aws-c-common -- REF 4a21a1c0757083a16497fea27886f5f20ccdf334 # v0.4.56 -- SHA512 68898a8ac15d5490f45676eabfbe0df9e45370a74c543a28909fd0d85fed48dfcf4bcd6ea2d01d1a036dd352e2e4e0b08c48c63ab2a2b477fe150b46a827136e -+ REF 13adef72b7813ec878817c6d50a7a3f241015d8a # v0.4.57 -+ SHA512 28256522ac6af544d7464e3e7dcd4dc802ae2b09728bf8f167f86a6487bb756d0cad5eb4a2480610b2967b9c24c4a7f70621894517aa2828ffdeb0479453803b - HEAD_REF master - PATCHES - disable-error-4068.patch # This patch fixes dependency port compilation 
failure -diff --git a/ports/curl/portfile.cmake b/ports/curl/portfile.cmake -index 6e18aecd0..2ccecf33c 100644 ---- a/ports/curl/portfile.cmake -+++ b/ports/curl/portfile.cmake -@@ -76,6 +76,8 @@ vcpkg_configure_cmake( - -DCMAKE_DISABLE_FIND_PACKAGE_Perl=ON - -DENABLE_DEBUG=ON - -DCURL_CA_FALLBACK=ON -+ -DCURL_CA_PATH=none -+ -DCURL_CA_BUNDLE=none - ) - - vcpkg_install_cmake() -diff --git a/ports/snappy/portfile.cmake b/ports/snappy/portfile.cmake -index 75dd13302..84345c7ca 100644 ---- a/ports/snappy/portfile.cmake -+++ b/ports/snappy/portfile.cmake -@@ -4,6 +4,7 @@ vcpkg_from_github( - REF 537f4ad6240e586970fe554614542e9717df7902 # 1.1.8 - SHA512 555d3b69a6759592736cbaae8f41654f0cf14e8be693b5dde37640191e53daec189f895872557b173e905d10681ef502f3e6ed8566811add963ffef96ce4016d - HEAD_REF master -+ PATCHES "snappy-disable-bmi.patch" - ) - - vcpkg_configure_cmake( -diff --git a/ports/snappy/snappy-disable-bmi.patch b/ports/snappy/snappy-disable-bmi.patch -new file mode 100644 -index 000000000..2cbb1533a ---- /dev/null -+++ b/ports/snappy/snappy-disable-bmi.patch -@@ -0,0 +1,17 @@ -+--- snappy.cc 2020-06-27 17:38:49.718993748 -0500 -++++ snappy.cc 2020-06-27 17:37:57.543268213 -0500 -+@@ -717,14 +717,10 @@ -+ static inline uint32 ExtractLowBytes(uint32 v, int n) { -+ assert(n >= 0); -+ assert(n <= 4); -+-#if SNAPPY_HAVE_BMI2 -+- return _bzhi_u32(v, 8 * n); -+-#else -+ // This needs to be wider than uint32 otherwise `mask << 32` will be -+ // undefined. -+ uint64 mask = 0xffffffff; -+ return v & ~(mask << (8 * n)); -+-#endif -+ } -+ -+ static inline bool LeftShiftOverflows(uint8 value, uint32 shift) { diff --git a/ci/vcpkg/x64-linux-static-debug.cmake b/ci/vcpkg/x64-linux-static-debug.cmake deleted file mode 100644 index 3acee2ee469e..000000000000 --- a/ci/vcpkg/x64-linux-static-debug.cmake +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set(VCPKG_TARGET_ARCHITECTURE x64) -set(VCPKG_CRT_LINKAGE dynamic) -set(VCPKG_LIBRARY_LINKAGE static) - -set(VCPKG_CMAKE_SYSTEM_NAME Linux) - -set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/x64-linux-static-release.cmake b/ci/vcpkg/x64-linux-static-release.cmake deleted file mode 100644 index c2caa49fa115..000000000000 --- a/ci/vcpkg/x64-linux-static-release.cmake +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set(VCPKG_TARGET_ARCHITECTURE x64) -set(VCPKG_CRT_LINKAGE dynamic) -set(VCPKG_LIBRARY_LINKAGE static) - -set(VCPKG_CMAKE_SYSTEM_NAME Linux) - -set(VCPKG_BUILD_TYPE release) diff --git a/ci/vcpkg/x64-osx-static-debug.cmake b/ci/vcpkg/x64-osx-static-debug.cmake deleted file mode 100644 index e8a321ec71af..000000000000 --- a/ci/vcpkg/x64-osx-static-debug.cmake +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set(VCPKG_TARGET_ARCHITECTURE x64) -set(VCPKG_CRT_LINKAGE dynamic) -set(VCPKG_LIBRARY_LINKAGE static) - -set(VCPKG_CMAKE_SYSTEM_NAME Darwin) -set(VCPKG_OSX_ARCHITECTURES x86_64) - -set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/x64-osx-static-release.cmake b/ci/vcpkg/x64-osx-static-release.cmake deleted file mode 100644 index 956d5b92e731..000000000000 --- a/ci/vcpkg/x64-osx-static-release.cmake +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set(VCPKG_TARGET_ARCHITECTURE x64) -set(VCPKG_CRT_LINKAGE dynamic) -set(VCPKG_LIBRARY_LINKAGE static) - -set(VCPKG_CMAKE_SYSTEM_NAME Darwin) -set(VCPKG_OSX_ARCHITECTURES x86_64) - -set(VCPKG_BUILD_TYPE release) diff --git a/ci/vcpkg/x64-windows-static-md-debug.cmake b/ci/vcpkg/x64-windows-static-md-debug.cmake deleted file mode 100644 index 3eae3cfdaa85..000000000000 --- a/ci/vcpkg/x64-windows-static-md-debug.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set(VCPKG_TARGET_ARCHITECTURE x64) -set(VCPKG_CRT_LINKAGE dynamic) -set(VCPKG_LIBRARY_LINKAGE static) - -set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/x64-windows-static-md-release.cmake b/ci/vcpkg/x64-windows-static-md-release.cmake deleted file mode 100644 index b8dfbc8848df..000000000000 --- a/ci/vcpkg/x64-windows-static-md-release.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set(VCPKG_TARGET_ARCHITECTURE x64) -set(VCPKG_CRT_LINKAGE dynamic) -set(VCPKG_LIBRARY_LINKAGE static) - -set(VCPKG_BUILD_TYPE release) diff --git a/dev/README.md b/dev/README.md index 258792b805a0..b4ea02bc73ff 100644 --- a/dev/README.md +++ b/dev/README.md @@ -22,101 +22,6 @@ This directory contains scripts useful to developers when packaging, testing, or committing to Arrow. -Merging a pull request requires being a committer on the project. In addition -you need to have linked your GitHub and ASF accounts on -https://gitbox.apache.org/setup/ to be able to push to GitHub as the main -remote. - -NOTE: It may take some time (a few hours) between when you complete -the setup at GitBox, and when your GitHub account will be added as a -committer. 
- -## How to merge a Pull request - -Please don't merge PRs using the Github Web interface. Instead, set up -your git clone such as to have a remote named ``apache`` pointing to the -official Arrow repository: -``` -git remote add apache git@github.com:apache/arrow.git -``` - -and then run the following command: -``` -./dev/merge_arrow_pr.sh -``` - -This creates a new Python virtual environment under `dev/.venv[PY_VERSION]` -and installs all the necessary dependencies to run the Arrow merge script. -After installed, it runs the merge script. - -(we don't provide a wrapper script for Windows yet, so under Windows you'll -have to install Python dependencies yourself and then run `dev/merge_arrow_pr.py` -directly) - -The merge script uses the GitHub REST API; if you encounter rate limit issues, -you may set a `ARROW_GITHUB_API_TOKEN` environment variable to use a Personal -Access Token. - -You can specify the username and the password of your JIRA account in -`APACHE_JIRA_USERNAME` and `APACHE_JIRA_PASSWORD` environment variables. -If these aren't supplied, the script will ask you the values of them. - -Note that the directory name of your Arrow git clone must be called `arrow`. - -example output: -``` -Which pull request would you like to merge? (e.g. 34): -``` -Type the pull request number (from https://github.com/apache/arrow/pulls) and hit enter. -``` -=== Pull Request #X === -title Blah Blah Blah -source repo/branch -target master -url https://api.github.com/repos/apache/arrow/pulls/X - -Proceed with merging pull request #3? (y/n): -``` -If this looks good, type y and hit enter. -``` -From git-wip-us.apache.org:/repos/asf/arrow.git - * [new branch] master -> PR_TOOL_MERGE_PR_3_MASTER -Switched to branch 'PR_TOOL_MERGE_PR_3_MASTER' - -Merge complete (local ref PR_TOOL_MERGE_PR_3_MASTER). Push to apache? (y/n): -``` -A local branch with the merge has been created. -type y and hit enter to push it to apache master -``` -Counting objects: 67, done. 
-Delta compression using up to 4 threads. -Compressing objects: 100% (26/26), done. -Writing objects: 100% (36/36), 5.32 KiB, done. -Total 36 (delta 17), reused 0 (delta 0) -To git-wip-us.apache.org:/repos/arrow-mr.git - b767ac4..485658a PR_TOOL_MERGE_PR_X_MASTER -> master -Restoring head pointer to b767ac4e -Note: checking out 'b767ac4e'. - -You are in 'detached HEAD' state. You can look around, make experimental -changes and commit them, and you can discard any commits you make in this -state without impacting any branches by performing another checkout. - -If you want to create a new branch to retain commits you create, you may -do so (now or later) by using -b with the checkout command again. Example: - - git checkout -b new_branch_name - -HEAD is now at b767ac4... Update README.md -Deleting local branch PR_TOOL_MERGE_PR_X -Deleting local branch PR_TOOL_MERGE_PR_X_MASTER -Pull request #X merged! -Merge hash: 485658a5 - -Would you like to pick 485658a5 into another branch? (y/n): -``` -For now just say n as we have 1 branch - ## Verifying Release Candidates We have provided a script to assist with verifying release candidates: @@ -150,40 +55,3 @@ Build the following base image used by multiple tests: ```shell docker build -t arrow_integration_xenial_base -f docker_common/Dockerfile.xenial.base . ``` - -## HDFS C++ / Python support - -```shell -docker-compose build conda-cpp -docker-compose build conda-python -docker-compose build conda-python-hdfs -docker-compose run --rm conda-python-hdfs -``` - -## Apache Spark Integration Tests - -Tests can be run to ensure that the current snapshot of Java and Python Arrow -works with Spark. This will run a docker image to build Arrow C++ -and Python in a Conda environment, build and install Arrow Java to the local -Maven repository, build Spark with the new Arrow artifact, and run Arrow -related unit tests in Spark for Java and Python. Any errors will exit with a -non-zero value. 
To run, use the following command: - -```shell -docker-compose build conda-cpp -docker-compose build conda-python -docker-compose build conda-python-spark -docker-compose run --rm conda-python-spark -``` - -If you are already building Spark, these commands will map your local Maven -repo to the image and save time by not having to download all dependencies. -Be aware that Docker writes files as root, which can cause problems for Maven -on the host. - -```shell -docker-compose run --rm -v $HOME/.m2:/root/.m2 conda-python-spark -``` - -NOTE: If the Java API has breaking changes, a patched version of Spark might -need to be used to successfully build. diff --git a/dev/benchmarking/.env b/dev/benchmarking/.env deleted file mode 100644 index 7485f5866d7a..000000000000 --- a/dev/benchmarking/.env +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -PG_USER=benchmark -PG_PASS=benchmark diff --git a/dev/benchmarking/.gitignore b/dev/benchmarking/.gitignore deleted file mode 100644 index cda00d658189..000000000000 --- a/dev/benchmarking/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/machine.json diff --git a/dev/benchmarking/Dockerfile b/dev/benchmarking/Dockerfile deleted file mode 100644 index f470333979ca..000000000000 --- a/dev/benchmarking/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -FROM postgres:11-alpine - -# Any `.sh` and `.sql` files copied to the entrypoint directory -# will be run during startup. See `docker-entrypoint.sh` in -# https://github.com/docker-library/postgres/blob/master/11/alpine/ -COPY ddl/* /docker-entrypoint-initdb.d/ diff --git a/dev/benchmarking/README.md b/dev/benchmarking/README.md deleted file mode 100644 index c5ddd62e026f..000000000000 --- a/dev/benchmarking/README.md +++ /dev/null @@ -1,255 +0,0 @@ - - - -> NOTE: For those deploying this database, Postgres does not by default use -> UTF-8, however it is [required for the jsonb][pg-jsonb] format used in -> some columns to always work. This [stackoverflow post][so-utf8] describes -> how to do it for Amazon RDS. 
This [section of the docs][pg-charset] -> states how to do it in general, i.e.: `initdb -E UTF8`. - -# Benchmark database - -This directory contains files related to the benchmark database. - -- 'ddl/\*.sql' contains the database definition. -- 'examples/' contains code to test the database and demonstrate its use. -- 'Dockerfile' and 'docker-compose.yml' are for developing benchmarks - against a testing database. -- An auto-generated summary of views in the [Data model](./data_model.rst). - -## Setup - -To create a 'machine.json' file that will uniquely identify a computer for -benchmark submission, run the provided shell script and fill in the prompts -to identify the GPU. - -> NOTE: this does not work on VMs or Windows. - -```shell -./make_machine_json.sh -``` - -Submit the machine details via HTTP using the command - -> NOTE: This will only work if we have selected graphql as a client -> and have it running in production or if during development -> you have run `docker-compose up` to create and run both a -> database Docker container and graphql client Docker container. - -```shell -./graphql_submit.sh machine machine.json localhost:5000/graphql -``` - -or submit after starting up the psql client from this directory, using - -``` -\set content `cat machine.json` -SELECT ingest_machine_view(:'content'::jsonb); -``` - -> NOTE: If you don't have a "machine.json" file generated, -> use the example file "examples/machine.json" instead. - -## Local testing - -There is a file named "[.env](.env)" in this directory that is used by -`docker-compose` to set up the postgres user and password for the -local containers. Currently the name and password are both -`benchmark`. This will be the password for the psql client as well. - -The Postgres Alpine image runs any added '\*.sql' and '\*.sh' scripts placed -in '/docker-entrypoint-initdb.d/' during its startup script, so the local -database will be set up automatically once the container is running. 
- -To start the containers, be sure to have [Docker installed][docker], -and then run the following from this directory (arrow/dev/benchmarking). - - -``` -docker-compose up -``` - -This will start a process that will show logs from both the running -Postgres container and the running GraphQL container. -To stop the running containers gracefully, background the process -and run - -``` -docker-compose down -fg # To re-foreground the backgrounded process while it exits -``` - -You will still have the container images "benchmarking_pg", -"graphile/postgraphile", and "postgres:11-alpine" on your -computer. You should keep them if you want to run this again. -If you don't, then remove them with the command: - -``` -docker rmi benchmarking_pg postgres:11-alpine graphile/postgraphile -``` - -### Postgres client - -The `psql` shell client is bundled with the PostgreSQL core distribution -available from the [Postgres download page][postgres-downloads]. -Using the `PG_USER` defined in the `.env` file (currently "benchmark"), -the command to connect to the container is: -```shell -psql -h localhost -p 5432 -U benchmark -``` -There is an example script in [examples/example.sql](examples/example.sql) that -runs some queries against the database. To run it in the psql client, type -the following in the psql command-line interface: - -``` -\i examples/example.sql -``` - -#### Bulk ingestion using CSV - -An example CSV file for bulk ingestion is in -[examples/benchmark_run_example.csv](examples/benchmark_run_example.csv). -The columns are listed in the same order as they are defined, to avoid having -to explicitly name every column in ingestion. The "id" column is left empty -and will be automatically assigned on insert. 
- -To ingest the example CSV file from the command line, -use the command below: - -```shell -CSV='examples/benchmark_run_example.csv' && \ -psql -U benchmark -h localhost -p 5432 \ - -c "\copy benchmark_run_view FROM '${CSV}' WITH (FORMAT csv, HEADER);" -``` - -#### Bulk ingestion using JSON - -To ingest the example JSON file using the psql client, use the command below. - -``` -\set content `cat examples/benchmark_example.json` -SELECT ingest_benchmark_view(:'content'::jsonb); -``` - -### HTTP client - -This section requires an actual HTTP client to be up, either -for the production database or via the testing setup. -(See the [local testing section](#local-testing) for how to set it up). - -The 'graphile/postgraphile' container provides an HTTP interface -to the database via two url routes: - -- A GraphiQL page ([localhost:5000/graphiql][graphiql]) - to aid visual exploration of the data model. - (The `--watch` flag on the command line. Not recommended for production.) -- An endpoint that receives POST requests only (localhost:5000/graphql). - -#### Ingestion - -The script [graphql_submit.sh](./graphql_submit.sh) simplifies submission -to the database via curl. Examples: - -```shell -./graphql_submit.sh benchmarks examples/benchmark_example.json -./graphql_submit.sh runs examples/benchmark_run_example.json -``` - -#### Querying - -The output of the query is a JSON object that is hard to read on the command line. 
-Here is an example query in the shell: -```shell -curl -X POST \ - -H "Content-Type: application/json" \ - --data '{"query": "{projectDetails{ projectName }}"}' \ - localhost:5000/graphql -``` - -which (if you have previously run the "examples/example.sql" script) yields - -``` -{"data":{"projectDetails":{"projectName":"Apache Arrow"}}} -``` - -Here is an example query using Python: -```python -import json -import requests - -uri = "http://localhost:5000/graphql" -query = json.load(open("examples/graphql_query_environment_view.json")) -response = requests.post(uri, json=query) -message = "{benchmarkLanguage}: {languageImplementationVersion}, {dependencies}" - -for row in response.json()['data']['allEnvironmentViews']['edges']: - print(message.format(**row['node'])) - -# result: -# -# Python: CPython 2.7, {"six":"","numpy":"1.14","other_lib":"1.0"} -# Python: CPython 2.7, {"six":"","numpy":"1.15","other_lib":"1.0"} -# Python: CPython 3.6, {"boost":"1.42","numpy":"1.15"} -``` - -## Deployment - -(work in progress). - -> NOTE: For those deploying this database, Postgres does not by default use -> UTF-8; however, it is [required for the jsonb][pg-jsonb] format used in -> some columns to always work. This [stackoverflow post][so-utf8] describes -> how to do it for Amazon RDS. This [section of the docs][pg-charset] -> states how to do it in general, i.e.: `initdb -E UTF8`. - - -## Quick reference - -- String variables `'have single quotes'` -- Arrays `'{"have", "curly", "braces"}'::text[]` or `'{1, 2, 3}'::integer[]` -- JSONb `'{"has":"this", "format":42}'::jsonb` -- Elements inserted using JSON-formatted strings can use standard - JSON-formatted arrays (`[1, 2, 3]`) and do not have to use the above - string formats. -- When comparing nullable values use `x IS NOT DISTINCT FROM y` rather than `x = y` -- An auto-generated summary of the [Data model](./data_model.rst). 
- -## Data model documentation - -To recreate the data model documentation, -(1) install the [psql client][postgres-downloads] -(sorry you need to download the whole thing), -(2) start the docker container using `docker-compose up`, -(3) and then run these scripts: - -``` -./make_dotfile.sh -./make_data_model_rst.sh -``` - -[pg-jsonb]: https://www.postgresql.org/docs/11/datatype-json.html#id-1.5.7.22.3 -[so-utf8]: https://stackoverflow.com/a/33557023 -[pg-charset]: https://www.postgresql.org/docs/9.3/multibyte.html#AEN34424 -[docker]: https://www.docker.com/get-started -[citext-limitations]: https://www.postgresql.org/docs/11/citext.html#id-1.11.7.17.7 -[postgres-downloads]: https://www.postgresql.org/download/ -[graphiql]: http://localhost:5000/graphiql -[postgraphile-lambda]: https://github.com/graphile/postgraphile-lambda-example -[postgraphile-cli]: https://www.graphile.org/postgraphile/usage-cli/ diff --git a/dev/benchmarking/data_model.dot b/dev/benchmarking/data_model.dot deleted file mode 100644 index d311acd4e5f1..000000000000 --- a/dev/benchmarking/data_model.dot +++ /dev/null @@ -1,219 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements.See the NOTICE file - distributed with this work for additional information - regarding copyright ownership.The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License.You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied.See the License for the - specific language governing permissions and limitations - under the License. -*/ - -/* - WARNING - This is an auto-generated file. Please do not edit. 
- - To reproduce, please run :code:`./make_data_model_rst.sh`. - (This requires you have the - `psql client `_ - and have started the docker containers using - :code:`docker-compose up`). -*/ -digraph database { - concentrate = true; - rankdir = LR; - ratio = ".75"; - node [shape = none, fontsize="11", fontname="Helvetica"]; - edge [fontsize="8", fontname="Helvetica"]; -legend -[fontsize = "14" -label = -< - - - - - - -
Legend
pk = primary key
fk = foreign key
u = unique*
o = optional
* multiple uniques in the same table are a unique group
> -]; -benchmark -[label = - < - - - - - - - - -
benchmark
benchmark_id (pk)
benchmark_language_id (pk)
benchmark_name (u)
parameter_names (o)
benchmark_description
benchmark_version (u)
unit_id (fk)
> -]; -benchmark_language -[label = - < - - - -
benchmark_language
benchmark_language_id (pk)
benchmark_language (u)
> -]; -benchmark_run -[label = - < - - - - - - - - - - - - - - - - - - - - -
benchmark_run
benchmark_run_id (pk)
parameter_values (u)
value
git_commit_timestamp (u)
git_hash
val_min (o)
val_q1 (o)
val_q3 (o)
val_max (o)
std_dev
n_obs
run_timestamp (u)
run_metadata (o)
run_notes (o)
machine_id (u) (fk)
environment_id (u) (fk)
language_implementation_version_id (fk)
benchmark_language_id (fk)
benchmark_id (u) (fk)
> -]; -benchmark_type -[label = - < - - - - -
benchmark_type
benchmark_type_id (pk)
benchmark_type (u)
lessisbetter
> -]; -cpu -[label = - < - - - - - - - - - - - -
cpu
cpu_id (pk)
cpu_model_name (u)
cpu_core_count
cpu_thread_count
cpu_frequency_max_hz
cpu_frequency_min_hz
cpu_l1d_cache_bytes
cpu_l1i_cache_bytes
cpu_l2_cache_bytes
cpu_l3_cache_bytes
> -]; -dependencies -[label = - < - - - -
dependencies
dependencies_id (pk)
dependencies (u)
> -]; -gpu -[label = - < - - - - - -
gpu
gpu_id (pk)
gpu_information (u)
gpu_part_number
gpu_product_name
> -]; -language_implementation_version -[label = - < - - - - -
language_implementation_version
language_implementation_version_id (pk)
benchmark_language_id (pk)
language_implementation_version (u)
> -]; -machine -[label = - < - - - - - - - - - - -
machine
machine_id (pk)
machine_name
mac_address (u)
memory_bytes
cpu_actual_frequency_hz
machine_other_attributes (o)
cpu_id (fk)
gpu_id (fk)
os_id (fk)
> -]; -os -[label = - < - - - - - -
os
os_id (pk)
os_name (u)
architecture_name (u)
kernel_name (u)
> -]; -project -[label = - < - - - - - - -
project
project_id (pk)
project_name (u)
project_url (u)
repo_url (u)
last_changed
> -]; -unit -[label = - < - - - - -
unit
unit_id (pk)
units (u)
benchmark_type_id (fk)
> -]; -environment -[label = - < - - - - - -
environment
environment_id (pk)
language_implementation_version_id (pk)
benchmark_language_id (pk)
dependencies_id (u) (fk)
> -]; -machine:cpu_id -> cpu:cpu_id; -machine:gpu_id -> gpu:gpu_id; -machine:os_id -> os:os_id; -benchmark:benchmark_language_id -> benchmark_language:benchmark_language_id; -environment:benchmark_language_id -> benchmark_language:benchmark_language_id; -language_implementation_version:benchmark_language_id -> benchmark_language:benchmark_language_id; -environment:dependencies_id -> dependencies:dependencies_id; -environment:benchmark_language_id -> language_implementation_version:benchmark_language_id; -environment:language_implementation_version_id -> language_implementation_version:language_implementation_version_id; -unit:benchmark_type_id -> benchmark_type:benchmark_type_id; -benchmark_run:machine_id -> machine:machine_id; -benchmark:unit_id -> unit:unit_id; -benchmark_run:language_implementation_version_id -> environment:language_implementation_version_id; -benchmark_run:benchmark_language_id -> environment:benchmark_language_id; -benchmark_run:environment_id -> environment:environment_id; -benchmark_run:benchmark_language_id -> benchmark:benchmark_language_id; -benchmark_run:benchmark_id -> benchmark:benchmark_id; -} - diff --git a/dev/benchmarking/data_model.rst b/dev/benchmarking/data_model.rst deleted file mode 100644 index d0f3dc7fc996..000000000000 --- a/dev/benchmarking/data_model.rst +++ /dev/null @@ -1,373 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. 
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - - -.. WARNING -.. This is an auto-generated file. Please do not edit. - -.. To reproduce, please run :code:`./make_data_model_rst.sh`. -.. (This requires you have the -.. `psql client `_ -.. and have started the docker containers using -.. :code:`docker-compose up`). - - -.. _benchmark-data-model: - -Benchmark data model -==================== - - -.. graphviz:: data_model.dot - - -.. _benchmark-ingestion: - -Benchmark ingestion helper functions -==================================== - -ingest_benchmark_run_view -------------------------- - -:code:`ingest_benchmark_run_view(from_jsonb jsonb)` - -The argument is a JSON object. NOTE: key names must be entirely -lowercase, or the insert will fail. Extra key-value pairs are ignored. -Example:: - - [ - { - "benchmark_name": "Benchmark 2", - "benchmark_version": "version 0", - "parameter_values": {"arg0": 100, "arg1": 5}, - "value": 2.5, - "git_commit_timestamp": "2019-02-08 22:35:53 +0100", - "git_hash": "324d3cf198444a", - "val_min": 1, - "val_q1": 2, - "val_q3": 3, - "val_max": 4, - "std_dev": 1.41, - "n_obs": 8, - "run_timestamp": "2019-02-14 03:00:05 -0600", - "mac_address": "08:00:2b:01:02:03", - "benchmark_language": "Python", - "language_implementation_version": "CPython 2.7", - "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} - }, - { - "benchmark_name": "Benchmark 2", - "benchmark_version": "version 0", - "parameter_values": {"arg0": 1000, "arg1": 5}, - "value": 5, - "git_commit_timestamp": "2019-02-08 22:35:53 +0100", - "git_hash": "324d3cf198444a", - "std_dev": 3.14, - "n_obs": 8, - "run_timestamp": "2019-02-14 03:00:10 -0600", - "mac_address": "08:00:2b:01:02:03", - "benchmark_language": "Python", - "language_implementation_version": "CPython 2.7", - "dependencies": {"six": "", "numpy": "1.14", 
"other_lib": "1.0"} - } - ] -To identify which columns in "benchmark_run_view" are required, -please see the view documentation in :ref:`benchmark-data-model`. - - - -back to `Benchmark data model `_ - - -ingest_benchmark_view ---------------------- - -:code:`ingest_benchmark_view(from_jsonb jsonb)` - -The argument is a JSON object. NOTE: key names must be entirely -lowercase, or the insert will fail. Extra key-value pairs are ignored. -Example:: - - [ - { - "benchmark_name": "Benchmark 1", - "parameter_names": ["arg0", "arg1", "arg2"], - "benchmark_description": "First benchmark", - "benchmark_type": "Time", - "units": "miliseconds", - "lessisbetter": true, - "benchmark_version": "second version", - "benchmark_language": "Python" - }, - { - "benchmark_name": "Benchmark 2", - "parameter_names": ["arg0", "arg1"], - "benchmark_description": "Description 2.", - "benchmark_type": "Time", - "units": "nanoseconds", - "lessisbetter": true, - "benchmark_version": "second version", - "benchmark_language": "Python" - } - ] - -To identify which columns in "benchmark_view" are required, -please see the view documentation in :ref:`benchmark-data-model`. - - - -back to `Benchmark data model `_ - - -ingest_benchmark_runs_with_context ----------------------------------- - -:code:`ingest_benchmark_runs_with_context(from_jsonb jsonb)` - -The argument is a JSON object. NOTE: key names must be entirely -lowercase, or the insert will fail. Extra key-value pairs are ignored. -The object contains three key-value pairs:: - - {"context": { - "mac_address": "08:00:2b:01:02:03", - "benchmark_language": "Python", - "language_implementation_version": "CPython 3.6", - "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}, - "git_commit_timestamp": "2019-02-14 22:42:22 +0100", - "git_hash": "123456789abcde", - "run_timestamp": "2019-02-14 03:00:40 -0600", - "extra stuff": "does not hurt anything and will not be added." 
- }, - "benchmark_version": { - "Benchmark Name 1": "Any string can be a version.", - "Benchmark Name 2": "A git hash can be a version.", - "An Unused Benchmark Name": "Will be ignored." - }, - "benchmarks": [ - { - "benchmark_name": "Benchmark Name 1", - "parameter_values": {"argument1": 1, "argument2": "value2"}, - "value": 42, - "val_min": 41.2, - "val_q1": 41.5, - "val_q3": 42.5, - "val_max": 42.8, - "std_dev": 0.5, - "n_obs": 100, - "run_metadata": {"any": "key-value pairs"}, - "run_notes": "Any relevant notes." - }, - { - "benchmark_name": "Benchmark Name 2", - "parameter_values": {"not nullable": "Use {} if no params."}, - "value": 8, - "std_dev": 1, - "n_obs": 2, - } - ] - } - -- The entry for "context" contains the machine, environment, and timestamp - information common to all of the runs -- The entry for "benchmark_version" maps benchmark - names to their version strings. (Which can be a git hash, - the entire code string, a number, or any other string of your choice.) -- The entry for "benchmarks" is a list of benchmark run data - for the given context and benchmark versions. The first example - benchmark run entry contains all possible values, even - nullable ones, and the second entry omits all nullable values. - - - - -back to `Benchmark data model `_ - - -ingest_machine_view -------------------- - -:code:`ingest_machine_view(from_jsonb jsonb)` - -The argument is a JSON object. NOTE: key names must be entirely -lowercase, or the insert will fail. Extra key-value pairs are ignored. 
-Example:: - - { - "mac_address": "0a:00:2d:01:02:03", - "machine_name": "Yet-Another-Machine-Name", - "memory_bytes": 8589934592, - "cpu_actual_frequency_hz": 2300000000, - "os_name": "OSX", - "architecture_name": "x86_64", - "kernel_name": "18.2.0", - "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz", - "cpu_core_count": 2, - "cpu_thread_count": 4, - "cpu_frequency_max_hz": 2300000000, - "cpu_frequency_min_hz": 2300000000, - "cpu_l1d_cache_bytes": 32768, - "cpu_l1i_cache_bytes": 32768, - "cpu_l2_cache_bytes": 262144, - "cpu_l3_cache_bytes": 4194304, - "machine_other_attributes": {"just": "an example"}, - "gpu_information": "", - "gpu_part_number": "", - "gpu_product_name": "" - } - -To identify which columns in "machine_view" are required, -please see the view documentation in :ref:`benchmark-data-model`. - - - -back to `Benchmark data model `_ - - - -.. _benchmark-views: - -Benchmark views -=============== - - -benchmark_run_view ------------------- - -Each benchmark run. - -- Each entry is unique on the machine, environment, benchmark, - and git commit timestamp. 
- -=============================== =========== ======== =========== =========== -Column Type Nullable Default Description -=============================== =========== ======== =========== =========== -benchmark_run_id int8 not null serial primary key -benchmark_name citext not null unique -benchmark_version citext not null unique -parameter_values jsonb not null '{}'::jsonb unique -value numeric not null -git_commit_timestamp timestamptz not null unique -git_hash text not null -val_min numeric -val_q1 numeric -val_q3 numeric -val_max numeric -std_dev numeric not null -n_obs int4 not null -run_timestamp timestamptz not null unique -run_metadata jsonb -run_notes text -mac_address macaddr not null unique -benchmark_language citext not null unique -language_implementation_version citext not null ''::citext unique -dependencies jsonb not null '{}'::jsonb unique -=============================== =========== ======== =========== =========== - -back to `Benchmark data model `_ - -benchmark_view --------------- - -The details about a particular benchmark. - -- "benchmark_name" is unique for a given "benchmark_language" -- Each entry is unique on - ("benchmark_language", "benchmark_name", "benchmark_version") - -===================== ====== ======== ======= =========== -Column Type Nullable Default Description -===================== ====== ======== ======= =========== -benchmark_id int4 not null serial primary key -benchmark_name citext not null unique -parameter_names _text -benchmark_description text not null -benchmark_type citext not null unique -units citext not null unique -lessisbetter bool not null -benchmark_version citext not null unique -benchmark_language citext not null unique -===================== ====== ======== ======= =========== - -back to `Benchmark data model `_ - -environment_view ----------------- - -The build environment used for a reported benchmark run. -(Will be inferred from each "benchmark_run" if not explicitly added). 
- -- Each entry is unique on - ("benchmark_language", "language_implementation_version", "dependencies") -- "benchmark_language" is unique in the "benchmark_language" table -- "benchmark_language" plus "language_implementation_version" is unique in - the "language_implementation_version" table -- "dependencies" is unique in the "dependencies" table - -=============================== ====== ======== =========== =========== -Column Type Nullable Default Description -=============================== ====== ======== =========== =========== -environment_id int4 not null serial primary key -benchmark_language citext not null unique -language_implementation_version citext not null ''::citext unique -dependencies jsonb not null '{}'::jsonb unique -=============================== ====== ======== =========== =========== - -back to `Benchmark data model `_ - -machine_view ------------- - -The machine environment (CPU, GPU, OS) used for each benchmark run. - -- "mac_address" is unique in the "machine" table -- "gpu_part_number" is unique in the "gpu" (graphics processing unit) table - Empty string (''), not null, is used for machines that won't use the GPU -- "cpu_model_name" is unique in the "cpu" (central processing unit) table -- "os_name", "os_architecture_name", and "os_kernel_name" - are unique in the "os" (operating system) table -- "machine_other_attributes" is a key-value store for any other relevant - data, e.g. 
'{"hard_disk_type": "solid state"}' - -======================== ======= ======== ========== =========== -Column Type Nullable Default Description -======================== ======= ======== ========== =========== -machine_id int4 not null serial primary key -mac_address macaddr not null unique -machine_name citext not null -memory_bytes int8 not null -cpu_actual_frequency_hz int8 not null -os_name citext not null unique -architecture_name citext not null unique -kernel_name citext not null ''::citext unique -cpu_model_name citext not null unique -cpu_core_count int4 not null -cpu_thread_count int4 not null -cpu_frequency_max_hz int8 not null -cpu_frequency_min_hz int8 not null -cpu_l1d_cache_bytes int4 not null -cpu_l1i_cache_bytes int4 not null -cpu_l2_cache_bytes int4 not null -cpu_l3_cache_bytes int4 not null -gpu_information citext not null ''::citext unique -gpu_part_number citext not null ''::citext -gpu_product_name citext not null ''::citext -machine_other_attributes jsonb -======================== ======= ======== ========== =========== - -back to `Benchmark data model `_ - - diff --git a/dev/benchmarking/ddl/0_setup.sql b/dev/benchmarking/ddl/0_setup.sql deleted file mode 100644 index ec1044641243..000000000000 --- a/dev/benchmarking/ddl/0_setup.sql +++ /dev/null @@ -1,23 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. 
See the License for the - specific language governing permissions and limitations - under the License. -*/ - -CREATE EXTENSION IF NOT EXISTS "citext"; -- type for case-insensitive text - --- For future fine-grained control over function execution by user group. -ALTER DEFAULT PRIVILEGES REVOKE EXECUTE ON functions FROM public; diff --git a/dev/benchmarking/ddl/1_00_table_public_project.sql b/dev/benchmarking/ddl/1_00_table_public_project.sql deleted file mode 100644 index c52d66cfd950..000000000000 --- a/dev/benchmarking/ddl/1_00_table_public_project.sql +++ /dev/null @@ -1,45 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - - --- PROJECT -CREATE TABLE IF NOT EXISTS public.project -( - project_id SERIAL PRIMARY KEY - , project_name citext NOT NULL - , project_url text NOT NULL - , repo_url text NOT NULL - , last_changed timestamp (0) without time zone NOT NULL DEFAULT now() -); -COMMENT ON TABLE public.project - IS 'Project name and relevant URLs.'; -COMMENT ON COLUMN public.project.project_url - IS 'Homepage URL.'; -COMMENT ON COLUMN public.project.repo_url - IS 'Git repo URL to link stored commit hashes to code in a webpage.'; -COMMENT ON COLUMN public.project.last_changed - IS 'New project details are added with a new timestamp. 
' - 'The project details with the newest timestamp will be used.'; - --- CONSTRAINTS -CREATE UNIQUE INDEX project_unique_index_on_project_name_urls - ON public.project(project_name, project_url, repo_url); -COMMENT ON INDEX - public.project_unique_index_on_project_name_urls - IS 'Enforce uniqueness of project name and urls.'; diff --git a/dev/benchmarking/ddl/1_01_table_public_cpu.sql b/dev/benchmarking/ddl/1_01_table_public_cpu.sql deleted file mode 100644 index df1a9e757d25..000000000000 --- a/dev/benchmarking/ddl/1_01_table_public_cpu.sql +++ /dev/null @@ -1,63 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - - --- CPU -CREATE TABLE IF NOT EXISTS public.cpu -( - cpu_id SERIAL PRIMARY KEY - , cpu_model_name citext NOT NULL UNIQUE - , cpu_core_count integer NOT NULL - , cpu_thread_count integer NOT NULL - , cpu_frequency_max_Hz bigint NOT NULL - , cpu_frequency_min_Hz bigint NOT NULL - , cpu_L1d_cache_bytes integer NOT NULL - , cpu_L1i_cache_bytes integer NOT NULL - , cpu_L2_cache_bytes integer NOT NULL - , cpu_L3_cache_bytes integer NOT NULL -); -COMMENT ON TABLE public.cpu - IS 'CPU model and its specifications.'; -COMMENT ON COLUMN public.cpu.cpu_id - IS 'The primary key for the CPU table. 
' - 'NOTE: This is a synthetic primary key and not meant to represent a ' - 'processor instruction to read capabilities.'; -COMMENT ON COLUMN public.cpu.cpu_model_name - IS 'The output of `sysctl -n machdep.cpu.brand_stringp`.'; -COMMENT ON COLUMN public.cpu.cpu_core_count - IS 'The output of `sysctl -n hw.physicalcpu`.'; -COMMENT ON COLUMN public.cpu.cpu_thread_count - IS 'The output of `sysctl -n hw.logicalcpu`.'; -COMMENT ON COLUMN public.cpu.cpu_frequency_max_Hz - IS 'The output of `sysctl -n hw.cpufrequency_max`.'; -COMMENT ON COLUMN public.cpu.cpu_frequency_min_Hz - IS 'The output of `sysctl -n hw.cpufrequency_min`.'; -COMMENT ON COLUMN public.cpu.cpu_L1d_cache_bytes - IS 'The output of `sysctl -n hw.l1dcachesize`.'; -COMMENT ON COLUMN public.cpu.cpu_L1i_cache_bytes - IS 'The output of `sysctl -n hw.l1icachesize`.'; -COMMENT ON COLUMN public.cpu.cpu_L2_cache_bytes - IS 'The output of `sysctl -n hw.l2cachesize`.'; -COMMENT ON COLUMN public.cpu.cpu_L3_cache_bytes - IS 'The output of `sysctl -n hw.l3cachesize`.'; - --- CONSTRAINTS -ALTER TABLE public.cpu - ADD CONSTRAINT cpu_check_cpu_model_name_length - CHECK (char_length(cpu_model_name) < 255); diff --git a/dev/benchmarking/ddl/1_02_table_public_gpu.sql b/dev/benchmarking/ddl/1_02_table_public_gpu.sql deleted file mode 100644 index 564af19de7a6..000000000000 --- a/dev/benchmarking/ddl/1_02_table_public_gpu.sql +++ /dev/null @@ -1,43 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - - --- GPU -CREATE TABLE IF NOT EXISTS public.gpu -( - gpu_id SERIAL PRIMARY KEY - , gpu_information citext UNIQUE NOT NULL DEFAULT '' - , gpu_part_number citext NOT NULL DEFAULT '' - , gpu_product_name citext NOT NULL DEFAULT '' -); -COMMENT ON TABLE public.gpu IS 'GPU specifications.'; -COMMENT ON COLUMN public.gpu.gpu_information - IS 'The output of `nvidia-smi -q` (on Linux or Windows), or `cuda-smi` ' - 'or `kextstat | grep -i cuda` on OSX, or another command; anything ' - 'that gets a string to uniquely identify the GPU.'; - --- CONSTRAINTS -CREATE INDEX gpu_index_on_part_number - ON public.gpu (gpu_part_number); - -CREATE INDEX gpu_index_on_product_name - ON public.gpu (gpu_product_name); - -CREATE INDEX gpu_index_on_product_name_and_part_number - ON public.gpu (gpu_product_name, gpu_part_number); diff --git a/dev/benchmarking/ddl/1_03_table_public_os.sql b/dev/benchmarking/ddl/1_03_table_public_os.sql deleted file mode 100644 index 7b03d82f4874..000000000000 --- a/dev/benchmarking/ddl/1_03_table_public_os.sql +++ /dev/null @@ -1,57 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - - --- OS -CREATE TABLE IF NOT EXISTS public.os -( - os_id SERIAL PRIMARY KEY - , os_name citext NOT NULL - , architecture_name citext NOT NULL - , kernel_name citext NOT NULL DEFAULT '' -); --- @name os. forces retention of an 's' in the Graphile GraphQL api. -COMMENT ON TABLE public.os - IS E'@name os.\nOperating system name and kernel (version).'; -COMMENT ON COLUMN public.os.os_name - IS 'Operating system name. For example, OSX, Ubuntu, Windows`.'; -COMMENT ON COLUMN public.os.architecture_name - IS 'Operating system architecture; the output of `uname -m`.'; -COMMENT ON COLUMN public.os.kernel_name - IS 'Operating system kernel, or NULL. ' - 'On Linux/OSX, the output of `uname -r`. 
' - 'On Windows, the output of `ver`.'; - --- CONSTRAINTS -ALTER TABLE public.os - ADD CONSTRAINT os_check_os_name_length - CHECK (char_length(os_name) < 63); - -ALTER TABLE public.os - ADD CONSTRAINT os_check_architecture_name_length - CHECK (char_length(architecture_name) < 63); - -ALTER TABLE public.os - ADD CONSTRAINT os_check_kernel_name_length - CHECK (char_length(kernel_name) < 63); - -CREATE UNIQUE INDEX os_unique_index - ON public.os(os_name, architecture_name, kernel_name); -COMMENT ON INDEX public.os_unique_index - IS 'Enforce uniqueness of os, architecture, and kernel names.'; diff --git a/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql b/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql deleted file mode 100644 index 2e3553677093..000000000000 --- a/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql +++ /dev/null @@ -1,35 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --- BENCHMARK_LANGUAGE -CREATE TABLE IF NOT EXISTS public.benchmark_language -( - benchmark_language_id SERIAL PRIMARY KEY - , benchmark_language citext NOT NULL UNIQUE -); -COMMENT ON TABLE public.benchmark_language - IS 'The language the benchmark was written in (and presumably for).'; -COMMENT ON COLUMN public.benchmark_language.benchmark_language - IS 'The benchmark language. For example: Python'; - --- CONSTRAINTS -ALTER TABLE public.benchmark_language - ADD CONSTRAINT benchmark_language_check_language_length - CHECK (char_length(benchmark_language) < 63); diff --git a/dev/benchmarking/ddl/1_05_table_public_dependencies.sql b/dev/benchmarking/ddl/1_05_table_public_dependencies.sql deleted file mode 100644 index 3744a0c35a87..000000000000 --- a/dev/benchmarking/ddl/1_05_table_public_dependencies.sql +++ /dev/null @@ -1,31 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --- DEPENDENCIES -CREATE TABLE IF NOT EXISTS public.dependencies -( - dependencies_id SERIAL PRIMARY KEY - , dependencies jsonb UNIQUE NOT NULL DEFAULT '{}'::jsonb -); -COMMENT ON TABLE public.dependencies - IS E'@name dependencies.\n' - 'A JSON object mapping dependencies to their versions.'; -COMMENT ON COLUMN public.dependencies.dependencies - IS 'For example: ''{"boost": "1.69", "conda": "", "numpy": "1.15"}''.'; diff --git a/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql b/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql deleted file mode 100644 index f7d26e4e2d2e..000000000000 --- a/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql +++ /dev/null @@ -1,46 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --- LANGUAGE_IMPLEMENTATION_VERSION -CREATE TABLE IF NOT EXISTS public.language_implementation_version -( - language_implementation_version_id SERIAL - , language_implementation_version citext NOT NULL DEFAULT '' - , benchmark_language_id integer NOT NULL - , PRIMARY KEY (language_implementation_version_id, benchmark_language_id) - , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language -); -COMMENT ON TABLE public.language_implementation_version - IS 'The benchmark language implementation or compiler version, e.g. ' - '''CPython 2.7'' or ''PyPy x.y'' or ''gcc 7.3.0'' or ' - '''gcc (Ubuntu 7.3.0-27ubuntu1~18.04) 7.3.0''.'; -COMMENT ON COLUMN public.language_implementation_version.language_implementation_version - IS 'The version number used in the benchmark environment (e.g. ''2.7'').'; - --- CONSTRAINTS -ALTER TABLE public.language_implementation_version - ADD CONSTRAINT language_implementation_version_check_version_length - CHECK (char_length(language_implementation_version) < 255); - -CREATE UNIQUE INDEX language_implementation_version_unique_index - ON public.language_implementation_version - (benchmark_language_id, language_implementation_version); -COMMENT ON INDEX language_implementation_version_unique_index - IS 'Enforce unique implementation versions for the languages.'; diff --git a/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql b/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql deleted file mode 100644 index 1143cdb0015d..000000000000 --- a/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql +++ /dev/null @@ -1,39 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - - --- BENCHMARK_TYPE -CREATE TABLE IF NOT EXISTS public.benchmark_type -( - benchmark_type_id SERIAL PRIMARY KEY - , benchmark_type citext NOT NULL UNIQUE - , lessisbetter boolean NOT NULL -); -COMMENT ON TABLE public.benchmark_type - IS 'The type of benchmark. For example "time", "mem", "peakmem", "track"'; -COMMENT ON COLUMN public.benchmark_type.benchmark_type - IS 'The type of units, so ''time'' for seconds, miliseconds, or ' - '''mem'' for kilobytes, megabytes.'; -COMMENT ON COLUMN public.benchmark_type.lessisbetter - IS 'True if a smaller benchmark value is better.'; - --- CONSTRAINTS -ALTER TABLE public.benchmark_type - ADD CONSTRAINT benchmark_type_check_benchmark_type_char_length - CHECK (char_length(benchmark_type) < 63); diff --git a/dev/benchmarking/ddl/1_08_table_public_machine.sql b/dev/benchmarking/ddl/1_08_table_public_machine.sql deleted file mode 100644 index 8f219d3e0cfa..000000000000 --- a/dev/benchmarking/ddl/1_08_table_public_machine.sql +++ /dev/null @@ -1,69 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - - --- MACHINE -CREATE TABLE IF NOT EXISTS public.machine -( - machine_id SERIAL PRIMARY KEY - , machine_name citext NOT NULL - , mac_address macaddr NOT NULL - , memory_bytes bigint NOT NULL - , cpu_actual_frequency_Hz bigint NOT NULL - , machine_other_attributes jsonb - , cpu_id integer NOT NULL - , gpu_id integer NOT NULL - , os_id integer NOT NULL - , FOREIGN KEY (cpu_id) REFERENCES public.cpu - , FOREIGN KEY (gpu_id) REFERENCES public.gpu - , FOREIGN KEY (os_id) REFERENCES public.os -); -COMMENT ON TABLE public.machine - IS 'Unique identifiers for a machine.'; -COMMENT ON COLUMN public.machine.machine_name - IS 'A machine name of your choice.'; -COMMENT ON COLUMN public.machine.mac_address - IS 'The mac_address of a physical network interface to uniquely ' - 'identify a computer. Postgres accepts standard formats, including ' - '''08:00:2b:01:02:03'', ''08-00-2b-01-02-03'', ''08002b:010203'''; -COMMENT ON COLUMN public.machine.memory_bytes - IS 'The output of `sysctl -n hw.memsize`.'; -COMMENT ON COLUMN public.machine.cpu_actual_frequency_Hz - IS 'The output of `sysctl -n hw.cpufrequency`.'; -COMMENT ON COLUMN public.machine.machine_other_attributes - IS 'Additional attributes of interest, as a JSON object. 
' - 'For example: ''{"hard_disk_type": "solid state"}''::jsonb.'; - --- CONSTRAINTS -CREATE UNIQUE INDEX machine_index_on_mac_address - ON public.machine(mac_address); -COMMENT ON INDEX machine_index_on_mac_address - IS 'Enforce unique mac address'; - -CREATE INDEX machine_index_on_cpu_id - ON public.machine(cpu_id); - -CREATE INDEX machine_index_on_gpu_id - ON public.machine(gpu_id); - -CREATE INDEX machine_index_on_os_id - ON public.machine(os_id); - -CREATE INDEX machine_index_on_cpu_gpu_os_id - ON public.machine(cpu_id, gpu_id, os_id); diff --git a/dev/benchmarking/ddl/1_09_table_public_unit.sql b/dev/benchmarking/ddl/1_09_table_public_unit.sql deleted file mode 100644 index a8cf576696d1..000000000000 --- a/dev/benchmarking/ddl/1_09_table_public_unit.sql +++ /dev/null @@ -1,37 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --- UNIT -CREATE TABLE IF NOT EXISTS public.unit -( - unit_id SERIAL PRIMARY KEY - , units citext NOT NULL UNIQUE - , benchmark_type_id integer NOT NULL - , FOREIGN KEY (benchmark_type_id) - REFERENCES public.benchmark_type(benchmark_type_id) -); -COMMENT ON TABLE public.unit IS 'The actual units for a reported benchmark.'; -COMMENT ON COLUMN public.unit.units - IS 'For example: nanoseconds, microseconds, bytes, megabytes.'; - --- CONSTRAINTS -ALTER TABLE public.unit - ADD CONSTRAINT unit_check_units_string_length - CHECK (char_length(units) < 63); diff --git a/dev/benchmarking/ddl/1_10_table_public_environment.sql b/dev/benchmarking/ddl/1_10_table_public_environment.sql deleted file mode 100644 index e3a6d23957f2..000000000000 --- a/dev/benchmarking/ddl/1_10_table_public_environment.sql +++ /dev/null @@ -1,51 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --- ENVIRONMENT -CREATE TABLE IF NOT EXISTS public.environment -( - environment_id SERIAL - , language_implementation_version_id integer NOT NULL - , benchmark_language_id integer NOT NULL - , dependencies_id integer NOT NULL - , PRIMARY KEY - (environment_id, benchmark_language_id, language_implementation_version_id) - , FOREIGN KEY - (benchmark_language_id) - REFERENCES public.benchmark_language - , FOREIGN KEY - (language_implementation_version_id, benchmark_language_id) - REFERENCES public.language_implementation_version( - language_implementation_version_id - , benchmark_language_id - ) - , FOREIGN KEY - (dependencies_id) - REFERENCES public.dependencies -); -COMMENT ON TABLE public.environment - IS 'Identifies a build environment for a specific suite of benchmarks.'; - --- CONSTRAINTS -CREATE UNIQUE INDEX environment_unique_index - ON public.environment - (benchmark_language_id, language_implementation_version_id, dependencies_id); -COMMENT ON INDEX environment_unique_index - IS 'Enforce unique combinations of language version and dependencies.'; diff --git a/dev/benchmarking/ddl/1_11_table_public_benchmark.sql b/dev/benchmarking/ddl/1_11_table_public_benchmark.sql deleted file mode 100644 index 18895823df68..000000000000 --- a/dev/benchmarking/ddl/1_11_table_public_benchmark.sql +++ /dev/null @@ -1,54 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - - --- BENCHMARK -CREATE TABLE IF NOT EXISTS public.benchmark -( - benchmark_id SERIAL - , benchmark_name citext NOT NULL - , parameter_names text[] - , benchmark_description text NOT NULL - , benchmark_version citext NOT NULL - , unit_id integer NOT NULL - , benchmark_language_id integer NOT NULL - , PRIMARY KEY (benchmark_id, benchmark_language_id) - , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language - , FOREIGN KEY (unit_id) REFERENCES public.unit -); -COMMENT ON TABLE public.benchmark - IS 'Identifies an individual benchmark.'; -COMMENT ON COLUMN public.benchmark.parameter_names - IS 'A list of strings identifying the parameter names in the benchmark.'; -COMMENT ON COLUMN public.benchmark.benchmark_version - IS 'Can be any string. 
In Airspeed Velocity, the version is ' - 'by default the hash of the entire code string for the benchmark.'; - --- CONSTRAINTS -CREATE INDEX benchmark_index_on_benchmark_language_id - ON public.benchmark(benchmark_language_id); - -CREATE INDEX benchmark_index_on_unit_id - ON public.benchmark(unit_id); - -CREATE UNIQUE INDEX benchmark_unique_index_on_language_benchmark_version - ON public.benchmark - (benchmark_language_id, benchmark_name, benchmark_version); -COMMENT ON INDEX public.benchmark_unique_index_on_language_benchmark_version - IS 'Enforce uniqueness of benchmark name and version for a given language.'; diff --git a/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql b/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql deleted file mode 100644 index 20b9ef0bb963..000000000000 --- a/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql +++ /dev/null @@ -1,112 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --- BENCHMARK_RUN -CREATE TABLE IF NOT EXISTS public.benchmark_run -( - benchmark_run_id BIGSERIAL PRIMARY KEY - , parameter_values jsonb NOT NULL DEFAULT '{}'::jsonb - , value numeric NOT NULL - , git_commit_timestamp timestamp (0) with time zone NOT NULL - , git_hash text NOT NULL - , val_min numeric - , val_q1 numeric - , val_q3 numeric - , val_max numeric - , std_dev numeric NOT NULL - , n_obs integer NOT NULL - , run_timestamp timestamp (0) with time zone NOT NULL - , run_metadata jsonb - , run_notes text - , machine_id integer NOT NULL - , environment_id integer NOT NULL - , language_implementation_version_id integer NOT NULL - , benchmark_language_id integer NOT NULL - , benchmark_id integer NOT NULL - , FOREIGN KEY (machine_id) REFERENCES public.machine - , FOREIGN KEY - (environment_id, benchmark_language_id, language_implementation_version_id) - REFERENCES public.environment - , FOREIGN KEY (benchmark_id, benchmark_language_id) - REFERENCES public.benchmark(benchmark_id, benchmark_language_id) -); -COMMENT ON TABLE public.benchmark_run - IS 'One run per benchmark run.'; -COMMENT ON COLUMN public.benchmark_run.parameter_values - IS 'A JSON object mapping the parameter names from ' - '"benchmark.parameter_names" to values.'; -COMMENT ON COLUMN public.benchmark_run.value - IS 'The average value from the benchmark run.'; -COMMENT ON COLUMN public.benchmark_run.git_commit_timestamp - IS 'Get this using `git show -s --date=local --format="%ci" `. ' - 'ISO 8601 is recommended, e.g. 
''2019-01-30 03:12 -0600''.'; -COMMENT ON COLUMN public.benchmark_run.git_hash - IS 'The commit has of the codebase currently being benchmarked.'; -COMMENT ON COLUMN public.benchmark_run.val_min - IS 'The smallest benchmark run value for this run.'; -COMMENT ON COLUMN public.benchmark_run.val_q1 - IS 'The first quartile of the benchmark run values for this run.'; -COMMENT ON COLUMN public.benchmark_run.val_q3 - IS 'The third quartile of the benchmark run values for this run.'; -COMMENT ON COLUMN public.benchmark_run.val_max - IS 'The largest benchmark run value for this run.'; -COMMENT ON COLUMN public.benchmark_run.std_dev - IS 'The standard deviation of the run values for this benchmark run.'; -COMMENT ON COLUMN public.benchmark_run.n_obs - IS 'The number of observations for this benchmark run.'; -COMMENT ON COLUMN public.benchmark_run.run_metadata - IS 'Additional metadata of interest, as a JSON object. ' - 'For example: ''{"ci_99": [2.7e-06, 3.1e-06]}''::jsonb.'; -COMMENT ON COLUMN public.benchmark_run.run_notes - IS 'Additional notes of interest, as a text string. 
'; - --- CONSTRAINTS -ALTER TABLE public.benchmark_run - ADD CONSTRAINT benchmark_run_check_std_dev_nonnegative - CHECK (std_dev >= 0); - -ALTER TABLE public.benchmark_run - ADD CONSTRAINT benchmark_run_check_n_obs_positive - CHECK (n_obs > 0); - -CREATE INDEX benchmark_run_index_on_environment_id - ON public.benchmark_run(environment_id); - -CREATE INDEX benchmark_run_index_on_machine_id - ON public.benchmark_run(machine_id); - -CREATE INDEX benchmark_run_index_on_benchmark_id - ON public.benchmark_run(benchmark_id, benchmark_language_id); - -CREATE INDEX benchmark_run_index_on_benchmark_environment_time - ON public.benchmark_run - (benchmark_id, environment_id, git_commit_timestamp); -COMMENT ON INDEX - public.benchmark_run_index_on_benchmark_environment_time - IS 'Index to improve sorting by benchmark, environment, and timestamp.'; - -CREATE UNIQUE INDEX - benchmark_run_unique_index_on_env_benchmark_timestamp_params - ON public.benchmark_run - (machine_id, environment_id, benchmark_id, git_commit_timestamp, parameter_values, run_timestamp); -COMMENT ON INDEX - public.benchmark_run_unique_index_on_env_benchmark_timestamp_params - IS 'Enforce uniqueness of benchmark run for a given machine, ' - 'environment, benchmark, git commit timestamp, and parameter values.'; diff --git a/dev/benchmarking/ddl/2_00_views.sql b/dev/benchmarking/ddl/2_00_views.sql deleted file mode 100644 index cbd295e506d8..000000000000 --- a/dev/benchmarking/ddl/2_00_views.sql +++ /dev/null @@ -1,324 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - --- NOTE: --- The function for documentation depends on view columns --- being named exactly the same as in the table view. - --- MACHINE_VIEW -CREATE OR REPLACE VIEW public.machine_view AS - SELECT - machine.machine_id - , mac_address - , machine_name - , memory_bytes - , cpu_actual_frequency_Hz - , os_name - , architecture_name - , kernel_name - , cpu_model_name - , cpu_core_count - , cpu_thread_count - , cpu_frequency_max_Hz - , cpu_frequency_min_Hz - , cpu_L1d_cache_bytes - , cpu_L1i_cache_bytes - , cpu_L2_cache_bytes - , cpu_L3_cache_bytes - , gpu_information - , gpu_part_number - , gpu_product_name - , machine_other_attributes - FROM public.machine AS machine - JOIN public.cpu AS cpu ON machine.cpu_id = cpu.cpu_id - JOIN public.gpu AS gpu ON machine.gpu_id = gpu.gpu_id - JOIN public.os AS os ON machine.os_id = os.os_id; -COMMENT ON VIEW public.machine_view IS -E'The machine environment (CPU, GPU, OS) used for each benchmark run.\n\n' - '- "mac_address" is unique in the "machine" table\n' - '- "gpu_part_number" is unique in the "gpu" (graphics processing unit) table\n' - ' Empty string (''''), not null, is used for machines that won''t use the GPU\n' - '- "cpu_model_name" is unique in the "cpu" (central processing unit) table\n' - '- "os_name", "os_architecture_name", and "os_kernel_name"\n' - ' are unique in the "os" (operating system) table\n' - '- "machine_other_attributes" is a key-value store for any other relevant\n' - ' data, e.g. 
''{"hard_disk_type": "solid state"}'''; - - --- LANGUAGE_IMPLEMENTATION_VERSION_VIEW -CREATE OR REPLACE VIEW public.language_implementation_version_view AS - SELECT - lv.language_implementation_version_id - , bl.benchmark_language - , lv.language_implementation_version - FROM public.language_implementation_version AS lv - JOIN public.benchmark_language AS bl - ON lv.benchmark_language_id = bl.benchmark_language_id; - --- ENVIRONMENT_VIEW -CREATE OR REPLACE VIEW public.environment_view AS - SELECT - env.environment_id - , benchmark_language - , language_implementation_version - , dependencies - FROM public.environment AS env - JOIN public.benchmark_language AS language - ON env.benchmark_language_id = language.benchmark_language_id - JOIN public.language_implementation_version AS version - ON env.language_implementation_version_id = version.language_implementation_version_id - JOIN public.dependencies AS deps - ON env.dependencies_id = deps.dependencies_id; -COMMENT ON VIEW public.environment_view IS -E'The build environment used for a reported benchmark run.\n' - '(Will be inferred from each "benchmark_run" if not explicitly added).\n\n' - '- Each entry is unique on\n' - ' ("benchmark_language", "language_implementation_version", "dependencies")\n' - '- "benchmark_language" is unique in the "benchmark_language" table\n' - '- "benchmark_language" plus "language_implementation_version" is unique in\n' - ' the "language_implementation_version" table\n' - '- "dependencies" is unique in the "dependencies" table'; - --- UNIT_VIEW -CREATE OR REPLACE VIEW public.unit_view AS - SELECT - unit.unit_id - , units - , benchmark_type - , lessisbetter - FROM public.unit AS unit - JOIN public.benchmark_type AS bt - ON unit.benchmark_type_id = bt.benchmark_type_id; - --- BENCHMARK_VIEW -CREATE OR REPLACE VIEW public.benchmark_view AS - SELECT - b.benchmark_id - , benchmark_name - , parameter_names - , benchmark_description - , benchmark_type - , units - , lessisbetter - , 
benchmark_version - , benchmark_language - FROM public.benchmark AS b - JOIN public.benchmark_language AS benchmark_language - ON b.benchmark_language_id = benchmark_language.benchmark_language_id - JOIN public.unit AS unit - ON b.unit_id = unit.unit_id - JOIN public.benchmark_type AS benchmark_type - ON unit.benchmark_type_id = benchmark_type.benchmark_type_id; -COMMENT ON VIEW public.benchmark_view IS -E'The details about a particular benchmark.\n\n' - '- "benchmark_name" is unique for a given "benchmark_language"\n' - '- Each entry is unique on\n' - ' ("benchmark_language", "benchmark_name", "benchmark_version")'; - --- BENCHMARK_RUN_VIEW -CREATE OR REPLACE VIEW public.benchmark_run_view AS - SELECT - run.benchmark_run_id - -- benchmark_view (name, version, language only) - , benchmark_name - , benchmark_version - -- datum - , parameter_values - , value - , git_commit_timestamp - , git_hash - , val_min - , val_q1 - , val_q3 - , val_max - , std_dev - , n_obs - , run_timestamp - , run_metadata - , run_notes - -- machine_view (mac address only) - , mac_address - -- environment_view - , env.benchmark_language - , language_implementation_version - , dependencies - FROM public.benchmark_run AS run - JOIN public.benchmark_view AS benchmark - ON run.benchmark_id = benchmark.benchmark_id - JOIN public.machine_view AS machine - ON run.machine_id = machine.machine_id - JOIN public.environment_view AS env - ON run.environment_id = env.environment_id; -COMMENT ON VIEW public.benchmark_run_view IS -E'Each benchmark run.\n\n' - '- Each entry is unique on the machine, environment, benchmark,\n' - ' and git commit timestamp.'; - --- FULL_BENCHMARK_RUN_VIEW -CREATE OR REPLACE VIEW public.full_benchmark_run_view AS - SELECT - run.benchmark_run_id - -- benchmark_view - , benchmark_name - , parameter_names - , benchmark_description - , benchmark_type - , units - , lessisbetter - , benchmark_version - -- datum - , parameter_values - , value - , git_commit_timestamp - , git_hash - , 
val_min - , val_q1 - , val_q3 - , val_max - , std_dev - , n_obs - , run_timestamp - , run_metadata - , run_notes - -- machine_view - , machine_name - , mac_address - , memory_bytes - , cpu_actual_frequency_Hz - , os_name - , architecture_name - , kernel_name - , cpu_model_name - , cpu_core_count - , cpu_thread_count - , cpu_frequency_max_Hz - , cpu_frequency_min_Hz - , cpu_L1d_cache_bytes - , cpu_L1i_cache_bytes - , cpu_L2_cache_bytes - , cpu_L3_cache_bytes - , gpu_information - , gpu_part_number - , gpu_product_name - , machine_other_attributes - -- environment_view - , env.benchmark_language - , env.language_implementation_version - , dependencies - FROM public.benchmark_run AS run - JOIN public.benchmark_view AS benchmark - ON run.benchmark_id = benchmark.benchmark_id - JOIN public.machine_view AS machine - ON run.machine_id = machine.machine_id - JOIN public.environment_view AS env - ON run.environment_id = env.environment_id; - --- SUMMARIZED_TABLES_VIEW -CREATE VIEW public.summarized_tables_view AS - WITH chosen AS ( - SELECT - cls.oid AS id - , cls.relname as tbl_name - FROM pg_catalog.pg_class AS cls - JOIN pg_catalog.pg_namespace AS ns ON cls.relnamespace = ns.oid - WHERE - cls.relkind = 'r' - AND ns.nspname = 'public' - ), all_constraints AS ( - SELECT - chosen.id AS tbl_id - , chosen.tbl_name - , unnest(conkey) AS col_id - , 'foreign key' AS col_constraint - FROM pg_catalog.pg_constraint - JOIN chosen ON chosen.id = conrelid - WHERE contype = 'f' - - UNION - - SELECT - chosen.id - , chosen.tbl_name - , unnest(indkey) - , 'unique' - FROM pg_catalog.pg_index i - JOIN chosen ON chosen.id = i.indrelid - WHERE i.indisunique AND NOT i.indisprimary - - UNION - - SELECT - chosen.id - , chosen.tbl_name - , unnest(indkey) - , 'primary key' - FROM pg_catalog.pg_index i - JOIN chosen on chosen.id = i.indrelid - WHERE i.indisprimary - ), gathered_constraints AS ( - SELECT - tbl_id - , tbl_name - , col_id - , string_agg(col_constraint, ', ' ORDER BY col_constraint) - 
AS col_constraint - FROM all_constraints - GROUP BY tbl_id, tbl_name, col_id - ) - SELECT - chosen.tbl_name AS table_name - , columns.attnum AS column_number - , columns.attname AS column_name - , typ.typname AS type_name - , CASE - WHEN columns.attnotnull - THEN 'not null' - ELSE '' - END AS nullable - , CASE - WHEN defaults.adsrc like 'nextval%' - THEN 'serial' - ELSE defaults.adsrc - END AS default_value - , CASE - WHEN gc.col_constraint = '' OR gc.col_constraint IS NULL - THEN cnstrnt.consrc - WHEN cnstrnt.consrc IS NULL - THEN gc.col_constraint - ELSE gc.col_constraint || ', ' || cnstrnt.consrc - END AS description - FROM pg_catalog.pg_attribute AS columns - JOIN chosen ON columns.attrelid = chosen.id - JOIN pg_catalog.pg_type AS typ - ON typ.oid = columns.atttypid - LEFT JOIN gathered_constraints AS gc - ON gc.col_id = columns.attnum - AND gc.tbl_id = columns.attrelid - LEFT JOIN pg_attrdef AS defaults - ON defaults.adrelid = chosen.id - AND defaults.adnum = columns.attnum - LEFT JOIN pg_catalog.pg_constraint AS cnstrnt - ON cnstrnt.conrelid = columns.attrelid - AND columns.attrelid = ANY(cnstrnt.conkey) - WHERE - columns.attnum > 0 - ORDER BY table_name, column_number; -COMMENT ON VIEW public.summarized_tables_view - IS 'A summary of all columns from all tables in the public schema, ' - ' identifying nullability, primary/foreign keys, and data type.'; diff --git a/dev/benchmarking/ddl/3_00_functions_helpers.sql b/dev/benchmarking/ddl/3_00_functions_helpers.sql deleted file mode 100644 index b10b69a4e914..000000000000 --- a/dev/benchmarking/ddl/3_00_functions_helpers.sql +++ /dev/null @@ -1,643 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - - --- PROJECT_DETAILS -CREATE TYPE public.type_project_details AS ( - project_name text - , project_url text - , repo_url text -); - -CREATE OR REPLACE FUNCTION public.project_details() -RETURNS public.type_project_details AS -$$ - SELECT project_name, project_url, repo_url - FROM public.project - ORDER BY last_changed DESC - LIMIT 1 -$$ -LANGUAGE sql STABLE; -COMMENT ON FUNCTION public.project_details() -IS 'Get the current project name, url, and repo url.'; - - --------------------------- GET-OR-SET FUNCTIONS -------------------------- --- The following functions have the naming convention "get__id". --- All of them attempt to SELECT the desired row given the column --- values, and if it does not exist will INSERT it. --- --- When functions are overloaded with fewer columns, it is to allow --- selection only, given columns that comprise a unique index. 
- --- GET_CPU_ID -CREATE OR REPLACE FUNCTION public.get_cpu_id( - cpu_model_name citext - , cpu_core_count integer - , cpu_thread_count integer - , cpu_frequency_max_Hz bigint - , cpu_frequency_min_Hz bigint - , cpu_L1d_cache_bytes integer - , cpu_L1i_cache_bytes integer - , cpu_L2_cache_bytes integer - , cpu_L3_cache_bytes integer -) -RETURNS integer AS -$$ - DECLARE - result integer; - BEGIN - SELECT cpu_id INTO result FROM public.cpu AS cpu - WHERE cpu.cpu_model_name = $1 - AND cpu.cpu_core_count = $2 - AND cpu.cpu_thread_count = $3 - AND cpu.cpu_frequency_max_Hz = $4 - AND cpu.cpu_frequency_min_Hz = $5 - AND cpu.cpu_L1d_cache_bytes = $6 - AND cpu.cpu_L1i_cache_bytes = $7 - AND cpu.cpu_L2_cache_bytes = $8 - AND cpu.cpu_L3_cache_bytes = $9; - - IF result IS NULL THEN - INSERT INTO public.cpu( - cpu_model_name - , cpu_core_count - , cpu_thread_count - , cpu_frequency_max_Hz - , cpu_frequency_min_Hz - , cpu_L1d_cache_bytes - , cpu_L1i_cache_bytes - , cpu_L2_cache_bytes - , cpu_L3_cache_bytes - ) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) - RETURNING cpu_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_cpu_id( - citext - , integer - , integer - , bigint -- cpu_frequency_max_Hz - , bigint -- cpu_frequency_min_Hz - , integer - , integer - , integer - , integer -) -IS 'Insert or select CPU data, returning "cpu.cpu_id".'; - --- GET_GPU_ID -CREATE OR REPLACE FUNCTION public.get_gpu_id( - gpu_information citext DEFAULT NULL - , gpu_part_number citext DEFAULT NULL - , gpu_product_name citext DEFAULT NULL -) -RETURNS integer AS -$$ - DECLARE - result integer; - BEGIN - SELECT gpu_id INTO result FROM public.gpu AS gpu - WHERE - gpu.gpu_information = COALESCE($1, '') - AND gpu.gpu_part_number = COALESCE($2, '') - AND gpu.gpu_product_name = COALESCE($3, ''); - - IF result IS NULL THEN - INSERT INTO public.gpu( - gpu_information - , gpu_part_number - , gpu_product_name - ) - VALUES (COALESCE($1, ''), COALESCE($2, ''), 
COALESCE($3, '')) - RETURNING gpu_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_gpu_id(citext, citext, citext) -IS 'Insert or select GPU data, returning "gpu.gpu_id".'; - --- GET_OS_ID -CREATE OR REPLACE FUNCTION public.get_os_id( - os_name citext - , architecture_name citext - , kernel_name citext DEFAULT '' -) -RETURNS integer AS -$$ - DECLARE - result integer; - BEGIN - SELECT os_id INTO result FROM public.os AS os - WHERE os.os_name = $1 - AND os.architecture_name = $2 - AND os.kernel_name = COALESCE($3, ''); - - IF result is NULL THEN - INSERT INTO public.os(os_name, architecture_name, kernel_name) - VALUES ($1, $2, COALESCE($3, '')) - RETURNING os_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_os_id(citext, citext, citext) -IS 'Insert or select OS data, returning "os.os_id".'; - --- GET_MACHINE_ID (full signature) -CREATE OR REPLACE FUNCTION public.get_machine_id( - mac_address macaddr - , machine_name citext - , memory_bytes bigint - , cpu_actual_frequency_Hz bigint - -- os - , os_name citext - , architecture_name citext - , kernel_name citext - -- cpu - , cpu_model_name citext - , cpu_core_count integer - , cpu_thread_count integer - , cpu_frequency_max_Hz bigint - , cpu_frequency_min_Hz bigint - , L1d_cache_bytes integer - , L1i_cache_bytes integer - , L2_cache_bytes integer - , L3_cache_bytes integer - -- gpu - , gpu_information citext DEFAULT '' - , gpu_part_number citext DEFAULT NULL - , gpu_product_name citext DEFAULT NULL - -- nullable machine attributes - , machine_other_attributes jsonb DEFAULT NULL -) -RETURNS integer AS -$$ - DECLARE - found_cpu_id integer; - found_gpu_id integer; - found_os_id integer; - result integer; - BEGIN - -- Can't bypass looking up all the values because of unique constraint. 
- SELECT public.get_cpu_id( - cpu_model_name - , cpu_core_count - , cpu_thread_count - , cpu_frequency_max_Hz - , cpu_frequency_min_Hz - , L1d_cache_bytes - , L1i_cache_bytes - , L2_cache_bytes - , L3_cache_bytes - ) INTO found_cpu_id; - - SELECT public.get_gpu_id( - gpu_information - , gpu_part_number - , gpu_product_name - ) INTO found_gpu_id; - - SELECT public.get_os_id( - os_name - , architecture_name - , kernel_name - ) INTO found_os_id; - - SELECT machine_id INTO result FROM public.machine AS m - WHERE m.os_id = found_os_id - AND m.cpu_id = found_cpu_id - AND m.gpu_id = found_gpu_id - AND m.mac_address = $1 - AND m.machine_name = $2 - AND m.memory_bytes = $3 - AND m.cpu_actual_frequency_Hz = $4; - - IF result IS NULL THEN - INSERT INTO public.machine( - os_id - , cpu_id - , gpu_id - , mac_address - , machine_name - , memory_bytes - , cpu_actual_frequency_Hz - , machine_other_attributes - ) - VALUES (found_os_id, found_cpu_id, found_gpu_id, $1, $2, $3, $4, $20) - RETURNING machine_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_machine_id( - macaddr - , citext - , bigint -- memory_bytes - , bigint -- cpu_frequency_actual_Hz - -- os - , citext - , citext - , citext - -- cpu - , citext - , integer - , integer - , bigint -- cpu_frequency_max_Hz - , bigint -- cpu_frequency_min_Hz - , integer - , integer - , integer - , integer - -- gpu - , citext - , citext - , citext - -- nullable machine attributes - , jsonb -) -IS 'Insert or select machine data, returning "machine.machine_id".'; - --- GET_MACHINE_ID (given unique mac_address) -CREATE OR REPLACE FUNCTION public.get_machine_id(mac_address macaddr) -RETURNS integer AS -$$ - SELECT machine_id FROM public.machine AS m - WHERE m.mac_address = $1; -$$ -LANGUAGE sql STABLE; -COMMENT ON FUNCTION public.get_machine_id(macaddr) -IS 'Select machine_id given its mac address, returning "machine.machine_id".'; - --- GET_BENCHMARK_LANGUAGE_ID -CREATE OR REPLACE FUNCTION 
public.get_benchmark_language_id(language citext) -RETURNS integer AS -$$ - DECLARE - result integer; - BEGIN - SELECT benchmark_language_id INTO result - FROM public.benchmark_language AS bl - WHERE bl.benchmark_language = language; - - IF result IS NULL THEN - INSERT INTO public.benchmark_language(benchmark_language) - VALUES (language) - RETURNING benchmark_language_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_benchmark_language_id(citext) -IS 'Insert or select benchmark_language returning ' - '"benchmark_language.benchmark_language_id".'; - --- GET_LANGUAGE_IMPLEMENTATION_VERSION_ID -CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id( - language citext - , language_implementation_version citext DEFAULT '' -) -RETURNS integer AS -$$ - DECLARE - language_id integer; - result integer; - BEGIN - SELECT public.get_benchmark_language_id($1) INTO language_id; - - SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv - WHERE lv.benchmark_language_id = language_id - AND lv.language_implementation_version = COALESCE($2, ''); - - IF result IS NULL THEN - INSERT INTO - public.language_implementation_version(benchmark_language_id, language_implementation_version) - VALUES (language_id, COALESCE($2, '')) - RETURNING language_implementation_version_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_language_implementation_version_id(citext, citext) -IS 'Insert or select language and version data, ' - 'returning "language_implementation_version.language_implementation_version_id".'; - -CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id( - -- overload for when language_id is known - language_id integer - , language_implementation_version citext DEFAULT '' -) -RETURNS integer AS -$$ - DECLARE - result integer; - BEGIN - SELECT language_implementation_version_id INTO result 
FROM public.language_implementation_version AS lv - WHERE lv.benchmark_language_id = language_id - AND lv.language_implementation_version = COALESCE($2, ''); - - IF result IS NULL THEN - INSERT INTO - public.language_implementation_version(benchmark_language_id, language_implementation_version) - VALUES (language_id, COALESCE($2, '')) - RETURNING language_implementation_version_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; - --- GET_LANGUAGE_DEPENDENCY_LOOKUP_ID -CREATE OR REPLACE FUNCTION public.get_dependencies_id( - dependencies jsonb DEFAULT '{}'::jsonb -) -RETURNS integer AS -$$ - DECLARE - result integer; - BEGIN - SELECT dependencies_id INTO result - FROM public.dependencies AS ldl - WHERE ldl.dependencies = COALESCE($1, '{}'::jsonb); - - IF result IS NULL THEN - INSERT INTO - public.dependencies(dependencies) - VALUES (COALESCE($1, '{}'::jsonb)) - RETURNING dependencies_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_dependencies_id(jsonb) -IS 'Insert or select dependencies, returning "dependencies.dependencies_id".'; - --- GET_ENVIRONMENT_ID -CREATE OR REPLACE FUNCTION public.get_environment_id( - language citext, - language_implementation_version citext DEFAULT '', - dependencies jsonb DEFAULT '{}'::jsonb -) -RETURNS integer AS -$$ - DECLARE - found_language_id integer; - found_version_id integer; - found_dependencies_id integer; - result integer; - BEGIN - SELECT public.get_benchmark_language_id($1) INTO found_language_id; - SELECT - public.get_language_implementation_version_id(found_language_id, $2) - INTO found_version_id; - SELECT - public.get_dependencies_id ($3) - INTO found_dependencies_id; - - SELECT environment_id INTO result FROM public.environment AS e - WHERE e.benchmark_language_id = found_language_id - AND e.language_implementation_version_id = found_version_id - AND e.dependencies_id = found_dependencies_id; - - IF result IS NULL THEN - INSERT INTO - 
public.environment( - benchmark_language_id - , language_implementation_version_id - , dependencies_id - ) - VALUES (found_language_id, found_version_id, found_dependencies_id) - RETURNING environment_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_environment_id(citext, citext, jsonb) -IS 'Insert or select language, language version, and dependencies, ' - 'returning "environment.environment_id".'; - --- GET_BENCHMARK_TYPE_ID (full signature) -CREATE OR REPLACE FUNCTION public.get_benchmark_type_id( - benchmark_type citext - , lessisbetter boolean -) -RETURNS integer AS -$$ - DECLARE - result integer; - BEGIN - SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt - WHERE bt.benchmark_type = $1 - AND bt.lessisbetter = $2; - - IF result IS NULL THEN - INSERT INTO public.benchmark_type(benchmark_type, lessisbetter) - VALUES($1, $2) - RETURNING benchmark_type_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_benchmark_type_id(citext, boolean) -IS 'Insert or select benchmark type and lessisbetter, ' - 'returning "benchmark_type.benchmark_type_id".'; - --- GET_BENCHMARK_TYPE_ID (given unique benchmark_type string only) -CREATE OR REPLACE FUNCTION public.get_benchmark_type_id( - benchmark_type citext -) -RETURNS integer AS -$$ - DECLARE - result integer; - BEGIN - SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt - WHERE bt.benchmark_type = $1; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_benchmark_type_id(citext) -IS 'Select benchmark_type_id given benchmark type (e.g. 
''time''), ' - 'returning "benchmark_type.benchmark_type_id".'; - --- GET_UNIT_ID (full signature) -CREATE OR REPLACE FUNCTION public.get_unit_id( - benchmark_type citext - , units citext - , lessisbetter boolean DEFAULT NULL -) -RETURNS integer AS -$$ - DECLARE - found_benchmark_type_id integer; - result integer; - BEGIN - - IF ($3 IS NOT NULL) -- if lessisbetter is not null - THEN - SELECT public.get_benchmark_type_id($1, $3) - INTO found_benchmark_type_id; - ELSE - SELECT public.get_benchmark_type_id($1) - INTO found_benchmark_type_id; - END IF; - - SELECT unit_id INTO result FROM public.unit AS u - WHERE u.benchmark_type_id = found_benchmark_type_id - AND u.units = $2; - - IF result IS NULL THEN - INSERT INTO public.unit(benchmark_type_id, units) - VALUES(found_benchmark_type_id, $2) - RETURNING unit_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_unit_id(citext, citext, boolean) -IS 'Insert or select benchmark type (e.g. ''time''), ' - 'units string (e.g. 
''miliseconds''), ' - 'and "lessisbetter" (true if smaller benchmark values are better), ' - 'returning "unit.unit_id".'; - --- GET_UNIT_ID (given unique units string only) -CREATE OR REPLACE FUNCTION public.get_unit_id(units citext) -RETURNS integer AS -$$ - SELECT unit_id FROM public.unit AS u - WHERE u.units = units; -$$ -LANGUAGE sql STABLE; -COMMENT ON FUNCTION public.get_unit_id(citext) -IS 'Select unit_id given unit name, returning "unit.unit_id".'; - --- GET_BENCHMARK_ID (full signature) -CREATE OR REPLACE FUNCTION public.get_benchmark_id( - benchmark_language citext - , benchmark_name citext - , parameter_names text[] - , benchmark_description text - , benchmark_version citext - , benchmark_type citext - , units citext - , lessisbetter boolean -) -RETURNS integer AS -$$ - DECLARE - found_benchmark_language_id integer; - found_unit_id integer; - result integer; - BEGIN - SELECT public.get_benchmark_language_id( - benchmark_language - ) INTO found_benchmark_language_id; - - SELECT public.get_unit_id( - benchmark_type - , units - , lessisbetter - ) INTO found_unit_id; - - SELECT benchmark_id INTO result FROM public.benchmark AS b - WHERE b.benchmark_language_id = found_benchmark_language_id - AND b.benchmark_name = $2 - -- handle nullable "parameter_names" - AND b.parameter_names IS NOT DISTINCT FROM $3 - AND b.benchmark_description = $4 - AND b.benchmark_version = $5 - AND b.unit_id = found_unit_id; - - IF result IS NULL THEN - INSERT INTO public.benchmark( - benchmark_language_id - , benchmark_name - , parameter_names - , benchmark_description - , benchmark_version - , unit_id - ) - VALUES (found_benchmark_language_id, $2, $3, $4, $5, found_unit_id) - RETURNING benchmark_id INTO result; - END IF; - - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.get_benchmark_id( - citext - , citext - , text[] - , text - , citext - , citext - , citext - , boolean -) -IS 'Insert/select benchmark given data, returning "benchmark.benchmark_id".'; - 
--- GET_BENCHMARK_ID (by unique columns) -CREATE OR REPLACE FUNCTION public.get_benchmark_id( - benchmark_language citext - , benchmark_name citext - , benchmark_version citext -) -RETURNS integer AS -$$ - WITH language AS ( - SELECT public.get_benchmark_language_id(benchmark_language) AS id - ) - SELECT b.benchmark_id - FROM public.benchmark AS b - JOIN language ON b.benchmark_language_id = language.id - WHERE b.benchmark_name = benchmark_name - AND benchmark_version = benchmark_version -$$ -LANGUAGE sql STABLE; -COMMENT ON FUNCTION public.get_benchmark_id(citext, citext, citext) -IS 'Select existing benchmark given unique columns, ' - 'returning "benchmark.benchmark_id".'; diff --git a/dev/benchmarking/ddl/3_01_functions_triggers.sql b/dev/benchmarking/ddl/3_01_functions_triggers.sql deleted file mode 100644 index b6ce4741ac0f..000000000000 --- a/dev/benchmarking/ddl/3_01_functions_triggers.sql +++ /dev/null @@ -1,574 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ - - --------------------------- TRIGGER FUNCTIONS -------------------------- --- Views that do not select from a single table or view are not --- automatically updatable. These trigger functions are intended --- to be run instead of INSERT into the complicated views. 
- - --- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_INSERT_ROW -CREATE OR REPLACE FUNCTION public.language_implementation_version_view_insert_row() -RETURNS trigger AS -$$ - DECLARE - language_id integer; - result integer; - BEGIN - IF NEW.benchmark_language IS NULL THEN - RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.'; - END IF; - IF NEW.language_implementation_version IS NULL THEN - RAISE EXCEPTION - 'Column "language_implementation_version" cannot be NULL (use '''' instead).'; - END IF; - - SELECT public.get_benchmark_language_id(NEW.benchmark_language) - INTO language_id; - - SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv - WHERE lv.benchmark_language_id = language_id - AND lv.language_implementation_version = NEW.language_implementation_version; - - IF result IS NOT NULL THEN - -- row already exists - RETURN NULL; - ELSE - INSERT INTO - public.language_implementation_version( - benchmark_language_id - , language_implementation_version - ) - VALUES (language_id, NEW.language_implementation_version) - RETURNING language_implementation_version_id INTO NEW.language_implementation_version_id; - END IF; - - RETURN NEW; - END -$$ -LANGUAGE plpgsql; - --- ENVIRONMENT_VIEW_INSERT_ROW -CREATE OR REPLACE FUNCTION public.environment_view_insert_row() -RETURNS trigger AS -$$ - DECLARE - found_language_id integer; - found_version_id integer; - found_dependencies_id integer; - result integer; - BEGIN - IF NEW.benchmark_language IS NULL - THEN - RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.'; - END IF; - IF NEW.language_implementation_version IS NULL THEN - RAISE EXCEPTION - 'Column "language_implementation_version" cannot be NULL (use '''' instead).'; - END IF; - - SELECT public.get_benchmark_language_id(NEW.benchmark_language) - INTO found_language_id; - - SELECT public.get_language_implementation_version_id( - found_language_id - , NEW.language_implementation_version - ) - INTO 
found_version_id; - - SELECT public.get_dependencies_id(NEW.dependencies) - INTO found_dependencies_id; - - SELECT environment_id INTO result FROM public.environment AS e - WHERE e.benchmark_language_id = found_language_id - AND e.language_implementation_version_id = found_version_id - AND e.dependencies_id = found_dependencies_id; - - IF result IS NOT NULL THEN - -- row already exists - RETURN NULL; - ELSE - INSERT INTO - public.environment( - benchmark_language_id - , language_implementation_version_id - , dependencies_id - ) - VALUES (found_language_id, found_version_id, found_dependencies_id) - RETURNING environment_id INTO NEW.environment_id; - END IF; - - RETURN NEW; - END -$$ -LANGUAGE plpgsql; - --- MACHINE_VIEW_INSERT_ROW -CREATE OR REPLACE FUNCTION public.machine_view_insert_row() -RETURNS trigger AS -$$ - DECLARE - found_cpu_id integer; - found_gpu_id integer; - found_os_id integer; - result integer; - BEGIN - IF ( - NEW.machine_name IS NULL - OR NEW.memory_bytes IS NULL - OR NEW.cpu_model_name IS NULL - OR NEW.cpu_core_count IS NULL - OR NEW.cpu_thread_count IS NULL - OR NEW.cpu_frequency_max_Hz IS NULL - OR NEW.cpu_frequency_min_Hz IS NULL - OR NEW.cpu_L1d_cache_bytes IS NULL - OR NEW.cpu_L1i_cache_bytes IS NULL - OR NEW.cpu_L2_cache_bytes IS NULL - OR NEW.cpu_L3_cache_bytes IS NULL - OR NEW.os_name IS NULL - OR NEW.architecture_name IS NULL - ) - THEN - RAISE EXCEPTION 'None of the columns in "machine_view" can be NULL. ' - 'all columns in table "gpu" will default to the empty string '''', ' - 'as will blank "os.kernel_name". This is to allow uniqueness ' - 'constraints to work. 
Thank you!.'; - END IF; - - SELECT public.get_cpu_id( - NEW.cpu_model_name - , NEW.cpu_core_count - , NEW.cpu_thread_count - , NEW.cpu_frequency_max_Hz - , NEW.cpu_frequency_min_Hz - , NEW.cpu_L1d_cache_bytes - , NEW.cpu_L1i_cache_bytes - , NEW.cpu_L2_cache_bytes - , NEW.cpu_L3_cache_bytes - ) INTO found_cpu_id; - - SELECT public.get_gpu_id( - NEW.gpu_information - , NEW.gpu_part_number - , NEW.gpu_product_name - ) INTO found_gpu_id; - - SELECT public.get_os_id( - NEW.os_name - , NEW.architecture_name - , NEW.kernel_name - ) INTO found_os_id; - - SELECT machine_id INTO result FROM public.machine AS m - WHERE m.os_id = found_os_id - AND m.cpu_id = found_cpu_id - AND m.gpu_id = found_gpu_id - AND m.machine_name = NEW.machine_name - AND m.memory_bytes = NEW.memory_bytes - AND m.cpu_actual_frequency_Hz = NEW.cpu_actual_frequency_Hz; - - IF result IS NOT NULL THEN - -- row already exists - RETURN NULL; - ELSE - INSERT INTO public.machine( - os_id - , cpu_id - , gpu_id - , machine_name - , mac_address - , memory_bytes - , cpu_actual_frequency_Hz - , machine_other_attributes - ) - VALUES ( - found_os_id - , found_cpu_id - , found_gpu_id - , NEW.machine_name - , NEW.mac_address - , NEW.memory_bytes - , NEW.cpu_actual_frequency_Hz - , NEW.machine_other_attributes - ) - RETURNING machine_id INTO NEW.machine_id; - END IF; - - RETURN NEW; - END -$$ -LANGUAGE plpgsql; - --- UNIT_VIEW_INSERT_ROW -CREATE OR REPLACE FUNCTION public.unit_view_insert_row() -RETURNS trigger AS -$$ - DECLARE - found_benchmark_type_id integer; - result integer; - BEGIN - IF (NEW.benchmark_type IS NULL OR NEW.units IS NULL) - THEN - RAISE EXCEPTION E'"benchmark_type" and "units" cannot be NULL.\n' - 'Further, if the "benchmark_type" has never been defined, ' - '"lessisbetter" must be defined or there will be an error.'; - END IF; - - -- It's OK for "lessisbetter" = NULL if "benchmark_type" already exists. 
- SELECT public.get_benchmark_type_id(NEW.benchmark_type, NEW.lessisbetter) - INTO found_benchmark_type_id; - - SELECT unit_id INTO result FROM public.unit AS u - WHERE u.benchmark_type_id = found_benchmark_type_id - AND u.units = NEW.units; - - IF result IS NOT NULL THEN - -- row already exists - RETURN NULL; - ELSE - INSERT INTO public.unit ( - benchmark_type_id - , units - ) - VALUES ( - found_benchmark_type_id - , NEW.units - ) - RETURNING unit_id INTO NEW.unit_id; - END IF; - - RETURN NEW; - END -$$ -LANGUAGE plpgsql; - --- BENCHMARK_VIEW_INSERT_ROW -CREATE OR REPLACE FUNCTION public.benchmark_view_insert_row() -RETURNS trigger AS -$$ - DECLARE - found_benchmark_language_id integer; - found_units_id integer; - result integer; - BEGIN - IF ( - NEW.benchmark_name IS NULL - OR NEW.benchmark_version IS NULL - OR NEW.benchmark_language IS NULL - OR NEW.benchmark_type IS NULL - OR NEW.benchmark_description IS NULL - OR NEW.units IS NULL - ) - THEN - RAISE EXCEPTION 'The only nullable column in this view is ' - '"benchmark.parameter_names".'; - END IF; - - SELECT public.get_benchmark_language_id( - NEW.benchmark_language - ) INTO found_benchmark_language_id; - - SELECT public.get_unit_id(NEW.units) INTO found_units_id; - - SELECT benchmark_id INTO result FROM public.benchmark AS b - WHERE b.benchmark_language_id = found_benchmark_language_id - AND b.benchmark_name = NEW.benchmark_name - -- handle nullable "parameter_names" - AND b.parameter_names IS NOT DISTINCT FROM NEW.parameter_names - AND b.benchmark_description = NEW.benchmark_description - AND b.benchmark_version = NEW.benchmark_version - AND b.unit_id = found_units_id; - - IF result IS NOT NULL THEN - -- row already exists - RETURN NULL; - ELSE - INSERT INTO public.benchmark( - benchmark_language_id - , benchmark_name - , parameter_names - , benchmark_description - , benchmark_version - , unit_id - ) - VALUES ( - found_benchmark_language_id - , NEW.benchmark_name - , NEW.parameter_names - , 
NEW.benchmark_description - , NEW.benchmark_version - , found_units_id - ) - RETURNING benchmark_id INTO NEW.benchmark_id; - END IF; - - RETURN NEW; - END -$$ -LANGUAGE plpgsql; - --- BENCHMARK_RUN_VIEW_INSERT_ROW -CREATE OR REPLACE FUNCTION public.benchmark_run_view_insert_row() -RETURNS trigger AS -$$ - DECLARE - found_benchmark_id integer; - found_benchmark_language_id integer; - found_machine_id integer; - found_environment_id integer; - found_language_implementation_version_id integer; - BEGIN - IF ( - NEW.benchmark_name IS NULL - OR NEW.benchmark_version IS NULL - OR NEW.benchmark_language IS NULL - OR NEW.value IS NULL - OR NEW.run_timestamp IS NULL - OR NEW.git_commit_timestamp IS NULL - OR NEW.git_hash IS NULL - OR NEW.language_implementation_version IS NULL - OR NEW.mac_address IS NULL - ) - THEN - RAISE EXCEPTION 'Only the following columns can be NULL: ' - '"parameter_names", "val_min", "val_q1", "val_q3", "val_max".'; - END IF; - - SELECT public.get_benchmark_id( - NEW.benchmark_language - , NEW.benchmark_name - , NEW.benchmark_version - ) INTO found_benchmark_id; - - SELECT public.get_benchmark_language_id( - NEW.benchmark_language - ) INTO found_benchmark_language_id; - - SELECT public.get_machine_id( - NEW.mac_address - ) INTO found_machine_id; - - SELECT public.get_environment_id( - NEW.benchmark_language - , NEW.language_implementation_version - , NEW.dependencies - ) INTO found_environment_id; - - SELECT public.get_language_implementation_version_id( - found_benchmark_language_id, - NEW.language_implementation_version - ) INTO found_language_implementation_version_id; - - INSERT INTO public.benchmark_run ( - parameter_values - , value - , git_commit_timestamp - , git_hash - , val_min - , val_q1 - , val_q3 - , val_max - , std_dev - , n_obs - , run_timestamp - , run_metadata - , run_notes - , machine_id - , benchmark_language_id - , language_implementation_version_id - , environment_id - , benchmark_id - ) - VALUES ( - 
COALESCE(NEW.parameter_values, '{}'::jsonb) - , NEW.value - , NEW.git_commit_timestamp - , NEW.git_hash - , NEW.val_min - , NEW.val_q1 - , NEW.val_q3 - , NEW.val_max - , NEW.std_dev - , NEW.n_obs - , NEW.run_timestamp - , NEW.run_metadata - , NEW.run_notes - , found_machine_id - , found_benchmark_language_id - , found_language_implementation_version_id - , found_environment_id - , found_benchmark_id - ) returning benchmark_run_id INTO NEW.benchmark_run_id; - - RETURN NEW; - END -$$ -LANGUAGE plpgsql; - --- FULL_BENCHMARK_RUN_VIEW_INSERT_ROW -CREATE OR REPLACE FUNCTION public.full_benchmark_run_view_insert_row() -RETURNS trigger AS -$$ - DECLARE - found_benchmark_id integer; - found_benchmark_language_id integer; - found_machine_id integer; - found_environment_id integer; - found_language_implementation_version_id integer; - BEGIN - IF ( - NEW.value IS NULL - OR NEW.git_hash IS NULL - OR NEW.git_commit_timestamp IS NULL - OR NEW.run_timestamp IS NULL - -- benchmark - OR NEW.benchmark_name IS NULL - OR NEW.benchmark_description IS NULL - OR NEW.benchmark_version IS NULL - OR NEW.benchmark_language IS NULL - -- unit - OR NEW.benchmark_type IS NULL - OR NEW.units IS NULL - OR NEW.lessisbetter IS NULL - -- machine - OR NEW.machine_name IS NULL - OR NEW.memory_bytes IS NULL - OR NEW.cpu_model_name IS NULL - OR NEW.cpu_core_count IS NULL - OR NEW.os_name IS NULL - OR NEW.architecture_name IS NULL - OR NEW.kernel_name IS NULL - OR NEW.cpu_model_name IS NULL - OR NEW.cpu_core_count IS NULL - OR NEW.cpu_thread_count IS NULL - OR NEW.cpu_frequency_max_Hz IS NULL - OR NEW.cpu_frequency_min_Hz IS NULL - OR NEW.cpu_L1d_cache_bytes IS NULL - OR NEW.cpu_L1i_cache_bytes IS NULL - OR NEW.cpu_L2_cache_bytes IS NULL - OR NEW.cpu_L3_cache_bytes IS NULL - ) - THEN - RAISE EXCEPTION 'Only the following columns can be NULL: ' - '"machine_other_attributes", "parameter_names", "val_min", ' - '"val_q1", "val_q3", "val_max", "run_metadata", "run_notes". 
' - 'If "gpu_information", "gpu_part_number", "gpu_product_name", or ' - '"kernel_name" are null, they will be silently turned into an ' - 'empty string ('''').'; - END IF; - - SELECT public.get_benchmark_id( - NEW.benchmark_language - , NEW.benchmark_name - , NEW.parameter_names - , NEW.benchmark_description - , NEW.benchmark_version - , NEW.benchmark_type - , NEW.units - , NEW.lessisbetter - ) INTO found_benchmark_id; - - SELECT public.get_benchmark_language_id( - NEW.benchmark_language - ) INTO found_benchmark_language_id; - - SELECT public.get_machine_id( - NEW.mac_address - , NEW.machine_name - , NEW.memory_bytes - , NEW.cpu_actual_frequency_Hz - -- os - , NEW.os_name - , NEW.architecture_name - , NEW.kernel_name - -- cpu - , NEW.cpu_model_name - , NEW.cpu_core_count - , NEW.cpu_thread_count - , NEW.cpu_frequency_max_Hz - , NEW.cpu_frequency_min_Hz - , NEW.cpu_L1d_cache_bytes - , NEW.cpu_L1i_cache_bytes - , NEW.cpu_L2_cache_bytes - , NEW.cpu_L3_cache_bytes - -- gpu - , NEW.gpu_information - , NEW.gpu_part_number - , NEW.gpu_product_name - -- nullable machine attributes - , NEW.machine_other_attributes - ) INTO found_machine_id; - - SELECT public.get_environment_id( - NEW.benchmark_language - , NEW.language_implementation_version - , NEW.dependencies - ) INTO found_environment_id; - - SELECT public.get_language_implementation_version_id( - found_benchmark_language_id, - NEW.language_implementation_version - ) INTO found_language_implementation_version_id; - - INSERT INTO public.benchmark_run ( - parameter_values - , value - , git_commit_timestamp - , git_hash - , val_min - , val_q1 - , val_q3 - , val_max - , std_dev - , n_obs - , run_timestamp - , run_metadata - , run_notes - , machine_id - , benchmark_language_id - , language_implementation_version_id - , environment_id - , benchmark_id - ) - VALUES ( - NEW.parameter_values - , NEW.value - , NEW.git_commit_timestamp - , NEW.git_hash - , NEW.val_min - , NEW.val_q1 - , NEW.val_q3 - , NEW.val_max - , NEW.std_dev 
- , NEW.n_obs - , NEW.run_timestamp - , NEW.run_metadata - , NEW.run_notes - , found_machine_id - , found_benchmark_language_id - , found_language_implementation_version_id - , found_environment_id - , found_benchmark_id - ) returning benchmark_run_id INTO NEW.benchmark_run_id; - - RETURN NEW; - END -$$ -LANGUAGE plpgsql; diff --git a/dev/benchmarking/ddl/3_02_functions_ingestion.sql b/dev/benchmarking/ddl/3_02_functions_ingestion.sql deleted file mode 100644 index 000c61d00e7b..000000000000 --- a/dev/benchmarking/ddl/3_02_functions_ingestion.sql +++ /dev/null @@ -1,323 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --------------------------- IMPORT HELPERS -------------------------- --- Load from JSON (from https://stackoverflow.com/a/48396608) --- How to use it in the psql client: --- \set content `cat /examples/machine.json` --- select ingest_machine(:'content'::jsonb); --- INGEST_MACHINE_VIEW -CREATE OR REPLACE FUNCTION public.ingest_machine_view(from_jsonb jsonb) -RETURNS integer AS -$$ - DECLARE - result integer; - BEGIN - INSERT INTO public.machine_view - SELECT * FROM jsonb_populate_record(null::public.machine_view, from_jsonb) - RETURNING machine_id INTO result; - RETURN result; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.ingest_machine_view(jsonb) IS - E'The argument is a JSON object. NOTE: key names must be entirely\n' - 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' - 'Example::\n\n' - ' {\n' - ' "mac_address": "0a:00:2d:01:02:03",\n' - ' "machine_name": "Yet-Another-Machine-Name",\n' - ' "memory_bytes": 8589934592,\n' - ' "cpu_actual_frequency_hz": 2300000000,\n' - ' "os_name": "OSX",\n' - ' "architecture_name": "x86_64",\n' - ' "kernel_name": "18.2.0",\n' - ' "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",\n' - ' "cpu_core_count": 2,\n' - ' "cpu_thread_count": 4,\n' - ' "cpu_frequency_max_hz": 2300000000,\n' - ' "cpu_frequency_min_hz": 2300000000,\n' - ' "cpu_l1d_cache_bytes": 32768,\n' - ' "cpu_l1i_cache_bytes": 32768,\n' - ' "cpu_l2_cache_bytes": 262144,\n' - ' "cpu_l3_cache_bytes": 4194304,\n' - ' "machine_other_attributes": {"just": "an example"},\n' - ' "gpu_information": "",\n' - ' "gpu_part_number": "",\n' - ' "gpu_product_name": ""\n' - ' }\n\n' - 'To identify which columns in "machine_view" are required,\n' - 'please see the view documentation in :ref:`benchmark-data-model`.\n'; - --- INGEST_BENCHMARK_VIEW -CREATE OR REPLACE FUNCTION public.ingest_benchmark_view(from_jsonb jsonb) -RETURNS setof integer AS -$$ - BEGIN - RETURN QUERY - INSERT INTO public.benchmark_view - SELECT * FROM 
jsonb_populate_recordset( - null::public.benchmark_view - , from_jsonb - ) - RETURNING benchmark_id; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.ingest_benchmark_view(jsonb) IS - E'The argument is a JSON object. NOTE: key names must be entirely\n' - 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' - 'Example::\n\n' - ' [\n' - ' {\n' - ' "benchmark_name": "Benchmark 1",\n' - ' "parameter_names": ["arg0", "arg1", "arg2"],\n' - ' "benchmark_description": "First benchmark",\n' - ' "benchmark_type": "Time",\n' - ' "units": "miliseconds",\n' - ' "lessisbetter": true,\n' - ' "benchmark_version": "second version",\n' - ' "benchmark_language": "Python"\n' - ' },\n' - ' {\n' - ' "benchmark_name": "Benchmark 2",\n' - ' "parameter_names": ["arg0", "arg1"],\n' - ' "benchmark_description": "Description 2.",\n' - ' "benchmark_type": "Time",\n' - ' "units": "nanoseconds",\n' - ' "lessisbetter": true,\n' - ' "benchmark_version": "second version",\n' - ' "benchmark_language": "Python"\n' - ' }\n' - ' ]\n\n' - 'To identify which columns in "benchmark_view" are required,\n' - 'please see the view documentation in :ref:`benchmark-data-model`.\n'; - --- INGEST_BENCHMARK_RUN_VIEW -CREATE OR REPLACE FUNCTION public.ingest_benchmark_run_view(from_jsonb jsonb) -RETURNS setof bigint AS -$$ - BEGIN - RETURN QUERY - INSERT INTO public.benchmark_run_view - SELECT * FROM - jsonb_populate_recordset(null::public.benchmark_run_view, from_jsonb) - RETURNING benchmark_run_id; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.ingest_benchmark_run_view(jsonb) IS - E'The argument is a JSON object. NOTE: key names must be entirely\n' - 'lowercase, or the insert will fail. 
Extra key-value pairs are ignored.\n' - 'Example::\n\n' - ' [\n' - ' {\n' - ' "benchmark_name": "Benchmark 2",\n' - ' "benchmark_version": "version 0",\n' - ' "parameter_values": {"arg0": 100, "arg1": 5},\n' - ' "value": 2.5,\n' - ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n' - ' "git_hash": "324d3cf198444a",\n' - ' "val_min": 1,\n' - ' "val_q1": 2,\n' - ' "val_q3": 3,\n' - ' "val_max": 4,\n' - ' "std_dev": 1.41,\n' - ' "n_obs": 8,\n' - ' "run_timestamp": "2019-02-14 03:00:05 -0600",\n' - ' "mac_address": "08:00:2b:01:02:03",\n' - ' "benchmark_language": "Python",\n' - ' "language_implementation_version": "CPython 2.7",\n' - ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n' - ' },\n' - ' {\n' - ' "benchmark_name": "Benchmark 2",\n' - ' "benchmark_version": "version 0",\n' - ' "parameter_values": {"arg0": 1000, "arg1": 5},\n' - ' "value": 5,\n' - ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n' - ' "git_hash": "324d3cf198444a",\n' - ' "std_dev": 3.14,\n' - ' "n_obs": 8,\n' - ' "run_timestamp": "2019-02-14 03:00:10 -0600",\n' - ' "mac_address": "08:00:2b:01:02:03",\n' - ' "benchmark_language": "Python",\n' - ' "language_implementation_version": "CPython 2.7",\n' - ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n' - ' }\n' - ' ]\n' - 'To identify which columns in "benchmark_run_view" are required,\n' - 'please see the view documentation in :ref:`benchmark-data-model`.\n'; - --- INGEST_BENCHMARK_RUNS_WITH_CONTEXT -CREATE OR REPLACE FUNCTION public.ingest_benchmark_runs_with_context(from_jsonb jsonb) -RETURNS setof bigint AS -$$ - DECLARE - context_jsonb jsonb; - found_environment_id integer; - found_machine_id integer; - BEGIN - SELECT from_jsonb -> 'context' INTO context_jsonb; - - SELECT public.get_machine_id((context_jsonb ->> 'mac_address')::macaddr) - INTO found_machine_id; - - SELECT get_environment_id( - (context_jsonb ->> 'benchmark_language')::citext - , (context_jsonb ->> 
'language_implementation_version')::citext - , context_jsonb -> 'dependencies' - ) INTO found_environment_id; - - RETURN QUERY - WITH run_datum AS ( - SELECT * - FROM jsonb_to_recordset(from_jsonb -> 'benchmarks') - AS x( - benchmark_name citext - , parameter_values jsonb - , value numeric - , val_min numeric - , val_q1 numeric - , val_q3 numeric - , val_max numeric - , std_dev numeric - , n_obs integer - , run_timestamp timestamp (0) with time zone - , run_metadata jsonb - , run_notes text - ) - ), benchmark_name_and_id AS ( - SELECT - key AS benchmark_name - , public.get_benchmark_id( - (context_jsonb ->> 'benchmark_language')::citext - , key::citext -- benchmark_name - , value::citext -- benchmark_version - ) AS benchmark_id - FROM jsonb_each_text(from_jsonb -> 'benchmark_version') - ) - INSERT INTO public.benchmark_run ( - benchmark_id - -- run_datum - , parameter_values - , value - , val_min - , val_q1 - , val_q3 - , val_max - , std_dev - , n_obs - , run_metadata - , run_notes - -- additional context information - , git_commit_timestamp - , git_hash - , run_timestamp - -- machine - , machine_id - -- environment - , environment_id - , language_implementation_version_id - , benchmark_language_id - ) - SELECT - b.benchmark_id - -- run_datum - , run_datum.parameter_values - , run_datum.value - , run_datum.val_min - , run_datum.val_q1 - , run_datum.val_q3 - , run_datum.val_max - , run_datum.std_dev - , run_datum.n_obs - , run_datum.run_metadata - , run_datum.run_notes - -- additional context information - , (context_jsonb ->> 'git_commit_timestamp')::timestamp (0) with time zone - , context_jsonb ->> 'git_hash' - , (context_jsonb ->> 'run_timestamp')::timestamp (0) with time zone - -- machine - , found_machine_id - -- environment - , e.environment_id - , e.language_implementation_version_id - , e.benchmark_language_id - FROM run_datum - JOIN public.environment AS e - ON e.environment_id = found_environment_id - JOIN benchmark_name_and_id AS b - ON b.benchmark_name 
= run_datum.benchmark_name - RETURNING benchmark_run_id; - END -$$ -LANGUAGE plpgsql; -COMMENT ON FUNCTION public.ingest_benchmark_runs_with_context(jsonb) IS - E'The argument is a JSON object. NOTE: key names must be entirely\n' - 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' - 'The object contains three key-value pairs::\n\n' - ' {"context": {\n' - ' "mac_address": "08:00:2b:01:02:03",\n' - ' "benchmark_language": "Python",\n' - ' "language_implementation_version": "CPython 3.6",\n' - ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},\n' - ' "git_commit_timestamp": "2019-02-14 22:42:22 +0100",\n' - ' "git_hash": "123456789abcde",\n' - ' "run_timestamp": "2019-02-14 03:00:40 -0600",\n' - ' "extra stuff": "does not hurt anything and will not be added."\n' - ' },\n' - ' "benchmark_version": {\n' - ' "Benchmark Name 1": "Any string can be a version.",\n' - ' "Benchmark Name 2": "A git hash can be a version.",\n' - ' "An Unused Benchmark Name": "Will be ignored."\n' - ' },\n' - ' "benchmarks": [\n' - ' {\n' - ' "benchmark_name": "Benchmark Name 1",\n' - ' "parameter_values": {"argument1": 1, "argument2": "value2"},\n' - ' "value": 42,\n' - ' "val_min": 41.2,\n' - ' "val_q1": 41.5,\n' - ' "val_q3": 42.5,\n' - ' "val_max": 42.8,\n' - ' "std_dev": 0.5,\n' - ' "n_obs": 100,\n' - ' "run_metadata": {"any": "key-value pairs"},\n' - ' "run_notes": "Any relevant notes."\n' - ' },\n' - ' {\n' - ' "benchmark_name": "Benchmark Name 2",\n' - ' "parameter_values": {"not nullable": "Use {} if no params."},\n' - ' "value": 8,\n' - ' "std_dev": 1,\n' - ' "n_obs": 2,\n' - ' }\n' - ' ]\n' - ' }\n\n' - '- The entry for "context" contains the machine, environment, and timestamp\n' - ' information common to all of the runs\n' - '- The entry for "benchmark_version" maps benchmark\n' - ' names to their version strings. 
(Which can be a git hash,\n' - ' the entire code string, a number, or any other string of your choice.)\n' - '- The entry for "benchmarks" is a list of benchmark run data\n' - ' for the given context and benchmark versions. The first example\n' - ' benchmark run entry contains all possible values, even\n' - ' nullable ones, and the second entry omits all nullable values.\n\n'; diff --git a/dev/benchmarking/ddl/3_10_functions_documentation.sql b/dev/benchmarking/ddl/3_10_functions_documentation.sql deleted file mode 100644 index 6b2a057909f8..000000000000 --- a/dev/benchmarking/ddl/3_10_functions_documentation.sql +++ /dev/null @@ -1,395 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --- _DOCUMENTATION_INGESTION -CREATE OR REPLACE FUNCTION public._documentation_ingestion() -RETURNS text AS -$$ - WITH ingestion_docs AS ( - SELECT - proname || E'\n' - || rpad('', character_length(proname), '-') - || E'\n\n:code:`' - || proname || '(' - || string_agg(a.argname || ' ' || typname , ', ') - || E')`\n\n' - || description - || E'\n\n\nback to `Benchmark data model `_\n' - AS docs - FROM pg_catalog.pg_proc - JOIN pg_catalog.pg_namespace - ON nspname='public' - AND pg_namespace.oid = pronamespace - AND proname LIKE '%ingest%' - JOIN pg_catalog.pg_description - ON pg_description.objoid=pg_proc.oid, - LATERAL unnest(proargnames, proargtypes) AS a(argname, argtype) - JOIN pg_catalog.pg_type - ON pg_type.oid = a.argtype - GROUP BY proname, description - ) - SELECT - string_agg(docs, E'\n\n') AS docs - FROM ingestion_docs; -$$ -LANGUAGE sql STABLE; - --- _DOCUMENTATION_VIEW_DETAILS -CREATE OR REPLACE FUNCTION public._documentation_view_details(view_name citext) -RETURNS TABLE( - column_name name - , type_name name - , nullable text - , default_value text - , description text -) AS -$$ - WITH view_columns AS ( - SELECT - attname AS column_name - , attnum AS column_order - FROM pg_catalog.pg_attribute - WHERE attrelid=view_name::regclass - ) - SELECT - t.column_name - , type_name - , coalesce(nullable, '') - , coalesce(default_value, '') - , coalesce(description, '') - FROM public.summarized_tables_view AS t - JOIN view_columns AS v ON v.column_name = t.column_name - WHERE t.table_name || '_view' = view_name OR t.column_name NOT LIKE '%_id' - ORDER BY column_order; -$$ -LANGUAGE sql STABLE; - - --- _DOCUMENTATION_VIEW_PIECES -CREATE OR REPLACE FUNCTION public._documentation_view_pieces(view_name citext) -RETURNS TABLE (rst_formatted text) -AS -$$ -DECLARE - column_length integer; - type_length integer; - nullable_length integer; - default_length integer; - description_length integer; - sep text; - border text; -BEGIN - - -- All of the hard-coded 
constants here are the string length of the table - -- column headers: 'Column', 'Type', 'Nullable', 'Default', 'Description' - SELECT greatest(6, max(character_length(column_name))) - FROM public._documentation_view_details(view_name) INTO column_length; - - SELECT greatest(4, max(character_length(type_name))) - FROM public._documentation_view_details(view_name) INTO type_length; - - SELECT greatest(8, max(character_length(nullable))) - FROM public._documentation_view_details(view_name) INTO nullable_length; - - SELECT greatest(7, max(character_length(default_value))) - FROM public._documentation_view_details(view_name) INTO default_length; - - SELECT greatest(11, max(character_length(description))) - FROM public._documentation_view_details(view_name) INTO description_length; - - SELECT ' ' INTO sep; - - SELECT - concat_ws(sep - , rpad('', column_length, '=') - , rpad('', type_length, '=') - , rpad('', nullable_length, '=') - , rpad('', default_length, '=') - , rpad('', description_length, '=') - ) - INTO border; - - RETURN QUERY - SELECT - border - UNION ALL - SELECT - concat_ws(sep - , rpad('Column', column_length, ' ') - , rpad('Type', type_length, ' ') - , rpad('Nullable', nullable_length, ' ') - , rpad('Default', default_length, ' ') - , rpad('Description', description_length, ' ') - ) - UNION ALL - SELECT border - UNION ALL - SELECT - concat_ws(sep - , rpad(v.column_name, column_length, ' ') - , rpad(v.type_name, type_length, ' ') - , rpad(v.nullable, nullable_length, ' ') - , rpad(v.default_value, default_length, ' ') - , rpad(v.description, description_length, ' ') - ) - FROM public._documentation_view_details(view_name) AS v - UNION ALL - SELECT border; - -END -$$ -LANGUAGE plpgsql STABLE; - - --- DOCUMENTATION_FOR -CREATE OR REPLACE FUNCTION public.documentation_for(view_name citext) -RETURNS text AS -$$ - DECLARE - view_description text; - view_table_markup text; - BEGIN - SELECT description FROM pg_catalog.pg_description - WHERE pg_description.objoid = 
view_name::regclass - INTO view_description; - - SELECT - view_name || E'\n' || rpad('', length(view_name), '-') || E'\n\n' || - view_description || E'\n\n' || - string_agg(rst_formatted, E'\n') - INTO view_table_markup - FROM public._documentation_view_pieces(view_name); - - RETURN view_table_markup; - END -$$ -LANGUAGE plpgsql STABLE; -COMMENT ON FUNCTION public.documentation_for(citext) -IS E'Create an ".rst"-formatted table describing a specific view.\n' - 'Example: SELECT public.documentation_for(''endpoint'');'; - - --- DOCUMENTATION -CREATE OR REPLACE FUNCTION public.documentation(dotfile_name text) -RETURNS TABLE (full_text text) AS -$$ - WITH v AS ( - SELECT - public.documentation_for(relname::citext) - || E'\n\nback to `Benchmark data model `_\n' - AS view_documentation - FROM pg_catalog.pg_trigger - JOIN pg_catalog.pg_class ON pg_trigger.tgrelid = pg_class.oid - WHERE NOT tgisinternal - ) - SELECT - E'\n.. _benchmark-data-model:\n\n' - 'Benchmark data model\n' - '====================\n\n\n' - '.. graphviz:: ' - || dotfile_name - || E'\n\n\n.. _benchmark-ingestion:\n\n' - 'Benchmark ingestion helper functions\n' - '====================================\n\n' - || public._documentation_ingestion() - || E'\n\n\n.. 
_benchmark-views:\n\n' - 'Benchmark views\n' - '===============\n\n\n' - || string_agg(v.view_documentation, E'\n') - FROM v - GROUP BY True; -$$ -LANGUAGE sql STABLE; -COMMENT ON FUNCTION public.documentation(text) -IS E'Create an ".rst"-formatted file that shows the columns in ' - 'every insertable view in the "public" schema.\n' - 'The text argument is the name of the generated dotfile to be included.\n' - 'Example: SELECT public.documentation(''data_model.dot'');'; - - --- _DOCUMENTATION_DOTFILE_NODE_FOR -CREATE OR REPLACE FUNCTION public._documentation_dotfile_node_for(tablename name) -RETURNS text AS -$$ -DECLARE - result text; -BEGIN - WITH node AS ( - SELECT - tablename::text AS lines - UNION ALL - SELECT - E'[label = \n' - ' <' - UNION ALL - -- table name - SELECT - ' ' - UNION ALL - -- primary keys - SELECT - ' ' - FROM public.summarized_tables_view - WHERE table_name = tablename - AND description LIKE '%primary key%' - UNION ALL - -- columns - SELECT - ' ' - FROM public.summarized_tables_view - WHERE table_name = tablename - AND (description IS NULL OR description not like '%key%') - UNION ALL - -- foreign keys - SELECT - ' ' - FROM public.summarized_tables_view - WHERE table_name = tablename - AND description LIKE '%foreign key%' - AND description NOT LIKE '%primary key%' - UNION ALL - SELECT - E'
' - || tablename - || '
' - || column_name - || ' (pk)
' - || column_name - || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END - || CASE WHEN nullable <> 'not null' THEN ' (o)' ELSE '' END - || '
' - || column_name - || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END - || ' (fk)
>\n];' - ) - SELECT - string_agg(lines, E'\n') - INTO result - FROM node; - - RETURN result; -END -$$ -LANGUAGE plpgsql STABLE; - - --- _DOCUMENTATION_DOTFILE_EDGES -CREATE OR REPLACE FUNCTION public._documentation_dotfile_edges() -RETURNS text AS -$$ -DECLARE - result text; -BEGIN - WITH relationship AS ( - SELECT - conrelid AS fk_table_id - , confrelid AS pk_table_id - , unnest(conkey) AS fk_colnum - , unnest(confkey) AS pk_colnum - FROM pg_catalog.pg_constraint - WHERE confkey IS NOT NULL - AND connamespace='public'::regnamespace - ), all_edges AS ( - SELECT - fk_tbl.relname || ':' || fk_col.attname - || ' -> ' - || pk_tbl.relname || ':' || pk_col.attname - || ';' AS lines - FROM relationship - -- foreign key table + column - JOIN pg_catalog.pg_attribute AS fk_col - ON fk_col.attrelid = relationship.fk_table_id - AND fk_col.attnum = relationship.fk_colnum - JOIN pg_catalog.pg_class AS fk_tbl - ON fk_tbl.oid = relationship.fk_table_id - -- primary key table + column - JOIN pg_catalog.pg_attribute AS pk_col - ON pk_col.attrelid = relationship.pk_table_id - AND pk_col.attnum = relationship.pk_colnum - JOIN pg_catalog.pg_class AS pk_tbl - ON pk_tbl.oid = relationship.pk_table_id - ) - SELECT - string_agg(lines, E'\n') - INTO result - FROM all_edges; - - RETURN result; -END -$$ -LANGUAGE plpgsql STABLE; - - --- DOCUMENTATION_DOTFILE -CREATE OR REPLACE FUNCTION public.documentation_dotfile() -RETURNS text AS -$$ -DECLARE - schemaname name := 'public'; - result text; -BEGIN - WITH file_contents AS ( - SELECT - E'digraph database {\n concentrate = true;\n' - ' rankdir = LR;\n' - ' ratio = ".75";\n' - ' node [shape = none, fontsize="11", fontname="Helvetica"];\n' - ' edge [fontsize="8", fontname="Helvetica"];' - AS lines - UNION ALL - SELECT - E'legend\n[fontsize = "14"\nlabel =\n' - '<\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
Legend
pk = primary key
fk = foreign key
u = unique*
o = optional
' - '* multiple uniques in the same table are a unique group
>\n];' - UNION ALL - SELECT - string_agg( - public._documentation_dotfile_node_for(relname), - E'\n' -- Forcing the 'env' table to the end makes a better image - ORDER BY (CASE WHEN relname LIKE 'env%' THEN 'z' ELSE relname END) - ) - FROM pg_catalog.pg_class - WHERE relkind='r' AND relnamespace = schemaname::regnamespace - UNION ALL - SELECT - public._documentation_dotfile_edges() - UNION ALL - SELECT - '}' - ) - SELECT - string_agg(lines, E'\n') AS dotfile - INTO result - FROM file_contents; - RETURN result; -END -$$ -LANGUAGE plpgsql STABLE; -COMMENT ON FUNCTION public.documentation_dotfile() -IS E'Create a Graphviz dotfile of the data model: ' - 'every table in the "public" schema.\n' - 'Example: SELECT public.documentation_dotfile();'; diff --git a/dev/benchmarking/ddl/4_00_triggers.sql b/dev/benchmarking/ddl/4_00_triggers.sql deleted file mode 100644 index 5fb0e5018595..000000000000 --- a/dev/benchmarking/ddl/4_00_triggers.sql +++ /dev/null @@ -1,61 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_TRIGGER_INSERT -CREATE TRIGGER language_implementation_version_view_trigger_insert - INSTEAD OF INSERT ON public.language_implementation_version_view - FOR EACH ROW - EXECUTE FUNCTION public.language_implementation_version_view_insert_row(); - --- ENVIRONMENT_VIEW_TRIGGER_INSERT -CREATE TRIGGER environment_view_trigger_insert - INSTEAD OF INSERT ON public.environment_view - FOR EACH ROW - EXECUTE FUNCTION public.environment_view_insert_row(); - --- MACHINE_VIEW_TRIGGER_INSERT -CREATE TRIGGER machine_view_trigger_insert - INSTEAD OF INSERT ON public.machine_view - FOR EACH ROW - EXECUTE FUNCTION public.machine_view_insert_row(); - --- UNIT_VIEW_TRIGGER_INSERT -CREATE TRIGGER unit_view_trigger_insert - INSTEAD OF INSERT ON public.unit_view - FOR EACH ROW - EXECUTE FUNCTION public.unit_view_insert_row(); - --- BENCHMARK_VIEW_TRIGGER_INSERT -CREATE TRIGGER benchmark_view_trigger_insert - INSTEAD OF INSERT ON public.benchmark_view - FOR EACH ROW - EXECUTE FUNCTION public.benchmark_view_insert_row(); - --- BENCHMARK_RUN_VIEW_TRIGGER_INSERT -CREATE TRIGGER benchmark_run_view_trigger_insert - INSTEAD OF INSERT ON public.benchmark_run_view - FOR EACH ROW - EXECUTE FUNCTION public.benchmark_run_view_insert_row(); - --- FULL_BENCHMARK_RUN_VIEW_TRIGGER_INSERT -CREATE TRIGGER full_benchmark_run_view_trigger_insert - INSTEAD OF INSERT ON public.full_benchmark_run_view - FOR EACH ROW - EXECUTE FUNCTION public.full_benchmark_run_view_insert_row(); diff --git a/dev/benchmarking/ddl/5_00_permissions.sql b/dev/benchmarking/ddl/5_00_permissions.sql deleted file mode 100644 index dd72c40db313..000000000000 --- a/dev/benchmarking/ddl/5_00_permissions.sql +++ /dev/null @@ -1,73 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -*/ ----------------------------- ROLES ---------------------------- --- ARROW_WEB -CREATE ROLE arrow_web login password 'arrow'; -COMMENT ON ROLE arrow_web IS 'Anonymous login user.'; - --- ARROW_ADMIN -CREATE ROLE arrow_admin; -COMMENT ON ROLE arrow_admin - IS 'Can select, insert, update, and delete on all public tables.'; - --- ARROW_ANONYMOUS -CREATE ROLE arrow_anonymous; -COMMENT ON ROLE arrow_anonymous - IS 'Can insert and select on all public tables.'; - -GRANT arrow_anonymous TO arrow_web; - - ----------------------------- PRIVILEGES ---------------------------- -GRANT USAGE ON SCHEMA public TO arrow_anonymous, arrow_admin; - --- ARROW_ADMIN -GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_admin; -GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_admin; -GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA public - TO arrow_admin; - --- ARROW_ANONYMOUS -GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_anonymous; -GRANT SELECT ON ALL TABLES IN SCHEMA public TO arrow_anonymous; -GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_anonymous; -GRANT INSERT ON - public.benchmark - , public.benchmark_language - , public.dependencies - , public.language_implementation_version - , public.benchmark_run - , public.benchmark_type - , public.cpu - , public.environment - , public.environment_view - , public.gpu - , public.machine - , public.machine_view - , 
public.os - , public.unit - --, public.project -- The only disallowed table is `project`. - , public.benchmark_run_view - , public.benchmark_view - , public.environment_view - , public.full_benchmark_run_view - , public.language_implementation_version_view - , public.machine_view - , public.unit_view -TO arrow_anonymous; diff --git a/dev/benchmarking/docker-compose.yml b/dev/benchmarking/docker-compose.yml deleted file mode 100644 index ca60206bfdfb..000000000000 --- a/dev/benchmarking/docker-compose.yml +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -version: '3' -services: - - pg: - build: - context: . 
- dockerfile: Dockerfile - restart: always - ports: - - '5432:5432' - environment: - - POSTGRES_PASSWORD=${PG_PASS} - - POSTGRES_USER=${PG_USER} - - graphile: - image: graphile/postgraphile - restart: always - ports: - - 5000:5000 - depends_on: - - pg - command: - - --connection - - postgres://${PG_USER}:${PG_PASS}@pg:5432/${PG_USER} - - --schema - - public - - --watch diff --git a/dev/benchmarking/examples/benchmark_example.json b/dev/benchmarking/examples/benchmark_example.json deleted file mode 100644 index d6f58c2862ec..000000000000 --- a/dev/benchmarking/examples/benchmark_example.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "benchmark_name": "Benchmark 1", - "parameter_names": ["arg0", "arg1", "arg2"], - "benchmark_description": "First benchmark", - "benchmark_type": "Time", - "units": "miliseconds", - "lessisbetter": true, - "benchmark_version": "second version", - "benchmark_language": "Python" - }, - { - "benchmark_name": "Benchmark 2", - "parameter_names": ["arg0", "arg1"], - "benchmark_description": "Description 2.", - "benchmark_type": "Time", - "units": "nanoseconds", - "lessisbetter": true, - "benchmark_version": "second version", - "benchmark_language": "Python" - }, - { - "benchmark_name": "Benchmark 3", - "parameter_names": ["arg0"], - "benchmark_description": "Third benchmark", - "benchmark_type": "Memory", - "units": "kilobytes", - "lessisbetter": true, - "benchmark_version": "1", - "benchmark_language": "Python" - } -] diff --git a/dev/benchmarking/examples/benchmark_run_example.csv b/dev/benchmarking/examples/benchmark_run_example.csv deleted file mode 100644 index eab208a1c709..000000000000 --- a/dev/benchmarking/examples/benchmark_run_example.csv +++ /dev/null @@ -1,6 +0,0 @@ -benchmark_run_id,benchmark_name,benchmark_version,parameter_values,value,git_commit_timestamp,git_hash,val_min,val_q1,val_q3,val_max,std_dev,n_obs,run_timestamp,run_metadata,run_notes,mac_address,benchmark_language,language_implementation_version,dependencies 
-,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,3,4,1.41,8,2019-02-14 02:00:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}" -,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,4,6,8,3.14,8,2019-02-14 02:01:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}" -,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,0.5,1,3,5,3,8,2019-02-14 02:02:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}" -,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,2.5,4,4.5,1.5,8,2019-02-14 02:03:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}" -,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 10}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,4,5,2,8,2019-02-14 02:03:30 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.15"", ""other_lib"": ""1.0""}" diff --git a/dev/benchmarking/examples/benchmark_run_example.json b/dev/benchmarking/examples/benchmark_run_example.json deleted file mode 100644 index 2ded776c9898..000000000000 --- a/dev/benchmarking/examples/benchmark_run_example.json +++ /dev/null @@ -1,97 +0,0 @@ -[ - { - "benchmark_name": "Benchmark 2", - "benchmark_version": "version 0", - "parameter_values": {"arg0": 100, "arg1": 5}, - "value": 2.5, - "git_commit_timestamp": "2019-02-08 22:35:53 +0100", - "git_hash": "324d3cf198444a", - "val_min": 1, - "val_q1": 2, - "val_q3": 3, - "val_max": 4, - "std_dev": 1.41, - "n_obs": 8, - "run_timestamp": "2019-02-14 03:00:05 -0600", - "mac_address": "08:00:2b:01:02:03", - "benchmark_language": "Python", - "language_implementation_version": "CPython 2.7", - "dependencies": {"six": "", "numpy": "1.14", 
"other_lib": "1.0"} - }, - { - "benchmark_name": "Benchmark 2", - "benchmark_version": "version 0", - "parameter_values": {"arg0": 1000, "arg1": 5}, - "value": 5, - "git_commit_timestamp": "2019-02-08 22:35:53 +0100", - "git_hash": "324d3cf198444a", - "val_min": 2, - "val_q1": 4, - "val_q3": 6, - "val_max": 8, - "std_dev": 3.14, - "n_obs": 8, - "run_timestamp": "2019-02-14 03:00:10 -0600", - "mac_address": "08:00:2b:01:02:03", - "benchmark_language": "Python", - "language_implementation_version": "CPython 2.7", - "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} - }, - { - "benchmark_name": "Benchmark 2", - "benchmark_version": "version 0", - "parameter_values": {"arg0": 100, "arg1": 5}, - "value": 2.5, - "git_commit_timestamp": "2019-02-08 22:35:53 +0100", - "git_hash": "324d3cf198444a", - "val_min": 0.5, - "val_q1": 1, - "val_q3": 3, - "val_max": 5, - "std_dev": 3, - "n_obs": 8, - "run_timestamp": "2019-02-14 03:00:20 -0600", - "mac_address": "08:00:2b:01:02:03", - "benchmark_language": "Python", - "language_implementation_version": "CPython 2.7", - "dependencies": {"boost": "1.42", "numpy": "1.15"} - }, - { - "benchmark_name": "Benchmark 2", - "benchmark_version": "version 0", - "parameter_values": {"arg0": 1000, "arg1": 5}, - "value": 3, - "git_commit_timestamp": "2019-02-08 22:35:53 +0100", - "git_hash": "324d3cf198444a", - "val_min": 2, - "val_q1": 2.5, - "val_q3": 4, - "val_max": 4.5, - "std_dev": 1.5, - "n_obs": 8, - "run_timestamp": "2019-02-14 03:00:30 -0600", - "mac_address": "08:00:2b:01:02:03", - "benchmark_language": "Python", - "language_implementation_version": "CPython 2.7", - "dependencies": {"boost": "1.42", "numpy": "1.15"} - }, - { - "benchmark_name": "Benchmark 2", - "benchmark_version": "version 0", - "parameter_values": {"arg0": 1000, "arg1": 10}, - "value": 3, - "git_commit_timestamp": "2019-02-08 22:35:53 +0100", - "git_hash": "324d3cf198444a", - "val_min": 1, - "val_q1": 2, - "val_q3": 4, - "val_max": 5, - "std_dev": 2, - 
"n_obs": 8, - "run_timestamp": "2019-02-14 03:00:40 -0600", - "mac_address": "08:00:2b:01:02:03", - "benchmark_language": "Python", - "language_implementation_version": "CPython 2.7", - "dependencies": {"six": "", "numpy": "1.15", "other_lib": "1.0"} - } -] diff --git a/dev/benchmarking/examples/benchmark_with_context_example.json b/dev/benchmarking/examples/benchmark_with_context_example.json deleted file mode 100644 index f9e6e31309f6..000000000000 --- a/dev/benchmarking/examples/benchmark_with_context_example.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "context": { - "mac_address": "08:00:2b:01:02:03", - "benchmark_language": "Python", - "language_implementation_version": "CPython 2.7", - "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}, - "git_commit_timestamp": "2019-02-14 22:42:22 +0100", - "git_hash": "123456789abcde", - "run_timestamp": "2019-02-25 03:00:40 -0600", - "Extra stuff": "does not hurt anything and won't be added.", - "However": "all of the entries above 'Extra stuff' are required." - }, - "benchmark_version": { - "Benchmark 2": "version 0", - "Benchmark 3": "any string is a version. (Benchmark 3 not actually used)" - }, - "benchmarks": [ - { - "benchmark_name": "Benchmark 2", - "parameter_values": {"arg0": 1, "arg1": 5}, - "value": 2.5, - "val_min": 1, - "val_q1": 2, - "val_q3": 3, - "val_max": 4, - "std_dev": 1.41, - "n_obs": 8, - "run_metadata": {"any": "json object is admissible"}, - "run_notes": "This value is an arbitrary-length string." - }, - { - "benchmark_name": "Benchmark 2", - "parameter_values": {"arg0": 2, "arg1": 5}, - "value": 5, - "std_dev": 3.14, - "n_obs": 8 - }, - { - "benchmark_name": "Benchmark 2", - "parameter_values": {"arg0": 3, "arg1": 5}, - "value": 2.5, - "val_min": 0.5, - "val_q1": 1, - "val_q3": 3, - "val_max": 5, - "std_dev": 3, - "n_obs": 8, - "run_notes": "The previous run in this list has the minimal set of keys." 
- }, - { - "benchmark_name": "Benchmark 2", - "parameter_values": {"arg0": 4, "arg1": 5}, - "value": 3, - "val_min": 2, - "val_q1": 2.5, - "val_q3": 4, - "val_max": 4.5, - "std_dev": 1.5, - "n_obs": 8 - }, - { - "benchmark_name": "Benchmark 2", - "parameter_values": {"arg0": 5, "arg1": 5}, - "value": 3, - "val_min": 1, - "val_q1": 2, - "val_q3": 4, - "val_max": 5, - "std_dev": 2, - "n_obs": 8 - } - ] -} diff --git a/dev/benchmarking/examples/example.sql b/dev/benchmarking/examples/example.sql deleted file mode 100644 index e93269af75bd..000000000000 --- a/dev/benchmarking/examples/example.sql +++ /dev/null @@ -1,232 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - - --- Example insert into each of the views: -INSERT INTO public.project(project_name, project_url, repo_url) -VALUES ( - 'Apache Arrow' - , 'https://arrow.apache.org/' - , 'https://github.com/apache/arrow'); - -INSERT INTO public.environment_view - (benchmark_language, language_implementation_version, dependencies) -VALUES - ('Python', 'CPython 2.7', '{"six": "", "numpy": "1.14", "other_lib": "1.0"}'), - ('Python', 'CPython 3.6', '{"boost": "1.42", "numpy": "1.15"}'); - -INSERT INTO public.dependencies(dependencies) -VALUES - ('{"boost": "1.68", "numpy": "1.14"}'), - ('{"boost": "1.42", "numpy": "1.16"}'); - -INSERT INTO public.language_implementation_version_view - (benchmark_language, language_implementation_version) -VALUES - ('Python', 'CPython 2.7'), - ('Python', 'CPython 3.6'); - -INSERT INTO public.unit_view - (benchmark_type, units, lessisbetter) -VALUES - ('Memory', 'gigabytes', True), - ('Memory', 'kilobytes', True); - - -\echo 'use \\dv to list the views views'; -\dv - - -SELECT * FROM environment_view; -SELECT * FROM unit_view; - - -INSERT INTO public.machine_view ( - mac_address - , machine_name - , memory_bytes - , cpu_actual_frequency_hz - , os_name - , architecture_name - , kernel_name - , cpu_model_name - , cpu_core_count - , cpu_thread_count - , cpu_frequency_max_hz - , cpu_frequency_min_hz - , cpu_l1d_cache_bytes - , cpu_l1i_cache_bytes - , cpu_l2_cache_bytes - , cpu_l3_cache_bytes - , machine_other_attributes -) VALUES ( - '08:00:2b:01:02:03' -- mac_address - , 'My-Machine-Name' -- machine_name - , 8589934592 -- memory_bytes - -- All (?) 
standard mac address formats are allowable: - -- https://www.postgresql.org/docs/11/datatype-net-types.html - , 2300000000 -- cpu_actual_frequency_Hz - , 'OSX' -- os_name - , 'x86_64' -- architecture_name - , '18.2.0' -- kernel - , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz' -- cpu_model_name - , 2 -- cpu_core_count - , 4 -- cpu_thread_count - , 2300000000 -- cpu_frequency_max_Hz - , 2300000000 -- cpu_frequency_min_Hz - , 32768 -- cpu_l1d_cache_bytes - , 32768 -- cpu_l1i_cache_bytes - , 262144 -- cpu_l2_cache_bytes - , 4194304 -- cpu_l3_cache_bytes - , '{"example": "for machine_other_attributes"}'::jsonb -); - - -INSERT INTO public.full_benchmark_run_view ( - benchmark_name - , parameter_names - , benchmark_description - , benchmark_type - , units - , lessisbetter - , benchmark_version - -- datum - , parameter_values - , value - , git_commit_timestamp - , git_hash - , val_min - , val_q1 - , val_q3 - , val_max - , std_dev - , n_obs - , run_timestamp - , run_metadata - , run_notes - -- machine_view - , machine_name - , mac_address - , memory_bytes - , cpu_actual_frequency_hz - , os_name - , architecture_name - , kernel_name - , cpu_model_name - , cpu_core_count - , cpu_thread_count - , cpu_frequency_max_hz - , cpu_frequency_min_hz - , cpu_l1d_cache_bytes - , cpu_l1i_cache_bytes - , cpu_l2_cache_bytes - , cpu_l3_cache_bytes - , machine_other_attributes - -- environment_view - , benchmark_language - , language_implementation_version - , dependencies -) VALUES ( - 'Benchmark 3' - , '{"arg0"}'::text[] - , 'Third benchmark' - , 'Memory' - , 'kilobytes' - , TRUE - , '0' - -- datum - , '{"arg0": 10}'::jsonb - , 0.5 - , '2019-01-31 14:31:10 -0600' - , '8136c46d5c60fb' - , 0.5 - , 0.5 - , 0.5 - , 0.5 - , 0 - , 2 - , '2019-02-14 14:00:00 -0600' - , '{"ci_99": [2.7e-06, 3.1e-06]}'::jsonb - , 'Additional run_notes.' 
- -- machine_view - , 'My-Machine-Name' - , '09-00-2c-01-02-03' - , 8589934592 - , 2300000000 - , 'OSX' - , 'x86_64' - , '18.2.0' - , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz' - , 2 - , 4 - , 2300000000 - , 2300000000 - , 32768 - , 32768 - , 262144 - , 4194304 - , '{"example": "for machine_other_attributes"}'::jsonb - -- environment_view - , 'Python' - , 'CPython 2.7' - , '{"six": "", "numpy": "1.15", "other_lib": "1.0"}'::jsonb -); - - --- Bulk load from CSV. First column is empty; serial "benchmark_run_id" will be assigned. ---\copy benchmark_run_view FROM 'examples/benchmark_run_example.csv' WITH (FORMAT csv, HEADER); - --- Load from JSON ---\set content `cat examples/benchmark_example.json` ---SELECT ingest_benchmark_view(:'content'::jsonb); - -INSERT INTO public.benchmark_view ( - benchmark_name - , parameter_names - , benchmark_description - , benchmark_type - , units - , lessisbetter - , benchmark_version - , benchmark_language - ) VALUES ( - 'Benchmark 1' - , '{"arg0", "arg1", "arg2"}'::text[] - , E'Description.\nNewlines are OK in a string escaped with leading "E".' - , 'Time' - , 'miliseconds' - , TRUE - , 'Hash of code or other way to identify distinct benchmark versions.' - , 'Python' - ), ( - 'Benchmark 2' - , '{"arg0", "arg1"}'::text[] - , 'Description 2.' 
- , 'Time' - , 'nanoseconds' - , TRUE - , 'version 0' - , 'Python' - ); - - -\x -SELECT * from benchmark_run_view; - -\x diff --git a/dev/benchmarking/examples/example_graphql_mutation.json b/dev/benchmarking/examples/example_graphql_mutation.json deleted file mode 100644 index fec5eed0a68a..000000000000 --- a/dev/benchmarking/examples/example_graphql_mutation.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "query": "mutation ($p: CreateProjectInput!){createProject(input:$p){project{id}}}", - "variables": { - "p": { - "project": { - "projectName": "Apache Arrow", - "projectUrl": "https://www.arrow.apache.org", - "repoUrl": "https://www.github.com/apache/arrow" - } - } - } -} diff --git a/dev/benchmarking/examples/graphql_query_environment_view.json b/dev/benchmarking/examples/graphql_query_environment_view.json deleted file mode 100644 index 78804fa918a2..000000000000 --- a/dev/benchmarking/examples/graphql_query_environment_view.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "query": "{allEnvironmentViews(orderBy: [BENCHMARK_LANGUAGE_ASC, LANGUAGE_IMPLEMENTATION_VERSION_ASC, DEPENDENCIES_ASC]) {edges {node {environmentId, benchmarkLanguage, languageImplementationVersion, dependencies}}}}" -} diff --git a/dev/benchmarking/examples/machine.json b/dev/benchmarking/examples/machine.json deleted file mode 100644 index 2485e2bc1c4e..000000000000 --- a/dev/benchmarking/examples/machine.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "mac_address": "0a:00:2d:01:02:03", - "machine_name": "Yet-Another-Machine-Name", - "memory_bytes": 8589934592, - "cpu_actual_frequency_hz": 2300000000, - "os_name": "OSX", - "architecture_name": "x86_64", - "kernel_name": "18.2.0", - "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz", - "cpu_core_count": 2, - "cpu_thread_count": 4, - "cpu_frequency_max_hz": 2300000000, - "cpu_frequency_min_hz": 2300000000, - "cpu_l1d_cache_bytes": 32768, - "cpu_l1i_cache_bytes": 32768, - "cpu_l2_cache_bytes": 262144, - "cpu_l3_cache_bytes": 4194304, - 
"machine_other_attributes": {"just": "an example"}, - "gpu_information": "", - "gpu_part_number": "", - "gpu_product_name": "" -} diff --git a/dev/benchmarking/graphql_submit.sh b/dev/benchmarking/graphql_submit.sh deleted file mode 100755 index 2eaab9cdfa5d..000000000000 --- a/dev/benchmarking/graphql_submit.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -OPTIONS=("machine" "benchmarks" "runs") - -option=${1-help} -datafile=${2-machine.json} -uri=${3-localhost:5000/graphql} - -help() { - cat < ${1} -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. 
KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - - -LICENSE -} - -warning() { - cat <<'WARNING' >> ${1} -.. WARNING -.. This is an auto-generated file. Please do not edit. - -.. To reproduce, please run :code:`./make_data_model_rst.sh`. -.. (This requires you have the -.. `psql client `_ -.. and have started the docker containers using -.. :code:`docker-compose up`). - -WARNING -} - -echo "Making ${OUTFILE}" - -license ${OUTFILE} -warning ${OUTFILE} - -PGPASSWORD=arrow \ - psql --tuples-only --username=arrow_web \ - --dbname=benchmark --port=5432 --host=localhost \ - --command="select public.documentation('${DOTFILE}');" \ - | sed "s/ *+$//" | sed "s/^ //" >> ${OUTFILE} diff --git a/dev/benchmarking/make_dotfile.sh b/dev/benchmarking/make_dotfile.sh deleted file mode 100755 index b86dc3eb3c6d..000000000000 --- a/dev/benchmarking/make_dotfile.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -set -e -OUTFILE=data_model.dot - -license() { - cat <<'LICENSE' > ${1} -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements.See the NOTICE file - distributed with this work for additional information - regarding copyright ownership.The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License.You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied.See the License for the - specific language governing permissions and limitations - under the License. -*/ - -LICENSE -} - -warning() { - cat <<'WARNING' >> ${1} -/* - WARNING - This is an auto-generated file. Please do not edit. - - To reproduce, please run :code:`./make_data_model_rst.sh`. - (This requires you have the - `psql client `_ - and have started the docker containers using - :code:`docker-compose up`). -*/ -WARNING -} - -echo "Making ${OUTFILE}" - -license ${OUTFILE} -warning ${OUTFILE} - -PGPASSWORD=arrow \ - psql --tuples-only --username=arrow_web \ - --dbname=benchmark --port=5432 --host=localhost \ - --command="select public.documentation_dotfile();" \ - | sed "s/ *+$//" | sed "s/^ //" >> ${OUTFILE} diff --git a/dev/benchmarking/make_machine_json.sh b/dev/benchmarking/make_machine_json.sh deleted file mode 100755 index 09bf0ea2d15d..000000000000 --- a/dev/benchmarking/make_machine_json.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -e -OUTFILE=machine.json - -echo "Making ${OUTFILE}" -echo "** NOTE: This command fails on everything but OSX right now. **" -echo "* also, the intent is to make this script not suck, just not now. *" -echo "Please type GPU details here (or manually modify ${OUTFILE} later)." -read -p "GPU information string (or ): " gpu_information -read -p "GPU part number (or ): " gpu_part_number -read -p "GPU product name (or ): " gpu_product_name - - -cat < ${OUTFILE} -{ - "mac_address": "$(ifconfig en1 | awk '/ether/{print $2}')", - "machine_name": "$(uname -n)", - "memory_bytes": $(sysctl -n hw.memsize), - "cpu_actual_frequency_hz": $(sysctl -n hw.cpufrequency), - "os_name": "$(uname -s)", - "architecture_name": "$(uname -m)", - "kernel_name": "$(uname -r)", - "cpu_model_name": "$(sysctl -n machdep.cpu.brand_string)", - "cpu_core_count": $(sysctl -n hw.physicalcpu), - "cpu_thread_count": $(sysctl -n hw.logicalcpu), - "cpu_frequency_max_hz": $(sysctl -n hw.cpufrequency_max), - "cpu_frequency_min_hz": $(sysctl -n hw.cpufrequency_min), - "cpu_l1d_cache_bytes": $(sysctl -n hw.l1dcachesize), - "cpu_l1i_cache_bytes": $(sysctl -n hw.l1icachesize), - "cpu_l2_cache_bytes": $(sysctl -n hw.l2cachesize), - "cpu_l3_cache_bytes": $(sysctl -n hw.l3cachesize), - "gpu_information": "${gpu_information}", - "gpu_part_number": "${gpu_part_number}", - "gpu_product_name": "${gpu_product_name}" -} -MACHINE_JSON - -echo 
"Machine details saved in ${OUTFILE}" diff --git a/dev/merge.conf.sample b/dev/merge.conf.sample deleted file mode 100644 index c71b211614da..000000000000 --- a/dev/merge.conf.sample +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Configuration for the merge_arrow_pr.py tool -# Install a copy of this file at ~/.config/arrow/merge.conf - -[jira] -# issues.apache.org JIRA credentials. Sadly, the jira instance doesn't offer -# token credentials. Ensure that the file is properly protected. -username=johnsmith -password=123456 diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py deleted file mode 100755 index 373ceb8e20f3..000000000000 --- a/dev/merge_arrow_pr.py +++ /dev/null @@ -1,610 +0,0 @@ -#!/usr/bin/env python - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Utility for creating well-formed pull request merges and pushing them to -# Apache. -# usage: ./merge_arrow_pr.py (see config env vars below) -# -# This utility assumes you already have a local Arrow git clone and that you -# have added remotes corresponding to both (i) the GitHub Apache Arrow mirror -# and (ii) the apache git repo. -# -# There are several pieces of authorization possibly needed via environment -# variables -# -# APACHE_JIRA_USERNAME: your Apache JIRA id -# APACHE_JIRA_PASSWORD: your Apache JIRA password -# ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests (to avoid -# rate limiting) - -import configparser -import os -import pprint -import re -import subprocess -import sys -import requests -import getpass - -from six.moves import input -import six - -try: - import jira.client - import jira.exceptions -except ImportError: - print("Could not find jira library. 
" - "Run 'sudo pip install jira' to install.") - print("Exiting without trying to close the associated JIRA.") - sys.exit(1) - -# Remote name which points to the GitHub site -PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "apache") - -# For testing to avoid accidentally pushing to apache -DEBUG = bool(int(os.environ.get("DEBUG", 0))) - - -if DEBUG: - print("**************** DEBUGGING ****************") - - -# Prefix added to temporary branches -BRANCH_PREFIX = "PR_TOOL" -JIRA_API_BASE = "https://issues.apache.org/jira" - - -def get_json(url, headers=None): - req = requests.get(url, headers=headers) - return req.json() - - -def run_cmd(cmd): - if isinstance(cmd, six.string_types): - cmd = cmd.split(' ') - - try: - output = subprocess.check_output(cmd) - except subprocess.CalledProcessError as e: - # this avoids hiding the stdout / stderr of failed processes - print('Command failed: %s' % cmd) - print('With output:') - print('--------------') - print(e.output) - print('--------------') - raise e - - if isinstance(output, six.binary_type): - output = output.decode('utf-8') - return output - - -original_head = run_cmd("git rev-parse HEAD")[:8] - - -def clean_up(): - print("Restoring head pointer to %s" % original_head) - run_cmd("git checkout %s" % original_head) - - branches = run_cmd("git branch").replace(" ", "").split("\n") - - for branch in [x for x in branches - if x.startswith(BRANCH_PREFIX)]: - print("Deleting local branch %s" % branch) - run_cmd("git branch -D %s" % branch) - - -_REGEX_CI_DIRECTIVE = re.compile(r'\[[^\]]*\]') - - -def strip_ci_directives(commit_message): - # Remove things like '[force ci]', '[skip appveyor]' from the assembled - # commit message - return _REGEX_CI_DIRECTIVE.sub('', commit_message) - - -def fix_version_from_branch(branch, versions): - # Note: Assumes this is a sorted (newest->oldest) list of un-released - # versions - if branch == "master": - return versions[-1] - else: - branch_ver = branch.replace("branch-", "") - return 
[x for x in versions if x.name.startswith(branch_ver)][-1] - - -# We can merge both ARROW and PARQUET patchesa -SUPPORTED_PROJECTS = ['ARROW', 'PARQUET'] -PR_TITLE_REGEXEN = [(project, re.compile(r'^(' + project + r'-[0-9]+)\b.*$')) - for project in SUPPORTED_PROJECTS] - - -class JiraIssue(object): - - def __init__(self, jira_con, jira_id, project, cmd): - self.jira_con = jira_con - self.jira_id = jira_id - self.project = project - self.cmd = cmd - - try: - self.issue = jira_con.issue(jira_id) - except Exception as e: - self.cmd.fail("ASF JIRA could not find %s\n%s" % (jira_id, e)) - - @property - def current_fix_versions(self): - return self.issue.fields.fixVersions - - def get_candidate_fix_versions(self, merge_branches=('master',)): - # Only suggest versions starting with a number, like 0.x but not JS-0.x - all_versions = self.jira_con.project_versions(self.project) - unreleased_versions = [x for x in all_versions - if not x.raw['released']] - - unreleased_versions = sorted(unreleased_versions, - key=lambda x: x.name, reverse=True) - - mainline_versions = self._filter_mainline_versions(unreleased_versions) - - mainline_non_patch_versions = [] - for v in mainline_versions: - (major, minor, patch) = v.name.split(".") - if patch == "0": - mainline_non_patch_versions.append(v) - - if len(mainline_versions) > len(mainline_non_patch_versions): - # If there is a non-patch release, suggest that instead - mainline_versions = mainline_non_patch_versions - - default_fix_versions = [ - fix_version_from_branch(x, mainline_versions).name - for x in merge_branches] - - return all_versions, default_fix_versions - - def _filter_mainline_versions(self, versions): - if self.project == 'PARQUET': - mainline_regex = re.compile(r'cpp-\d.*') - else: - mainline_regex = re.compile(r'\d.*') - - return [x for x in versions if mainline_regex.match(x.name)] - - def resolve(self, fix_versions, comment): - fields = self.issue.fields - cur_status = fields.status.name - - if cur_status == 
"Resolved" or cur_status == "Closed": - self.cmd.fail("JIRA issue %s already has status '%s'" - % (self.jira_id, cur_status)) - - if DEBUG: - print("JIRA issue %s untouched" % (self.jira_id)) - return - - resolve = [x for x in self.jira_con.transitions(self.jira_id) - if x['name'] == "Resolve Issue"][0] - - # ARROW-6915: do not overwrite existing fix versions corresponding to - # point releases - fix_versions = list(fix_versions) - fix_version_names = set(x['name'] for x in fix_versions) - for version in self.current_fix_versions: - major, minor, patch = version.name.split('.') - if patch != '0' and version.name not in fix_version_names: - fix_versions.append(version.raw) - - self.jira_con.transition_issue(self.jira_id, resolve["id"], - comment=comment, - fixVersions=fix_versions) - - print("Successfully resolved %s!" % (self.jira_id)) - - self.issue = self.jira_con.issue(self.jira_id) - self.show() - - def show(self): - fields = self.issue.fields - print(format_jira_output(self.jira_id, fields.status.name, - fields.summary, fields.assignee, - fields.components)) - - -def format_jira_output(jira_id, status, summary, assignee, components): - if assignee is None: - assignee = "NOT ASSIGNED!!!" - else: - assignee = assignee.displayName - - if len(components) == 0: - components = 'NO COMPONENTS!!!' 
- else: - components = ', '.join((x.name for x in components)) - - return """=== JIRA {} === -Summary\t\t{} -Assignee\t{} -Components\t{} -Status\t\t{} -URL\t\t{}/{}""".format(jira_id, summary, assignee, components, status, - '/'.join((JIRA_API_BASE, 'browse')), jira_id) - - -class GitHubAPI(object): - - def __init__(self, project_name): - self.github_api = ("https://api.github.com/repos/apache/{0}" - .format(project_name)) - - token = os.environ.get('ARROW_GITHUB_API_TOKEN', None) - if token: - self.headers = {'Authorization': 'token {0}'.format(token)} - else: - self.headers = None - - def get_pr_data(self, number): - return get_json("%s/pulls/%s" % (self.github_api, number), - headers=self.headers) - - -class CommandInput(object): - """ - Interface to input(...) to enable unit test mocks to be created - """ - - def fail(self, msg): - clean_up() - raise Exception(msg) - - def prompt(self, prompt): - return input(prompt) - - def getpass(self, prompt): - return getpass.getpass(prompt) - - def continue_maybe(self, prompt): - while True: - result = input("\n%s (y/n): " % prompt) - if result.lower() == "y": - return - elif result.lower() == "n": - self.fail("Okay, exiting") - else: - prompt = "Please input 'y' or 'n'" - - -class PullRequest(object): - - def __init__(self, cmd, github_api, git_remote, jira_con, number): - self.cmd = cmd - self.git_remote = git_remote - self.con = jira_con - self.number = number - self._pr_data = github_api.get_pr_data(number) - try: - self.url = self._pr_data["url"] - self.title = self._pr_data["title"] - self.body = self._pr_data["body"] - self.target_ref = self._pr_data["base"]["ref"] - self.user_login = self._pr_data["user"]["login"] - self.base_ref = self._pr_data["head"]["ref"] - except KeyError: - pprint.pprint(self._pr_data) - raise - self.description = "%s/%s" % (self.user_login, self.base_ref) - - self.jira_issue = self._get_jira() - - def show(self): - print("\n=== Pull Request #%s ===" % self.number) - 
print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" - % (self.title, self.description, self.target_ref, self.url)) - if self.jira_issue is not None: - self.jira_issue.show() - else: - print("Minor PR. Please ensure it meets guidelines for minor.\n") - - @property - def is_merged(self): - return bool(self._pr_data["merged"]) - - @property - def is_mergeable(self): - return bool(self._pr_data["mergeable"]) - - def _get_jira(self): - jira_id = None - for project, regex in PR_TITLE_REGEXEN: - m = regex.search(self.title) - if m: - jira_id = m.group(1) - break - - if jira_id is None and not self.title.startswith("MINOR:"): - options = ' or '.join('{0}-XXX'.format(project) - for project in SUPPORTED_PROJECTS) - self.cmd.fail("PR title should be prefixed by a jira id " - "{0}, but found {1}".format(options, self.title)) - - return JiraIssue(self.con, jira_id, project, self.cmd) - - def merge(self): - """ - merge the requested PR and return the merge hash - """ - pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, self.number) - target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, - self.number, - self.target_ref.upper()) - run_cmd("git fetch %s pull/%s/head:%s" % (self.git_remote, - self.number, - pr_branch_name)) - run_cmd("git fetch %s %s:%s" % (self.git_remote, self.target_ref, - target_branch_name)) - run_cmd("git checkout %s" % target_branch_name) - - had_conflicts = False - try: - run_cmd(['git', 'merge', pr_branch_name, '--ff', '--squash']) - except Exception as e: - msg = ("Error merging: %s\nWould you like to " - "manually fix-up this merge?" % e) - self.cmd.continue_maybe(msg) - msg = ("Okay, please fix any conflicts and 'git add' " - "conflicting files... 
Finished?") - self.cmd.continue_maybe(msg) - had_conflicts = True - - commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name, - '--pretty=format:%an <%ae>']).split("\n") - distinct_authors = sorted(set(commit_authors), - key=lambda x: commit_authors.count(x), - reverse=True) - - for i, author in enumerate(distinct_authors): - print("Author {}: {}".format(i + 1, author)) - - if len(distinct_authors) > 1: - primary_author, distinct_authors = get_primary_author( - self.cmd, distinct_authors) - else: - # If there is only one author, do not prompt for a lead author - primary_author = distinct_authors[0] - - merge_message_flags = [] - - merge_message_flags += ["-m", self.title] - if self.body is not None: - merge_message_flags += ["-m", self.body] - - committer_name = run_cmd("git config --get user.name").strip() - committer_email = run_cmd("git config --get user.email").strip() - - authors = ("Authored-by:" if len(distinct_authors) == 1 - else "Lead-authored-by:") - authors += " %s" % (distinct_authors.pop(0)) - if len(distinct_authors) > 0: - authors += "\n" + "\n".join(["Co-authored-by: %s" % a - for a in distinct_authors]) - authors += "\n" + "Signed-off-by: %s <%s>" % (committer_name, - committer_email) - - if had_conflicts: - committer_name = run_cmd("git config --get user.name").strip() - committer_email = run_cmd("git config --get user.email").strip() - message = ("This patch had conflicts when merged, " - "resolved by\nCommitter: %s <%s>" % - (committer_name, committer_email)) - merge_message_flags += ["-m", message] - - # The string "Closes #%s" string is required for GitHub to correctly - # close the PR - merge_message_flags += [ - "-m", - "Closes #%s from %s" - % (self.number, self.description)] - merge_message_flags += ["-m", authors] - - if DEBUG: - print("\n".join(merge_message_flags)) - - run_cmd(['git', 'commit', - '--no-verify', # do not run commit hooks - '--author="%s"' % primary_author] + - merge_message_flags) - - 
self.cmd.continue_maybe("Merge complete (local ref %s). Push to %s?" - % (target_branch_name, self.git_remote)) - - try: - push_cmd = ('git push %s %s:%s' % (self.git_remote, - target_branch_name, - self.target_ref)) - if DEBUG: - print(push_cmd) - else: - run_cmd(push_cmd) - except Exception as e: - clean_up() - self.cmd.fail("Exception while pushing: %s" % e) - - merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8] - clean_up() - print("Pull request #%s merged!" % self.number) - print("Merge hash: %s" % merge_hash) - return merge_hash - - -def get_primary_author(cmd, distinct_authors): - author_pat = re.compile(r'(.*) <(.*)>') - - while True: - primary_author = cmd.prompt( - "Enter primary author in the format of " - "\"name \" [%s]: " % distinct_authors[0]) - - if primary_author == "": - return distinct_authors[0], distinct_authors - - if author_pat.match(primary_author): - break - print('Bad author "{}", please try again'.format(primary_author)) - - # When primary author is specified manually, de-dup it from - # author list and put it at the head of author list. 
- distinct_authors = [x for x in distinct_authors - if x != primary_author] - distinct_authors = [primary_author] + distinct_authors - return primary_author, distinct_authors - - -def prompt_for_fix_version(cmd, jira_issue): - (all_versions, - default_fix_versions) = jira_issue.get_candidate_fix_versions() - - default_fix_versions = ",".join(default_fix_versions) - - issue_fix_versions = cmd.prompt("Enter comma-separated " - "fix version(s) [%s]: " - % default_fix_versions) - if issue_fix_versions == "": - issue_fix_versions = default_fix_versions - issue_fix_versions = issue_fix_versions.replace(" ", "").split(",") - - def get_version_json(version_str): - return [x for x in all_versions if x.name == version_str][0].raw - - return [get_version_json(v) for v in issue_fix_versions] - - -CONFIG_FILE = "~/.config/arrow/merge.conf" - - -def load_configuration(): - config = configparser.ConfigParser() - config.read(os.path.expanduser(CONFIG_FILE)) - return config - - -def get_credentials(cmd): - username, password = None, None - - config = load_configuration() - if "jira" in config.sections(): - username = config["jira"].get("username") - password = config["jira"].get("password") - - # Fallback to environment variables - if not username: - username = os.environ.get("APACHE_JIRA_USERNAME") - - if not password: - password = os.environ.get("APACHE_JIRA_PASSWORD") - - # Fallback to user tty prompt - if not username: - username = cmd.prompt("Env APACHE_JIRA_USERNAME not set, " - "please enter your JIRA username:") - - if not password: - password = cmd.getpass("Env APACHE_JIRA_PASSWORD not set, " - "please enter your JIRA password:") - - return (username, password) - - -def connect_jira(cmd): - try: - return jira.client.JIRA(options={'server': JIRA_API_BASE}, - basic_auth=get_credentials(cmd)) - except jira.exceptions.JIRAError as e: - if "CAPTCHA_CHALLENGE" in e.text: - print("") - print("It looks like you need to answer a captcha challenge for " - "this account (probably due 
to a login attempt with an " - "incorrect password). Please log in at " - "https://issues.apache.org/jira and complete the captcha " - "before running this tool again.") - print("Exiting.") - sys.exit(1) - raise e - - -def get_pr_num(): - if len(sys.argv) == 2: - return sys.argv[1] - - return input("Which pull request would you like to merge? (e.g. 34): ") - - -def cli(): - # Location of your Arrow git clone - ARROW_HOME = os.path.abspath(os.path.dirname(__file__)) - PROJECT_NAME = os.environ.get('ARROW_PROJECT_NAME') or 'arrow' - print("ARROW_HOME = " + ARROW_HOME) - print("PROJECT_NAME = " + PROJECT_NAME) - - cmd = CommandInput() - - pr_num = get_pr_num() - - os.chdir(ARROW_HOME) - - github_api = GitHubAPI(PROJECT_NAME) - - jira_con = connect_jira(cmd) - pr = PullRequest(cmd, github_api, PR_REMOTE_NAME, jira_con, pr_num) - - if pr.is_merged: - print("Pull request %s has already been merged") - sys.exit(0) - - if not pr.is_mergeable: - msg = ("Pull request %s is not mergeable in its current form.\n" - % pr_num + "Continue? (experts only!)") - cmd.continue_maybe(msg) - - pr.show() - - cmd.continue_maybe("Proceed with merging pull request #%s?" % pr_num) - - # merged hash not used - pr.merge() - - if pr.jira_issue is None: - print("Minor PR. 
No JIRA issue to update.\n") - return - - cmd.continue_maybe("Would you like to update the associated JIRA?") - jira_comment = ( - "Issue resolved by pull request %s\n[%s/%s]" - % (pr_num, - "https://github.com/apache/" + PROJECT_NAME + "/pull", - pr_num)) - - fix_versions_json = prompt_for_fix_version(cmd, pr.jira_issue) - pr.jira_issue.resolve(fix_versions_json, jira_comment) - - -if __name__ == '__main__': - try: - cli() - except Exception: - raise diff --git a/dev/merge_arrow_pr.sh b/dev/merge_arrow_pr.sh deleted file mode 100755 index 147f6c4bc0d1..000000000000 --- a/dev/merge_arrow_pr.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Wrapper script that automatically creates a Python virtual environment -# and runs merge_arrow_pr.py inside it. 
- -set -e - -PYTHON=$(which python3) -PYVER=$($PYTHON -c "import sys; print('.'.join(map(str, sys.version_info[:2])))") - -GIT_ROOT=$(git rev-parse --show-toplevel) -ENV_DIR=$GIT_ROOT/dev/.venv$PYVER - -ENV_PYTHON=$ENV_DIR/bin/python3 -ENV_PIP="$ENV_PYTHON -m pip --no-input" - -check_venv() { - [ -x $ENV_PYTHON ] || { - echo "Virtual environment broken: $ENV_PYTHON not an executable" - exit 1 - } -} - -create_venv() { - echo "" - echo "Creating Python virtual environment in $ENV_DIR ..." - echo "" - $PYTHON -m venv $ENV_DIR - $ENV_PIP install -q -r $GIT_ROOT/dev/requirements_merge_arrow_pr.txt || { - echo "Failed to setup virtual environment" - echo "Please delete directory '$ENV_DIR' and try again" - exit $? - } -} - -[ -d $ENV_DIR ] || create_venv -check_venv - -$ENV_PYTHON $GIT_ROOT/dev/merge_arrow_pr.py "$@" diff --git a/dev/requirements_merge_arrow_pr.txt b/dev/requirements_merge_arrow_pr.txt deleted file mode 100644 index 7ac17dc1b193..000000000000 --- a/dev/requirements_merge_arrow_pr.txt +++ /dev/null @@ -1,3 +0,0 @@ -jira -requests -six diff --git a/dev/test_merge_arrow_pr.py b/dev/test_merge_arrow_pr.py deleted file mode 100644 index 8fe188350822..000000000000 --- a/dev/test_merge_arrow_pr.py +++ /dev/null @@ -1,317 +0,0 @@ -#!/usr/bin/env python - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -from collections import namedtuple - -import pytest - -import merge_arrow_pr - - -FakeIssue = namedtuple('issue', ['fields']) -FakeFields = namedtuple('fields', ['status', 'summary', 'assignee', - 'components', 'fixVersions']) -FakeAssignee = namedtuple('assignee', ['displayName']) -FakeStatus = namedtuple('status', ['name']) -FakeComponent = namedtuple('component', ['name']) -FakeVersion = namedtuple('version', ['name', 'raw']) - -RAW_VERSION_JSON = [ - {'name': 'JS-0.4.0', 'released': False}, - {'name': '0.11.0', 'released': False}, - {'name': '0.12.0', 'released': False}, - {'name': '0.10.0', 'released': True}, - {'name': '0.9.0', 'released': True} -] - - -SOURCE_VERSIONS = [FakeVersion(raw['name'], raw) - for raw in RAW_VERSION_JSON] - -TRANSITIONS = [{'name': 'Resolve Issue', 'id': 1}] - -jira_id = 'ARROW-1234' -status = FakeStatus('In Progress') -fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'), - [FakeComponent('C++'), FakeComponent('Format')], - []) -FAKE_ISSUE_1 = FakeIssue(fields) - - -class FakeJIRA: - - def __init__(self, issue=None, project_versions=None, transitions=None, - current_fix_versions=None): - self._issue = issue - self._project_versions = project_versions - self._transitions = transitions - - def issue(self, jira_id): - return self._issue - - def transitions(self, jira_id): - return self._transitions - - def transition_issue(self, jira_id, transition_id, comment=None, - fixVersions=None): - self.captured_transition = { - 'jira_id': jira_id, - 'transition_id': transition_id, - 'comment': comment, - 'fixVersions': fixVersions - } - - def get_candidate_fix_versions(self): - return SOURCE_VERSIONS, ['0.12.0'] - - def project_versions(self, project): - return self._project_versions - - -class FakeCLI: - - def __init__(self, responses=()): - self.responses = responses - self.position = 0 - - def prompt(self, 
prompt): - response = self.responses[self.position] - self.position += 1 - return response - - def fail(self, msg): - raise Exception(msg) - - -def test_jira_fix_versions(): - jira = FakeJIRA(project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) - - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) - all_versions, default_versions = issue.get_candidate_fix_versions() - assert all_versions == SOURCE_VERSIONS - assert default_versions == ['0.11.0'] - - -def test_jira_no_suggest_patch_release(): - versions_json = [ - {'name': '0.11.1', 'released': False}, - {'name': '0.12.0', 'released': False}, - ] - - versions = [FakeVersion(raw['name'], raw) for raw in versions_json] - - jira = FakeJIRA(project_versions=versions, transitions=TRANSITIONS) - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) - all_versions, default_versions = issue.get_candidate_fix_versions() - assert all_versions == versions - assert default_versions == ['0.12.0'] - - -def test_jira_parquet_no_suggest_non_cpp(): - # ARROW-7351 - versions_json = [ - {'name': 'cpp-1.5.0', 'released': True}, - {'name': 'cpp-1.6.0', 'released': False}, - {'name': 'cpp-1.7.0', 'released': False}, - {'name': '1.11.0', 'released': False}, - {'name': '1.12.0', 'released': False} - ] - - versions = [FakeVersion(raw['name'], raw) - for raw in versions_json] - - jira = FakeJIRA(project_versions=versions, transitions=TRANSITIONS) - issue = merge_arrow_pr.JiraIssue(jira, 'PARQUET-1713', 'PARQUET', - FakeCLI()) - all_versions, default_versions = issue.get_candidate_fix_versions() - assert all_versions == versions - assert default_versions == ['cpp-1.6.0'] - - -def test_jira_invalid_issue(): - class Mock: - - def issue(self, jira_id): - raise Exception("not found") - - with pytest.raises(Exception): - merge_arrow_pr.JiraIssue(Mock(), 'ARROW-1234', 'ARROW', FakeCLI()) - - -def test_jira_resolve(): - jira = FakeJIRA(issue=FAKE_ISSUE_1, - project_versions=SOURCE_VERSIONS, - 
transitions=TRANSITIONS) - - my_comment = 'my comment' - fix_versions = [SOURCE_VERSIONS[1].raw] - - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) - issue.resolve(fix_versions, my_comment) - - assert jira.captured_transition == { - 'jira_id': 'ARROW-1234', - 'transition_id': 1, - 'comment': my_comment, - 'fixVersions': fix_versions - } - - -def test_jira_resolve_non_mainline(): - jira = FakeJIRA(issue=FAKE_ISSUE_1, - project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) - - my_comment = 'my comment' - fix_versions = [SOURCE_VERSIONS[0].raw] - - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) - issue.resolve(fix_versions, my_comment) - - assert jira.captured_transition == { - 'jira_id': 'ARROW-1234', - 'transition_id': 1, - 'comment': my_comment, - 'fixVersions': fix_versions - } - - -def test_jira_resolve_released_fix_version(): - # ARROW-5083 - jira = FakeJIRA(issue=FAKE_ISSUE_1, - project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) - - cmd = FakeCLI(responses=['0.9.0']) - fix_versions_json = merge_arrow_pr.prompt_for_fix_version(cmd, jira) - assert fix_versions_json == [RAW_VERSION_JSON[-1]] - - -def test_multiple_authors_bad_input(): - a0 = 'Jimbob Crawfish ' - a1 = 'Jarvis McCratchett ' - a2 = 'Hank Miller ' - distinct_authors = [a0, a1] - - cmd = FakeCLI(responses=['']) - primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author( - cmd, distinct_authors) - assert primary_author == a0 - assert new_distinct_authors == [a0, a1] - - cmd = FakeCLI(responses=['oops', a1]) - primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author( - cmd, distinct_authors) - assert primary_author == a1 - assert new_distinct_authors == [a1, a0] - - cmd = FakeCLI(responses=[a2]) - primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author( - cmd, distinct_authors) - assert primary_author == a2 - assert new_distinct_authors == [a2, a0, a1] - - -def 
test_jira_already_resolved(): - status = FakeStatus('Resolved') - fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'), - [FakeComponent('Java')], []) - issue = FakeIssue(fields) - - jira = FakeJIRA(issue=issue, - project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) - - fix_versions = [SOURCE_VERSIONS[0].raw] - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) - - with pytest.raises(Exception, - match="ARROW-1234 already has status 'Resolved'"): - issue.resolve(fix_versions, "") - - -def test_no_unset_point_release_fix_version(): - # ARROW-6915: We have had the problem of issues marked with a point release - # having their fix versions overwritten by the merge tool. This verifies - # that existing patch release versions are carried over - status = FakeStatus('In Progress') - - versions_json = { - '0.14.2': {'name': '0.14.2', 'id': 1}, - '0.15.1': {'name': '0.15.1', 'id': 2}, - '0.16.0': {'name': '0.16.0', 'id': 3}, - '0.17.0': {'name': '0.17.0', 'id': 4} - } - - fields = FakeFields(status, 'summary', FakeAssignee('someone'), - [FakeComponent('Java')], - [FakeVersion(v, versions_json[v]) - for v in ['0.17.0', '0.15.1', '0.14.2']]) - issue = FakeIssue(fields) - - jira = FakeJIRA(issue=issue, project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) - - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) - issue.resolve([versions_json['0.16.0']], "a comment") - - assert jira.captured_transition == { - 'jira_id': 'ARROW-1234', - 'transition_id': 1, - 'comment': 'a comment', - 'fixVersions': [versions_json[v] - for v in ['0.16.0', '0.15.1', '0.14.2']] - } - - issue.resolve([versions_json['0.15.1']], "a comment") - - assert jira.captured_transition == { - 'jira_id': 'ARROW-1234', - 'transition_id': 1, - 'comment': 'a comment', - 'fixVersions': [versions_json[v] for v in ['0.15.1', '0.14.2']] - } - - -def test_jira_output_no_components(): - # ARROW-5472 - status = 'Interesting work' - components = [] 
- output = merge_arrow_pr.format_jira_output( - 'ARROW-1234', 'Resolved', status, FakeAssignee('Foo Bar'), - components) - - assert output == """=== JIRA ARROW-1234 === -Summary\t\tInteresting work -Assignee\tFoo Bar -Components\tNO COMPONENTS!!! -Status\t\tResolved -URL\t\thttps://issues.apache.org/jira/browse/ARROW-1234""" - - output = merge_arrow_pr.format_jira_output( - 'ARROW-1234', 'Resolved', status, FakeAssignee('Foo Bar'), - [FakeComponent('C++'), FakeComponent('Python')]) - - assert output == """=== JIRA ARROW-1234 === -Summary\t\tInteresting work -Assignee\tFoo Bar -Components\tC++, Python -Status\t\tResolved -URL\t\thttps://issues.apache.org/jira/browse/ARROW-1234""" diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 4a3092ec04d7..000000000000 --- a/docker-compose.yml +++ /dev/null @@ -1,1391 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Usage -# ----- -# -# The docker compose file is parametrized using environment variables, the -# defaults are set in .env file. 
-# -# Example: -# $ ARCH=arm64v8 docker-compose build ubuntu-cpp -# $ ARCH=arm64v8 docker-compose run ubuntu-cpp -# -# -# Coredumps -# --------- -# -# In order to enable coredumps for the C++ tests run by CTest either with -# command `make unittest` or `ctest --output-on-failure` the correct coredump -# patterns must be set. -# The kernel settings are coming from the host, so while it can be enabled from -# a running container using --priviled option the change will affect all other -# containers, so prefer setting it explicitly, directly on the host. -# WARNING: setting this will affect the host machine. -# -# Linux host: -# $ sudo sysctl -w kernel.core_pattern=core.%e.%p -# -# macOS host running Docker for Mac (won't persist between restarts): -# $ screen ~/Library/Containers/com.docker.docker/Data/vms/0/tty -# # echo "core.%e.%p" > /proc/sys/kernel/core_pattern -# -# The setup attempts to generate coredumps by default, but the correct paths -# above must be set. In order to disable the coredump generation set -# ULIMIT_CORE environment variable to 0 before running docker-compose -# (or by setting it in .env file): -# -# ULIMIT_CORE=0 docker-compose run --rm conda-cpp -# -# See more in cpp/build-support/run-test.sh::print_coredumps - -version: '3.5' - -x-ccache: &ccache - CCACHE_COMPILERCHECK: content - CCACHE_COMPRESS: 1 - CCACHE_COMPRESSLEVEL: 6 - CCACHE_MAXSIZE: 500M - CCACHE_DIR: /ccache - -x-with-gpus: - - ubuntu-cuda-cpp - - ubuntu-cuda-python - -x-hierarchy: - # This section is used by the archery tool to enable building nested images, - # so it is enough to call: - # archery run debian-ruby - # instead of a seguence of docker-compose commands: - # docker-compose build debian-cpp - # docker-compose build debian-c-glib - # docker-compose build debian-ruby - # docker-compose run --rm debian-ruby - # - # Each node must be either a string scalar of a list containing the - # descendant images if any. 
Archery checks that all node has a corresponding - # service entry, so any new image/service must be listed here. - - conda: - - conda-cpp: - - conda-cpp-hiveserver2 - - conda-cpp-valgrind - - conda-python: - - conda-python-pandas - - conda-python-dask - - conda-python-hdfs - - conda-python-jpype - - conda-python-turbodbc - - conda-python-kartothek - - conda-python-spark - - conda-integration - - debian-cpp: - - debian-c-glib: - - debian-ruby - - debian-python - - debian-go - - debian-java: - - debian-java-jni - - debian-js - - fedora-cpp: - - fedora-python - - ubuntu-cpp: - - ubuntu-c-glib: - - ubuntu-ruby - - ubuntu-lint - - ubuntu-python: - - ubuntu-docs - - ubuntu-python-sdist-test - - ubuntu-r - - ubuntu-cuda-cpp: - - ubuntu-cuda-python - - ubuntu-csharp - - ubuntu-cpp-sanitizer - - ubuntu-cpp-thread-sanitizer - - ubuntu-r-sanitizer - - python-sdist - - r - # helper services - - impala - - postgres - - python-wheel-manylinux-2010 - - python-wheel-manylinux-2014 - - python-wheel-manylinux-test-imports - - python-wheel-manylinux-test-unittests - - python-wheel-windows-vs2017 - - python-wheel-windows-test - -volumes: - conda-ccache: - name: ${ARCH}-conda-ccache - debian-ccache: - name: ${ARCH}-debian-${DEBIAN}-ccache - ubuntu-ccache: - name: ${ARCH}-ubuntu-${UBUNTU}-ccache - fedora-ccache: - name: ${ARCH}-fedora-${FEDORA}-ccache - debian-rust: - name: ${ARCH}-debian-${DEBIAN}-rust - maven-cache: - name: maven-cache - python-wheel-manylinux2010-ccache: - name: python-wheel-manylinux2010-ccache - python-wheel-manylinux2014-ccache: - name: python-wheel-manylinux2014-ccache - python-wheel-windows-clcache: - name: python-wheel-windows-clcache - -services: - - ################################# C++ ####################################### - # Release build: - # docker-compose run -e ARROW_BUILD_TYPE=release conda-cpp|debian-cpp|... - # Shared only: - # docker-compose run -e ARROW_BUILD_STATIC=OFF conda-cpp|debian-cpp|... 
- # Static only: - # docker-compose run \ - # -e ARROW_BUILD_SHARED=OFF \ - # -e ARROW_TEST_LINKAGE=static \ - # conda-cpp|debian-cpp|... - - conda: - # Base image for conda builds. - # - # Usage: - # docker-compose build con - # docker-compose run --rm conda - # Parameters: - # ARCH: amd64, arm32v7 - image: ${REPO}:${ARCH}-conda - build: - context: . - dockerfile: ci/docker/conda.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda - args: - arch: ${ARCH} - prefix: /opt/conda - volumes: - - .:/arrow:delegated - - conda-cpp: - # C++ build in conda environment, including the doxygen docs. - # - # Usage: - # docker-compose build conda - # docker-compose build conda-cpp - # docker-compose run --rm conda-cpp - # Parameters: - # ARCH: amd64, arm32v7 - image: ${REPO}:${ARCH}-conda-cpp - build: - context: . - dockerfile: ci/docker/conda-cpp.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-cpp - args: - repo: ${REPO} - arch: ${ARCH} - shm_size: &shm-size 2G - ulimits: &ulimits - core: ${ULIMIT_CORE} - environment: - <<: *ccache - ARROW_BUILD_BENCHMARKS: "ON" - ARROW_ENABLE_TIMING_TESTS: # inherit - ARROW_MIMALLOC: "ON" - ARROW_USE_LD_GOLD: "ON" - ARROW_USE_PRECOMPILED_HEADERS: "ON" - volumes: &conda-volumes - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}conda-ccache:/ccache:delegated - command: &cpp-conda-command - ["/arrow/ci/scripts/cpp_build.sh /arrow /build true && - /arrow/ci/scripts/cpp_test.sh /arrow /build"] - - conda-cpp-valgrind: - # Usage: - # docker-compose build conda - # docker-compose build conda-cpp - # docker-compose run --rm conda-cpp-valgrind - # Parameters: - # ARCH: amd64, arm32v7 - image: ${REPO}:${ARCH}-conda-cpp - build: - context: . 
- dockerfile: ci/docker/conda-cpp.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-cpp - args: - repo: ${REPO} - arch: ${ARCH} - prefix: /opt/conda - shm_size: *shm-size - environment: - <<: *ccache - ARROW_CXXFLAGS: "-Og" # Shrink test runtime by enabling minimal optimizations - ARROW_ENABLE_TIMING_TESTS: # inherit - ARROW_FLIGHT: "OFF" - ARROW_GANDIVA: "OFF" - ARROW_JEMALLOC: "OFF" - ARROW_RUNTIME_SIMD_LEVEL: "AVX2" # AVX512 not supported by Valgrind (ARROW-9851) - ARROW_S3: "OFF" - ARROW_TEST_MEMCHECK: "ON" - ARROW_USE_LD_GOLD: "ON" - BUILD_WARNING_LEVEL: "PRODUCTION" - volumes: *conda-volumes - command: *cpp-conda-command - - debian-cpp: - # Usage: - # docker-compose build debian-cpp - # docker-compose run --rm debian-cpp - # Parameters: - # ARCH: amd64, arm64v8, ... - # DEBIAN: 9, 10 - image: ${REPO}:${ARCH}-debian-${DEBIAN}-cpp - build: - context: . - dockerfile: ci/docker/debian-${DEBIAN}-cpp.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-${DEBIAN}-cpp - args: - arch: ${ARCH} - llvm: ${LLVM} - shm_size: *shm-size - ulimits: *ulimits - environment: - <<: *ccache - ARROW_ENABLE_TIMING_TESTS: # inherit - ARROW_MIMALLOC: "ON" - volumes: &debian-volumes - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated - command: &cpp-command > - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/cpp_test.sh /arrow /build" - - ubuntu-cpp: - # Usage: - # docker-compose build ubuntu-cpp - # docker-compose run --rm ubuntu-cpp - # Parameters: - # ARCH: amd64, arm64v8, s390x, ... - # UBUNTU: 18.04, 20.04 - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - build: - context: . 
- dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - args: - arch: ${ARCH} - base: "${ARCH}/ubuntu:${UBUNTU}" - clang_tools: ${CLANG_TOOLS} - llvm: ${LLVM} - shm_size: *shm-size - ulimits: *ulimits - environment: - <<: *ccache - ARROW_ENABLE_TIMING_TESTS: # inherit - ARROW_MIMALLOC: "ON" - volumes: &ubuntu-volumes - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}ubuntu-ccache:/ccache:delegated - command: *cpp-command - - ubuntu-cuda-cpp: - # Usage: - # docker-compose build cuda-cpp - # docker-compose run --rm cuda-cpp - # Also need to edit the host docker configuration as follows: - # https://github.com/docker/compose/issues/6691#issuecomment-561504928 - # Parameters: - # ARCH: amd64 - # CUDA: 9.1, 10.0, 10.1 - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cuda-${CUDA}-cpp - build: - context: . - dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cuda-${CUDA}-cpp - args: - arch: ${ARCH} - base: nvidia/cuda:${CUDA}-devel-ubuntu${UBUNTU} - clang_tools: ${CLANG_TOOLS} - llvm: ${LLVM} - shm_size: *shm-size - ulimits: *ulimits - environment: - <<: *ccache - ARROW_CUDA: "ON" - volumes: *ubuntu-volumes - command: *cpp-command - - ubuntu-cpp-sanitizer: - # Usage: - # docker-compose build ubuntu-cpp-sanitizer - # docker-compose run --rm ubuntu-cpp-sanitizer - # Parameters: - # ARCH: amd64, arm64v8, ... - # UBUNTU: 18.04, 20.04 - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - cap_add: - # For LeakSanitizer - - SYS_PTRACE - build: - context: . 
- dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - args: - arch: ${ARCH} - clang_tools: ${CLANG_TOOLS} - llvm: ${LLVM} - shm_size: *shm-size - volumes: *ubuntu-volumes - environment: - <<: *ccache - CC: clang-${CLANG_TOOLS} - CXX: clang++-${CLANG_TOOLS} - ARROW_ENABLE_TIMING_TESTS: # inherit - ARROW_FUZZING: "ON" # Check fuzz regressions - ARROW_JEMALLOC: "OFF" - ARROW_ORC: "OFF" - ARROW_S3: "OFF" - ARROW_USE_ASAN: "ON" - ARROW_USE_UBSAN: "ON" - # utf8proc 2.1.0 in Ubuntu Bionic has test failures - utf8proc_SOURCE: "BUNDLED" - command: *cpp-command - - ubuntu-cpp-thread-sanitizer: - # Usage: - # docker-compose build ubuntu-cpp-thread-sanitizer - # docker-compose run --rm ubuntu-cpp-thread-sanitizer - # Parameters: - # ARCH: amd64, arm64v8, ... - # UBUNTU: 18.04, 20.04 - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - build: - context: . - dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - args: - arch: ${ARCH} - clang_tools: ${CLANG_TOOLS} - llvm: ${LLVM} - shm_size: *shm-size - volumes: *ubuntu-volumes - environment: - <<: *ccache - CC: clang-${CLANG_TOOLS} - CXX: clang++-${CLANG_TOOLS} - ARROW_ENABLE_TIMING_TESTS: # inherit - ARROW_DATASET: "ON" - ARROW_JEMALLOC: "OFF" - ARROW_ORC: "OFF" - ARROW_S3: "OFF" - ARROW_USE_TSAN: "ON" - command: *cpp-command - - fedora-cpp: - # Usage: - # docker-compose build fedora-cpp - # docker-compose run --rm fedora-cpp - # Parameters: - # ARCH: amd64, arm64v8, ... - # FEDORA: 33 - image: ${REPO}:${ARCH}-fedora-${FEDORA}-cpp - build: - context: . 
- dockerfile: ci/docker/fedora-${FEDORA}-cpp.dockerfile - cache_from: - - ${REPO}:${ARCH}-fedora-${FEDORA}-cpp - args: - arch: ${ARCH} - llvm: ${LLVM} - shm_size: *shm-size - ulimits: *ulimits - environment: - <<: *ccache - ARROW_ENABLE_TIMING_TESTS: # inherit - ARROW_MIMALLOC: "ON" - volumes: &fedora-volumes - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}fedora-ccache:/ccache:delegated - command: *cpp-command - - ############################### C GLib ###################################### - - debian-c-glib: - # Usage: - # docker-compose build debian-cpp - # docker-compose build debian-c-glib - # docker-compose run --rm debian-c-glib - # Parameters: - # ARCH: amd64, arm64v8, ... - # DEBIAN: 9, 10 - image: ${REPO}:${ARCH}-debian-${DEBIAN}-c-glib - build: - context: . - dockerfile: ci/docker/linux-apt-c-glib.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-${DEBIAN}-c-glib - args: - base: ${REPO}:${ARCH}-debian-${DEBIAN}-cpp - shm_size: *shm-size - ulimits: *ulimits - environment: - <<: *ccache - ARROW_GLIB_GTK_DOC: "true" - volumes: *debian-volumes - command: &c-glib-command > - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/c_glib_build.sh /arrow /build && - /arrow/ci/scripts/c_glib_test.sh /arrow /build" - - ubuntu-c-glib: - # Usage: - # docker-compose build ubuntu-cpp - # docker-compose build ubuntu-c-glib - # docker-compose run --rm ubuntu-c-glib - # Parameters: - # ARCH: amd64, arm64v8, ... - # UBUNTU: 18.04, 20.04 - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-c-glib - build: - context: . 
- dockerfile: ci/docker/linux-apt-c-glib.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-c-glib - args: - base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - shm_size: *shm-size - ulimits: *ulimits - environment: - <<: *ccache - ARROW_GLIB_GTK_DOC: "true" - volumes: *ubuntu-volumes - command: *c-glib-command - - ############################### Ruby ######################################## - # Until Ruby is the only dependent implementation on top of C Glib we can - # test C Glib and Ruby in one pass. This is an optimization to avoid - # redundant (one for C GLib and one for Ruby doing the same work twice) - # builds on CI services. - - debian-ruby: - # Usage: - # docker-compose build debian-cpp - # docker-compose build debian-c-glib - # docker-compose build debian-ruby - # docker-compose run --rm debian-ruby - # Parameters: - # ARCH: amd64, arm64v8, ... - # DEBIAN: 9, 10 - image: ${REPO}:${ARCH}-debian-${DEBIAN}-ruby - build: - context: . - dockerfile: ci/docker/linux-apt-ruby.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-${DEBIAN}-ruby - args: - base: ${REPO}:${ARCH}-debian-${DEBIAN}-c-glib - shm_size: *shm-size - ulimits: *ulimits - environment: - <<: *ccache - volumes: *debian-volumes - command: &ruby-command > - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/c_glib_build.sh /arrow /build && - /arrow/ci/scripts/c_glib_test.sh /arrow /build && - /arrow/ci/scripts/ruby_test.sh /arrow /build" - - ubuntu-ruby: - # Usage: - # docker-compose build ubuntu-cpp - # docker-compose build ubuntu-c-glib - # docker-compose build ubuntu-ruby - # docker-compose run --rm ubuntu-ruby - # Parameters: - # ARCH: amd64, arm64v8, ... - # UBUNTU: 18.04, 20.04 - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-ruby - build: - context: . 
- dockerfile: ci/docker/linux-apt-ruby.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-ruby - args: - base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-c-glib - shm_size: *shm-size - ulimits: *ulimits - environment: - <<: *ccache - volumes: *ubuntu-volumes - command: *ruby-command - - ############################### Python ###################################### - - conda-python: - # Usage: - # docker-compose build conda - # docker-compose build conda-cpp - # docker-compose build conda-python - # docker-compose run --rm conda-python - # Parameters: - # ARCH: amd64, arm32v7 - # PYTHON: 3.6, 3.7, 3.8, 3.9 - image: ${REPO}:${ARCH}-conda-python-${PYTHON} - build: - context: . - dockerfile: ci/docker/conda-python.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON} - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - shm_size: *shm-size - environment: - <<: *ccache - volumes: *conda-volumes - command: &python-conda-command - ["/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/python_test.sh /arrow"] - - ubuntu-cuda-python: - # Usage: - # docker-compose build cuda-cpp - # docker-compose build cuda-python - # docker-compose run --rm cuda-python - # Parameters: - # ARCH: amd64 - # CUDA: 8.0, 10.0, ... - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cuda-${CUDA}-python-3 - build: - context: . 
- dockerfile: ci/docker/linux-apt-python-3.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cuda-${CUDA}-python-3 - args: - base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cuda-${CUDA}-cpp - shm_size: *shm-size - environment: - <<: *ccache - ARROW_CUDA: "ON" - volumes: *ubuntu-volumes - command: &python-command > - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/python_test.sh /arrow" - - debian-python: - # Usage: - # docker-compose build debian-cpp - # docker-compose build debian-python - # docker-compose run --rm debian-python - # Parameters: - # ARCH: amd64, arm64v8, ... - # DEBIAN: 9, 10 - image: ${REPO}:${ARCH}-debian-${DEBIAN}-python-3 - build: - context: . - dockerfile: ci/docker/linux-apt-python-3.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-${DEBIAN}-python-3 - args: - base: ${REPO}:${ARCH}-debian-${DEBIAN}-cpp - shm_size: *shm-size - environment: - <<: *ccache - volumes: *debian-volumes - command: *python-command - - ubuntu-python: - # Usage: - # docker-compose build ubuntu-cpp - # docker-compose build ubuntu-python - # docker-compose run --rm ubuntu-python - # Parameters: - # ARCH: amd64, arm64v8, ... - # UBUNTU: 18.04, 20.04 - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3 - build: - context: . - dockerfile: ci/docker/linux-apt-python-3.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3 - args: - base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - shm_size: *shm-size - environment: - <<: *ccache - volumes: *ubuntu-volumes - command: *python-command - - fedora-python: - # Usage: - # docker-compose build fedora-cpp - # docker-compose build fedora-python - # docker-compose run --rm fedora-python - # Parameters: - # ARCH: amd64, arm64v8, ... - # FEDORA: 33 - image: ${REPO}:${ARCH}-fedora-${FEDORA}-python-3 - build: - context: . 
- dockerfile: ci/docker/linux-dnf-python-3.dockerfile - cache_from: - - ${REPO}:${ARCH}-fedora-${FEDORA}-python-3 - args: - base: ${REPO}:${ARCH}-fedora-${FEDORA}-cpp - shm_size: *shm-size - environment: - <<: *ccache - volumes: *fedora-volumes - command: *python-command - - ############################ Python sdist ################################### - - python-sdist: - # Usage: - # docker-compose build python-sdist - # docker-compose run --rm python-sdist - # Parameters: - # PYARROW_VERSION: The pyarrow version for sdist such as "3.0.0" - image: ${REPO}:python-sdist - build: - context: . - dockerfile: ci/docker/python-sdist.dockerfile - cache_from: - - ${REPO}:python-sdist - environment: - PYARROW_VERSION: ${PYARROW_VERSION:-} - volumes: - - .:/arrow:delegated - command: /arrow/ci/scripts/python_sdist_build.sh /arrow - - ubuntu-python-sdist-test: - # Usage: - # docker-compose build ubuntu-cpp - # docker-compose build ubuntu-python-sdist-test - # docker-compose run --rm ubuntu-python-sdist-test - # Parameters: - # ARCH: amd64, arm64v8, ... - # PYARROW_VERSION: The test target pyarrow version such as "3.0.0" - # UBUNTU: 18.04, 20.04 - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3 - build: - context: . 
- dockerfile: ci/docker/linux-apt-python-3.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3 - args: - base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - shm_size: *shm-size - environment: - <<: *ccache - PYARROW_VERSION: ${PYARROW_VERSION:-} - volumes: *ubuntu-volumes - command: > - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_sdist_test.sh /arrow" - - ############################ Python wheels ################################## - - # See available versions at: - # https://quay.io/repository/pypa/manylinux2010_x86_64?tab=tags - # only amd64 arch is supported - python-wheel-manylinux-2010: - image: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2010 - build: - args: - arch_alias: ${ARCH_ALIAS} - arch_short_alias: ${ARCH_SHORT_ALIAS} - base: quay.io/pypa/manylinux2010_${ARCH_ALIAS}:2020-12-03-912b0de - vcpkg: ${VCPKG} - python: ${PYTHON} - context: . - dockerfile: ci/docker/python-wheel-manylinux-201x.dockerfile - cache_from: - - ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2010 - environment: - <<: *ccache - MANYLINUX_VERSION: 2010 - volumes: - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2010-ccache:/ccache:delegated - command: /arrow/ci/scripts/python_wheel_manylinux_build.sh - - # See available versions at: - # https://quay.io/repository/pypa/manylinux2014_x86_64?tab=tags - python-wheel-manylinux-2014: - image: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014 - build: - args: - arch_alias: ${ARCH_ALIAS} - arch_short_alias: ${ARCH_SHORT_ALIAS} - base: quay.io/pypa/manylinux2014_${ARCH_ALIAS}:2020-11-11-bc8ce45 - vcpkg: ${VCPKG} - python: ${PYTHON} - context: . 
- dockerfile: ci/docker/python-wheel-manylinux-201x.dockerfile - cache_from: - - ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014 - environment: - <<: *ccache - MANYLINUX_VERSION: 2014 - volumes: - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated - command: /arrow/ci/scripts/python_wheel_manylinux_build.sh - - python-wheel-manylinux-test-imports: - image: ${ARCH}/python:${PYTHON} - shm_size: 2G - volumes: - - .:/arrow:delegated - command: /arrow/ci/scripts/python_wheel_manylinux_test.sh imports - - python-wheel-manylinux-test-unittests: - image: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-test - build: - args: - arch: ${ARCH} - python: ${PYTHON} - context: . - dockerfile: ci/docker/python-wheel-manylinux-test.dockerfile - cache_from: - - ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-test - shm_size: 2G - volumes: - - .:/arrow:delegated - command: /arrow/ci/scripts/python_wheel_manylinux_test.sh unittests - - python-wheel-windows-vs2017: - image: ${REPO}:python-${PYTHON}-wheel-windows-vs2017 - build: - args: - vcpkg: ${VCPKG} - python: ${PYTHON} - context: . - dockerfile: ci/docker/python-wheel-windows-vs2017.dockerfile - # This should make the pushed images reusable, but the image gets rebuilt. - # Uncomment if no local cache is available. - # cache_from: - # - mcr.microsoft.com/windows/servercore:ltsc2019 - # - ${REPO}:wheel-windows-vs2017 - volumes: - - "${DOCKER_VOLUME_PREFIX}python-wheel-windows-clcache:C:/clcache" - - type: bind - source: . - target: "C:/arrow" - command: arrow\\ci\\scripts\\python_wheel_windows_build.bat - - python-wheel-windows-test: - image: python:${PYTHON}-windowsservercore-1809 - volumes: - - type: bind - source: . 
- target: "C:/arrow" - command: arrow\\ci\\scripts\\python_wheel_windows_test.bat - - ############################## Integration ################################# - - conda-python-pandas: - # Possible $PANDAS parameters: - # - `latest`: latest release - # - `master`: git master branch, use `docker-compose run --no-cache` - # - ``: specific version available on conda-forge - # Usage: - # docker-compose build conda - # docker-compose build conda-cpp - # docker-compose build conda-python - # docker-compose build conda-python-pandas - # docker-compose run --rm conda-python-pandas - image: ${REPO}:${ARCH}-conda-python-${PYTHON}-pandas-${PANDAS} - build: - context: . - dockerfile: ci/docker/conda-python-pandas.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON}-pandas-${PANDAS} - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - numpy: ${NUMPY} - pandas: ${PANDAS} - shm_size: *shm-size - environment: - <<: *ccache - volumes: *conda-volumes - command: *python-conda-command - - conda-python-dask: - # Possible $DASK parameters: - # - `latest`: latest release - # - `master`: git master branch, use `docker-compose run --no-cache` - # - ``: specific version available on conda-forge - # Usage: - # docker-compose build conda - # docker-compose build conda-cpp - # docker-compose build conda-python - # docker-compose build conda-python-dask - # docker-compose run --rm conda-python-dask - image: ${REPO}:${ARCH}-conda-python-${PYTHON}-dask-${DASK} - build: - context: . 
- dockerfile: ci/docker/conda-python-dask.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON}-dask-${DASK} - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - dask: ${DASK} - shm_size: *shm-size - environment: - <<: *ccache - volumes: *conda-volumes - command: - ["/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/integration_dask.sh"] - - conda-python-jpype: - # Usage: - # docker-compose build conda - # docker-compose build conda-cpp - # docker-compose build conda-python - # docker-compose build conda-python-jpype - # docker-compose run --rm conda-python-jpype - image: ${REPO}:${ARCH}-conda-python-${PYTHON}-jpype - build: - context: . - dockerfile: ci/docker/conda-python-jpype.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON}-jpype - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - shm_size: *shm-size - environment: - <<: *ccache - ARROW_FLIGHT: "OFF" - ARROW_GANDIVA: "OFF" - volumes: *conda-volumes - command: - ["/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/java_build.sh /arrow /build && - /arrow/ci/scripts/python_test.sh /arrow"] - - conda-python-turbodbc: - # Possible $TURBODBC parameters: - # - `latest`: latest release - # - `master`: git master branch, use `docker-compose run --no-cache` - # - ``: specific version available under github releases - # Usage: - # docker-compose build conda - # docker-compose build conda-cpp - # docker-compose build conda-python - # docker-compose build conda-python-turbodbc - # docker-compose run --rm conda-python-turbodbc - image: ${REPO}:${ARCH}-conda-python-${PYTHON}-turbodbc-${TURBODBC} - build: - context: . 
- dockerfile: ci/docker/conda-python-turbodbc.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON}-turbodbc-${TURBODBC} - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - turbodbc: ${TURBODBC} - shm_size: *shm-size - environment: - <<: *ccache - volumes: *conda-volumes - command: - ["/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/integration_turbodbc.sh /turbodbc /build"] - - conda-python-kartothek: - # Possible $KARTOTHEK parameters: - # - `latest`: latest release - # - `master`: git master branch, use `docker-compose run --no-cache` - # - ``: specific version available under github releases - # Usage: - # docker-compose build conda - # docker-compose build conda-cpp - # docker-compose build conda-python - # docker-compose build conda-python-kartothek - # docker-compose run --rm conda-python-kartothek - image: ${REPO}:${ARCH}-conda-python-${PYTHON}-kartothek-${KARTOTHEK} - build: - context: . - dockerfile: ci/docker/conda-python-kartothek.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON}-kartothek-${KARTOTHEK} - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - kartothek: ${KARTOTHEK} - shm_size: *shm-size - environment: - <<: *ccache - volumes: *conda-volumes - command: - ["/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/integration_kartothek.sh /kartothek /build"] - - ################################## R ######################################## - - ubuntu-r: - # Usage: - # docker-compose build ubuntu-cpp - # docker-compose build ubuntu-r - # docker-compose run ubuntu-r - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-r-${R} - build: - context: . 
- dockerfile: ci/docker/linux-apt-r.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-r-${R} - args: - arch: ${ARCH} - r: ${R} - base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - shm_size: *shm-size - environment: - <<: *ccache - ARROW_R_CXXFLAGS: '-Werror' - LIBARROW_BUILD: 'false' - NOT_CRAN: 'true' - volumes: *ubuntu-volumes - command: > - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/r_test.sh /arrow" - - r: - # This lets you test building/installing the arrow R package - # (including building the C++ library) on any Docker image that contains R - # - # Usage: - # R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose build r - # R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose run r - image: ${REPO}:r-${R_ORG}-${R_IMAGE}-${R_TAG} - build: - context: . - dockerfile: ci/docker/linux-r.dockerfile - cache_from: - - ${REPO}:r-${R_ORG}-${R_IMAGE}-${R_TAG} - args: - base: ${R_ORG}/${R_IMAGE}:${R_TAG} - r_dev: ${ARROW_R_DEV} - devtoolset_version: ${DEVTOOLSET_VERSION} - shm_size: *shm-size - environment: - LIBARROW_DOWNLOAD: "false" - ARROW_SOURCE_HOME: "/arrow" - ARROW_R_DEV: ${ARROW_R_DEV} - # To test for CRAN release, delete ^^ these two env vars so we download the Apache release - ARROW_USE_PKG_CONFIG: "false" - devtoolset_version: ${DEVTOOLSET_VERSION} - volumes: - - .:/arrow:delegated - command: > - /bin/bash -c "/arrow/ci/scripts/r_test.sh /arrow" - - ubuntu-r-sanitizer: - # Only 18.04 and amd64 supported - # Usage: - # docker-compose build ubuntu-r-sanitizer - # docker-compose run ubuntu-r-sanitizer - image: ${REPO}:amd64-ubuntu-18.04-r-sanitizer - cap_add: - # LeakSanitizer and gdb requires ptrace(2) - - SYS_PTRACE - build: - context: . 
- dockerfile: ci/docker/linux-r.dockerfile - cache_from: - - ${REPO}:amd64-ubuntu-18.04-r-sanitizer - args: - base: wch1/r-debug:latest - r_bin: RDsan - environment: - <<: *ccache - volumes: *ubuntu-volumes - command: > - /bin/bash -c " - /arrow/ci/scripts/r_sanitize.sh /arrow" - - ################################# Go ######################################## - - debian-go: - # Usage: - # docker-compose build debian-go - # docker-compose run debian-go - image: ${REPO}:${ARCH}-debian-10-go-${GO} - build: - context: . - dockerfile: ci/docker/debian-10-go.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-10-go-${GO} - args: - arch: ${ARCH} - go: ${GO} - shm_size: *shm-size - volumes: *debian-volumes - command: &go-command > - /bin/bash -c " - /arrow/ci/scripts/go_build.sh /arrow && - /arrow/ci/scripts/go_test.sh /arrow" - - ############################# JavaScript #################################### - - debian-js: - # Usage: - # docker-compose build debian-js - # docker-compose run debian-js - image: ${REPO}:${ARCH}-debian-10-js-${NODE} - build: - context: . - dockerfile: ci/docker/debian-10-js.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-10-js-${NODE} - args: - arch: ${ARCH} - node: ${NODE} - shm_size: *shm-size - volumes: *debian-volumes - command: &js-command > - /bin/bash -c " - /arrow/ci/scripts/js_build.sh /arrow && - /arrow/ci/scripts/js_test.sh /arrow" - - #################################### C# ##################################### - - ubuntu-csharp: - # Usage: - # docker-compose build ubuntu-csharp - # docker-compose run ubuntu-csharp - image: ${REPO}:${ARCH}-ubuntu-18.04-csharp-${DOTNET} - build: - context: . 
- dockerfile: ci/docker/ubuntu-18.04-csharp.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-18.04-csharp-${DOTNET} - args: - dotnet: ${DOTNET} - platform: bionic # use bionic-arm64v8 for ARM - shm_size: *shm-size - volumes: *ubuntu-volumes - command: &csharp-command > - /bin/bash -c " - /arrow/ci/scripts/csharp_build.sh /arrow && - /arrow/ci/scripts/csharp_test.sh /arrow && - /arrow/ci/scripts/csharp_pack.sh /arrow" - - ################################ Java ####################################### - - debian-java: - # Usage: - # docker-compose build debian-java - # docker-compose run debian-java - image: ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN} - build: - context: . - dockerfile: ci/docker/debian-9-java.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN} - args: - arch: ${ARCH} - jdk: ${JDK} - maven: ${MAVEN} - shm_size: *shm-size - volumes: &java-volumes - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - command: &java-command > - /bin/bash -c " - /arrow/ci/scripts/java_build.sh /arrow /build && - /arrow/ci/scripts/java_test.sh /arrow /build" - - debian-java-jni: - # Includes plasma test and jni for gandiva and orc. - # Usage: - # docker-compose build debian-java - # docker-compose build debian-java-jni - # docker-compose run debian-java-jni - image: ${REPO}:${ARCH}-debian-9-java-jni - build: - context: . 
- dockerfile: ci/docker/linux-apt-jni.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-9-java-jni - args: - base: ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN} - llvm: ${LLVM} - shm_size: *shm-size - environment: - <<: *ccache - volumes: - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - - ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated - command: - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/java_build.sh /arrow /build && - /arrow/ci/scripts/java_test.sh /arrow /build" - - ############################## Integration ################################## - - conda-integration: - # Usage: - # docker-compose build conda-cpp - # docker-compose build conda-integration - # docker-compose run conda-integration - image: ${REPO}:${ARCH}-conda-integration - build: - context: . - dockerfile: ci/docker/conda-integration.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-integration - args: - repo: ${REPO} - arch: ${ARCH} - jdk: ${JDK} - # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should - # be set to ${MAVEN} - maven: 3.5 - node: ${NODE} - go: ${GO} - volumes: *conda-volumes - environment: - <<: *ccache - # tell archery where the arrow binaries are located - ARROW_CPP_EXE_PATH: /build/cpp/debug - command: - ["/arrow/ci/scripts/rust_build.sh /arrow /build && - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/go_build.sh /arrow && - /arrow/ci/scripts/java_build.sh /arrow /build && - /arrow/ci/scripts/js_build.sh /arrow /build && - /arrow/ci/scripts/integration_arrow.sh /arrow /build"] - - ################################ Docs ####################################### - - ubuntu-docs: - # Usage: - # docker-compose build ubuntu-cpp - # docker-compose build ubuntu-python - # docker-compose build ubuntu-docs - # docker-compose run --rm ubuntu-docs - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-docs - build: - context: . 
- dockerfile: ci/docker/linux-apt-docs.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-docs - args: - jdk: ${JDK} - node: ${NODE} - base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3 - environment: - <<: *ccache - ARROW_CUDA: "ON" - ARROW_GLIB_GTK_DOC: "true" - volumes: *ubuntu-volumes - command: &docs-command > - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build true && - /arrow/ci/scripts/c_glib_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/java_build.sh /arrow /build true && - /arrow/ci/scripts/js_build.sh /arrow true && - /arrow/ci/scripts/r_build.sh /arrow true && - /arrow/ci/scripts/docs_build.sh /arrow /build" - - ################################# Tools ##################################### - - ubuntu-lint: - # Usage: - # docker-compose build ubuntu-cpp - # docker-compose build ubuntu-lint - # docker-compose run ubuntu-lint - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-lint - build: - context: . - dockerfile: ci/docker/linux-apt-lint.dockerfile - cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-lint - args: - base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp - clang_tools: ${CLANG_TOOLS} - rust: ${RUST} - environment: - <<: *ccache - volumes: *ubuntu-volumes - command: > - /bin/bash -c " - pip install -e /arrow/dev/archery && - archery lint --all --no-clang-tidy --no-iwyu --no-numpydoc" - - ######################### Integration Tests ################################# - - postgres: - # required for the impala service - image: postgres - ports: - - 5432:5432 - environment: - POSTGRES_PASSWORD: postgres - - impala: - # required for the hiveserver and hdfs tests - image: ibisproject/impala:latest - hostname: impala - links: - - postgres:postgres - environment: - PGPASSWORD: postgres - ports: - # HDFS - - 9020:9020 - - 50070:50070 - - 50075:50075 - - 8020:8020 - - 8042:8042 - # Hive - - 9083:9083 - # Impala - - 21000:21000 - - 21050:21050 - - 25000:25000 - - 25010:25010 - - 25020:25020 - - 
conda-cpp-hiveserver2: - # Usage: - # docker-compose build conda-cpp - # docker-compose build conda-cpp-hiveserver2 - # docker-compose run conda-cpp-hiveserver2 - image: ${REPO}:${ARCH}-conda-cpp - links: - - impala:impala - environment: - <<: *ccache - ARROW_FLIGHT: "OFF" - ARROW_GANDIVA: "OFF" - ARROW_PLASMA: "OFF" - ARROW_HIVESERVER2: "ON" - ARROW_HIVESERVER2_TEST_HOST: impala - shm_size: *shm-size - volumes: *conda-volumes - command: - ["/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/integration_hiveserver2.sh /arrow /build"] - - conda-python-hdfs: - # Usage: - # docker-compose build conda-cpp - # docker-compose build conda-python - # docker-compose build conda-python-hdfs - # docker-compose run conda-python-hdfs - image: ${REPO}:${ARCH}-conda-python-${PYTHON}-hdfs-${HDFS} - build: - context: . - dockerfile: ci/docker/conda-python-hdfs.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON}-hdfs-${HDFS} - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - jdk: ${JDK} - # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should - # be set to ${MAVEN} - maven: 3.5 - hdfs: ${HDFS} - links: - - impala:impala - environment: - <<: *ccache - ARROW_HDFS: "ON" - ARROW_HDFS_TEST_HOST: impala - ARROW_HDFS_TEST_PORT: 8020 - ARROW_HDFS_TEST_USER: hdfs - ARROW_S3: "OFF" - CMAKE_UNITY_BUILD: "ON" - shm_size: *shm-size - volumes: &conda-maven-volumes - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - - ${DOCKER_VOLUME_PREFIX}conda-ccache:/ccache:delegated - command: - ["/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/integration_hdfs.sh /arrow /build"] - - conda-python-spark: - # Usage: - # docker-compose build conda-cpp - # docker-compose build conda-python - # docker-compose build conda-python-spark - # docker-compose run conda-python-spark - image: ${REPO}:${ARCH}-conda-python-${PYTHON}-spark-${SPARK} - build: - 
context: . - dockerfile: ci/docker/conda-python-spark.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON}-spark-${SPARK} - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - jdk: ${JDK} - # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should - # be set to ${MAVEN} - maven: 3.5 - spark: ${SPARK} - shm_size: *shm-size - environment: - <<: *ccache - volumes: *conda-maven-volumes - command: - ["/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/java_build.sh /arrow /build && - /arrow/ci/scripts/integration_spark.sh /arrow /spark ${TEST_PYARROW_ONLY:-false}"]