benchmarks: add a `venv` command to bench.sh for compare.py's Python dependencies

* bench.sh: VIRTUAL_ENV defaults to $SCRIPT_DIR/venv; `bench.sh venv` creates
  that venv and installs requirements.txt into it; `compare` runs python3 with
  the venv's bin directory first on PATH.
* compare.py: the ImportError hint now points at `./bench.sh venv`.
* requirements.txt: new file listing `rich`.
* .gitignore: ignore the generated venv directory.

NOTE(review): leading indentation and column alignment of context lines were
reconstructed conventionally, and the bench.sh blob hash on its `index` line
predates the edits to the added lines -- regenerate this patch from the
working tree before applying it.

diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
index 2c574ff30d12..c35b1a7c1944 100644
--- a/benchmarks/.gitignore
+++ b/benchmarks/.gitignore
@@ -1,2 +1,3 @@
 data
-results
\ No newline at end of file
+results
+venv
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
index 77779a12c450..d4de7faa5205 100755
--- a/benchmarks/bench.sh
+++ b/benchmarks/bench.sh
@@ -37,6 +37,7 @@ DATA_DIR=${DATA_DIR:-$SCRIPT_DIR/data}
 #CARGO_COMMAND=${CARGO_COMMAND:-"cargo run --release"}
 CARGO_COMMAND=${CARGO_COMMAND:-"cargo run --profile release-nonlto"} # for faster iterations
 PREFER_HASH_JOIN=${PREFER_HASH_JOIN:-true}
+VIRTUAL_ENV=${VIRTUAL_ENV:-$SCRIPT_DIR/venv}
 
 usage() {
     echo "
@@ -46,6 +47,7 @@ Usage:
 $0 data [benchmark]
 $0 run [benchmark]
 $0 compare
+$0 venv
 
 **********
 Examples:
@@ -62,6 +64,7 @@ DATAFUSION_DIR=/source/datafusion ./bench.sh run tpch
 data: Generates or downloads data needed for benchmarking
 run: Runs the named benchmark
 compare: Compares results from benchmark runs
+venv: Creates new venv (unless already exists) and installs compare's requirements into it
 
 **********
 * Benchmarks
@@ -84,7 +87,8 @@ DATA_DIR directory to store datasets
 CARGO_COMMAND command that runs the benchmark binary
 DATAFUSION_DIR directory to use (default $DATAFUSION_DIR)
 RESULTS_NAME folder where the benchmark files are stored
-PREFER_HASH_JOIN Prefer hash join algorithm(default true)
+PREFER_HASH_JOIN Prefer hash join algorithm (default true)
+VIRTUAL_ENV Python venv to use for compare and venv commands (default ./venv, override by sourcing <your-venv>/bin/activate)
 "
     exit 1
 }
@@ -243,6 +247,9 @@ main() {
         compare)
             compare_benchmarks "$ARG2" "$ARG3"
             ;;
+        venv)
+            setup_venv
+            ;;
         "")
             usage
             ;;
@@ -448,7 +455,7 @@ compare_benchmarks() {
             echo "--------------------"
             echo "Benchmark ${bench}"
             echo "--------------------"
-            python3 "${SCRIPT_DIR}"/compare.py "${RESULTS_FILE1}" "${RESULTS_FILE2}"
+            PATH=$VIRTUAL_ENV/bin:$PATH python3 "${SCRIPT_DIR}"/compare.py "${RESULTS_FILE1}" "${RESULTS_FILE2}"
         else
             echo "Note: Skipping ${RESULTS_FILE1} as ${RESULTS_FILE2} does not exist"
         fi
@@ -456,5 +463,11 @@ compare_benchmarks() {
 
 }
 
+# Creates the venv at $VIRTUAL_ENV and installs the requirements that sit next to this script
+setup_venv() {
+    python3 -m venv "$VIRTUAL_ENV"
+    PATH=$VIRTUAL_ENV/bin:$PATH python3 -m pip install -r "$SCRIPT_DIR/requirements.txt"
+}
+
 # And start the process up
 main
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
index ec2b28fa0556..2574c0735ca8 100755
--- a/benchmarks/compare.py
+++ b/benchmarks/compare.py
@@ -29,7 +29,7 @@
     from rich.console import Console
     from rich.table import Table
 except ImportError:
-    print("Try `pip install rich` for using this script.")
+    print("Couldn't import modules -- run `./bench.sh venv` first")
     raise
 
 
diff --git a/benchmarks/requirements.txt b/benchmarks/requirements.txt
new file mode 100644
index 000000000000..20a5a2bddbf2
--- /dev/null
+++ b/benchmarks/requirements.txt
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+rich