59 changes: 34 additions & 25 deletions docker/compose/docker-compose_hadoop284_hive233_spark244.yml
@@ -29,11 +29,11 @@ services:
env_file:
- ./hadoop.env
healthcheck:
test: ["CMD", "curl", "-f", "http://namenode:50070"]
test: [ "CMD", "curl", "-f", "http://namenode:50070" ]
interval: 30s
timeout: 10s
retries: 3

datanode1:
image: apachehudi/hudi-hadoop_2.8.4-datanode:latest
container_name: datanode1
@@ -49,7 +49,7 @@ services:
- "namenode"
- "historyserver"
healthcheck:
test: ["CMD", "curl", "-f", "http://datanode1:50075"]
test: [ "CMD", "curl", "-f", "http://datanode1:50075" ]
interval: 30s
timeout: 10s
retries: 3
@@ -69,7 +69,7 @@ services:
ports:
- "58188:8188"
healthcheck:
test: ["CMD", "curl", "-f", "http://historyserver:8188"]
test: [ "CMD", "curl", "-f", "http://historyserver:8188" ]
interval: 30s
timeout: 10s
retries: 3
@@ -84,7 +84,7 @@ services:
- hive-metastore-postgresql:/var/lib/postgresql
hostname: hive-metastore-postgresql
container_name: hive-metastore-postgresql

hivemetastore:
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest
hostname: hivemetastore
@@ -100,7 +100,7 @@ services:
ports:
- "9083:9083"
healthcheck:
test: ["CMD", "nc", "-z", "hivemetastore", "9083"]
test: [ "CMD", "nc", "-z", "hivemetastore", "9083" ]
interval: 30s
timeout: 10s
retries: 3
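# The healthchecks above let Docker track per-container state (curl for the HTTP UIs, nc for the
# metastore thrift port). A quick way to read the resulting status from the host -- a sketch that
# assumes the container names match the service names used in this file:
#   docker inspect --format '{{.State.Health.Status}}' namenode        # starting | healthy | unhealthy
#   docker inspect --format '{{.State.Health.Status}}' hivemetastore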
@@ -201,25 +201,34 @@ services:
command: coordinator

presto-worker-1:
container_name: presto-worker-1
hostname: presto-worker-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.217:latest
depends_on: ["presto-coordinator-1"]
environment:
- PRESTO_JVM_MAX_HEAP=512M
- PRESTO_QUERY_MAX_MEMORY=1GB
- PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
- PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
- PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
- TERM=xterm
links:
- "hivemetastore"
- "hiveserver"
- "hive-metastore-postgresql"
- "namenode"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
command: worker
container_name: presto-worker-1
hostname: presto-worker-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.217:latest
depends_on: [ "presto-coordinator-1" ]
environment:
- PRESTO_JVM_MAX_HEAP=512M
- PRESTO_QUERY_MAX_MEMORY=1GB
- PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
- PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
- PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
- TERM=xterm
links:
- "hivemetastore"
- "hiveserver"
- "hive-metastore-postgresql"
- "namenode"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
command: worker

graphite:
container_name: graphite
hostname: graphite
image: graphiteapp/graphite-statsd
ports:
- 80:80
- 2003-2004:2003-2004
- 8126:8126
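# Graphite's web UI is published on port 80 and the carbon plaintext receiver on 2003-2004. A minimal
# smoke test -- illustrative only, assuming the stack runs on localhost and GNU netcat is available
# (the metric name here is made up):
#   echo "hudi.smoke.test 1 $(date +%s)" | nc -q0 localhost 2003
#   curl -s "http://localhost/render?target=hudi.smoke.test&format=json"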

adhoc-1:
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest
28 changes: 28 additions & 0 deletions docker/emr_test_suite_setup.sh
@@ -0,0 +1,28 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

sudo yum install -y docker
service docker start
docker run -d \
--name graphite \
-p 80:80 \
-p 2003-2004:2003-2004 \
-p 2023-2024:2023-2024 \
-p 8125:8125/udp \
-p 8126:8126 \
graphiteapp/graphite-statsd
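Once this script has run on the EMR node, the container can be checked before wiring the test suite's metrics to it; for example (a sketch, assuming the ports above are free and the commands run on the same host):

docker ps --filter name=graphite --format '{{.Names}}: {{.Status}}'
curl -sf http://localhost:80/ > /dev/null && echo "graphite-web is reachable"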
106 changes: 71 additions & 35 deletions docker/generate_test_suite.sh
@@ -16,6 +16,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.

usage="
USAGE:
$(basename "$0") [--help] [--all boolean] -- Script to generate test suites according to the arguments provided and run them.

where:
--help show this help text
--all when set to true, include the medium, long and cluster test suites
--execute_test_suite whether to execute the generated test suites (DEFAULT- true)
--medium_num_iterations number of medium iterations (DEFAULT- 20)
--long_num_iterations number of long iterations (DEFAULT- 50)
--intermittent_delay_mins delay in minutes after every test run (DEFAULT- 1)
--table_type hoodie table type to test (DEFAULT COPY_ON_WRITE)
--include_long_test_suite_yaml include long infra test suite (DEFAULT false)
--include_medium_test_suite_yaml include medium infra test suite (DEFAULT false)
--cluster_num_itr number of cluster iterations (DEFAULT 30)
--include_cluster_yaml include cluster infra test suite (DEFAULT false)
--input_path input path for test in docker image (DEFAULT /user/hive/warehouse/hudi-integ-test-suite/input/)
--output_path output path for test in docker image (DEFAULT /user/hive/warehouse/hudi-integ-test-suite/output/)
"


MEDIUM_NUM_ITR=20
LONG_NUM_ITR=50
DELAY_MINS=1
@@ -39,6 +61,17 @@ do
key="$1"

case $key in
--help)
echo "$usage"
exit
;;
--all)
INCLUDE_LONG_TEST_SUITE="$2"
INCLUDE_MEDIUM_TEST_SUITE="$2"
INCLUDE_CLUSTER_YAML="$2"
shift # past argument
shift # past value
;;
--execute_test_suite)
EXECUTE_TEST_SUITE="$2"
shift # past argument
@@ -115,12 +148,15 @@
;;
*) # unknown option
POSITIONAL+=("$1") # save it in an array for later
echo "Unknown argument provided - '$1'"
echo "$usage"
exit 1
shift # past argument
;;
esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters

echo "$POSITIONAL"
echo "Include Medium test suite $INCLUDE_MEDIUM_TEST_SUITE"
if $INCLUDE_MEDIUM_TEST_SUITE ; then
echo "Medium test suite iterations = ${MEDIUM_NUM_ITR}"
@@ -161,78 +197,78 @@ fi

cp demo/config/test-suite/templates/sanity.yaml.template demo/config/test-suite/staging/sanity.yaml

sed -i '' "s/NAME/$TABLE_TYPE/" demo/config/test-suite/staging/sanity.yaml
sed -i "s/NAME/$TABLE_TYPE/" demo/config/test-suite/staging/sanity.yaml

cp demo/config/test-suite/templates/test.properties.template demo/config/test-suite/staging/test.properties
sed -i '' "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/test.properties
sed -i "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/test.properties

cp demo/config/test-suite/templates/spark_command.txt.template demo/config/test-suite/staging/sanity_spark_command.sh

sed -i '' "s/JAR_NAME/$JAR_NAME/" demo/config/test-suite/staging/sanity_spark_command.sh
sed -i '' "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/sanity_spark_command.sh
sed -i '' "s/OUTPUT_PATH/$OUTPUT_PATH/" demo/config/test-suite/staging/sanity_spark_command.sh
sed -i '' "s/input_yaml/sanity.yaml/" demo/config/test-suite/staging/sanity_spark_command.sh
sed -i '' "s/TABLE_TYPE/$TABLE_TYPE/" demo/config/test-suite/staging/sanity_spark_command.sh
sed -i "s/JAR_NAME/$JAR_NAME/" demo/config/test-suite/staging/sanity_spark_command.sh
sed -i "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/sanity_spark_command.sh
sed -i "s/OUTPUT_PATH/$OUTPUT_PATH/" demo/config/test-suite/staging/sanity_spark_command.sh
sed -i "s/input_yaml/sanity.yaml/" demo/config/test-suite/staging/sanity_spark_command.sh
sed -i "s/TABLE_TYPE/$TABLE_TYPE/" demo/config/test-suite/staging/sanity_spark_command.sh

if $INCLUDE_MEDIUM_TEST_SUITE ; then

cp demo/config/test-suite/templates/medium_test_suite.yaml.template demo/config/test-suite/staging/medium_test_suite.yaml

sed -i '' "s/NAME/$TABLE_TYPE/" demo/config/test-suite/staging/medium_test_suite.yaml
sed -i '' "s/medium_num_iterations/$MEDIUM_NUM_ITR/" demo/config/test-suite/staging/medium_test_suite.yaml
sed -i '' "s/delay_in_mins/$DELAY_MINS/" demo/config/test-suite/staging/medium_test_suite.yaml
sed -i "s/NAME/$TABLE_TYPE/" demo/config/test-suite/staging/medium_test_suite.yaml
sed -i "s/medium_num_iterations/$MEDIUM_NUM_ITR/" demo/config/test-suite/staging/medium_test_suite.yaml
sed -i "s/delay_in_mins/$DELAY_MINS/" demo/config/test-suite/staging/medium_test_suite.yaml

cp demo/config/test-suite/templates/spark_command.txt.template demo/config/test-suite/staging/medium_test_suite_spark_command.sh

sed -i '' "s/JAR_NAME/$JAR_NAME/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh
sed -i '' "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh
sed -i '' "s/OUTPUT_PATH/$OUTPUT_PATH/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh
sed -i '' "s/input_yaml/medium_test_suite.yaml/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh
sed -i '' "s/TABLE_TYPE/$TABLE_TYPE/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh
sed -i "s/JAR_NAME/$JAR_NAME/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh
sed -i "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh
sed -i "s/OUTPUT_PATH/$OUTPUT_PATH/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh
sed -i "s/input_yaml/medium_test_suite.yaml/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh
sed -i "s/TABLE_TYPE/$TABLE_TYPE/" demo/config/test-suite/staging/medium_test_suite_spark_command.sh

fi

if $INCLUDE_LONG_TEST_SUITE ; then

cp demo/config/test-suite/templates/long_test_suite.yaml.template demo/config/test-suite/staging/long_test_suite.yaml

sed -i '' "s/NAME/$TABLE_TYPE/" demo/config/test-suite/staging/long_test_suite.yaml
sed -i '' "s/long_num_iterations/$LONG_NUM_ITR/" demo/config/test-suite/staging/long_test_suite.yaml
sed -i '' "s/delay_in_mins/$DELAY_MINS/" demo/config/test-suite/staging/long_test_suite.yaml
sed -i "s/NAME/$TABLE_TYPE/" demo/config/test-suite/staging/long_test_suite.yaml
sed -i "s/long_num_iterations/$LONG_NUM_ITR/" demo/config/test-suite/staging/long_test_suite.yaml
sed -i "s/delay_in_mins/$DELAY_MINS/" demo/config/test-suite/staging/long_test_suite.yaml

cp demo/config/test-suite/templates/spark_command.txt.template demo/config/test-suite/staging/long_test_suite_spark_command.sh

sed -i '' "s/JAR_NAME/$JAR_NAME/" demo/config/test-suite/staging/long_test_suite_spark_command.sh
sed -i '' "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/long_test_suite_spark_command.sh
sed -i '' "s/OUTPUT_PATH/$OUTPUT_PATH/" demo/config/test-suite/staging/long_test_suite_spark_command.sh
sed -i '' "s/input_yaml/long_test_suite.yaml/" demo/config/test-suite/staging/long_test_suite_spark_command.sh
sed -i '' "s/TABLE_TYPE/$TABLE_TYPE/" demo/config/test-suite/staging/long_test_suite_spark_command.sh
sed -i "s/JAR_NAME/$JAR_NAME/" demo/config/test-suite/staging/long_test_suite_spark_command.sh
sed -i "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/long_test_suite_spark_command.sh
sed -i "s/OUTPUT_PATH/$OUTPUT_PATH/" demo/config/test-suite/staging/long_test_suite_spark_command.sh
sed -i "s/input_yaml/long_test_suite.yaml/" demo/config/test-suite/staging/long_test_suite_spark_command.sh
sed -i "s/TABLE_TYPE/$TABLE_TYPE/" demo/config/test-suite/staging/long_test_suite_spark_command.sh

fi

if $INCLUDE_CLUSTER_YAML ; then

cp demo/config/test-suite/templates/clustering.yaml.template demo/config/test-suite/staging/clustering.yaml

sed -i '' "s/NAME/$TABLE_TYPE/" demo/config/test-suite/staging/clustering.yaml
sed -i '' "s/clustering_num_iterations/$CLUSTER_NUM_ITR/" demo/config/test-suite/staging/clustering.yaml
sed -i '' "s/delay_in_mins/$CLUSTER_DELAY_MINS/" demo/config/test-suite/staging/clustering.yaml
sed -i '' "s/clustering_itr_count/$CLUSTER_ITR_COUNT/" demo/config/test-suite/staging/clustering.yaml
sed -i "s/NAME/$TABLE_TYPE/" demo/config/test-suite/staging/clustering.yaml
sed -i "s/clustering_num_iterations/$CLUSTER_NUM_ITR/" demo/config/test-suite/staging/clustering.yaml
sed -i "s/delay_in_mins/$CLUSTER_DELAY_MINS/" demo/config/test-suite/staging/clustering.yaml
sed -i "s/clustering_itr_count/$CLUSTER_ITR_COUNT/" demo/config/test-suite/staging/clustering.yaml

cp demo/config/test-suite/templates/spark_command.txt.template demo/config/test-suite/staging/clustering_spark_command.sh

sed -i '' "s/JAR_NAME/$JAR_NAME/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i '' "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i '' "s/OUTPUT_PATH/$OUTPUT_PATH/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i '' "s/input_yaml/clustering.yaml/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i '' "s/TABLE_TYPE/$TABLE_TYPE/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i '' "/use-deltastreamer/d" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i "s/JAR_NAME/$JAR_NAME/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i "s/INPUT_PATH/$INPUT_PATH/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i "s/OUTPUT_PATH/$OUTPUT_PATH/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i "s/input_yaml/clustering.yaml/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i "s/TABLE_TYPE/$TABLE_TYPE/" demo/config/test-suite/staging/clustering_spark_command.sh
sed -i "/use-deltastreamer/d" demo/config/test-suite/staging/clustering_spark_command.sh

fi

if $EXECUTE_TEST_SUITE ; then

docker cp $CUR_DIR/../packaging/hudi-integ-test-bundle/target/$JAR_NAME adhoc-2:/opt/
docker cp $CUR_DIR/../packaging/hudi-integ-test-bundle/target/"$JAR_NAME" adhoc-2:/opt/
docker exec -it adhoc-2 /bin/bash rm -rf /opt/staging*
docker cp demo/config/test-suite/staging/ adhoc-2:/opt/
docker exec -it adhoc-2 /bin/bash echo "\n============================== Executing sanity test suite ============================== "