#!/usr/bin/env bash
#
# dev-support/bin/tez_run_example.sh
#
# Sets up a local Hadoop and Tez environment for running a simple word count
# example.
#
# Prerequisites:
#   1. java is installed and JAVA_HOME is set
#   2. "ssh localhost" works without a password
#
# All parameters are optional environment variables:
#   TEZ_VERSION:             defaults to the latest version listed on the
#                            Apache Tez download page
#   HADOOP_VERSION:          defaults to the version of the hadoop-hdfs-client
#                            jar shipped inside the Tez binary tarball
#   TEZ_EXAMPLE_WORKING_DIR: defaults to the current working directory

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# TEZ_VERSION comes from the environment; when it is unset or empty, the
# highest x.y.z version string found on the download page is used instead.
if [[ -z "${TEZ_VERSION:-}" ]]; then
  TEZ_VERSION=$(
    curl -s "https://downloads.apache.org/tez/" \
      | grep --color=never -o '[0-9]\+\.[0-9]\+\.[0-9]\+' \
      | sed -n '/\/$/!p' \
      | sort -V \
      | tail -1
  )
fi
# curl runs with -s, so a network failure is silent; catch the empty result
# here instead of building broken URLs from it.
[[ -n "${TEZ_VERSION}" ]] \
  || die "Could not determine TEZ_VERSION; set it explicitly (for example 0.10.4)"
export TEZ_VERSION

export TEZ_EXAMPLE_WORKING_DIR="${TEZ_EXAMPLE_WORKING_DIR:-$PWD}"
# Everything below downloads and extracts into the current directory, so a
# failed cd must stop the script.
cd "${TEZ_EXAMPLE_WORKING_DIR}" \
  || die "Cannot change directory to ${TEZ_EXAMPLE_WORKING_DIR}"

echo "TEZ_VERSION: ${TEZ_VERSION}"

tez_dir="apache-tez-${TEZ_VERSION}-bin"
tez_tarball="${tez_dir}.tar.gz"

# -nc keeps an already downloaded tarball. The exit status is intentionally
# not checked here; the presence of the tarball is verified before extraction.
wget -nc "https://archive.apache.org/dist/tez/${TEZ_VERSION}/${tez_tarball}"

# The Tez tarball has to be extracted early because the Hadoop version is
# read from the jars it bundles.
if [[ ! -d "${tez_dir}" ]]; then
  [[ -f "${tez_tarball}" ]] || die "Download of ${tez_tarball} failed"
  tar -xzf "${tez_tarball}" || die "Failed to extract ${tez_tarball}"
fi

# HADOOP_VERSION comes from the environment; when it is unset or empty, it is
# parsed from the name of the bundled hadoop-hdfs-client-<x.y.z>.jar. Only an
# exact x.y.z jar name is accepted, so other jars matching the glob are
# skipped instead of corrupting the result.
if [[ -z "${HADOOP_VERSION:-}" ]]; then
  for jar in "${tez_dir}"/lib/hadoop-hdfs-client-*.jar; do
    if [[ "${jar##*/}" =~ ^hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar$ ]]; then
      HADOOP_VERSION="${BASH_REMATCH[1]}"
      break
    fi
  done
fi
[[ -n "${HADOOP_VERSION:-}" ]] \
  || die "Could not determine HADOOP_VERSION from ${tez_dir}/lib; set it explicitly"
export HADOOP_VERSION
# Example HADOOP_VERSION value: 3.4.1

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# HADOOP_HOME, TEZ_HOME and TEZ_VERSION are used below but are not assigned in
# this part of the script.
# NOTE(review): they are presumably exported by an earlier setup step that is
# not visible here - confirm against the full file. Without these guards an
# unset HADOOP_HOME would make the script write to /etc/hadoop/*.xml.
: "${HADOOP_HOME:?HADOOP_HOME must point to the extracted Hadoop distribution}"
: "${TEZ_HOME:?TEZ_HOME must point to the extracted Tez distribution}"
: "${TEZ_VERSION:?TEZ_VERSION must be set}"

# Minimal HDFS configuration: one replica.
cat <<'EOF' > "${HADOOP_HOME}/etc/hadoop/hdfs-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
EOF

# Default file system: HDFS on localhost:9000.
cat <<'EOF' > "${HADOOP_HOME}/etc/hadoop/core-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
EOF

# YARN NodeManager auxiliary service: mapreduce_shuffle.
cat <<'EOF' > "${HADOOP_HOME}/etc/hadoop/yarn-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
EOF

# Optionally stop previous clusters, if any. Best effort: a failure here
# (for example nothing was running) must not abort the script.
"${HADOOP_HOME}/sbin/stop-dfs.sh"
"${HADOOP_HOME}/sbin/stop-yarn.sh"

# Remove the old DataNode data before the NameNode is re-formatted. Fall back
# to `id -un` when USER is not set, so the path never collapses to
# /tmp/hadoop-/dfs/data.
hadoop_user="${USER:-$(id -un)}"
rm -rf -- "/tmp/hadoop-${hadoop_user:?}/dfs/data"
hdfs namenode -format -force || die "Formatting the NameNode failed"

"${HADOOP_HOME}/sbin/start-dfs.sh" || die "Starting HDFS failed"
"${HADOOP_HOME}/sbin/start-yarn.sh" || die "Starting YARN failed"

# Publish the Tez tarball on HDFS; tez.lib.uris below points at it.
hadoop fs -mkdir -p "/apps/tez-${TEZ_VERSION}" \
  || die "Cannot create /apps/tez-${TEZ_VERSION} on HDFS"
hadoop fs -copyFromLocal "${TEZ_HOME}/share/tez.tar.gz" "/apps/tez-${TEZ_VERSION}" \
  || die "Cannot upload tez.tar.gz to HDFS"

# Create a simple tez-site.xml. The heredoc delimiter is unquoted on purpose
# so that ${TEZ_VERSION} is expanded into the file.
cat <<EOF > "${TEZ_HOME}/conf/tez-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
    <property>
        <name>tez.lib.uris</name>
        <value>/apps/tez-${TEZ_VERSION}/tez.tar.gz</value>
    </property>
</configuration>
EOF

# Create a simple input file.
cat <<'EOF' > ./words.txt
Apple
Banana
Car
Apple
Banana
Car
Dog
Elephant
Friend
Game
EOF

hadoop fs -copyFromLocal words.txt /words.txt \
  || die "Cannot upload words.txt to HDFS"

export HADOOP_USER_CLASSPATH_FIRST=true
# Finally run the example.
yarn jar "${TEZ_HOME}/tez-examples-${TEZ_VERSION}.jar" orderedwordcount /words.txt /words_out \
  || die "The orderedwordcount example failed"

# Check the output.
hadoop fs -ls /words_out
hadoop fs -text /words_out/part-v002-o000-r-00000

# NOTE(review): the script continues here with a statement starting with
# "cat <" that is cut off in the available text; restore it from the complete
# file.