44# 1. java is installed and JAVA_HOME is set
55# 2. ssh localhost works without password
66
7- # configure this if needed, by default it will use the latest stable versions in the current directory
8- export TEZ_VERSION= $( curl -s " https://downloads.apache.org/tez/ " | grep -oP ' \K[0-9]+\.[0-9]+\.[0-9]+(?=/) ' | sort -V | tail -1 ) # e.g. 0.10.4
9- export HADOOP_VERSION= $( curl -s " https://downloads.apache.org/hadoop/common/ " | grep -oP ' hadoop-\K[0-9]+\.[0-9]+\.[0-9]+(?=/) ' | sort -V | tail -1 ) # e.g. 3.4.1
10- export HADOOP_STACK_HOME= $PWD
7+ # All parameters are optional:
8+ # TEZ_VERSION: defaults to the latest version available on the Apache Tez download page
9+ # HADOOP_VERSION: defaults to the Hadoop version bundled with the selected TEZ_VERSION (read from the hadoop-hdfs-client jar in the Tez tarball)
10+ # TEZ_EXAMPLE_WORKING_DIR: defaults to the current working directory
1111
12- echo " Demo script is running in $HADOOP_STACK_HOME with TEZ version $TEZ_VERSION and HADOOP version $HADOOP_VERSION "
12+ # TEZ_VERSION comes from the environment variable if set; otherwise the latest version listed on the Apache Tez download page is used
13+ export TEZ_VERSION=${TEZ_VERSION:= $(curl -s " https://downloads.apache.org/tez/" | grep --color=never -o ' [0-9]\+\.[0-9]\+\.[0-9]\+' | sed -n ' /\/$/!p' | sort -V | tail -1)} # e.g. 0.10.4
14+ export TEZ_EXAMPLE_WORKING_DIR=${TEZ_EXAMPLE_WORKING_DIR:= $PWD }
15+ cd $TEZ_EXAMPLE_WORKING_DIR
1316
14- cd $HADOOP_STACK_HOME
15- wget -nc https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION /hadoop-$HADOOP_VERSION .tar.gz
17+ echo " TEZ_VERSION: $TEZ_VERSION "
1618wget -nc https://archive.apache.org/dist/tez/$TEZ_VERSION /apache-tez-$TEZ_VERSION -bin.tar.gz
1719
18- if [ ! -d " hadoop-$HADOOP_VERSION " ]; then
19- tar -xzf hadoop-$HADOOP_VERSION .tar.gz
20- fi
21-
20+ # The Tez tarball must be extracted early, because the Hadoop version it depends on is read from its lib/ directory
2221if [ ! -d " apache-tez-$TEZ_VERSION -bin" ]; then
2322 tar -xzf apache-tez-$TEZ_VERSION -bin.tar.gz
2423fi
2524
26- ln -s hadoop-$HADOOP_VERSION hadoop
27- ln -s apache-tez-$TEZ_VERSION -bin tez
25+ export HADOOP_VERSION=${HADOOP_VERSION:= $(basename apache-tez-$TEZ_VERSION -bin/ lib/ hadoop-hdfs-client-* .jar | sed -E ' s/.*hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar/\1/' )} # e.g. 3.4.1
26+
27+ cat << EOF
28+ ***
29+ *** Demo setup script is running in $TEZ_EXAMPLE_WORKING_DIR ***
30+ *** TEZ version: $TEZ_VERSION
31+ *** HADOOP version $HADOOP_VERSION
32+ ***
33+ EOF
34+
35+ wget -nc https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION /hadoop-$HADOOP_VERSION .tar.gz
36+
37+ if [ ! -d " hadoop-$HADOOP_VERSION " ]; then
38+ tar -xzf hadoop-$HADOOP_VERSION .tar.gz
39+ fi
2840
29- export HADOOP_HOME=$HADOOP_STACK_HOME /hadoop
30- export TEZ_HOME=$HADOOP_STACK_HOME / tez
41+ export HADOOP_HOME=$TEZ_EXAMPLE_WORKING_DIR /hadoop- $HADOOP_VERSION
42+ export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR /apache- tez- $TEZ_VERSION -bin
3143export HADOOP_CLASSPATH=$TEZ_HOME /* :$TEZ_HOME /lib/* :$TEZ_HOME /conf
3244
3345export PATH=$PATH :$HADOOP_HOME /bin
@@ -70,16 +82,16 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/yarn-site.xml
7082EOF
7183
7284# stop previous clusters, if any
73- # $HADOOP_HOME/sbin/stop-dfs.sh
74- # $HADOOP_HOME/sbin/stop-yarn.sh
85+ $HADOOP_HOME /sbin/stop-dfs.sh
86+ $HADOOP_HOME /sbin/stop-yarn.sh
7587
76- hdfs namenode -format
88+ rm -rf /tmp/hadoop-$USER /dfs/data
89+ hdfs namenode -format -force
7790
7891$HADOOP_HOME /sbin/start-dfs.sh
7992$HADOOP_HOME /sbin/start-yarn.sh
8093
81- hadoop fs -mkdir /apps/
82- hadoop fs -mkdir /apps/tez-$TEZ_VERSION
94+ hadoop fs -mkdir -p /apps/tez-$TEZ_VERSION
8395hadoop fs -copyFromLocal $TEZ_HOME /share/tez.tar.gz /apps/tez-$TEZ_VERSION
8496
8597# create a simple tez-site.xml
111123
112124hadoop fs -copyFromLocal words.txt /words.txt
113125
126+ export HADOOP_USER_CLASSPATH_FIRST=true
114127# finally run the example
115- hadoop jar $TEZ_HOME /tez-examples-$TEZ_VERSION .jar orderedwordcount /words.txt /words_out
128+ yarn jar $TEZ_HOME /tez-examples-$TEZ_VERSION .jar orderedwordcount /words.txt /words_out
116129
117130# check the output
118131hadoop fs -ls /words_out
119- hadoop fs -text /words_out/part-v002-o000-r-00000
132+ hadoop fs -text /words_out/part-v002-o000-r-00000
133+
134+
135+ cat << EOF
136+ *** Since the environment is already set up, you can rerun the DAG using the commands below.
137+
138+ export HADOOP_USER_CLASSPATH_FIRST=true
139+ export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR /apache-tez-$TEZ_VERSION -bin
140+ export HADOOP_CLASSPATH=$TEZ_HOME /*:$TEZ_HOME /lib/*:$TEZ_HOME /conf
141+ $HADOOP_HOME /bin/yarn jar $TEZ_HOME /tez-examples-$TEZ_VERSION .jar orderedwordcount /words.txt /words_out
142+
143+ *** You can also visit some of the sites that are set up during the script execution.
144+
145+ Yarn RM: http://localhost:8088
146+ HDFS NN: http://localhost:9870
147+
148+ EOF
0 commit comments