Skip to content

Commit bd94d8b

Browse files
committed
improvements + PR comments
1 parent 85bdf17 commit bd94d8b

1 file changed

Lines changed: 51 additions & 22 deletions

File tree

dev-support/bin/tez_run_example.sh

100644 → 100755
Lines changed: 51 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -4,30 +4,42 @@
44
# 1. java is installed and JAVA_HOME is set
55
# 2. ssh localhost works without password
66

7-
# configure this if needed, by default it will use the latest stable versions in the current directory
8-
export TEZ_VERSION=$(curl -s "https://downloads.apache.org/tez/" | grep -oP '\K[0-9]+\.[0-9]+\.[0-9]+(?=/)' | sort -V | tail -1) # e.g. 0.10.4
9-
export HADOOP_VERSION=$(curl -s "https://downloads.apache.org/hadoop/common/" | grep -oP 'hadoop-\K[0-9]+\.[0-9]+\.[0-9]+(?=/)' | sort -V | tail -1) # e.g. 3.4.1
10-
export HADOOP_STACK_HOME=$PWD
7+
# All parameters are optional:
8+
# TEZ_VERSION: defaults to the latest version available on the Apache Tez download page
9+
# HADOOP_VERSION: defaults to the Hadoop version bundled with the selected TEZ_VERSION (read from the hadoop-hdfs-client jar inside the Tez tarball)
10+
# TEZ_EXAMPLE_WORKING_DIR: defaults to the current working directory
1111

12-
echo "Demo script is running in $HADOOP_STACK_HOME with TEZ version $TEZ_VERSION and HADOOP version $HADOOP_VERSION"
12+
# TEZ_VERSION comes from environment variable or is fetched from the Apache Tez download page
13+
export TEZ_VERSION=${TEZ_VERSION:=$(curl -s "https://downloads.apache.org/tez/" | grep --color=never -o '[0-9]\+\.[0-9]\+\.[0-9]\+' | sed -n '/\/$/!p' | sort -V | tail -1)} # e.g. 0.10.4
14+
export TEZ_EXAMPLE_WORKING_DIR=${TEZ_EXAMPLE_WORKING_DIR:=$PWD}
15+
cd $TEZ_EXAMPLE_WORKING_DIR
1316

14-
cd $HADOOP_STACK_HOME
15-
wget -nc https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
17+
echo "TEZ_VERSION: $TEZ_VERSION"
1618
wget -nc https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz
1719

18-
if [ ! -d "hadoop-$HADOOP_VERSION" ]; then
19-
tar -xzf hadoop-$HADOOP_VERSION.tar.gz
20-
fi
21-
20+
# The Tez tarball must be extracted early to determine the Hadoop version it depends on
2221
if [ ! -d "apache-tez-$TEZ_VERSION-bin" ]; then
2322
tar -xzf apache-tez-$TEZ_VERSION-bin.tar.gz
2423
fi
2524

26-
ln -s hadoop-$HADOOP_VERSION hadoop
27-
ln -s apache-tez-$TEZ_VERSION-bin tez
25+
export HADOOP_VERSION=${HADOOP_VERSION:=$(basename apache-tez-$TEZ_VERSION-bin/lib/hadoop-hdfs-client-*.jar | sed -E 's/.*hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar/\1/')} # e.g. 3.4.1
26+
27+
cat <<EOF
28+
***
29+
*** Demo setup script is running in $TEZ_EXAMPLE_WORKING_DIR ***
30+
*** TEZ version: $TEZ_VERSION
31+
*** HADOOP version $HADOOP_VERSION
32+
***
33+
EOF
34+
35+
wget -nc https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
36+
37+
if [ ! -d "hadoop-$HADOOP_VERSION" ]; then
38+
tar -xzf hadoop-$HADOOP_VERSION.tar.gz
39+
fi
2840

29-
export HADOOP_HOME=$HADOOP_STACK_HOME/hadoop
30-
export TEZ_HOME=$HADOOP_STACK_HOME/tez
41+
export HADOOP_HOME=$TEZ_EXAMPLE_WORKING_DIR/hadoop-$HADOOP_VERSION
42+
export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin
3143
export HADOOP_CLASSPATH=$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf
3244

3345
export PATH=$PATH:$HADOOP_HOME/bin
@@ -70,16 +82,16 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/yarn-site.xml
7082
EOF
7183

7284
# stop previous clusters, if any are running
73-
#$HADOOP_HOME/sbin/stop-dfs.sh
74-
#$HADOOP_HOME/sbin/stop-yarn.sh
85+
$HADOOP_HOME/sbin/stop-dfs.sh
86+
$HADOOP_HOME/sbin/stop-yarn.sh
7587

76-
hdfs namenode -format
88+
rm -rf /tmp/hadoop-$USER/dfs/data
89+
hdfs namenode -format -force
7790

7891
$HADOOP_HOME/sbin/start-dfs.sh
7992
$HADOOP_HOME/sbin/start-yarn.sh
8093

81-
hadoop fs -mkdir /apps/
82-
hadoop fs -mkdir /apps/tez-$TEZ_VERSION
94+
hadoop fs -mkdir -p /apps/tez-$TEZ_VERSION
8395
hadoop fs -copyFromLocal $TEZ_HOME/share/tez.tar.gz /apps/tez-$TEZ_VERSION
8496

8597
# create a simple tez-site.xml
@@ -111,9 +123,26 @@ EOF
111123

112124
hadoop fs -copyFromLocal words.txt /words.txt
113125

126+
export HADOOP_USER_CLASSPATH_FIRST=true
114127
# finally run the example
115-
hadoop jar $TEZ_HOME/tez-examples-$TEZ_VERSION.jar orderedwordcount /words.txt /words_out
128+
yarn jar $TEZ_HOME/tez-examples-$TEZ_VERSION.jar orderedwordcount /words.txt /words_out
116129

117130
# check the output
118131
hadoop fs -ls /words_out
119-
hadoop fs -text /words_out/part-v002-o000-r-00000
132+
hadoop fs -text /words_out/part-v002-o000-r-00000
133+
134+
135+
cat <<EOF
136+
*** Since the environment is already set up, you can rerun the DAG using the commands below.
137+
138+
export HADOOP_USER_CLASSPATH_FIRST=true
139+
export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin
140+
export HADOOP_CLASSPATH=$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf
141+
$HADOOP_HOME/bin/yarn jar $TEZ_HOME/tez-examples-$TEZ_VERSION.jar orderedwordcount /words.txt /words_out
142+
143+
*** You can also visit some of the sites that are set up during the script execution.
144+
145+
Yarn RM: http://localhost:8088
146+
HDFS NN: http://localhost:9870
147+
148+
EOF

0 commit comments

Comments
 (0)