forked from apache/tez
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtez_run_example.sh
More file actions
executable file
·148 lines (115 loc) · 4.46 KB
/
tez_run_example.sh
File metadata and controls
executable file
·148 lines (115 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# This script sets up a local, pseudo-distributed Hadoop and Tez environment and runs a simple word count example on it.
# Prerequisites:
# 1. Java is installed and JAVA_HOME is set
# 2. "ssh localhost" works without a password
# 3. curl, wget and tar are available on the PATH
# All parameters are optional environment variables:
# TEZ_VERSION: defaults to the latest version listed on the Apache Tez download page
# HADOOP_VERSION: defaults to the version of the hadoop-hdfs-client jar shipped in the lib directory of the selected Tez release
# TEZ_EXAMPLE_WORKING_DIR: defaults to the current working directory
# TEZ_VERSION comes from the environment variable; when it is unset or empty, the
# highest x.y.z version string found on the Apache Tez download page is used.
if [ -z "${TEZ_VERSION:-}" ]; then
  # grep -o prints one bare version number per match, sort -V orders them as
  # versions, tail -1 keeps the newest.
  TEZ_VERSION=$(curl -s "https://downloads.apache.org/tez/" \
    | grep --color=never -o '[0-9]\+\.[0-9]\+\.[0-9]\+' \
    | sort -V \
    | tail -1) # e.g. 0.10.4
fi
# Every later download URL and path is built from TEZ_VERSION, so an empty value
# (e.g. curl could not reach the download page) is fatal.
if [ -z "$TEZ_VERSION" ]; then
  echo "ERROR: could not determine TEZ_VERSION from https://downloads.apache.org/tez/; set TEZ_VERSION explicitly" >&2
  exit 1
fi
export TEZ_VERSION

# All downloads and extracted directories live in TEZ_EXAMPLE_WORKING_DIR.
export TEZ_EXAMPLE_WORKING_DIR="${TEZ_EXAMPLE_WORKING_DIR:-$PWD}"
# Quoted so that directories containing spaces work; abort rather than continue
# in whatever directory the script happened to be started from.
cd "$TEZ_EXAMPLE_WORKING_DIR" || {
  echo "ERROR: cannot change into working directory '$TEZ_EXAMPLE_WORKING_DIR'" >&2
  exit 1
}
echo "TEZ_VERSION: $TEZ_VERSION"
# Download the Tez binary release; -nc keeps a tarball that is already present.
# wget's own exit status is deliberately not used as the success criterion
# (NOTE(review): some wget versions appear to return non-zero when -nc skips an
# existing file - confirm); the presence of the tarball is checked instead.
wget -nc "https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz"
if [ ! -f "apache-tez-$TEZ_VERSION-bin.tar.gz" ]; then
  echo "ERROR: apache-tez-$TEZ_VERSION-bin.tar.gz could not be downloaded" >&2
  exit 1
fi
# Need to extract the Tez tarball early to get hadoop version it depends on
if [ ! -d "apache-tez-$TEZ_VERSION-bin" ]; then
  tar -xzf "apache-tez-$TEZ_VERSION-bin.tar.gz" || {
    echo "ERROR: extracting apache-tez-$TEZ_VERSION-bin.tar.gz failed; remove any partially extracted apache-tez-$TEZ_VERSION-bin directory before retrying" >&2
    exit 1
  }
fi
# HADOOP_VERSION comes from the environment variable; when it is unset or empty it
# is derived from the hadoop-hdfs-client jar shipped in the Tez lib directory.
if [ -z "${HADOOP_VERSION:-}" ]; then
  for hdfs_client_jar in "apache-tez-$TEZ_VERSION-bin"/lib/hadoop-hdfs-client-*.jar; do
    # Accept only names of the exact form hadoop-hdfs-client-<x.y.z>.jar. With
    # sed -n ... p nothing is printed for anything else, which also covers the
    # unexpanded pattern left behind when the glob matches no file.
    candidate_version=$(basename "$hdfs_client_jar" \
      | sed -n -E 's/^hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar$/\1/p')
    if [ -n "$candidate_version" ]; then
      HADOOP_VERSION=$candidate_version
      break
    fi
  done
  unset hdfs_client_jar candidate_version
fi
# The Hadoop download URL is built from this value, so fail early when it is missing.
if [ -z "${HADOOP_VERSION:-}" ]; then
  echo "ERROR: could not derive HADOOP_VERSION from apache-tez-$TEZ_VERSION-bin/lib/hadoop-hdfs-client-*.jar; set HADOOP_VERSION explicitly" >&2
  exit 1
fi
export HADOOP_VERSION # e.g. 3.4.1
# Announce the working directory and the Tez / Hadoop versions that were selected.
printf '%s\n' \
  "***" \
  "*** Demo setup script is running in $TEZ_EXAMPLE_WORKING_DIR ***" \
  "*** TEZ version: $TEZ_VERSION" \
  "*** HADOOP version $HADOOP_VERSION" \
  "***"
# Download the Hadoop release for HADOOP_VERSION; -nc keeps a tarball that is
# already present. wget's own exit status is deliberately not used as the success
# criterion (NOTE(review): some wget versions appear to return non-zero when -nc
# skips an existing file - confirm); the presence of the tarball is checked instead.
wget -nc "https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"
if [ ! -f "hadoop-$HADOOP_VERSION.tar.gz" ]; then
  echo "ERROR: hadoop-$HADOOP_VERSION.tar.gz could not be downloaded" >&2
  exit 1
fi
# Unpack only once; a directory left by a previous run is reused as is.
if [ ! -d "hadoop-$HADOOP_VERSION" ]; then
  tar -xzf "hadoop-$HADOOP_VERSION.tar.gz" || {
    echo "ERROR: extracting hadoop-$HADOOP_VERSION.tar.gz failed; remove any partially extracted hadoop-$HADOOP_VERSION directory before retrying" >&2
    exit 1
  }
fi
# Locations of the unpacked Hadoop and Tez distributions inside the working directory.
HADOOP_HOME="${TEZ_EXAMPLE_WORKING_DIR}/hadoop-${HADOOP_VERSION}"
TEZ_HOME="${TEZ_EXAMPLE_WORKING_DIR}/apache-tez-${TEZ_VERSION}-bin"
# The '*' characters stay literal in the value (no shell globbing happens here);
# they are left for the consumer of HADOOP_CLASSPATH to interpret.
HADOOP_CLASSPATH="${TEZ_HOME}/*:${TEZ_HOME}/lib/*:${TEZ_HOME}/conf"
# Make the hadoop / hdfs / yarn commands used below available.
PATH="${PATH}:${HADOOP_HOME}/bin"
export HADOOP_HOME TEZ_HOME HADOOP_CLASSPATH PATH
# https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html#Pseudo-Distributed_Operation
# Write a Hadoop *-site.xml configuration file that contains exactly one property.
#   $1 - path of the file to create or overwrite
#   $2 - property name
#   $3 - property value
write_single_property_site_xml() {
  # The redirect target is quoted: bash rejects an unquoted target that expands
  # to several words (a path with spaces) as an "ambiguous redirect".
  cat <<EOF > "$1"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>$2</name>
<value>$3</value>
</property>
</configuration>
EOF
}
# Overwrite the three site files of the unpacked Hadoop distribution with the
# single-property settings used by this demo.
write_single_property_site_xml "$HADOOP_HOME/etc/hadoop/hdfs-site.xml" dfs.replication 1
write_single_property_site_xml "$HADOOP_HOME/etc/hadoop/core-site.xml" fs.defaultFS hdfs://localhost:9000
write_single_property_site_xml "$HADOOP_HOME/etc/hadoop/yarn-site.xml" yarn.nodemanager.aux-services mapreduce_shuffle
# optionally stop previous clusters if any
# (best effort: the exit status of the stop scripts is intentionally ignored)
"$HADOOP_HOME/sbin/stop-dfs.sh"
"$HADOOP_HOME/sbin/stop-yarn.sh"
# Remove the DataNode data of a previous run before the NameNode is re-formatted.
# The path follows Hadoop's default hadoop.tmp.dir (/tmp/hadoop-<user name>).
# USER can be unset in minimal environments, so fall back to `id -un`; the :?
# guard refuses to run rm -rf on a path built from an empty user name.
hadoop_tmp_user="${USER:-$(id -un)}"
rm -rf -- "/tmp/hadoop-${hadoop_tmp_user:?cannot determine the current user name}/dfs/data"
# Nothing below can work without a formatted and running HDFS / YARN, so stop on failure.
hdfs namenode -format -force || {
  echo "ERROR: formatting the HDFS NameNode failed" >&2
  exit 1
}
"$HADOOP_HOME/sbin/start-dfs.sh" || {
  echo "ERROR: $HADOOP_HOME/sbin/start-dfs.sh failed" >&2
  exit 1
}
"$HADOOP_HOME/sbin/start-yarn.sh" || {
  echo "ERROR: $HADOOP_HOME/sbin/start-yarn.sh failed" >&2
  exit 1
}
# Publish the Tez tarball from the local Tez distribution to /apps/tez-<version> on HDFS.
# Paths are quoted so a working directory containing spaces does not split the
# arguments; a failed upload is fatal because the example needs this copy.
hadoop fs -mkdir -p "/apps/tez-$TEZ_VERSION" || {
  echo "ERROR: could not create /apps/tez-$TEZ_VERSION on HDFS" >&2
  exit 1
}
hadoop fs -copyFromLocal "$TEZ_HOME/share/tez.tar.gz" "/apps/tez-$TEZ_VERSION" || {
  echo "ERROR: could not upload $TEZ_HOME/share/tez.tar.gz to /apps/tez-$TEZ_VERSION on HDFS" >&2
  exit 1
}
# create a simple tez-site.xml
# Its only property, tez.lib.uris, is set to the HDFS path /apps/tez-<version>/tez.tar.gz.
# The redirect target is quoted: bash rejects an unquoted target that expands to
# several words (a path with spaces) as an "ambiguous redirect".
cat <<EOF > "$TEZ_HOME/conf/tez-site.xml" || { echo "ERROR: could not write $TEZ_HOME/conf/tez-site.xml" >&2; exit 1; }
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>tez.lib.uris</name>
<value>/apps/tez-$TEZ_VERSION/tez.tar.gz</value>
</property>
</configuration>
EOF
# Generate the word-count input: ten words, one per line, with Apple, Banana and
# Car each appearing twice.
printf '%s\n' \
  Apple Banana Car \
  Apple Banana Car \
  Dog Elephant Friend Game \
  > ./words.txt
# Upload the input file to HDFS; without it the example has nothing to read.
hadoop fs -copyFromLocal words.txt /words.txt || {
  echo "ERROR: could not upload words.txt to HDFS" >&2
  exit 1
}
# NOTE(review): presumably makes the Tez jars listed in HADOOP_CLASSPATH take
# precedence over Hadoop's own classpath entries - confirm.
export HADOOP_USER_CLASSPATH_FIRST=true
# finally run the example
# The jar path is quoted so a working directory containing spaces does not split it.
yarn jar "$TEZ_HOME/tez-examples-$TEZ_VERSION.jar" orderedwordcount /words.txt /words_out || {
  echo "ERROR: the orderedwordcount example failed" >&2
  exit 1
}
# check the output
hadoop fs -ls /words_out
hadoop fs -text /words_out/part-v002-o000-r-00000
# Closing message: the commands for rerunning the DAG (variables are expanded to
# their current values) and the local web UI addresses.
printf '%s\n' \
  "*** Since the environment is already set up, you can rerun the DAG using the commands below." \
  "export HADOOP_USER_CLASSPATH_FIRST=true" \
  "export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin" \
  "export HADOOP_CLASSPATH=$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf" \
  "$HADOOP_HOME/bin/yarn jar $TEZ_HOME/tez-examples-$TEZ_VERSION.jar orderedwordcount /words.txt /words_out" \
  "*** You can also visit some of the sites that are set up during the script execution." \
  "Yarn RM: http://localhost:8088" \
  "HDFS NN: http://localhost:9870"