-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathbuild.sh
More file actions
executable file
·414 lines (386 loc) · 11.5 KB
/
build.sh
File metadata and controls
executable file
·414 lines (386 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
#!/bin/bash
set -e

# Environment variables used when building torch_musa
#
#   TORCH_MUSA_ARCH_LIST
#     specify which MUSA architectures to build for.
#     e.g. 'TORCH_MUSA_ARCH_LIST="21;22"'
#
# Environment variables read by this script (each falls back to the default
# assigned below when unset or empty):
#
#   PYTORCH_REPO_PATH  path of the PyTorch checkout (default: ../pytorch
#                      relative to this script's directory)
#   KINETO_URL         git URL of the kineto repository
#   USE_KINETO         1 to use kineto, 0 to disable it (also set by -n)
#   USE_MCCL           value exported as USE_MCCL to the torch_musa setup.py
#

# Absolute directory that contains this script. The `&&` makes a failed `cd`
# abort the script (via `set -e`) instead of silently recording the caller's
# working directory; the quotes keep paths with whitespace intact.
CUR_DIR=$(cd -- "$(dirname -- "$0")" && pwd)
TORCH_MUSA_HOME=$CUR_DIR
PYTORCH_PATH=${PYTORCH_REPO_PATH:-$(realpath "${TORCH_MUSA_HOME}/../pytorch")}
TORCH_PATCHES_DIR=${TORCH_MUSA_HOME}/torch_patches/

# Kineto source and the tag that is checked out / cloned.
KINETO_URL=${KINETO_URL:-https://github.com/MooreThreads/kineto.git}
KINETO_TAG=v2.7.0

# Defaults for the command-line switches (0 = off, 1 = on).
BUILD_WHEEL=0
DEBUG_MODE=0
ASAN_MODE=0
BUILD_TORCH=1
BUILD_TORCH_MUSA=1
USE_KINETO=${USE_KINETO:-1}
ONLY_PATCH=0
CLEAN=0
COMPILE_FP64=1

# PyTorch tag to build; the build version is the tag without its leading "v".
PYTORCH_TAG=v2.7.1
PYTORCH_BUILD_VERSION="${PYTORCH_TAG:1}"
PYTORCH_BUILD_NUMBER=0 # This is used for official torch distribution.
USE_MCCL=${USE_MCCL:-1}
# Print the colourised help text for this script to stdout, one line per
# supported option.
usage() {
  local help_line
  local -a help_text=(
    "\033[1;32mThis script is used to build PyTorch and Torch_MUSA. \033[0m"
    "\033[1;32mParameters usage: \033[0m"
    "\033[32m --all : Means building both PyTorch and Torch_MUSA. \033[0m"
    "\033[32m --fp64 : Means compiling fp64 data type in kernels using mcc in Torch_MUSA. \033[0m"
    "\033[32m -m/--musa : Means building Torch_MUSA only. \033[0m"
    "\033[32m -t/--torch : Means building original PyTorch only. \033[0m"
    "\033[32m -d/--debug : Means building in debug mode. \033[0m"
    "\033[32m -a/--asan : Means building in asan mode. \033[0m"
    "\033[32m -c/--clean : Means cleaning everything that has been built. \033[0m"
    "\033[32m -p/--patch : Means applying patches only. \033[0m"
    "\033[32m -w/--wheel : Means generating wheel after building. \033[0m"
    "\033[32m -n/--no_kineto : Disable kineto. \033[0m"
    "\033[32m -h/--help : Help information. \033[0m"
  )
  # Each entry carries its own ANSI colour escapes; `echo -e` renders them.
  for help_line in "${help_text[@]}"; do
    echo -e "${help_line}"
  done
}
# Parse command-line parameters.
#
# getopt normalises the arguments (the leading '+' in the short-option string
# stops parsing at the first non-option argument). The assignment is tested
# directly with `if !`: under `set -e` a separate `[ $? -ne 0 ]` check after a
# failing assignment is never reached, so the hint below would never print.
if ! parameters=$(getopt -o +mtdacpwnh --long all,fp64,musa,torch,debug,asan,clean,patch,wheel,no_kineto,help -n "$0" -- "$@"); then
  echo -e "\033[34mTry '$0 --help' for more information. \033[0m"
  exit 1
fi

# Replace the positional parameters with getopt's normalised, quoted list.
eval set -- "$parameters"

# Translate each switch into the corresponding global flag.
while true; do
  case "$1" in
    --all)
      BUILD_TORCH=1
      BUILD_TORCH_MUSA=1
      shift
      ;;
    --fp64)
      COMPILE_FP64=1
      shift
      ;;
    -m | --musa)
      BUILD_TORCH_MUSA=1
      BUILD_TORCH=0
      shift
      ;;
    -t | --torch)
      BUILD_TORCH_MUSA=0
      BUILD_TORCH=1
      shift
      ;;
    -d | --debug)
      DEBUG_MODE=1
      shift
      ;;
    -a | --asan)
      ASAN_MODE=1
      shift
      ;;
    -c | --clean)
      CLEAN=1
      shift
      ;;
    -w | --wheel)
      BUILD_WHEEL=1
      shift
      ;;
    -n | --no_kineto)
      USE_KINETO=0
      shift
      ;;
    -p | --patch)
      ONLY_PATCH=1
      shift
      ;;
    -h | --help)
      usage
      exit
      ;;
    --)
      # End-of-options marker emitted by getopt.
      shift
      break
      ;;
    *)
      usage
      exit 1
      ;;
  esac
done
# Report whether a command is available; when it is not, install the extra
# requirements listed in ${TORCH_MUSA_HOME}/requirements.txt with pip.
# Globals:   TORCH_MUSA_HOME (read)
# Arguments: $1 - name of the command to look up
# Outputs:   a status line on stdout
cmd_check() {
  # `local` keeps the name from leaking into (or clobbering) the caller's scope.
  local cmd="$1"
  if command -v "${cmd}" >/dev/null 2>&1; then
    echo "- cmd exist : ${cmd}"
  else
    echo -e "\033[34m- cmd does not exist, automatically install \"${cmd}\"\033[0m"
    # Quoted so a checkout path containing whitespace stays a single argument.
    pip install -r "${TORCH_MUSA_HOME}/requirements.txt" # extra requirements
  fi
}
# Make sure the pre-commit tool is available (via cmd_check) and install its
# git hook in the directory containing this script, unless
# .git/hooks/pre-commit already exists there as a regular file.
precommit_install() {
  local root_dir
  cmd_check "pre-commit"
  root_dir="$(dirname "$(realpath "${BASH_SOURCE:-$0}")")"
  # Quoted: an unquoted path with whitespace makes `[` fail with "too many
  # arguments", which silently skips the hook installation.
  if [ ! -f "${root_dir}/.git/hooks/pre-commit" ]; then
    pushd "${root_dir}"
    pre-commit install
    popd
  fi
}
# Runs on every invocation, before any clone/patch/build step: ensure the
# pre-commit hook is installed.
precommit_install
# Make a PyTorch checkout at ${PYTORCH_TAG} available in ${PYTORCH_PATH} and
# bring its submodules up to date.
#   - If ${PYTORCH_PATH} already exists, it is switched to ${PYTORCH_TAG}.
#   - Otherwise PyTorch is shallow-cloned at that tag directly into
#     ${PYTORCH_PATH}, so the clone always lands where the rest of the script
#     looks for it, including when PYTORCH_REPO_PATH overrides the default.
# Globals: PYTORCH_PATH, PYTORCH_TAG (read)
# The working directory and directory stack are restored before returning.
clone_pytorch() {
  # if PyTorch repo exists already, we skip git cloning PyTorch
  if [ -d "${PYTORCH_PATH}" ]; then
    echo -e "\033[34mPyTorch repo path is ${PYTORCH_PATH} ...\033[0m"
    pushd "${PYTORCH_PATH}"
    git checkout "${PYTORCH_TAG}"
    echo -e "\033[34m Switch the Pytorch repo to tag ${PYTORCH_TAG} \033[0m"
    popd
  else
    echo -e "\033[34mUsing default pytorch repo path: ${PYTORCH_PATH}\033[0m"
    echo -e "\033[34mPyTorch repo does not exist, now git clone PyTorch to ${PYTORCH_PATH} ...\033[0m"
    git clone -b "${PYTORCH_TAG}" --depth=1 https://github.com/pytorch/pytorch.git "${PYTORCH_PATH}"
  fi
  # to make sure submodules are fetched
  pushd "${PYTORCH_PATH}"
  update_submodule
  # Balance the pushd above so later steps start from the original directory.
  popd
}
# Helper: apply every patch file whose NUL-terminated path arrives on file
# descriptor 3 to the checkout in ${PYTORCH_PATH}. Each patch is verified with
# `git apply --check` before it is applied.
_apply_patch_stream() {
  local patch_file
  # fd 3 keeps the path list separate from stdin, which the loop body's
  # commands inherit.
  while IFS= read -r -d '' -u 3 patch_file; do
    echo -e "\033[34mapplying patch: $patch_file \033[0m"
    pushd "${PYTORCH_PATH}"
    git apply --check "${patch_file}"
    git apply "${patch_file}"
    popd
  done
}

# Apply the *.patch files under ${TORCH_PATCHES_DIR} to the PyTorch checkout.
# When ${PYTORCH_PATH} is a git checkout, it is first stashed (including
# untracked files) and switched to ${PYTORCH_TAG}. Patches inside a kineto/
# sub-directory are applied only when USE_KINETO is 1.
# Globals: PYTORCH_PATH, PYTORCH_TAG, TORCH_PATCHES_DIR, USE_KINETO (read)
apply_torch_patches() {
  # apply patches into PyTorch
  echo -e "\033[34mApplying patches to ${PYTORCH_PATH} ...\033[0m"
  # clean PyTorch before patching
  if [ -d "${PYTORCH_PATH}/.git" ]; then
    echo -e "\033[34mStash and checkout the PyTorch environment before patching. \033[0m"
    pushd "${PYTORCH_PATH}"
    git stash -u
    git checkout "${PYTORCH_TAG}"
    popd
  fi
  # NUL-delimited find output: patch paths containing whitespace or glob
  # characters are passed through intact instead of being word-split.
  _apply_patch_stream 3< <(find "${TORCH_PATCHES_DIR}" -type f -name '*.patch' -not -path "*/kineto/*" -print0)
  if [ "${USE_KINETO}" -eq 1 ]; then
    _apply_patch_stream 3< <(find "${TORCH_PATCHES_DIR}/kineto" -type f -name '*.patch' -print0)
  fi
}
# Replace ${PYTORCH_PATH}/third_party/kineto with the kineto sources tagged
# ${KINETO_TAG}.
# Steps: remove the existing kineto directory, update all PyTorch submodules,
# remove the kineto directory that update produced, then either copy a local
# checkout from /home/kineto (when that directory exists) or clone
# ${KINETO_URL}.
# Globals: PYTORCH_PATH, KINETO_URL, KINETO_TAG (read)
update_kineto_source() {
  # `:?` aborts if PYTORCH_PATH is unset or empty, so the `rm -rf` calls below
  # can never expand to /third_party/kineto.
  local kineto_dir="${PYTORCH_PATH:?PYTORCH_PATH must be set}/third_party/kineto"
  local local_kineto="/home/kineto"

  echo -e "\033[34mUpdating Kineto...\033[0m"
  pushd "${PYTORCH_PATH}"
  # remove the current kineto
  rm -rf "${kineto_dir}"
  git submodule update --init --recursive --depth 1
  # remove the official kineto
  rm -rf "${kineto_dir}"
  popd
  echo -e "\033[34mUpdating KINETO_URL, might take a while...\033[0m"
  if [ -d "${local_kineto}" ]; then
    # A local kineto checkout is available: sync it to the tag and copy it in.
    pushd "${local_kineto}"
    git checkout "${KINETO_TAG}"
    git submodule update --init --recursive --depth 1
    popd
    cp -r "${local_kineto}" "${PYTORCH_PATH}/third_party"
  else
    git clone "${KINETO_URL}" -b "${KINETO_TAG}" --depth 1 --recursive "${kineto_dir}"
  fi
}
# Since the initial environment uses musa kineto by default, we should
# manually redirect torch kineto's url && commit id if `USE_KINETO=0`.
# Currently, it's only required for internal testing purposes.
#
# Removes ${PYTORCH_PATH}/third_party/kineto and re-initialises that submodule
# from the PyTorch checkout.
# Globals: PYTORCH_PATH (read)
# Returns: 1 if ${PYTORCH_PATH} cannot be entered.
revert_torch_kineto() {
  echo -e "\033[34mReverting to torch kineto...\033[0m"
  echo -e "\033[34mRemoving mupti...\033[0m"
  # The rm below uses a relative path, so it must never run unless we really
  # are inside the PyTorch checkout.
  pushd "${PYTORCH_PATH}" || return 1
  rm -rf third_party/kineto
  git submodule update --init --recursive third_party/kineto
  popd
}
# Bring the PyTorch submodules up to date while keeping the intended kineto
# sources in ${PYTORCH_PATH}/third_party/kineto.
#
# When third_party/kineto exists, its `origin` URL and `git describe` output
# are read and one of three paths is taken:
#   USE_KINETO=0            -> drop the directory if its origin is KINETO_URL,
#                              then update all PyTorch submodules.
#   origin == KINETO_URL    -> update kineto's own submodules, move the
#                              directory aside, update the PyTorch submodules,
#                              move it back, then fetch and check out
#                              KINETO_TAG if the described tag differs.
#   any other origin        -> update_kineto_source.
# When the directory does not exist: update_kineto_source if USE_KINETO=1,
# otherwise a plain submodule update.
# Globals: PYTORCH_PATH, USE_KINETO, KINETO_URL, KINETO_TAG (read)
update_submodule() {
  local kineto_dir="${PYTORCH_PATH:?PYTORCH_PATH must be set}/third_party/kineto"
  local remote_url current_tag stash_dir

  if [ -d "${kineto_dir}" ]; then
    pushd "${kineto_dir}"
    remote_url=$(git remote get-url origin)
    current_tag=$(git describe --tags --always)
    popd
    if [ "${USE_KINETO}" -eq 0 ]; then
      if [ "${remote_url}" = "${KINETO_URL}" ]; then
        rm -rf "${kineto_dir}"
      fi
      pushd "${PYTORCH_PATH}"
      git submodule update --init --recursive --depth 1
      popd
    elif [ "${remote_url}" = "${KINETO_URL}" ]; then
      pushd "${kineto_dir}"
      echo -e "\033[34mUpdating KINETO submodule, might take a while...\033[0m"
      git submodule update --init --recursive
      popd
      # Park the checkout in a private temporary directory rather than the
      # fixed path /tmp/kineto: nothing pre-existing is deleted and concurrent
      # builds cannot collide.
      stash_dir=$(mktemp -d)
      mv "${kineto_dir}" "${stash_dir}/kineto"
      pushd "${PYTORCH_PATH}"
      git submodule update --init --recursive --depth 1
      popd
      # Discard whatever the submodule update placed there and restore ours.
      rm -rf "${kineto_dir}"
      mv "${stash_dir}/kineto" "${kineto_dir}"
      rmdir "${stash_dir}"
      if [ "${current_tag}" != "${KINETO_TAG}" ]; then
        echo -e "\033[34mUpdate the kineto to the [${KINETO_TAG}]\033[0m"
        pushd "${kineto_dir}"
        git fetch origin tag "${KINETO_TAG}"
        git checkout "${KINETO_TAG}"
        popd
      fi
    else
      update_kineto_source
    fi
  elif [ "${USE_KINETO}" -eq 1 ]; then
    update_kineto_source
  else
    pushd "${PYTORCH_PATH}"
    git submodule update --init --recursive --depth 1
    popd
  fi
}
# Build PyTorch from ${PYTORCH_PATH}.
# Installs the Python requirements, then runs `setup.py bdist_wheel` (and
# installs the resulting wheel) when BUILD_WHEEL=1, or `setup.py install`
# otherwise.
# Globals: PYTORCH_PATH, TORCH_MUSA_HOME, BUILD_WHEEL, DEBUG_MODE, ASAN_MODE,
#          USE_KINETO, PYTORCH_BUILD_NUMBER, PYTORCH_BUILD_VERSION (read)
# Returns: the exit status of setup.py. Exits with 1 if ${PYTORCH_PATH} does
#          not exist.
build_pytorch() {
  local status=0
  local setup_target=install

  echo -e "\033[34mBuilding PyTorch...\033[0m"
  if [ ! -d "${PYTORCH_PATH}" ]; then
    echo -e "\033[34mAn error occurred while building PyTorch, the specified PyTorch repo [${PYTORCH_PATH}] does not exist \033[0m"
    exit 1
  fi
  pushd "${PYTORCH_PATH}"
  pip install -r requirements.txt
  pip install -r "${TORCH_MUSA_HOME}/requirements.txt" # extra requirements
  if [ "${BUILD_WHEEL}" -eq 1 ]; then
    rm -rf dist
    pip uninstall torch -y
    setup_target=bdist_wheel
  fi
  # `|| status=$?` records the failure. A bare `status=$?` on the next line is
  # never reached under `set -e`, because the failing command aborts first.
  PYTORCH_BUILD_NUMBER=${PYTORCH_BUILD_NUMBER} \
    PYTORCH_BUILD_VERSION=${PYTORCH_BUILD_VERSION} \
    DEBUG=${DEBUG_MODE} \
    USE_ASAN=${ASAN_MODE} \
    USE_KINETO=${USE_KINETO} \
    BUILD_TEST=0 python setup.py "${setup_target}" || status=$?
  # Only install the wheel when the build actually produced one.
  if [ "${status}" -eq 0 ] && [ "${BUILD_WHEEL}" -eq 1 ]; then
    rm -rf torch.egg-info
    pip install dist/*.whl
  fi
  popd
  return "${status}"
}
# Run `python setup.py clean` inside the PyTorch checkout.
# Globals: PYTORCH_PATH (read)
clean_pytorch() {
  echo -e "\033[34mCleaning PyTorch...\033[0m"
  # Quoted so a checkout path containing whitespace is entered correctly.
  pushd "${PYTORCH_PATH}"
  python setup.py clean
  popd
}
# Run `python setup.py clean` inside the torch_musa tree and delete its
# build/ directory.
# Globals: TORCH_MUSA_HOME, CUR_DIR (read)
clean_torch_musa() {
  echo -e "\033[34mCleaning torch_musa...\033[0m"
  pushd "${TORCH_MUSA_HOME}"
  TORCH_DEVICE_BACKEND_AUTOLOAD=0 python setup.py clean
  # Quoted, with a `:?` guard: unquoted, a path with whitespace would be split
  # into several rm targets, and an empty CUR_DIR would expand to /build.
  rm -rf "${CUR_DIR:?}/build"
  popd
}
# Build torch_musa from ${TORCH_MUSA_HOME}.
# Runs `setup.py bdist_wheel` (and installs the resulting wheel) when
# BUILD_WHEEL=1, or `setup.py install` otherwise, then runs
# scripts/scan_ops.sh after a successful build.
# Globals: TORCH_MUSA_HOME, CUR_DIR, PYTORCH_PATH, BUILD_WHEEL, DEBUG_MODE,
#          ASAN_MODE, COMPILE_FP64, USE_MCCL, USE_KINETO (read)
# Returns: the exit status of setup.py. A failure is returned rather than
#          turned into `exit`, so the directory stack is restored and the
#          caller decides how to report it; a caller running under `set -e`
#          still aborts on the non-zero return.
build_torch_musa() {
  local status=0
  local setup_target=install

  echo -e "\033[34mBuilding torch_musa...\033[0m"
  # `rm -rf dist` below is relative: never continue from the wrong directory.
  pushd "${TORCH_MUSA_HOME}" || return 1
  if [ "${BUILD_WHEEL}" -eq 1 ]; then
    rm -rf dist
    setup_target=bdist_wheel
  fi
  # `|| status=$?` records the failure. A bare `status=$?` on the next line is
  # never reached under `set -e`, because the failing command aborts first.
  TORCH_DEVICE_BACKEND_AUTOLOAD=0 \
    PYTORCH_REPO_PATH=${PYTORCH_PATH} \
    DEBUG=${DEBUG_MODE} \
    USE_ASAN=${ASAN_MODE} \
    ENABLE_COMPILE_FP64=${COMPILE_FP64} \
    USE_MCCL=${USE_MCCL} \
    USE_KINETO=${USE_KINETO} python setup.py "${setup_target}" || status=$?
  if [ "${status}" -ne 0 ]; then
    popd
    return "${status}"
  fi
  if [ "${BUILD_WHEEL}" -eq 1 ]; then
    rm -rf torch_musa.egg-info
    pip install dist/*.whl
  fi
  # scan and output ops list for each building
  bash "${CUR_DIR}/scripts/scan_ops.sh"
  popd
  return "${status}"
}
# Drive the whole build according to the flags set by option parsing.
#   1. CLEAN=1 with neither build target selected: clean both trees and exit.
#   2. ONLY_PATCH=1: apply the PyTorch patches and exit.
#   3. BUILD_TORCH=1: clone/refresh PyTorch, optionally clean, patch, build.
#   4. BUILD_TORCH_MUSA=1: optionally clean, then build torch_musa.
# A failing build prints recovery guidance and exits with status 1.
# Globals: CLEAN, ONLY_PATCH, BUILD_TORCH, BUILD_TORCH_MUSA (read)
main() {
  local build_pytorch_status=0
  local build_torch_musa_status=0
  local clean_and_build
  local errexit_was_on=0

  # Remember whether errexit is active so it can be restored after the
  # status-capturing sections below.
  case $- in
    *e*) errexit_was_on=1 ;;
  esac

  # NOTE(review): with the defaults and option parser in this file at least one
  # of BUILD_TORCH / BUILD_TORCH_MUSA is always 1, so this clean-only branch
  # looks unreachable — confirm the intended behaviour of `-c`.
  if [[ ${CLEAN} -eq 1 ]] && [[ ${BUILD_TORCH} -ne 1 ]] && [[ ${BUILD_TORCH_MUSA} -ne 1 ]]; then
    clean_pytorch
    clean_torch_musa
    exit 0
  fi
  if [ "${ONLY_PATCH}" -eq 1 ]; then
    apply_torch_patches
    exit 0
  fi
  if [ "${BUILD_TORCH}" -eq 1 ]; then
    clone_pytorch
    if [ "${CLEAN}" -eq 1 ]; then
      clean_pytorch
    fi
    apply_torch_patches
    # Under `set -e` a failing build would abort the script before `$?` could
    # be inspected, so the guidance below would never be shown. Run the build
    # in a subshell that keeps errexit semantics internally, with errexit
    # lifted here just long enough to read the subshell's status.
    set +e
    (
      set -e
      build_pytorch
    )
    build_pytorch_status=$?
    if [ "${errexit_was_on}" -eq 1 ]; then
      set -e
    fi
    if [ "${build_pytorch_status}" -ne 0 ]; then
      clean_and_build="bash build.sh -c # Clean PyTorch/torch_musa and build"
      echo -e "\033[31mBuilding PyTorch failed, please try cleaning first before building: \033[0m"
      echo -e "\033[32m$clean_and_build \033[0m"
      exit 1
    fi
  fi
  if [ "${BUILD_TORCH_MUSA}" -eq 1 ]; then
    if [ "${CLEAN}" -eq 1 ]; then
      clean_torch_musa
    fi
    # Same status-capturing pattern as above; the subshell also contains any
    # `exit` issued inside the build function.
    set +e
    (
      set -e
      build_torch_musa
    )
    build_torch_musa_status=$?
    if [ "${errexit_was_on}" -eq 1 ]; then
      set -e
    fi
    if [ "${build_torch_musa_status}" -ne 0 ]; then
      echo -e "\033[31mPlease try the following commands once building torch_musa is failed: \033[0m"
      echo -e "\033[32mClean PyTorch/torch_musa and build: \033[0m"
      echo "cmd1: bash build.sh -c"
      echo -e "\033[32mIf cmd1 still failed, update torch_musa to newest and build: \033[0m"
      echo "cmd2: git fetch && git rebase origin/main && bash build.sh -c"
      echo -e "\033[32mIf cmd2 still failed, update libraries and build: \033[0m"
      echo "cmd3: bash docker/common/daily/update_daily_musart.sh && bash docker/common/daily/update_daily_mudnn.sh && bash build.sh -c"
      echo -e "\033[32mIf cmd3 still failed, please check driver version on your host machine. \033[0m"
      exit 1
    fi
  fi
}
# Script entry point: all flags were set by the option parsing above, so main
# is invoked without arguments.
main