-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy patheval.sh
More file actions
50 lines (42 loc) · 1.32 KB
/
eval.sh
File metadata and controls
50 lines (42 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/bin/bash
MODEL=$1
PRECISION=$2
if [[ -z "$MODEL" ]]; then
echo "Usage: $0 <model> [precision]"
echo "Model options: state-spaces/mamba-2.8b, state-spaces/mamba2-2.7b"
echo "Precision options: fp16, w8a8, w4a8, w4a16"
echo "Note: If precision is omitted, model name must contain 'quamba'"
exit 1
fi
# Check if PRECISION is missing and MODEL is not "quamba"
if [[ -z "$PRECISION" && "$MODEL" != *"quamba"* ]]; then
echo "Error: Precision is required unless model contains 'quamba'"
exit 1
fi
CMD="python main.py $MODEL --batch_size 16 --eval_zero_shot --task_list lambada_openai --pretrained_dir ./pretrained_models --log_dir ./logs"
# Append group_heads flag if model is mamba2
if [[ "$MODEL" == *"mamba2"* ]]; then
CMD+=" --group_heads"
fi
if [[ "$MODEL" != *"quamba"* ]]; then
case $PRECISION in
fp16)
;;
w8a8)
# default --w_bits 8 --a_bits 8
CMD+=" --quantize --quantize_embedding --quantize_lm_head"
;;
w4a8)
CMD+=" --quantize --w_bits 4 --a_bits 8 --apply_gptq --quantize_embedding --quantize_lm_head"
;;
w4a16)
CMD+=" --quantize --w_bits 4 --a_bits 16 --apply_gptq --quantize_embedding --quantize_lm_head"
;;
*)
echo "Unsupported precision: $PRECISION"
exit 1
;;
esac
fi
echo "Running: $CMD"
eval $CMD