@@ -140,7 +140,7 @@ def main():
140140 if not training_args .autotuner_benchmark :
141141 model = AutoModelForCausalLMPipe .from_pretrained (
142142 model_args .model_name_or_path ,
143- tensor_parallel_output = False ,
143+ tensor_parallel_output = True ,
144144 tensor_parallel_degree = training_args .tensor_parallel_degree ,
145145 tensor_parallel_rank = training_args .tensor_parallel_rank ,
146146 use_flash_attention = model_args .use_flash_attention ,
@@ -152,7 +152,7 @@ def main():
152152 # NOTE(gongenlei): new add autotuner_benchmark
153153 model_config = AutoConfig .from_pretrained (
154154 model_args .model_name_or_path ,
155- tensor_parallel_output = False ,
155+ tensor_parallel_output = True ,
156156 tensor_parallel_degree = training_args .tensor_parallel_degree ,
157157 tensor_parallel_rank = training_args .tensor_parallel_rank ,
158158 dtype = dtype ,
@@ -163,7 +163,7 @@ def main():
163163 else :
164164 model_config = AutoConfig .from_pretrained (
165165 model_args .model_name_or_path ,
166- tensor_parallel_output = False ,
166+ tensor_parallel_output = True ,
167167 tensor_parallel_degree = training_args .tensor_parallel_degree ,
168168 tensor_parallel_rank = training_args .tensor_parallel_rank ,
169169 dtype = dtype ,
0 commit comments