6666 disable_dropout_in_model ,
6767 ensure_master_addr_port ,
6868 entropy_from_logits ,
69+ get_config_model_id ,
6970 identity ,
7071 nanmax ,
7172 nanmin ,
@@ -245,7 +246,7 @@ def __init__(
245246 ):
246247 # Args
247248 if args is None :
248- model_name = model if isinstance (model , str ) else model .config . _name_or_path
249+ model_name = model if isinstance (model , str ) else get_config_model_id ( model .config )
249250 model_name = model_name .split ("/" )[- 1 ]
250251 args = GRPOConfig (f"{ model_name } -GRPO" )
251252
@@ -270,7 +271,7 @@ def __init__(
270271 architecture = getattr (transformers , config .architectures [0 ])
271272 model = architecture .from_pretrained (model_id , ** model_init_kwargs )
272273 else :
273- model_id = model .config . _name_or_path
274+ model_id = get_config_model_id ( model .config )
274275 if args .model_init_kwargs is not None :
275276 logger .warning (
276277 "You passed `model_init_kwargs` to the `GRPOConfig`, but your model is already instantiated. "
@@ -290,7 +291,7 @@ def __init__(
290291
291292 # Processing class
292293 if processing_class is None :
293- processing_class = AutoProcessor .from_pretrained (model .config . _name_or_path , truncation_side = "left" )
294+ processing_class = AutoProcessor .from_pretrained (get_config_model_id ( model .config ) , truncation_side = "left" )
294295
295296 # Handle pad token for processors or tokenizers
296297 if isinstance (processing_class , ProcessorMixin ):
@@ -317,7 +318,7 @@ def __init__(
317318 reward_func , num_labels = 1 , ** model_init_kwargs
318319 )
319320 if isinstance (reward_funcs [i ], nn .Module ): # Use Module over PretrainedModel for compat w/ compiled models
320- self .reward_func_names .append (reward_funcs [i ].config . _name_or_path .split ("/" )[- 1 ])
321+ self .reward_func_names .append (get_config_model_id ( reward_funcs [i ].config ) .split ("/" )[- 1 ])
321322 else :
322323 self .reward_func_names .append (reward_funcs [i ].__name__ )
323324 self .reward_funcs = reward_funcs
@@ -347,7 +348,7 @@ def __init__(
347348 for i , (reward_processing_class , reward_func ) in enumerate (zip (reward_processing_classes , reward_funcs )):
348349 if isinstance (reward_func , PreTrainedModel ):
349350 if reward_processing_class is None :
350- reward_processing_class = AutoTokenizer .from_pretrained (reward_func .config . _name_or_path )
351+ reward_processing_class = AutoTokenizer .from_pretrained (get_config_model_id ( reward_func .config ) )
351352 if reward_processing_class .pad_token_id is None :
352353 reward_processing_class .pad_token = reward_processing_class .eos_token
353354 # The reward model computes the reward for the latest non-padded token in the input sequence.
0 commit comments