3636from paddleformers .trainer .trainer_utils import ShardingOption
3737from paddleformers .utils .log import logger
3838from paddleformers import __version__ as paddleformers_version
39+ from paddleformers .datasets .template .template import get_template_and_fix_tokenizer
3940
4041from ernie .configuration import Ernie4_5_MoeConfig
4142from ernie .modeling_moe import Ernie4_5_MoeForCausalLM
@@ -418,15 +419,39 @@ def run_eval(args: Optional[dict[str, Any]] = None) -> None:
418419 "encode_one_turn" : data_args .encode_one_turn ,
419420 "use_template" : data_args .use_template ,
420421 "is_pretraining" : True if model_args .stage .lower () == "pt" else False ,
422+ "truncate_packing" : data_args .truncate_packing ,
423+ "stage" : model_args .stage ,
424+ "is_valid" : False ,
425+ "template_backend" : data_args .template_backend ,
426+ "split_multi_turn" : data_args .split_multi_turn ,
421427 }
422- from paddleformers .datasets .finetuning import collate_fn
428+ dataset_config .update (
429+ {
430+ "template" : data_args .template ,
431+ "train_on_prompt" : False ,
432+ "tool_format" : None ,
433+ "default_system" : None ,
434+ "enable_thinking" : True ,
435+ }
436+ )
437+
438+ if dataset_config ["template_backend" ] == "custom" :
439+ template_instance = get_template_and_fix_tokenizer (dataset_config )
440+ else :
441+ template_instance = None
442+ dataset_config .update (
443+ {
444+ "template_instance" : template_instance ,
445+ }
446+ )
447+ from paddleformers .datasets .collate import collate_fn
423448
424449 if data_args .dataset_type == "map" :
425- from paddleformers .datasets .finetuning import (
450+ from paddleformers .datasets .loader import (
426451 create_indexed_dataset as create_dataset ,
427452 )
428453 else :
429- from paddleformers .datasets .finetuning import create_dataset
454+ from paddleformers .datasets .loader import create_dataset
430455 dataset_config .update (
431456 {
432457 "num_samples_each_epoch" : data_args .num_samples_each_epoch ,
@@ -440,11 +465,11 @@ def run_eval(args: Optional[dict[str, Any]] = None) -> None:
440465 eval_file_path = os .path .join (data_args .offline_dataset_path , "eval" )
441466 eval_dataset = create_dataset (data_file_prefix = eval_file_path )
442467 else :
468+ dataset_config ["is_valid" ] = True
443469 eval_dataset = create_dataset (
444470 task_group = data_args .eval_dataset_path ,
445471 task_group_prob = data_args .eval_dataset_prob ,
446472 sub_dataset_type = data_args .eval_dataset_type ,
447- is_valid = True ,
448473 ** dataset_config ,
449474 )
450475
0 commit comments