diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index 3ab220b0b6c12..27ac93f40330c 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -55,6 +55,7 @@ struct SessionOptions { TransformerLevel graph_optimization_level = TransformerLevel::Level1; // controls the size of the thread pool used to parallelize the execution of tasks within individual nodes (ops) + // if OpenMP is enabled, this configuration will be ignored int intra_op_num_threads = 0; // controls the size of the thread pool used to parallelize the execution of nodes (ops) diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index e18aa82de3826..fd4e96fb52301 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -102,8 +102,12 @@ InferenceSession::InferenceSession(const SessionOptions& session_options, : session_options_(session_options), graph_transformation_mgr_(session_options.max_num_graph_transformation_steps), logging_manager_(logging_manager), +#ifndef USE_OPENMP thread_pool_(concurrency::CreateThreadPool("intra_op_thread_pool", session_options.intra_op_num_threads)), +#else + thread_pool_(nullptr), +#endif inter_op_thread_pool_(session_options.execution_mode == ExecutionMode::ORT_PARALLEL ? concurrency::CreateThreadPool("inter_op_thread_pool", session_options.inter_op_num_threads) diff --git a/onnxruntime/test/perftest/README.md b/onnxruntime/test/perftest/README.md index ae8851d54613e..9f3cba19c02b7 100644 --- a/onnxruntime/test/perftest/README.md +++ b/onnxruntime/test/perftest/README.md @@ -32,7 +32,7 @@ Options: -v: Show verbose information. - -x: [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes. A value of 0 means the test will auto-select a default. Must >=0. 
+ -x: [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes. A value of 0 means the test will auto-select a default. Must >=0. If OpenMP is enabled, this configuration will be ignored. -y: [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means the test will auto-select a default. Must >=0. diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 4b03aa7a12bd5..30c8937828507 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -41,7 +41,7 @@ namespace perftest { "\t-p [profile_file]: Specifies the profile name to enable profiling and dump the profile data to the file.\n" "\t-s: Show statistics result, like P75, P90.\n" "\t-v: Show verbose information.\n" - "\t-x [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0.\n" + "\t-x [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0. If OpenMP is enabled, this configuration will be ignored.\n" "\t-y [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means ORT will pick a default. Must >=0.\n" "\t-P: Use parallel executor instead of sequential executor.\n" "\t-o [optimization level]: Default is 1. 
Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n" @@ -123,10 +123,15 @@ namespace perftest { test_config.run_config.f_verbose = true; break; case 'x': +#ifdef USE_OPENMP + fprintf(stderr, "cannot use argument '-x' when OpenMP is enabled.\n"); + return false; +#else test_config.run_config.intra_op_num_threads = static_cast<int>(OrtStrtol(optarg, nullptr)); if (test_config.run_config.intra_op_num_threads < 0) { return false; } +#endif break; case 'y': test_config.run_config.inter_op_num_threads = static_cast<int>(OrtStrtol(optarg, nullptr)); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 5c8c5a8a99951..2e16451ae2488 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -82,7 +82,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else if (provider_name == onnxruntime::kAclExecutionProvider) { #ifdef USE_ACL Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ACL(session_options, - performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); + performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); #else ORT_THROW("Acl is not supported in this build\n"); #endif @@ -100,8 +100,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device else session_options.DisableMemPattern(); session_options.SetExecutionMode(performance_test_config.run_config.execution_mode); + +#ifndef USE_OPENMP fprintf(stdout, "Setting intra_op_num_threads to %d\n", performance_test_config.run_config.intra_op_num_threads); session_options.SetIntraOpNumThreads(performance_test_config.run_config.intra_op_num_threads); +#endif if (performance_test_config.run_config.execution_mode == ExecutionMode::ORT_PARALLEL) { fprintf(stdout, "Setting inter_op_num_threads to %d\n", performance_test_config.run_config.inter_op_num_threads);