From 8f534499dcf4d9a270236bdc4d8ccdfc1facfdb8 Mon Sep 17 00:00:00 2001 From: Yulong Wang Date: Tue, 26 Nov 2019 12:17:24 -0800 Subject: [PATCH 1/7] Force no thread pool creation when enabled OpenMP --- onnxruntime/core/session/inference_session.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index e18aa82de3826..b7d967fe11b31 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -102,12 +102,14 @@ InferenceSession::InferenceSession(const SessionOptions& session_options, : session_options_(session_options), graph_transformation_mgr_(session_options.max_num_graph_transformation_steps), logging_manager_(logging_manager), +#ifdef USE_OPENMP thread_pool_(concurrency::CreateThreadPool("intra_op_thread_pool", session_options.intra_op_num_threads)), inter_op_thread_pool_(session_options.execution_mode == ExecutionMode::ORT_PARALLEL ? concurrency::CreateThreadPool("inter_op_thread_pool", session_options.inter_op_num_threads) : nullptr), +#endif session_state_(execution_providers_, session_options.enable_mem_pattern && session_options.execution_mode == ExecutionMode::ORT_SEQUENTIAL, thread_pool_.get(), From f36221836b83dd3c7b62a9fb86ae99d28596b4de Mon Sep 17 00:00:00 2001 From: Yulong Wang Date: Tue, 26 Nov 2019 12:31:03 -0800 Subject: [PATCH 2/7] update comments and help messages --- onnxruntime/core/framework/session_options.h | 2 ++ onnxruntime/test/perftest/README.md | 4 ++-- onnxruntime/test/perftest/command_args_parser.cc | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index 3ab220b0b6c12..b45589052c05b 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -55,10 +55,12 @@ struct SessionOptions { TransformerLevel graph_optimization_level = TransformerLevel::Level1; // controls the size of the thread pool used to parallelize the execution of tasks within individual nodes (ops) + // if OpenMP is enabled, this configuration will be ignored int intra_op_num_threads = 0; // controls the size of the thread pool used to parallelize the execution of nodes (ops) // configuring this makes sense only when you're using parallel executor + // if OpenMP is enabled, this configuration will be ignored int inter_op_num_threads = 0; // For models with free input dimensions (most commonly batch size), specifies a set of values to override those diff --git a/onnxruntime/test/perftest/README.md b/onnxruntime/test/perftest/README.md index ae8851d54613e..39dc0e83e1754 100644 --- a/onnxruntime/test/perftest/README.md +++ b/onnxruntime/test/perftest/README.md @@ -32,9 +32,9 @@ Options: -v: Show verbose information. - -x: [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes. A value of 0 means the test will auto-select a default. Must >=0. + -x: [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes. A value of 0 means the test will auto-select a default. Must >=0. If OpenMP is enabled, this configuration will be ignored. - -y: [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means the test will auto-select a default. Must >=0. + -y: [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means the test will auto-select a default. Must >=0. If OpenMP is enabled, this configuration will be ignored. -h: help. diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 4b03aa7a12bd5..9dbc69d114b3a 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -41,8 +41,8 @@ namespace perftest { "\t-p [profile_file]: Specifies the profile name to enable profiling and dump the profile data to the file.\n" "\t-s: Show statistics result, like P75, P90.\n" "\t-v: Show verbose information.\n" - "\t-x [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0.\n" - "\t-y [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means ORT will pick a default. Must >=0.\n" + "\t-x [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0. If OpenMP is enabled, this configuration will be ignored.\n" + "\t-y [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means ORT will pick a default. Must >=0. If OpenMP is enabled, this configuration will be ignored.\n" "\t-P: Use parallel executor instead of sequential executor.\n" "\t-o [optimization level]: Default is 1. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n" "\t\tPlease see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels. \n" From 43f51768e6bd4ba460bf7a985c803fef82ab87b4 Mon Sep 17 00:00:00 2001 From: Yulong Wang Date: Tue, 26 Nov 2019 13:42:09 -0800 Subject: [PATCH 3/7] apply to intra thread pool only --- onnxruntime/core/framework/session_options.h | 1 - onnxruntime/core/session/inference_session.cc | 2 +- onnxruntime/test/perftest/README.md | 2 +- onnxruntime/test/perftest/command_args_parser.cc | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index b45589052c05b..27ac93f40330c 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -60,7 +60,6 @@ struct SessionOptions { // controls the size of the thread pool used to parallelize the execution of nodes (ops) // configuring this makes sense only when you're using parallel executor - // if OpenMP is enabled, this configuration will be ignored int inter_op_num_threads = 0; // For models with free input dimensions (most commonly batch size), specifies a set of values to override those diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index b7d967fe11b31..a35cb13f77688 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -105,11 +105,11 @@ InferenceSession::InferenceSession(const SessionOptions& session_options, #ifdef USE_OPENMP thread_pool_(concurrency::CreateThreadPool("intra_op_thread_pool", session_options.intra_op_num_threads)), +#endif inter_op_thread_pool_(session_options.execution_mode == ExecutionMode::ORT_PARALLEL ? concurrency::CreateThreadPool("inter_op_thread_pool", session_options.inter_op_num_threads) : nullptr), -#endif session_state_(execution_providers_, session_options.enable_mem_pattern && session_options.execution_mode == ExecutionMode::ORT_SEQUENTIAL, thread_pool_.get(), diff --git a/onnxruntime/test/perftest/README.md b/onnxruntime/test/perftest/README.md index 39dc0e83e1754..9f3cba19c02b7 100644 --- a/onnxruntime/test/perftest/README.md +++ b/onnxruntime/test/perftest/README.md @@ -34,7 +34,7 @@ Options: -x: [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes. A value of 0 means the test will auto-select a default. Must >=0. If OpenMP is enabled, this configuration will be ignored. - -y: [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means the test will auto-select a default. Must >=0. If OpenMP is enabled, this configuration will be ignored. + -y: [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means the test will auto-select a default. Must >=0. -h: help. diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 9dbc69d114b3a..7b4675dd60da8 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -42,7 +42,7 @@ namespace perftest { "\t-s: Show statistics result, like P75, P90.\n" "\t-v: Show verbose information.\n" "\t-x [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0. If OpenMP is enabled, this configuration will be ignored.\n" - "\t-y [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means ORT will pick a default. Must >=0. If OpenMP is enabled, this configuration will be ignored.\n" + "\t-y [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means ORT will pick a default. Must >=0.\n" "\t-P: Use parallel executor instead of sequential executor.\n" "\t-o [optimization level]: Default is 1. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n" "\t\tPlease see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels. \n" From 3d9d892aa93b634ebb70aad8d25e5cf602f0629b Mon Sep 17 00:00:00 2001 From: Yulong Wang Date: Tue, 26 Nov 2019 14:47:36 -0800 Subject: [PATCH 4/7] resolve feedback --- onnxruntime/core/session/inference_session.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index a35cb13f77688..03f20c3a11bfc 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -102,7 +102,7 @@ InferenceSession::InferenceSession(const SessionOptions& session_options, : session_options_(session_options), graph_transformation_mgr_(session_options.max_num_graph_transformation_steps), logging_manager_(logging_manager), -#ifdef USE_OPENMP +#ifndef USE_OPENMP thread_pool_(concurrency::CreateThreadPool("intra_op_thread_pool", session_options.intra_op_num_threads)), #endif From cba7e2b0fbdb8324cd1bb37b2f20db39a5195442 Mon Sep 17 00:00:00 2001 From: Yulong Wang Date: Tue, 26 Nov 2019 18:28:03 -0800 Subject: [PATCH 5/7] handle parameter '-x' in perftest --- onnxruntime/test/perftest/command_args_parser.cc | 4 ++++ onnxruntime/test/perftest/ort_test_session.cc | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 7b4675dd60da8..d01f120361b94 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -123,10 +123,14 @@ namespace perftest { test_config.run_config.f_verbose = true; break; case 'x': +#ifdef USE_OPENMP + fprintf(stderr, "Warning: argument '-x' will be ignored when OpenMP is enabled.\n"); +#else test_config.run_config.intra_op_num_threads = static_cast(OrtStrtol(optarg, nullptr)); if (test_config.run_config.intra_op_num_threads < 0) { return false; } +#endif break; case 'y': test_config.run_config.inter_op_num_threads = static_cast(OrtStrtol(optarg, nullptr)); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 5c8c5a8a99951..2e16451ae2488 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -82,7 +82,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else if (provider_name == onnxruntime::kAclExecutionProvider) { #ifdef USE_ACL Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ACL(session_options, - performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); + performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); #else ORT_THROW("Acl is not supported in this build\n"); #endif @@ -100,8 +100,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device else session_options.DisableMemPattern(); session_options.SetExecutionMode(performance_test_config.run_config.execution_mode); + +#ifndef USE_OPENMP fprintf(stdout, "Setting intra_op_num_threads to %d\n", performance_test_config.run_config.intra_op_num_threads); session_options.SetIntraOpNumThreads(performance_test_config.run_config.intra_op_num_threads); +#endif if (performance_test_config.run_config.execution_mode == ExecutionMode::ORT_PARALLEL) { fprintf(stdout, "Setting inter_op_num_threads to %d\n", performance_test_config.run_config.inter_op_num_threads); From 1686db31d702e3363606af19a60b6f1b1e9edc0e Mon Sep 17 00:00:00 2001 From: Yulong Wang Date: Tue, 26 Nov 2019 19:57:28 -0800 Subject: [PATCH 6/7] explicitly initialize thread_pool_ --- onnxruntime/core/session/inference_session.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 03f20c3a11bfc..fd4e96fb52301 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -105,6 +105,8 @@ InferenceSession::InferenceSession(const SessionOptions& session_options, #ifndef USE_OPENMP thread_pool_(concurrency::CreateThreadPool("intra_op_thread_pool", session_options.intra_op_num_threads)), +#else + thread_pool_(nullptr), #endif inter_op_thread_pool_(session_options.execution_mode == ExecutionMode::ORT_PARALLEL ? concurrency::CreateThreadPool("inter_op_thread_pool", From c713bf3de9a4bf2972c1feebc0817eb4c07c1e4f Mon Sep 17 00:00:00 2001 From: Yulong Wang Date: Tue, 26 Nov 2019 19:59:04 -0800 Subject: [PATCH 7/7] exit perftest when '-x' is specified with openmp --- onnxruntime/test/perftest/command_args_parser.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index d01f120361b94..30c8937828507 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -124,7 +124,8 @@ namespace perftest { break; case 'x': #ifdef USE_OPENMP - fprintf(stderr, "Warning: argument '-x' will be ignored when OpenMP is enabled.\n"); + fprintf(stderr, "cannot use argument '-x' when OpenMP is enabled.\n"); + return false; #else test_config.run_config.intra_op_num_threads = static_cast(OrtStrtol(optarg, nullptr)); if (test_config.run_config.intra_op_num_threads < 0) {