From 17329ddc6bbcb8b46e36579f53346b352326b981 Mon Sep 17 00:00:00 2001 From: KeDengMS Date: Fri, 6 Dec 2019 15:52:29 -0800 Subject: [PATCH] [Nuphar EP] fixes for some object detection models Update notebook tutorial with multi-threaded int8 GEMM from #2517 --- .../onnxruntime-nuphar-tutorial.ipynb | 37 ++++++------- .../nuphar/nuphar_execution_provider.cc | 11 ++++ .../nuphar/scripts/symbolic_shape_infer.py | 53 +++++++++++++------ 3 files changed, 64 insertions(+), 37 deletions(-) diff --git a/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb b/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb index d93bfdc412009..c7179ea967fe9 100644 --- a/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb +++ b/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb @@ -216,8 +216,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Fusion speed-up 437.43%\n", - " Baseline: 0.733 s, Current: 0.136 s\n" + "Fusion speed-up 434.50%\n", + " Baseline: 0.716 s, Current: 0.134 s\n" ] } ], @@ -339,8 +339,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Nuphar Scan speed-up 1.97%\n", - " Baseline: 3.062 s, Current: 3.003 s\n" + "Nuphar Scan speed-up 7.68%\n", + " Baseline: 3.037 s, Current: 2.821 s\n" ] } ], @@ -444,8 +444,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Quantization speed-up 196.18%\n", - " Baseline: 3.003 s, Current: 1.014 s\n" + "Quantization speed-up 278.52%\n", + " Baseline: 2.821 s, Current: 0.745 s\n" ] } ], @@ -575,8 +575,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Nuphar BERT squad speed-up 67.20%\n", - " Baseline: 5.089 s, Current: 3.044 s\n" + "Nuphar BERT squad speed-up 65.18%\n", + " Baseline: 5.023 s, Current: 3.041 s\n" ] } ], @@ -765,8 +765,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Nuphar quantized BiDAF speed-up 44.03%\n", - " Baseline: 0.304 s, Current: 0.211 s\n" + "Nuphar quantized BiDAF speed-up 45.63%\n", + " Baseline: 0.305 s, Current: 0.209 s\n" ] } ], @@ -807,7 +807,7 @@ { "data": { "text/plain": [ - "'JIT took 4.612 seconds'" + "'JIT took 4.655 seconds'" ] }, "execution_count": 28, @@ -887,8 +887,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "AOT speed-up 952.77%\n", - " Baseline: 4.612 s, Current: 0.438 s\n" + "AOT speed-up 967.73%\n", + " Baseline: 4.655 s, Current: 0.436 s\n" ] } ], @@ -919,8 +919,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Single thread perf w/o parallel schedule speed-up 3.80%\n", - " Baseline: 0.318 s, Current: 0.306 s\n" + "Single thread perf w/o parallel schedule speed-up 2.83%\n", + " Baseline: 0.315 s, Current: 0.307 s\n" ] } ], @@ -947,13 +947,6 @@ "end = timer()\n", "print_speedup('Single thread perf w/o parallel schedule', end_baseline - start_baseline, end - start)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/onnxruntime/core/providers/nuphar/nuphar_execution_provider.cc b/onnxruntime/core/providers/nuphar/nuphar_execution_provider.cc index d244c51d30675..0c8566aa49c8f 100644 --- a/onnxruntime/core/providers/nuphar/nuphar_execution_provider.cc +++ b/onnxruntime/core/providers/nuphar/nuphar_execution_provider.cc @@ -225,6 +225,17 @@ NupharExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie } } } + // reject when pooling on symbolic dims, since shape computation does not support it yet + it = attrs.find("kernel_shape"); + ORT_ENFORCE(it != attrs.end()); + int kernel_rank = it->second.ints_size(); + const auto output_shape = node.OutputDefs()[0]->Shape(); + int output_rank = output_shape->dim_size(); + for (int d = output_rank - kernel_rank; d < output_rank; ++d) { + if (output_shape->dim(d).has_dim_param()) { + return false; + } + } } if (node.OpType() == "Slice") { diff --git a/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py b/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py index c4c212a6baaa4..3342977caa28c 100644 --- a/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py +++ b/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py @@ -120,6 +120,7 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose): self.run_ = True self.suggested_merge_ = {} self.symbolic_dims_ = {} + self.input_symbols_ = {} self.auto_merge_ = auto_merge self.guess_output_rank_ = guess_output_rank self.verbose_ = verbose @@ -138,7 +139,12 @@ def _add_suggested_merge(self, symbols, apply=False): if is_literal(s): map_to = s break - # when no literals, map to existing symbolic dims + # when no literals, map to input symbolic dims, then existing symbolic dims + if map_to is None: + for s in symbols: + if s in self.input_symbols_: + map_to = s + break if map_to is None: for s in symbols: if type(self.symbolic_dims_[s]) == sympy.Symbol: @@ -228,6 +234,7 @@ def _merge_symbols(self, dims): int_dim = is_int.index(1) if self.verbose_ > 0: print('dim {} has been merged with value {}'.format(dims[1 - int_dim], dims[int_dim])) + self._check_merged_dims(dims, allow_broadcast=False) return dims[int_dim] else: return None @@ -621,14 +628,13 @@ def _infer_ConstantOfShape(self, node): self._update_computed_dims(sympy_shape) if type(sympy_shape) != list: sympy_shape = [sympy_shape] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], - vi.type.tensor_type.elem_type, - get_shape_from_sympy_shape(sympy_shape))) else: # create new dynamic shape - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], - vi.type.tensor_type.elem_type, - self._new_symbolic_shape(self._get_shape_rank(node,0), node))) + sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node,0), node) + + vi.CopyFrom(helper.make_tensor_value_info(node.output[0], + vi.type.tensor_type.elem_type, + get_shape_from_sympy_shape(sympy_shape))) def _infer_Expand(self, node): expand_to_shape = self._try_get_value(node, 1) @@ -813,7 +819,7 @@ def _infer_Reshape(self, node): assert is_literal(shape_rank) vi.CopyFrom(helper.make_tensor_value_info(node.output[0], vi.type.tensor_type.elem_type, - self._new_symbolic_shape(shape_rank, node))) + get_shape_from_sympy_shape(self._new_symbolic_shape(shape_rank, node)))) else: input_shape = self._get_shape(node, 0) input_sympy_shape = self._get_sympy_shape(node, 0) @@ -850,19 +856,36 @@ def _infer_Reshape(self, node): def _infer_Resize(self, node): vi = self.known_vi_[node.output[0]] - if get_opset(self.out_mp_) <= 10: # only support opset 10 Resize for now + input_sympy_shape = self._get_sympy_shape(node, 0) + if get_opset(self.out_mp_) <= 10: scales = self._try_get_value(node, 1) if scales is not None: - input_sympy_shape = self._get_sympy_shape(node, 0) new_sympy_shape = [sympy.simplify(sympy.floor(d*s)) for d,s in zip(input_sympy_shape, scales)] self._update_computed_dims(new_sympy_shape) vi.CopyFrom(helper.make_tensor_value_info(node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, get_shape_from_sympy_shape(new_sympy_shape))) else: + roi = self._try_get_value(node, 1) + scales = self._try_get_value(node, 2) + sizes = self._try_get_value(node, 3) + if sizes is not None: + new_sympy_shape = [sympy.simplify(sympy.floor(s)) for s in sizes] + self._update_computed_dims(new_sympy_shape) + elif roi is not None and scales is not None: + rank = len(scales) + assert len(roi) == 2*rank + roi_start = list(roi)[:rank] + roi_end = list(roi)[rank:] + scales = list(scales) + new_sympy_shape = [sympy.simplify(sympy.floor(d * (end - start) * scale)) for d, start, end, scale in zip(input_sympy_shape, roi_start, roi_end, scales)] + self._update_computed_dims(new_sympy_shape) + else: + new_sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node, 0), node) + vi.CopyFrom(helper.make_tensor_value_info(node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, - self._new_symbolic_shape(self._get_shape_rank(node, 0), node))) + get_shape_from_sympy_shape(new_sympy_shape))) def _infer_Scan(self, node): subgraph = get_attribute(node, 'body') @@ -1064,16 +1087,16 @@ def _infer_impl(self, in_mp, start_sympy_data={}): self.sympy_data_ = start_sympy_data self.out_mp_.graph.ClearField('value_info') self._apply_suggested_merge(graph_input_only=True) - input_symbols = set() + self.input_symbols_ = set() for i in self.out_mp_.graph.input: input_dims = i.type.tensor_type.shape.dim for i_dim in range(len(input_dims)): if get_dim_from_type_proto(input_dims[i_dim]) is None: # some models use None for symbolic dim in input, replace it with a string input_dims[i_dim].dim_param = self._new_symbolic_dim(i.name, i_dim) - input_symbols.update([d for d in get_shape_from_type_proto(i.type) if type(d) == str]) + self.input_symbols_.update([d for d in get_shape_from_type_proto(i.type) if type(d) == str]) - for s in input_symbols: + for s in self.input_symbols_: if s in self.suggested_merge_: s_merge = self.suggested_merge_[s] assert s_merge in self.symbolic_dims_ @@ -1166,7 +1189,7 @@ def _infer_impl(self, in_mp, start_sympy_data={}): new_shape = self._new_symbolic_shape(out_rank, node, i_o) vi.CopyFrom(helper.make_tensor_value_info(vi.name, self.known_vi_[node.input[0]].type.tensor_type.elem_type, - new_shape)) + get_shape_from_sympy_shape(new_shape))) if self.verbose_ > 0: if is_unknown_op: