@@ -108,15 +108,21 @@ int mmdeploy_text_recognizer_apply_bbox(mm_handle_t handle, const mm_mat_t *imag
108108
109109 try {
110110 auto recognizer = static_cast <Handle *>(handle);
111- Value input{Value::kArray , Value::kArray };
111+ Value::Array input_images;
112+ Value::Array input_bboxes;
112113 auto _bboxes = bboxes;
113114 auto result_count = 0 ;
114- for (int i = 0 ; i < image_count; ++i) {
115- mmdeploy::Mat _mat{images[i].height , images[i].width , PixelFormat (images[i].format ),
116- DataType (images->type ), images[i].data , Device{" cpu" }};
117- input[0 ].push_back ({{" ori_img" , _mat}});
118115
116+ // mapping from image index to result index, -1 represents invalid image with no bboxes
117+ // supplied.
118+ std::vector<int > result_index (image_count, -1 );
119+
120+ for (int i = 0 ; i < image_count; ++i) {
119121 if (bboxes && bbox_count) {
122+ if (bbox_count[i] == 0 ) {
123+ // skip images with no bounding boxes (push nothing)
124+ continue ;
125+ }
120126 Value boxes (Value::kArray );
121127 for (int j = 0 ; j < bbox_count[i]; ++j) {
122128 Value box;
@@ -128,17 +134,26 @@ int mmdeploy_text_recognizer_apply_bbox(mm_handle_t handle, const mm_mat_t *imag
128134 }
129135 _bboxes += bbox_count[i];
130136 result_count += bbox_count[i];
131- input[ 1 ] .push_back ({{" boxes" , boxes}});
137+ input_bboxes .push_back ({{" boxes" , boxes}});
132138 } else {
133- input[ 1 ]. push_back (Value:: kNull );
139+ // bboxes or bbox_count not supplied, use whole image
134140 result_count += 1 ;
141+ input_bboxes.push_back (Value::kNull );
135142 }
143+
144+ result_index[i] = static_cast <int >(input_images.size ());
145+ mmdeploy::Mat _mat{images[i].height , images[i].width , PixelFormat (images[i].format ),
146+ DataType (images[i].type ), images[i].data , Device{" cpu" }};
147+ input_images.push_back ({{" ori_img" , _mat}});
136148 }
137149
138- auto output = recognizer-> Run ( std::move (input)). value (). front () ;
150+ std::vector<std::vector<mmocr::TextRecognizerOutput>> recognizer_outputs ;
139151
140- auto recognizer_outputs =
141- from_value<std::vector<std::vector<mmocr::TextRecognizerOutput>>>(output);
152+ if (!input_images.empty ()) {
153+ Value input{std::move (input_images), std::move (input_bboxes)};
154+ auto output = recognizer->Run (std::move (input)).value ().front ();
155+ from_value (output, recognizer_outputs);
156+ }
142157
143158 std::vector<int > counts;
144159 if (bboxes && bbox_count) {
@@ -157,21 +172,23 @@ int mmdeploy_text_recognizer_apply_bbox(mm_handle_t handle, const mm_mat_t *imag
157172 new mm_text_recognize_t [result_count]{}, deleter);
158173
159174 for (int i = 0 ; i < image_count; ++i) {
160- auto &recog_output = recognizer_outputs[i];
161- for (int j = 0 ; j < recog_output.size (); ++j) {
162- auto &res = _results[offsets[i] + j];
175+ if (result_index[i] >= 0 ) {
176+ auto &recog_output = recognizer_outputs[result_index[i]];
177+ for (int j = 0 ; j < recog_output.size (); ++j) {
178+ auto &res = _results[offsets[i] + j];
163179
164- auto &box_result = recog_output[j];
180+ auto &box_result = recog_output[j];
165181
166- auto &score = box_result.score ;
167- res.length = static_cast <int >(score.size ());
182+ auto &score = box_result.score ;
183+ res.length = static_cast <int >(score.size ());
168184
169- res.score = new float [score.size ()];
170- std::copy_n (score.data (), score.size (), res.score );
185+ res.score = new float [score.size ()];
186+ std::copy_n (score.data (), score.size (), res.score );
171187
172- auto text = box_result.text ;
173- res.text = new char [text.length () + 1 ];
174- std::copy_n (text.data (), text.length () + 1 , res.text );
188+ auto text = box_result.text ;
189+ res.text = new char [text.length () + 1 ];
190+ std::copy_n (text.data (), text.length () + 1 , res.text );
191+ }
175192 }
176193 }
177194 *results = _results.release ();
0 commit comments