@@ -34,20 +34,6 @@ MlBreakEngine::~MlBreakEngine() {}
3434
3535namespace {
3636 const char16_t INVALID = u' |' ;
37- const int32_t MAX_FEATURE = 13 ;
38- const int32_t MAX_FEATURE_LENGTH = 11 ;
39-
40- void concatChar (const char16_t *str, const UChar32 *arr, int32_t length, char16_t *feature, UErrorCode &status) {
41- if (U_FAILURE (status)) {
42- return ;
43- }
44- UnicodeString result (str);
45- for (int i = 0 ; i < length; i++) {
46- result.append (arr[i]);
47- }
48- U_ASSERT (result.length () < MAX_FEATURE_LENGTH);
49- result.extract (feature, MAX_FEATURE_LENGTH, status); // NUL-terminates
50- }
5137}
5238
5339int32_t MlBreakEngine::divideUpRange (UText *inText, int32_t rangeStart, int32_t rangeEnd,
@@ -144,96 +130,68 @@ int32_t MlBreakEngine::divideUpRange(UText *inText, int32_t rangeStart, int32_t
144130
145131void MlBreakEngine::evaluateBreakpoint (UChar32* elementList, int32_t index, int32_t &numBreaks,
146132 UVector32 &boundary, UErrorCode &status) const {
147- char16_t featureList[MAX_FEATURE][MAX_FEATURE_LENGTH];
148133 if (U_FAILURE (status)) {
149134 return ;
150135 }
151136
152- UChar32 arr[4 ] = {-1 , -1 , -1 , -1 };
153- int32_t length = 0 , listLength = 0 ;
154-
155- const UChar32 w1 = elementList[0 ];
156- const UChar32 w2 = elementList[1 ];
157- const UChar32 w3 = elementList[2 ];
158- const UChar32 w4 = elementList[3 ];
159- const UChar32 w5 = elementList[4 ];
160- const UChar32 w6 = elementList[5 ];
137+ UnicodeString feature;
138+ int32_t score = fNegativeSum ;
161139
162- length = 1 ;
163- if (w1 != INVALID) {
164- arr[ 0 ] = w1;
165- concatChar ( u" UW1:" , arr, length, featureList[listLength++], status );
140+ if (elementList[ 0 ] != INVALID) {
141+ // When the key doesn't exist, Hashtable.geti(key) returns 0, and 2 * 0 = 0.
142+ // So there is no need to check whether fModel contains the feature key before looking it up.
143+ score += ( 2 * fModel . geti (feature. setTo ( u" UW1:" , 4 ). append (elementList[ 0 ])) );
166144 }
167- if (w2 != INVALID) {
168- arr[0 ] = w2;
169- concatChar (u" UW2:" , arr, length, featureList[listLength++], status);
145+ if (elementList[1 ] != INVALID) {
146+ score += (2 * fModel .geti (feature.setTo (u" UW2:" , 4 ).append (elementList[1 ])));
170147 }
171- if (w3 != INVALID) {
172- arr[0 ] = w3;
173- concatChar (u" UW3:" , arr, length, featureList[listLength++], status);
148+ if (elementList[2 ] != INVALID) {
149+ score += (2 * fModel .geti (feature.setTo (u" UW3:" , 4 ).append (elementList[2 ])));
174150 }
175- if (w4 != INVALID) {
176- arr[0 ] = w4;
177- concatChar (u" UW4:" , arr, length, featureList[listLength++], status);
151+ if (elementList[3 ] != INVALID) {
152+ score += (2 * fModel .geti (feature.setTo (u" UW4:" , 4 ).append (elementList[3 ])));
178153 }
179- if (w5 != INVALID) {
180- arr[0 ] = w5;
181- concatChar (u" UW5:" , arr, length, featureList[listLength++], status);
154+ if (elementList[4 ] != INVALID) {
155+ score += (2 * fModel .geti (feature.setTo (u" UW5:" , 4 ).append (elementList[4 ])));
182156 }
183- if (w6 != INVALID) {
184- arr[0 ] = w6;
185- concatChar (u" UW6:" , arr, length, featureList[listLength++], status);
157+ if (elementList[5 ] != INVALID) {
158+ score += (2 * fModel .geti (feature.setTo (u" UW6:" , 4 ).append (elementList[5 ])));
186159 }
187- length = 2 ;
188- if (w2 != INVALID && w3 != INVALID) {
189- arr[0 ] = w2;
190- arr[1 ] = w3;
191- concatChar (u" BW1:" , arr, length, featureList[listLength++], status);
160+ if (elementList[1 ] != INVALID && elementList[2 ] != INVALID) {
161+ score += (2 * fModel .geti (
162+ feature.setTo (u" BW1:" , 4 ).append (elementList[1 ]).append (elementList[2 ])));
192163 }
193- if (w3 != INVALID && w4 != INVALID) {
194- arr[0 ] = w3;
195- arr[1 ] = w4;
196- concatChar (u" BW2:" , arr, length, featureList[listLength++], status);
164+ if (elementList[2 ] != INVALID && elementList[3 ] != INVALID) {
165+ score += (2 * fModel .geti (
166+ feature.setTo (u" BW2:" , 4 ).append (elementList[2 ]).append (elementList[3 ])));
197167 }
198- if (w4 != INVALID && w5 != INVALID) {
199- arr[0 ] = w4;
200- arr[1 ] = w5;
201- concatChar (u" BW3:" , arr, length, featureList[listLength++], status);
168+ if (elementList[3 ] != INVALID && elementList[4 ] != INVALID) {
169+ score += (2 * fModel .geti (
170+ feature.setTo (u" BW3:" , 4 ).append (elementList[3 ]).append (elementList[4 ])));
202171 }
203- length = 3 ;
204- if (w1 != INVALID && w2 != INVALID && w3 != INVALID) {
205- arr[0 ] = w1;
206- arr[1 ] = w2;
207- arr[2 ] = w3;
208- concatChar (u" TW1:" , arr, length, featureList[listLength++], status);
172+ if (elementList[0 ] != INVALID && elementList[1 ] != INVALID && elementList[2 ] != INVALID) {
173+ score += (2 * fModel .geti (feature.setTo (u" TW1:" , 4 )
174+ .append (elementList[0 ])
175+ .append (elementList[1 ])
176+ .append (elementList[2 ])));
209177 }
210- if (w2 != INVALID && w3 != INVALID && w4 != INVALID) {
211- arr[ 0 ] = w2;
212- arr [1 ] = w3;
213- arr [2 ] = w4;
214- concatChar ( u" TW2: " , arr, length, featureList[listLength++], status );
178+ if (elementList[ 1 ] != INVALID && elementList[ 2 ] != INVALID && elementList[ 3 ] != INVALID) {
179+ score += ( 2 * fModel . geti (feature. setTo ( u" TW2: " , 4 )
180+ . append (elementList [1 ])
181+ . append (elementList [2 ])
182+ . append (elementList[ 3 ])) );
215183 }
216- if (w3 != INVALID && w4 != INVALID && w5 != INVALID) {
217- arr[ 0 ] = w3;
218- arr[ 1 ] = w4;
219- arr[ 2 ] = w5;
220- concatChar ( u" TW3: " , arr, length, featureList[listLength++], status );
184+ if (elementList[ 2 ] != INVALID && elementList[ 3 ] != INVALID && elementList[ 4 ] != INVALID) {
185+ score += ( 2 * fModel . geti (feature. setTo ( u" TW3: " , 4 )
186+ . append (elementList[ 2 ])
187+ . append (elementList[ 3 ])
188+ . append (elementList[ 4 ])) );
221189 }
222- if (w4 != INVALID && w5 != INVALID && w6 != INVALID) {
223- arr[0 ] = w4;
224- arr[1 ] = w5;
225- arr[2 ] = w6;
226- concatChar (u" TW4:" , arr, length, featureList[listLength++], status);
227- }
228- if (U_FAILURE (status)) {
229- return ;
230- }
231- int32_t score = fNegativeSum ;
232- for (int32_t j = 0 ; j < listLength; j++) {
233- UnicodeString key (featureList[j]);
234- if (fModel .containsKey (key)) {
235- score += (2 * fModel .geti (key));
236- }
190+ if (elementList[3 ] != INVALID && elementList[4 ] != INVALID && elementList[5 ] != INVALID) {
191+ score += (2 * fModel .geti (feature.setTo (u" TW4:" , 4 )
192+ .append (elementList[3 ])
193+ .append (elementList[4 ])
194+ .append (elementList[5 ])));
237195 }
238196 if (score > 0 ) {
239197 boundary.addElement (index, status);
0 commit comments