@@ -127,12 +127,12 @@ void BasicTokenizer::Tokenize(const string& text, vector<wstring>* res) const {
 }

 WordPieceTokenizer::WordPieceTokenizer(
-    framework::Vocab* vocab, const wstring& unk_token /* = L"[UNK]"*/,
+    const framework::Vocab* vocab, const wstring& unk_token /* = L"[UNK]"*/,
     const size_t max_input_chars_per_word /* = 100 */)
     : vocab_(vocab),
       unk_token_(unk_token),
       max_input_chars_per_word_(max_input_chars_per_word) {
-  unk_token_id_ = (*vocab_)[unk_token_];
+  unk_token_id_ = vocab_->at(unk_token_);
 }

 void WordPieceTokenizer::Tokenize(const wstring& text,
@@ -182,7 +182,7 @@ void WordPieceTokenizer::Tokenize(const wstring& text,
   }
 }

-BertTokenizer::BertTokenizer(framework::Vocab* vocab,
+BertTokenizer::BertTokenizer(const framework::Vocab* vocab,
                              bool do_lower_case /* = false */,
                              const wstring& unk_token /* = L"[UNK]" */,
                              const wstring& pad_token /* = L"[PAD]" */,
@@ -200,11 +200,11 @@ BertTokenizer::BertTokenizer(framework::Vocab* vocab,
       vocab_(vocab),
       basic_tokenizer_(do_lower_case_),
       word_piece_tokenizer_(vocab_, unk_token) {
-  unk_token_id_ = (*vocab_)[unk_token_];
-  pad_token_id_ = (*vocab_)[pad_token_];
-  cls_token_id_ = (*vocab_)[cls_token_];
-  mask_token_id_ = (*vocab_)[mask_token_];
-  sep_token_id_ = (*vocab_)[sep_token_];
+  unk_token_id_ = vocab_->at(unk_token_);
+  pad_token_id_ = vocab_->at(pad_token_);
+  cls_token_id_ = vocab_->at(cls_token_);
+  mask_token_id_ = vocab_->at(mask_token_);
+  sep_token_id_ = vocab_->at(sep_token_);

   all_special_tokens_ = vector<wstring>(
       {unk_token_, pad_token_, cls_token_, mask_token_, sep_token_});
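
Note on the `const framework::Vocab*` change: `operator[]` on map-like containers is a non-const member (it inserts a default value for a missing key), so it cannot be called through a const pointer, whereas `at()` is const-qualified and throws for unknown tokens. A minimal sketch of the distinction, assuming `framework::Vocab` behaves like a `std::unordered_map<std::wstring, int>` (the alias and the `LookupToken` helper below are illustrative, not part of this patch):

    #include <string>
    #include <unordered_map>

    // Stand-in for framework::Vocab, assumed to be an unordered_map-like type.
    using Vocab = std::unordered_map<std::wstring, int>;

    int LookupToken(const Vocab* vocab, const std::wstring& token) {
      // (*vocab)[token] would not compile here: operator[] is non-const
      // because it may insert a default-constructed value for a missing key.
      // at() is const and throws std::out_of_range if the token is absent.
      return vocab->at(token);
    }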