Commit 2127f6d

remove const_cast
1 parent 3227a88 commit 2127f6d

2 files changed (+14 −14)
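
Why the const_cast could be dropped: std::map/std::unordered_map::operator[] has no const overload, because it inserts a default value for a missing key; at() does have one, throwing std::out_of_range on a miss. Once every lookup goes through at(), the tokenizers can accept const framework::Vocab*. A minimal sketch, assuming framework::Vocab is an alias for std::unordered_map<std::wstring, std::int64_t> (an assumption for illustration; the real definition lives elsewhere in Paddle):

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    // Assumed alias for framework::Vocab: token -> id.
    using Vocab = std::unordered_map<std::wstring, std::int64_t>;

    std::int64_t LookupId(const Vocab* vocab, const std::wstring& token) {
      // (*vocab)[token] would not compile through a const pointer:
      // operator[] is non-const because it may insert a default id.
      return vocab->at(token);  // const overload; throws std::out_of_range on a miss
    }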

paddle/fluid/operators/string/faster_tokenizer_op.cc

Lines changed: 8 additions & 8 deletions
@@ -127,12 +127,12 @@ void BasicTokenizer::Tokenize(const string& text, vector<wstring>* res) const {
 }
 
 WordPieceTokenizer::WordPieceTokenizer(
-    framework::Vocab* vocab, const wstring& unk_token /* = L"[UNK]"*/,
+    const framework::Vocab* vocab, const wstring& unk_token /* = L"[UNK]"*/,
     const size_t max_input_chars_per_word /* = 100 */)
     : vocab_(vocab),
       unk_token_(unk_token),
       max_input_chars_per_word_(max_input_chars_per_word) {
-  unk_token_id_ = (*vocab_)[unk_token_];
+  unk_token_id_ = vocab_->at(unk_token_);
 }
 
 void WordPieceTokenizer::Tokenize(const wstring& text,
@@ -182,7 +182,7 @@ void WordPieceTokenizer::Tokenize(const wstring& text,
   }
 }
 
-BertTokenizer::BertTokenizer(framework::Vocab* vocab,
+BertTokenizer::BertTokenizer(const framework::Vocab* vocab,
                              bool do_lower_case /* = false */,
                              const wstring& unk_token /* = L"[UNK]" */,
                              const wstring& pad_token /* = L"[PAD]" */,
@@ -200,11 +200,11 @@ BertTokenizer::BertTokenizer(framework::Vocab* vocab,
       vocab_(vocab),
       basic_tokenizer_(do_lower_case_),
       word_piece_tokenizer_(vocab_, unk_token) {
-  unk_token_id_ = (*vocab_)[unk_token_];
-  pad_token_id_ = (*vocab_)[pad_token_];
-  cls_token_id_ = (*vocab_)[cls_token_];
-  mask_token_id_ = (*vocab_)[mask_token_];
-  sep_token_id_ = (*vocab_)[sep_token_];
+  unk_token_id_ = vocab_->at(unk_token_);
+  pad_token_id_ = vocab_->at(pad_token_);
+  cls_token_id_ = vocab_->at(cls_token_);
+  mask_token_id_ = vocab_->at(mask_token_);
+  sep_token_id_ = vocab_->at(sep_token_);
 
   all_special_tokens_ = vector<wstring>(
       {unk_token_, pad_token_, cls_token_, mask_token_, sep_token_});
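
One behavioral nuance the diff implies but does not state: for a token missing from the vocab, (*vocab_)[token] would have silently inserted and returned id 0, while vocab_->at(token) throws std::out_of_range. If a caller ever wanted the old forgiving behavior, a find-based lookup would provide it; a hedged sketch (kFallbackId and LookupOrFallback are illustrative names, not part of this commit):

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    using Vocab = std::unordered_map<std::wstring, std::int64_t>;  // assumed alias

    constexpr std::int64_t kFallbackId = 0;  // illustrative fallback only

    std::int64_t LookupOrFallback(const Vocab& vocab, const std::wstring& token) {
      auto it = vocab.find(token);  // find() works through const references
      return it != vocab.end() ? it->second : kFallbackId;
    }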

paddle/fluid/operators/string/faster_tokenizer_op.h

Lines changed: 6 additions & 6 deletions
@@ -56,21 +56,22 @@ class BasicTokenizer {
 
 class WordPieceTokenizer {
  public:
-  explicit WordPieceTokenizer(framework::Vocab* vocab,
+  explicit WordPieceTokenizer(const framework::Vocab* vocab,
                               const wstring& unk_token = L"[UNK]",
                               const size_t max_input_chars_per_word = 100);
   void Tokenize(const wstring& text, vector<int64_t>* output) const;
 
  private:
-  framework::Vocab* vocab_;
+  const framework::Vocab* vocab_;
   wstring unk_token_{L"[UNK]"};
   int64_t unk_token_id_;
   size_t max_input_chars_per_word_;
 };
 
 class BertTokenizer {
  public:
-  explicit BertTokenizer(framework::Vocab* vocab, bool do_lower_case = false,
+  explicit BertTokenizer(const framework::Vocab* vocab,
+                         bool do_lower_case = false,
                          const wstring& unk_token = L"[UNK]",
                          const wstring& pad_token = L"[PAD]",
                          const wstring& cls_token = L"[CLS]",
@@ -106,7 +107,7 @@ class BertTokenizer {
   bool do_lower_case_;
   wstring unk_token_, pad_token_, cls_token_, mask_token_, sep_token_;
   string padding_site_;
-  framework::Vocab* vocab_;
+  const framework::Vocab* vocab_;
   BasicTokenizer basic_tokenizer_;
   WordPieceTokenizer word_piece_tokenizer_;
   int64_t unk_token_id_, cls_token_id_, mask_token_id_, pad_token_id_,
@@ -140,8 +141,7 @@ class FasterTokenizerKernel : public framework::OpKernel<T> {
       return;
     }
 
-    BertTokenizer tokenizer(const_cast<framework::Vocab*>(vocab),
-                            do_lower_case);
+    BertTokenizer tokenizer(vocab, do_lower_case);
     size_t batch_max_seq_len = 0;
     size_t batch_size = text->size();
