
Commit 33d6dee

Upgrading dependencies.
1 parent 909fdde commit 33d6dee

7 files changed: +21 -22 lines changed

bindings/python/Cargo.toml

Lines changed: 4 additions & 4 deletions
@@ -14,17 +14,17 @@ serde = { version = "1.0", features = ["rc", "derive"] }
 serde_json = "1.0"
 libc = "0.2"
 env_logger = "0.11"
-pyo3 = { version = "0.24.2", features = ["abi3", "abi3-py39", "py-clone"] }
-numpy = "0.24"
+pyo3 = { version = "0.25", features = ["abi3", "abi3-py39", "py-clone"] }
+numpy = "0.25"
 ndarray = "0.16"
-itertools = "0.12"
+itertools = "0.14"
 
 [dependencies.tokenizers]
 path = "../../tokenizers"
 
 [dev-dependencies]
 tempfile = "3.10"
-pyo3 = { version = "0.24.2", features = ["auto-initialize"] }
+pyo3 = { version = "0.25", features = ["auto-initialize"] }
 
 [features]
 default = ["pyo3/extension-module"]
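For context on the paired bump: the rust-numpy crate tracks pyo3's version, so pyo3 and numpy move to 0.25 together. Nothing in the bindings' own API changes here; the module below is a hypothetical, self-contained sketch (not code from this repository) of the pyo3 0.25 Bound-style entry point that an abi3-py39 extension compiles against.

use pyo3::prelude::*;

// Hypothetical function, only to give the module something to export.
#[pyfunction]
fn whitespace_token_count(text: &str) -> usize {
    text.split_whitespace().count()
}

// Minimal module definition in the Bound<'_, PyModule> style used by recent pyo3.
#[pymodule]
fn demo(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_function(wrap_pyfunction!(whitespace_token_count, m)?)?;
    Ok(())
}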

tokenizers/Cargo.toml

Lines changed: 6 additions & 6 deletions
@@ -42,19 +42,19 @@ required-features = ["http"]
 harness = false
 
 [dependencies]
-rand = "0.8"
+rand = "0.9"
 onig = { version = "6.5.1", default-features = false, optional = true }
 regex = "1.10"
 regex-syntax = "0.8"
 rayon = "1.10"
-rayon-cond = "0.3"
+rayon-cond = "0.4"
 serde = { version = "1.0", features = [ "derive" ] }
 serde_json = "1.0"
 unicode-normalization-alignments = "0.1"
 unicode_categories = "0.1"
 unicode-segmentation = "1.11"
 indicatif = {version = "0.17", optional = true}
-itertools = "0.13"
+itertools = "0.14"
 log = "0.4"
 derive_builder = "0.20"
 spm_precompiled = "0.1.3"
@@ -64,7 +64,7 @@ paste = "1.0.14"
 macro_rules_attribute = "0.2.0"
 thiserror = "2"
 fancy-regex = { version = "0.14", optional = true}
-getrandom = { version = "0.2.10" }
+getrandom = { version = "0.3" }
 esaxx-rs = { version = "0.1.10", default-features = false, features=[]}
 monostate = "0.1.12"
 
@@ -73,11 +73,11 @@ default = ["progressbar", "onig", "esaxx_fast"]
 esaxx_fast = ["esaxx-rs/cpp"]
 progressbar = ["indicatif"]
 http = ["hf-hub"]
-unstable_wasm = ["fancy-regex", "getrandom/js"]
+unstable_wasm = ["fancy-regex", "getrandom/wasm_js"]
 rustls-tls = ["hf-hub?/rustls-tls"]
 
 [dev-dependencies]
-criterion = "0.5"
+criterion = "0.6"
 tempfile = "3.10"
 assert_approx_eq = "1.1"
 tracing = "0.1"

tokenizers/benches/common/mod.rs

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 use std::time::{Duration, Instant};
 
-use criterion::black_box;
+use std::hint::black_box;
 
 use tokenizers::{
     Decoder, EncodeInput, Model, Normalizer, PostProcessor, PreTokenizer, TokenizerImpl, Trainer,

tokenizers/benches/layout_benchmark.rs

Lines changed: 1 addition & 1 deletion
@@ -6,8 +6,8 @@ use std::io::{BufRead, BufReader};
 use std::path::Path;
 use std::time::{Duration, Instant};
 
-use criterion::black_box;
 use criterion::Criterion;
+use std::hint::black_box;
 use tokenizers::processors::template::TemplateProcessing;
 use tokenizers::{EncodeInput, Encoding, PostProcessor, Tokenizer};
 

tokenizers/benches/llama3.rs

Lines changed: 4 additions & 2 deletions
@@ -1,6 +1,8 @@
 #[macro_use]
 extern crate criterion;
 
+use std::hint::black_box;
+
 use criterion::{Criterion, Throughput};
 use tokenizers::Tokenizer;
 
@@ -15,7 +17,7 @@ pub fn llama3(c: &mut Criterion) {
         let add_special_tokens = false;
         b.iter(|| {
             tokenizer
-                .encode_batch_char_offsets(criterion::black_box(data.clone()), add_special_tokens)
+                .encode_batch_char_offsets(black_box(data.clone()), add_special_tokens)
                 .unwrap()
         })
     });
@@ -26,7 +28,7 @@ pub fn llama3(c: &mut Criterion) {
         let add_special_tokens = false;
         b.iter(|| {
             tokenizer
-                .encode_batch(criterion::black_box(data.clone()), add_special_tokens)
+                .encode_batch(black_box(data.clone()), add_special_tokens)
                 .unwrap()
         })
     });
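Across the three bench files, criterion's re-exported black_box is swapped for the standard library's std::hint::black_box, which has been stable in Rust since 1.66. A minimal, self-contained sketch of the same pattern with a made-up workload (not one of the repository's benches):

use std::hint::black_box;

use criterion::{criterion_group, criterion_main, Criterion};

fn sum_bench(c: &mut Criterion) {
    let data: Vec<u64> = (0..1_000).collect();
    c.bench_function("sum_1000", |b| {
        // black_box hides the input from the optimizer so the summation is not folded away.
        b.iter(|| black_box(&data).iter().sum::<u64>())
    });
}

criterion_group!(benches, sum_bench);
criterion_main!(benches);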

tokenizers/src/models/bpe/word.rs

Lines changed: 2 additions & 5 deletions
@@ -1,5 +1,5 @@
 use super::Pair;
-use rand::{thread_rng, Rng};
+use rand::{rng, Rng};
 use std::cmp::Ordering;
 use std::collections::{BinaryHeap, HashMap};
 
@@ -177,10 +177,7 @@ impl Word {
         );
 
         while let Some(top) = queue.pop() {
-            if dropout
-                .map(|d| thread_rng().gen::<f32>() < d)
-                .unwrap_or(false)
-            {
+            if dropout.map(|d| rng().random::<f32>() < d).unwrap_or(false) {
                 skip.push(top);
             } else {
                 // Re-insert the skipped elements
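The rand 0.9 release renames the entry points used here: thread_rng() becomes rand::rng() and Rng::gen becomes Rng::random. The standalone sketch below (hypothetical, not taken from the crate) shows the same dropout-style coin flip with the new names:

use rand::{rng, Rng};

// Returns true when an element should be skipped, mirroring the dropout check above.
fn drop_element(dropout: Option<f32>) -> bool {
    // With no dropout configured, nothing is ever skipped.
    dropout.map(|d| rng().random::<f32>() < d).unwrap_or(false)
}

fn main() {
    let skipped = (0..1_000).filter(|_| drop_element(Some(0.1))).count();
    println!("skipped {skipped} of 1000 (expected around 100)");
}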

tokenizers/src/models/unigram/lattice.rs

Lines changed: 3 additions & 3 deletions
@@ -1,5 +1,5 @@
-use rand::distributions::WeightedIndex;
-use rand::prelude::*;
+use rand::distr::weighted::WeightedIndex;
+use rand::{prelude::*, rng};
 use std::cell::RefCell;
 use std::cmp::{min, Ordering};
 use std::collections::BinaryHeap;
@@ -397,7 +397,7 @@ impl<'a> Lattice<'a> {
             }
         }
 
-        let mut rng = thread_rng();
+        let mut rng = rng();
         let mut results: Vec<NodeRef> = vec![];
         let mut probs: Vec<f64> = vec![];
         let mut z = alpha[self.eos_node().borrow().node_id];
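In rand 0.9 the weighted distribution also moves, from rand::distributions::WeightedIndex to rand::distr::weighted::WeightedIndex, which is what the import change above reflects. A self-contained sketch with made-up weights (not the lattice sampling code itself):

use rand::distr::weighted::WeightedIndex;
use rand::{prelude::*, rng};

fn main() {
    // Three outcomes with unnormalized weights; sampling is proportional to weight.
    let weights = [5.0_f64, 3.0, 2.0];
    let dist = WeightedIndex::new(&weights)
        .expect("weights must be finite, non-negative, and not all zero");
    let mut rng = rng();
    // Draw indices into `weights` in proportion to their weight.
    let draws: Vec<usize> = (0..10).map(|_| dist.sample(&mut rng)).collect();
    println!("{draws:?}");
}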
