Skip to content

Commit 3a6504d

Browse files
authored
Upgrade to PyO3 0.23 (#1708)
* Upgrade to PyO3 0.23 * Macos-12 deprecated? * Clippy. * Clippy auto elision.
1 parent 555d44c commit 3a6504d

File tree

19 files changed

+283
-154
lines changed

19 files changed

+283
-154
lines changed

.github/workflows/CI.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ jobs:
117117
strategy:
118118
matrix:
119119
platform:
120-
- runner: macos-12
120+
- runner: macos-13
121121
target: x86_64
122122
- runner: macos-14
123123
target: aarch64

bindings/python/Cargo.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,17 @@ serde = { version = "1.0", features = ["rc", "derive"] }
1414
serde_json = "1.0"
1515
libc = "0.2"
1616
env_logger = "0.11"
17-
pyo3 = { version = "0.22", features = ["abi3", "abi3-py39"] }
18-
numpy = "0.22"
19-
ndarray = "0.15"
17+
pyo3 = { version = "0.23", features = ["abi3", "abi3-py39"] }
18+
numpy = "0.23"
19+
ndarray = "0.16"
2020
itertools = "0.12"
2121

2222
[dependencies.tokenizers]
2323
path = "../../tokenizers"
2424

2525
[dev-dependencies]
2626
tempfile = "3.10"
27-
pyo3 = { version = "0.22", features = ["auto-initialize"] }
27+
pyo3 = { version = "0.23", features = ["auto-initialize"] }
2828

2929
[features]
3030
defaut = ["pyo3/extension-module"]

bindings/python/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ dynamic = [
2525
'description',
2626
'license',
2727
'readme',
28+
'version',
2829
]
2930
dependencies = ["huggingface_hub>=0.16.4,<1.0"]
3031

bindings/python/src/decoders.rs

Lines changed: 52 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -43,22 +43,48 @@ impl PyDecoder {
4343
pub(crate) fn get_as_subtype(&self, py: Python<'_>) -> PyResult<PyObject> {
4444
let base = self.clone();
4545
Ok(match &self.decoder {
46-
PyDecoderWrapper::Custom(_) => Py::new(py, base)?.into_py(py),
46+
PyDecoderWrapper::Custom(_) => Py::new(py, base)?.into_pyobject(py)?.into_any().into(),
4747
PyDecoderWrapper::Wrapped(inner) => match &*inner.as_ref().read().unwrap() {
48-
DecoderWrapper::Metaspace(_) => Py::new(py, (PyMetaspaceDec {}, base))?.into_py(py),
49-
DecoderWrapper::WordPiece(_) => Py::new(py, (PyWordPieceDec {}, base))?.into_py(py),
50-
DecoderWrapper::ByteFallback(_) => {
51-
Py::new(py, (PyByteFallbackDec {}, base))?.into_py(py)
52-
}
53-
DecoderWrapper::Strip(_) => Py::new(py, (PyStrip {}, base))?.into_py(py),
54-
DecoderWrapper::Fuse(_) => Py::new(py, (PyFuseDec {}, base))?.into_py(py),
55-
DecoderWrapper::ByteLevel(_) => Py::new(py, (PyByteLevelDec {}, base))?.into_py(py),
56-
DecoderWrapper::Replace(_) => Py::new(py, (PyReplaceDec {}, base))?.into_py(py),
57-
DecoderWrapper::BPE(_) => Py::new(py, (PyBPEDecoder {}, base))?.into_py(py),
58-
DecoderWrapper::CTC(_) => Py::new(py, (PyCTCDecoder {}, base))?.into_py(py),
59-
DecoderWrapper::Sequence(_) => {
60-
Py::new(py, (PySequenceDecoder {}, base))?.into_py(py)
61-
}
48+
DecoderWrapper::Metaspace(_) => Py::new(py, (PyMetaspaceDec {}, base))?
49+
.into_pyobject(py)?
50+
.into_any()
51+
.into(),
52+
DecoderWrapper::WordPiece(_) => Py::new(py, (PyWordPieceDec {}, base))?
53+
.into_pyobject(py)?
54+
.into_any()
55+
.into(),
56+
DecoderWrapper::ByteFallback(_) => Py::new(py, (PyByteFallbackDec {}, base))?
57+
.into_pyobject(py)?
58+
.into_any()
59+
.into(),
60+
DecoderWrapper::Strip(_) => Py::new(py, (PyStrip {}, base))?
61+
.into_pyobject(py)?
62+
.into_any()
63+
.into(),
64+
DecoderWrapper::Fuse(_) => Py::new(py, (PyFuseDec {}, base))?
65+
.into_pyobject(py)?
66+
.into_any()
67+
.into(),
68+
DecoderWrapper::ByteLevel(_) => Py::new(py, (PyByteLevelDec {}, base))?
69+
.into_pyobject(py)?
70+
.into_any()
71+
.into(),
72+
DecoderWrapper::Replace(_) => Py::new(py, (PyReplaceDec {}, base))?
73+
.into_pyobject(py)?
74+
.into_any()
75+
.into(),
76+
DecoderWrapper::BPE(_) => Py::new(py, (PyBPEDecoder {}, base))?
77+
.into_pyobject(py)?
78+
.into_any()
79+
.into(),
80+
DecoderWrapper::CTC(_) => Py::new(py, (PyCTCDecoder {}, base))?
81+
.into_pyobject(py)?
82+
.into_any()
83+
.into(),
84+
DecoderWrapper::Sequence(_) => Py::new(py, (PySequenceDecoder {}, base))?
85+
.into_pyobject(py)?
86+
.into_any()
87+
.into(),
6288
},
6389
})
6490
}
@@ -85,7 +111,7 @@ impl PyDecoder {
85111
e
86112
))
87113
})?;
88-
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
114+
Ok(PyBytes::new(py, data.as_bytes()).into())
89115
}
90116

91117
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -484,8 +510,8 @@ impl PySequenceDecoder {
484510
Ok((PySequenceDecoder {}, Sequence::new(decoders).into()))
485511
}
486512

487-
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
488-
PyTuple::new_bound(py, [PyList::empty_bound(py)])
513+
fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<Bound<'p, PyTuple>> {
514+
PyTuple::new(py, [PyList::empty(py)])
489515
}
490516
}
491517

@@ -504,7 +530,7 @@ impl Decoder for CustomDecoder {
504530
Python::with_gil(|py| {
505531
let decoded = self
506532
.inner
507-
.call_method_bound(py, "decode", (tokens,), None)?
533+
.call_method(py, "decode", (tokens,), None)?
508534
.extract(py)?;
509535
Ok(decoded)
510536
})
@@ -514,7 +540,7 @@ impl Decoder for CustomDecoder {
514540
Python::with_gil(|py| {
515541
let decoded = self
516542
.inner
517-
.call_method_bound(py, "decode_chain", (tokens,), None)?
543+
.call_method(py, "decode_chain", (tokens,), None)?
518544
.extract(py)?;
519545
Ok(decoded)
520546
})
@@ -693,7 +719,12 @@ mod test {
693719

694720
let obj = Python::with_gil(|py| {
695721
let py_msp = PyDecoder::new(Metaspace::default().into());
696-
let obj: PyObject = Py::new(py, py_msp).unwrap().into_py(py);
722+
let obj: PyObject = Py::new(py, py_msp)
723+
.unwrap()
724+
.into_pyobject(py)
725+
.unwrap()
726+
.into_any()
727+
.into();
697728
obj
698729
});
699730
let py_seq = PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(obj))));

bindings/python/src/encoding.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ impl PyEncoding {
3737
e
3838
))
3939
})?;
40-
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
40+
Ok(PyBytes::new(py, data.as_bytes()).into())
4141
}
4242

4343
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {

bindings/python/src/error.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use pyo3::exceptions;
22
use pyo3::prelude::*;
33
use pyo3::type_object::PyTypeInfo;
4+
use std::ffi::CString;
45
use std::fmt::{Display, Formatter, Result as FmtResult};
56
use tokenizers::tokenizer::Result;
67

@@ -35,7 +36,7 @@ impl<T> ToPyResult<T> {
3536
}
3637

3738
pub(crate) fn deprecation_warning(py: Python<'_>, version: &str, message: &str) -> PyResult<()> {
38-
let deprecation_warning = py.import_bound("builtins")?.getattr("DeprecationWarning")?;
39+
let deprecation_warning = py.import("builtins")?.getattr("DeprecationWarning")?;
3940
let full_message = format!("Deprecated in {}: {}", version, message);
40-
pyo3::PyErr::warn_bound(py, &deprecation_warning, &full_message, 0)
41+
pyo3::PyErr::warn(py, &deprecation_warning, &CString::new(full_message)?, 0)
4142
}

bindings/python/src/models.rs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,22 @@ impl PyModel {
3535
pub(crate) fn get_as_subtype(&self, py: Python<'_>) -> PyResult<PyObject> {
3636
let base = self.clone();
3737
Ok(match *self.model.as_ref().read().unwrap() {
38-
ModelWrapper::BPE(_) => Py::new(py, (PyBPE {}, base))?.into_py(py),
39-
ModelWrapper::WordPiece(_) => Py::new(py, (PyWordPiece {}, base))?.into_py(py),
40-
ModelWrapper::WordLevel(_) => Py::new(py, (PyWordLevel {}, base))?.into_py(py),
41-
ModelWrapper::Unigram(_) => Py::new(py, (PyUnigram {}, base))?.into_py(py),
38+
ModelWrapper::BPE(_) => Py::new(py, (PyBPE {}, base))?
39+
.into_pyobject(py)?
40+
.into_any()
41+
.into(),
42+
ModelWrapper::WordPiece(_) => Py::new(py, (PyWordPiece {}, base))?
43+
.into_pyobject(py)?
44+
.into_any()
45+
.into(),
46+
ModelWrapper::WordLevel(_) => Py::new(py, (PyWordLevel {}, base))?
47+
.into_pyobject(py)?
48+
.into_any()
49+
.into(),
50+
ModelWrapper::Unigram(_) => Py::new(py, (PyUnigram {}, base))?
51+
.into_pyobject(py)?
52+
.into_any()
53+
.into(),
4254
})
4355
}
4456
}
@@ -105,7 +117,7 @@ impl PyModel {
105117
e
106118
))
107119
})?;
108-
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
120+
Ok(PyBytes::new(py, data.as_bytes()).into())
109121
}
110122

111123
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {

bindings/python/src/normalizers.rs

Lines changed: 66 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -54,38 +54,73 @@ impl PyNormalizer {
5454
pub(crate) fn get_as_subtype(&self, py: Python<'_>) -> PyResult<PyObject> {
5555
let base = self.clone();
5656
Ok(match self.normalizer {
57-
PyNormalizerTypeWrapper::Sequence(_) => Py::new(py, (PySequence {}, base))?.into_py(py),
57+
PyNormalizerTypeWrapper::Sequence(_) => Py::new(py, (PySequence {}, base))?
58+
.into_pyobject(py)?
59+
.into_any()
60+
.into(),
5861
PyNormalizerTypeWrapper::Single(ref inner) => match &*inner.as_ref().read().unwrap() {
59-
PyNormalizerWrapper::Custom(_) => Py::new(py, base)?.into_py(py),
62+
PyNormalizerWrapper::Custom(_) => {
63+
Py::new(py, base)?.into_pyobject(py)?.into_any().into()
64+
}
6065
PyNormalizerWrapper::Wrapped(ref inner) => match inner {
61-
NormalizerWrapper::Sequence(_) => {
62-
Py::new(py, (PySequence {}, base))?.into_py(py)
63-
}
66+
NormalizerWrapper::Sequence(_) => Py::new(py, (PySequence {}, base))?
67+
.into_pyobject(py)?
68+
.into_any()
69+
.into(),
6470
NormalizerWrapper::BertNormalizer(_) => {
65-
Py::new(py, (PyBertNormalizer {}, base))?.into_py(py)
66-
}
67-
NormalizerWrapper::StripNormalizer(_) => {
68-
Py::new(py, (PyStrip {}, base))?.into_py(py)
69-
}
70-
NormalizerWrapper::Prepend(_) => Py::new(py, (PyPrepend {}, base))?.into_py(py),
71-
NormalizerWrapper::ByteLevel(_) => {
72-
Py::new(py, (PyByteLevel {}, base))?.into_py(py)
73-
}
74-
NormalizerWrapper::StripAccents(_) => {
75-
Py::new(py, (PyStripAccents {}, base))?.into_py(py)
76-
}
77-
NormalizerWrapper::NFC(_) => Py::new(py, (PyNFC {}, base))?.into_py(py),
78-
NormalizerWrapper::NFD(_) => Py::new(py, (PyNFD {}, base))?.into_py(py),
79-
NormalizerWrapper::NFKC(_) => Py::new(py, (PyNFKC {}, base))?.into_py(py),
80-
NormalizerWrapper::NFKD(_) => Py::new(py, (PyNFKD {}, base))?.into_py(py),
81-
NormalizerWrapper::Lowercase(_) => {
82-
Py::new(py, (PyLowercase {}, base))?.into_py(py)
83-
}
84-
NormalizerWrapper::Precompiled(_) => {
85-
Py::new(py, (PyPrecompiled {}, base))?.into_py(py)
71+
Py::new(py, (PyBertNormalizer {}, base))?
72+
.into_pyobject(py)?
73+
.into_any()
74+
.into()
8675
}
87-
NormalizerWrapper::Replace(_) => Py::new(py, (PyReplace {}, base))?.into_py(py),
88-
NormalizerWrapper::Nmt(_) => Py::new(py, (PyNmt {}, base))?.into_py(py),
76+
NormalizerWrapper::StripNormalizer(_) => Py::new(py, (PyStrip {}, base))?
77+
.into_pyobject(py)?
78+
.into_any()
79+
.into(),
80+
NormalizerWrapper::Prepend(_) => Py::new(py, (PyPrepend {}, base))?
81+
.into_pyobject(py)?
82+
.into_any()
83+
.into(),
84+
NormalizerWrapper::ByteLevel(_) => Py::new(py, (PyByteLevel {}, base))?
85+
.into_pyobject(py)?
86+
.into_any()
87+
.into(),
88+
NormalizerWrapper::StripAccents(_) => Py::new(py, (PyStripAccents {}, base))?
89+
.into_pyobject(py)?
90+
.into_any()
91+
.into(),
92+
NormalizerWrapper::NFC(_) => Py::new(py, (PyNFC {}, base))?
93+
.into_pyobject(py)?
94+
.into_any()
95+
.into(),
96+
NormalizerWrapper::NFD(_) => Py::new(py, (PyNFD {}, base))?
97+
.into_pyobject(py)?
98+
.into_any()
99+
.into(),
100+
NormalizerWrapper::NFKC(_) => Py::new(py, (PyNFKC {}, base))?
101+
.into_pyobject(py)?
102+
.into_any()
103+
.into(),
104+
NormalizerWrapper::NFKD(_) => Py::new(py, (PyNFKD {}, base))?
105+
.into_pyobject(py)?
106+
.into_any()
107+
.into(),
108+
NormalizerWrapper::Lowercase(_) => Py::new(py, (PyLowercase {}, base))?
109+
.into_pyobject(py)?
110+
.into_any()
111+
.into(),
112+
NormalizerWrapper::Precompiled(_) => Py::new(py, (PyPrecompiled {}, base))?
113+
.into_pyobject(py)?
114+
.into_any()
115+
.into(),
116+
NormalizerWrapper::Replace(_) => Py::new(py, (PyReplace {}, base))?
117+
.into_pyobject(py)?
118+
.into_any()
119+
.into(),
120+
NormalizerWrapper::Nmt(_) => Py::new(py, (PyNmt {}, base))?
121+
.into_pyobject(py)?
122+
.into_any()
123+
.into(),
89124
},
90125
},
91126
})
@@ -114,7 +149,7 @@ impl PyNormalizer {
114149
e
115150
))
116151
})?;
117-
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
152+
Ok(PyBytes::new(py, data.as_bytes()).into())
118153
}
119154

120155
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -371,8 +406,8 @@ impl PySequence {
371406
))
372407
}
373408

374-
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
375-
PyTuple::new_bound(py, [PyList::empty_bound(py)])
409+
fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<Bound<'p, PyTuple>> {
410+
PyTuple::new(py, [PyList::empty(py)])
376411
}
377412

378413
fn __len__(&self) -> usize {

0 commit comments

Comments
 (0)