Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/native_s3_huggingface.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
- name: Install Environment
run: |
yum -y update
yum -y install centos-release-scl-rh epel-release
yum -y install centos-release-scl-rh epel-release perl-core
yum -y install devtoolset-7 git patch cmake3 libstdc++-static
ln -s /usr/bin/cmake3 /usr/bin/cmake
curl https://sh.rustup.rs -sSf | sh -s -- -y
Expand Down Expand Up @@ -184,7 +184,7 @@ jobs:
- name: Install Environment
run: |
yum -y update
yum -y install centos-release-scl-rh epel-release
yum -y install centos-release-scl-rh epel-release perl-core
yum -y install devtoolset-7 git patch cmake3 libstdc++-static
ln -s /usr/bin/cmake3 /usr/bin/cmake
curl https://sh.rustup.rs -sSf | sh -s -- -y
Expand Down
2 changes: 1 addition & 1 deletion extensions/tokenizers/build.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
@rem choco install rust -y
@rem choco install jdk8 -y

set VERSION=python-v"%1"
set VERSION=v"%1"

if exist "tokenizers" (
echo Found "tokenizers"
Expand Down
2 changes: 1 addition & 1 deletion extensions/tokenizers/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ elif [[ -n $(command -v sysctl) ]]; then
fi
PLATFORM=$(uname | tr '[:upper:]' '[:lower:]')

VERSION=python-v$1
VERSION=v$1
ARCH=$2

pushd $WORK_DIR
Expand Down
2 changes: 1 addition & 1 deletion extensions/tokenizers/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2018"

[dependencies]
jni = "0.19.0"
tokenizers = { path = "../tokenizers/tokenizers", version = "*" }
tokenizers = { path = "../tokenizers/tokenizers", version = "*", features = ["http"] }

[target.'cfg(target_os = "linux")'.dependencies]
openssl = { version = "0.10", features = ["vendored"] }
Expand Down
8 changes: 6 additions & 2 deletions extensions/tokenizers/rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_
}
}
let decoding: String = tokenizer
.decode(decode_ids, skip_special_tokens == JNI_TRUE)
.decode(&*decode_ids, skip_special_tokens == JNI_TRUE)
.unwrap();
let ret = env
.new_string(decoding)
Expand Down Expand Up @@ -527,8 +527,12 @@ pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_
}
batch_decode_input.push(decode_ids);
}
let mut references: Vec<&[u32]> = Vec::new();
for reference in batch_decode_input.iter() {
references.push(reference);
}
let decoding: Vec<String> = tokenizer
.decode_batch(batch_decode_input, skip_special_tokens == JNI_TRUE)
.decode_batch(&references, skip_special_tokens == JNI_TRUE)
.unwrap();
let ret: jobjectArray = env
.new_object_array(batch_len, "java/lang/String", JObject::null())
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ trt_version=8.4.1
onnxruntime_version=1.16.0
paddlepaddle_version=2.3.2
sentencepiece_version=0.1.97
tokenizers_version=0.13.3
tokenizers_version=0.14.1
fasttext_version=0.9.2
xgboost_version=1.7.5
lightgbm_version=3.2.110
Expand Down