Skip to content

Commit b0a0127

Browse files
authored
Merge pull request #257 from urchade/load_from_config
add load from config
2 parents c15e090 + 6a55a8a commit b0a0127

File tree

2 files changed

+48
-1
lines changed

2 files changed

+48
-1
lines changed

gliner/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.2.19"
1+
__version__ = "0.2.20"
22

33
from .model import GLiNER
44
from .config import GLiNERConfig

gliner/model.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,3 +860,50 @@ def _from_pretrained(
860860
new_num_tokens, None
861861
)
862862
return gliner
863+
864+
@staticmethod
def load_from_config(gliner_config: GLiNERConfig):
    """Build a GLiNER model directly from a configuration object.

    The tokenizer named by ``gliner_config.model_name`` is loaded, the
    entity and separator marker tokens are appended to it, and a data
    processor matching ``gliner_config.span_mode`` is created before the
    model itself is instantiated.

    Note:
        ``gliner_config`` is modified in place: ``class_token_index`` and
        ``vocab_size`` are overwritten to reflect the extended tokenizer.

    Args:
        gliner_config: Configuration supplying ``model_name``, ``max_len``,
            ``ent_token``, ``sep_token`` and ``span_mode``.

    Returns:
        The newly constructed ``GLiNER`` instance, after
        ``resize_token_embeddings`` has been called on it.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        gliner_config.model_name, model_max_length=gliner_config.max_len
    )

    # The tokenizer length *before* extension is recorded as the class
    # token index; the length *after* extension becomes the vocab size.
    gliner_config.class_token_index = len(tokenizer)
    marker_tokens = [gliner_config.ent_token, gliner_config.sep_token]
    tokenizer.add_tokens(marker_tokens)
    gliner_config.vocab_size = len(tokenizer)

    # "token_level" selects the token processor; every other span mode
    # falls back to the span processor. Both take the same arguments.
    splitter = WordsSplitter()
    if gliner_config.span_mode == "token_level":
        processor_cls = TokenProcessor
    else:
        processor_cls = SpanProcessor
    processor = processor_cls(
        gliner_config, tokenizer, splitter, preprocess_text=True
    )

    gliner = GLiNER(gliner_config, data_processor=processor)

    # The tokenizer and config were already updated above, so only the
    # embedding resize is requested here.
    gliner.resize_token_embeddings(
        [gliner_config.ent_token, gliner_config.sep_token],
        set_class_token_index=False,
        add_tokens_to_tokenizer=False,
    )
    return gliner

0 commit comments

Comments
 (0)