From 6a55a8afdfec911d759e1ba8cefd72ab9bf912d0 Mon Sep 17 00:00:00 2001
From: urchade
Date: Fri, 9 May 2025 18:56:55 +0400
Subject: [PATCH] add load from config

---
 gliner/__init__.py |  2 +-
 gliner/model.py    | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/gliner/__init__.py b/gliner/__init__.py
index 3523a6d..40dc326 100644
--- a/gliner/__init__.py
+++ b/gliner/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.2.19"
+__version__ = "0.2.20"
 
 from .model import GLiNER
 from .config import GLiNERConfig
diff --git a/gliner/model.py b/gliner/model.py
index 40904b1..73d5c47 100644
--- a/gliner/model.py
+++ b/gliner/model.py
@@ -859,3 +859,62 @@ def _from_pretrained(
                 new_num_tokens, None
             )
         return gliner
+
+    @staticmethod
+    def load_from_config(gliner_config: GLiNERConfig) -> "GLiNER":
+        """Build a new GLiNER model from a configuration object.
+
+        Loads the tokenizer named by ``gliner_config.model_name``, adds the
+        entity and separator marker tokens to it, builds the data processor
+        matching ``gliner_config.span_mode`` and instantiates the model.
+        This method itself does not load a GLiNER checkpoint.
+
+        Note:
+            ``gliner_config`` is modified in place: ``class_token_index``
+            and ``vocab_size`` are overwritten to match the extended
+            tokenizer.
+
+        Args:
+            gliner_config: Configuration describing the model to build.
+
+        Returns:
+            The newly constructed ``GLiNER`` instance.
+        """
+        tokenizer = AutoTokenizer.from_pretrained(
+            gliner_config.model_name,
+            model_max_length=gliner_config.max_len,
+        )
+
+        # The marker tokens are appended to the vocabulary, so the first one
+        # gets the pre-extension vocabulary size as its index (this assumes
+        # neither token is already known to the tokenizer).
+        special_tokens = [gliner_config.ent_token, gliner_config.sep_token]
+        gliner_config.class_token_index = len(tokenizer)
+        tokenizer.add_tokens(special_tokens)
+        gliner_config.vocab_size = len(tokenizer)
+
+        # Token-level models use TokenProcessor; every other span mode falls
+        # back to SpanProcessor.
+        processor_cls = (
+            TokenProcessor
+            if gliner_config.span_mode == "token_level"
+            else SpanProcessor
+        )
+        data_processor = processor_cls(
+            gliner_config,
+            tokenizer,
+            WordsSplitter(),
+            preprocess_text=True,
+        )
+
+        model = GLiNER(gliner_config, data_processor=data_processor)
+
+        # The tokenizer and class_token_index were already updated above, so
+        # resize_token_embeddings is told to skip both of those steps.
+        model.resize_token_embeddings(
+            special_tokens,
+            set_class_token_index=False,
+            add_tokens_to_tokenizer=False,
+        )
+
+        return model