Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gliner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.2.19"
__version__ = "0.2.20"

from .model import GLiNER
from .config import GLiNERConfig
Expand Down
47 changes: 47 additions & 0 deletions gliner/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,3 +859,50 @@ def _from_pretrained(
new_num_tokens, None
)
return gliner

@staticmethod
def load_from_config(gliner_config: GLiNERConfig):
    """Build a fresh, untrained GLiNER model from a configuration object.

    Loads the backbone tokenizer, registers the entity and separator
    special tokens, selects the data processor matching the configured
    span mode, and resizes the model's token embeddings to cover the
    newly added tokens.

    Args:
        gliner_config: Configuration describing the backbone model name,
            maximum length, special tokens, and span mode. Mutated in
            place: ``class_token_index`` and ``vocab_size`` are updated.

    Returns:
        A ``GLiNER`` instance wired to the tokenizer and processor.
    """
    # Backbone tokenizer, capped at the configured maximum length.
    backbone_tokenizer = AutoTokenizer.from_pretrained(
        gliner_config.model_name,
        model_max_length=gliner_config.max_len,
    )

    # Record the index BEFORE adding: the first new token (ent_token)
    # is appended at the current vocabulary size.
    gliner_config.class_token_index = len(backbone_tokenizer)
    backbone_tokenizer.add_tokens(
        [gliner_config.ent_token, gliner_config.sep_token]
    )
    gliner_config.vocab_size = len(backbone_tokenizer)

    # Token-level span mode uses the token processor; all other modes
    # fall back to the span processor. Both take identical arguments.
    splitter = WordsSplitter()
    processor_cls = (
        TokenProcessor
        if gliner_config.span_mode == "token_level"
        else SpanProcessor
    )
    processor = processor_cls(
        gliner_config,
        backbone_tokenizer,
        splitter,
        preprocess_text=True,
    )

    # The special tokens are already in the tokenizer, so only the
    # embedding matrix needs to grow — hence both flags are False.
    gliner_model = GLiNER(gliner_config, data_processor=processor)
    gliner_model.resize_token_embeddings(
        [gliner_config.ent_token, gliner_config.sep_token],
        set_class_token_index=False,
        add_tokens_to_tokenizer=False,
    )

    return gliner_model