From f9d4319223e3a6d9cd0ad9ffd22cec6dafc6be36 Mon Sep 17 00:00:00 2001
From: Ishita
Date: Mon, 6 Dec 2021 18:21:59 +0530
Subject: [PATCH 1/3] Added handler and requirements

---
 examples/bart_style_nli/handler.py       | 136 +++++++++++++++++++++++
 examples/bart_style_nli/requirements.txt |   3 +
 2 files changed, 139 insertions(+)
 create mode 100644 examples/bart_style_nli/handler.py
 create mode 100644 examples/bart_style_nli/requirements.txt

diff --git a/examples/bart_style_nli/handler.py b/examples/bart_style_nli/handler.py
new file mode 100644
index 0000000..d25c119
--- /dev/null
+++ b/examples/bart_style_nli/handler.py
@@ -0,0 +1,136 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+"""
+Instructions:
+Please work through this file to construct your handler. Here are things
+to watch out for:
+- TODO blocks: you need to fill or modify these according to the instructions.
+  The code in these blocks is for demo purposes only and may not work.
+- NOTE inline comments: remember to follow these instructions to pass the test.
+"""
+import os
+import sys
+
+import torch
+import torch.nn.functional as F
+
+from dynalab.handler.base_handler import BaseDynaHandler
+from dynalab.tasks.task_io import TaskIO, ROOTPATH
+
+
+# NOTE: use the following line to import modules from your repo
+sys.path.append("/home/model-server/code")
+# import MyModel
+from transformers import (
+    AutoConfig,
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+)
+
+
+class Handler(BaseDynaHandler):
+
+    def initialize(self, context):
+        """
+        load model and extra files
+        """
+        self.taskIO = TaskIO("nli")
+        model_pt_path, extra_file_dir, device = self._handler_initialize(context)
+
+        # ############TODO 1: Initialize model ############
+        """
+        Load model and read relevant files here.
+        Your extra files can be read from os.path.join(extra_file_dir, file_name).
+        """
+        # self.model = MyModel(config)
+        config = AutoConfig.from_pretrained('.')
+        self.model = AutoModelForSequenceClassification.from_pretrained(
+            '.', config=config
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained('.')
+        self.model.to(device)
+        self.model.eval()
+        # #################################################
+
+        self.initialized = True
+
+    def preprocess(self, data):
+        """
+        preprocess data into a format that the model can do inference on
+        """
+        example = self._read_data(data)
+
+        # ############TODO 2: preprocess data #############
+        """
+        You can extract the keys and values from the input data as below.
+        `example` is always a json object. You can see what an example looks like via
+        ```
+        dynalab.tasks.nli.TaskIO().get_input_json()
+        ```
+        """
+        args = (example["context"], example["hypothesis"])
+        input_data = self.tokenizer(*args, max_length=1024, return_tensors="pt")
+
+        return input_data
+
+    def inference(self, input_data):
+        """
+        do inference on the processed example
+        """
+        # ############TODO 3: inference ###################
+        """
+        Run model prediction using the processed data
+        """
+        with torch.no_grad():
+            inference_output = self.model(**input_data)
+            # NOTE: pass dim explicitly; calling softmax without it is deprecated
+            inference_output = F.softmax(inference_output[0], dim=-1).squeeze()
+        # #################################################
+
+        return inference_output
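+
+    # NOTE: for a 3-way NLI model, `inference_output` above ends up as a 1-D
+    # tensor of three probabilities, e.g. tensor([0.9082, 0.0518, 0.0400])
+    # (values illustrative only). The index-to-label order assumed in
+    # postprocess below is not checked anywhere: make sure it matches the
+    # `id2label` field of your model's config.json before submitting.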
+
+    def postprocess(self, inference_output, data):
+        """
+        post-process the inference output into a response
+        The response should be a single-element list containing a json object;
+        the response format will need to pass the validation in
+        ```
+        dynalab.tasks.nli.TaskIO().verify_response(response)
+        ```
+        """
+        response = dict()
+        example = self._read_data(data)
+        # ############TODO 4: postprocess response ########
+        """
+        Add attributes to response
+        """
+        response["id"] = example["uid"]
+        response["label"] = {0: "entailed", 1: "contradictory", 2: "neutral"}[
+            int(inference_output.argmax())
+        ]
+        response["prob"] = {
+            "entailed": float(inference_output[0]),
+            "contradictory": float(inference_output[1]),
+            "neutral": float(inference_output[2]),
+        }
+        # #################################################
+        self.taskIO.sign_response(response, example)
+        return [response]
+
+
+_service = Handler()
+
+
+def handle(data, context):
+    if not _service.initialized:
+        _service.initialize(context)
+    if data is None:
+        return None
+
+    # ############TODO 5: assemble inference pipeline #####
+    """
+    Normally you don't need to change anything in this block.
+    However, if you do need to change this part (e.g. function name, argument, etc.),
+    remember to make corresponding changes in the Handler class definition.
+    """
+    input_data = _service.preprocess(data)
+    output = _service.inference(input_data)
+    response = _service.postprocess(output, data)
+    # #####################################################
+
+    return response

diff --git a/examples/bart_style_nli/requirements.txt b/examples/bart_style_nli/requirements.txt
new file mode 100644
index 0000000..5c5b5a5
--- /dev/null
+++ b/examples/bart_style_nli/requirements.txt
@@ -0,0 +1,3 @@
+transformers
+sentencepiece
+protobuf
\ No newline at end of file
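
A quick way to sanity-check the handler logic above outside of TorchServe is to run the same preprocess → inference → postprocess round trip by hand. The sketch below assumes the `facebook/bart-large-mnli` checkpoint mentioned in the README and a made-up example input; it reads the label names from the model's own config rather than hard-coding them:

```python
# Minimal standalone sketch of the handler's round trip. Assumptions: the
# checkpoint name and the example values are illustrative only.
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

name = "facebook/bart-large-mnli"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name).eval()

example = {
    "uid": "demo-1",
    "context": "A soccer game with multiple males playing.",
    "hypothesis": "Some men are playing a sport.",
}
inputs = tokenizer(
    example["context"], example["hypothesis"],
    max_length=1024, truncation=True, return_tensors="pt",
)
with torch.no_grad():
    probs = F.softmax(model(**inputs).logits, dim=-1).squeeze()
# Read label names from the checkpoint's config instead of hard-coding them.
print({model.config.id2label[i]: float(p) for i, p in enumerate(probs)})
```
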
From 55b3abfd3aecec3b4e601c8cd45be8a8ae366fd7 Mon Sep 17 00:00:00 2001
From: Ishita
Date: Mon, 6 Dec 2021 20:19:07 +0530
Subject: [PATCH 2/3] Adding Readme and Initializer files

---
 examples/bart_style_nli/README.md      | 38 ++++++++++++++++++++++++
 examples/bart_style_nli/initializer.py |  8 ++++++
 2 files changed, 46 insertions(+)
 create mode 100644 examples/bart_style_nli/README.md
 create mode 100644 examples/bart_style_nli/initializer.py

diff --git a/examples/bart_style_nli/README.md b/examples/bart_style_nli/README.md
new file mode 100644
index 0000000..7eab20d
--- /dev/null
+++ b/examples/bart_style_nli/README.md
@@ -0,0 +1,38 @@
+This directory contains an example handler.py for a Huggingface BERT-style NLI
+model.
+
+***How to customize initializer:***
+1. Since we are using BartModel, specifically [this one](https://huggingface.co/facebook/bart-large-mnli/tree/main), we imported AutoModelForSequenceClassification and AutoTokenizer from transformers.
+
+2. Change *facebook/bart-large-mnli* in `model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')` to the name of your chosen model from HuggingFace; do the same for `tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')`.
+
+***Here are the steps to use this handler:***
+
+1. Run `initializer.py`; it will download all the required files, namely `pytorch_model.bin`, `config.json`, `tokenizer.json`, `vocab.json` and `tokenizer_config.json`, into this folder (`dynalab/examples/bart_style_nli`)
+
+2. Make a new file `requirements.txt` and add:
+```
+transformers
+sentencepiece
+protobuf
+```
+
+3. Run `dynalab-cli init -n bartstylenli`, and provide these answers:
+
+```
+Initializing . for dynalab model 'bartstylenli'...
+Please choose a valid task name from one of [nli, qa, sentiment, hs, vqa, flores_small1, flores_small2, flores_full]: nli
+Checkpoint file ./checkpoint.pt not a valid path. Please re-specify path to checkpoint file inside the root dir: pytorch_model.bin
+Handler file found at ./handler.py. Press enter, or specify alternative path [./handler.py]:
+Requirements file found. Do you want us to install dependencies using ./requirements.txt? [Y/n] Y
+```
+
+4. Run `dynalab-cli init -n bartstylenli --amend` and fill `model_files` with `["vocab.json", "tokenizer_config.json", "config.json", "special_tokens_map.json"]`.
+
+5. Run tests:
+```
+dynalab-cli test --local -n bartstylenli  # this should pass
+dynalab-cli test -n bartstylenli  # this should pass (try increasing Docker storage size and memory if it does not)
+```
+
+6. Log in to Dynabench via `dynalab-cli login` and then submit the model via `dynalab-cli upload -n bartstylenli`
\ No newline at end of file

diff --git a/examples/bart_style_nli/initializer.py b/examples/bart_style_nli/initializer.py
new file mode 100644
index 0000000..ab75bb5
--- /dev/null
+++ b/examples/bart_style_nli/initializer.py
@@ -0,0 +1,8 @@
+# We are using the BART model; please change the model and tokenizer classes to the specific model used, for example BertModel, RobertaModel, etc.
+
+from transformers import BartTokenizer, BartModel
+
+model = BartModel.from_pretrained('facebook/bart-base')
+tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
+model.save_pretrained(".")
+tokenizer.save_pretrained(".")
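
After running README step 1, a quick check like the sketch below (file names taken from the README's own list; nothing dynalab-specific) confirms that `initializer.py` actually wrote everything the later `dynalab-cli init` steps expect into the current directory:

```python
import os

# Files the README expects initializer.py to have saved alongside the handler.
expected = [
    "pytorch_model.bin",
    "config.json",
    "vocab.json",
    "tokenizer_config.json",
    "special_tokens_map.json",
]
missing = [name for name in expected if not os.path.isfile(name)]
print("missing files:", missing if missing else "none")
```
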
From 6e8e405bf1345137ec45baf9a5fccb14bd73813b Mon Sep 17 00:00:00 2001
From: Ishita
Date: Mon, 6 Dec 2021 20:30:42 +0530
Subject: [PATCH 3/3] Adding Readme and Initializer files

---
 examples/bart_style_nli/README.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/examples/bart_style_nli/README.md b/examples/bart_style_nli/README.md
index 7eab20d..843defb 100644
--- a/examples/bart_style_nli/README.md
+++ b/examples/bart_style_nli/README.md
@@ -1,14 +1,13 @@
-This directory contains an example handler.py for a Huggingface BERT-style NLI
-model.
+This directory contains an example handler.py for a Huggingface BART-style NLI model.
 
 ***How to customize initializer:***
-1. Since we are using BartModel, specifically [this one](https://huggingface.co/facebook/bart-large-mnli/tree/main), we imported AutoModelForSequenceClassification and AutoTokenizer from transformers.
+1. Since we are using BartModel, specifically [this one](https://huggingface.co/facebook/bart-base/tree/main), we imported BartTokenizer and BartModel; if you are using BERT instead, you can import BertTokenizer and BertModel.
 
-2. Change *facebook/bart-large-mnli* in `model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')` to the name of your chosen model from HuggingFace; do the same for `tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')`.
+2. Change *bart-base* in `model = BartModel.from_pretrained('facebook/bart-base')` to the name of your chosen model from HuggingFace; do the same for `tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')`.
 
 ***Here are the steps to use this handler:***
 
-1. Run `initializer.py`; it will download all the required files, namely `pytorch_model.bin`, `config.json`, `tokenizer.json`, `vocab.json` and `tokenizer_config.json`, into this folder (`dynalab/examples/bart_style_nli`)
+1. Run `initializer.py`; it will download all the required files, namely `pytorch_model.bin`, `config.json`, `special_tokens_map.json`, `vocab.json` and `tokenizer_config.json`, into this folder (`dynalab/examples/bart_style_nli`)
 
 2. Make a new file `requirements.txt` and add:
 ```