37 changes: 37 additions & 0 deletions examples/bart_style_nli/README.md
@@ -0,0 +1,37 @@
This directory contains an example `handler.py` for a Hugging Face BART-style NLI model.

***How to customize initializer:***
1. Since we are using a BART model, specifically [this one](https://huggingface.co/facebook/bart-base/tree/main), we import `BartTokenizer` and `BartModel`. If you are using BERT instead, import `BertTokenizer` and `BertModel`.

2. Change *bart-base* in `model = BartModel.from_pretrained('facebook/bart-base')` to the name of your chosen model from the Hugging Face Hub, and do the same for `tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')`. A sketch of such a swap follows.
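
For example, a minimal sketch of swapping in a BERT checkpoint (`bert-base-uncased` is just an illustrative model id; substitute your own):

```
# Hypothetical BERT variant of initializer.py.
# bert-base-uncased is an example model id; use your own checkpoint.
from transformers import BertModel, BertTokenizer

model = BertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model.save_pretrained(".")
tokenizer.save_pretrained(".")
```

Note that different tokenizers save different files (BERT, for example, writes `vocab.txt` rather than `vocab.json`), so adjust the `model_files` list in step 4 below accordingly.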

***Steps to use this handler:***

1. Run `initializer.py`; it downloads all the required files, namely `pytorch_model.bin`, `config.json`, `special_tokens_map.json`, `vocab.json`, and `tokenizer_config.json`, into this folder (`dynalab/examples/bart_style_nli`). A quick sanity check is sketched below.
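
To confirm the download worked, a minimal check (the file names mirror the list above) is:

```
# Quick check that initializer.py produced the expected files.
import os

expected = [
    "pytorch_model.bin",
    "config.json",
    "special_tokens_map.json",
    "vocab.json",
    "tokenizer_config.json",
]
missing = [name for name in expected if not os.path.exists(name)]
print("missing files:", missing or "none")
```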

2. Make a new file `requirements.txt` and add:
```
transformers
sentencepiece
protobuf
```

3. Run `dynalab-cli init -n bartstylenli`, and provide these answers:

```
Initializing . for dynalab model 'bartstylenli'...
Please choose a valid task name from one of [nli, qa, sentiment, hs, vqa, flores_small1, flores_small2, flores_full]: nli
Checkpoint file ./checkpoint.pt not a valid path. Please re-specify path to checkpoint file inside the root dir: pytorch_model.bin
Handler file found at ./handler.py. Press enter, or specify alternative path [./handler.py]:
Requirements file found. Do you want us to install dependencies using ./requirements.txt? [Y/n] Y
```

4. Run `dynalab-cli init -n bartstylenli --amend` and fill `model_files` with `["vocab.json", "tokenizer_config.json", "config.json", "special_tokens_map.json"]`.

5. Run tests:
```
dynalab-cli test --local -n bartstylenli # this should pass
dynalab-cli test -n bartstylenli # this should pass (try increasing Docker storage size and memory if it does not)
```

6. Log in to Dynabench via `dynalab-cli login`, then submit the model via `dynalab-cli upload -n bartstylenli`.
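
One caveat worth knowing: `handler.py` loads the checkpoint with `AutoModelForSequenceClassification`, while `initializer.py` saves a plain `BartModel`, so the classification head is newly (randomly) initialized at load time. A sketch of an alternative initializer that starts from an NLI fine-tuned checkpoint instead (`facebook/bart-large-mnli` is one public example; verify the details against your own checkpoint):

```
# Alternative initializer: save an NLI fine-tuned checkpoint so the
# classification head that handler.py loads is already trained.
# facebook/bart-large-mnli is one public example; substitute your own model.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")
model.save_pretrained(".")
tokenizer.save_pretrained(".")
```

If you go this route, check `model.config.id2label` and make sure the label mapping hardcoded in `postprocess()` matches your checkpoint's label order.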
136 changes: 136 additions & 0 deletions examples/bart_style_nli/handler.py
@@ -0,0 +1,136 @@
# Copyright (c) Facebook, Inc. and its affiliates.

"""
Instructions:
Please work through this file to construct your handler. Here are things
to watch out for:
- TODO blocks: you need to fill or modify these according to the instructions.
  The code in these blocks is for demo purposes only and may not work.
- NOTE inline comments: remember to follow these instructions to pass the test.
"""
import os
import sys

import torch
import torch.nn.functional as F

from dynalab.handler.base_handler import BaseDynaHandler
from dynalab.tasks.task_io import TaskIO, ROOTPATH


# NOTE: use the following line to import modules from your repo
sys.path.append("/home/model-server/code")
# import MyModel
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)


class Handler(BaseDynaHandler):
    def initialize(self, context):
        """
        load model and extra files
        """
        self.taskIO = TaskIO("nli")
        model_pt_path, extra_file_dir, device = self._handler_initialize(context)

        # ############TODO 1: Initialize model ############
        """
        Load model and read relevant files here.
        Your extra files can be read from os.path.join(extra_file_dir, file_name).
        """

        # self.model = MyModel(config)
        config = AutoConfig.from_pretrained('.')
        self.model = AutoModelForSequenceClassification.from_pretrained(
            '.', config=config
        )
        self.tokenizer = AutoTokenizer.from_pretrained('.')
        self.model.to(device)
        self.model.eval()
        # #################################################

        self.initialized = True

    def preprocess(self, data):
        """
        preprocess data into a format that the model can do inference on
        """
        example = self._read_data(data)

        # ############TODO 2: preprocess data #############
        """
        You can extract the keys and values from the input data as below.
        `example` is always a json object. You can see what an example
        looks like via
        ```
        dynalab.tasks.nli.TaskIO().get_input_json()
        ```
        """
        args = (example["context"], example["hypothesis"])
        # truncation=True makes max_length effective for long inputs
        input_data = self.tokenizer(
            *args, max_length=1024, truncation=True, return_tensors="pt"
        )

        return input_data

    def inference(self, input_data):
        """
        do inference on the processed example
        """

        # ############TODO 3: inference ###################
        """
        Run model prediction using the processed data
        """
        with torch.no_grad():
            inference_output = self.model(**input_data)
            # pass dim explicitly; F.softmax without dim is deprecated
            inference_output = F.softmax(inference_output[0], dim=-1).squeeze()
        # #################################################

        return inference_output

    def postprocess(self, inference_output, data):
        """
        post-process the inference output into a response.
        the response should be a single-element list containing a json object;
        its format will need to pass the validation in
        ```
        dynalab.tasks.nli.TaskIO().verify_response(response)
        ```
        """
        response = dict()
        example = self._read_data(data)
        # ############TODO 4: postprocess response ########
        """
        Add attributes to response
        """
        response["id"] = example["uid"]
        # NOTE: this label order must match your checkpoint's config.id2label
        labels = {0: "entailed", 1: "contradictory", 2: "neutral"}
        response["label"] = labels[int(inference_output.argmax())]
        response["prob"] = {
            "entailed": float(inference_output[0]),
            "contradictory": float(inference_output[1]),
            "neutral": float(inference_output[2]),
        }
        # #################################################
        self.taskIO.sign_response(response, example)
        return [response]


_service = Handler()


def handle(data, context):
    if not _service.initialized:
        _service.initialize(context)
    if data is None:
        return None

    # ############TODO 5: assemble inference pipeline #####
    """
    Normally you don't need to change anything in this block.
    However, if you do need to change this part (e.g. function name,
    arguments, etc.), remember to make the corresponding changes in the
    Handler class definition.
    """
    input_data = _service.preprocess(data)
    output = _service.inference(input_data)
    response = _service.postprocess(output, data)
    # #####################################################

    return response
8 changes: 8 additions & 0 deletions examples/bart_style_nli/initializer.py
@@ -0,0 +1,8 @@
# We are using a BART model here; change the tokenizer and model classes to
# match your model, e.g. BertTokenizer/BertModel, RobertaTokenizer/RobertaModel, etc.

from transformers import BartTokenizer, BartModel

model = BartModel.from_pretrained('facebook/bart-base')
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model.save_pretrained(".")
tokenizer.save_pretrained(".")
3 changes: 3 additions & 0 deletions examples/bart_style_nli/requirements.txt
@@ -0,0 +1,3 @@
transformers
sentencepiece
protobuf