1+ from collections import OrderedDict
2+ from typing import Mapping
3+
4+ from transformers .configuration_utils import PretrainedConfig
5+ from transformers .onnx import OnnxConfig
6+
# Mapping from integer class index to its human-readable label.
ID2LABEL = {0: "good", 1: "bad"}
11+
class DistilBertConfig(PretrainedConfig):
    """Configuration class holding the hyperparameters of a DistilBERT model.

    All constructor arguments are stored as same-named attributes; any extra
    keyword arguments (plus ``pad_token_id``) are forwarded to
    ``PretrainedConfig.__init__``.
    """

    model_type = "distilbert"
    # Expose the generic transformer attribute names as aliases for the
    # DistilBERT-specific attribute names.
    attribute_map = {
        "hidden_size": "dim",
        "num_attention_heads": "n_heads",
        "num_hidden_layers": "n_layers",
    }

    def __init__(
        self,
        vocab_size=30522,
        max_position_embeddings=512,
        sinusoidal_pos_embds=False,
        n_layers=6,
        n_heads=12,
        dim=768,
        hidden_dim=4 * 768,
        dropout=0.1,
        attention_dropout=0.1,
        activation="gelu",
        initializer_range=0.02,
        qa_dropout=0.1,
        seq_classif_dropout=0.2,
        pad_token_id=0,
        **kwargs,
    ):
        # Embedding / vocabulary settings.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.sinusoidal_pos_embds = sinusoidal_pos_embds
        # Transformer architecture shape.
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.dim = dim
        self.hidden_dim = hidden_dim
        # Regularization and activation.
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.activation = activation
        self.initializer_range = initializer_range
        # Task-head dropouts (question answering / sequence classification).
        self.qa_dropout = qa_dropout
        self.seq_classif_dropout = seq_classif_dropout
        super().__init__(**kwargs, pad_token_id=pad_token_id)
52+
53+
class DistilBertOnnxConfig(OnnxConfig):
    """ONNX export configuration for DistilBERT models."""

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        """Return the model input names mapped to their dynamic ONNX axes.

        Multiple-choice tasks carry an extra "choice" dimension between the
        batch and sequence axes; all other tasks use (batch, sequence).
        """
        axes = (
            {0: "batch", 1: "choice", 2: "sequence"}
            if self.task == "multiple-choice"
            else {0: "batch", 1: "sequence"}
        )
        # Keyword-argument order is preserved, so "input_ids" precedes
        # "attention_mask" exactly as in the original tuple-list form.
        return OrderedDict(input_ids=axes, attention_mask=axes)