huggingface · ArthurZucker · Oct 1, 2024 · Oct 1, 2024 · Oct 1, 2024 · Oct 1, 2024
diff --git a/.github/workflows/docs-check.yml b/.github/workflows/docs-check.yml
@@ -35,7 +35,7 @@ jobs:
         run: make clean && make html_all O="-W --keep-going"
 
       - name: Upload built doc
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
         with:
           name: documentation
           path: ./docs/build/*
diff --git a/bindings/python/examples/using_the_visualizer.ipynb b/bindings/python/examples/using_the_visualizer.ipynb
@@ -35,7 +35,7 @@
    "outputs": [],
    "source": [
     "from tokenizers import BertWordPieceTokenizer\n",
-    "from tokenizers.tools import EncodingVisualizer\n"
+    "from tokenizers.tools import EncodingVisualizer"
    ]
   },
   {
@@ -305,7 +305,7 @@
     "anno2 = Annotation(start=2, end=4, label=\"bar\")\n",
     "anno3 = Annotation(start=6, end=8, label=\"poo\")\n",
     "anno4 = Annotation(start=9, end=12, label=\"shoe\")\n",
-    "annotations=[\n",
+    "annotations = [\n",
     "    anno1,\n",
     "    anno2,\n",
     "    anno3,\n",
@@ -315,8 +315,7 @@
     "    Annotation(start=80, end=95, label=\"bar\"),\n",
     "    Annotation(start=120, end=128, label=\"bar\"),\n",
     "    Annotation(start=152, end=155, label=\"poo\"),\n",
-    "]\n",
-    "\n"
+    "]"
    ]
   },
   {
@@ -521,7 +520,7 @@
     }
    ],
    "source": [
-    "visualizer(text,annotations=annotations)"
+    "visualizer(text, annotations=annotations)"
    ]
   },
   {
@@ -553,7 +552,7 @@
     }
    ],
    "source": [
-    "funnyAnnotations = [dict(startPlace=i,endPlace=i+3,theTag=str(i)) for i in range(0,20,4)]\n",
+    "funnyAnnotations = [dict(startPlace=i, endPlace=i + 3, theTag=str(i)) for i in range(0, 20, 4)]\n",
     "funnyAnnotations"
    ]
   },
@@ -563,7 +562,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "converter = lambda funny: Annotation(start=funny['startPlace'], end=funny['endPlace'], label=funny['theTag'])\n",
+    "def converter(funny):\n",
+    "    return Annotation(start=funny[\"startPlace\"], end=funny[\"endPlace\"], label=funny[\"theTag\"])\n",
+    "\n",
+    "\n",
     "visualizer = EncodingVisualizer(tokenizer=tokenizer, default_to_notebook=True, annotation_converter=converter)"
    ]
   },
@@ -817,7 +819,7 @@
    ],
    "source": [
     "!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json\" -O /tmp/roberta-base-vocab.json\n",
-    "!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt\" -O /tmp/roberta-base-merges.txt\n"
+    "!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt\" -O /tmp/roberta-base-merges.txt"
    ]
   },
   {
@@ -1023,7 +1025,8 @@
    ],
    "source": [
     "from tokenizers import ByteLevelBPETokenizer\n",
-    "roberta_tokenizer = ByteLevelBPETokenizer.from_file('/tmp/roberta-base-vocab.json', '/tmp/roberta-base-merges.txt')\n",
+    "\n",
+    "roberta_tokenizer = ByteLevelBPETokenizer.from_file(\"/tmp/roberta-base-vocab.json\", \"/tmp/roberta-base-merges.txt\")\n",
     "roberta_visualizer = EncodingVisualizer(tokenizer=roberta_tokenizer, default_to_notebook=True)\n",
     "roberta_visualizer(text, annotations=annotations)"
    ]