Merged
Changes from all commits
28 commits
623b4f7
[VisionTextDualEncoder] Add token_type_ids param (#15073)
ydshieh Jan 7, 2022
768e6c1
Fix convert for newer megatron-lm bert model (#14082)
yoquankara Jan 8, 2022
d72343d
[Wav2Vec2 Speech Event] Add speech event v2 (#15083)
patrickvonplaten Jan 10, 2022
0a03a86
fix model table cell text alignment (#14999)
ydshieh Jan 10, 2022
9fbf7c8
Update check_repo.py (#15014)
kamalkraj Jan 10, 2022
a54961c
Make OpenAIGPTTokenizer work with SpaCy 2.x and 3.x (#15019)
cody-moveworks Jan 10, 2022
42d5754
Change assignee for tokenizers (#15088)
LysandreJik Jan 10, 2022
b2c477f
support the trocr small models (#14893)
liminghao1630 Jan 10, 2022
533624c
fix doc example - AttributeError: type object 'RagModel' has no attri…
ydshieh Jan 10, 2022
af9cb94
Fix style
sgugger Jan 10, 2022
f012c00
Model summary horizontal banners (#15058)
mishig25 Jan 10, 2022
f21bc42
Use tqdm.auto in Pipeline docs (#14920)
bryant1410 Jan 10, 2022
31838d3
[doc] normalize HF Transformers string (#15023)
stas00 Jan 10, 2022
61d18ae
Happy New Year! (#15094)
sgugger Jan 10, 2022
3e9fdcf
[DOC] fix doc examples for bart-like models (#15093)
patil-suraj Jan 10, 2022
37bc0b4
[performance doc] Power and Cooling (#14935)
stas00 Jan 10, 2022
b67fd79
Add TFVisionEncoderDecoderModel (#14148)
ydshieh Jan 10, 2022
9dc8fb2
Add test to check reported training loss (#15096)
sgugger Jan 11, 2022
ca76618
Take gradient accumulation into account when defining samplers (#15095)
sgugger Jan 11, 2022
68810aa
fix doc example - TypeError: forward() got an unexpected keyword argu…
ydshieh Jan 11, 2022
6ea6266
Fix cookiecutter (#15100)
NielsRogge Jan 11, 2022
efb35a4
[Wav2Vec2ProcessorWithLM] improve decoder download (#15040)
patrickvonplaten Jan 11, 2022
c4fa908
Adds IBERT to models exportable with ONNX (#14868)
MaximovaIrina Jan 11, 2022
285131b
change metric_key_prefix in seq2seq_trainer.py (#15099)
JejuWayfarer Jan 11, 2022
444ea95
Print out durations of all scheduled tests (#15102)
LysandreJik Jan 11, 2022
28e0914
Add Nystromformer (#14659)
novice03 Jan 11, 2022
7480ded
Fix failing test (#15104)
LysandreJik Jan 11, 2022
68d9251
Merge branch 'master' into master
AK391 Jan 11, 2022
22 changes: 11 additions & 11 deletions .circleci/config.yml
@@ -99,7 +99,7 @@ jobs:
path: ~/transformers/tests_output.txt
- store_artifacts:
path: ~/transformers/reports

run_tests_torch_and_tf_all:
working_directory: ~/transformers
docker:
@@ -169,7 +169,7 @@ jobs:
path: ~/transformers/tests_output.txt
- store_artifacts:
path: ~/transformers/reports

run_tests_torch_and_flax_all:
working_directory: ~/transformers
docker:
@@ -237,7 +237,7 @@ jobs:
path: ~/transformers/tests_output.txt
- store_artifacts:
path: ~/transformers/reports

run_tests_torch_all:
working_directory: ~/transformers
docker:
@@ -304,7 +304,7 @@ jobs:
path: ~/transformers/tests_output.txt
- store_artifacts:
path: ~/transformers/reports

run_tests_tf_all:
working_directory: ~/transformers
docker:
@@ -370,7 +370,7 @@ jobs:
path: ~/transformers/tests_output.txt
- store_artifacts:
path: ~/transformers/reports

run_tests_flax_all:
working_directory: ~/transformers
docker:
@@ -437,7 +437,7 @@ jobs:
path: ~/transformers/tests_output.txt
- store_artifacts:
path: ~/transformers/reports

run_tests_pipelines_torch_all:
working_directory: ~/transformers
docker:
@@ -549,15 +549,15 @@ jobs:
- v0.4-custom_tokenizers-{{ checksum "setup.py" }}
- v0.4-{{ checksum "setup.py" }}
- run: pip install --upgrade pip
- run: pip install .[ja,testing,sentencepiece,jieba]
- run: pip install .[ja,testing,sentencepiece,jieba,spacy,ftfy]
- run: python -m unidic download
- save_cache:
key: v0.4-custom_tokenizers-{{ checksum "setup.py" }}
paths:
- '~/.cache/pip'
- run: |
if [ -f test_list.txt ]; then
python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt
python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py ./tests/test_tokenization_openai.py | tee tests_output.txt
fi
- store_artifacts:
path: ~/transformers/tests_output.txt
@@ -662,7 +662,7 @@ jobs:
path: ~/transformers/flax_examples_output.txt
- store_artifacts:
path: ~/transformers/reports

run_examples_flax_all:
working_directory: ~/transformers
docker:
@@ -729,7 +729,7 @@ jobs:
path: ~/transformers/tests_output.txt
- store_artifacts:
path: ~/transformers/reports

run_tests_hub_all:
working_directory: ~/transformers
docker:
@@ -795,7 +795,7 @@ jobs:
path: ~/transformers/tests_output.txt
- store_artifacts:
path: ~/transformers/reports

run_tests_onnxruntime_all:
working_directory: ~/transformers
docker:
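The custom tokenizers job above now installs `spacy` and `ftfy` alongside the existing Japanese tokenizer dependencies and adds `tests/test_tokenization_openai.py` to the test run, matching the "Make OpenAIGPTTokenizer work with SpaCy 2.x and 3.x" commit in this PR. A minimal sketch of what that test exercises — the fallback detail is the tokenizer's documented behaviour, not something visible in this diff:

```python
# Sketch only: with spacy and ftfy installed, OpenAIGPTTokenizer pre-tokenizes
# with SpaCy and ftfy text fixing; without them it is documented to fall back
# to a BERT-style BasicTokenizer before applying BPE.
from transformers import OpenAIGPTTokenizer

tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
tokens = tokenizer.tokenize("Hello world, this sentence goes through the GPT BPE vocabulary.")
print(tokens)
```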
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug-report.md
@@ -49,7 +49,7 @@ Library:
- Deepspeed: @stas00
- Ray/raytune: @richardliaw, @amogkam
- Text generation: @patrickvonplaten @narsil
- Tokenizers: @LysandreJik
- Tokenizers: @SaulLu
- Trainer: @sgugger
- Pipelines: @Narsil
- Speech: @patrickvonplaten, @anton-l
52 changes: 52 additions & 0 deletions .github/workflows/self-scheduled.yml
@@ -51,6 +51,10 @@ jobs:
if: ${{ always() }}
run: cat reports/tests_torch_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_torch_gpu_durations.txt

- name: Run examples tests on GPU
if: ${{ always() }}
env:
@@ -67,6 +71,10 @@
if: ${{ always() }}
run: cat reports/examples_torch_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/examples_torch_gpu_durations.txt

- name: Run all pipeline tests on GPU
if: ${{ always() }}
env:
@@ -78,6 +86,10 @@
if: ${{ always() }}
run: cat reports/tests_torch_pipeline_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_torch_pipeline_gpu_durations.txt

- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
@@ -119,6 +131,10 @@ jobs:
if: ${{ always() }}
run: cat reports/tests_flax_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_flax_gpu_durations.txt

- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
@@ -163,6 +179,10 @@ jobs:
if: ${{ always() }}
run: cat reports/tests_tf_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_tf_gpu_durations.txt

- name: Run all pipeline tests on GPU
if: ${{ always() }}
env:
@@ -176,6 +196,10 @@
if: ${{ always() }}
run: cat reports/tests_tf_pipeline_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_tf_pipeline_gpu_durations.txt

- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
@@ -215,6 +239,10 @@ jobs:
if: ${{ always() }}
run: cat reports/tests_torch_xla_tpu_failures_short.txt

- name: Tests durations
if: ${{ always() }}
run: cat reports/tests_torch_xla_tpu_durations.txt

- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
@@ -258,6 +286,10 @@ jobs:
if: ${{ always() }}
run: cat reports/tests_torch_multi_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_torch_multi_gpu_durations.txt

- name: Run all pipeline tests on GPU
if: ${{ always() }}
env:
@@ -269,6 +301,10 @@
if: ${{ always() }}
run: cat reports/tests_torch_pipeline_multi_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_torch_pipeline_multi_gpu_durations.txt

- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
@@ -313,6 +349,10 @@ jobs:
if: ${{ always() }}
run: cat reports/tests_tf_multi_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_tf_multi_gpu_durations.txt

- name: Run all pipeline tests on GPU
if: ${{ always() }}
env:
@@ -326,6 +366,10 @@
if: ${{ always() }}
run: cat reports/tests_tf_pipeline_multi_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_tf_pipeline_multi_gpu_durations.txt

- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
@@ -403,6 +447,10 @@ jobs:
if: ${{ always() }}
run: cat reports/tests_torch_cuda_extensions_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_torch_cuda_extensions_gpu_durations.txt

- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
@@ -443,6 +491,10 @@ jobs:
if: ${{ always() }}
run: cat reports/tests_torch_cuda_extensions_multi_gpu_failures_short.txt

- name: Test durations
if: ${{ always() }}
run: cat reports/tests_torch_cuda_extensions_multi_gpu_durations.txt

- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
1 change: 1 addition & 0 deletions README.md
@@ -285,6 +285,7 @@ Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih.
1. **[Megatron-GPT2](https://huggingface.co/docs/transformers/model_doc/megatron_gpt2)** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053) by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
1. **[Nyströmformer](https://huggingface.co/docs/transformers/master/model_doc/nystromformer)** (from the University of Wisconsin - Madison) released with the paper [Nyströmformer: A Nyström-Based Algorithm for Approximating Self-Attention](https://arxiv.org/abs/2102.03902) by Yunyang Xiong, Zhanpeng Zeng, Rudrasis Chakraborty, Mingxing Tan, Glenn Fung, Yin Li, Vikas Singh.
1. **[Pegasus](https://huggingface.co/docs/transformers/model_doc/pegasus)** (from Google) released with the paper [PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization](https://arxiv.org/abs/1912.08777) by Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu.
1. **[Perceiver IO](https://huggingface.co/docs/transformers/model_doc/perceiver)** (from Deepmind) released with the paper [Perceiver IO: A General Architecture for Structured Inputs & Outputs](https://arxiv.org/abs/2107.14795) by Andrew Jaegle, Sebastian Borgeaud, Jean-Baptiste Alayrac, Carl Doersch, Catalin Ionescu, David Ding, Skanda Koppula, Daniel Zoran, Andrew Brock, Evan Shelhamer, Olivier Hénaff, Matthew M. Botvinick, Andrew Zisserman, Oriol Vinyals, João Carreira.
1. **[PhoBERT](https://huggingface.co/docs/transformers/model_doc/phobert)** (from VinAI Research) released with the paper [PhoBERT: Pre-trained language models for Vietnamese](https://www.aclweb.org/anthology/2020.findings-emnlp.92/) by Dat Quoc Nguyen and Anh Tuan Nguyen.
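The Nyströmformer entry above corresponds to the "Add Nystromformer (#14659)" commit in this PR. A minimal usage sketch, assuming the model loads through the Auto classes and that a checkpoint is published under the name used below — both are assumptions, not taken from this diff:

```python
# Hypothetical checkpoint name; the actual Hub id may differ.
from transformers import AutoModel, AutoTokenizer

checkpoint = "uw-madison/nystromformer-512"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

inputs = tokenizer("Nyström approximation keeps self-attention sub-quadratic.", return_tensors="pt")
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)
```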
1 change: 1 addition & 0 deletions README_ko.md
@@ -264,6 +264,7 @@ Flax, PyTorch, TensorFlow 설치 페이지에서 이들을 conda로 설치하는
1. **[mLUKE](https://huggingface.co/docs/transformers/model_doc/mluke)** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models](https://arxiv.org/abs/2110.08151) by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
1. **[Nyströmformer](https://huggingface.co/docs/transformers/master/model_doc/nystromformer)** (from the University of Wisconsin - Madison) released with the paper [Nyströmformer: A Nyström-Based Algorithm for Approximating Self-Attention](https://arxiv.org/abs/2102.03902) by Yunyang Xiong, Zhanpeng Zeng, Rudrasis Chakraborty, Mingxing Tan, Glenn Fung, Yin Li, Vikas Singh.
1. **[Pegasus](https://huggingface.co/docs/transformers/model_doc/pegasus)** (from Google) released with the paper [PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization](https://arxiv.org/abs/1912.08777) by Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu.
1. **[Perceiver IO](https://huggingface.co/docs/transformers/model_doc/perceiver)** (from Deepmind) released with the paper [Perceiver IO: A General Architecture for Structured Inputs & Outputs](https://arxiv.org/abs/2107.14795) by Andrew Jaegle, Sebastian Borgeaud, Jean-Baptiste Alayrac, Carl Doersch, Catalin Ionescu, David Ding, Skanda Koppula, Daniel Zoran, Andrew Brock, Evan Shelhamer, Olivier Hénaff, Matthew M. Botvinick, Andrew Zisserman, Oriol Vinyals, João Carreira.
1. **[PhoBERT](https://huggingface.co/docs/transformers/model_doc/phobert)** (from VinAI Research) released with the paper [PhoBERT: Pre-trained language models for Vietnamese](https://www.aclweb.org/anthology/2020.findings-emnlp.92/) by Dat Quoc Nguyen and Anh Tuan Nguyen.
1 change: 1 addition & 0 deletions README_zh-hans.md
@@ -288,6 +288,7 @@ conda install -c huggingface transformers
1. **[mLUKE](https://huggingface.co/docs/transformers/model_doc/mluke)** (来自 Studio Ousia) 伴随论文 [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models](https://arxiv.org/abs/2110.08151) 由 Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka 发布。
1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (来自 Microsoft Research) 伴随论文 [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) 由 Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu 发布。
1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (来自 Google AI) 伴随论文 [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) 由 Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel 发布。
1. **[Nyströmformer](https://huggingface.co/docs/transformers/master/model_doc/nystromformer)** (来自 the University of Wisconsin - Madison) 伴随论文 [Nyströmformer: A Nyström-Based Algorithm for Approximating Self-Attention](https://arxiv.org/abs/2102.03902) 由 Yunyang Xiong, Zhanpeng Zeng, Rudrasis Chakraborty, Mingxing Tan, Glenn Fung, Yin Li, Vikas Singh 发布。
1. **[Pegasus](https://huggingface.co/docs/transformers/model_doc/pegasus)** (来自 Google) 伴随论文 [PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization](https://arxiv.org/abs/1912.08777) 由 Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu 发布。
1. **[Perceiver IO](https://huggingface.co/docs/transformers/model_doc/perceiver)** (来自 Deepmind) 伴随论文 [Perceiver IO: A General Architecture for Structured Inputs & Outputs](https://arxiv.org/abs/2107.14795) 由 Andrew Jaegle, Sebastian Borgeaud, Jean-Baptiste Alayrac, Carl Doersch, Catalin Ionescu, David Ding, Skanda Koppula, Daniel Zoran, Andrew Brock, Evan Shelhamer, Olivier Hénaff, Matthew M. Botvinick, Andrew Zisserman, Oriol Vinyals, João Carreira 发布。
1. **[PhoBERT](https://huggingface.co/docs/transformers/model_doc/phobert)** (来自 VinAI Research) 伴随论文 [PhoBERT: Pre-trained language models for Vietnamese](https://www.aclweb.org/anthology/2020.findings-emnlp.92/) 由 Dat Quoc Nguyen and Anh Tuan Nguyen 发布。
1 change: 1 addition & 0 deletions README_zh-hant.md
@@ -300,6 +300,7 @@ conda install -c huggingface transformers
1. **[mLUKE](https://huggingface.co/docs/transformers/model_doc/mluke)** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models](https://arxiv.org/abs/2110.08151) by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
1. **[Nyströmformer](https://huggingface.co/docs/transformers/master/model_doc/nystromformer)** (from the University of Wisconsin - Madison) released with the paper [Nyströmformer: A Nyström-Based Algorithm for Approximating Self-Attention](https://arxiv.org/abs/2102.03902) by Yunyang Xiong, Zhanpeng Zeng, Rudrasis Chakraborty, Mingxing Tan, Glenn Fung, Yin Li, Vikas Singh.
1. **[Pegasus](https://huggingface.co/docs/transformers/model_doc/pegasus)** (from Google) released with the paper [PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization](https://arxiv.org/abs/1912.08777) by Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu.
1. **[Perceiver IO](https://huggingface.co/docs/transformers/model_doc/perceiver)** (from Deepmind) released with the paper [Perceiver IO: A General Architecture for Structured Inputs & Outputs](https://arxiv.org/abs/2107.14795) by Andrew Jaegle, Sebastian Borgeaud, Jean-Baptiste Alayrac, Carl Doersch, Catalin Ionescu, David Ding, Skanda Koppula, Daniel Zoran, Andrew Brock, Evan Shelhamer, Olivier Hénaff, Matthew M. Botvinick, Andrew Zisserman, Oriol Vinyals, João Carreira.
1. **[PhoBERT](https://huggingface.co/docs/transformers/model_doc/phobert)** (from VinAI Research) released with the paper [PhoBERT: Pre-trained language models for Vietnamese](https://www.aclweb.org/anthology/2020.findings-emnlp.92/) by Dat Quoc Nguyen and Anh Tuan Nguyen.
2 changes: 2 additions & 0 deletions docs/source/_toctree.yml
@@ -214,6 +214,8 @@
title: MPNet
- local: model_doc/mt5
title: MT5
- local: model_doc/nystromformer
title: Nyströmformer
- local: model_doc/openai-gpt
title: OpenAI GPT
- local: model_doc/gpt2
8 changes: 4 additions & 4 deletions docs/source/benchmarks.mdx
@@ -14,13 +14,13 @@ specific language governing permissions and limitations under the License.

[[open-in-colab]]

Let's take a look at how 🤗 Transformer models can be benchmarked, best practices, and already available benchmarks.
Let's take a look at how 🤗 Transformers models can be benchmarked, best practices, and already available benchmarks.

A notebook explaining in more detail how to benchmark 🤗 Transformer models can be found [here](https://github.com/huggingface/notebooks/tree/master/examples/benchmark.ipynb).
A notebook explaining in more detail how to benchmark 🤗 Transformers models can be found [here](https://github.com/huggingface/notebooks/tree/master/examples/benchmark.ipynb).

## How to benchmark 🤗 Transformer models
## How to benchmark 🤗 Transformers models

The classes [`PyTorchBenchmark`] and [`TensorFlowBenchmark`] allow to flexibly benchmark 🤗 Transformer models. The benchmark classes allow us to measure the _peak memory usage_ and _required time_ for both _inference_ and _training_.
The classes [`PyTorchBenchmark`] and [`TensorFlowBenchmark`] allow to flexibly benchmark 🤗 Transformers models. The benchmark classes allow us to measure the _peak memory usage_ and _required time_ for both _inference_ and _training_.

<Tip>

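The benchmarks page edited above describes the [`PyTorchBenchmark`] and [`TensorFlowBenchmark`] classes. A minimal sketch of the PyTorch variant, following the benchmark API as documented in the library — the model id and input shapes are illustrative only:

```python
from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments

# Benchmark inference speed and peak memory for one model at two input shapes.
args = PyTorchBenchmarkArguments(
    models=["bert-base-uncased"],
    batch_sizes=[8],
    sequence_lengths=[32, 128],
)
benchmark = PyTorchBenchmark(args)
results = benchmark.run()  # prints timing/memory tables and returns a result object
```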