diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 22cc11288c3..00000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,107 +0,0 @@ -version: 2.1 - -orbs: - win: circleci/windows@4.1.1 - -jobs: - run_dataset_script_tests_pyarrow_latest: - working_directory: ~/datasets - docker: - - image: cimg/python:3.6 - resource_class: medium - steps: - - checkout - - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev - - run: pip install --upgrade pip - - run: python -m venv venv - - run: source venv/bin/activate - - run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.6" && pip install "setuptools-scm==6.4.2" - - run: pip install .[tests] - - run: pip install -r additional-tests-requirements.txt --no-deps - - run: pip install pyarrow --upgrade - - run: HF_SCRIPTS_VERSION=main HF_ALLOW_CODE_EVAL=1 python -m pytest -d --tx 2*popen//python=python3.6 --dist loadfile -sv ./tests/ - - run_dataset_script_tests_pyarrow_6: - working_directory: ~/datasets - docker: - - image: cimg/python:3.6 - resource_class: medium - steps: - - checkout - - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev - - run: pip install --upgrade pip - - run: python -m venv venv - - run: source venv/bin/activate - - run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.6" && pip install "setuptools-scm==6.4.2" - - run: pip install .[tests] - - run: pip install -r additional-tests-requirements.txt --no-deps - - run: pip install pyarrow==6.0.0 - - run: HF_SCRIPTS_VERSION=main HF_ALLOW_CODE_EVAL=1 python -m pytest -d --tx 2*popen//python=python3.6 --dist loadfile -sv ./tests/ - - run_dataset_script_tests_pyarrow_latest_WIN: - working_directory: ~/datasets - executor: - name: win/default - shell: powershell - steps: - - checkout - - run: | - conda init powershell - conda update conda - conda create -n py37 python=3.7 pytorch --yes - - run: | - conda activate py37 - pip
install .[tests] - pip install -r additional-tests-requirements.txt --no-deps - pip install pyarrow --upgrade - - run: | - conda activate py37 - $env:HF_SCRIPTS_VERSION="main" - python -m pytest -n 2 --dist loadfile -sv ./tests/ - - run_dataset_script_tests_pyarrow_6_WIN: - working_directory: ~/datasets - executor: - name: win/default - shell: powershell - steps: - - checkout - - run: | - conda init powershell - conda update conda - conda create -n py37 python=3.7 pytorch --yes - - run: | - conda activate py37 - pip install .[tests] - pip install -r additional-tests-requirements.txt --no-deps - pip install pyarrow==6.0.0 - - run: | - conda activate py37 - $env:HF_SCRIPTS_VERSION="main" - python -m pytest -n 2 --dist loadfile -sv ./tests/ - - check_code_quality: - working_directory: ~/datasets - docker: - - image: cimg/python:3.6 - resource_class: medium - parallelism: 1 - steps: - - checkout - - run: pip install --upgrade pip - - run: python -m venv venv - - run: source venv/bin/activate - - run: pip install .[quality] - - run: black --check --line-length 119 --target-version py36 tests src benchmarks datasets metrics - - run: isort --check-only tests src benchmarks datasets metrics - - run: flake8 tests src benchmarks datasets metrics - -workflows: - version: 2 - build_and_test: - jobs: - - check_code_quality - - run_dataset_script_tests_pyarrow_latest - - run_dataset_script_tests_pyarrow_6 - - run_dataset_script_tests_pyarrow_latest_WIN - - run_dataset_script_tests_pyarrow_6_WIN diff --git a/.circleci/deploy.sh b/.circleci/deploy.sh deleted file mode 100755 index 10929052252..00000000000 --- a/.circleci/deploy.sh +++ /dev/null @@ -1,81 +0,0 @@ -cd docs - -function deploy_doc(){ - echo "Creating doc at commit $1 and pushing to folder $2" - git checkout $1 - if [ ! -z "$2" ] - then - if [ "$2" == "master" ]; then - echo "Pushing master" - make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir/$2/ - cp -r _build/html/_static .
- elif ssh -oStrictHostKeyChecking=no $doc "[ -d $dir/$2 ]"; then - echo "Directory" $2 "already exists" - scp -r -oStrictHostKeyChecking=no _static/* $doc:$dir/$2/_static/ - else - echo "Pushing version" $2 - make clean && make html - rm -rf _build/html/_static - cp -r _static _build/html - scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2 - fi - else - echo "Pushing stable" - make clean && make html - rm -rf _build/html/_static - cp -r _static _build/html - scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir - fi -} - -# You can find the commit for each tag on https://github.com/huggingface/datasets/tags -# Deploys the master documentation on huggingface.co/docs/datasets/master -deploy_doc "master" master - -# Example of how to deploy a doc on a certain commit (the commit doesn't have to be on the master branch). -# The following commit would live on huggingface.co/docs/datasets/v1.0.0 -deploy_doc "faf3d79" v1.18.4 -deploy_doc "c6bc52a" v1.18.3 -deploy_doc "ba00b25" v1.18.2 -deploy_doc "218e496" v1.18.1 -deploy_doc "c0aea8d" v1.18.0 -deploy_doc "dff6c92" v1.17.0 -deploy_doc "acca8f4" v1.16.1 -deploy_doc "d50f5f9" v1.16.0 -deploy_doc "0181006" v1.15.1 -deploy_doc "dcaa3c0" v1.15.0 -deploy_doc "ec82422" v1.14.0 -deploy_doc "10dc68c" v1.13.3 -deploy_doc "e82164f" v1.13.2 -deploy_doc "2ed762b" v1.13.1 -deploy_doc "38ec259" v1.13.0 -deploy_doc "2c1fc9c" v1.12.1 -deploy_doc "c65dccc" v1.12.0 -deploy_doc "ea7f0b8" v1.11.0 -deploy_doc "cea1a29" v1.10.2 -deploy_doc "6b7b227" v1.10.1 -deploy_doc "3aabafb" v1.10.0 -deploy_doc "5bc064d" v1.9.0 -deploy_doc "bcf0543" v1.8.0 -deploy_doc "448c177" v1.7.0 -deploy_doc "b0d7ae1" v1.6.2 -deploy_doc "e8fc41f" v1.6.1 -deploy_doc "40bb9e6" v1.6.0 -deploy_doc "f256b77" v1.5.0 -deploy_doc "ca41320" v1.4.1 -deploy_doc "f42658e" v1.4.0 -deploy_doc "ef633da" v1.3.0 -deploy_doc "a59580b" v1.2.1 -deploy_doc "dae6880" v1.2.0 -deploy_doc "000b584" v1.1.3 -deploy_doc "2256521" v1.1.2 -deploy_doc "8029965" v1.1.1 -deploy_doc
"fe52b67" v1.1.0 -deploy_doc "af7cd94" v1.0.2 -deploy_doc "7c9d2b5" v1.0.1 -deploy_doc "322ba0e" v1.0.0 -deploy_doc "99e0ee6" v0.3.0 -deploy_doc "21e8091" v0.4.0 - -# Replace this by the latest stable commit. It is recommended to pin on a version release rather than master. -deploy_doc "master" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000000..2efb293fe5a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,76 @@ +name: CI + +on: + pull_request: + branches: + - main + +env: + HF_SCRIPTS_VERSION: main + HF_ALLOW_CODE_EVAL: "1" + +jobs: + + check_code_quality: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.6" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install .[quality] + - name: Check quality + run: | + black --check --line-length 119 --target-version py36 tests src benchmarks datasets metrics + isort --check-only tests src benchmarks datasets metrics + flake8 tests src benchmarks datasets metrics + + test: + needs: check_code_quality + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + pyarrow_version: [latest, 6.0.1] + runs-on: ${{ matrix.os }} + steps: + - name: Install OS dependencies + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + sudo apt-get -y update + sudo apt-get -y install libsndfile1 sox + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python 3.6 + if: ${{ matrix.os == 'ubuntu-latest' }} + uses: actions/setup-python@v4 + with: + python-version: "3.6" + - name: Set up Python 3.7 + if: ${{ matrix.os == 'windows-latest' }} + uses: actions/setup-python@v4 + with: + python-version: "3.7" + - name: Upgrade pip + run: python -m pip install --upgrade pip + - name: Pin setuptools-scm + if: ${{ matrix.os == 'ubuntu-latest' }} + run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.6"
&& pip install "setuptools-scm==6.4.2" + - name: Install dependencies + run: | + pip install .[tests] + pip install -r additional-tests-requirements.txt --no-deps + - name: Install latest PyArrow + if: ${{ matrix.pyarrow_version == 'latest' }} + run: pip install pyarrow --upgrade + - name: Install PyArrow ${{ matrix.pyarrow_version }} + if: ${{ matrix.pyarrow_version != 'latest' }} + run: pip install pyarrow==${{ matrix.pyarrow_version }} + - name: Test with pytest + run: | + python -m pytest -n 2 --dist loadfile -sv ./tests/ diff --git a/.github/workflows/test-audio.yml b/.github/workflows/test-audio.yml deleted file mode 100644 index b0b40f4ff4e..00000000000 --- a/.github/workflows/test-audio.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Test audio - -on: - pull_request: - branches: - - main - -jobs: - test: - runs-on: ubuntu-latest - steps: - - name: Install OS dependencies - run: | - sudo apt-get update - sudo apt-get install libsndfile1 sox - - uses: actions/checkout@v2 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: "3.6" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install .[tests,audio] - pip install pyarrow --upgrade - - name: Test audio with pytest - run: | - HF_SCRIPTS_VERSION=main python -m pytest -n 2 -sv ./tests/features/test_audio.py diff --git a/tests/test_arrow_dataset.py b/tests/test_arrow_dataset.py index 7e34e13d9b8..dd1daaccc3e 100644 --- a/tests/test_arrow_dataset.py +++ b/tests/test_arrow_dataset.py @@ -3119,9 +3119,9 @@ def test_pickle_dataset_after_transforming_the_table(in_memory, method_and_param @pytest.mark.skipif( - os.name == "nt" and os.getenv("CIRCLECI") == "true", - reason='On Windows CircleCI, it raises botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL: "http://127.0.0.1:5555/test"', -) # TODO(QL): find what's wrong with CircleCI + os.name == "nt" and (os.getenv("CIRCLECI")
== "true" or os.getenv("GITHUB_ACTIONS") == "true"), + reason='On Windows CircleCI or GitHub Actions, it raises botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL: "http://127.0.0.1:5555/test"', +) # TODO: find what's wrong with CircleCI / GitHub Actions @require_s3 def test_dummy_dataset_serialize_s3(s3, dataset): mock_bucket = s3_test_bucket_name diff --git a/tests/test_dataset_dict.py b/tests/test_dataset_dict.py index befa91c6d6f..643edf043ae 100644 --- a/tests/test_dataset_dict.py +++ b/tests/test_dataset_dict.py @@ -665,9 +665,9 @@ def test_datasetdict_from_text_split(split, text_path, tmp_path): @pytest.mark.skipif( - os.name == "nt" and os.getenv("CIRCLECI") == "true", - reason='On Windows CircleCI, it raises botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL: "http://127.0.0.1:5555/test"', -) # TODO(QL): find what's wrong with CircleCI + os.name == "nt" and (os.getenv("CIRCLECI") == "true" or os.getenv("GITHUB_ACTIONS") == "true"), + reason='On Windows CircleCI or GitHub Actions, it raises botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL: "http://127.0.0.1:5555/test"', +) # TODO: find what's wrong with CircleCI / GitHub Actions @require_s3 def test_dummy_dataset_serialize_s3(s3, dataset): dsets = DatasetDict({"train": dataset, "test": dataset.select(range(2))})