Skip to content
203 changes: 108 additions & 95 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,21 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: [3.7, 3.8, 3.9, "3.10"]
pytorch-channel: [pytorch, pytorch-nightly]
include:
# includes a single build on windows
- os: windows-latest
pytorch-channel: pytorch
python-version: 3.8
skip-distrib-tests: 1
# includes a single build on macosx
- os: macos-latest
pytorch-channel: pytorch
python-version: 3.8
skip-distrib-tests: 1
# python-version: [3.7, 3.8, 3.9, "3.10"]
# pytorch-channel: [pytorch, pytorch-nightly]
python-version: ["3.10"]
pytorch-channel: [pytorch-nightly]
# include:
# # includes a single build on windows
# - os: windows-latest
# pytorch-channel: pytorch
# python-version: 3.8
# skip-distrib-tests: 1
# # includes a single build on macosx
# - os: macos-latest
# pytorch-channel: pytorch
# python-version: 3.8
# skip-distrib-tests: 1

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -90,93 +92,104 @@ jobs:
pip install -r requirements-dev.txt
python setup.py install

- name: Check code formatting
run: |
bash ./tests/run_code_style.sh install
bash ./tests/run_code_style.sh lint
# - name: Check code formatting
# run: |
# bash ./tests/run_code_style.sh install
# bash ./tests/run_code_style.sh lint

- name: Run Mypy
# https://github.com/pytorch/ignite/pull/2780
#
if: ${{ matrix.os == 'ubuntu-latest' && matrix.pytorch-channel == 'pytorch-nightly'}}
run: |
bash ./tests/run_code_style.sh mypy
# - name: Run Mypy
# # https://github.com/pytorch/ignite/pull/2780
# #
# if: ${{ matrix.os == 'ubuntu-latest' && matrix.pytorch-channel == 'pytorch-nightly'}}
# run: |
# bash ./tests/run_code_style.sh mypy

# Download MNIST: https://github.com/pytorch/ignite/issues/1737
# to "/tmp" for unit tests
- name: Download MNIST
uses: pytorch-ignite/download-mnist-github-action@master
# - name: Download MNIST
# uses: pytorch-ignite/download-mnist-github-action@master
# with:
# target_dir: /tmp

# # Copy MNIST to "." for the examples
# - name: Copy MNIST
# run: |
# cp -R /tmp/MNIST .

# -- REMOVE THIS
- name: Setup tmate session
uses: mxschmitt/action-tmate@v3
with:
target_dir: /tmp

# Copy MNIST to "." for the examples
- name: Copy MNIST
run: |
cp -R /tmp/MNIST .
limit-access-to-actor: true
timeout-minutes: 15
# -- REMOVE THIS

- name: Run Tests
run: |
SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: cpu
fail_ci_if_error: false

- name: Run MNIST Examples
run: |
# MNIST
# 1) mnist.py
python examples/mnist/mnist.py --epochs=1

- name: Run MNIST with loggers Examples
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
# 2) mnist_with_visdom.py
python -c "from visdom.server.build import download_scripts; download_scripts()" # download scripts : https://github.com/facebookresearch/visdom/blob/master/py/server.py#L929
python -m visdom.server &
sleep 10
python examples/mnist/mnist_with_visdom.py --epochs=1
kill %1
# 3.1) mnist_with_tensorboard.py with tbX
python examples/mnist/mnist_with_tensorboard.py --epochs=1
# 3.2) mnist_with_tensorboard.py with native torch tb
pip uninstall -y tensorboardX
python examples/mnist/mnist_with_tensorboard.py --epochs=1

- name: Run MNIST Example With Crash
if: ${{ matrix.os == 'ubuntu-latest' }}
continue-on-error: true
run: |
# 4) mnist_save_resume_engine.py
python examples/mnist/mnist_save_resume_engine.py --epochs=2 --crash_iteration 1100

- name: Resume MNIST from previous crash
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
python examples/mnist/mnist_save_resume_engine.py --epochs=2 --resume_from=/tmp/mnist_save_resume/checkpoint_1.pt

- name: Run GAN example
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
# DCGAN
python examples/gan/dcgan.py --dataset fake --dataroot /tmp/fakedata --output-dir /tmp/outputs-dcgan --batch-size 2 --epochs 2 --workers 0

- name: Run RL Examples
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
# RL
# 1) Actor-Critic
python examples/reinforcement_learning/actor_critic.py --max-episodes=2
# 2) Reinforce
python examples/reinforcement_learning/reinforce.py --max-episodes=2

- name: Run Neural Style Example
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
#fast-neural-style
#train
mkdir -p ~/.cache/torch/checkpoints/ && wget "https://download.pytorch.org/models/vgg16-397923af.pth" -O ~/.cache/torch/checkpoints/vgg16-397923af.pth
python examples/fast_neural_style/neural_style.py train --epochs 1 --cuda 0 --dataset test --dataroot . --image_size 32 --style_image examples/fast_neural_style/images/style_images/mosaic.jpg --style_size 32
# SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh
# SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh "distributed"
# SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh "test_idist_parallel_spawn_n_procs_native"
CUDA_VISIBLE_DEVICES= pytest -vvv tests -s -k test_idist_parallel_spawn_n_procs_native

# - name: Upload coverage to Codecov
# uses: codecov/codecov-action@v3
# with:
# file: ./coverage.xml
# flags: cpu
# fail_ci_if_error: false

# - name: Run MNIST Examples
# run: |
# # MNIST
# # 1) mnist.py
# python examples/mnist/mnist.py --epochs=1

# - name: Run MNIST with loggers Examples
# if: ${{ matrix.os == 'ubuntu-latest' }}
# run: |
# # 2) mnist_with_visdom.py
# python -c "from visdom.server.build import download_scripts; download_scripts()" # download scripts : https://github.com/facebookresearch/visdom/blob/master/py/server.py#L929
# python -m visdom.server &
# sleep 10
# python examples/mnist/mnist_with_visdom.py --epochs=1
# kill %1
# # 3.1) mnist_with_tensorboard.py with tbX
# python examples/mnist/mnist_with_tensorboard.py --epochs=1
# # 3.2) mnist_with_tensorboard.py with native torch tb
# pip uninstall -y tensorboardX
# python examples/mnist/mnist_with_tensorboard.py --epochs=1

# - name: Run MNIST Example With Crash
# if: ${{ matrix.os == 'ubuntu-latest' }}
# continue-on-error: true
# run: |
# # 4) mnist_save_resume_engine.py
# python examples/mnist/mnist_save_resume_engine.py --epochs=2 --crash_iteration 1100

# - name: Resume MNIST from previous crash
# if: ${{ matrix.os == 'ubuntu-latest' }}
# run: |
# python examples/mnist/mnist_save_resume_engine.py --epochs=2 --resume_from=/tmp/mnist_save_resume/checkpoint_1.pt

# - name: Run GAN example
# if: ${{ matrix.os == 'ubuntu-latest' }}
# run: |
# # DCGAN
# python examples/gan/dcgan.py --dataset fake --dataroot /tmp/fakedata --output-dir /tmp/outputs-dcgan --batch-size 2 --epochs 2 --workers 0

# - name: Run RL Examples
# if: ${{ matrix.os == 'ubuntu-latest' }}
# run: |
# # RL
# # 1) Actor-Critic
# python examples/reinforcement_learning/actor_critic.py --max-episodes=2
# # 2) Reinforce
# python examples/reinforcement_learning/reinforce.py --max-episodes=2

# - name: Run Neural Style Example
# if: ${{ matrix.os == 'ubuntu-latest' }}
# run: |
# #fast-neural-style
# #train
# mkdir -p ~/.cache/torch/checkpoints/ && wget "https://download.pytorch.org/models/vgg16-397923af.pth" -O ~/.cache/torch/checkpoints/vgg16-397923af.pth
# python examples/fast_neural_style/neural_style.py train --epochs 1 --cuda 0 --dataset test --dataroot . --image_size 32 --style_image examples/fast_neural_style/images/style_images/mosaic.jpg --style_size 32
42 changes: 14 additions & 28 deletions tests/ignite/distributed/test_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,26 +96,16 @@ def _test_check_idist_parallel_torch_launch(init_method, fp, backend, nprocs):
@pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support")
@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip because test uses torch launch")
@pytest.mark.parametrize("init_method", [None, "tcp://0.0.0.0:29500", "FILE"])
def test_check_idist_parallel_torch_launch_n_procs_gloo(init_method, dirname, exec_filepath):
@pytest.mark.parametrize(
"backend",
["gloo", pytest.param("nccl", marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU"))],
)
def test_check_idist_parallel_torch_launch_n_procs_native(init_method, dirname, exec_filepath, backend):
if init_method == "FILE":
init_method = f"file://{dirname}/shared"

np = torch.cuda.device_count() if torch.cuda.is_available() else 4
# temporarily disable this while running on torch nightly
if "dev" not in torch.__version__:
_test_check_idist_parallel_torch_launch(init_method, exec_filepath, "gloo", np)


@pytest.mark.distributed
@pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support")
@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip because test uses torch launch")
@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
@pytest.mark.parametrize("init_method", [None, "tcp://0.0.0.0:29500", "FILE"])
def test_check_idist_parallel_torch_launch_n_procs_nccl(init_method, dirname, exec_filepath):
if init_method == "FILE":
init_method = f"file://{dirname}/shared"

_test_check_idist_parallel_torch_launch(init_method, exec_filepath, "nccl", torch.cuda.device_count())
_test_check_idist_parallel_torch_launch(init_method, exec_filepath, backend, np)


def _test_check_idist_parallel_hvdrun(fp, backend, nprocs):
Expand Down Expand Up @@ -160,9 +150,13 @@ def _test_check_idist_parallel_spawn(fp, backend, nprocs):
@pytest.mark.distributed
@pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support")
@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc")
def test_check_idist_parallel_spawn_n_procs_gloo(exec_filepath):
@pytest.mark.parametrize(
"backend",
["gloo", pytest.param("nccl", marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU"))],
)
def test_check_idist_parallel_spawn_n_procs_native(exec_filepath, backend):
np = 4 if not torch.cuda.is_available() else torch.cuda.device_count()
_test_check_idist_parallel_spawn(exec_filepath, "gloo", np)
_test_check_idist_parallel_spawn(exec_filepath, backend, np)


@pytest.mark.distributed
Expand All @@ -171,7 +165,7 @@ def test_check_idist_parallel_spawn_n_procs_gloo(exec_filepath):
def test_smoke_test_check_idist_parallel_spawn_multinode_n_procs_gloo(exec_filepath):
# Just a smoke test from check_idist_parallel.py for an emulated multi-node configuration
cmd1 = "export CUDA_VISIBLE_DEVICES= && "
cmd1 += 'bash -c "python tests/ignite/distributed/check_idist_parallel.py --backend=gloo --nproc_per_node=2 '
cmd1 += f'bash -c "{sys.executable} {exec_filepath} --backend=gloo --nproc_per_node=2 '
cmd1 += '--nnodes=2 --node_rank=0 --master_addr=localhost --master_port=3344 &"'
os.system(cmd1)

Expand All @@ -197,14 +191,6 @@ def test_smoke_test_check_idist_parallel_spawn_multinode_n_procs_gloo(exec_filep
assert "End of run" in out


@pytest.mark.distributed
@pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support")
@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc")
@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
def test_check_idist_parallel_spawn_n_procs_nccl(exec_filepath):
_test_check_idist_parallel_spawn(exec_filepath, "nccl", torch.cuda.device_count())


@pytest.mark.tpu
@pytest.mark.skipif("NUM_TPU_WORKERS" not in os.environ, reason="Skip if no NUM_TPU_WORKERS in env vars")
@pytest.mark.skipif(not has_xla_support, reason="Skip if no PyTorch XLA package")
Expand Down Expand Up @@ -238,7 +224,7 @@ def _test_func(index, ws, device, backend, true_init_method):
@pytest.mark.distributed
@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc")
@pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support")
@pytest.mark.parametrize("init_method", ["env://", "tcp://0.0.0.0:29500", "FILE"])
@pytest.mark.parametrize("init_method", ["env://", "tcp://0.0.0.0:29501", "FILE"])
@pytest.mark.parametrize(
"backend",
["gloo", pytest.param("nccl", marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU"))],
Expand Down