From 7f530bb3f0a0802ba8e8ae9ee928ecabdd3df5ee Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Fri, 11 Feb 2022 17:54:55 -0800 Subject: [PATCH 1/7] Remove distributed utils_test fixtures and add client fixture --- conftest.py | 2 +- tests/integration/fixtures.py | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/conftest.py b/conftest.py index 799e617d1..6f38951e1 100644 --- a/conftest.py +++ b/conftest.py @@ -1,6 +1,6 @@ import pytest -pytest_plugins = ["distributed.utils_test", "tests.integration.fixtures"] +pytest_plugins = ["tests.integration.fixtures"] def pytest_addoption(parser): diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index aa381de88..9fc8a6d32 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -6,7 +6,8 @@ import pandas as pd import pytest from dask.datasets import timeseries -from dask.distributed import Client +from dask.distributed import Client, LocalCluster +from dask.distributed.utils_test import loop # noqa: F401 from pandas.testing import assert_frame_equal try: @@ -287,3 +288,15 @@ def setup_dask_client(): os.getenv("DASK_SQL_TEST_SCHEDULER", None) is not None, reason="Can not run with external cluster", ) + + +@pytest.fixture() +def cluster(loop): # noqa: F811 + with LocalCluster(loop=loop) as cluster: + yield cluster + + +@pytest.fixture() +def client(cluster): + with Client(cluster) as client: + yield client From bc03e35afd53a3c4c50fc3804b5350f441bec66d Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Fri, 11 Feb 2022 17:56:42 -0800 Subject: [PATCH 2/7] Reduce connection timeout for non reachable test --- tests/integration/test_cmd.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_cmd.py b/tests/integration/test_cmd.py index 8193fb6e8..5b07e4407 100644 --- a/tests/integration/test_cmd.py +++ b/tests/integration/test_cmd.py @@ -1,4 +1,5 @@ import pytest +from dask import config as 
dask_config from mock import MagicMock, patch from prompt_toolkit.application import create_app_session from prompt_toolkit.input import create_pipe_input @@ -103,8 +104,9 @@ def test_meta_commands(c, client, capsys): match="Timed out during handshake while " "connecting to tcp://localhost:8787 after 5 s", ): - client = _meta_commands("\\dsc localhost:8787", context=c, client=client) - assert client.scheduler.__dict__["addr"] == "localhost:8787" + with dask_config.set({"distributed.comm.timeouts.connect": 5}): + client = _meta_commands("\\dsc localhost:8787", context=c, client=client) + assert client.scheduler.__dict__["addr"] == "localhost:8787" def test_connection_info(c, client, capsys): From a818de83da0fa3ceed64ae53194ca0b5ce95a267 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Mon, 14 Feb 2022 12:26:58 -0800 Subject: [PATCH 3/7] Rerun tests From 81563ea58ae728f2f6173d09c74259630bdd1769 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 16 Feb 2022 12:09:24 -0800 Subject: [PATCH 4/7] Mount tempfile directory in independent worker container --- .github/docker-compose.yaml | 2 ++ tests/integration/test_create.py | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/docker-compose.yaml b/.github/docker-compose.yaml index 5b17f5d4b..f496d0920 100644 --- a/.github/docker-compose.yaml +++ b/.github/docker-compose.yaml @@ -15,3 +15,5 @@ services: command: dask-worker dask-scheduler:8786 environment: EXTRA_CONDA_PACKAGES: "pandas>=1.3 numpy=1.20.2 -c conda-forge" + volumes: + - /tmp:/tmp diff --git a/tests/integration/test_create.py b/tests/integration/test_create.py index 3a893cafb..c768ef8bd 100644 --- a/tests/integration/test_create.py +++ b/tests/integration/test_create.py @@ -4,10 +4,8 @@ from pandas.testing import assert_frame_equal import dask_sql -from tests.integration.fixtures import skip_if_external_scheduler -@skip_if_external_scheduler 
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)]) def test_create_from_csv(c, df, temporary_data_file, gpu): df.to_csv(temporary_data_file, index=False) @@ -66,7 +64,6 @@ def test_cluster_memory(client, c, df, gpu): assert_frame_equal(df, return_df) -@skip_if_external_scheduler @pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)]) def test_create_from_csv_persist(c, df, temporary_data_file, gpu): df.to_csv(temporary_data_file, index=False) @@ -159,7 +156,6 @@ def test_create_from_query(c, df): assert_frame_equal(df, return_df) -@skip_if_external_scheduler @pytest.mark.parametrize( "gpu", [ From a0e479b23b2397fa8a29991d35821be1463c8f5b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 16 Feb 2022 12:10:45 -0800 Subject: [PATCH 5/7] Skip test_fsql on external cluster --- tests/integration/test_fugue.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_fugue.py b/tests/integration/test_fugue.py index 83a97f909..723fe4f02 100644 --- a/tests/integration/test_fugue.py +++ b/tests/integration/test_fugue.py @@ -4,6 +4,7 @@ from pandas.testing import assert_frame_equal from dask_sql import Context +from tests.integration.fixtures import skip_if_external_scheduler fugue_sql = pytest.importorskip("fugue_sql") @@ -38,6 +39,7 @@ def test_simple_statement(): assert_frame_equal(return_df, pd.DataFrame({"a": [1], "b": ["world"]})) +@skip_if_external_scheduler def test_fsql(): def assert_eq(df: pd.DataFrame) -> None: assert_frame_equal(df, pd.DataFrame({"a": [1]})) From 240411504dcd51a76e795fdbc2e5918448a10eab Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 16 Feb 2022 12:12:52 -0800 Subject: [PATCH 6/7] Relax external cluster's conda packages --- .github/docker-compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/.github/docker-compose.yaml b/.github/docker-compose.yaml index f496d0920..6edf29e02 100644 --- a/.github/docker-compose.yaml +++ b/.github/docker-compose.yaml @@ -8,12 +8,12 @@ services: ports: - "8786:8786" environment: - EXTRA_CONDA_PACKAGES: "pandas>=1.3 numpy=1.20.2 -c conda-forge" + EXTRA_CONDA_PACKAGES: "pandas>=1.0.0" dask-worker: container_name: dask-worker image: daskdev/dask:latest command: dask-worker dask-scheduler:8786 environment: - EXTRA_CONDA_PACKAGES: "pandas>=1.3 numpy=1.20.2 -c conda-forge" + EXTRA_CONDA_PACKAGES: "pandas>=1.0.0" volumes: - /tmp:/tmp From d77f061393c855233c7b31b19221953f4fc1be58 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Tue, 22 Feb 2022 14:36:01 -0800 Subject: [PATCH 7/7] Add fixme note to failing fugue test due to missing triad module --- tests/integration/test_fugue.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_fugue.py b/tests/integration/test_fugue.py index 723fe4f02..ba3acd00c 100644 --- a/tests/integration/test_fugue.py +++ b/tests/integration/test_fugue.py @@ -39,6 +39,8 @@ def test_simple_statement(): assert_frame_equal(return_df, pd.DataFrame({"a": [1], "b": ["world"]})) +# TODO: Revisit fixing this on an independent cluster (without dask-sql) based on the +# discussion in https://github.com/dask-contrib/dask-sql/issues/407 @skip_if_external_scheduler def test_fsql(): def assert_eq(df: pd.DataFrame) -> None: