Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .github/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,9 @@ services:
command: dask-scheduler
ports:
- "8786:8786"
environment:
EXTRA_CONDA_PACKAGES: "pandas>=1.0.0"
dask-worker:
container_name: dask-worker
image: daskdev/dask:latest
command: dask-worker dask-scheduler:8786
environment:
EXTRA_CONDA_PACKAGES: "pandas>=1.0.0"
volumes:
- /tmp:/tmp
4 changes: 1 addition & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,7 @@ jobs:
docker logs dask-worker
- name: Test with pytest while running an independent dask cluster
run: |
pytest tests
env:
DASK_SQL_TEST_SCHEDULER: tcp://127.0.0.1:8786
DASK_SQL_TEST_SCHEDULER="tcp://127.0.0.1:8786" pytest tests

import:
name: "Test importing with bare requirements"
Expand Down
32 changes: 7 additions & 25 deletions tests/integration/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import pandas as pd
import pytest
from dask.datasets import timeseries
from dask.distributed import Client, LocalCluster
from dask.distributed.utils_test import loop # noqa: F401
from dask.distributed import Client
from pandas.testing import assert_frame_equal

try:
Expand Down Expand Up @@ -287,40 +286,23 @@ def gpu_cluster():
pytest.skip("dask_cuda not installed")
return None

cluster = LocalCUDACluster(protocol="tcp")
yield cluster
cluster.close()
with LocalCUDACluster(protocol="tcp") as cluster:
yield cluster


@pytest.fixture()
def gpu_client(gpu_cluster):
if gpu_cluster:
client = Client(gpu_cluster)
yield client
client.close()
with Client(gpu_cluster) as client:
yield client


@pytest.fixture(scope="session", autouse=True)
def setup_dask_client():
"""Setup a dask client if requested"""
address = os.getenv("DASK_SQL_TEST_SCHEDULER", None)
if address:
client = Client(address)
def client():
yield Client(address=os.getenv("DASK_SQL_TEST_SCHEDULER", None))


skip_if_external_scheduler = pytest.mark.skipif(
os.getenv("DASK_SQL_TEST_SCHEDULER", None) is not None,
reason="Can not run with external cluster",
)


@pytest.fixture()
def cluster(loop): # noqa: F811
with LocalCluster(loop=loop) as cluster:
yield cluster


@pytest.fixture()
def client(cluster):
with Client(cluster) as client:
yield client
7 changes: 3 additions & 4 deletions tests/integration/test_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,7 @@ def test_meta_commands(c, client, capsys):
assert "Schema not_exists not available\n" == captured.out

with pytest.raises(
OSError,
match="Timed out during handshake while "
"connecting to tcp://localhost:8787 after 5 s",
OSError, match="Timed out .* to tcp://localhost:8787 after 5 s",
):
with dask_config.set({"distributed.comm.timeouts.connect": 5}):
client = _meta_commands("\\dsc localhost:8787", context=c, client=client)
Expand All @@ -120,8 +118,9 @@ def test_connection_info(c, client, capsys):


def test_quit(c, client, capsys):
dummy_client = MagicMock()
with patch("sys.exit", return_value=lambda: "exit"):
_meta_commands("quit", context=c, client=client)
_meta_commands("quit", context=c, client=dummy_client)
captured = capsys.readouterr()
assert captured.out == "Quitting dask-sql ...\n"

Expand Down
2 changes: 2 additions & 0 deletions tests/integration/test_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ def test_cluster_memory(client, c, df, gpu):

assert_frame_equal(df, return_df)

client.unpublish_dataset("df")


@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
def test_create_from_csv_persist(c, df, temporary_data_file, gpu):
Expand Down
2 changes: 0 additions & 2 deletions tests/integration/test_jdbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ def app_client(c):

yield TestClient(app)

app.client.close()


def test_jdbc_has_schema(app_client, c):
create_meta_data(c)
Expand Down
20 changes: 20 additions & 0 deletions tests/integration/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ def gpu_training_df(c):
return None


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@skip_if_external_scheduler
def test_training_and_prediction(c, training_df):
c.sql(
"""
Expand Down Expand Up @@ -149,6 +151,8 @@ def test_xgboost_training_prediction(c, gpu_training_df):
check_trained_model(c)


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@skip_if_external_scheduler
def test_clustering_and_prediction(c, training_df):
c.sql(
"""
Expand All @@ -165,6 +169,8 @@ def test_clustering_and_prediction(c, training_df):
check_trained_model(c)


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@skip_if_external_scheduler
def test_iterative_and_prediction(c, training_df):
c.sql(
"""
Expand All @@ -184,6 +190,8 @@ def test_iterative_and_prediction(c, training_df):
check_trained_model(c)


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@skip_if_external_scheduler
def test_show_models(c, training_df):
c.sql(
"""
Expand Down Expand Up @@ -403,6 +411,8 @@ def test_drop_model(c, training_df):
assert "my_model" not in c.schema[c.schema_name].models


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@skip_if_external_scheduler
def test_describe_model(c, training_df):
c.sql(
"""
Expand Down Expand Up @@ -504,6 +514,8 @@ def test_export_model(c, training_df, tmpdir):
)


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@skip_if_external_scheduler
def test_mlflow_export(c, training_df, tmpdir):
# Test only when mlflow was installed
mlflow = pytest.importorskip("mlflow", reason="mlflow not installed")
Expand Down Expand Up @@ -560,10 +572,12 @@ def test_mlflow_export(c, training_df, tmpdir):
)


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@pytest.mark.xfail(
sys.platform == "win32",
reason="Windows is not officially supported for dask/xgboost",
)
@skip_if_external_scheduler
def test_mlflow_export_xgboost(c, client, training_df, tmpdir):
# Test only when mlflow & xgboost was installed
mlflow = pytest.importorskip("mlflow", reason="mlflow not installed")
Expand Down Expand Up @@ -626,6 +640,8 @@ def test_mlflow_export_lightgbm(c, training_df, tmpdir):
)


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@skip_if_external_scheduler
def test_ml_experiment(c, client, training_df):

with pytest.raises(
Expand Down Expand Up @@ -818,6 +834,8 @@ def test_ml_experiment(c, client, training_df):
)


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@skip_if_external_scheduler
def test_experiment_automl_classifier(c, client, training_df):
tpot = pytest.importorskip("tpot", reason="tpot not installed")
# currently tested with tpot==
Expand All @@ -841,6 +859,8 @@ def test_experiment_automl_classifier(c, client, training_df):
check_trained_model(c, "my_automl_exp1")


# TODO - many ML tests fail on clusters without sklearn - can we avoid this?
@skip_if_external_scheduler
def test_experiment_automl_regressor(c, client, training_df):
tpot = pytest.importorskip("tpot", reason="tpot not installed")
# test regressor
Expand Down