dask-contrib · charlesbluca · Nov 6, 2023 · Nov 3, 2023 · Nov 3, 2023 · Nov 6, 2023
@@ -9,7 +9,6 @@
 
 INDEXER_WINDOW_STEP_IMPLEMENTED = _pandas_version >= parseVersion("1.5.0")
 PANDAS_GT_200 = _pandas_version >= parseVersion("2.0.0")
-PANDAS_GT_210 = _pandas_version >= parseVersion("2.1.0")
 
 # TODO: remove if prompt-toolkit min version gets bumped
 PIPE_INPUT_CONTEXT_MANAGER = _prompt_toolkit_version >= parseVersion("3.0.29")

@@ -57,7 +57,7 @@ def apply_sort(
                 by=sort_columns,
                 ascending=sort_ascending[0],
                 na_position="first" if sort_null_first[0] else "last",
-                ignore_index=True,
+                # ignore_index=True,
             ).persist()
         except ValueError:
             pass

@@ -18,7 +18,6 @@
 import pytest
 
 from dask_sql import Context
-from dask_sql._compat import PANDAS_GT_210
 from dask_sql.utils import ParsingException
 from tests.utils import assert_eq
 
@@ -29,10 +28,8 @@ def cast_datetime_to_string(df):
     if not cols:
         return df
 
-    strf = "%Y-%m-%dT%H:%M:%S" if PANDAS_GT_210 else "%Y-%m-%d %H:%M:%S"
-
     for col in cols:
-        df[col] = df[col].dt.strftime(strf)
+        df[col] = df[col].dt.strftime("%Y-%m-%d %H:%M:%S")
 
     return df
 

@@ -488,7 +488,7 @@ def test_covar_aggregation(c, timeseries_df):
         pytest.param("gpu_user_table_1", marks=pytest.mark.gpu),
     ],
 )
-@pytest.mark.parametrize("split_out", [None, 2, 4])
+@pytest.mark.parametrize("split_out", [1, 2, 4])
 def test_groupby_split_out(c, input_table, split_out, request):
     user_table = request.getfixturevalue(input_table)
 

@@ -88,11 +88,13 @@ def test_training_and_prediction(c, gpu_client):
     check_trained_model(c, df_name=timeseries)
 
 
-@pytest.mark.flaky(reruns=8, condition="sys.platform == 'darwin'")
 @pytest.mark.xfail(
     sys.platform == "win32",
     reason="'xgboost.core.XGBoostError: Failed to poll' on Windows only",
 )
+@pytest.mark.xfail(
+    sys.platform == "darwin", reason="Intermittent socket errors on macOS", strict=False
+)
 @pytest.mark.parametrize(
     "gpu_client", [False, pytest.param(True, marks=pytest.mark.gpu)], indirect=True
 )
@@ -627,7 +629,9 @@ def test_mlflow_export(c, tmpdir):
         )
 
 
-@pytest.mark.flaky(reruns=8, condition="sys.platform == 'darwin'")
+@pytest.mark.xfail(
+    sys.platform == "darwin", reason="Intermittent socket errors on macOS", strict=False
+)
 def test_mlflow_export_xgboost(c, client, tmpdir):
     # Test only when mlflow & xgboost are installed
     mlflow = pytest.importorskip("mlflow", reason="mlflow not installed")