From 939b5e886c1883d6241fcbfe693d68423864ab56 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 7 Feb 2022 17:45:37 -0500 Subject: [PATCH 01/16] Relocate doc source files --- docs/Makefile | 4 ++-- docs/make.bat | 6 +++--- docs/{pages => source}/api.rst | 0 docs/{pages => source}/cmd.rst | 0 docs/{pages => source}/custom.rst | 0 docs/{pages => source}/data_input.rst | 0 docs/{pages => source}/how_does_it_work.rst | 0 docs/{ => source}/index.rst | 20 ++++++++++---------- docs/{pages => source}/installation.rst | 0 docs/{pages => source}/machine_learning.rst | 0 docs/{pages => source}/quickstart.rst | 0 docs/{pages => source}/server.rst | 0 docs/{pages => source}/sql.rst | 0 docs/{pages => source}/sql/creation.rst | 0 docs/{pages => source}/sql/describe.rst | 0 docs/{pages => source}/sql/ml.rst | 0 docs/{pages => source}/sql/select.rst | 0 17 files changed, 15 insertions(+), 15 deletions(-) rename docs/{pages => source}/api.rst (100%) rename docs/{pages => source}/cmd.rst (100%) rename docs/{pages => source}/custom.rst (100%) rename docs/{pages => source}/data_input.rst (100%) rename docs/{pages => source}/how_does_it_work.rst (100%) rename docs/{ => source}/index.rst (94%) rename docs/{pages => source}/installation.rst (100%) rename docs/{pages => source}/machine_learning.rst (100%) rename docs/{pages => source}/quickstart.rst (100%) rename docs/{pages => source}/server.rst (100%) rename docs/{pages => source}/sql.rst (100%) rename docs/{pages => source}/sql/creation.rst (100%) rename docs/{pages => source}/sql/describe.rst (100%) rename docs/{pages => source}/sql/ml.rst (100%) rename docs/{pages => source}/sql/select.rst (100%) diff --git a/docs/Makefile b/docs/Makefile index d4bb2cbb9..d0c3cbf10 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -5,8 +5,8 @@ # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = _build +SOURCEDIR = source +BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: diff --git a/docs/make.bat b/docs/make.bat index 2119f5109..6fcf05b4b 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -7,8 +7,8 @@ REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) -set SOURCEDIR=. -set BUILDDIR=_build +set SOURCEDIR=source +set BUILDDIR=build if "%1" == "" goto help @@ -21,7 +21,7 @@ if errorlevel 9009 ( echo.may add the Sphinx directory to PATH. echo. 
echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ + echo.https://www.sphinx-doc.org/ exit /b 1 ) diff --git a/docs/pages/api.rst b/docs/source/api.rst similarity index 100% rename from docs/pages/api.rst rename to docs/source/api.rst diff --git a/docs/pages/cmd.rst b/docs/source/cmd.rst similarity index 100% rename from docs/pages/cmd.rst rename to docs/source/cmd.rst diff --git a/docs/pages/custom.rst b/docs/source/custom.rst similarity index 100% rename from docs/pages/custom.rst rename to docs/source/custom.rst diff --git a/docs/pages/data_input.rst b/docs/source/data_input.rst similarity index 100% rename from docs/pages/data_input.rst rename to docs/source/data_input.rst diff --git a/docs/pages/how_does_it_work.rst b/docs/source/how_does_it_work.rst similarity index 100% rename from docs/pages/how_does_it_work.rst rename to docs/source/how_does_it_work.rst diff --git a/docs/index.rst b/docs/source/index.rst similarity index 94% rename from docs/index.rst rename to docs/source/index.rst index f9fbeeba1..c78192b59 100644 --- a/docs/index.rst +++ b/docs/source/index.rst @@ -60,16 +60,16 @@ Any pandas or dask dataframe can be used as input and ``dask-sql`` understands a :maxdepth: 1 :caption: Contents: - pages/installation - pages/quickstart - pages/sql - pages/data_input - pages/custom - pages/machine_learning - pages/api - pages/server - pages/cmd - pages/how_does_it_work + source/installation + source/quickstart + source/sql + source/data_input + source/custom + source/machine_learning + source/api + source/server + source/cmd + source/how_does_it_work .. note:: diff --git a/docs/pages/installation.rst b/docs/source/installation.rst similarity index 100% rename from docs/pages/installation.rst rename to docs/source/installation.rst diff --git a/docs/pages/machine_learning.rst b/docs/source/machine_learning.rst similarity index 100% rename from docs/pages/machine_learning.rst rename to docs/source/machine_learning.rst diff --git a/docs/pages/quickstart.rst b/docs/source/quickstart.rst similarity index 100% rename from docs/pages/quickstart.rst rename to docs/source/quickstart.rst diff --git a/docs/pages/server.rst b/docs/source/server.rst similarity index 100% rename from docs/pages/server.rst rename to docs/source/server.rst diff --git a/docs/pages/sql.rst b/docs/source/sql.rst similarity index 100% rename from docs/pages/sql.rst rename to docs/source/sql.rst diff --git a/docs/pages/sql/creation.rst b/docs/source/sql/creation.rst similarity index 100% rename from docs/pages/sql/creation.rst rename to docs/source/sql/creation.rst diff --git a/docs/pages/sql/describe.rst b/docs/source/sql/describe.rst similarity index 100% rename from docs/pages/sql/describe.rst rename to docs/source/sql/describe.rst diff --git a/docs/pages/sql/ml.rst b/docs/source/sql/ml.rst similarity index 100% rename from docs/pages/sql/ml.rst rename to docs/source/sql/ml.rst diff --git a/docs/pages/sql/select.rst b/docs/source/sql/select.rst similarity index 100% rename from docs/pages/sql/select.rst rename to docs/source/sql/select.rst From 2c29e0e3e5c3c64340164484ca9a30c6c2afc8b6 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 7 Feb 2022 15:03:04 -0800 Subject: [PATCH 02/16] Move conf.py to docs source folder --- docs/{ => source}/conf.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/{ => source}/conf.py (100%) diff --git a/docs/conf.py b/docs/source/conf.py similarity index 100% rename from 
docs/conf.py rename to docs/source/conf.py From beaef69361b00385fe33ae5224dac735be292cf5 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 7 Feb 2022 18:01:10 -0800 Subject: [PATCH 03/16] Fix broken links in index --- docs/source/index.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index c78192b59..666aba6b1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -60,16 +60,16 @@ Any pandas or dask dataframe can be used as input and ``dask-sql`` understands a :maxdepth: 1 :caption: Contents: - source/installation - source/quickstart - source/sql - source/data_input - source/custom - source/machine_learning - source/api - source/server - source/cmd - source/how_does_it_work + installation + quickstart + sql + data_input + custom + machine_learning + api + server + cmd + how_does_it_work .. note:: From 3aee4a387fa69428436e94ba5ac69fae48e80b1b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 7 Feb 2022 18:02:37 -0800 Subject: [PATCH 04/16] Simplify environment file --- docs/environment.yaml | 153 ------------------------------------------ docs/environment.yml | 9 +++ 2 files changed, 9 insertions(+), 153 deletions(-) delete mode 100644 docs/environment.yaml create mode 100644 docs/environment.yml diff --git a/docs/environment.yaml b/docs/environment.yaml deleted file mode 100644 index 73603a226..000000000 --- a/docs/environment.yaml +++ /dev/null @@ -1,153 +0,0 @@ -name: dask-sql-docs -channels: - - conda-forge - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=1_gnu - - alabaster=0.7.12=py_0 - - alsa-lib=1.2.3=h516909a_0 - - attrs=20.2.0=pyh9f0ad1d_0 - - babel=2.8.0=py_0 - - bokeh=2.2.1=py38h32f6830_0 - - brotlipy=0.7.0=py38h1e0a361_1000 - - ca-certificates=2020.6.20=hecda079_0 - - cairo=1.16.0=h3fc0475_1005 - - certifi=2020.6.20=py38h32f6830_0 - - cffi=1.14.3=py38h5bae8af_0 - - chardet=3.0.4=py38h32f6830_1007 - - click=7.1.2=pyh9f0ad1d_0 - - cloudpickle=1.6.0=py_0 - - coverage=5.3=py38h1e0a361_0 - - cryptography=3.1.1=py38h766eaa4_0 - - cytoolz=0.11.0=py38h1e0a361_0 - - dask=2.28.0=py_0 - - dask-core=2.28.0=py_0 - - dask-sphinx-theme=1.3.2=pyh9f0ad1d_0 - - distributed=2.28.0=py38h32f6830_0 - - docutils=0.16=py38h32f6830_1 - - fontconfig=2.13.1=h1056068_1002 - - freetype=2.10.2=he06d7ca_0 - - fsspec=0.8.3=py_0 - - gettext=0.19.8.1=hc5be6a0_1002 - - giflib=5.2.1=h516909a_2 - - glib=2.66.1=h680cd38_0 - - graphite2=1.3.13=he1b5a44_1001 - - harfbuzz=2.7.2=hee91db6_0 - - heapdict=1.0.1=py_0 - - icu=67.1=he1b5a44_0 - - idna=2.10=pyh9f0ad1d_0 - - imagesize=1.2.0=py_0 - - iniconfig=1.0.1=pyh9f0ad1d_0 - - jinja2=2.11.2=pyh9f0ad1d_0 - - jpeg=9d=h516909a_0 - - jpype1=1.0.2=py38hbf85e49_0 - - lcms2=2.11=hbd6801e_0 - - ld_impl_linux-64=2.35=h769bd43_9 - - libblas=3.8.0=17_openblas - - libcblas=3.8.0=17_openblas - - libffi=3.2.1=he1b5a44_1007 - - libgcc-ng=9.3.0=h24d8f2e_16 - - libgfortran-ng=7.5.0=hdf63c60_16 - - libgomp=9.3.0=h24d8f2e_16 - - libiconv=1.16=h516909a_0 - - liblapack=3.8.0=17_openblas - - libopenblas=0.3.10=pthreads_hb3c22a3_4 - - libpng=1.6.37=hed695b0_2 - - libstdcxx-ng=9.3.0=hdf63c60_16 - - libtiff=4.1.0=hc7e4089_6 - - libuuid=2.32.1=h14c3975_1000 - - libwebp-base=1.1.0=h516909a_3 - - libxcb=1.13=h14c3975_1002 - - libxml2=2.9.10=h68273f3_2 - - locket=0.2.0=py_2 - - lz4-c=1.9.2=he1b5a44_3 - - markupsafe=1.1.1=py38h1e0a361_1 - - 
maven=3.6.0=0 - - more-itertools=8.5.0=py_0 - - msgpack-python=1.0.0=py38hbf85e49_1 - - ncurses=6.2=he1b5a44_1 - - numpy=1.19.1=py38hbc27379_2 - - olefile=0.46=py_0 - - openjdk=11.0.8=hacce0ff_0 - - openssl=1.1.1h=h516909a_0 - - packaging=20.4=pyh9f0ad1d_0 - - pandas=1.1.2=py38h950e882_0 - - partd=1.1.0=py_0 - - pcre=8.44=he1b5a44_0 - - pillow=7.2.0=py38h9776b28_1 - - pip=20.2.3=py_0 - - pixman=0.38.0=h516909a_1003 - - pluggy=0.13.1=py38h32f6830_2 - - psutil=5.7.2=py38h1e0a361_0 - - pthread-stubs=0.4=h14c3975_1001 - - py=1.9.0=pyh9f0ad1d_0 - - pycparser=2.20=pyh9f0ad1d_2 - - pygments=2.7.1=py_0 - - pyopenssl=19.1.0=py_1 - - pyparsing=2.4.7=pyh9f0ad1d_0 - - pysocks=1.7.1=py38h32f6830_1 - - pytest=6.1.0=py38h32f6830_0 - - pytest-cov=2.10.1=pyh9f0ad1d_0 - - python=3.8.5=h1103e12_9_cpython - - python-dateutil=2.8.1=py_0 - - python_abi=3.8=1_cp38 - - pytz=2020.1=pyh9f0ad1d_0 - - pyyaml=5.3.1=py38h1e0a361_0 - - readline=8.0=he28a2e2_2 - - requests=2.24.0=pyh9f0ad1d_0 - - setuptools=49.6.0=py38h32f6830_1 - - six=1.15.0=pyh9f0ad1d_0 - - snowballstemmer=2.0.0=py_0 - - sortedcontainers=2.2.2=pyh9f0ad1d_0 - - sphinx=3.2.1=py_0 - - sphinx_rtd_theme=0.5.0=pyh9f0ad1d_0 - - sphinxcontrib-applehelp=1.0.2=py_0 - - sphinxcontrib-devhelp=1.0.2=py_0 - - sphinxcontrib-htmlhelp=1.0.3=py_0 - - sphinxcontrib-jsmath=1.0.1=py_0 - - sphinxcontrib-qthelp=1.0.3=py_0 - - sphinxcontrib-serializinghtml=1.1.4=py_0 - - sqlite=3.33.0=h4cf870e_0 - - tblib=1.6.0=py_0 - - tk=8.6.10=hed695b0_0 - - toml=0.10.1=pyh9f0ad1d_0 - - toolz=0.11.1=py_0 - - tornado=6.0.4=py38h1e0a361_1 - - typing_extensions=3.7.4.2=py_0 - - urllib3=1.25.10=py_0 - - wheel=0.35.1=pyh9f0ad1d_0 - - xorg-fixesproto=5.0=h14c3975_1002 - - xorg-inputproto=2.3.2=h14c3975_1002 - - xorg-kbproto=1.0.7=h14c3975_1002 - - xorg-libice=1.0.10=h516909a_0 - - xorg-libsm=1.2.3=h84519dc_1000 - - xorg-libx11=1.6.12=h516909a_0 - - xorg-libxau=1.0.9=h14c3975_0 - - xorg-libxdmcp=1.1.3=h516909a_0 - - xorg-libxext=1.3.4=h516909a_0 - - xorg-libxfixes=5.0.3=h516909a_1004 - - xorg-libxi=1.7.10=h516909a_0 - - xorg-libxrender=0.9.10=h516909a_1002 - - xorg-libxtst=1.2.3=h516909a_1002 - - xorg-recordproto=1.14.2=h516909a_1002 - - xorg-renderproto=0.11.1=h14c3975_1002 - - xorg-xextproto=7.3.0=h14c3975_1002 - - xorg-xproto=7.0.31=h14c3975_1007 - - xz=5.2.5=h516909a_1 - - yaml=0.2.5=h516909a_0 - - zict=2.0.0=py_0 - - zlib=1.2.11=h516909a_1009 - - zstd=1.4.5=h6597ccf_2 - - pip: - - adagio==0.2.2 - - antlr4-python3-runtime==4.9.1 - - appdirs==1.4.4 - - ciso8601==2.1.3 - - fs==2.4.12 - - fugue[sql]==0.5.3 - - pyarrow==2.0.0 - - qpd==0.2.5 - - sqlalchemy==1.3.22 - - triad==0.5.2 -prefix: /home/nils/anaconda3/envs/dask-sql-docs diff --git a/docs/environment.yml b/docs/environment.yml new file mode 100644 index 000000000..1775a91fe --- /dev/null +++ b/docs/environment.yml @@ -0,0 +1,9 @@ +name: dask-sql-docs +channels: + - conda-forge + - nodefaults +dependencies: + - sphinx>=4.0.0 + - dask-sphinx-theme>=2.0.0 + - maven>=3.6.0 + - fugue>=0.5.3 From 298a25f876239b4b6ac1cb0c9bb23938430b1243 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 7 Feb 2022 19:39:51 -0800 Subject: [PATCH 05/16] Start adding CPU/GPU tabs for code blocks --- dask_sql/context.py | 2 +- docs/environment.yml | 12 +++ docs/source/conf.py | 2 +- docs/source/data_input.rst | 168 +++++++++++++++++++++++++++++-------- docs/source/index.rst | 115 ++++++++++++++++++------- docs/source/quickstart.rst | 55 +++++++++--- 6 files changed, 275 insertions(+), 79 deletions(-) 
diff --git a/dask_sql/context.py b/dask_sql/context.py index c3a66de6a..2407a8c64 100644 --- a/dask_sql/context.py +++ b/dask_sql/context.py @@ -728,7 +728,7 @@ def run_server( def stop_server(self): # pragma: no cover """ - Stop a SQL server started by ``run_server`. + Stop a SQL server started by ``run_server``. """ if self.sql_server is not None: loop = asyncio.get_event_loop() diff --git a/docs/environment.yml b/docs/environment.yml index 1775a91fe..8a609dd60 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -3,7 +3,19 @@ channels: - conda-forge - nodefaults dependencies: + - python=3.9 - sphinx>=4.0.0 + - sphinx-tabs - dask-sphinx-theme>=2.0.0 - maven>=3.6.0 + - dask>=2021.11.1 + - pandas>=1.0.0 - fugue>=0.5.3 + - jpype1>=1.0.2 + - fastapi>=0.61.1 + - uvicorn>=0.11.3 + - tzlocal>=2.1 + - prompt_toolkit + - pygments + - tabulate + - nest-asyncio diff --git a/docs/source/conf.py b/docs/source/conf.py index 42211bd9f..3784603e7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -29,7 +29,7 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_tabs.tabs"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/docs/source/data_input.rst b/docs/source/data_input.rst index 26b503a40..731906bf7 100644 --- a/docs/source/data_input.rst +++ b/docs/source/data_input.rst @@ -15,25 +15,55 @@ Chances are high, there exists already a function to load your favorite format o See below for all formats understood by ``dask-sql``. Make sure to install required libraries both on the driver and worker machines. -.. code-block:: python +.. tabs:: - import dask.dataframe as dd - from dask_sql import Context + .. group-tab:: CPU - c = Context() - df = dd.read_csv("s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv") + .. code-block:: python + + import dask.dataframe as dd + from dask_sql import Context + + c = Context() + df = dd.read_csv("s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv") + + c.create_table("my_data", df) + + .. group-tab:: GPU + + .. code-block:: python - c.create_table("my_data", df) + import dask.dataframe as dd + from dask_sql import Context + + c = Context() + df = dd.read_csv("s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv") + + c.create_table("my_data", df, gpu=True) or in short (equivalent): -.. code-block:: python +.. tabs:: - from dask_sql import Context + .. group-tab:: CPU - c = Context() + .. code-block:: python + + from dask_sql import Context + + c = Context() - c.create_table("my_data", "s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv") + c.create_table("my_data", "s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv") + + .. group-tab:: GPU + + .. code-block:: python + + from dask_sql import Context + + c = Context() + + c.create_table("my_data", "s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv", gpu=True) 2. Load it via SQL ------------------ @@ -41,12 +71,26 @@ or in short (equivalent): If you are connected to the SQL server implementation or you do not want to issue python command calls, you can also achieve the data loading via SQL only. -.. code-block:: sql +.. tabs:: - CREATE TABLE my_data WITH ( - format = 'csv', - location = 's3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv' - ) + .. group-tab:: CPU + + .. 
code-block:: sql + + CREATE TABLE my_data WITH ( + format = 'csv', + location = 's3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv' + ) + + .. group-tab:: GPU + + .. code-block:: sql + + CREATE TABLE my_data WITH ( + format = 'csv', + location = 's3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv', + gpu = True + ) The parameters are the same as in the python function described above. You can find more information in :ref:`creation`. @@ -68,21 +112,46 @@ and then later register it in the :class:`~dask_sql.Context` via SQL: Later in SQL: -.. code-block:: SQL +.. tabs:: - CREATE TABLE my_data WITH ( - format = 'memory', - location = 'my_ds' - ) + .. group-tab:: CPU + + .. code-block:: SQL + + CREATE TABLE my_data WITH ( + format = 'memory', + location = 'my_ds' + ) + + .. group-tab:: GPU + + .. code-block:: SQL + + CREATE TABLE my_data WITH ( + format = 'memory', + location = 'my_ds', + gpu = True + ) Note, that the format is set to ``memory`` and the location is the name, which was chosen when publishing the dataset. To achieve the same thing from python, you can just use dask's methods to get the dataset -.. code-block:: python +.. tabs:: + + .. group-tab:: CPU + + .. code-block:: python + + df = client.get_dataset("my_df") + c.create_table("my_data", df) + + .. group-tab:: GPU - df = client.get_dataset("my_df") - c.create_table("my_data", df) + .. code-block:: python + + df = client.get_dataset("my_df") + c.create_table("my_data", df, gpu=True) Input Formats @@ -95,22 +164,51 @@ Input Formats The data can be from local disc or many remote locations (S3, hdfs, Azure Filesystem, http, Google Filesystem, ...) - just prefix the path with the matching protocol. Additional arguments passed to :func:`~dask_sql.Context.create_table` or ``CREATE TABLE`` are given to the ``read_`` calls. - Example: +Example: - .. code-block:: python +.. tabs:: - c.create_table("my_data", "s3://bucket-name/my-data-*.csv", - storage_options={'anon': True}) + .. group-tab:: CPU - .. code-block:: sql + .. code-block:: python - CREATE TABLE my_data WITH ( - format = 'csv', -- can also be omitted, as clear from the extension - location = 's3://bucket-name/my-data-*.csv', - storage_options = ( - anon = True - ) - ) + c.create_table( + "my_data", + "s3://bucket-name/my-data-*.csv", + storage_options={'anon': True} + ) + + .. code-block:: sql + + CREATE TABLE my_data WITH ( + format = 'csv', -- can also be omitted, as clear from the extension + location = 's3://bucket-name/my-data-*.csv', + storage_options = ( + anon = True + ) + ) + + .. group-tab:: GPU + + .. code-block:: python + + c.create_table( + "my_data", + "s3://bucket-name/my-data-*.csv", + gpu=True, + storage_options={'anon': True} + ) + + .. code-block:: sql + + CREATE TABLE my_data WITH ( + format = 'csv', -- can also be omitted, as clear from the extension + location = 's3://bucket-name/my-data-*.csv', + gpu = True, + storage_options = ( + anon = True + ) + ) * If your data is already in Pandas (or Dask) DataFrames format, you can just use it as it is via the Python API by giving it to :func:`~dask_sql.Context.create_table` directly. diff --git a/docs/source/index.rst b/docs/source/index.rst index 666aba6b1..2d6fc7d87 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -22,38 +22,89 @@ Example For this example, we use some data loaded from disk and query them with a SQL command from our python code. Any pandas or dask dataframe can be used as input and ``dask-sql`` understands a large amount of formats (csv, parquet, json,...) 
and locations (s3, hdfs, gcs,...). -.. code-block:: python - - import dask.dataframe as dd - from dask_sql import Context - - # Create a context to hold the registered tables - c = Context() - - # Load the data and register it in the context - # This will give the table a name, that we can use in queries - df = dd.read_csv("...") - c.create_table("my_data", df) - - # Now execute a SQL query. The result is again dask dataframe. - result = c.sql(""" - SELECT - my_data.name, - SUM(my_data.x) - FROM - my_data - GROUP BY - my_data.name - """) - - # Show the result - print(result) - - # Show the result... - print(result.compute()) - - # ... or use it for any other dask calculation - print(result.x.mean().compute()) + +.. tabs:: + + .. group-tab:: CPU + + .. code-block:: python + + from dask_sql import Context + + # Create a context to hold the registered tables + c = Context() + + # Load the data and register it in the context + # This will give the table a name, that we can use in queries + c.sql(""" + CREATE TABLE + my_data + WITH ( + location = '/path/to/data', + format = 'csv' + ) + """) + + # Now execute a SQL query. The result is again dask dataframe. + result = c.sql(""" + SELECT + my_data.name, + SUM(my_data.x) + FROM + my_data + GROUP BY + my_data.name + """) + + # Show the result + print(result) + + # Show the result... + print(result.compute()) + + # ... or use it for any other dask calculation + print(result.x.mean().compute()) + + .. group-tab:: GPU + + .. code-block:: python + + from dask_sql import Context + + # Create a context to hold the registered tables + c = Context() + + # Load the data and register it in the context + # This will give the table a name, that we can use in queries + c.sql(""" + CREATE TABLE + my_data + WITH ( + location = '/path/to/data', + format = 'csv', + gpu = True + ) + """) + + # Now execute a SQL query. The result is again dask dataframe. + result = c.sql(""" + SELECT + my_data.name, + SUM(my_data.x) + FROM + my_data + GROUP BY + my_data.name + """) + + # Show the result + print(result) + + # Show the result... + print(result.compute()) + + # ... or use it for any other dask calculation + print(result.x.mean().compute()) .. toctree:: diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index f91f9ba7e..bbd3a72be 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -15,11 +15,25 @@ However, the real magic of ``dask`` (and ``dask-sql``) comes from the ability to There are `plenty `_ of possibilities to setup a ``dask`` cluster. For local development and testing, you can setup a distributed version of ``dask`` with -.. code-block:: python +.. tabs:: + + .. group-tab:: CPU + + .. code-block:: python + + from dask.distributed import Client + + client = Client() - from dask.distributed import Client + .. group-tab:: GPU - client = Client() + .. code-block:: python + + from dask_cuda import LocalCUDACluster + from dask.distributed import Client + + cluster = LocalCUDACluster() + client = Client(cluster) 1. Data Loading --------------- @@ -44,12 +58,25 @@ If we want to work with the data in SQL, we need to give the data frame a unique We do this by registering the data at an instance of a :class:`~dask_sql.Context`. Typically, you only have a single context per application. -.. code-block:: python +.. tabs:: + + .. group-tab:: CPU - from dask_sql import Context + .. 
code-block:: python - c = Context() - c.create_table("timeseries", df) + from dask_sql import Context + + c = Context() + c.create_table("timeseries", df) + + .. group-tab:: GPU + + .. code-block:: python + + from dask_sql import Context + + c = Context() + c.create_table("timeseries", df, gpu=True) From now on, the data is accessible as the "timeseries" table of this context. It is possible to register multiple data frames at the same context. @@ -59,11 +86,19 @@ It is possible to register multiple data frames at the same context. If you plan to query the same data multiple times, it might make sense to persist the data before: - .. code-block:: python + .. tabs:: + + .. group-tab:: CPU + + .. code-block:: python + + c.create_table("timeseries", df, persist=True) + + .. group-tab:: GPU - df = df.persist() - c.create_table("timeseries", df) + .. code-block:: python + c.create_table("timeseries", df, persist=True, gpu=True) 3. Run your queries ------------------- From da954b9fa17fa227ace18326cce4f324ab849812 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 8 Feb 2022 08:07:12 -0800 Subject: [PATCH 06/16] Add GPU tabs to ML docs --- docs/source/machine_learning.rst | 39 ++++++++++++++++++++++++-------- tests/integration/test_model.py | 2 +- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/docs/source/machine_learning.rst b/docs/source/machine_learning.rst index 0f7a5a8bb..41a1353f6 100644 --- a/docs/source/machine_learning.rst +++ b/docs/source/machine_learning.rst @@ -68,17 +68,36 @@ which allows you to also call it e.g. from your BI tool. Additionally to the ``PREDICT`` keyword mentioned above, ``dask-sql`` also has a way to create and train a model from SQL: -.. code-block:: sql +.. tabs:: - CREATE MODEL my_model WITH ( - model_class = 'sklearn.ensemble.GradientBoostingClassifier', - wrap_predict = True, - target_column = 'target' - ) AS ( - SELECT x, y, target - FROM timeseries - LIMIT 100 - ) + .. group-tab:: CPU + + .. code-block:: sql + + CREATE MODEL my_model WITH ( + model_class = 'sklearn.ensemble.GradientBoostingClassifier', + wrap_predict = True, + target_column = 'target' + ) AS ( + SELECT x, y, target + FROM timeseries + LIMIT 100 + ) + + .. group-tab:: GPU + + .. code-block:: sql + + CREATE MODEL my_model WITH ( + model_class = 'cuml.linear_model.LogisticRegression', + wrap_predict = True, + wrap_fit = False, -- can we explain why this needs to be added? 
+ target_column = 'target' + ) AS ( + SELECT x, y, target + FROM timeseries + LIMIT 100 + ) This call will create a new instance of ``sklearn.ensemble.GradientBoostingClassifier`` and train it with the data collected from the ``SELECT`` call (again, every valid ``SELECT`` diff --git a/tests/integration/test_model.py b/tests/integration/test_model.py index a63eb6bf2..042800711 100644 --- a/tests/integration/test_model.py +++ b/tests/integration/test_model.py @@ -841,7 +841,7 @@ def test_experiment_automl_classifier(c, client, training_df): check_trained_model(c, "my_automl_exp1") -def test_experiement_automl_regressor(c, client, training_df): +def test_experiment_automl_regressor(c, client, training_df): tpot = pytest.importorskip("tpot", reason="tpot not installed") # test regressor c.sql( From 1de3e9c9a6cf9ff11274602e8f0a84864079f428 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Tue, 8 Feb 2022 11:27:23 -0800 Subject: [PATCH 07/16] dask_ml_docs_fix --- docs/source/machine_learning.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/source/machine_learning.rst b/docs/source/machine_learning.rst index 41a1353f6..45606b7c6 100644 --- a/docs/source/machine_learning.rst +++ b/docs/source/machine_learning.rst @@ -75,34 +75,35 @@ create and train a model from SQL: .. code-block:: sql CREATE MODEL my_model WITH ( - model_class = 'sklearn.ensemble.GradientBoostingClassifier', + model_class = 'sklearn.linear_model.LogisticRegression', wrap_predict = True, target_column = 'target' ) AS ( - SELECT x, y, target + SELECT x, y, x*y > 0 as target FROM timeseries LIMIT 100 ) .. group-tab:: GPU - + .. code-block:: sql CREATE MODEL my_model WITH ( model_class = 'cuml.linear_model.LogisticRegression', wrap_predict = True, - wrap_fit = False, -- can we explain why this needs to be added? target_column = 'target' ) AS ( - SELECT x, y, target + SELECT x, y, x*y > 0 as target FROM timeseries LIMIT 100 ) -This call will create a new instance of ``sklearn.ensemble.GradientBoostingClassifier`` +This call will create a new instance of ``linear_model.LogisticRegression`` and train it with the data collected from the ``SELECT`` call (again, every valid ``SELECT`` query can be given). The model can than be used in subsequent calls to ``PREDICT`` -using the given name. +using the given name. +We set ``wrap_predict`` = ``True`` here to parallelize post fit prediction task of non distributed models (sklearn/cuML etc) across workers. + Have a look into :ref:`ml` for more information. 4. Check Model parameters - Model meta data From af3ba023b2c29c30f534f6b74491e6d936beb0d9 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 9 Feb 2022 09:28:16 -0800 Subject: [PATCH 08/16] Run pre-commit hooks --- docs/source/machine_learning.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/machine_learning.rst b/docs/source/machine_learning.rst index 45606b7c6..119f29620 100644 --- a/docs/source/machine_learning.rst +++ b/docs/source/machine_learning.rst @@ -85,7 +85,7 @@ create and train a model from SQL: ) .. group-tab:: GPU - + .. 
code-block:: sql CREATE MODEL my_model WITH ( @@ -98,11 +98,11 @@ create and train a model from SQL: LIMIT 100 ) -This call will create a new instance of ``linear_model.LogisticRegression`` +This call will create a new instance of ``linear_model.LogisticRegression`` and train it with the data collected from the ``SELECT`` call (again, every valid ``SELECT`` query can be given). The model can than be used in subsequent calls to ``PREDICT`` -using the given name. -We set ``wrap_predict`` = ``True`` here to parallelize post fit prediction task of non distributed models (sklearn/cuML etc) across workers. +using the given name. +We set ``wrap_predict`` = ``True`` here to parallelize post fit prediction task of non distributed models (sklearn/cuML etc) across workers. Have a look into :ref:`ml` for more information. From 05a671852489c631b01f9d5208bf28ec7b180f41 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 9 Feb 2022 09:59:26 -0800 Subject: [PATCH 09/16] Add TODOs to ML docs --- docs/source/machine_learning.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/machine_learning.rst b/docs/source/machine_learning.rst index 119f29620..fac0daacb 100644 --- a/docs/source/machine_learning.rst +++ b/docs/source/machine_learning.rst @@ -128,6 +128,9 @@ in SQL using below SQL syntax, choose different tuners from the dask_ml package based on memory and compute constraints and for more details refer to the `dask ml documentation `_ +.. + TODO - add a GPU section to these examples once we have working CREATE EXPERIMENT tests for GPU + .. code-block:: sql CREATE EXPERIMENT my_exp WITH ( @@ -194,6 +197,9 @@ and used by dask-sql for training, prediction and exporting the model through standard sklearn interface +.. + TODO - add a GPU section to these examples once we have working EXPORT MODEL tests for GPU + .. code-block:: sql -- for pickle model serialization @@ -226,6 +232,9 @@ The following SQL-only code gives an example on how the commands can play togeth We assume that you have created/registered a table "my_data" with the numerical columns ``x`` and ``y`` and the boolean target ``label``. +.. + TODO - add a GPU section to these examples once we have working CREATE EXPERIMENT tests for GPU + .. code-block:: sql -- First, we create a new feature z out of x and y. From 44de2d84f4c3b3fa4e43c4fa790765f91d4e30b6 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Fri, 11 Feb 2022 09:54:40 -0800 Subject: [PATCH 10/16] Start updating docs around code blocks --- docs/source/data_input.rst | 5 +-- docs/source/index.rst | 78 ++++++++++++++------------------------ docs/source/quickstart.rst | 26 ++++++------- 3 files changed, 43 insertions(+), 66 deletions(-) diff --git a/docs/source/data_input.rst b/docs/source/data_input.rst index 731906bf7..7bbae23f2 100644 --- a/docs/source/data_input.rst +++ b/docs/source/data_input.rst @@ -4,8 +4,7 @@ Data Loading and Input ====================== Before data can be queried with ``dask-sql``, it needs to be loaded into the dask cluster (or local instance) and registered with the :class:`~dask_sql.Context`. -For this, ``dask-sql`` uses the wide field of possible `input formats `_ of ``dask``, plus some additional formats only suitable for `dask-sql`. 
-You have multiple possibilities to load input data in ``dask-sql``: +``dask-sql`` supports all ``dask``-compatible `input formats `_, plus some additional formats only suitable for ``dask-sql``. 1. Load it via python --------------------- @@ -13,7 +12,7 @@ You have multiple possibilities to load input data in ``dask-sql``: You can either use already created dask dataframes or create one by using the :func:`~dask_sql.Context.create_table` function. Chances are high, there exists already a function to load your favorite format or location (e.g. s3 or hdfs). See below for all formats understood by ``dask-sql``. -Make sure to install required libraries both on the driver and worker machines. +Make sure to install required libraries both on the driver and worker machines: .. tabs:: diff --git a/docs/source/index.rst b/docs/source/index.rst index 2d6fc7d87..580c6ec32 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -19,9 +19,8 @@ if you need it. Example ------- -For this example, we use some data loaded from disk and query them with a SQL command from our python code. -Any pandas or dask dataframe can be used as input and ``dask-sql`` understands a large amount of formats (csv, parquet, json,...) and locations (s3, hdfs, gcs,...). - +For this example, we use some data loaded from disk and query it with a SQL command. +``dask-sql`` accepts any pandas, cuDF, or dask dataframe as input and is able to read data directly from a variety of storage formats (csv, parquet, json) and file systems (s3, hdfs, gcs): .. tabs:: @@ -29,82 +28,61 @@ Any pandas or dask dataframe can be used as input and ``dask-sql`` understands a .. code-block:: python + import dask.datasets from dask_sql import Context - # Create a context to hold the registered tables + # create a context to register tables c = Context() - # Load the data and register it in the context - # This will give the table a name, that we can use in queries - c.sql(""" - CREATE TABLE - my_data - WITH ( - location = '/path/to/data', - format = 'csv' - ) - """) + # create a table and register it in the context + df = dask.datasets.timeseries() + c.create_table("timeseries", df) - # Now execute a SQL query. The result is again dask dataframe. + # execute a SQL query; the result is a "lazy" Dask dataframe result = c.sql(""" SELECT - my_data.name, - SUM(my_data.x) + name, SUM(x) as "sum" FROM - my_data + timeseries GROUP BY - my_data.name + name """) - # Show the result - print(result) - - # Show the result... - print(result.compute()) + # actually compute the query... + result.compute() - # ... or use it for any other dask calculation - print(result.x.mean().compute()) + # ...or use it for another computation + result.sum.mean().compute() .. group-tab:: GPU .. code-block:: python + import dask.datasets from dask_sql import Context - # Create a context to hold the registered tables + # create a context to register tables c = Context() - # Load the data and register it in the context - # This will give the table a name, that we can use in queries - c.sql(""" - CREATE TABLE - my_data - WITH ( - location = '/path/to/data', - format = 'csv', - gpu = True - ) - """) + # create a table and register it in the context + df = dask.datasets.timeseries() + c.create_table("timeseries", df, gpu=True) - # Now execute a SQL query. The result is again dask dataframe. 
+ # execute a SQL query; the result is a "lazy" Dask dataframe result = c.sql(""" SELECT - my_data.name, - SUM(my_data.x) + name, SUM(x) as "sum" FROM - my_data + timeseries GROUP BY - my_data.name + name """) - # Show the result - print(result) - - # Show the result... - print(result.compute()) + # actually compute the query... + result.compute() - # ... or use it for any other dask calculation - print(result.x.mean().compute()) + # ...or use it for another computation + result.sum.mean().compute() .. toctree:: diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index bbd3a72be..2e4240415 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -10,10 +10,9 @@ Run the following code in an interactive python session, a python script or a ju 0. Cluster Setup ---------------- -If you just want to try out ``dask-sql`` quickly, you can skip this step at first. -However, the real magic of ``dask`` (and ``dask-sql``) comes from the ability to scale the computations over multiple machines. -There are `plenty `_ of possibilities to setup a ``dask`` cluster. -For local development and testing, you can setup a distributed version of ``dask`` with +If you just want to try out ``dask-sql`` quickly, this step can be skipped. +However, the real magic of ``dask`` (and ``dask-sql``) comes from the ability to scale the computations over multiple cores and/or machines. +For local development and testing, a Distributed ``LocalCluster`` (or, if using GPUs, a `Dask-CUDA `_ ``LocalCUDACluster``) can be deployed and a client connected to it like so: .. tabs:: @@ -21,27 +20,29 @@ For local development and testing, you can setup a distributed version of ``dask .. code-block:: python - from dask.distributed import Client + from distributed import Client, LocalCluster - client = Client() + cluster = LocalCluster() + client = Client(cluster) .. group-tab:: GPU .. code-block:: python from dask_cuda import LocalCUDACluster - from dask.distributed import Client + from distributed import Client cluster = LocalCUDACluster() client = Client(cluster) +There are several options for deploying clusters depending on the platform being used and the resources available; see `Dask - Deploying Clusters `_ for more information. + 1. Data Loading --------------- Before querying the data, you need to create a ``dask`` `data frame `_ containing the data. ``dask`` understands many different `input formats `_ and sources. - -In this example, we do not read in external data, but use test data in the form of random event time series. +In this example, we do not read in external data, but use test data in the form of random event time series: .. code-block:: python @@ -55,8 +56,7 @@ Read more on the data input part in :ref:`data_input`. -------------------- If we want to work with the data in SQL, we need to give the data frame a unique name. -We do this by registering the data at an instance of a :class:`~dask_sql.Context`. -Typically, you only have a single context per application. +We do this by registering the data in an instance of a :class:`~dask_sql.Context`: .. tabs:: @@ -78,8 +78,8 @@ Typically, you only have a single context per application. c = Context() c.create_table("timeseries", df, gpu=True) -From now on, the data is accessible as the "timeseries" table of this context. -It is possible to register multiple data frames at the same context. +From now on, the data is accessible as the ``timeseries`` table of this context. +It is possible to register multiple data frames in the same context. 
.. hint:: From 3e6f981c5d563e0c0deca40b2ef8dce82a006792 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 15 Mar 2022 12:10:00 -0700 Subject: [PATCH 11/16] Disable collapsible tabs --- docs/source/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index 137bcfe7f..a314915b3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -62,3 +62,6 @@ # Do not show type mappings autodoc_typehints = "none" + +# disable collapsible tabs +sphinx_tabs_disable_tab_closing = True From 8110a230cb16d228442c58e1a91a930689339c8d Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 15 Mar 2022 12:46:26 -0700 Subject: [PATCH 12/16] Ignore docs build directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4bfc739f1..947f81393 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,4 @@ dask_sql/jar .next/ dask-worker-space/ node_modules/ +docs/source/_build/ From 8e2c116c1fcf86e10ccf79dceae8d4a13514bc7d Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 15 Mar 2022 12:47:07 -0700 Subject: [PATCH 13/16] Use pip for RTD builds --- .readthedocs.yaml | 15 ++++++++++----- docs/environment.yml | 21 --------------------- docs/requirements-docs.txt | 15 +++++++++++++++ 3 files changed, 25 insertions(+), 26 deletions(-) delete mode 100644 docs/environment.yml create mode 100644 docs/requirements-docs.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 582ddbeee..43ede12b1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,10 +1,15 @@ +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 +build: + os: ubuntu-20.04 + tools: + python: "3.8" + +sphinx: + configuration: docs/source/conf.py python: - version: 3.8 install: - - method: setuptools + - requirements: docs/requirements-docs.txt + - method: pip path: . 
- -conda: - environment: docs/environment.yaml diff --git a/docs/environment.yml b/docs/environment.yml deleted file mode 100644 index e67ec890f..000000000 --- a/docs/environment.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: dask-sql-docs -channels: - - conda-forge - - nodefaults -dependencies: - - python=3.9 - - sphinx>=4.0.0 - - sphinx-tabs - - dask-sphinx-theme>=2.0.3 - - maven>=3.6.0 - - dask>=2021.11.1 - - pandas>=1.0.0 - - fugue>=0.5.3 - - jpype1>=1.0.2 - - fastapi>=0.61.1 - - uvicorn>=0.11.3 - - tzlocal>=2.1 - - prompt_toolkit - - pygments - - tabulate - - nest-asyncio diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt new file mode 100644 index 000000000..ef3f19382 --- /dev/null +++ b/docs/requirements-docs.txt @@ -0,0 +1,15 @@ +sphinx>=4.0.0 +sphinx-tabs +dask-sphinx-theme>=2.0.3 +maven>=3.6.0 +dask>=2021.11.1 +pandas>=1.0.0 +fugue>=0.5.3 +jpype1>=1.0.2 +fastapi>=0.61.1 +uvicorn>=0.11.3 +tzlocal>=2.1 +prompt_toolkit +pygments +tabulate +nest-asyncio From 98718c7e76bb16aca1c5acc94dbab57dacaac6fb Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 15 Mar 2022 12:53:29 -0700 Subject: [PATCH 14/16] Attempt to install maven through apt for RTD builds --- .readthedocs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 43ede12b1..3b3682543 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -4,6 +4,8 @@ build: os: ubuntu-20.04 tools: python: "3.8" + apt_packages: + - maven sphinx: configuration: docs/source/conf.py From 83a311eda01d9b076bbb19477e8e5cfed6bdeec3 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 15 Mar 2022 13:13:58 -0700 Subject: [PATCH 15/16] Remove maven from pip requirements --- docs/requirements-docs.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index ef3f19382..7d9e2e6cc 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -1,7 +1,6 @@ sphinx>=4.0.0 sphinx-tabs dask-sphinx-theme>=2.0.3 -maven>=3.6.0 dask>=2021.11.1 pandas>=1.0.0 fugue>=0.5.3 From 9d804fb6fad7f5fe29c262fbe78bd07a5ba8b87a Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 15 Mar 2022 13:42:28 -0700 Subject: [PATCH 16/16] Add conda environment for convenience --- docs/environment.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 docs/environment.yml diff --git a/docs/environment.yml b/docs/environment.yml new file mode 100644 index 000000000..e67ec890f --- /dev/null +++ b/docs/environment.yml @@ -0,0 +1,21 @@ +name: dask-sql-docs +channels: + - conda-forge + - nodefaults +dependencies: + - python=3.9 + - sphinx>=4.0.0 + - sphinx-tabs + - dask-sphinx-theme>=2.0.3 + - maven>=3.6.0 + - dask>=2021.11.1 + - pandas>=1.0.0 + - fugue>=0.5.3 + - jpype1>=1.0.2 + - fastapi>=0.61.1 + - uvicorn>=0.11.3 + - tzlocal>=2.1 + - prompt_toolkit + - pygments + - tabulate + - nest-asyncio
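
Taken together, the series leaves the documentation building from ``docs/source`` into ``docs/build``, with dependencies coming either from ``docs/requirements-docs.txt`` (what Read the Docs now installs) or from the convenience ``docs/environment.yml`` restored in the final patch. As a rough sketch of a local build under the new layout, the commands below assume the standard Sphinx catch-all ``make html`` target from the updated ``docs/Makefile`` and a working conda installation; neither is spelled out beyond what the patches themselves show:

.. code-block:: bash

    # create and activate the docs environment added back in the final patch
    conda env create -f docs/environment.yml
    conda activate dask-sql-docs

    # install dask-sql itself so autodoc can import it
    # (mirrors the "method: pip, path: ." step in .readthedocs.yaml)
    pip install .

    # build the HTML docs; with SOURCEDIR=source and BUILDDIR=build from the
    # updated Makefile, the output should land in docs/build/html
    cd docs
    make html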