56 changes: 56 additions & 0 deletions .github/workflows/parse_logs.py
@@ -0,0 +1,56 @@
# type: ignore
import argparse
import itertools
import pathlib
import textwrap

parser = argparse.ArgumentParser()
parser.add_argument("filepaths", nargs="+", type=pathlib.Path)
args = parser.parse_args()

filepaths = sorted(p for p in args.filepaths if p.is_file())


def extract_short_test_summary_info(lines):
    # drop everything before the "short test summary info" header ...
    up_to_start_of_section = itertools.dropwhile(
        lambda l: "=== short test summary info ===" not in l, lines,
    )
    # ... skip the header line itself ...
    up_to_section_content = itertools.islice(up_to_start_of_section, 1, None)
    # ... and keep only the FAILED / ERROR lines that follow
    section_content = itertools.takewhile(
        lambda l: l.startswith("FAILED") or l.startswith("ERROR"), up_to_section_content
    )
    content = "\n".join(section_content)

    return content


def format_log_message(path):
    # log files are expected to be named like "output-<python version>-log"
    py_version = path.name.split("-")[1]
summary = f"Python {py_version} Test Summary Info"
with open(path) as f:
data = extract_short_test_summary_info(line.rstrip() for line in f)
message = (
textwrap.dedent(
"""\
<details><summary>{summary}</summary>

```
{data}
```

</details>
"""
)
.rstrip()
.format(summary=summary, data=data)
)

return message


print("Parsing logs ...")
message = "\n\n".join(format_log_message(path) for path in filepaths)

output_file = pathlib.Path("pytest-logs.txt")
print(f"Writing output file to: {output_file.absolute()}")
output_file.write_text(message)
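For a sense of what this script produces, here is a small, hypothetical example (the log lines and file name are made up, and `extract_short_test_summary_info` is assumed to be in scope, e.g. pasted into a REPL):

```python
# Hypothetical tail of a pytest run, as written by `tee` in the workflow below.
sample_log = """\
tests/test_context.py::test_sql PASSED
===================== short test summary info ======================
FAILED tests/test_join.py::test_join - AssertionError: results differ
ERROR tests/test_hive.py::test_hive - ImportError: No module named 'sasl'
============ 1 failed, 1 error, 250 passed in 42.00s ============
"""

print(extract_short_test_summary_info(sample_log.splitlines()))
# FAILED tests/test_join.py::test_join - AssertionError: results differ
# ERROR tests/test_hive.py::test_hive - ImportError: No module named 'sasl'
```

`format_log_message` then wraps that output in a collapsible `<details>` block, so each Python version's failures can be folded in the issue body.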
188 changes: 188 additions & 0 deletions .github/workflows/test-dev.yml
@@ -0,0 +1,188 @@
name: Scheduled testing
on:
schedule:
- cron: "0 0 * * *" # Daily “At 00:00” UTC
workflow_dispatch: # allows you to trigger the workflow run manually

jobs:
build:
# This build step should be similar to the deploy build, to make sure we actually test
# the future deployable
name: "Build the jar on ubuntu"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Cache local Maven repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
miniforge-variant: Mambaforge
use-mamba: true
python-version: 3.8
activate-environment: dask-sql
environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml
- name: Install dependencies and build the jar
shell: bash -l {0}
run: |
python setup.py java
- name: Upload the jar
uses: actions/upload-artifact@v1
with:
name: jar
path: dask_sql/jar/DaskSQL.jar

test-dev:
name: "Test upstream dev (${{ matrix.os }}, java: ${{ matrix.java }}, python: ${{ matrix.python }})"
needs: build
runs-on: ${{ matrix.os }}
env:
CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-jdk${{ matrix.java }}-dev.yaml
defaults:
run:
shell: bash -l {0}
strategy:
fail-fast: false
matrix:
java: [8, 11]
os: [ubuntu-latest, windows-latest]
python: [3.7, 3.8]
outputs:
artifacts_availability: ${{ steps.status.outputs.ARTIFACTS_AVAILABLE }}
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0 # Fetch all history for all branches and tags.
- name: Cache local Maven repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-v1-jdk${{ matrix.java }}-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
miniforge-variant: Mambaforge
use-mamba: true
python-version: ${{ matrix.python }}
activate-environment: dask-sql
environment-file: ${{ env.CONDA_FILE }}
- name: Download the pre-build jar
uses: actions/download-artifact@v1
with:
name: jar
path: dask_sql/jar/
- name: Install hive testing dependencies for Linux
run: |
          mamba install -c conda-forge "sasl>=0.3.1"
docker pull bde2020/hive:2.3.2-postgresql-metastore
docker pull bde2020/hive-metastore-postgresql:2.3.0
if: matrix.os == 'ubuntu-latest'
- name: Set proper JAVA_HOME for Windows
run: |
echo "JAVA_HOME=${{ env.CONDA }}\envs\dask-sql\Library" >> $GITHUB_ENV
if: matrix.os == 'windows-latest'
- name: Install upstream dev Dask
run: |
python -m pip install --no-deps git+https://github.com/dask/dask
python -m pip install --no-deps git+https://github.com/dask/distributed
      - name: Test with pytest
        id: status
        if: success()
        run: |
          set -euo pipefail
          pytest -n auto tests --dist loadfile | tee output-${{ matrix.python }}-log || (
              echo '::set-output name=ARTIFACTS_AVAILABLE::true' && false
          )
- name: Upload artifacts
if: |
failure()
&& steps.status.outcome == 'failure'
&& github.event_name == 'schedule'
&& github.repository == 'dask-contrib/dask-sql'
uses: actions/upload-artifact@v2
with:
          name: output-${{ matrix.os }}-py${{ matrix.python }}-jdk${{ matrix.java }}-log
          path: output-${{ matrix.python }}-log
retention-days: 5

report-failures:
name: Open issue for upstream dev failures
    needs: test-dev
    if: |
      failure()
      && github.event_name == 'schedule'
      && needs.test-dev.outputs.artifacts_availability == 'true'
runs-on: ubuntu-latest
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.x"
- uses: actions/download-artifact@v2
with:
path: /tmp/workspace/logs
- name: Move all log files into a single directory
run: |
rsync -a /tmp/workspace/logs/output-*/ ./logs
ls -R ./logs
- name: Parse logs
run: |
shopt -s globstar
python .github/workflows/parse_logs.py logs/**/*-log
- name: Report failures
uses: actions/github-script@v5
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const fs = require('fs');
const pytest_logs = fs.readFileSync('pytest-logs.txt', 'utf8');
const title = "⚠️ Nightly upstream-dev CI failed ⚠️"
const workflow_url = `https://github.com/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`
const issue_body = `[Workflow Run URL](${workflow_url})\n${pytest_logs}`

// Run GraphQL query against GitHub API to find the most recent open issue used for reporting failures
const query = `query($owner:String!, $name:String!, $creator:String!, $label:String!){
repository(owner: $owner, name: $name) {
issues(first: 1, states: OPEN, filterBy: {createdBy: $creator, labels: [$label]}, orderBy: {field: CREATED_AT, direction: DESC}) {
edges {
node {
body
id
number
}
}
}
}
}`;

const variables = {
owner: context.repo.owner,
name: context.repo.repo,
label: 'CI',
creator: "github-actions[bot]"
}
const result = await github.graphql(query, variables)

// If no issue is open, create a new issue,
// else update the body of the existing issue.
if (result.repository.issues.edges.length === 0) {
github.rest.issues.create({
owner: variables.owner,
repo: variables.name,
body: issue_body,
title: title,
labels: [variables.label]
})
} else {
github.rest.issues.update({
owner: variables.owner,
repo: variables.name,
issue_number: result.repository.issues.edges[0].node.number,
body: issue_body
})
}
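One detail worth spelling out (not part of the diff itself): `parse_logs.py` above derives the Python version from the log file name, so the `tee` target in the test step has to follow the `output-<python>-log` pattern. A minimal sketch of that naming contract, using hypothetical file names:

```python
# parse_logs.py takes the second dash-separated field of the file name as the
# Python version, so the log files must look like "output-<python>-log".
for name in ["output-3.7-log", "output-3.8-log"]:  # hypothetical names
    py_version = name.split("-")[1]
    print(f"Python {py_version} Test Summary Info")
# Python 3.7 Test Summary Info
# Python 3.8 Test Summary Info
```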
38 changes: 35 additions & 3 deletions .github/workflows/test.yml
@@ -16,6 +16,23 @@ concurrency:
cancel-in-progress: true

jobs:
detect-ci-trigger:
name: Check for upstream dev Dask trigger phrase
runs-on: ubuntu-latest
if: |
github.repository == 'dask-contrib/dask-sql'
&& (github.event_name == 'push' || github.event_name == 'pull_request')
outputs:
triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 2
- uses: xarray-contrib/[email protected]
id: detect-trigger
with:
keyword: "[test-upstream]"

build:
# This build step should be similar to the deploy build, to make sure we actually test
# the future deployable
@@ -48,7 +65,7 @@ jobs:

test:
name: "Test (${{ matrix.os }}, java: ${{ matrix.java }}, python: ${{ matrix.python }})"
needs: build
needs: [detect-ci-trigger, build]
runs-on: ${{ matrix.os }}
env:
CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-jdk${{ matrix.java }}-dev.yaml
@@ -89,6 +106,11 @@ jobs:
run: |
echo "JAVA_HOME=${{ env.CONDA }}\envs\dask-sql\Library" >> $GITHUB_ENV
if: matrix.os == 'windows-latest'
- name: Optionally install upstream dev Dask
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
python -m pip install --no-deps git+https://github.com/dask/dask
python -m pip install --no-deps git+https://github.com/dask/distributed
- name: Test with pytest
shell: bash -l {0}
run: |
@@ -108,7 +130,7 @@

cluster:
name: "Test in a dask cluster"
needs: build
needs: [detect-ci-trigger, build]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -138,6 +160,11 @@
which python
pip list
mamba list
- name: Optionally install upstream dev Dask
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
python -m pip install --no-deps git+https://github.com/dask/dask
python -m pip install --no-deps git+https://github.com/dask/distributed
- name: run a dask cluster
shell: bash -l {0}
run: |
@@ -157,7 +184,7 @@

import:
name: "Test importing with bare requirements"
needs: build
needs: [detect-ci-trigger, build]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -186,6 +213,11 @@
which python
pip list
mamba list
- name: Optionally install upstream dev Dask
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
python -m pip install --no-deps git+https://github.com/dask/dask
python -m pip install --no-deps git+https://github.com/dask/distributed
- name: Try to import dask-sql
shell: bash -l {0}
run: |
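Conceptually, the `detect-ci-trigger` job just looks for the `[test-upstream]` keyword in the triggering commit message and exposes the result to the downstream jobs. A rough Python sketch of that check (an illustration only, not the actual implementation of xarray-contrib/ci-trigger):

```python
import subprocess

# Read the latest commit message and check for the trigger keyword.
message = subprocess.run(
    ["git", "log", "-1", "--pretty=%B"],
    capture_output=True, text=True, check=True,
).stdout
trigger_found = "[test-upstream]" in message

# The real action exposes this as a step output, consumed via
# needs.detect-ci-trigger.outputs.triggered in the workflow above.
print(f"trigger-found={str(trigger_found).lower()}")
```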