Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion elyra/airflow/bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from abc import ABC
from abc import abstractmethod
import glob
import json
import logging
import os
import subprocess
Expand Down Expand Up @@ -218,10 +219,14 @@ def execute(self) -> None:
try:
OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'")
t0 = time.time()

# Include kernel selection in execution time
kernel_name = NotebookFileOp.find_best_kernel(notebook)

# Really hate to do this but have to invoke Papermill via library as workaround
import papermill

papermill.execute_notebook(notebook, notebook_output)
papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name)
duration = time.time() - t0
OpUtil.log_operation_info("notebook execution completed", duration)

Expand Down Expand Up @@ -262,6 +267,52 @@ def convert_notebook_to_html(notebook_file: str, html_file: str) -> str:
OpUtil.log_operation_info(f"{notebook_file} converted to {html_file}", duration)
return html_file

@staticmethod
def find_best_kernel(notebook_file: str) -> str:
    """Determine the kernel name to use when executing a notebook.

    Selection algorithm:

    1. Load the notebook and read the kernel name and language from its
       ``kernelspec`` metadata.
    2. Get the configured kernel specs using ``KernelSpecManager``.
    3. If the notebook's kernel name is among them, use it.
    4. Otherwise load each configured ``kernel.json`` file and use the first
       kernel whose language matches the notebook's (case-insensitive),
       logging an info message regarding the switch.
    5. If no language match is found, log a warning and return the notebook's
       kernel name, letting the subsequent execution fail on it.

    Kernel specs whose ``kernel.json`` cannot be read or parsed, or that do
    not declare a language, are skipped rather than aborting the search.

    :param notebook_file: path of the notebook file to inspect
    :return: name of the kernel to hand to the notebook executor
    """
    from jupyter_client.kernelspec import KernelSpecManager
    import nbformat

    nb = nbformat.read(notebook_file, 4)

    nb_kspec = nb.metadata.kernelspec
    nb_kernel_name = nb_kspec.get("name")
    nb_kernel_lang = nb_kspec.get("language")

    # Mapping of registered kernel name -> directory holding its kernel.json
    kernel_specs = KernelSpecManager().find_kernel_specs()

    # see if we have a direct match...
    if nb_kernel_name in kernel_specs:
        return nb_kernel_name

    # no match found for kernel, try matching language (only possible when the
    # notebook actually declares one)...
    if isinstance(nb_kernel_lang, str):
        wanted_lang = nb_kernel_lang.lower()
        for name, resource_dir in kernel_specs.items():
            kernel_json = os.path.join(resource_dir, "kernel.json")
            try:
                with open(kernel_json, encoding="utf-8") as f:
                    kspec = json.load(f)
            except (OSError, ValueError) as err:
                # A missing or malformed kernel.json must not prevent the
                # remaining kernel specs from being considered.
                logger.debug(f"Skipping kernel '{name}': unable to load '{kernel_json}': {err}")
                continue

            kernel_lang = kspec.get("language") if isinstance(kspec, dict) else None
            if isinstance(kernel_lang, str) and kernel_lang.lower() == wanted_lang:
                # Use the registered name (the dict key): it is the identifier the
                # kernel spec manager resolves, whereas the basename of the resource
                # directory is not guaranteed to be identical to it.
                logger.info(
                    f"Matched kernel by language ({nb_kernel_lang}), using kernel "
                    f"'{name}' instead of the missing kernel '{nb_kernel_name}'."
                )
                return name

    # no match found for language, return notebook kernel and let execution fail
    logger.warning(
        f"Reverting back to missing notebook kernel '{nb_kernel_name}' since no "
        f"language match ({nb_kernel_lang}) was found in current kernel specifications."
    )
    return nb_kernel_name


class PythonFileOp(FileOpBase):
"""Perform Python File Operation"""
Expand Down
66 changes: 66 additions & 0 deletions elyra/tests/airflow/resources/test-notebookA.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TEST_ENV_VAR1: None\n"
]
},
{
"data": {
"text/plain": [
"'test-file/test,file/test,file-copy.txt'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"from shutil import copyfile\n",
"\n",
"print(f\"TEST_ENV_VAR1: {os.getenv('TEST_ENV_VAR1')}\")\n",
"\n",
"os.makedirs(\"test-file/test,file\", exist_ok=True)\n",
"copyfile(\"test-file.txt\", \"test-file/test-file-copy.txt\")\n",
"copyfile(\"test,file.txt\", \"test-file/test,file/test,file-copy.txt\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
72 changes: 72 additions & 0 deletions elyra/tests/airflow/test_bootstrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#
# Copyright 2018-2023 Elyra Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import logging
import os

import nbformat

from elyra.airflow import bootstrapper

RESOURCES_DIR = os.path.join(os.path.dirname(__file__), "resources")


def test_find_best_kernel_nb(tmpdir):
    """The notebook's own kernel name is expected when the notebook is left unmodified."""
    fixture_path = os.path.join(RESOURCES_DIR, "test-notebookA.ipynb")
    copied_path = os.path.join(tmpdir, "test-notebookA.ipynb")

    # Round-trip the fixture through nbformat to place an unchanged copy
    # (same kernel name and language) in the temporary directory.
    notebook = nbformat.read(fixture_path, 4)
    nbformat.write(notebook, copied_path)

    with tmpdir.as_cwd():
        selected_kernel = bootstrapper.NotebookFileOp.find_best_kernel(copied_path)
        expected_kernel = notebook.metadata.kernelspec["name"]
        assert selected_kernel == expected_kernel


def test_find_best_kernel_lang(tmpdir, caplog):
    """An unknown kernel name with a known language is expected to resolve by language."""
    # INFO must be captured: the language-match notice is logged at that level.
    caplog.set_level(logging.INFO)

    fixture_path = os.path.join(RESOURCES_DIR, "test-notebookA.ipynb")
    copied_path = os.path.join(tmpdir, "test-notebookA.ipynb")

    # Write a copy whose kernel name is unknown, forcing the language fallback.
    notebook = nbformat.read(fixture_path, 4)
    kernelspec = notebook.metadata.kernelspec
    kernelspec["name"] = "test-kernel"
    kernelspec["language"] = "PYTHON"  # upper case exercises case-insensitive matching
    nbformat.write(notebook, copied_path)

    with tmpdir.as_cwd():
        selected_kernel = bootstrapper.NotebookFileOp.find_best_kernel(copied_path)
        assert selected_kernel == "python3"
        assert len(caplog.records) == 1
        first_message = caplog.records[0].message
        assert first_message.startswith("Matched kernel by language (PYTHON)")


def test_find_best_kernel_nomatch(tmpdir, caplog):
    """With neither kernel name nor language known, the notebook's kernel name is expected back."""
    fixture_path = os.path.join(RESOURCES_DIR, "test-notebookA.ipynb")
    copied_path = os.path.join(tmpdir, "test-notebookA.ipynb")

    # Write a copy whose kernel name and language are both unknown, so that
    # neither the direct lookup nor the language fallback can succeed.
    notebook = nbformat.read(fixture_path, 4)
    kernelspec = notebook.metadata.kernelspec
    kernelspec["name"] = "test-kernel"
    kernelspec["language"] = "test-language"
    nbformat.write(notebook, copied_path)

    with tmpdir.as_cwd():
        selected_kernel = bootstrapper.NotebookFileOp.find_best_kernel(copied_path)
        assert selected_kernel == "test-kernel"
        assert len(caplog.records) == 1
        first_message = caplog.records[0].message
        assert first_message.startswith("Reverting back to missing notebook kernel 'test-kernel'")