diff --git a/.flake8 b/.flake8 deleted file mode 100644 index f9bda5354f..0000000000 --- a/.flake8 +++ /dev/null @@ -1,25 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION. -# SPDX-License-Identifier: Apache-2.0 - -[flake8] -filename = *.py, *.pyx, *.pxd, *.pxi -exclude = __init__.py, *.egg, build, docs, .git -force-check = True -ignore = - # line break before binary operator - W503, - # whitespace before : - E203 -per-file-ignores = - # Rules ignored only in Cython: - # E211: whitespace before '(' (used in multi-line imports) - # E225: Missing whitespace around operators (breaks cython casting syntax like <int>) - # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) - # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) - # E275: Missing whitespace after keyword (Doesn't work with Cython except?) - # E402: invalid syntax (works for Python, not Cython) - # E999: invalid syntax (works for Python, not Cython) - # W504: line break after binary operator (breaks lines that end with a pointer) - *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0c510d532..d5b622c061 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,23 +17,13 @@ repos: # project can specify its own first/third-party packages. args: ["--config-root=python/", "--resolve-all-configs"] files: python/.* - types_or: [python, cython, pyi] - - repo: https://github.com/psf/black - rev: 22.3.0 + types: [cython] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.3 hooks: - - id: black - files: python/.* - # Explicitly specify the pyproject.toml at the repo root, not per-project. - args: ["--config", "pyproject.toml"] - - repo: https://github.com/PyCQA/flake8 - rev: 7.1.1 - hooks: - - id: flake8 - args: ["--config=.flake8"] - files: python/.*$ - types: [file] - types_or: [python, cython] - additional_dependencies: ["flake8-force"] + - id: ruff-check + args: [--fix] + - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v0.971' hooks: @@ -110,7 +100,7 @@ repos: ^CHANGELOG[.]md$| ^cpp/cmake/patches/cutlass/build-export[.]patch$ - repo: https://github.com/rapidsai/pre-commit-hooks - rev: v1.2.0 + rev: v1.2.1 hooks: - id: verify-copyright name: verify-copyright-cuvs diff --git a/cpp/scripts/analyze_nvcc_log.py b/cpp/scripts/analyze_nvcc_log.py index 823c4f8e3e..936b5b1751 100755 --- a/cpp/scripts/analyze_nvcc_log.py +++ b/cpp/scripts/analyze_nvcc_log.py @@ -1,15 +1,15 @@ #!/usr/bin/env python3 -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import sys import pandas as pd -import numpy as np import matplotlib.pyplot as plt import seaborn as sns from pathlib import Path from matplotlib import colors + def main(input_path): input_path = Path(input_path) print("-- loading data") @@ -22,40 +22,56 @@ def main(input_path): df["file"] = df["source file name"] df["phase"] = df["phase name"].str.strip() - dfp = (df - # Remove nvcc driver entries.
They don't contain a source file name - .query("phase!='nvcc (driver)'") - # Make a pivot table containing files as row, phase (preprocessing, - # cicc, etc.) as column and the total times as table entries. NOTE: - # if compiled for multiple archs, the archs will be summed. - .pivot_table(index="file", values="seconds", columns="phase", aggfunc='sum')) + dfp = ( + df + # Remove nvcc driver entries. They don't contain a source file name + .query("phase!='nvcc (driver)'") + # Make a pivot table containing files as row, phase (preprocessing, + # cicc, etc.) as column and the total times as table entries. NOTE: + # if compiled for multiple archs, the archs will be summed. + .pivot_table( + index="file", values="seconds", columns="phase", aggfunc="sum" + ) + ) dfp_sum = dfp.sum(axis="columns") df_fraction = dfp.divide(dfp_sum, axis="index") df_fraction["total time"] = dfp_sum - df_fraction = df_fraction.melt(ignore_index=False, id_vars="total time", var_name="phase", value_name="fraction") + df_fraction = df_fraction.melt( + ignore_index=False, + id_vars="total time", + var_name="phase", + value_name="fraction", + ) dfp["total time"] = dfp_sum - df_absolute = dfp.melt(ignore_index=False, id_vars="total time", var_name="phase", value_name="seconds") + df_absolute = dfp.melt( + ignore_index=False, + id_vars="total time", + var_name="phase", + value_name="seconds", + ) # host: light red to dark red (preprocessing, cudafe, gcc (compiling)) # device: ligt green to dark green (preprocessing, cicc, ptxas) palette = { "gcc (preprocessing 4)": colors.hsv_to_rgb((0, 1, 1)), - 'cudafe++': colors.hsv_to_rgb((0, 1, .75)), - 'gcc (compiling)': colors.hsv_to_rgb((0, 1, .4)), - "gcc (preprocessing 1)": colors.hsv_to_rgb((.33, 1, 1)), - 'cicc': colors.hsv_to_rgb((.33, 1, 0.75)), - 'ptxas': colors.hsv_to_rgb((.33, 1, 0.4)), - 'fatbinary': "grey", + "cudafe++": colors.hsv_to_rgb((0, 1, 0.75)), + "gcc (compiling)": colors.hsv_to_rgb((0, 1, 0.4)), + "gcc (preprocessing 1)": colors.hsv_to_rgb((0.33, 1, 1)), + "cicc": colors.hsv_to_rgb((0.33, 1, 0.75)), + "ptxas": colors.hsv_to_rgb((0.33, 1, 0.4)), + "fatbinary": "grey", } print("-- Ten longest translation units:") - colwidth = pd.get_option('display.max_colwidth') - 1 + colwidth = pd.get_option("display.max_colwidth") - 1 dfp = dfp.reset_index() dfp["file"] = dfp["file"].apply(lambda s: s[-colwidth:]) - print(dfp.sort_values("total time", ascending=False).reset_index().loc[:10]) + print( + dfp.sort_values("total time", ascending=False).reset_index().loc[:10] + ) print("-- Plotting absolute compile times") abs_out_path = f"{input_path}.absolute.compile_times.png" @@ -64,43 +80,57 @@ def main(input_path): y="file", hue="phase", hue_order=reversed( - ["gcc (preprocessing 4)", 'cudafe++', 'gcc (compiling)', - "gcc (preprocessing 1)", 'cicc', 'ptxas', - 'fatbinary', - ]), + [ + "gcc (preprocessing 4)", + "cudafe++", + "gcc (compiling)", + "gcc (preprocessing 1)", + "cicc", + "ptxas", + "fatbinary", + ] + ), palette=palette, weights="seconds", multiple="stack", kind="hist", height=20, ) - plt.xlabel("seconds"); + plt.xlabel("seconds") plt.savefig(abs_out_path) print(f"-- Wrote absolute compile time plot to {abs_out_path}") print("-- Plotting relative compile times") rel_out_path = f"{input_path}.relative.compile_times.png" sns.displot( - df_fraction.sort_values('total time').reset_index(), + df_fraction.sort_values("total time").reset_index(), y="file", hue="phase", - hue_order=reversed(["gcc (preprocessing 4)", 'cudafe++', 'gcc (compiling)', - "gcc (preprocessing 1)", 
'cicc', 'ptxas', - 'fatbinary', - ]), + hue_order=reversed( + [ + "gcc (preprocessing 4)", + "cudafe++", + "gcc (compiling)", + "gcc (preprocessing 1)", + "cicc", + "ptxas", + "fatbinary", + ] + ), palette=palette, weights="fraction", multiple="stack", kind="hist", height=15, ) - plt.xlabel("fraction"); + plt.xlabel("fraction") plt.savefig(rel_out_path) print(f"-- Wrote relative compile time plot to {rel_out_path}") + if __name__ == "__main__": if len(sys.argv) != 2: - printf("""NVCC log analyzer + print("""NVCC log analyzer Analyzes nvcc logs and outputs a figure with highest ranking translation units. diff --git a/cpp/scripts/gitutils.py b/cpp/scripts/gitutils.py index 99fe5de676..7b1cef1f74 100644 --- a/cpp/scripts/gitutils.py +++ b/cpp/scripts/gitutils.py @@ -55,17 +55,20 @@ def repo_version_major_minor(): full_repo_version = repo_version() - match = re.match(r"^v?(?P<major>[0-9]+)(?:\.(?P<minor>[0-9]+))?", - full_repo_version) - - if (match is None): - print(" [DEBUG] Could not determine repo major minor version. " - f"Full repo version: {full_repo_version}.") + match = re.match( + r"^v?(?P<major>[0-9]+)(?:\.(?P<minor>[0-9]+))?", full_repo_version + ) + + if match is None: + print( + " [DEBUG] Could not determine repo major minor version. " + f"Full repo version: {full_repo_version}." + ) return None out_version = match.group("major") - if (match.group("minor")): + if match.group("minor"): out_version += "." + match.group("minor") return out_version @@ -91,44 +94,50 @@ def determine_merge_commit(current_branch="HEAD"): try: # Try to determine the target branch from the most recent tag - head_branch = __git("describe", - "--all", - "--tags", - "--match='branch-*'", - "--abbrev=0") + head_branch = __git( + "describe", "--all", "--tags", "--match='branch-*'", "--abbrev=0" + ) except subprocess.CalledProcessError: - print(" [DEBUG] Could not determine target branch from most recent " - "tag. Falling back to 'branch-{major}.{minor}.") + print( + " [DEBUG] Could not determine target branch from most recent " + "tag. Falling back to 'branch-{major}.{minor}'." + ) head_branch = None - if (head_branch is not None): + if head_branch is not None: # Convert from head to branch name head_branch = __git("name-rev", "--name-only", head_branch) else: # Try and guess the target branch as "branch-<major>.<minor>" version = repo_version_major_minor() - if (version is None): + if version is None: return None head_branch = "branch-{}".format(version) try: # Now get the remote tracking branch - remote_branch = __git("rev-parse", - "--abbrev-ref", - "--symbolic-full-name", - head_branch + "@{upstream}") + remote_branch = __git( + "rev-parse", + "--abbrev-ref", + "--symbolic-full-name", + head_branch + "@{upstream}", + ) except subprocess.CalledProcessError: - print(" [DEBUG] Could not remote tracking reference for " - f"branch {head_branch}.") + print( + " [DEBUG] Could not determine the remote tracking reference for " + f"branch {head_branch}." + ) remote_branch = None - if (remote_branch is None): + if remote_branch is None: return None - print(f" [DEBUG] Determined TARGET_BRANCH as: '{remote_branch}'. " - "Finding common ancestor.") + print( + f" [DEBUG] Determined TARGET_BRANCH as: '{remote_branch}'. " + "Finding common ancestor."
+ ) common_commit = __git("merge-base", remote_branch, current_branch) @@ -166,9 +175,9 @@ def changedFilesBetween(baseName, branchName, commitHash): # checkout latest commit from branch __git("checkout", "-fq", commitHash) - files = __gitdiff("--name-only", - "--ignore-submodules", - f"{baseName}..{branchName}") + files = __gitdiff( + "--name-only", "--ignore-submodules", f"{baseName}..{branchName}" + ) # restore the original branch __git("checkout", "--force", current) @@ -180,13 +189,15 @@ def changesInFileBetween(file, b1, b2, filter=None): current = branch() __git("checkout", "--quiet", b1) __git("checkout", "--quiet", b2) - diffs = __gitdiff("--ignore-submodules", - "-w", - "--minimal", - "-U0", - "%s...%s" % (b1, b2), - "--", - file) + diffs = __gitdiff( + "--ignore-submodules", + "-w", + "--minimal", + "-U0", + "%s...%s" % (b1, b2), + "--", + file, + ) __git("checkout", "--quiet", current) lines = [] for line in diffs.splitlines(): @@ -215,25 +226,29 @@ def modifiedFiles(pathFilter=None): currentBranch = branch() print( f" [DEBUG] TARGET_BRANCH={targetBranch}, COMMIT_HASH={commitHash}, " - f"currentBranch={currentBranch}") + f"currentBranch={currentBranch}" + ) if targetBranch and commitHash and (currentBranch == "current-pr-branch"): print(" [DEBUG] Assuming a CI environment.") allFiles = changedFilesBetween(targetBranch, currentBranch, commitHash) else: - print(" [DEBUG] Did not detect CI environment. " - "Determining TARGET_BRANCH locally.") + print( + " [DEBUG] Did not detect CI environment. " + "Determining TARGET_BRANCH locally." + ) common_commit = determine_merge_commit(currentBranch) - if (common_commit is not None): - + if common_commit is not None: # Now get the diff. Use --staged to get both diff between # common_commit..HEAD and any locally staged files - allFiles = __gitdiff("--name-only", - "--ignore-submodules", - "--staged", - f"{common_commit}").splitlines() + allFiles = __gitdiff( + "--name-only", + "--ignore-submodules", + "--staged", + f"{common_commit}", + ).splitlines() else: # Fallback to just uncommitted files allFiles = uncommittedFiles() diff --git a/cpp/scripts/heuristics/select_k/algorithm_selection.ipynb b/cpp/scripts/heuristics/select_k/algorithm_selection.ipynb index c56281ef58..a0be1de932 100644 --- a/cpp/scripts/heuristics/select_k/algorithm_selection.ipynb +++ b/cpp/scripts/heuristics/select_k/algorithm_selection.ipynb @@ -247,12 +247,13 @@ "source": [ "from collections import Counter\n", "\n", + "\n", "def rank_algos(df, use_relative_speedup=False):\n", " _, y, weights = get_dataset(df)\n", " times = Counter()\n", " for algo, speedup in zip(y, weights):\n", " times[algo] += speedup if use_relative_speedup else 1\n", - " return sorted(times.items(), key=lambda x:-x[-1])" + " return sorted(times.items(), key=lambda x: -x[-1])" ] }, { @@ -343,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "bc0a10ea-652b-4822-8587-514c8f0348c3", "metadata": { "tags": [] @@ -382,11 +383,11 @@ "# well over diverse inputs.\n", "#\n", "# note: the lowest performing algorithm here might actually be pretty good, but\n", - "# just not provide much benefit over another similar algorithm. 
\n", - "# As an example, kWarpDistributed is an excellent selection algorithm, but in testing \n", - "# kWarpDistributedShm is slightly faster than it in situations where it does well, \n", + "# just not provide much benefit over another similar algorithm.\n", + "# As an example, kWarpDistributed is an excellent selection algorithm, but in testing\n", + "# kWarpDistributedShm is slightly faster than it in situations where it does well,\n", "# meaning that it gets removed early on in this loop\n", - "current = df[df.use_memory_pool == True]\n", + "current = df[df.use_memory_pool == True] # noqa: E712\n", "algos = set(df.algo)\n", "\n", "# we're arbitrarily getting this down to 3 selection algorithms\n", diff --git a/cpp/scripts/heuristics/select_k/generate_heuristic.ipynb b/cpp/scripts/heuristics/select_k/generate_heuristic.ipynb index 50bc12556a..941567b826 100644 --- a/cpp/scripts/heuristics/select_k/generate_heuristic.ipynb +++ b/cpp/scripts/heuristics/select_k/generate_heuristic.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "56765f40-96ce-46c6-bce8-ab782cd72b6e", "metadata": { "tags": [] @@ -245,12 +245,21 @@ "# load up the timings from the MATRIX_BENCH script into a pandas dataframe\n", "df = load_dataframe(\"select_k_times.json\")\n", "\n", - "# we're limiting down to 3 different select_k methods - chosen by \n", + "# we're limiting down to 3 different select_k methods - chosen by\n", "# the 'algorithm_selection.ipynb' script here\n", - "df = df[df.algo.isin(['kWarpImmediate', 'kRadix11bitsExtraPass', 'kRadix11bits', 'kWarpDistributedShm'])]\n", + "df = df[\n", + " df.algo.isin(\n", + " [\n", + " \"kWarpImmediate\",\n", + " \"kRadix11bitsExtraPass\",\n", + " \"kRadix11bits\",\n", + " \"kWarpDistributedShm\",\n", + " ]\n", + " )\n", + "]\n", "\n", "# we're also assuming we have a memory pool for now\n", - "df = df[(df.use_memory_pool == True)]\n", + "df = df[(df.use_memory_pool == True)] # noqa: E712\n", "# df = df[(df.index_type == 'int64_t') & (df.key_type == 'float')]\n", "\n", "df" @@ -278,7 +287,9 @@ "source": [ "# break down into a train/set set\n", "X, y, weights = get_dataset(df)\n", - "train_test_sets = sklearn.model_selection.train_test_split(X, y, weights, test_size=0.15, random_state=1)\n", + "train_test_sets = sklearn.model_selection.train_test_split(\n", + " X, y, weights, test_size=0.15, random_state=1\n", + ")\n", "X_train, X_test, y_train, y_test, weights_train, weights_test = train_test_sets\n", "X_train.shape, X_test.shape" ] @@ -307,7 +318,7 @@ ], "source": [ "model = sklearn.tree.DecisionTreeClassifier(max_depth=4, max_leaf_nodes=8)\n", - "model.fit(X_train, y_train) #, weights_train)" + "model.fit(X_train, y_train) # , weights_train)" ] }, { @@ -389,8 +400,15 @@ ], "source": [ "import matplotlib.pyplot as plt\n", - "plt.figure(figsize=(12,12))\n", - "viz = sklearn.tree.plot_tree(model, fontsize=8, class_names=list(model.classes_), feature_names=[\"k\", \"rows\", \"cols\", \"use_memory_pool\"], impurity=True)" + "\n", + "plt.figure(figsize=(12, 12))\n", + "viz = sklearn.tree.plot_tree(\n", + " model,\n", + " fontsize=8,\n", + " class_names=list(model.classes_),\n", + " feature_names=[\"k\", \"rows\", \"cols\", \"use_memory_pool\"],\n", + " impurity=True,\n", + ")" ] }, { @@ -441,33 +459,36 @@ " classes = model.classes_\n", " tree = model.tree_\n", " feature_names = [\"k\", \"rows\", \"cols\", \"use_memory_pool\"]\n", - " \n", + "\n", " def _get_label(nodeid):\n", - " \"\"\" returns the most frequent class 
name for the node \"\"\"\n", + " \"\"\"returns the most frequent class name for the node\"\"\"\n", " return classes[np.argsort(tree.value[nodeid, 0])[-1]]\n", - " \n", + "\n", " def _is_leaf_node(nodeid):\n", - " \"\"\" returns whether or not the node is a leaf node in the tree\"\"\"\n", + " \"\"\"returns whether or not the node is a leaf node in the tree\"\"\"\n", " # negative values here indicate we're a leaf\n", " if tree.feature[nodeid] < 0:\n", " return True\n", - " \n", + "\n", " # some nodes have both branches with the same label, combine those\n", - " left, right = tree.children_left[nodeid], tree.children_right[nodeid] \n", - " if (_is_leaf_node(left) and \n", - " _is_leaf_node(right) and \n", - " _get_label(left) == _get_label(right)):\n", + " left, right = tree.children_left[nodeid], tree.children_right[nodeid]\n", + " if (\n", + " _is_leaf_node(left)\n", + " and _is_leaf_node(right)\n", + " and _get_label(left) == _get_label(right)\n", + " ):\n", " return True\n", - " \n", + "\n", " return False\n", - " \n", + "\n", " code = []\n", + "\n", " def _convert_node(nodeid, indent):\n", " if _is_leaf_node(nodeid):\n", " # we're a leaf node, just output the label of the most frequent algorithm\n", " class_name = _get_label(nodeid)\n", " code.append(\" \" * indent + f\"return Algo::{class_name};\")\n", - " else: \n", + " else:\n", " feature = feature_names[tree.feature[nodeid]]\n", " threshold = int(np.floor(tree.threshold[nodeid]))\n", " code.append(\" \" * indent + f\"if ({feature} > {threshold}) \" + \"{\")\n", @@ -475,13 +496,16 @@ " code.append(\" \" * indent + \"} else {\")\n", " _convert_node(tree.children_left[nodeid], indent + 2)\n", " code.append(\" \" * indent + \"}\")\n", - " \n", - " code.append(\"inline Algo choose_select_k_algorithm(size_t rows, size_t cols, int k)\")\n", + "\n", + " code.append(\n", + " \"inline Algo choose_select_k_algorithm(size_t rows, size_t cols, int k)\"\n", + " )\n", " code.append(\"{\")\n", " _convert_node(0, indent=2)\n", " code.append(\"}\")\n", " return \"\\n\".join(code)\n", "\n", + "\n", "code = convert_model_to_code(model)\n", "print(code)" ] @@ -506,14 +530,27 @@ "source": [ "# also update the source code in raft/matrix/detail/select_k.cuh\n", "import pathlib\n", - "select_k_path = pathlib.Path.cwd() / \"..\" / \"..\" / \"..\" / \"include\" / \"raft\" / \"matrix\" / \"detail\" / \"select_k-inl.cuh\"\n", + "\n", + "select_k_path = (\n", + " pathlib.Path.cwd()\n", + " / \"..\"\n", + " / \"..\"\n", + " / \"..\"\n", + " / \"include\"\n", + " / \"raft\"\n", + " / \"matrix\"\n", + " / \"detail\"\n", + " / \"select_k-inl.cuh\"\n", + ")\n", "source_lines = open(select_k_path.resolve()).read().split(\"\\n\")\n", "\n", "# figure out the location of the code snippet in the file, and splice it in\n", "code_lines = code.split(\"\\n\")\n", "first_line = source_lines.index(code_lines[0])\n", - "last_line = source_lines.index(code_lines[-1], first_line)\n", - "new_source = source_lines[:first_line] + code_lines + source_lines[last_line+1:]\n", + "last_line = source_lines.index(code_lines[-1], first_line)\n", + "new_source = (\n", + " source_lines[:first_line] + code_lines + source_lines[last_line + 1 :]\n", + ")\n", "\n", "open(select_k_path.resolve(), \"w\").write(\"\\n\".join(new_source))" ] diff --git a/cpp/scripts/heuristics/select_k/generate_plots.ipynb b/cpp/scripts/heuristics/select_k/generate_plots.ipynb index ffdad58b1c..4e0d048a56 100644 --- a/cpp/scripts/heuristics/select_k/generate_plots.ipynb +++ 
b/cpp/scripts/heuristics/select_k/generate_plots.ipynb @@ -15,12 +15,13 @@ "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", + "\n", "sns.set_theme()" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f91d6f1d-e198-46c8-9ac6-955995f058d1", "metadata": { "tags": [] @@ -233,9 +234,10 @@ ], "source": [ "from select_k_dataset import load_dataframe, get_dataset\n", + "\n", "df = load_dataframe(\"select_k_times.json\")\n", - "df = df[(df.use_memory_pool == True)]\n", - "df = df[(df.index_type == 'int64_t') & (df.key_type == 'float')]\n", + "df = df[(df.use_memory_pool == True)] # noqa: E712\n", + "df = df[(df.index_type == \"int64_t\") & (df.key_type == \"float\")]\n", "df" ] }, @@ -253,24 +255,33 @@ " for algo in sorted(set(df.algo)):\n", " current = df[(df.algo == algo) & (df.time < np.inf)]\n", " ax.plot(current[x_axis], current[\"time\"], label=algo)\n", - " ax.set_xscale('log', base=2)\n", - " ax.set_yscale('log', base=2)\n", + " ax.set_xscale(\"log\", base=2)\n", + " ax.set_yscale(\"log\", base=2)\n", " ax.set_xlabel(x_axis)\n", " ax.set_ylabel(\"time(s)\")\n", " ax.set_title(title)\n", " fig.set_dpi(200)\n", - " ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=4)\n", - "# fig.legend()\n", + " ax.legend(loc=\"upper center\", bbox_to_anchor=(0.5, -0.15), ncol=4)\n", + " # fig.legend()\n", " plt.show()\n", "\n", + "\n", "def generate_k_plot(df, col, row):\n", - " return generate_plot(df[(df.col == col) & (df.row == row)], \"k\", f\"#cols={col}, #rows={row}\")\n", + " return generate_plot(\n", + " df[(df.col == col) & (df.row == row)], \"k\", f\"#cols={col}, #rows={row}\"\n", + " )\n", + "\n", "\n", "def generate_col_plot(df, row, k):\n", - " return generate_plot(df[(df.row == row) & (df.k == k)], \"col\", f\"#rows={row}, k={k}\")\n", + " return generate_plot(\n", + " df[(df.row == row) & (df.k == k)], \"col\", f\"#rows={row}, k={k}\"\n", + " )\n", + "\n", "\n", "def generate_row_plot(df, col, k):\n", - " return generate_plot(df[(df.col == col) & (df.k == k)], \"row\", f\"#cols={col}, k={k}\")" + " return generate_plot(\n", + " df[(df.col == col) & (df.k == k)], \"row\", f\"#cols={col}, k={k}\"\n", + " )" ] }, { diff --git a/cpp/scripts/include_checker.py b/cpp/scripts/include_checker.py index efbebdb765..5c45f63f0d 100644 --- a/cpp/scripts/include_checker.py +++ b/cpp/scripts/include_checker.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2020-2023, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # @@ -6,7 +6,6 @@ import sys import re import os -import subprocess import argparse @@ -15,14 +14,20 @@ exclusion_regex = re.compile(r".*thirdparty.*") + def parse_args(): argparser = argparse.ArgumentParser( - "Checks for a consistent '#include' syntax") - argparser.add_argument("--regex", type=str, - default=r"[.](cu|cuh|h|hpp|hxx|cpp)$", - help="Regex string to filter in sources") - argparser.add_argument("dirs", type=str, nargs="*", - help="List of dirs where to find sources") + "Checks for a consistent '#include' syntax" + ) + argparser.add_argument( + "--regex", + type=str, + default=r"[.](cu|cuh|h|hpp|hxx|cpp)$", + help="Regex string to filter in sources", + ) + argparser.add_argument( + "dirs", type=str, nargs="*", help="List of dirs where to find sources" + ) args = argparser.parse_args() args.regex_compiled = re.compile(args.regex) return args @@ -33,7 +38,9 @@ def list_all_source_file(file_regex, srcdirs): for srcdir in srcdirs: for root, dirs, files in os.walk(srcdir): for f in files: - if not re.search(exclusion_regex, root) and re.search(file_regex, f): + if not re.search(exclusion_regex, root) and re.search( + file_regex, f + ): src = os.path.join(root, f) all_files.append(src) return all_files @@ -51,10 +58,10 @@ def check_includes_in(src): inc_file = val[1:-1] # strip out " or < full_path = os.path.join(dir, inc_file) line_num = line_number + 1 - if val[0] == "\"" and not os.path.exists(full_path): + if val[0] == '"' and not os.path.exists(full_path): errs.append("Line:%d use #include <...>" % line_num) elif val[0] == "<" and os.path.exists(full_path): - errs.append("Line:%d use #include \"...\"" % line_num) + errs.append('Line:%d use #include "..."' % line_num) return errs diff --git a/cpp/scripts/run-clang-compile.py b/cpp/scripts/run-clang-compile.py index 30ff6fac98..d1eef26627 100644 --- a/cpp/scripts/run-clang-compile.py +++ b/cpp/scripts/run-clang-compile.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2020-2023, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -18,7 +18,8 @@ CMAKE_COMPILER_REGEX = re.compile( - r"^\s*CMAKE_CXX_COMPILER:FILEPATH=(.+)\s*$", re.MULTILINE) + r"^\s*CMAKE_CXX_COMPILER:FILEPATH=(.+)\s*$", re.MULTILINE +) CLANG_COMPILER = "clang++" GPU_ARCH_REGEX = re.compile(r"sm_(\d+)") SPACES = re.compile(r"\s+") @@ -26,28 +27,43 @@ XPTXAS_FLAG = re.compile(r"-((Xptxas)|(-ptxas-options))=?") # any options that may have equal signs in nvcc but not in clang # add those options here if you find any -OPTIONS_NO_EQUAL_SIGN = ['-isystem'] +OPTIONS_NO_EQUAL_SIGN = ["-isystem"] SEPARATOR = "-" * 8 END_SEPARATOR = "*" * 64 def parse_args(): - argparser = argparse.ArgumentParser("Runs clang++ on a project instead of nvcc") + argparser = argparse.ArgumentParser( + "Runs clang++ on a project instead of nvcc" + ) argparser.add_argument( - "-cdb", type=str, default="compile_commands.json", - help="Path to cmake-generated compilation database") + "-cdb", + type=str, + default="compile_commands.json", + help="Path to cmake-generated compilation database", + ) argparser.add_argument( - "-ignore", type=str, default=None, - help="Regex used to ignore files from checking") + "-ignore", + type=str, + default=None, + help="Regex used to ignore files from checking", + ) argparser.add_argument( - "-select", type=str, default=None, - help="Regex used to select files for checking") + "-select", + type=str, + default=None, + help="Regex used to select files for checking", + ) argparser.add_argument( - "-j", type=int, default=-1, help="Number of parallel jobs to launch.") + "-j", type=int, default=-1, help="Number of parallel jobs to launch." + ) argparser.add_argument( - "-build_dir", type=str, default=None, + "-build_dir", + type=str, + default=None, help="Directory from which compile commands should be called. 
" - "By default, directory of compile_commands.json file.") + "By default, directory of compile_commands.json file.", + ) args = argparser.parse_args() if args.j <= 0: args.j = mp.cpu_count() @@ -92,11 +108,14 @@ def get_gpu_archs(command): # clang only accepts a single architecture, so first determine the lowest archs = [] for loc in range(len(command)): - if (command[loc] != "-gencode" and command[loc] != "--generate-code" - and not command[loc].startswith("--generate-code=")): + if ( + command[loc] != "-gencode" + and command[loc] != "--generate-code" + and not command[loc].startswith("--generate-code=") + ): continue if command[loc].startswith("--generate-code="): - arch_flag = command[loc][len("--generate-code="):] + arch_flag = command[loc][len("--generate-code=") :] else: arch_flag = command[loc + 1] match = GPU_ARCH_REGEX.search(arch_flag) @@ -106,8 +125,9 @@ def get_gpu_archs(command): def get_index(arr, item_options): - return set(i for i, s in enumerate(arr) for item in item_options - if s == item) + return set( + i for i, s in enumerate(arr) for item in item_options if s == item + ) def remove_items(arr, item_options): @@ -120,8 +140,12 @@ def remove_items_plus_one(arr, item_options): if i < len(arr) - 1: del arr[i + 1] del arr[i] - idx = set(i for i, s in enumerate(arr) for item in item_options - if s.startswith(item + "=")) + idx = set( + i + for i, s in enumerate(arr) + for item in item_options + if s.startswith(item + "=") + ) for i in sorted(idx, reverse=True): del arr[i] @@ -131,7 +155,7 @@ def add_cuda_path(command, nvcc): if not nvcc_path: raise Exception("Command %s has invalid compiler %s" % (command, nvcc)) cuda_root = os.path.dirname(os.path.dirname(nvcc_path)) - command.append('--cuda-path=%s' % cuda_root) + command.append("--cuda-path=%s" % cuda_root) def get_clang_args(cmd, build_dir): @@ -152,57 +176,63 @@ def get_clang_args(cmd, build_dir): # provide proper cuda path to clang add_cuda_path(command, cc_orig) # remove all kinds of nvcc flags clang doesn't know about - remove_items_plus_one(command, [ - "--generate-code", - "-gencode", - "--x", - "-x", - "--compiler-bindir", - "-ccbin", - "--diag_suppress", - "-diag-suppress", - "--default-stream", - "-default-stream", - ]) - remove_items(command, [ - "-extended-lambda", - "--extended-lambda", - "-expt-extended-lambda", - "--expt-extended-lambda", - "-expt-relaxed-constexpr", - "--expt-relaxed-constexpr", - "--device-debug", - "-G", - "--generate-line-info", - "-lineinfo", - ]) + remove_items_plus_one( + command, + [ + "--generate-code", + "-gencode", + "--x", + "-x", + "--compiler-bindir", + "-ccbin", + "--diag_suppress", + "-diag-suppress", + "--default-stream", + "-default-stream", + ], + ) + remove_items( + command, + [ + "-extended-lambda", + "--extended-lambda", + "-expt-extended-lambda", + "--expt-extended-lambda", + "-expt-relaxed-constexpr", + "--expt-relaxed-constexpr", + "--device-debug", + "-G", + "--generate-line-info", + "-lineinfo", + ], + ) # "-x cuda" is the right usage in clang command.extend(["-x", "cuda"]) # we remove -Xcompiler flags: here we basically have to hope for the # best that clang++ will accept any flags which nvcc passed to gcc for i, c in reversed(list(enumerate(command))): - new_c = XCOMPILER_FLAG.sub('', c) + new_c = XCOMPILER_FLAG.sub("", c) if new_c == c: continue - command[i:i + 1] = new_c.split(',') + command[i : i + 1] = new_c.split(",") # we also change -Xptxas to -Xcuda-ptxas, always adding space here for i, c in reversed(list(enumerate(command))): if 
XPTXAS_FLAG.search(c): if not c.endswith("=") and i < len(command) - 1: del command[i + 1] - command[i] = '-Xcuda-ptxas' - command.insert(i + 1, XPTXAS_FLAG.sub('', c)) + command[i] = "-Xcuda-ptxas" + command.insert(i + 1, XPTXAS_FLAG.sub("", c)) # several options like isystem don't expect `=` for opt in OPTIONS_NO_EQUAL_SIGN: - opt_eq = opt + '=' + opt_eq = opt + "=" # make sure that we iterate from back to front here for insert for i, c in reversed(list(enumerate(command))): if not c.startswith(opt_eq): continue - x = c.split('=') + x = c.split("=") # we only care about the first `=` command[i] = x[0] - command.insert(i + 1, '='.join(x[1:])) + command.insert(i + 1, "=".join(x[1:])) # use extensible whole program, to avoid ptx resolution/linking command.extend(["-Xcuda-ptxas", "-ewp"]) # for libcudacxx, we need to allow variadic functions @@ -210,13 +240,17 @@ def get_clang_args(cmd, build_dir): # add some additional CUDA intrinsics cuda_intrinsics_file = os.path.join( os.path.dirname(os.path.realpath(__file__)), - "__clang_cuda_additional_intrinsics.h") + "__clang_cuda_additional_intrinsics.h", + ) command.extend(["-include", cuda_intrinsics_file]) # somehow this option gets onto the commandline, it is unrecognized by clang - remove_items(command, [ - "--forward-unknown-to-host-compiler", - "-forward-unknown-to-host-compiler" - ]) + remove_items( + command, + [ + "--forward-unknown-to-host-compiler", + "-forward-unknown-to-host-compiler", + ], + ) # do not treat warnings as errors here ! for i, x in reversed(list(enumerate(command))): if x.startswith("-Werror"): @@ -228,8 +262,14 @@ def get_clang_args(cmd, build_dir): def run_clang_command(clang_cmd, cwd): cmd = " ".join(clang_cmd) - result = subprocess.run(cmd, check=False, shell=True, cwd=cwd, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + result = subprocess.run( + cmd, + check=False, + shell=True, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) result.stdout = result.stdout.decode("utf-8").strip() out = "CMD: " + cmd + "\n" out += "CWD: " + cwd + "\n" @@ -281,11 +321,15 @@ def run_sequential(args, all_files): results = [] for cmd in all_files: # skip files that we don't want to look at - if args.ignore_compiled is not None and \ - re.search(args.ignore_compiled, cmd["file"]) is not None: + if ( + args.ignore_compiled is not None + and re.search(args.ignore_compiled, cmd["file"]) is not None + ): continue - if args.select_compiled is not None and \ - re.search(args.select_compiled, cmd["file"]) is None: + if ( + args.select_compiled is not None + and re.search(args.select_compiled, cmd["file"]) is None + ): continue results.append(run_clang(cmd, args)) return all(results) @@ -305,11 +349,15 @@ def run_parallel(args, all_files): results = [] for cmd in all_files: # skip files that we don't want to look at - if args.ignore_compiled is not None and \ - re.search(args.ignore_compiled, cmd["file"]) is not None: + if ( + args.ignore_compiled is not None + and re.search(args.ignore_compiled, cmd["file"]) is not None + ): continue - if args.select_compiled is not None and \ - re.search(args.select_compiled, cmd["file"]) is None: + if ( + args.select_compiled is not None + and re.search(args.select_compiled, cmd["file"]) is None + ): continue results.append(pool.apply_async(run_clang, args=(cmd, args))) results_final = [r.get() for r in results] diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index 8382668ec9..2c051fd9f7 100644 --- a/cpp/scripts/run-clang-tidy.py +++ 
b/cpp/scripts/run-clang-tidy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2020-2023, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -20,7 +20,8 @@ EXPECTED_VERSIONS = ("20.1.4",) VERSION_REGEX = re.compile(r"clang version ([0-9.]+)") CMAKE_COMPILER_REGEX = re.compile( - r"^\s*CMAKE_CXX_COMPILER:FILEPATH=(.+)\s*$", re.MULTILINE) + r"^\s*CMAKE_CXX_COMPILER:FILEPATH=(.+)\s*$", re.MULTILINE +) CLANG_COMPILER = "clang++" GPU_ARCH_REGEX = re.compile(r"sm_(\d+)") SPACES = re.compile(r"\s+") @@ -28,7 +29,7 @@ XPTXAS_FLAG = re.compile(r"-((Xptxas)|(-ptxas-options))=?") # any options that may have equal signs in nvcc but not in clang # add those options here if you find any -OPTIONS_NO_EQUAL_SIGN = ['-isystem'] +OPTIONS_NO_EQUAL_SIGN = ["-isystem"] SEPARATOR = "-" * 8 END_SEPARATOR = "*" * 64 @@ -36,28 +37,48 @@ def parse_args(): argparser = argparse.ArgumentParser("Runs clang-tidy on a project") argparser.add_argument( - "-cdb", type=str, default="compile_commands.json", - help="Path to cmake-generated compilation database") + "-cdb", + type=str, + default="compile_commands.json", + help="Path to cmake-generated compilation database", + ) argparser.add_argument( - "-exe", type=str, default="clang-tidy", help="Path to clang-tidy exe") + "-exe", type=str, default="clang-tidy", help="Path to clang-tidy exe" + ) argparser.add_argument( - "-ignore", type=str, default=None, - help="Regex used to ignore files from checking") + "-ignore", + type=str, + default=None, + help="Regex used to ignore files from checking", + ) argparser.add_argument( - "-select", type=str, default=None, - help="Regex used to select files for checking") + "-select", + type=str, + default=None, + help="Regex used to select files for checking", + ) argparser.add_argument( - "-j", type=int, default=-1, help="Number of parallel jobs to launch.") + "-j", type=int, default=-1, help="Number of parallel jobs to launch." + ) argparser.add_argument( - "-root", type=str, default=None, - help="Repo root path to filter headers correctly, CWD by default.") + "-root", + type=str, + default=None, + help="Repo root path to filter headers correctly, CWD by default.", + ) argparser.add_argument( - "-thrust_dir", type=str, default=None, - help="Pass the directory to a THRUST git repo recent enough for clang.") + "-thrust_dir", + type=str, + default=None, + help="Pass the directory to a THRUST git repo recent enough for clang.", + ) argparser.add_argument( - "-build_dir", type=str, default=None, + "-build_dir", + type=str, + default=None, help="Directory from which compile commands should be called. 
" - "By default, directory of compile_commands.json file.") + "By default, directory of compile_commands.json file.", + ) args = argparser.parse_args() if args.j <= 0: args.j = mp.cpu_count() @@ -71,8 +92,10 @@ def parse_args(): raise Exception("Failed to figure out clang compiler version!") version = version.group(1) if version not in EXPECTED_VERSIONS: - raise Exception("clang compiler version must be in %s found '%s'" % - (EXPECTED_VERSIONS, version)) + raise Exception( + "clang compiler version must be in %s found '%s'" + % (EXPECTED_VERSIONS, version) + ) if not os.path.exists(args.cdb): raise Exception("Compilation database '%s' missing" % args.cdb) # we assume that this script is run from repo root @@ -82,7 +105,8 @@ def parse_args(): # we need to have a recent enough cub version for clang to compile if args.thrust_dir is None: args.thrust_dir = os.path.join( - os.path.dirname(args.cdb), "thrust_1.15", "src", "thrust_1.15") + os.path.dirname(args.cdb), "thrust_1.15", "src", "thrust_1.15" + ) if args.build_dir is None: args.build_dir = os.path.dirname(args.cdb) if not os.path.isdir(args.thrust_dir): @@ -120,11 +144,14 @@ def get_gpu_archs(command): # clang only accepts a single architecture, so first determine the lowest archs = [] for loc in range(len(command)): - if (command[loc] != "-gencode" and command[loc] != "--generate-code" - and not command[loc].startswith("--generate-code=")): + if ( + command[loc] != "-gencode" + and command[loc] != "--generate-code" + and not command[loc].startswith("--generate-code=") + ): continue if command[loc].startswith("--generate-code="): - arch_flag = command[loc][len("--generate-code="):] + arch_flag = command[loc][len("--generate-code=") :] else: arch_flag = command[loc + 1] match = GPU_ARCH_REGEX.search(arch_flag) @@ -134,8 +161,9 @@ def get_gpu_archs(command): def get_index(arr, item_options): - return set(i for i, s in enumerate(arr) for item in item_options - if s == item) + return set( + i for i, s in enumerate(arr) for item in item_options if s == item + ) def remove_items(arr, item_options): @@ -148,8 +176,12 @@ def remove_items_plus_one(arr, item_options): if i < len(arr) - 1: del arr[i + 1] del arr[i] - idx = set(i for i, s in enumerate(arr) for item in item_options - if s.startswith(item + "=")) + idx = set( + i + for i, s in enumerate(arr) + for item in item_options + if s.startswith(item + "=") + ) for i in sorted(idx, reverse=True): del arr[i] @@ -159,7 +191,7 @@ def add_cuda_path(command, nvcc): if not nvcc_path: raise Exception("Command %s has invalid compiler %s" % (command, nvcc)) cuda_root = os.path.dirname(os.path.dirname(nvcc_path)) - command.append('--cuda-path=%s' % cuda_root) + command.append("--cuda-path=%s" % cuda_root) def get_tidy_args(cmd, args): @@ -183,57 +215,63 @@ def get_tidy_args(cmd, args): # provide proper cuda path to clang add_cuda_path(command, cc_orig) # remove all kinds of nvcc flags clang doesn't know about - remove_items_plus_one(command, [ - "--generate-code", - "-gencode", - "--x", - "-x", - "--compiler-bindir", - "-ccbin", - "--diag_suppress", - "-diag-suppress", - "--default-stream", - "-default-stream", - ]) - remove_items(command, [ - "-extended-lambda", - "--extended-lambda", - "-expt-extended-lambda", - "--expt-extended-lambda", - "-expt-relaxed-constexpr", - "--expt-relaxed-constexpr", - "--device-debug", - "-G", - "--generate-line-info", - "-lineinfo", - ]) + remove_items_plus_one( + command, + [ + "--generate-code", + "-gencode", + "--x", + "-x", + "--compiler-bindir", + "-ccbin", + 
"--diag_suppress", + "-diag-suppress", + "--default-stream", + "-default-stream", + ], + ) + remove_items( + command, + [ + "-extended-lambda", + "--extended-lambda", + "-expt-extended-lambda", + "--expt-extended-lambda", + "-expt-relaxed-constexpr", + "--expt-relaxed-constexpr", + "--device-debug", + "-G", + "--generate-line-info", + "-lineinfo", + ], + ) # "-x cuda" is the right usage in clang command.extend(["-x", "cuda"]) # we remove -Xcompiler flags: here we basically have to hope for the # best that clang++ will accept any flags which nvcc passed to gcc for i, c in reversed(list(enumerate(command))): - new_c = XCOMPILER_FLAG.sub('', c) + new_c = XCOMPILER_FLAG.sub("", c) if new_c == c: continue - command[i:i + 1] = new_c.split(',') + command[i : i + 1] = new_c.split(",") # we also change -Xptxas to -Xcuda-ptxas, always adding space here for i, c in reversed(list(enumerate(command))): if XPTXAS_FLAG.search(c): if not c.endswith("=") and i < len(command) - 1: del command[i + 1] - command[i] = '-Xcuda-ptxas' - command.insert(i + 1, XPTXAS_FLAG.sub('', c)) + command[i] = "-Xcuda-ptxas" + command.insert(i + 1, XPTXAS_FLAG.sub("", c)) # several options like isystem don't expect `=` for opt in OPTIONS_NO_EQUAL_SIGN: - opt_eq = opt + '=' + opt_eq = opt + "=" # make sure that we iterate from back to front here for insert for i, c in reversed(list(enumerate(command))): if not c.startswith(opt_eq): continue - x = c.split('=') + x = c.split("=") # we only care about the first `=` command[i] = x[0] - command.insert(i + 1, '='.join(x[1:])) + command.insert(i + 1, "=".join(x[1:])) # use extensible whole program, to avoid ptx resolution/linking command.extend(["-Xcuda-ptxas", "-ewp"]) # for libcudacxx, we need to allow variadic functions @@ -241,13 +279,17 @@ def get_tidy_args(cmd, args): # add some additional CUDA intrinsics cuda_intrinsics_file = os.path.join( os.path.dirname(os.path.realpath(__file__)), - "__clang_cuda_additional_intrinsics.h") + "__clang_cuda_additional_intrinsics.h", + ) command.extend(["-include", cuda_intrinsics_file]) # somehow this option gets onto the commandline, it is unrecognized by tidy - remove_items(command, [ - "--forward-unknown-to-host-compiler", - "-forward-unknown-to-host-compiler" - ]) + remove_items( + command, + [ + "--forward-unknown-to-host-compiler", + "-forward-unknown-to-host-compiler", + ], + ) # do not treat warnings as errors here ! 
for i, x in reversed(list(enumerate(command))): if x.startswith("-Werror"): @@ -271,8 +313,14 @@ def check_output_for_errors(output): def run_clang_tidy_command(tidy_cmd, cwd): cmd = " ".join(tidy_cmd) - result = subprocess.run(cmd, check=False, shell=True, cwd=cwd, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + result = subprocess.run( + cmd, + check=False, + shell=True, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) result.stdout = result.stdout.decode("utf-8").strip() out = "CMD: " + cmd + "\n" out += "EXIT-CODE: %d\n" % result.returncode @@ -300,7 +348,8 @@ def __exit__(self, _, __, ___): def print_result(passed, stdout, file, errors): if any(errors): raise Exception( - "File %s: got %d errors:\n%s" % (file, len(errors), stdout)) + "File %s: got %d errors:\n%s" % (file, len(errors), stdout) + ) status_str = "PASSED" if passed else "FAILED" print("%s File:%s %s %s" % (SEPARATOR, file, status_str, SEPARATOR)) if not passed and stdout: @@ -354,11 +403,15 @@ def run_sequential(args, all_files): # actual tidy checker for cmd in all_files: # skip files that we don't want to look at - if args.ignore_compiled is not None and \ - re.search(args.ignore_compiled, cmd["file"]) is not None: + if ( + args.ignore_compiled is not None + and re.search(args.ignore_compiled, cmd["file"]) is not None + ): continue - if args.select_compiled is not None and \ - re.search(args.select_compiled, cmd["file"]) is None: + if ( + args.select_compiled is not None + and re.search(args.select_compiled, cmd["file"]) is None + ): continue results.append(run_clang_tidy(cmd, args)) return parse_results(results) @@ -379,11 +432,15 @@ def run_parallel(args, all_files): # actual tidy checker for cmd in all_files: # skip files that we don't want to look at - if args.ignore_compiled is not None and \ - re.search(args.ignore_compiled, cmd["file"]) is not None: + if ( + args.ignore_compiled is not None + and re.search(args.ignore_compiled, cmd["file"]) is not None + ): continue - if args.select_compiled is not None and \ - re.search(args.select_compiled, cmd["file"]) is None: + if ( + args.select_compiled is not None + and re.search(args.select_compiled, cmd["file"]) is None + ): continue results.append(pool.apply_async(run_clang_tidy, args=(cmd, args))) results_final = [r.get() for r in results] @@ -409,22 +466,29 @@ def main(): # first get a list of all checks that were run ret = subprocess.check_output(args.exe + " --list-checks", shell=True) ret = ret.decode("utf-8") - checks = [line.strip() for line in ret.splitlines() - if line.startswith(' ' * 4)] + checks = [ + line.strip() + for line in ret.splitlines() + if line.startswith(" " * 4) + ] max_check_len = max(len(c) for c in checks) check_counts = dict() content = os.linesep.join(lines) for check in checks: check_counts[check] = content.count(check) sorted_counts = sorted( - check_counts.items(), key=lambda x: x[1], reverse=True) - print("Failed {} check(s) in total. Counts as per below:".format( - sum(1 for _, count in sorted_counts if count > 0))) + check_counts.items(), key=lambda x: x[1], reverse=True + ) + print( + "Failed {} check(s) in total. Counts as per below:".format( + sum(1 for _, count in sorted_counts if count > 0) + ) + ) for check, count in sorted_counts: if count <= 0: break n_space = max_check_len - len(check) + 4 - print("{}:{}{}".format(check, ' ' * n_space, count)) + print("{}:{}{}".format(check, " " * n_space, count)) raise Exception("clang-tidy failed! 
Refer to the errors above.") diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py index da66e37996..0cfa0c2c2a 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py @@ -69,128 +69,141 @@ dict( path_prefix="canberra", OpT="cuvs::distance::detail::ops::canberra_distance_op", - archs = [60], + archs=[60], ), dict( path_prefix="correlation", OpT="cuvs::distance::detail::ops::correlation_distance_op", - archs = [60], + archs=[60], ), dict( path_prefix="cosine", OpT="cuvs::distance::detail::ops::cosine_distance_op", - archs = [60, 80], + archs=[60, 80], ), dict( path_prefix="hamming_unexpanded", OpT="cuvs::distance::detail::ops::hamming_distance_op", - archs = [60], + archs=[60], ), dict( path_prefix="hellinger_expanded", OpT="cuvs::distance::detail::ops::hellinger_distance_op", - archs = [60], + archs=[60], ), # inner product is handled by cublas. dict( path_prefix="jensen_shannon", OpT="cuvs::distance::detail::ops::jensen_shannon_distance_op", - archs = [60], + archs=[60], ), dict( path_prefix="kl_divergence", OpT="cuvs::distance::detail::ops::kl_divergence_op", - archs = [60], + archs=[60], ), dict( path_prefix="l1", OpT="cuvs::distance::detail::ops::l1_distance_op", - archs = [60], + archs=[60], ), dict( path_prefix="l2_expanded", OpT="cuvs::distance::detail::ops::l2_exp_distance_op", - archs = [60, 80], + archs=[60, 80], ), dict( path_prefix="l2_unexpanded", OpT="cuvs::distance::detail::ops::l2_unexp_distance_op", - archs = [60], + archs=[60], ), dict( path_prefix="l_inf", OpT="cuvs::distance::detail::ops::l_inf_distance_op", - archs = [60], + archs=[60], ), dict( path_prefix="lp_unexpanded", OpT="cuvs::distance::detail::ops::lp_unexp_distance_op", - archs = [60], + archs=[60], ), dict( path_prefix="russel_rao", OpT="cuvs::distance::detail::ops::russel_rao_distance_op", - archs = [60], - ), + archs=[60], + ), ] + def arch_headers(archs): - include_headers ="\n".join([ - f"#include \"dispatch_sm{arch}.cuh\"" - for arch in archs - ]) + include_headers = "\n".join( + [f'#include "dispatch_sm{arch}.cuh"' for arch in archs] + ) return include_headers - for op in op_instances: for dt in data_type_instances: - DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); + DataT, AccT, OutT, IdxT = ( + dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"] + ) path = f"dispatch_{op['path_prefix']}_{DataT}_{AccT}_{OutT}_{IdxT}.cu" with open(path, "w") as f: f.write(header) f.write(arch_headers(op["archs"])) f.write(macro) - OpT = op['OpT'] + OpT = op["OpT"] FinOpT = "raft::identity_op" - f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") - f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") + f.write( + f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n" + ) + f.write( + "\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n" + ) print(f"src/distance/detail/pairwise_matrix/{path}") # Dispatch kernels for with the RBF fin op. 
with open("dispatch_rbf.cu", "w") as f: - OpT="cuvs::distance::detail::ops::l2_unexp_distance_op" - archs = [60] + OpT = "cuvs::distance::detail::ops::l2_unexp_distance_op" + archs = [60] - f.write(header) - f.write("#include \"../kernels/rbf_fin_op.cuh\" // rbf_fin_op\n") - f.write(arch_headers(archs)) - f.write(macro) + f.write(header) + f.write('#include "../kernels/rbf_fin_op.cuh" // rbf_fin_op\n') + f.write(arch_headers(archs)) + f.write(macro) - for dt in data_type_instances: - DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); - IdxT = "int64_t" # overwrite IdxT - - FinOpT = f"cuvs::distance::kernels::detail::rbf_fin_op<{DataT}>" - f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") + for dt in data_type_instances: + DataT, AccT, OutT, IdxT = ( + dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"] + ) + IdxT = "int64_t" # overwrite IdxT - f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") + FinOpT = f"cuvs::distance::kernels::detail::rbf_fin_op<{DataT}>" + f.write( + f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n" + ) + f.write( + "\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n" + ) - print("src/distance/detail/pairwise_matrix/dispatch_rbf.cu") + print("src/distance/detail/pairwise_matrix/dispatch_rbf.cu") # L2 with int64_t indices for kmeans code int64_t_op_instances = [ dict( path_prefix="l2_expanded", OpT="cuvs::distance::detail::ops::l2_exp_distance_op", - archs = [60, 80], - )] + archs=[60, 80], + ) +] for op in int64_t_op_instances: for dt in data_type_instances: - DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); + DataT, AccT, OutT, IdxT = ( + dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"] + ) IdxT = "int64_t" path = f"dispatch_{op['path_prefix']}_{DataT}_{AccT}_{OutT}_{IdxT}.cu" @@ -199,8 +212,12 @@ def arch_headers(archs): f.write(arch_headers(op["archs"])) f.write(macro) - OpT = op['OpT'] + OpT = op["OpT"] FinOpT = "raft::identity_op" - f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") - f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") + f.write( + f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n" + ) + f.write( + "\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n" + ) print(f"src/distance/detail/pairwise_matrix/{path}") diff --git a/cpp/src/neighbors/ball_cover/detail/ball_cover/registers_00_generate.py b/cpp/src/neighbors/ball_cover/detail/ball_cover/registers_00_generate.py index 7068014e9c..f5ef49f67f 100644 --- a/cpp/src/neighbors/ball_cover/detail/ball_cover/registers_00_generate.py +++ b/cpp/src/neighbors/ball_cover/detail/ball_cover/registers_00_generate.py @@ -93,38 +93,38 @@ """ -euclideanSq="cuvs::neighbors::ball_cover::detail::EuclideanSqFunc" +euclideanSq = "cuvs::neighbors::ball_cover::detail::EuclideanSqFunc" types = dict( int64_float=("std::int64_t", "float"), ) -path = f"registers_pass_one.cu" +path = "registers_pass_one.cu" with open(path, "w") as f: f.write(header) f.write(macro_pass_one) for type_path, (int_t, data_t) in types.items(): - f.write(f"instantiate_cuvs_neighbors_detail_rbc_low_dim_pass_one(\n") + f.write("instantiate_cuvs_neighbors_detail_rbc_low_dim_pass_one(\n") f.write(f" {int_t}, {data_t});\n") 
f.write("#undef instantiate_cuvs_neighbors_detail_rbc_low_dim_pass_one\n") print(f"src/neighbors/ball_cover/detail/ball_cover/{path}") -path = f"registers_pass_two.cu" +path = "registers_pass_two.cu" with open(path, "w") as f: f.write(header) f.write(macro_pass_two) for type_path, (int_t, data_t) in types.items(): - f.write(f"instantiate_cuvs_neighbors_detail_rbc_low_dim_pass_two(\n") + f.write("instantiate_cuvs_neighbors_detail_rbc_low_dim_pass_two(\n") f.write(f" {int_t}, {data_t});\n") f.write("#undef instantiate_cuvs_neighbors_detail_rbc_low_dim_pass_two\n") print(f"src/neighbors/ball_cover/detail/ball_cover/{path}") -path="registers_eps_pass_euclidean.cu" +path = "registers_eps_pass_euclidean.cu" with open(path, "w") as f: f.write(header) f.write(macro_pass_eps) for type_path, (int_t, data_t) in types.items(): - f.write(f"instantiate_cuvs_neighbors_detail_rbc_eps_pass(\n") + f.write("instantiate_cuvs_neighbors_detail_rbc_eps_pass(\n") f.write(f" {int_t}, {data_t}, {euclideanSq});\n") f.write("#undef instantiate_cuvs_neighbors_detail_rbc_eps_pass\n") print(f"src/neighbors/ball_cover/detail/ball_cover/{path}") diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_00_generate.py b/cpp/src/neighbors/detail/cagra/compute_distance_00_generate.py index c0b5d572fb..fde2081c12 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/compute_distance_00_generate.py @@ -19,19 +19,19 @@ * */ -{includes} +{{includes}} -namespace cuvs::neighbors::cagra::detail {{ +namespace cuvs::neighbors::cagra::detail {{{{ using namespace cuvs::distance; -{content} +{{content}} -}} // namespace cuvs::neighbors::cagra::detail +}}}} // namespace cuvs::neighbors::cagra::detail """ mxdim_team = [(128, 8), (256, 16), (512, 32)] -#mxdim_team = [(64, 8), (128, 16), (256, 32)] -#mxdim_team = [(32, 8), (64, 16), (128, 32)] +# mxdim_team = [(64, 8), (128, 16), (256, 32)] +# mxdim_team = [(32, 8), (64, 16), (128, 32)] pq_bits = [8] pq_lens = [2, 4] @@ -48,27 +48,24 @@ uint8_uint32=("uint8_t", "uint32_t", "float"), ) -metric_prefix = 'DistanceType::' +metric_prefix = "DistanceType::" specs = [] descs = [] cmake_list = [] - - # Cleanup first for f in glob.glob("compute_distance_standard_*.cu"): - os.remove(f) + os.remove(f) for f in glob.glob("compute_distance_vpq_*.cu"): - os.remove(f) + os.remove(f) # Generate new files for type_path, (data_t, idx_t, distance_t) in search_types.items(): - for (mxdim, team) in mxdim_team: + for mxdim, team in mxdim_team: # CAGRA - for metric in ['L2Expanded', 'InnerProduct', 'CosineExpanded']: - + for metric in ["L2Expanded", "InnerProduct", "CosineExpanded"]: path = f"compute_distance_standard_{metric}_{type_path}_dim{mxdim}_t{team}.cu" includes = '#include "compute_distance_standard-impl.cuh"' params = f"{metric_prefix}{metric}, {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}" @@ -83,7 +80,7 @@ for code_book_t in code_book_types: for pq_len in pq_lens: for pq_bit in pq_bits: - for metric in ['L2Expanded']: + for metric in ["L2Expanded"]: path = f"compute_distance_vpq_{metric}_{type_path}_dim{mxdim}_t{team}_{pq_bit}pq_{pq_len}subd_{code_book_t}.cu" includes = '#include "compute_distance_vpq-impl.cuh"' params = f"{metric_prefix}{metric}, {team}, {mxdim}, {pq_bit}, {pq_len}, {code_book_t}, {data_t}, {idx_t}, {distance_t}" @@ -91,18 +88,26 @@ content = f"""template struct {spec};""" specs.append(spec) with open(path, "w") as f: - f.write(template.format(includes=includes, content=content)) - cmake_list.append(f" 
src/neighbors/detail/cagra/{path}") + f.write( + template.format( + includes=includes, content=content + ) + ) + cmake_list.append( + f" src/neighbors/detail/cagra/{path}" + ) # CAGRA (Binary Hamming distance) -for (mxdim, team) in mxdim_team: - metric = 'BitwiseHamming' - type_path = 'u8_uint32' - idx_t = 'uint32_t' - distance_t = 'float' - data_t = 'uint8_t' - - path = f"compute_distance_standard_{metric}_{type_path}_dim{mxdim}_t{team}.cu" +for mxdim, team in mxdim_team: + metric = "BitwiseHamming" + type_path = "u8_uint32" + idx_t = "uint32_t" + distance_t = "float" + data_t = "uint8_t" + + path = ( + f"compute_distance_standard_{metric}_{type_path}_dim{mxdim}_t{team}.cu" + ) includes = '#include "compute_distance_standard-impl.cuh"' params = f"{metric_prefix}{metric}, {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}" spec = f"standard_descriptor_spec<{params}>" @@ -113,14 +118,14 @@ cmake_list.append(f" src/neighbors/detail/cagra/{path}") with open("compute_distance-ext.cuh", "w") as f: - includes = ''' + includes = """ #pragma once #include "compute_distance_standard.hpp" #include "compute_distance_vpq.hpp" -''' +""" newline = "\n" - contents = f''' + contents = f""" {newline.join(map(lambda s: "extern template struct " + s + ";", specs))} extern template struct @@ -142,16 +147,16 @@ }} return init(params, dataset, metric, dataset_norms); }} -''' +""" f.write(template.format(includes=includes, content=contents)) with open("compute_distance.cu", "w") as f: includes = '#include "compute_distance-ext.cuh"' newline = "\n" - contents = f''' + contents = f""" template struct instance_selector<{("," + newline + " ").join(specs)}>; -''' +""" f.write(template.format(includes=includes, content=contents)) cmake_list.sort() diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index f5e0287321..342a61afd6 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -22,7 +22,7 @@ #define COMMA , -namespace cuvs::neighbors::cagra::detail::multi_cta_search { +namespace cuvs::neighbors::cagra::detail::multi_cta_search {{ """ trailer = """ @@ -48,10 +48,10 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection(\n {data_t}, {idx_t}, {distance_t}, cuvs::neighbors::filtering::none_sample_filter);\n" + f"instantiate_kernel_selection(\n {data_t}, {idx_t}, {distance_t}, cuvs::neighbors::filtering::none_sample_filter);\n" ) f.write( - f"instantiate_kernel_selection(\n {data_t}, {idx_t}, {distance_t}, CagraSampleFilterWithQueryIdOffset>);\n" + f"instantiate_kernel_selection(\n {data_t}, {idx_t}, {distance_t}, CagraSampleFilterWithQueryIdOffset>);\n" ) f.write(trailer) # For pasting into CMakeLists.txt diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index 78e03d9b4f..0e98c6e41c 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -22,7 +22,7 @@ #define COMMA , -namespace cuvs::neighbors::cagra::detail::single_cta_search { +namespace cuvs::neighbors::cagra::detail::single_cta_search {{ """ trailer = """ @@ -51,10 +51,10 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection(\n {data_t}, {idx_t}, {distance_t}, cuvs::neighbors::filtering::none_sample_filter);\n" + 
f"instantiate_kernel_selection(\n {data_t}, {idx_t}, {distance_t}, cuvs::neighbors::filtering::none_sample_filter);\n" ) f.write( - f"instantiate_kernel_selection(\n {data_t}, {idx_t}, {distance_t}, CagraSampleFilterWithQueryIdOffset>);\n" + f"instantiate_kernel_selection(\n {data_t}, {idx_t}, {distance_t}, CagraSampleFilterWithQueryIdOffset>);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/iface/generate_iface.py b/cpp/src/neighbors/iface/generate_iface.py index 385914e4c6..ef9e56a5b2 100644 --- a/cpp/src/neighbors/iface/generate_iface.py +++ b/cpp/src/neighbors/iface/generate_iface.py @@ -207,24 +207,24 @@ const std::string& filename); """ -flat_macros = dict ( - flat = dict( +flat_macros = dict( + flat=dict( include=include_macro, definition=flat_macro, name="CUVS_INST_MG_FLAT", ) ) -pq_macros = dict ( - pq = dict( +pq_macros = dict( + pq=dict( include=include_macro, definition=pq_macro, name="CUVS_INST_MG_PQ", ) ) -cagra_macros = dict ( - cagra = dict( +cagra_macros = dict( + cagra=dict( include=include_macro, definition=cagra_macro, name="CUVS_INST_MG_CAGRA", @@ -252,17 +252,21 @@ uint8_t_uint32_t=("uint8_t", "uint32_t"), ) -for macros, types in [(flat_macros, flat_types), (pq_macros, pq_types), (cagra_macros, cagra_types)]: - for type_path, (T, IdxT) in types.items(): - for macro_path, macro in macros.items(): - path = f"iface_{macro_path}_{type_path}.cu" - with open(path, "w") as f: - f.write(header) - f.write(macro['include']) - f.write(namespace_macro) - f.write(macro["definition"]) - f.write(f"{macro['name']}({T}, {IdxT});\n\n") - f.write(f"#undef {macro['name']}\n") - f.write(footer) +for macros, types in [ + (flat_macros, flat_types), + (pq_macros, pq_types), + (cagra_macros, cagra_types), +]: + for type_path, (T, IdxT) in types.items(): + for macro_path, macro in macros.items(): + path = f"iface_{macro_path}_{type_path}.cu" + with open(path, "w") as f: + f.write(header) + f.write(macro["include"]) + f.write(namespace_macro) + f.write(macro["definition"]) + f.write(f"{macro['name']}({T}, {IdxT});\n\n") + f.write(f"#undef {macro['name']}\n") + f.write(footer) - print(f"src/neighbors/iface/{path}") + print(f"src/neighbors/iface/{path}") diff --git a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py index b55a945c27..a44c306ca3 100644 --- a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py +++ b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py @@ -161,7 +161,7 @@ path = f"ivf_flat_{macro_path}_{type_path}.cu" with open(path, "w") as f: f.write(header) - f.write(macro['include']) + f.write(macro["include"]) f.write(namespace_macro) f.write(macro["definition"]) f.write(f"{macro['name']}({T}, {IdxT});\n\n") diff --git a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py index 9feba888ba..da9b78992e 100644 --- a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py +++ b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py @@ -77,7 +77,7 @@ path = f"ivf_pq_{macro_path}_{type_path}.cu" with open(path, "w") as f: f.write(header) - f.write(macro['include']) + f.write(macro["include"]) f.write(namespace_macro) f.write(macro["definition"]) f.write(f"{macro['name']}({T}, {IdxT});\n\n") diff --git a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq_compute_similarity.py b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq_compute_similarity.py index 5c7543e973..2d9f619a62 100644 --- a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq_compute_similarity.py +++ 
b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq_compute_similarity.py @@ -65,26 +65,62 @@ #define COMMA , """ -none_filter_int64 = "cuvs::neighbors::filtering::ivf_to_sample_filter" \ - "" -bitset_filter64 = "cuvs::neighbors::filtering::ivf_to_sample_filter" \ - ">" +none_filter_int64 = ( + "cuvs::neighbors::filtering::ivf_to_sample_filter" + "" +) +bitset_filter64 = ( + "cuvs::neighbors::filtering::ivf_to_sample_filter" + ">" +) types = dict( - half_fp8_false=("half", "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", none_filter_int64), - half_fp8_true=("half", "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", none_filter_int64), + half_fp8_false=( + "half", + "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", + none_filter_int64, + ), + half_fp8_true=( + "half", + "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", + none_filter_int64, + ), half_half=("half", "half", none_filter_int64), float_half=("float", "half", none_filter_int64), - float_float= ("float", "float", none_filter_int64), - float_fp8_false=("float", "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", none_filter_int64), - float_fp8_true=("float", "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", none_filter_int64), - half_fp8_false_bitset64=("half", "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", bitset_filter64), - half_fp8_true_bitset64=("half", "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", bitset_filter64), + float_float=("float", "float", none_filter_int64), + float_fp8_false=( + "float", + "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", + none_filter_int64, + ), + float_fp8_true=( + "float", + "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", + none_filter_int64, + ), + half_fp8_false_bitset64=( + "half", + "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", + bitset_filter64, + ), + half_fp8_true_bitset64=( + "half", + "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", + bitset_filter64, + ), half_half_bitset64=("half", "half", bitset_filter64), float_half_bitset64=("float", "half", bitset_filter64), - float_float_bitset64= ("float", "float", bitset_filter64), - float_fp8_false_bitset64=("float", "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", bitset_filter64), - float_fp8_true_bitset64=("float", "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", bitset_filter64) + float_float_bitset64=("float", "float", bitset_filter64), + float_fp8_false_bitset64=( + "float", + "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", + bitset_filter64, + ), + float_fp8_true_bitset64=( + "float", + "cuvs::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", + bitset_filter64, + ), ) for path_key, (OutT, LutT, FilterT) in types.items(): @@ -92,5 +128,7 @@ with open(path, "w") as f: f.write(header) f.write(declaration_macro) - f.write(f"instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select({OutT}, {LutT}, {FilterT});\n") + f.write( + f"instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select({OutT}, {LutT}, {FilterT});\n" + ) print(f"src/neighbors/ivf_pq/{path}") diff --git a/cpp/src/neighbors/mg/generate_mg.py b/cpp/src/neighbors/mg/generate_mg.py index 14dcf19e6d..c85a1aeaf3 100644 --- a/cpp/src/neighbors/mg/generate_mg.py +++ b/cpp/src/neighbors/mg/generate_mg.py @@ -230,24 +230,24 @@ } // namespace cuvs::neighbors::cagra """ -flat_macros = dict ( - flat = dict( +flat_macros = dict( + flat=dict( include=include_macro, definition=flat_macro, name="CUVS_INST_MG_FLAT", ) ) 
-pq_macros = dict ( - pq = dict( +pq_macros = dict( + pq=dict( include=include_macro, definition=pq_macro, name="CUVS_INST_MG_PQ", ) ) -cagra_macros = dict ( - cagra = dict( +cagra_macros = dict( + cagra=dict( include=include_macro, definition=cagra_macro, name="CUVS_INST_MG_CAGRA", @@ -275,15 +275,19 @@ uint8_t_uint32_t=("uint8_t", "uint32_t"), ) -for macros, types in [(flat_macros, flat_types), (pq_macros, pq_types), (cagra_macros, cagra_types)]: - for type_path, (T, IdxT) in types.items(): - for macro_path, macro in macros.items(): - path = f"mg_{macro_path}_{type_path}.cu" - with open(path, "w") as f: - f.write(header) - f.write(macro['include']) - f.write(macro["definition"]) - f.write(f"{macro['name']}({T}, {IdxT});\n\n") - f.write(f"#undef {macro['name']}\n") +for macros, types in [ + (flat_macros, flat_types), + (pq_macros, pq_types), + (cagra_macros, cagra_types), +]: + for type_path, (T, IdxT) in types.items(): + for macro_path, macro in macros.items(): + path = f"mg_{macro_path}_{type_path}.cu" + with open(path, "w") as f: + f.write(header) + f.write(macro["include"]) + f.write(macro["definition"]) + f.write(f"{macro['name']}({T}, {IdxT});\n\n") + f.write(f"#undef {macro['name']}\n") - print(f"src/neighbors/mg/{path}") + print(f"src/neighbors/mg/{path}") diff --git a/docs/source/conf.py b/docs/source/conf.py index a2b407f8d8..5b8c38b0fb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -37,7 +37,7 @@ "breathe", "recommonmark", "sphinx_markdown_tables", - "sphinx_copybutton" + "sphinx_copybutton", ] breathe_default_project = "cuvs" @@ -74,7 +74,9 @@ # The short X.Y version. version = f"{CUVS_VERSION.major:02}.{CUVS_VERSION.minor:02}" # The full version, including alpha/beta/rc tags. -release = f"{CUVS_VERSION.major:02}.{CUVS_VERSION.minor:02}.{CUVS_VERSION.micro:02}" +release = ( + f"{CUVS_VERSION.major:02}.{CUVS_VERSION.minor:02}.{CUVS_VERSION.micro:02}" +) # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -150,7 +152,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, "cuvs.tex", "cuVS Documentation", "NVIDIA Corporation", "manual"), + ( + master_doc, + "cuvs.tex", + "cuVS Documentation", + "NVIDIA Corporation", + "manual", + ), ] # -- Options for manual page output --------------------------------------- diff --git a/docs/source/sphinxext/github_link.py b/docs/source/sphinxext/github_link.py index 512782af58..1ee5f610b5 100644 --- a/docs/source/sphinxext/github_link.py +++ b/docs/source/sphinxext/github_link.py @@ -10,7 +10,6 @@ import re import subprocess import sys -from functools import partial from operator import attrgetter orig = inspect.isfunction @@ -18,7 +17,6 @@ # See https://opendreamkit.org/2017/06/09/CythonSphinx/ def isfunction(obj): - orig_val = orig(obj) new_val = hasattr(type(obj), "__code__") @@ -125,7 +123,6 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): try: lineno = inspect.getsourcelines(obj)[1] except Exception: - # Can happen if its a cyfunction. 
See if it has `__code__` if hasattr(obj, "__code__"): lineno = obj.__code__.co_firstlineno diff --git a/notebooks/VectorSearch_QuestionRetrieval.ipynb b/notebooks/VectorSearch_QuestionRetrieval.ipynb index 1115a5920d..d93f1e8fd3 100644 --- a/notebooks/VectorSearch_QuestionRetrieval.ipynb +++ b/notebooks/VectorSearch_QuestionRetrieval.ipynb @@ -56,10 +56,13 @@ "import torch\n", "import pylibraft\n", "from cuvs.neighbors import ivf_flat, ivf_pq\n", - "pylibraft.config.set_output_as(lambda device_ndarray: device_ndarray.copy_to_host())\n", + "\n", + "pylibraft.config.set_output_as(\n", + " lambda device_ndarray: device_ndarray.copy_to_host()\n", + ")\n", "\n", "if not torch.cuda.is_available():\n", - " print(\"Warning: No GPU found. Please add GPU to your notebook\")" + " print(\"Warning: No GPU found. Please add GPU to your notebook\")" ] }, { @@ -70,41 +73,51 @@ "outputs": [], "source": [ "# We use the Bi-Encoder to encode all passages, so that we can use it with semantic search\n", - "model_name = 'nq-distilbert-base-v1'\n", + "model_name = \"nq-distilbert-base-v1\"\n", "bi_encoder = SentenceTransformer(model_name)\n", "\n", "# As dataset, we use Simple English Wikipedia. Compared to the full English wikipedia, it has only\n", "# about 170k articles. We split these articles into paragraphs and encode them with the bi-encoder\n", "\n", - "wikipedia_filepath = 'data/simplewiki-2020-11-01.jsonl.gz'\n", + "wikipedia_filepath = \"data/simplewiki-2020-11-01.jsonl.gz\"\n", "\n", "if not os.path.exists(wikipedia_filepath):\n", - " util.http_get('http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz', wikipedia_filepath)\n", + " util.http_get(\n", + " \"http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz\",\n", + " wikipedia_filepath,\n", + " )\n", "\n", "passages = []\n", - "with gzip.open(wikipedia_filepath, 'rt', encoding='utf8') as fIn:\n", + "with gzip.open(wikipedia_filepath, \"rt\", encoding=\"utf8\") as fIn:\n", " for line in fIn:\n", " data = json.loads(line.strip())\n", - " for paragraph in data['paragraphs']:\n", + " for paragraph in data[\"paragraphs\"]:\n", " # We encode the passages as [title, text]\n", - " passages.append([data['title'], paragraph])\n", + " passages.append([data[\"title\"], paragraph])\n", "\n", "# If you like, you can also limit the number of passages you want to use\n", "print(\"Passages:\", len(passages))\n", "\n", "# To speed things up, pre-computed embeddings are downloaded.\n", "# The provided file encoded the passages with the model 'nq-distilbert-base-v1'\n", - "if model_name == 'nq-distilbert-base-v1':\n", - " embeddings_filepath = 'simplewiki-2020-11-01-nq-distilbert-base-v1.pt'\n", + "if model_name == \"nq-distilbert-base-v1\":\n", + " embeddings_filepath = \"simplewiki-2020-11-01-nq-distilbert-base-v1.pt\"\n", " if not os.path.exists(embeddings_filepath):\n", - " util.http_get('http://sbert.net/datasets/simplewiki-2020-11-01-nq-distilbert-base-v1.pt', embeddings_filepath)\n", + " util.http_get(\n", + " \"http://sbert.net/datasets/simplewiki-2020-11-01-nq-distilbert-base-v1.pt\",\n", + " embeddings_filepath,\n", + " )\n", "\n", " corpus_embeddings = torch.load(embeddings_filepath)\n", - " corpus_embeddings = corpus_embeddings.float() # Convert embedding file to float\n", + " corpus_embeddings = (\n", + " corpus_embeddings.float()\n", + " ) # Convert embedding file to float\n", " if torch.cuda.is_available():\n", - " corpus_embeddings = corpus_embeddings.to('cuda')\n", + " corpus_embeddings = corpus_embeddings.to(\"cuda\")\n", "else: # Here, we 
compute the corpus_embeddings from scratch (which can take a while depending on the GPU)\n", - " corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True, show_progress_bar=True)" + " corpus_embeddings = bi_encoder.encode(\n", + " passages, convert_to_tensor=True, show_progress_bar=True\n", + " )" ] }, { @@ -131,11 +144,14 @@ "pq_index = ivf_pq.build(params, corpus_embeddings)\n", "search_params = ivf_pq.SearchParams()\n", "\n", - "def search_cuvs_pq(query, top_k = 5):\n", + "\n", + "def search_cuvs_pq(query, top_k=5):\n", " # Encode the query using the bi-encoder and find potentially relevant passages\n", " question_embedding = bi_encoder.encode(query, convert_to_tensor=True)\n", "\n", - " hits = ivf_pq.search(search_params, pq_index, question_embedding[None], top_k)\n", + " hits = ivf_pq.search(\n", + " search_params, pq_index, question_embedding[None], top_k\n", + " )\n", "\n", " # Output of top-k hits\n", " print(\"Input question:\", query)\n", @@ -199,7 +215,7 @@ "outputs": [], "source": [ "%%time\n", - "search_cuvs_pq(query = \"What is creating tides?\")" + "search_cuvs_pq(query=\"What is creating tides?\")" ] }, { @@ -214,12 +230,15 @@ "flat_index = ivf_flat.build(params, corpus_embeddings)\n", "search_params = ivf_flat.SearchParams()\n", "\n", - "def search_cuvs_flat(query, top_k = 5):\n", + "\n", + "def search_cuvs_flat(query, top_k=5):\n", " # Encode the query using the bi-encoder and find potentially relevant passages\n", " question_embedding = bi_encoder.encode(query, convert_to_tensor=True)\n", - " \n", + "\n", " start_time = time.time()\n", - " hits = ivf_flat.search(search_params, flat_index, question_embedding[None], top_k)\n", + " hits = ivf_flat.search(\n", + " search_params, flat_index, question_embedding[None], top_k\n", + " )\n", " end_time = time.time()\n", "\n", " # Output of top-k hits\n", @@ -259,7 +278,7 @@ "outputs": [], "source": [ "%%time\n", - "search_cuvs_flat(query = \"What is creating tides?\")" + "search_cuvs_flat(query=\"What is creating tides?\")" ] }, { @@ -304,11 +323,13 @@ "metadata": {}, "outputs": [], "source": [ - "def search_cuvs_cagra(query, top_k = 5):\n", + "def search_cuvs_cagra(query, top_k=5):\n", " # Encode the query using the bi-encoder and find potentially relevant passages\n", " question_embedding = bi_encoder.encode(query, convert_to_tensor=True)\n", "\n", - " hits = cagra.search(search_params, cagra_index, question_embedding[None], top_k)\n", + " hits = cagra.search(\n", + " search_params, cagra_index, question_embedding[None], top_k\n", + " )\n", "\n", " # Output of top-k hits\n", " print(\"Input question:\", query)\n", diff --git a/notebooks/VectorSearch_QuestionRetrieval_Milvus.ipynb b/notebooks/VectorSearch_QuestionRetrieval_Milvus.ipynb index 09a6cca43b..647c9f83b7 100644 --- a/notebooks/VectorSearch_QuestionRetrieval_Milvus.ipynb +++ b/notebooks/VectorSearch_QuestionRetrieval_Milvus.ipynb @@ -84,13 +84,14 @@ "from typing import List\n", "\n", "\n", - "from pymilvus import (\n", - " connections, utility\n", - ")\n", - "from pymilvus.bulk_writer import LocalBulkWriter, BulkFileType # pip install pymilvus[bulk_writer]\n", + "from pymilvus import connections, utility\n", + "from pymilvus.bulk_writer import (\n", + " LocalBulkWriter,\n", + " BulkFileType,\n", + ") # pip install pymilvus[bulk_writer]\n", "\n", "if not torch.cuda.is_available():\n", - " print(\"Warning: No GPU found. Please add GPU to your notebook\")" + " print(\"Warning: No GPU found. 
Please add GPU to your notebook\")" ] }, { @@ -118,32 +119,45 @@ "DIM = 768\n", "MILVUS_PORT = 30004\n", "MILVUS_HOST = f\"http://localhost:{MILVUS_PORT}\"\n", - "ID_FIELD=\"id\"\n", - "EMBEDDING_FIELD=\"embedding\"\n", + "ID_FIELD = \"id\"\n", + "EMBEDDING_FIELD = \"embedding\"\n", "\n", "collection_name = \"simple_wiki\"\n", "\n", + "\n", "def get_milvus_client():\n", " return pymilvus.MilvusClient(uri=MILVUS_HOST)\n", "\n", + "\n", "client = get_milvus_client()\n", "\n", "fields = [\n", - " pymilvus.FieldSchema(name=ID_FIELD, dtype=pymilvus.DataType.INT64, is_primary=True),\n", - " pymilvus.FieldSchema(name=EMBEDDING_FIELD, dtype=pymilvus.DataType.FLOAT_VECTOR, dim=DIM)\n", + " pymilvus.FieldSchema(\n", + " name=ID_FIELD, dtype=pymilvus.DataType.INT64, is_primary=True\n", + " ),\n", + " pymilvus.FieldSchema(\n", + " name=EMBEDDING_FIELD, dtype=pymilvus.DataType.FLOAT_VECTOR, dim=DIM\n", + " ),\n", "]\n", "\n", "schema = pymilvus.CollectionSchema(fields)\n", "schema.verify()\n", "\n", "if collection_name in client.list_collections():\n", - " print(f\"Collection '{collection_name}' already exists. Deleting collection...\")\n", + " print(\n", + " f\"Collection '{collection_name}' already exists. Deleting collection...\"\n", + " )\n", " client.drop_collection(collection_name)\n", "\n", - "client.create_collection(collection_name, schema=schema, dimension=DIM, vector_field_name=EMBEDDING_FIELD)\n", + "client.create_collection(\n", + " collection_name,\n", + " schema=schema,\n", + " dimension=DIM,\n", + " vector_field_name=EMBEDDING_FIELD,\n", + ")\n", "collection = pymilvus.Collection(name=collection_name, using=client._using)\n", "collection.release()\n", - "collection.drop_index()\n" + "collection.drop_index()" ] }, { @@ -169,40 +183,50 @@ "outputs": [], "source": [ "# We use the Bi-Encoder to encode all passages, so that we can use it with semantic search\n", - "model_name = 'nq-distilbert-base-v1'\n", + "model_name = \"nq-distilbert-base-v1\"\n", "bi_encoder = SentenceTransformer(model_name)\n", "\n", "# As dataset, we use Simple English Wikipedia. Compared to the full English wikipedia, it has only\n", "# about 170k articles. 
We split these articles into paragraphs and encode them with the bi-encoder\n", "\n", - "wikipedia_filepath = 'data/simplewiki-2020-11-01.jsonl.gz'\n", + "wikipedia_filepath = \"data/simplewiki-2020-11-01.jsonl.gz\"\n", "\n", "if not os.path.exists(wikipedia_filepath):\n", - " util.http_get('http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz', wikipedia_filepath)\n", + " util.http_get(\n", + " \"http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz\",\n", + " wikipedia_filepath,\n", + " )\n", "\n", "passages = []\n", - "with gzip.open(wikipedia_filepath, 'rt', encoding='utf8') as fIn:\n", + "with gzip.open(wikipedia_filepath, \"rt\", encoding=\"utf8\") as fIn:\n", " for line in fIn:\n", " data = json.loads(line.strip())\n", - " for paragraph in data['paragraphs']:\n", + " for paragraph in data[\"paragraphs\"]:\n", " # We encode the passages as [title, text]\n", - " passages.append([data['title'], paragraph])\n", + " passages.append([data[\"title\"], paragraph])\n", "\n", "# If you like, you can also limit the number of passages you want to use\n", "print(\"Passages:\", len(passages))\n", "\n", "# To speed things up, pre-computed embeddings are downloaded.\n", "# The provided file encoded the passages with the model 'nq-distilbert-base-v1'\n", - "if model_name == 'nq-distilbert-base-v1':\n", - " embeddings_filepath = 'simplewiki-2020-11-01-nq-distilbert-base-v1.pt'\n", + "if model_name == \"nq-distilbert-base-v1\":\n", + " embeddings_filepath = \"simplewiki-2020-11-01-nq-distilbert-base-v1.pt\"\n", " if not os.path.exists(embeddings_filepath):\n", - " util.http_get('http://sbert.net/datasets/simplewiki-2020-11-01-nq-distilbert-base-v1.pt', embeddings_filepath)\n", + " util.http_get(\n", + " \"http://sbert.net/datasets/simplewiki-2020-11-01-nq-distilbert-base-v1.pt\",\n", + " embeddings_filepath,\n", + " )\n", "\n", - " corpus_embeddings = torch.load(embeddings_filepath, map_location='cpu', weights_only=True).float() # Convert embedding file to float\n", - " #if torch.cuda.is_available():\n", + " corpus_embeddings = torch.load(\n", + " embeddings_filepath, map_location=\"cpu\", weights_only=True\n", + " ).float() # Convert embedding file to float\n", + " # if torch.cuda.is_available():\n", " # corpus_embeddings = corpus_embeddings.to('cuda')\n", "else: # Here, we compute the corpus_embeddings from scratch (which can take a while depending on the GPU)\n", - " corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True, show_progress_bar=True).to('cpu')" + " corpus_embeddings = bi_encoder.encode(\n", + " passages, convert_to_tensor=True, show_progress_bar=True\n", + " ).to(\"cpu\")" ] }, { @@ -236,73 +260,109 @@ "MINIO_SECRET_KEY = \"minioadmin\"\n", "MINIO_ACCESS_KEY = \"minioadmin\"\n", "\n", - "def upload_to_minio(file_paths: List[List[str]], remote_paths: List[List[str]], bucket_name=\"milvus-bucket\"):\n", - " minio_client = Minio(endpoint=MINIO_URL, access_key=MINIO_ACCESS_KEY, secret_key=MINIO_SECRET_KEY, secure=False)\n", + "\n", + "def upload_to_minio(\n", + " file_paths: List[List[str]],\n", + " remote_paths: List[List[str]],\n", + " bucket_name=\"milvus-bucket\",\n", + "):\n", + " minio_client = Minio(\n", + " endpoint=MINIO_URL,\n", + " access_key=MINIO_ACCESS_KEY,\n", + " secret_key=MINIO_SECRET_KEY,\n", + " secure=False,\n", + " )\n", " if not minio_client.bucket_exists(bucket_name):\n", " minio_client.make_bucket(bucket_name)\n", "\n", " for local_batch, remote_batch in zip(file_paths, remote_paths):\n", " for local_file, remote_file in zip(local_batch, 
remote_batch):\n", - " minio_client.fput_object(bucket_name, \n", - " object_name=remote_file,\n", - " file_path=local_file,\n", - " part_size=512 * 1024 * 1024,\n", - " num_parallel_uploads=5)\n", - " \n", - " \n", - "def ingest_data_bulk(collection_name, vectors, schema: pymilvus.CollectionSchema, log_times=True, bulk_writer_type=\"milvus\", debug=False):\n", + " minio_client.fput_object(\n", + " bucket_name,\n", + " object_name=remote_file,\n", + " file_path=local_file,\n", + " part_size=512 * 1024 * 1024,\n", + " num_parallel_uploads=5,\n", + " )\n", + "\n", + "\n", + "def ingest_data_bulk(\n", + " collection_name,\n", + " vectors,\n", + " schema: pymilvus.CollectionSchema,\n", + " log_times=True,\n", + " bulk_writer_type=\"milvus\",\n", + " debug=False,\n", + "):\n", " print(f\"- Ingesting {len(vectors) // 1000}k vectors, Bulk\")\n", " tic = time.perf_counter()\n", - " collection = pymilvus.Collection(collection_name, using=get_milvus_client()._using)\n", + " collection = pymilvus.Collection(\n", + " collection_name, using=get_milvus_client()._using\n", + " )\n", " remote_path = None\n", "\n", - " if bulk_writer_type == 'milvus':\n", + " if bulk_writer_type == \"milvus\":\n", " # # Prepare source data for faster ingestion\n", " writer = LocalBulkWriter(\n", " schema=schema,\n", - " local_path='bulk_data',\n", - " segment_size=512 * 1024 * 1024, # Default value\n", - " file_type=BulkFileType.NPY\n", + " local_path=\"bulk_data\",\n", + " segment_size=512 * 1024 * 1024, # Default value\n", + " file_type=BulkFileType.NPY,\n", " )\n", " for id, vec in enumerate(vectors):\n", " writer.append_row({ID_FIELD: id, EMBEDDING_FIELD: vec})\n", "\n", " if debug:\n", " print(writer.batch_files)\n", + "\n", " def callback(file_list):\n", " if debug:\n", - " print(f\" - Commit successful\")\n", + " print(\" - Commit successful\")\n", " print(file_list)\n", + "\n", " writer.commit(call_back=callback)\n", " files_to_upload = writer.batch_files\n", - " elif bulk_writer_type == 'dask':\n", + " elif bulk_writer_type == \"dask\":\n", " # Prepare source data for faster ingestion\n", " if not os.path.isdir(\"bulk_data\"):\n", " os.mkdir(\"bulk_data\")\n", "\n", " from dask.distributed import Client, LocalCluster\n", + "\n", " cluster = LocalCluster(n_workers=1, threads_per_worker=1)\n", " client = Client(cluster)\n", "\n", " chunk_size = 100000\n", - " da_vectors = da.from_array(vectors, chunks=(chunk_size, vectors.shape[1]))\n", + " da_vectors = da.from_array(\n", + " vectors, chunks=(chunk_size, vectors.shape[1])\n", + " )\n", " da_ids = da.arange(len(vectors), chunks=(chunk_size,))\n", " da.to_npy_stack(\"bulk_data/da_embedding/\", da_vectors)\n", " da.to_npy_stack(\"bulk_data/da_id/\", da_ids)\n", " files_to_upload = []\n", " remote_path = []\n", " for chunk_nb in range(math.ceil(len(vectors) / chunk_size)):\n", - " files_to_upload.append([f\"bulk_data/da_embedding/{chunk_nb}.npy\", f\"bulk_data/da_id/{chunk_nb}.npy\"])\n", - " remote_path.append([f\"bulk_data/da_{chunk_nb}/embedding.npy\", f\"bulk_data/da__{chunk_nb}/id.npy\"])\n", + " files_to_upload.append(\n", + " [\n", + " f\"bulk_data/da_embedding/{chunk_nb}.npy\",\n", + " f\"bulk_data/da_id/{chunk_nb}.npy\",\n", + " ]\n", + " )\n", + " remote_path.append(\n", + " [\n", + " f\"bulk_data/da_{chunk_nb}/embedding.npy\",\n", + " f\"bulk_data/da__{chunk_nb}/id.npy\",\n", + " ]\n", + " )\n", "\n", - " elif bulk_writer_type == 'numpy':\n", + " elif bulk_writer_type == \"numpy\":\n", " # Directly save NPY files\n", " np.save(\"bulk_data/embedding.npy\", 
vectors)\n", " np.save(\"bulk_data/id.npy\", np.arange(len(vectors)))\n", " files_to_upload = [[\"bulk_data/embedding.npy\", \"bulk_data/id.npy\"]]\n", " else:\n", " raise ValueError(\"Invalid bulk writer type\")\n", - " \n", + "\n", " toc = time.perf_counter()\n", " if log_times:\n", " print(f\" - File save time: {toc - tic:.2f} seconds\")\n", @@ -310,17 +370,29 @@ " if remote_path is None:\n", " remote_path = files_to_upload\n", " upload_to_minio(files_to_upload, remote_path)\n", - " \n", - " job_ids = [utility.do_bulk_insert(collection_name, batch, using=get_milvus_client()._using) for batch in remote_path]\n", + "\n", + " job_ids = [\n", + " utility.do_bulk_insert(\n", + " collection_name, batch, using=get_milvus_client()._using\n", + " )\n", + " for batch in remote_path\n", + " ]\n", "\n", " while True:\n", - " tasks = [utility.get_bulk_insert_state(job_id, using=get_milvus_client()._using) for job_id in job_ids]\n", + " tasks = [\n", + " utility.get_bulk_insert_state(\n", + " job_id, using=get_milvus_client()._using\n", + " )\n", + " for job_id in job_ids\n", + " ]\n", " success = all(task.state_name == \"Completed\" for task in tasks)\n", " failure = any(task.state_name == \"Failed\" for task in tasks)\n", " for i in range(len(tasks)):\n", " task = tasks[i]\n", " if debug:\n", - " print(f\" - Task {i}/{len(tasks)} state: {task.state_name}, Progress percent: {task.infos['progress_percent']}, Imported row count: {task.row_count}\")\n", + " print(\n", + " f\" - Task {i}/{len(tasks)} state: {task.state_name}, Progress percent: {task.infos['progress_percent']}, Imported row count: {task.row_count}\"\n", + " )\n", " if task.state_name == \"Failed\":\n", " print(task)\n", " if success or failure:\n", @@ -334,9 +406,18 @@ " toc = time.perf_counter()\n", " if log_times:\n", " datasize = vectors.nbytes / 1024 / 1024\n", - " print(f\"- Ingestion time: {toc - tic:.2f} seconds. ({(datasize / (toc-tic)):.2f}MB/s)\")\n", + " print(\n", + " f\"- Ingestion time: {toc - tic:.2f} seconds. 
({(datasize / (toc - tic)):.2f}MB/s)\"\n", + " )\n", + "\n", "\n", - "ingest_data_bulk(collection_name, np.array(corpus_embeddings), schema, bulk_writer_type='dask', log_times=True)" + "ingest_data_bulk(\n", + " collection_name,\n", + " np.array(corpus_embeddings),\n", + " schema,\n", + " bulk_writer_type=\"dask\",\n", + " log_times=True,\n", + ")" ] }, { @@ -358,8 +439,11 @@ "index_params = dict(\n", " index_type=\"GPU_IVF_PQ\",\n", " metric_type=\"L2\",\n", - " params={\"nlist\": 150, # Number of clusters\n", - " \"m\": 96}) # Product Quantization dimension\n", + " params={\n", + " \"nlist\": 150, # Number of clusters\n", + " \"m\": 96,\n", + " },\n", + ") # Product Quantization dimension\n", "\n", "# Drop the index if it exists\n", "if collection.has_index():\n", @@ -389,22 +473,27 @@ "outputs": [], "source": [ "# Search the index\n", - "def search_cuvs_pq(query, top_k = 5, n_probe = 30):\n", + "def search_cuvs_pq(query, top_k=5, n_probe=30):\n", " # Encode the query using the bi-encoder and find potentially relevant passages\n", " question_embedding = bi_encoder.encode(query, convert_to_tensor=True)\n", "\n", " search_params = {\"nprobe\": n_probe}\n", " tic = time.perf_counter()\n", " hits = collection.search(\n", - " data=np.array(question_embedding[None].cpu()), anns_field=EMBEDDING_FIELD, param=search_params, limit=top_k\n", - " )\n", + " data=np.array(question_embedding[None].cpu()),\n", + " anns_field=EMBEDDING_FIELD,\n", + " param=search_params,\n", + " limit=top_k,\n", + " )\n", " toc = time.perf_counter()\n", "\n", " # Output of top-k hits\n", " print(\"Input question:\", query)\n", - " print(\"Results (after {:.3f} ms):\".format((toc - tic)*1000))\n", + " print(\"Results (after {:.3f} ms):\".format((toc - tic) * 1000))\n", " for k in range(top_k):\n", - " print(\"\\t{:.3f}\\t{}\".format(hits[0][k].distance, passages[hits[0][k].id]))" + " print(\n", + " \"\\t{:.3f}\\t{}\".format(hits[0][k].distance, passages[hits[0][k].id])\n", + " )" ] }, { @@ -463,7 +552,7 @@ }, "outputs": [], "source": [ - "search_cuvs_pq(query = \"What is creating tides?\")" + "search_cuvs_pq(query=\"What is creating tides?\")" ] }, { @@ -487,9 +576,8 @@ "\n", "# Create the IVF Flat index\n", "index_params = dict(\n", - " index_type=\"GPU_IVF_FLAT\",\n", - " metric_type=\"L2\",\n", - " params={\"nlist\": 150}) # Number of clusters)\n", + " index_type=\"GPU_IVF_FLAT\", metric_type=\"L2\", params={\"nlist\": 150}\n", + ") # Number of clusters)\n", "tic = time.perf_counter()\n", "collection.create_index(field_name=EMBEDDING_FIELD, index_params=index_params)\n", "collection.load()\n", @@ -511,22 +599,27 @@ }, "outputs": [], "source": [ - "def search_cuvs_flat(query, top_k = 5, n_probe = 30):\n", + "def search_cuvs_flat(query, top_k=5, n_probe=30):\n", " # Encode the query using the bi-encoder and find potentially relevant passages\n", " question_embedding = bi_encoder.encode(query, convert_to_tensor=True)\n", - " \n", + "\n", " search_params = {\"nprobe\": n_probe}\n", " tic = time.perf_counter()\n", " hits = collection.search(\n", - " data=np.array(question_embedding[None].cpu()), anns_field=EMBEDDING_FIELD, param=search_params, limit=top_k\n", - " )\n", + " data=np.array(question_embedding[None].cpu()),\n", + " anns_field=EMBEDDING_FIELD,\n", + " param=search_params,\n", + " limit=top_k,\n", + " )\n", " toc = time.perf_counter()\n", "\n", " # Output of top-k hits\n", " print(\"Input question:\", query)\n", - " print(\"Results (after {:.3f} ms):\".format((toc - tic)*1000))\n", + " print(\"Results (after 
{:.3f} ms):\".format((toc - tic) * 1000))\n", " for k in range(top_k):\n", - " print(\"\\t{:.3f}\\t{}\".format(hits[0][k].distance, passages[hits[0][k].id]))" + " print(\n", + " \"\\t{:.3f}\\t{}\".format(hits[0][k].distance, passages[hits[0][k].id])\n", + " )" ] }, { @@ -577,7 +670,7 @@ }, "outputs": [], "source": [ - "search_cuvs_flat(query = \"What is creating tides?\")" + "search_cuvs_flat(query=\"What is creating tides?\")" ] }, { @@ -616,7 +709,13 @@ "index_params = dict(\n", " index_type=\"GPU_CAGRA\",\n", " metric_type=\"L2\",\n", - " params={\"graph_degree\": 64, \"intermediate_graph_degree\": 128, \"build_algo\": \"NN_DESCENT\", \"adapt_for_cpu\": True})\n", + " params={\n", + " \"graph_degree\": 64,\n", + " \"intermediate_graph_degree\": 128,\n", + " \"build_algo\": \"NN_DESCENT\",\n", + " \"adapt_for_cpu\": True,\n", + " },\n", + ")\n", "tic = time.perf_counter()\n", "collection.create_index(field_name=EMBEDDING_FIELD, index_params=index_params)\n", "collection.load()\n", @@ -638,22 +737,27 @@ }, "outputs": [], "source": [ - "def search_cuvs_cagra(query, top_k = 5, itopk = 32):\n", + "def search_cuvs_cagra(query, top_k=5, itopk=32):\n", " # Encode the query using the bi-encoder and find potentially relevant passages\n", " question_embedding = bi_encoder.encode(query, convert_to_tensor=True)\n", "\n", " search_params = {\"params\": {\"itopk\": itopk, \"ef\": 35}}\n", " tic = time.perf_counter()\n", " hits = collection.search(\n", - " data=np.array(question_embedding[None].cpu()), anns_field=EMBEDDING_FIELD, param=search_params, limit=top_k\n", - " )\n", + " data=np.array(question_embedding[None].cpu()),\n", + " anns_field=EMBEDDING_FIELD,\n", + " param=search_params,\n", + " limit=top_k,\n", + " )\n", " toc = time.perf_counter()\n", "\n", " # Output of top-k hits\n", " print(\"Input question:\", query)\n", - " print(\"Results (after {:.3f} ms):\".format((toc - tic)*1000))\n", + " print(\"Results (after {:.3f} ms):\".format((toc - tic) * 1000))\n", " for k in range(top_k):\n", - " print(\"\\t{:.3f}\\t{}\".format(hits[0][k].distance, passages[hits[0][k].id]))" + " print(\n", + " \"\\t{:.3f}\\t{}\".format(hits[0][k].distance, passages[hits[0][k].id])\n", + " )" ] }, { diff --git a/notebooks/cuvs_hpo_example.ipynb b/notebooks/cuvs_hpo_example.ipynb index 964110cb76..e333f8fa9f 100644 --- a/notebooks/cuvs_hpo_example.ipynb +++ b/notebooks/cuvs_hpo_example.ipynb @@ -20,7 +20,7 @@ }, "outputs": [], "source": [ - "#Install Required Packages\n", + "# Install Required Packages\n", "%mamba install -c rapidsai-nightly -c conda-forge cuvs optuna -y\n", "%pip install cupy" ] @@ -38,13 +38,12 @@ "import numpy as np\n", "from cuvs.neighbors import ivf_flat\n", "import urllib.request\n", - "import numpy as np\n", "import time\n", "import optuna\n", "from utils import calc_recall\n", "from optuna.visualization import plot_optimization_history\n", "import math\n", - "import os\n" + "import os" ] }, { @@ -65,21 +64,24 @@ "outputs": [], "source": [ "import tarfile\n", + "\n", "home_dir = os.path.expanduser(\"~/\")\n", - "#wiki-all datasets are in tar format\n", + "\n", + "\n", + "# wiki-all datasets are in tar format\n", "def download_files(url, file):\n", " if os.path.exists(home_dir + \"/\" + file):\n", " print(\"tar file is already downloaded\")\n", " else:\n", " urllib.request.urlretrieve(url, home_dir + \"/\" + file)\n", " # Open the .tar file\n", - " with tarfile.open(home_dir + \"/\" + file, 'r') as tar:\n", + " with tarfile.open(home_dir + \"/\" + file, \"r\") as tar:\n", " filename = 
file.split(\".\")[0]\n", " if os.path.exists(home_dir + \"/\" + filename + \"/\"):\n", " print(\"Files already extracted\")\n", " return home_dir + \"/\" + filename + \"/\"\n", " # Extract all contents into the specified directory\n", - " extract_path=home_dir + \"/\" +file.split(\".\")[0]\n", + " extract_path = home_dir + \"/\" + file.split(\".\")[0]\n", " tar.extractall(extract_path)\n", " return extract_path" ] @@ -102,7 +104,10 @@ } ], "source": [ - "extracted_path=download_files('https://data.rapids.ai/raft/datasets/wiki_all_1M/wiki_all_1M.tar', 'wiki_all_1M.tar')" + "extracted_path = download_files(\n", + " \"https://data.rapids.ai/raft/datasets/wiki_all_1M/wiki_all_1M.tar\",\n", + " \"wiki_all_1M.tar\",\n", + ")" ] }, { @@ -136,8 +141,8 @@ "source": [ "def read_data(file_path, dtype):\n", " with open(file_path, \"rb\") as f:\n", - " rows,cols = np.fromfile(f, count=2, dtype= np.int32)\n", - " d = np.fromfile(f,count=rows*cols,dtype=dtype).reshape(rows, cols)\n", + " rows, cols = np.fromfile(f, count=2, dtype=np.int32)\n", + " d = np.fromfile(f, count=rows * cols, dtype=dtype).reshape(rows, cols)\n", " return cp.asarray(d)" ] }, @@ -150,9 +155,11 @@ }, "outputs": [], "source": [ - "vectors= read_data(extracted_path + \"/base.1M.fbin\",np.float32)\n", - "queries = read_data(extracted_path + \"/queries.fbin\",np.float32)\n", - "gt_neighbors = read_data(extracted_path + \"/groundtruth.1M.neighbors.ibin\",np.int32)" + "vectors = read_data(extracted_path + \"/base.1M.fbin\", np.float32)\n", + "queries = read_data(extracted_path + \"/queries.fbin\", np.float32)\n", + "gt_neighbors = read_data(\n", + " extracted_path + \"/groundtruth.1M.neighbors.ibin\", np.int32\n", + ")" ] }, { @@ -164,7 +171,7 @@ }, "outputs": [], "source": [ - "#Get the dataset size of database vectors\n", + "# Get the dataset size of database vectors\n", "dataset_size = vectors.shape[0]\n", "dim = vectors.shape[1]" ] @@ -190,26 +197,26 @@ "source": [ "def visualization(study_obj):\n", " \"\"\"\n", - " This function creates two Pareto front plots to visualize trade-offs between different \n", - " optimization objectives. The plots help in understanding the balance between competing \n", + " This function creates two Pareto front plots to visualize trade-offs between different\n", + " optimization objectives. The plots help in understanding the balance between competing\n", " objectives in the optimization process.\n", "\n", " Args:\n", " study_obj (optuna.Study): The Optuna study object containing the optimization results.\n", "\n", " The function produces the following plots:\n", - " 1. **Figure 1**: A Pareto front plot showing the trade-off between `build_time_in_secs` \n", - " and `recall`. It visualizes how the optimization process balances the build time \n", + " 1. **Figure 1**: A Pareto front plot showing the trade-off between `build_time_in_secs`\n", + " and `recall`. It visualizes how the optimization process balances the build time\n", " and recall score.\n", - " 2. **Figure 2**: A Pareto front plot showing the trade-off between `latency_in_ms` \n", + " 2. **Figure 2**: A Pareto front plot showing the trade-off between `latency_in_ms`\n", " and `recall`. 
This plot illustrates the relationship between latency and recall score.\n", - " \n", + "\n", " \"\"\"\n", - " \n", + "\n", " fig1 = optuna.visualization.plot_pareto_front(\n", - " study_obj,\n", - " targets=lambda t: (t.values[0], t.values[2]),\n", - " target_names=[\"build_time_in_secs\", \"recall\"],\n", + " study_obj,\n", + " targets=lambda t: (t.values[0], t.values[2]),\n", + " target_names=[\"build_time_in_secs\", \"recall\"],\n", " )\n", " fig1.show()\n", "\n", @@ -234,13 +241,14 @@ " print(f\"\\tnumber: {target_instance.number}\")\n", " print(f\"\\tparams: {target_instance.params}\")\n", " print(f\"\\tvalues: {target_instance.values}\")\n", - " \n", + "\n", + "\n", "def print_best_trial_values(optuna_study):\n", " \"\"\"\n", " Prints details about the trials on the Pareto front of an Optuna study.\n", "\n", - " This function analyzes the best trials from an Optuna study, which are typically \n", - " those with the most favorable trade-offs among multiple objectives. It prints \n", + " This function analyzes the best trials from an Optuna study, which are typically\n", + " those with the most favorable trade-offs among multiple objectives. It prints\n", " information on three specific metrics:\n", "\n", " 1. The number of trials on the Pareto front.\n", @@ -256,20 +264,28 @@ " - `values[0]`: Build time\n", " - `values[1]`: latency\n", " - `values[2]`: Accuracy\n", - " \n", + "\n", " \"\"\"\n", - " print(f\"Number of trials on the Pareto front: {len(optuna_study.best_trials)}\")\n", + " print(\n", + " f\"Number of trials on the Pareto front: {len(optuna_study.best_trials)}\"\n", + " )\n", "\n", - " trial_with_lowest_build_time = min(optuna_study.best_trials, key=lambda t: t.values[0])\n", - " print(f\"Trial with lowest build time in secs: \")\n", + " trial_with_lowest_build_time = min(\n", + " optuna_study.best_trials, key=lambda t: t.values[0]\n", + " )\n", + " print(\"Trial with lowest build time in secs: \")\n", " print_target_instance_summary(trial_with_lowest_build_time)\n", "\n", - " trial_with_lowest_latency = min(optuna_study.best_trials, key=lambda t: t.values[1])\n", - " print(f\"Trial with lowest latency in ms: \")\n", + " trial_with_lowest_latency = min(\n", + " optuna_study.best_trials, key=lambda t: t.values[1]\n", + " )\n", + " print(\"Trial with lowest latency in ms: \")\n", " print_target_instance_summary(trial_with_lowest_latency)\n", - " \n", - " trial_with_highest_accuracy = max(optuna_study.best_trials, key=lambda t: t.values[2])\n", - " print(f\"Trial with highest accuracy: \")\n", + "\n", + " trial_with_highest_accuracy = max(\n", + " optuna_study.best_trials, key=lambda t: t.values[2]\n", + " )\n", + " print(\"Trial with highest accuracy: \")\n", " print_target_instance_summary(trial_with_highest_accuracy)" ] }, @@ -312,9 +328,9 @@ "\n", " \"\"\"\n", " # Suggest an integer for the number of lists\n", - " n_lists = trial.suggest_int(\"n_lists\", 10, dataset_size*0.1)\n", + " n_lists = trial.suggest_int(\"n_lists\", 10, dataset_size * 0.1)\n", " # Suggest an integer for the number of probes\n", - " n_probes = trial.suggest_int(\"n_probes\",n_lists*0.01 , n_lists*0.1)\n", + " n_probes = trial.suggest_int(\"n_probes\", n_lists * 0.01, n_lists * 0.1)\n", " build_params = ivf_flat.IndexParams(\n", " n_lists=n_lists,\n", " )\n", @@ -328,12 +344,16 @@ " start_search_time = time.time()\n", " distances, indices = ivf_flat.search(search_params, index, queries, k=10)\n", " search_time = time.time() - start_search_time\n", - " \n", - " latency_in_ms = (search_time * 
1000)/queries.shape[0]\n", - " \n", + "\n", + " latency_in_ms = (search_time * 1000) / queries.shape[0]\n", + "\n", " found_distances, found_indices = cp.asnumpy(distances), cp.asnumpy(indices)\n", " recall = calc_recall(found_indices, gt_neighbors)\n", - " return round(build_time_in_secs,4), round(latency_in_ms,4), round(recall,4)" + " return (\n", + " round(build_time_in_secs, 4),\n", + " round(latency_in_ms, 4),\n", + " round(recall, 4),\n", + " )" ] }, { @@ -363,7 +383,9 @@ } ], "source": [ - "ivf_flat_study = optuna.create_study(directions=['minimize', 'minimize', 'maximize'])\n", + "ivf_flat_study = optuna.create_study(\n", + " directions=[\"minimize\", \"minimize\", \"maximize\"]\n", + ")\n", "ivf_flat_study.optimize(multi_objective_ivf_flat, n_trials=10)" ] }, @@ -1419,7 +1441,7 @@ "yaxis": { "autorange": true, "range": [ - 0.9929718446601943, + 0.9929718446601944, 1.0004281553398058 ], "title": { @@ -2473,7 +2495,7 @@ "yaxis": { "autorange": true, "range": [ - 0.9929718446601943, + 0.9929718446601944, 1.0004281553398058 ], "title": { @@ -2537,7 +2559,7 @@ }, "outputs": [], "source": [ - "from cuvs.neighbors import ivf_pq,refine" + "from cuvs.neighbors import ivf_pq, refine" ] }, { @@ -2555,15 +2577,15 @@ "\n", " \"\"\"\n", " # Suggest values for build parameters\n", - " pq_dim = trial.suggest_int(\"pq_dim\", dim*0.25, dim, step=2)\n", + " pq_dim = trial.suggest_int(\"pq_dim\", dim * 0.25, dim, step=2)\n", " n_lists = 1000\n", "\n", " # Suggest an integer for the number of probes\n", - " n_probes = trial.suggest_int(\"n_probes\",n_lists*0.01 , n_lists*0.1)\n", + " n_probes = trial.suggest_int(\"n_probes\", n_lists * 0.01, n_lists * 0.1)\n", "\n", " build_params = ivf_pq.IndexParams(\n", - " n_lists=n_lists,\n", - " pq_dim=pq_dim,\n", + " n_lists=n_lists,\n", + " pq_dim=pq_dim,\n", " )\n", "\n", " start_build_time = time.time()\n", @@ -2578,12 +2600,16 @@ " distances, indices = ivf_pq.search(search_params, index, queries, k=10)\n", " search_time = time.time() - start_search_time\n", "\n", - " latency_in_ms = (search_time * 1000)/queries.shape[0]\n", + " latency_in_ms = (search_time * 1000) / queries.shape[0]\n", "\n", " found_distances, found_indices = cp.asnumpy(distances), cp.asnumpy(indices)\n", " recall = calc_recall(found_indices, gt_neighbors)\n", "\n", - " return round(build_time_in_secs,4), round(latency_in_ms, 4), round(recall,4)" + " return (\n", + " round(build_time_in_secs, 4),\n", + " round(latency_in_ms, 4),\n", + " round(recall, 4),\n", + " )" ] }, { @@ -2741,7 +2767,9 @@ } ], "source": [ - "ivf_pq_study = optuna.create_study(directions=['minimize', 'minimize', 'maximize'])\n", + "ivf_pq_study = optuna.create_study(\n", + " directions=[\"minimize\", \"minimize\", \"maximize\"]\n", + ")\n", "ivf_pq_study.optimize(multi_objective_ivf_pq, n_trials=10)" ] }, @@ -4907,7 +4935,7 @@ "metadata": {}, "outputs": [], "source": [ - "from cuvs.neighbors import cagra\n" + "from cuvs.neighbors import cagra" ] }, { @@ -4923,13 +4951,15 @@ "\n", " \"\"\"\n", " # Suggest values for build parameters\n", - " intermediate_graph_degree = trial.suggest_int(\"intermediate_graph_degree\", 64, 128, step=2 )\n", + " intermediate_graph_degree = trial.suggest_int(\n", + " \"intermediate_graph_degree\", 64, 128, step=2\n", + " )\n", "\n", " # Suggest an integer for the number of probes\n", " itopk_size = trial.suggest_int(\"itopk_size\", 64, 128, step=2)\n", "\n", " build_params = cagra.IndexParams(\n", - " intermediate_graph_degree=intermediate_graph_degree\n", + " 
intermediate_graph_degree=intermediate_graph_degree\n", " )\n", "\n", " start_build_time = time.time()\n", @@ -4941,15 +4971,21 @@ "\n", " # perform search and refine to increase recall/accuracy\n", " start_search_time = time.time()\n", - " distances, indices = cagra.search(search_params, cagra_index, queries, k=10)\n", + " distances, indices = cagra.search(\n", + " search_params, cagra_index, queries, k=10\n", + " )\n", " search_time = time.time() - start_search_time\n", "\n", - " latency_in_ms = (search_time * 1000)/queries.shape[0]\n", + " latency_in_ms = (search_time * 1000) / queries.shape[0]\n", "\n", " found_distances, found_indices = cp.asnumpy(distances), cp.asnumpy(indices)\n", " recall = calc_recall(found_indices, gt_neighbors)\n", "\n", - " return round(build_time_in_secs,4), round(latency_in_ms,4), round(recall,4)" + " return (\n", + " round(build_time_in_secs, 4),\n", + " round(latency_in_ms, 4),\n", + " round(recall, 4),\n", + " )" ] }, { @@ -5047,7 +5083,9 @@ } ], "source": [ - "cagra_study = optuna.create_study(directions=['minimize', 'minimize', 'maximize'])\n", + "cagra_study = optuna.create_study(\n", + " directions=[\"minimize\", \"minimize\", \"maximize\"]\n", + ")\n", "cagra_study.optimize(multi_objective_cagra, n_trials=5)" ] }, diff --git a/notebooks/ivf_flat_example.ipynb b/notebooks/ivf_flat_example.ipynb index ce35866833..99f1d33626 100644 --- a/notebooks/ivf_flat_example.ipynb +++ b/notebooks/ivf_flat_example.ipynb @@ -53,9 +53,9 @@ "source": [ "import rmm\n", "from rmm.allocators.cupy import rmm_cupy_allocator\n", + "\n", "mr = rmm.mr.PoolMemoryResource(\n", - " rmm.mr.CudaMemoryResource(),\n", - " initial_pool_size=2**30\n", + " rmm.mr.CudaMemoryResource(), initial_pool_size=2**30\n", ")\n", "rmm.mr.set_current_device_resource(mr)\n", "cp.cuda.set_allocator(rmm_cupy_allocator)" @@ -100,7 +100,10 @@ "outputs": [], "source": [ "WORK_FOLDER = os.path.join(tempfile.gettempdir(), \"cuvs_example\")\n", - "f = load_dataset(\"http://ann-benchmarks.com/sift-128-euclidean.hdf5\", work_folder=WORK_FOLDER)" + "f = load_dataset(\n", + " \"http://ann-benchmarks.com/sift-128-euclidean.hdf5\",\n", + " work_folder=WORK_FOLDER,\n", + ")" ] }, { @@ -110,16 +113,18 @@ "metadata": {}, "outputs": [], "source": [ - "metric = f.attrs['distance']\n", + "metric = f.attrs[\"distance\"]\n", "\n", - "dataset = cp.asarray(f['train'])\n", - "queries = cp.asarray(f['test'])\n", - "gt_neighbors = cp.asarray(f['neighbors'][:])\n", - "gt_distances = cp.asarray(f['distances'][:])\n", + "dataset = cp.asarray(f[\"train\"])\n", + "queries = cp.asarray(f[\"test\"])\n", + "gt_neighbors = cp.asarray(f[\"neighbors\"][:])\n", + "gt_distances = cp.asarray(f[\"distances\"][:])\n", "\n", - "itemsize = dataset.dtype.itemsize \n", + "itemsize = dataset.dtype.itemsize\n", "\n", - "print(f\"Loaded dataset of size {dataset.shape}, {dataset.size*itemsize/(1<<30):4.1f} GiB; metric: '{metric}'.\")\n", + "print(\n", + " f\"Loaded dataset of size {dataset.shape}, {dataset.size * itemsize / (1 << 30):4.1f} GiB; metric: '{metric}'.\"\n", + ")\n", "print(f\"Number of test queries: {queries.shape[0]}\")" ] }, @@ -141,12 +146,12 @@ "source": [ "%%time\n", "build_params = ivf_flat.IndexParams(\n", - " n_lists=1024,\n", - " metric=\"euclidean\",\n", - " kmeans_trainset_fraction=0.1,\n", - " kmeans_n_iters=20,\n", - " add_data_on_build=True\n", - " )\n", + " n_lists=1024,\n", + " metric=\"euclidean\",\n", + " kmeans_trainset_fraction=0.1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=True,\n", + ")\n", "\n", "index 
= ivf_flat.build(build_params, dataset)" ] @@ -211,13 +216,19 @@ "outputs": [], "source": [ "%%time\n", - "n_queries=10000\n", + "n_queries = 10000\n", "# n_probes is the number of clusters we select in the first (coarse) search step. This is the only hyper parameter for search.\n", "search_params = ivf_flat.SearchParams(n_probes=30)\n", "\n", "# Search 10 nearest neighbors.\n", - "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, resources=handle)\n", - " \n", + "distances, indices = ivf_flat.search(\n", + " search_params,\n", + " index,\n", + " cp.asarray(queries[:n_queries, :]),\n", + " k=10,\n", + " resources=handle,\n", + ")\n", + "\n", "# cuVS calls are asynchronous (when handle arg is provided), we need to sync before accessing the results.\n", "handle.sync()\n", "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)" @@ -289,9 +300,9 @@ "metadata": {}, "outputs": [], "source": [ - "n_probes = np.asarray([10, 20, 30, 50, 100, 200, 500, 1024]);\n", - "qps = np.zeros(n_probes.shape);\n", - "recall = np.zeros(n_probes.shape);\n", + "n_probes = np.asarray([10, 20, 30, 50, 100, 200, 500, 1024])\n", + "qps = np.zeros(n_probes.shape)\n", + "recall = np.zeros(n_probes.shape)\n", "\n", "for i in range(len(n_probes)):\n", " print(\"\\nBenchmarking search with n_probes =\", n_probes[i])\n", @@ -305,7 +316,7 @@ " resources=handle,\n", " )\n", " handle.sync()\n", - " \n", + "\n", " recall[i] = calc_recall(cp.asnumpy(neighbors), gt_neighbors)\n", " print(\"recall\", recall[i])\n", "\n", @@ -313,7 +324,11 @@ " avg_time = timings.mean()\n", " std_time = timings.std()\n", " qps[i] = queries.shape[0] / avg_time\n", - " print(\"Average search time: {0:7.3f} +/- {1:7.3} s\".format(avg_time, std_time))\n", + " print(\n", + " \"Average search time: {0:7.3f} +/- {1:7.3} s\".format(\n", + " avg_time, std_time\n", + " )\n", + " )\n", " print(\"Queries per second (QPS): {0:8.0f}\".format(qps[i]))" ] }, @@ -332,28 +347,28 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plt.figure(figsize=(12,3))\n", + "fig = plt.figure(figsize=(12, 3))\n", "ax = fig.add_subplot(131)\n", - "ax.plot(n_probes, recall,'o-')\n", - "#ax.set_xticks(bench_k, bench_k)\n", - "ax.set_xlabel('n_probes')\n", + "ax.plot(n_probes, recall, \"o-\")\n", + "# ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel(\"n_probes\")\n", "ax.grid()\n", - "ax.set_ylabel('recall (@k=10)')\n", + "ax.set_ylabel(\"recall (@k=10)\")\n", "\n", "ax = fig.add_subplot(132)\n", - "ax.plot(n_probes, qps,'o-')\n", - "#ax.set_xticks(bench_k, bench_k)\n", - "ax.set_xlabel('n_probes')\n", + "ax.plot(n_probes, qps, \"o-\")\n", + "# ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel(\"n_probes\")\n", "ax.grid()\n", - "ax.set_ylabel('queries per second');\n", + "ax.set_ylabel(\"queries per second\")\n", "\n", "ax = fig.add_subplot(133)\n", - "ax.plot(recall, qps,'o-')\n", - "#ax.set_xticks(bench_k, bench_k)\n", - "ax.set_xlabel('recall')\n", + "ax.plot(recall, qps, \"o-\")\n", + "# ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel(\"recall\")\n", "ax.grid()\n", - "ax.set_ylabel('queries per second');\n", - "#ax.set_yscale('log')" + "ax.set_ylabel(\"queries per second\");\n", + "# ax.set_yscale('log')" ] }, { @@ -375,12 +390,12 @@ "source": [ "%%time\n", "build_params = ivf_flat.IndexParams(\n", - " n_lists=100,\n", - " metric=\"euclidean\",\n", - " kmeans_trainset_fraction=1,\n", - " kmeans_n_iters=20,\n", - " add_data_on_build=True\n", - " )\n", + " n_lists=100,\n", + " metric=\"euclidean\",\n", + 
" kmeans_trainset_fraction=1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=True,\n", + ")\n", "\n", "index = ivf_flat.build(build_params, dataset, resources=handle)" ] @@ -401,13 +416,19 @@ "outputs": [], "source": [ "%%time\n", - "n_queries=10000\n", + "n_queries = 10000\n", "\n", "search_params = ivf_flat.SearchParams(n_probes=10)\n", "\n", "# Search 10 nearest neighbors.\n", - "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, resources=handle)\n", - " \n", + "distances, indices = ivf_flat.search(\n", + " search_params,\n", + " index,\n", + " cp.asarray(queries[:n_queries, :]),\n", + " k=10,\n", + " resources=handle,\n", + ")\n", + "\n", "handle.sync()\n", "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)" ] @@ -439,12 +460,12 @@ "outputs": [], "source": [ "%%time\n", - "build_params = ivf_flat.IndexParams( \n", - " n_lists=100, \n", - " metric=\"sqeuclidean\", \n", - " kmeans_trainset_fraction=0.1, \n", - " kmeans_n_iters=20 \n", - " ) \n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=100,\n", + " metric=\"sqeuclidean\",\n", + " kmeans_trainset_fraction=0.1,\n", + " kmeans_n_iters=20,\n", + ")\n", "index = ivf_flat.build(build_params, dataset, resources=handle)" ] }, @@ -465,8 +486,14 @@ "source": [ "search_params = ivf_flat.SearchParams(n_probes=10)\n", "\n", - "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, resources=handle)\n", - " \n", + "distances, indices = ivf_flat.search(\n", + " search_params,\n", + " index,\n", + " cp.asarray(queries[:n_queries, :]),\n", + " k=10,\n", + " resources=handle,\n", + ")\n", + "\n", "handle.sync()\n", "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)\n", "calc_recall(neighbors, gt_neighbors)" @@ -494,16 +521,18 @@ "source": [ "# subsample the dataset\n", "n_train = 10000\n", - "train_set = dataset[cp.random.choice(dataset.shape[0], n_train, replace=False),:]\n", + "train_set = dataset[\n", + " cp.random.choice(dataset.shape[0], n_train, replace=False), :\n", + "]\n", "\n", "# build using training set\n", "build_params = ivf_flat.IndexParams(\n", - " n_lists=1024,\n", - " metric=\"sqeuclidean\",\n", - " kmeans_trainset_fraction=1,\n", - " kmeans_n_iters=20,\n", - " add_data_on_build=False\n", - " )\n", + " n_lists=1024,\n", + " metric=\"sqeuclidean\",\n", + " kmeans_trainset_fraction=1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=False,\n", + ")\n", "index = ivf_flat.build(build_params, train_set)\n", "\n", "print(\"Index before adding vectors\", index)\n", diff --git a/notebooks/tutorial_ivf_pq.ipynb b/notebooks/tutorial_ivf_pq.ipynb index 9d59daea23..bd1119c5e5 100644 --- a/notebooks/tutorial_ivf_pq.ipynb +++ b/notebooks/tutorial_ivf_pq.ipynb @@ -56,7 +56,7 @@ " return {\n", " attr: getattr(obj, attr)\n", " for attr in dir(obj)\n", - " if type(getattr(type(obj), attr)).__name__ == 'getset_descriptor'\n", + " if type(getattr(type(obj), attr)).__name__ == \"getset_descriptor\"\n", " }" ] }, @@ -67,10 +67,10 @@ "outputs": [], "source": [ "# We'll need to load store some data in this tutorial\n", - "WORK_FOLDER = os.path.join(tempfile.gettempdir(), 'cuvs_ivf_pq_tutorial')\n", + "WORK_FOLDER = os.path.join(tempfile.gettempdir(), \"cuvs_ivf_pq_tutorial\")\n", "\n", "if not os.path.exists(WORK_FOLDER):\n", - " os.makedirs(WORK_FOLDER)\n", + " os.makedirs(WORK_FOLDER)\n", "print(\"The index and data will be saved in\", WORK_FOLDER)" ] }, @@ -100,8 +100,7 @@ "outputs": [], "source": [ "pool = 
rmm.mr.PoolMemoryResource(\n", - " rmm.mr.CudaMemoryResource(),\n", - " initial_pool_size=2**30\n", + " rmm.mr.CudaMemoryResource(), initial_pool_size=2**30\n", ")\n", "rmm.mr.set_current_device_resource(pool)\n", "cp.cuda.set_allocator(rmm_cupy_allocator)" @@ -141,12 +140,12 @@ "metadata": {}, "outputs": [], "source": [ - "metric = f.attrs['distance']\n", + "metric = f.attrs[\"distance\"]\n", "\n", - "dataset = cp.array(f['train'])\n", - "queries = cp.array(f['test'])\n", - "gt_neighbors = cp.array(f['neighbors'])\n", - "gt_distances = cp.array(f['distances'])\n", + "dataset = cp.array(f[\"train\"])\n", + "queries = cp.array(f[\"test\"])\n", + "gt_neighbors = cp.array(f[\"neighbors\"])\n", + "gt_distances = cp.array(f[\"distances\"])\n", "\n", "print(f\"Loaded dataset of size {dataset.shape}; metric: '{metric}'.\")\n", "print(f\"Number of test queries: {queries.shape[0]}\")" @@ -229,7 +228,7 @@ "source": [ "%%time\n", "index_filepath = os.path.join(WORK_FOLDER, \"ivf_pq.bin\")\n", - "ivf_pq.save(index_filepath, index) \n", + "ivf_pq.save(index_filepath, index)\n", "loaded_index = ivf_pq.load(index_filepath)\n", "resources.sync()\n", "index" @@ -263,7 +262,9 @@ "outputs": [], "source": [ "%%time\n", - "distances, neighbors = ivf_pq.search(search_params, index, queries, k, resources=resources)\n", + "distances, neighbors = ivf_pq.search(\n", + " search_params, index, queries, k, resources=resources\n", + ")\n", "# Sync the GPU to make sure we've got the timing right\n", "resources.sync()" ] @@ -283,7 +284,9 @@ "outputs": [], "source": [ "recall_first_try = calc_recall(neighbors, gt_neighbors)\n", - "print(f\"Got recall = {recall_first_try} with the default parameters (k = {k}).\")" + "print(\n", + " f\"Got recall = {recall_first_try} with the default parameters (k = {k}).\"\n", + ")" ] }, { @@ -304,8 +307,12 @@ "source": [ "%%time\n", "\n", - "candidates = ivf_pq.search(search_params, index, queries, k * 2, resources=resources)[1]\n", - "distances, neighbors = refine(dataset, queries, candidates, k, resources=resources)\n", + "candidates = ivf_pq.search(\n", + " search_params, index, queries, k * 2, resources=resources\n", + ")[1]\n", + "distances, neighbors = refine(\n", + " dataset, queries, candidates, k, resources=resources\n", + ")\n", "resources.sync()" ] }, @@ -354,13 +361,13 @@ " bench_avg[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", " bench_std[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).std()\n", "\n", - "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n", + "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1 / 2))\n", "ax.errorbar(bench_k, bench_avg, bench_std)\n", - "ax.set_xscale('log')\n", + "ax.set_xscale(\"log\")\n", "ax.set_xticks(bench_k, bench_k)\n", - "ax.set_xlabel('k')\n", + "ax.set_xlabel(\"k\")\n", "ax.grid()\n", - "ax.set_ylabel('QPS');" + "ax.set_ylabel(\"QPS\");" ] }, { @@ -390,8 +397,10 @@ " sp = ivf_pq.SearchParams(n_probes=n_probes)\n", " r = %timeit -o ivf_pq.search(sp, index, queries, k, resources=resources); resources.sync()\n", " bench_qps[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", - " bench_recall[i] = calc_recall(ivf_pq.search(sp, index, queries, k, resources=resources)[1], gt_neighbors)\n", - " " + " bench_recall[i] = calc_recall(\n", + " ivf_pq.search(sp, index, queries, k, resources=resources)[1],\n", + " gt_neighbors,\n", + " )" ] }, { @@ -412,27 +421,27 @@ "metadata": {}, "outputs": [], "source": [ - "fig, ax = plt.subplots(1, 3, figsize=plt.figaspect(1/4))\n", + "fig, ax = 
plt.subplots(1, 3, figsize=plt.figaspect(1 / 4))\n", "\n", "ax[0].plot(bench_probes, bench_recall)\n", - "ax[0].set_xscale('log')\n", + "ax[0].set_xscale(\"log\")\n", "ax[0].set_xticks(bench_probes, bench_probes)\n", - "ax[0].set_xlabel('n_probes')\n", - "ax[0].set_ylabel('recall')\n", + "ax[0].set_xlabel(\"n_probes\")\n", + "ax[0].set_ylabel(\"recall\")\n", "ax[0].grid()\n", "\n", "ax[1].plot(bench_probes, bench_qps)\n", - "ax[1].set_xscale('log')\n", + "ax[1].set_xscale(\"log\")\n", "ax[1].set_xticks(bench_probes, bench_probes)\n", - "ax[1].set_xlabel('n_probes')\n", - "ax[1].set_ylabel('QPS')\n", - "ax[1].set_yscale('log')\n", + "ax[1].set_xlabel(\"n_probes\")\n", + "ax[1].set_ylabel(\"QPS\")\n", + "ax[1].set_yscale(\"log\")\n", "ax[1].grid()\n", "\n", "ax[2].plot(bench_recall, bench_qps)\n", - "ax[2].set_xlabel('recall')\n", - "ax[2].set_ylabel('QPS')\n", - "ax[2].set_yscale('log')\n", + "ax[2].set_xlabel(\"recall\")\n", + "ax[2].set_ylabel(\"QPS\")\n", + "ax[2].set_yscale(\"log\")\n", "ax[2].grid();" ] }, @@ -484,18 +493,39 @@ "bench_recall_s1 = np.zeros((5,), dtype=np.float32)\n", "k = 10\n", "n_probes = 256\n", - "search_params_32_32 = ivf_pq.SearchParams(n_probes=n_probes, internal_distance_dtype=np.float32, lut_dtype=np.float32)\n", - "search_params_32_16 = ivf_pq.SearchParams(n_probes=n_probes, internal_distance_dtype=np.float32, lut_dtype=np.float16)\n", - "search_params_32_08 = ivf_pq.SearchParams(n_probes=n_probes, internal_distance_dtype=np.float32, lut_dtype=np.uint8)\n", - "search_params_16_16 = ivf_pq.SearchParams(n_probes=n_probes, internal_distance_dtype=np.float16, lut_dtype=np.float16)\n", - "search_params_16_08 = ivf_pq.SearchParams(n_probes=n_probes, internal_distance_dtype=np.float16, lut_dtype=np.uint8)\n", - "search_ps = [search_params_32_32, search_params_32_16, search_params_32_08, search_params_16_16, search_params_16_08]\n", - "bench_names = ['32/32', '32/16', '32/8', '16/16', '16/8']\n", + "search_params_32_32 = ivf_pq.SearchParams(\n", + " n_probes=n_probes, internal_distance_dtype=np.float32, lut_dtype=np.float32\n", + ")\n", + "search_params_32_16 = ivf_pq.SearchParams(\n", + " n_probes=n_probes, internal_distance_dtype=np.float32, lut_dtype=np.float16\n", + ")\n", + "search_params_32_08 = ivf_pq.SearchParams(\n", + " n_probes=n_probes, internal_distance_dtype=np.float32, lut_dtype=np.uint8\n", + ")\n", + "search_params_16_16 = ivf_pq.SearchParams(\n", + " n_probes=n_probes, internal_distance_dtype=np.float16, lut_dtype=np.float16\n", + ")\n", + "search_params_16_08 = ivf_pq.SearchParams(\n", + " n_probes=n_probes, internal_distance_dtype=np.float16, lut_dtype=np.uint8\n", + ")\n", + "search_ps = [\n", + " search_params_32_32,\n", + " search_params_32_16,\n", + " search_params_32_08,\n", + " search_params_16_16,\n", + " search_params_16_08,\n", + "]\n", + "bench_names = [\"32/32\", \"32/16\", \"32/8\", \"16/16\", \"16/8\"]\n", "\n", "for i, sp in enumerate(search_ps):\n", " r = %timeit -o ivf_pq.search(sp, index, queries, k, resources=resources); resources.sync()\n", - " bench_qps_s1[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", - " bench_recall_s1[i] = calc_recall(ivf_pq.search(sp, index, queries, k, resources=resources)[1], gt_neighbors)" + " bench_qps_s1[i] = (\n", + " queries.shape[0] * r.loops / np.array(r.all_runs)\n", + " ).mean()\n", + " bench_recall_s1[i] = calc_recall(\n", + " ivf_pq.search(sp, index, queries, k, resources=resources)[1],\n", + " gt_neighbors,\n", + " )" ] }, { @@ -504,27 +534,35 @@ "metadata": {}, "outputs": 
[], "source": [ - "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n", + "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1 / 2))\n", "fig.suptitle(\n", - " f'Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n", - " f'k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}')\n", - "ax.plot(bench_recall_s1, bench_qps_s1, 'o')\n", - "ax.set_xlabel('recall')\n", - "ax.set_ylabel('QPS')\n", + " f\"Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n\"\n", + " + f\"k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}\"\n", + ")\n", + "ax.plot(bench_recall_s1, bench_qps_s1, \"o\")\n", + "ax.set_xlabel(\"recall\")\n", + "ax.set_ylabel(\"QPS\")\n", "ax.grid()\n", "annotations = []\n", "for i, label in enumerate(bench_names):\n", - " annotations.append(ax.text(\n", - " bench_recall_s1[i], bench_qps_s1[i],\n", - " f\" {label} \",\n", - " ha='center', va='center'))\n", + " annotations.append(\n", + " ax.text(\n", + " bench_recall_s1[i],\n", + " bench_qps_s1[i],\n", + " f\" {label} \",\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " )\n", + " )\n", "clutter = [\n", " ax.text(\n", - " 0.02, 0.08,\n", - " 'Labels denote the bitsize of: internal_distance_dtype/lut_dtype',\n", - " verticalalignment='top',\n", - " bbox={'facecolor': 'white', 'edgecolor': 'grey'},\n", - " transform = ax.transAxes)\n", + " 0.02,\n", + " 0.08,\n", + " \"Labels denote the bitsize of: internal_distance_dtype/lut_dtype\",\n", + " verticalalignment=\"top\",\n", + " bbox={\"facecolor\": \"white\", \"edgecolor\": \"grey\"},\n", + " transform=ax.transAxes,\n", + " )\n", "]\n", "adjust_text(annotations, objects=clutter);" ] @@ -554,18 +592,29 @@ "source": [ "def search_refine(ps, ratio):\n", " k_search = k * ratio\n", - " candidates = ivf_pq.search(ps, index, queries, k_search, resources=resources)[1]\n", - " return candidates if ratio == 1 else refine(dataset, queries, candidates, k, resources=resources)[1]\n", + " candidates = ivf_pq.search(\n", + " ps, index, queries, k_search, resources=resources\n", + " )[1]\n", + " return (\n", + " candidates\n", + " if ratio == 1\n", + " else refine(dataset, queries, candidates, k, resources=resources)[1]\n", + " )\n", + "\n", "\n", "ratios = [1, 2, 4]\n", "bench_qps_sr = np.zeros((len(ratios), len(search_ps)), dtype=np.float32)\n", "bench_recall_sr = np.zeros((len(ratios), len(search_ps)), dtype=np.float32)\n", "\n", - "for j, ratio in enumerate(ratios): \n", + "for j, ratio in enumerate(ratios):\n", " for i, ps in enumerate(search_ps):\n", " r = %timeit -o search_refine(ps, ratio); resources.sync()\n", - " bench_qps_sr[j, i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", - " bench_recall_sr[j, i] = calc_recall(search_refine(ps, ratio), gt_neighbors)" + " bench_qps_sr[j, i] = (\n", + " queries.shape[0] * r.loops / np.array(r.all_runs)\n", + " ).mean()\n", + " bench_recall_sr[j, i] = calc_recall(\n", + " search_refine(ps, ratio), gt_neighbors\n", + " )" ] }, { @@ -574,34 +623,42 @@ "metadata": {}, "outputs": [], "source": [ - "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n", + "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1 / 2))\n", "fig.suptitle(\n", - " f'Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n", - " f'k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}')\n", + " f\"Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n\"\n", + " + f\"k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}\"\n", + ")\n", "labels = []\n", "for j, ratio in 
enumerate(ratios):\n", - " ax.plot(bench_recall_sr[j, :], bench_qps_sr[j, :], 'o')\n", + " ax.plot(bench_recall_sr[j, :], bench_qps_sr[j, :], \"o\")\n", " labels.append(f\"refine ratio = {ratio}\")\n", "ax.legend(labels)\n", - "ax.set_xlabel('recall')\n", - "ax.set_ylabel('QPS')\n", + "ax.set_xlabel(\"recall\")\n", + "ax.set_ylabel(\"QPS\")\n", "ax.grid()\n", "colors = plt.rcParams[\"axes.prop_cycle\"].by_key()[\"color\"]\n", "annotations = []\n", "for j, ratio in enumerate(ratios):\n", " for i, label in enumerate(bench_names):\n", - " annotations.append(ax.text(\n", - " bench_recall_sr[j, i], bench_qps_sr[j, i],\n", - " f\" {label} \",\n", - " color=colors[j],\n", - " ha='center', va='center'))\n", + " annotations.append(\n", + " ax.text(\n", + " bench_recall_sr[j, i],\n", + " bench_qps_sr[j, i],\n", + " f\" {label} \",\n", + " color=colors[j],\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " )\n", + " )\n", "clutter = [\n", " ax.text(\n", - " 0.02, 0.08,\n", - " 'Labels denote the bitsize of: internal_distance_dtype/lut_dtype',\n", - " verticalalignment='top',\n", - " bbox={'facecolor': 'white', 'edgecolor': 'grey'},\n", - " transform = ax.transAxes)\n", + " 0.02,\n", + " 0.08,\n", + " \"Labels denote the bitsize of: internal_distance_dtype/lut_dtype\",\n", + " verticalalignment=\"top\",\n", + " bbox={\"facecolor\": \"white\", \"edgecolor\": \"grey\"},\n", + " transform=ax.transAxes,\n", + " )\n", "]\n", "adjust_text(annotations, objects=clutter);" ] @@ -629,18 +686,24 @@ " ps = ivf_pq.SearchParams(\n", " n_probes=n_probes,\n", " internal_distance_dtype=internal_distance_dtype,\n", - " lut_dtype=lut_dtype)\n", - " candidates = ivf_pq.search(ps, index, queries, k_search, resources=resources)[1]\n", - " return candidates if ratio == 1 else refine(dataset, queries, candidates, k, resources=resources)[1]\n", + " lut_dtype=lut_dtype,\n", + " )\n", + " candidates = ivf_pq.search(\n", + " ps, index, queries, k_search, resources=resources\n", + " )[1]\n", + " return (\n", + " candidates\n", + " if ratio == 1\n", + " else refine(dataset, queries, candidates, k, resources=resources)[1]\n", + " )\n", + "\n", "\n", "search_configs = [\n", " lambda n_probes: search_refine(np.float16, np.float16, 1, n_probes),\n", " lambda n_probes: search_refine(np.float32, np.uint8, 1, n_probes),\n", - " lambda n_probes: search_refine(np.float32, np.uint8, 2, n_probes)\n", + " lambda n_probes: search_refine(np.float32, np.uint8, 2, n_probes),\n", "]\n", - "search_config_names = [\n", - " '16/16', '32/8', '32/8/r2'\n", - "]" + "search_config_names = [\"16/16\", \"32/8\", \"32/8/r2\"]" ] }, { @@ -699,16 +762,22 @@ "search_fun = search_configs[selected_search_variant]\n", "search_label = search_config_names[selected_search_variant]\n", "\n", - "bench_qps_nl = np.zeros((len(n_list_variants), len(pl_ratio_variants)), dtype=np.float32)\n", + "bench_qps_nl = np.zeros(\n", + " (len(n_list_variants), len(pl_ratio_variants)), dtype=np.float32\n", + ")\n", "bench_recall_nl = np.zeros_like(bench_qps_nl, dtype=np.float32)\n", "\n", "for i, n_lists in enumerate(n_list_variants):\n", - " index_params = ivf_pq.IndexParams(n_lists=n_lists, metric=metric, pq_dim=pq_dim)\n", + " index_params = ivf_pq.IndexParams(\n", + " n_lists=n_lists, metric=metric, pq_dim=pq_dim\n", + " )\n", " index = ivf_pq.build(index_params, dataset, resources=resources)\n", " for j, pl_ratio in enumerate(pl_ratio_variants):\n", " n_probes = max(1, n_lists // pl_ratio)\n", " r = %timeit -o search_fun(n_probes); resources.sync()\n", - " bench_qps_nl[i, j] 
= (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", + " bench_qps_nl[i, j] = (\n", + " queries.shape[0] * r.loops / np.array(r.all_runs)\n", + " ).mean()\n", " bench_recall_nl[i, j] = calc_recall(search_fun(n_probes), gt_neighbors)\n", " del index" ] @@ -719,19 +788,20 @@ "metadata": {}, "outputs": [], "source": [ - "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n", + "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1 / 2))\n", "fig.suptitle(\n", - " f'Effects of n_list on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n", - " f'k = {k}, pq_dim = {pq_dim}, search = {search_label}')\n", + " f\"Effects of n_list on QPS/recall trade-off ({DATASET_NAME})\\n\"\n", + " + f\"k = {k}, pq_dim = {pq_dim}, search = {search_label}\"\n", + ")\n", "labels = []\n", "for i, n_lists in enumerate(n_list_variants):\n", " ax.plot(bench_recall_nl[i, :], bench_qps_nl[i, :])\n", " labels.append(f\"n_lists = {n_lists}\")\n", "\n", "ax.legend(labels)\n", - "ax.set_xlabel('recall')\n", - "ax.set_ylabel('QPS')\n", - "ax.set_yscale('log')\n", + "ax.set_xlabel(\"recall\")\n", + "ax.set_ylabel(\"QPS\")\n", + "ax.set_yscale(\"log\")\n", "ax.grid()" ] }, @@ -867,13 +937,40 @@ "n_lists = 1000\n", "\n", "build_configs = {\n", - " '64-8-subspace': ivf_pq.IndexParams(n_lists=n_lists, metric=metric, pq_dim=64, pq_bits=8, codebook_kind=\"subspace\"),\n", - " '128-8-subspace': ivf_pq.IndexParams(n_lists=n_lists, metric=metric, pq_dim=128, pq_bits=8, codebook_kind=\"subspace\"),\n", - " '128-6-subspace': ivf_pq.IndexParams(n_lists=n_lists, metric=metric, pq_dim=128, pq_bits=6, codebook_kind=\"subspace\"),\n", - " '128-6-cluster': ivf_pq.IndexParams(n_lists=n_lists, metric=metric, pq_dim=128, pq_bits=6, codebook_kind=\"cluster\"),\n", + " \"64-8-subspace\": ivf_pq.IndexParams(\n", + " n_lists=n_lists,\n", + " metric=metric,\n", + " pq_dim=64,\n", + " pq_bits=8,\n", + " codebook_kind=\"subspace\",\n", + " ),\n", + " \"128-8-subspace\": ivf_pq.IndexParams(\n", + " n_lists=n_lists,\n", + " metric=metric,\n", + " pq_dim=128,\n", + " pq_bits=8,\n", + " codebook_kind=\"subspace\",\n", + " ),\n", + " \"128-6-subspace\": ivf_pq.IndexParams(\n", + " n_lists=n_lists,\n", + " metric=metric,\n", + " pq_dim=128,\n", + " pq_bits=6,\n", + " codebook_kind=\"subspace\",\n", + " ),\n", + " \"128-6-cluster\": ivf_pq.IndexParams(\n", + " n_lists=n_lists,\n", + " metric=metric,\n", + " pq_dim=128,\n", + " pq_bits=6,\n", + " codebook_kind=\"cluster\",\n", + " ),\n", "}\n", "\n", - "bench_qps_ip = np.zeros((len(build_configs), len(search_configs), len(n_probes_variants)), dtype=np.float32)\n", + "bench_qps_ip = np.zeros(\n", + " (len(build_configs), len(search_configs), len(n_probes_variants)),\n", + " dtype=np.float32,\n", + ")\n", "bench_recall_ip = np.zeros_like(bench_qps_ip, dtype=np.float32)\n", "\n", "for i, index_params in enumerate(build_configs.values()):\n", @@ -881,8 +978,12 @@ " for l, search_fun in enumerate(search_configs):\n", " for j, n_probes in enumerate(n_probes_variants):\n", " r = %timeit -o search_fun(n_probes); resources.sync()\n", - " bench_qps_ip[i, l, j] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", - " bench_recall_ip[i, l, j] = calc_recall(search_fun(n_probes), gt_neighbors)" + " bench_qps_ip[i, l, j] = (\n", + " queries.shape[0] * r.loops / np.array(r.all_runs)\n", + " ).mean()\n", + " bench_recall_ip[i, l, j] = calc_recall(\n", + " search_fun(n_probes), gt_neighbors\n", + " )" ] }, { @@ -891,10 +992,13 @@ "metadata": {}, "outputs": [], "source": [ - "fig, ax = 
plt.subplots(len(search_config_names), 1, figsize=(16, len(search_config_names)*8))\n", + "fig, ax = plt.subplots(\n", + " len(search_config_names), 1, figsize=(16, len(search_config_names) * 8)\n", + ")\n", "fig.suptitle(\n", - " f'Effects of index parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n", - " f'k = {k}, n_lists = {n_lists}')\n", + " f\"Effects of index parameters on QPS/recall trade-off ({DATASET_NAME})\\n\"\n", + " + f\"k = {k}, n_lists = {n_lists}\"\n", + ")\n", "\n", "for j, search_label in enumerate(search_config_names):\n", " labels = []\n", @@ -904,9 +1008,9 @@ "\n", " ax[j].set_title(f\"search: {search_label}\")\n", " ax[j].legend(labels)\n", - " ax[j].set_xlabel('recall')\n", - " ax[j].set_ylabel('QPS')\n", - " ax[j].set_yscale('log')\n", + " ax[j].set_xlabel(\"recall\")\n", + " ax[j].set_ylabel(\"QPS\")\n", + " ax[j].set_yscale(\"log\")\n", " ax[j].grid()" ] }, diff --git a/notebooks/utils.py b/notebooks/utils.py index c8a121f531..f456e6f02d 100644 --- a/notebooks/utils.py +++ b/notebooks/utils.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 @@ -10,6 +10,7 @@ import time import urllib + ## Check the quality of the prediction (recall) def calc_recall(found_indices, ground_truth): found_indices = cp.asarray(found_indices) diff --git a/pyproject.toml b/pyproject.toml index 4175144663..9a51f4fdb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,22 +1,27 @@ -[tool.black] +# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[tool.ruff] line-length = 79 -target-version = ["py310"] -include = '\.py?$' -force-exclude = ''' -/( - thirdparty | - \.eggs | - \.git | - \.hg | - \.mypy_cache | - \.tox | - \.venv | - _build | - buck-out | - build | - dist -)/ -''' +exclude = [ + "__init__.py", +] + +[tool.ruff.lint] +ignore = [ + # whitespace before : + "E203", +] + +[tool.ruff.lint.per-file-ignores] +"*.ipynb" = [ + # unused imports + "F401", + # unused variable + "F841", + # ambiguous variable name + "E741", +] [tool.pydocstyle] # Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather diff --git a/python/cuvs/cuvs/tests/test_cagra.py b/python/cuvs/cuvs/tests/test_cagra.py index 20ac07fa9a..3468a086b9 100644 --- a/python/cuvs/cuvs/tests/test_cagra.py +++ b/python/cuvs/cuvs/tests/test_cagra.py @@ -161,7 +161,6 @@ def run_cagra_build_search_test( def test_cagra_dataset_dtype_host_device( dtype, array_type, inplace, build_algo, metric, serialize ): - # Note that inner_product tests use normalized input which we cannot # represent in int8, therefore we test only sqeuclidean metric here. 
run_cagra_build_search_test( diff --git a/python/cuvs/cuvs/tests/test_mg_cagra.py b/python/cuvs/cuvs/tests/test_mg_cagra.py index 42bb8220ea..903c16ea24 100644 --- a/python/cuvs/cuvs/tests/test_mg_cagra.py +++ b/python/cuvs/cuvs/tests/test_mg_cagra.py @@ -538,9 +538,9 @@ def test_mg_cagra_simple(): # Distances should be non-negative and sorted assert np.all(distances >= 0) for i in range(n_queries): - assert np.all( - distances[i, :-1] <= distances[i, 1:] - ), f"Distances not sorted for query {i}" + assert np.all(distances[i, :-1] <= distances[i, 1:]), ( + f"Distances not sorted for query {i}" + ) # Integration test with multiple operations diff --git a/python/cuvs/cuvs/tests/test_mg_ivf_flat.py b/python/cuvs/cuvs/tests/test_mg_ivf_flat.py index 08c2610b86..99dff4e221 100644 --- a/python/cuvs/cuvs/tests/test_mg_ivf_flat.py +++ b/python/cuvs/cuvs/tests/test_mg_ivf_flat.py @@ -572,9 +572,9 @@ def test_mg_ivf_flat_simple(): # Distances should be non-negative and sorted assert np.all(distances >= 0) for i in range(n_queries): - assert np.all( - distances[i, :-1] <= distances[i, 1:] - ), f"Distances not sorted for query {i}" + assert np.all(distances[i, :-1] <= distances[i, 1:]), ( + f"Distances not sorted for query {i}" + ) # Integration test with multiple operations diff --git a/python/cuvs/cuvs/tests/test_mg_ivf_pq.py b/python/cuvs/cuvs/tests/test_mg_ivf_pq.py index d54f170153..6c6cf8415b 100644 --- a/python/cuvs/cuvs/tests/test_mg_ivf_pq.py +++ b/python/cuvs/cuvs/tests/test_mg_ivf_pq.py @@ -600,9 +600,9 @@ def test_mg_ivf_pq_simple(): # Distances should be non-negative and sorted assert np.all(distances >= 0) for i in range(n_queries): - assert np.all( - distances[i, :-1] <= distances[i, 1:] - ), f"Distances not sorted for query {i}" + assert np.all(distances[i, :-1] <= distances[i, 1:]), ( + f"Distances not sorted for query {i}" + ) # Integration test with multiple operations diff --git a/python/cuvs/cuvs/tests/test_refine.py b/python/cuvs/cuvs/tests/test_refine.py index 2a6d3a3add..bb6b373e78 100644 --- a/python/cuvs/cuvs/tests/test_refine.py +++ b/python/cuvs/cuvs/tests/test_refine.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # @@ -23,7 +23,6 @@ def run_refine( dtype=np.float32, memory_type="device", ): - dataset = generate_data((n_rows, n_cols), dtype) queries = generate_data((n_queries, n_cols), dtype) diff --git a/python/cuvs_bench/cuvs_bench/plot/__main__.py b/python/cuvs_bench/cuvs_bench/plot/__main__.py index 6d9d9cb4cd..aca08505ea 100644 --- a/python/cuvs_bench/cuvs_bench/plot/__main__.py +++ b/python/cuvs_bench/cuvs_bench/plot/__main__.py @@ -520,7 +520,6 @@ def main( time_unit: str, raw: bool, ) -> None: - args = locals() if args["algorithms"]: diff --git a/python/cuvs_bench/cuvs_bench/run/data_export.py b/python/cuvs_bench/cuvs_bench/run/data_export.py index d658c07d9e..707677a083 100644 --- a/python/cuvs_bench/cuvs_bench/run/data_export.py +++ b/python/cuvs_bench/cuvs_bench/run/data_export.py @@ -222,9 +222,9 @@ def convert_json_to_csv_search(dataset, dataset_path): write.iloc[s_index, write_ncols] = build_df.iloc[ b_index, 2 ] - write.iloc[ - s_index, write_ncols + 1 : - ] = build_df.iloc[b_index, 3:] + write.iloc[s_index, write_ncols + 1 :] = ( + build_df.iloc[b_index, 3:] + ) break # Write search data and compute frontiers write.to_csv(file.replace(".json", ",raw.csv"), index=False) @@ -256,8 +256,9 @@ def create_pointset(data, xn, yn): xm, ym = metrics[xn], metrics[yn] y_col = 4 if yn == "latency" else 3 - rev_x, rev_y = (-1 if xm["worst"] < 0 else 1), ( - -1 if ym["worst"] < 0 else 1 + rev_x, rev_y = ( + (-1 if xm["worst"] < 0 else 1), + (-1 if ym["worst"] < 0 else 1), ) # Sort data based on x and y metrics data.sort(key=lambda t: (rev_y * t[y_col], rev_x * t[2])) diff --git a/python/cuvs_bench/cuvs_bench/tests/test_cli.py b/python/cuvs_bench/cuvs_bench/tests/test_cli.py index abd5fa636f..c65f97bc2a 100644 --- a/python/cuvs_bench/cuvs_bench/tests/test_cli.py +++ b/python/cuvs_bench/cuvs_bench/tests/test_cli.py @@ -40,9 +40,9 @@ def test_get_dataset_creates_expected_files(temp_datasets_dir: Path): # Verify that each expected file exists in the datasets directory. for filename in expected_files: file_path = temp_datasets_dir / filename - assert ( - file_path.exists() - ), f"Expected file {filename} was not generated." + assert file_path.exists(), ( + f"Expected file {filename} was not generated." + ) def test_run_command_creates_results(temp_datasets_dir: Path): @@ -82,9 +82,9 @@ def test_run_command_creates_results(temp_datasets_dir: Path): "--force", ] result = runner.invoke(run_main, run_args) - assert ( - result.exit_code == 0 - ), f"Run command failed with output:\n{result.output}" + assert result.exit_code == 0, ( + f"Run command failed with output:\n{result.output}" + ) common_build_header = [ "algo_name", @@ -426,9 +426,9 @@ def test_run_command_creates_results(temp_datasets_dir: Path): for rel_path, expectations in expected_files.items(): file_path = temp_datasets_dir / rel_path assert file_path.exists(), f"Expected file {file_path} does not exist." - assert ( - file_path.stat().st_size > 0 - ), f"Expected file {file_path} is empty." + assert file_path.stat().st_size > 0, ( + f"Expected file {file_path} is empty." 
+        )

         df = pd.read_csv(file_path)

@@ -436,9 +436,9 @@ def test_run_command_creates_results(temp_datasets_dir: Path):
         actual_rows = len(df)

         # breakpoint()
-        assert (
-            actual_header == expectations["header"]
-        ), f"Wrong header produced in file f{rel_path}"
+        assert actual_header == expectations["header"], (
+            f"Wrong header produced in file {rel_path}"
+        )

         assert actual_rows == expectations["rows"]

@@ -483,9 +483,9 @@ def test_plot_command_creates_png_files(temp_datasets_dir: Path):
         "latency",
     ]
     result = runner.invoke(plot_main, args)
-    assert (
-        result.exit_code == 0
-    ), f"Plot command failed with output:\n{result.output}"
+    assert result.exit_code == 0, (
+        f"Plot command failed with output:\n{result.output}"
+    )

     # Expected output file names.
     expected_files = [
@@ -496,6 +496,6 @@ def test_plot_command_creates_png_files(temp_datasets_dir: Path):
     for filename in expected_files:
         file_path = temp_datasets_dir / filename
         assert file_path.exists(), f"Expected file {filename} does not exist."
-        assert (
-            file_path.stat().st_size > 0
-        ), f"Expected file {filename} is empty."
+        assert file_path.stat().st_size > 0, (
+            f"Expected file {filename} is empty."
+        )
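
The assert rewrites that dominate the test-file hunks above are all the same mechanical change: ruff's formatter keeps the asserted condition on one line and parenthesizes only the long message, where the old Black layout wrapped the condition itself. A minimal sketch of the two layouts, with generic names standing in for the real test values:

status_code, output = 0, "ok"

# Old (Black) layout: the condition is wrapped in parentheses.
assert (
    status_code == 0
), f"command failed with output:\n{output}"

# New (ruff format) layout: the condition stays inline and only the
# message is parenthesized, so the statement still reads as
# `assert <condition>, <message>` at a glance.
assert status_code == 0, (
    f"command failed with output:\n{output}"
)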
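The [tool.ruff.lint.per-file-ignores] table added to pyproject.toml suppresses three rules only inside notebooks, which matches how tutorial cells are typically written. A hypothetical snippet, not taken from the repo, showing what each suppressed rule would flag:

import os  # F401: `os` imported but unused (tutorial setup cells often import eagerly)


def cell():
    scratch = 2**30  # F841: local variable assigned but never used
    for l in range(3):  # E741: ambiguous variable name "l"
        print(l)

Under ruff's default settings, "ruff check --fix" removes unused imports (F401) automatically, while F841 and E741 are only reported, so keeping them out of notebook linting avoids noise without touching the library code.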
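On the notebook side, many of the reflowed lines are variations of one expression that converts an IPython "%timeit -o" result into queries per second. Spelled out as a helper, assuming only the documented loops and all_runs fields of IPython's TimeitResult:

import numpy as np


def qps_from_timeit(r, n_queries):
    # r.all_runs holds the total wall time of each timing run and r.loops
    # the number of calls per run, so all_runs / loops is the duration of
    # a single search call.
    seconds_per_call = np.array(r.all_runs) / r.loops
    qps = n_queries / seconds_per_call
    return qps.mean(), qps.std()

This is exactly (queries.shape[0] * r.loops / np.array(r.all_runs)).mean() and .std() as the benchmark cells compute them.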
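The search_refine helpers reformatted in tutorial_ivf_pq.ipynb also share one recipe worth stating plainly: over-fetch k * ratio candidates from the compressed IVF-PQ index, then re-rank them against the original vectors to recover recall. Condensed into one function, assuming the ivf_pq and refine names the notebook already imports from cuvs.neighbors:

from cuvs.neighbors import ivf_pq, refine


def search_with_refinement(index, dataset, queries, k, ratio, params, resources):
    # Over-fetch: ask the quantized index for k * ratio approximate neighbors.
    candidates = ivf_pq.search(
        params, index, queries, k * ratio, resources=resources
    )[1]
    if ratio == 1:
        return candidates
    # Re-rank: recompute exact distances against the uncompressed dataset
    # and keep the best k.
    return refine(dataset, queries, candidates, k, resources=resources)[1]

A refine ratio of 2 to 4 is what the benchmark cells above sweep; ratio == 1 skips refinement entirely.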