Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
698 changes: 697 additions & 1 deletion notebooks/tutorial.ipynb

Large diffs are not rendered by default.

9 changes: 3 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@ build-backend = "setuptools.build_meta"
# this should be our single source of truth for versioning

[project]
# to rename as kb_src later and updated codebase
# do that later let's get all the import right first
name = "src"
name = "kernelbench"
version = "0.0.1"
requires-python = "==3.13.*"
dependencies = [
Expand Down Expand Up @@ -51,6 +49,5 @@ dev = [


[tool.setuptools.packages.find]
where = ["."]
include = ["src*"]
# TODO: change to kb_src later
where = ["src"]
include = ["kernelbench*"]
4 changes: 2 additions & 2 deletions scripts/benchmark_eval_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pydra
from pydra import Config, REQUIRED
from src.dataset import construct_kernelbench_dataset
from kernelbench.dataset import construct_kernelbench_dataset
from tabulate import tabulate

"""
Expand Down Expand Up @@ -129,7 +129,7 @@ def analyze_greedy_eval(run_name, hardware, baseline, level):
import numpy as np

# Calculate speedup metrics
from src.score import (
from kernelbench.score import (
fastp,
geometric_mean_speed_ratio_correct_and_faster_only,
geometric_mean_speed_ratio_correct_only,
Expand Down
4 changes: 2 additions & 2 deletions scripts/debug_stddout.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import os
import ninja

from src.utils import set_gpu_arch
from src.eval import build_compile_cache_with_capturing
from kernelbench.utils import set_gpu_arch
from kernelbench.eval import build_compile_cache_with_capturing

################################################################################
# Test for checking if we can capture nvcc errors
Expand Down
12 changes: 6 additions & 6 deletions scripts/eval_from_generations.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@
from pydra import Config, REQUIRED

# Import only what we need
from src import compile, eval, utils
from kernelbench import compile, eval, utils

from src.dataset import construct_kernelbench_dataset
from src.eval import (
from kernelbench.dataset import construct_kernelbench_dataset
from kernelbench.eval import (
build_compile_cache,
get_error_name,
check_metadata_serializable_all_types,
eval_kernel_against_ref,
KernelExecResult,
)

from src.utils import read_file, set_gpu_arch
from kernelbench.utils import read_file, set_gpu_arch
from tqdm import tqdm

# Modal support
Expand Down Expand Up @@ -183,8 +183,8 @@ def evaluate_single_sample_modal(
Evaluate a single sample on Modal GPU with automatic retries for GPU attachment failures
and proper GPU corruption handling via stop_fetching_inputs()
"""
from src.eval import eval_kernel_against_ref, get_torch_dtype_from_string
from src.utils import set_gpu_arch
from kernelbench.eval import eval_kernel_against_ref, get_torch_dtype_from_string
from kernelbench.utils import set_gpu_arch
import torch
import time
import modal.experimental
Expand Down
12 changes: 6 additions & 6 deletions scripts/generate_and_eval_single_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@

from datasets import load_dataset

from src.dataset import construct_kernelbench_dataset
from src.eval import eval_kernel_against_ref
from src.prompt_constructor_toml import get_prompt_for_backend, get_custom_prompt
from src.utils import (
from kernelbench.dataset import construct_kernelbench_dataset
from kernelbench.eval import eval_kernel_against_ref
from kernelbench.prompt_constructor_toml import get_prompt_for_backend, get_custom_prompt
from kernelbench.utils import (
create_inference_server_from_presets,
extract_first_code,
query_server,
read_file,
set_gpu_arch,
)
from src.eval import get_torch_dtype_from_string
from kernelbench.eval import get_torch_dtype_from_string
"""
Generate and evaluate a single sample
Easiest way to get started, to test a single problem for experimentation or debugging
Expand Down Expand Up @@ -99,7 +99,7 @@ def main(config: EvalConfig):
Keep it simple: Generate and evaluate a single sample
Note: will shorten code logic to make this as simple as possible
"""
from src.utils import SERVER_PRESETS
from kernelbench.utils import SERVER_PRESETS

if config.server_type and config.server_type in SERVER_PRESETS:
preset = SERVER_PRESETS[config.server_type]
Expand Down
12 changes: 6 additions & 6 deletions scripts/generate_and_eval_single_sample_modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from datasets import load_dataset

#from src.dataset import construct_kernelbench_dataset
from src.utils import extract_first_code, query_server, set_gpu_arch, read_file, create_inference_server_from_presets
from kernelbench.utils import extract_first_code, query_server, set_gpu_arch, read_file, create_inference_server_from_presets

app = modal.App("eval_single_sample")

Expand Down Expand Up @@ -113,10 +113,10 @@ def eval_single_sample_modal(self, ref_arch_src, custom_kernel, verbose, gpu_arc
# 3. Evaluate Kernel
# NOTE: no need to wrap around process here as only a single sample
# see batch eval for examples of process isolation
from src.eval import eval_kernel_against_ref
from src.eval import get_torch_dtype_from_string
from kernelbench.eval import eval_kernel_against_ref
from kernelbench.eval import get_torch_dtype_from_string
# Use utility function to set the GPU architecture in the modal environment
from src.utils import set_gpu_arch as modal_set_gpu_arch
from kernelbench.utils import set_gpu_arch as modal_set_gpu_arch
modal_set_gpu_arch(gpu_arch)
return eval_kernel_against_ref(
ref_arch_src, custom_kernel, verbose=verbose, measure_performance=True,
Expand All @@ -130,7 +130,7 @@ def main(config: EvalConfig):
"""
Keep it simple: Generate and evaluate a single sample
"""
from src.utils import SERVER_PRESETS
from kernelbench.utils import SERVER_PRESETS

if config.server_type and config.server_type in SERVER_PRESETS:
preset = SERVER_PRESETS[config.server_type]
Expand Down Expand Up @@ -238,7 +238,7 @@ def main(config: EvalConfig):
)

# Lazy import prompt constructor
from src.prompt_constructor_toml import get_prompt_for_backend, get_custom_prompt
from kernelbench.prompt_constructor_toml import get_prompt_for_backend, get_custom_prompt

if custom_prompt_key:
custom_prompt = get_custom_prompt(
Expand Down
8 changes: 4 additions & 4 deletions scripts/generate_baseline_time.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import torch
import numpy as np
from src.eval import (
from kernelbench.eval import (
load_original_model_and_inputs,
set_seed,
fetch_ref_arch_from_problem_id,
)
from src.timing import (
from kernelbench.timing import (
get_timing_function,
get_timing_stats,
)
from src.dataset import construct_problem_dataset_from_problem_dir
from src.utils import read_file
from kernelbench.dataset import construct_problem_dataset_from_problem_dir
from kernelbench.utils import read_file
import os
import json
from tqdm import tqdm
Expand Down
6 changes: 3 additions & 3 deletions scripts/generate_baseline_time_modal.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import torch
import numpy as np
from src.eval import (
from kernelbench.eval import (
load_original_model_and_inputs,
time_execution_with_cuda_event,
get_timing_stats,
set_seed,
fetch_ref_arch_from_problem_id,
)
from src.dataset import construct_problem_dataset_from_problem_dir
from src.utils import read_file
from kernelbench.dataset import construct_problem_dataset_from_problem_dir
from kernelbench.utils import read_file
import os
import json
from tqdm import tqdm
Expand Down
10 changes: 5 additions & 5 deletions scripts/generate_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
from datasets import load_dataset
from pydra import Config, REQUIRED

from src.dataset import construct_kernelbench_dataset
from src.eval import eval_kernel_against_ref
from src.prompt_constructor_toml import get_prompt_for_backend, get_custom_prompt
from src.utils import (
from kernelbench.dataset import construct_kernelbench_dataset
from kernelbench.eval import eval_kernel_against_ref
from kernelbench.prompt_constructor_toml import get_prompt_for_backend, get_custom_prompt
from kernelbench.utils import (
create_inference_server_from_presets,
extract_first_code,
maybe_multithread,
Expand Down Expand Up @@ -210,7 +210,7 @@ def main(config: GenerationConfig):
Batch Generate Samples for Particular Level
Store generated kernels in the specified run directory
"""
from src.utils import SERVER_PRESETS
from kernelbench.utils import SERVER_PRESETS

if config.server_type and config.server_type in SERVER_PRESETS:
preset = SERVER_PRESETS[config.server_type]
Expand Down
2 changes: 1 addition & 1 deletion scripts/get_baseline_time_single_problem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch
import numpy as np
from src.eval import (
from kernelbench.eval import (
load_original_model_and_inputs,
time_execution_with_cuda_event,
get_timing_stats,
Expand Down
4 changes: 2 additions & 2 deletions scripts/inspect_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import os
import sys
import numpy as np
from src.eval import (
from kernelbench.eval import (
load_original_model_and_inputs,
time_execution_with_cuda_event,
get_timing_stats,
set_seed,
fetch_ref_arch_from_problem_id,
)
from src.dataset import construct_problem_dataset_from_problem_dir
from kernelbench.dataset import construct_problem_dataset_from_problem_dir
import os, sys
import logging
import json
Expand Down
8 changes: 4 additions & 4 deletions scripts/inspect_kernel_pytorch_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
device = "cuda:0"


from src.utils import read_file
from src.eval import (
from kernelbench.utils import read_file
from kernelbench.eval import (
load_custom_model,
load_original_model_and_inputs,
set_seed,
Expand Down Expand Up @@ -103,8 +103,8 @@ def get_torch_profiler_info(ref_arch_src: str,
def __main__():
# run_profile(dataset, problem_id, num_trials=10)

ref_arch_src_path = os.path.join(REPO_ROOT, "src/prompts/few_shot/model_ex_mnist2.py")
kernel_src_path = os.path.join(REPO_ROOT, "src/prompts/few_shot/model_new_ex_mnist2.py")
ref_arch_src_path = os.path.join(REPO_ROOT, "src/kernelbench/prompts/few_shot/model_ex_mnist2.py")
kernel_src_path = os.path.join(REPO_ROOT, "src/kernelbench/prompts/few_shot/model_new_ex_mnist2.py")

ref_arch_src = read_file(ref_arch_src_path)
kernel_src = read_file(kernel_src_path)
Expand Down
4 changes: 2 additions & 2 deletions scripts/inspect_triton.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
device = "cuda:0"


from src.utils import read_file
from src.eval import (
from kernelbench.utils import read_file
from kernelbench.eval import (
load_custom_model,
load_original_model_and_inputs,
time_execution_with_cuda_event,
Expand Down
20 changes: 10 additions & 10 deletions scripts/run_and_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from datasets import load_dataset
import modal

from src import eval as kernel_eval
from src import utils as kernel_utils
from kernelbench import eval as kernel_eval
from kernelbench import utils as kernel_utils
from scripts.generate_baseline_time import measure_program_time
from src.utils import read_file
from kernelbench.utils import read_file

# Modal setup
app = modal.App("run_and_check")
Expand Down Expand Up @@ -47,7 +47,7 @@
You will need two files
1. Reference: PyTorch reference (module Model) implementation with init and input shapes
2. Solution: PyTorch solution (module ModelNew) with inline CUDA Code
Please see examples in src/prompts
Please see examples in src/kernelbench/prompts

The Reference could be either
1. a local file: specify the path to the file
Expand All @@ -56,15 +56,15 @@
====================================================
Usage:
1. PyTorch reference is a local file (local eval)
python3 scripts/run_and_check.py ref_origin=local ref_arch_src_path=src/prompts/model_ex_add.py kernel_src_path=src/prompts/model_new_ex_add.py eval_mode=local
python3 scripts/run_and_check.py ref_origin=local ref_arch_src_path=src/prompts/few_shot/model_ex_tiled_matmul.py kernel_src_path=src/prompts/few_shot/model_new_ex_tiled_matmul.py eval_mode=local
python3 scripts/run_and_check.py ref_origin=local ref_arch_src_path=src/kernelbench/prompts/model_ex_add.py kernel_src_path=src/kernelbench/prompts/model_new_ex_add.py eval_mode=local
python3 scripts/run_and_check.py ref_origin=local ref_arch_src_path=src/kernelbench/prompts/few_shot/model_ex_tiled_matmul.py kernel_src_path=src/kernelbench/prompts/few_shot/model_new_ex_tiled_matmul.py eval_mode=local


2. PyTorch reference is a kernelbench problem (local eval)
python3 scripts/run_and_check.py ref_origin=kernelbench level=<level> problem_id=<problem_id> kernel_src_path=<path to model-generated kernel> eval_mode=local

3. PyTorch reference is a local file (modal eval on cloud GPU)
python3 scripts/run_and_check.py ref_origin=local ref_arch_src_path=src/prompts/model_ex_add.py kernel_src_path=src/prompts/model_new_ex_add.py eval_mode=modal gpu=H100
python3 scripts/run_and_check.py ref_origin=local ref_arch_src_path=src/kernelbench/prompts/model_ex_add.py kernel_src_path=src/kernelbench/prompts/model_new_ex_add.py eval_mode=modal gpu=H100

4. PyTorch reference is a kernelbench problem (modal eval on cloud GPU)
python3 scripts/run_and_check.py ref_origin=kernelbench level=<level> problem_id=<problem_id> kernel_src_path=<path to model-generated kernel> eval_mode=modal gpu=L40S
Expand Down Expand Up @@ -178,8 +178,8 @@ class EvalFunc:
@modal.method()
def evaluate_single_sample_src_modal(self, ref_arch_src: str, kernel_src: str, configs: dict, gpu_arch: list):
"""Evaluate a single sample source code against a reference source code on Modal"""
from src.utils import set_gpu_arch
from src.eval import eval_kernel_against_ref, get_torch_dtype_from_string
from kernelbench.utils import set_gpu_arch
from kernelbench.eval import eval_kernel_against_ref, get_torch_dtype_from_string

set_gpu_arch(gpu_arch)
device = torch.device("cuda:0")
Expand Down Expand Up @@ -218,7 +218,7 @@ def measure_program_time_modal(
):
"""Measure the execution time of a reference program on Modal"""
from scripts.generate_baseline_time import measure_program_time
from src.utils import set_gpu_arch
from kernelbench.utils import set_gpu_arch

set_gpu_arch(gpu_arch)
device = torch.device("cuda:0")
Expand Down
4 changes: 2 additions & 2 deletions scripts/verify_generation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import sys, os
import src.utils as utils
import kernelbench.utils as utils
import time
from src.prompt_constructor_toml import get_prompt_for_backend, get_custom_prompt
from kernelbench.prompt_constructor_toml import get_prompt_for_backend, get_custom_prompt
"""
For testing inference and quickly iterating on prompts
Uses functions in prompt_constructor
Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions src/compile.py → src/kernelbench/compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from tqdm import tqdm

import shutil
from src.eval import build_compile_cache
from src import utils as utils
from kernelbench.eval import build_compile_cache
from kernelbench import utils as utils
import torch
import os
import multiprocessing as mp
Expand Down
2 changes: 1 addition & 1 deletion src/dataset.py → src/kernelbench/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
REPO_TOP_PATH = os.path.abspath(
os.path.join(
os.path.dirname(__file__),
"..",
"../..",
)
)
KERNEL_BENCH_PATH = os.path.join(REPO_TOP_PATH, "KernelBench")
Expand Down
2 changes: 1 addition & 1 deletion src/eval.py → src/kernelbench/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
REPO_TOP_PATH = os.path.abspath(
os.path.join(
os.path.dirname(__file__),
"..",
"../..",
)
)
KERNEL_BENCH_PATH = os.path.join(REPO_TOP_PATH, "KernelBench")
Expand Down
File renamed without changes.
File renamed without changes.
Loading