Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
244 changes: 244 additions & 0 deletions aiter/configs/a8w8_bpreshuffle_cktile_tuned_gemm.csv

Large diffs are not rendered by default.

244 changes: 244 additions & 0 deletions aiter/configs/a8w8_bpreshuffle_cktile_untuned_gemm.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
M,N,K
1,9216,4096
2,9216,4096
4,9216,4096
8,9216,4096
16,9216,4096
32,9216,4096
64,9216,4096
128,9216,4096
256,9216,4096
1024,9216,4096
2048,9216,4096
4096,9216,4096
4240,9216,4096
16384,9216,4096
32768,9216,4096
1,4608,4096
2,4608,4096
4,4608,4096
8,4608,4096
16,4608,4096
32,4608,4096
64,4608,4096
128,4608,4096
256,4608,4096
1024,4608,4096
2048,4608,4096
4096,4608,4096
16384,4608,4096
32768,4608,4096
1,1280,8192
32,1280,8192
64,1280,8192
128,1280,8192
192,1280,8192
256,1280,8192
320,1280,8192
512,1280,8192
1024,1280,8192
2048,1280,8192
4096,1280,8192
8192,1280,8192
16384,1280,8192
1,8192,1024
32,8192,1024
64,8192,1024
128,8192,1024
192,8192,1024
256,8192,1024
320,8192,1024
512,8192,1024
1024,8192,1024
2048,8192,1024
4096,8192,1024
8192,8192,1024
16384,8192,1024
16,1536,7168
32,1536,7168
64,1536,7168
128,1536,7168
256,1536,7168
512,1536,7168
1024,1536,7168
1536,1536,7168
2048,1536,7168
4096,1536,7168
8192,1536,7168
16384,1536,7168
20480,1536,7168
16,3072,1536
32,3072,1536
64,3072,1536
128,3072,1536
256,3072,1536
512,3072,1536
1024,3072,1536
1536,3072,1536
2048,3072,1536
4096,3072,1536
8192,3072,1536
16384,3072,1536
20480,3072,1536
16,576,7168
32,576,7168
64,576,7168
128,576,7168
256,576,7168
512,576,7168
1024,576,7168
1536,576,7168
2048,576,7168
4096,576,7168
8192,576,7168
16384,576,7168
20480,576,7168
16,7168,2048
32,7168,2048
64,7168,2048
128,7168,2048
256,7168,2048
512,7168,2048
1024,7168,2048
1536,7168,2048
2048,7168,2048
4096,7168,2048
8192,7168,2048
16384,7168,2048
20480,7168,2048
16,4608,7168
32,4608,7168
64,4608,7168
128,4608,7168
256,4608,7168
512,4608,7168
1024,4608,7168
1536,4608,7168
2048,4608,7168
4096,4608,7168
8192,4608,7168
16384,4608,7168
20480,4608,7168
16,7168,2304
32,7168,2304
64,7168,2304
128,7168,2304
256,7168,2304
512,7168,2304
1024,7168,2304
1536,7168,2304
2048,7168,2304
4096,7168,2304
8192,7168,2304
16384,7168,2304
20480,7168,2304
16,512,7168
32,512,7168
64,512,7168
128,512,7168
256,512,7168
512,512,7168
1024,512,7168
1536,512,7168
2048,512,7168
4096,512,7168
8192,512,7168
16384,512,7168
20480,512,7168
16,4096,512
32,4096,512
64,4096,512
128,4096,512
256,4096,512
512,4096,512
1024,4096,512
1536,4096,512
2048,4096,512
4096,4096,512
8192,4096,512
16384,4096,512
20480,4096,512
16,7168,256
32,7168,256
64,7168,256
128,7168,256
256,7168,256
512,7168,256
1024,7168,256
1536,7168,256
2048,7168,256
4096,7168,256
8192,7168,256
16384,7168,256
20480,7168,256
32, 7168, 1536
32, 7168, 576
32, 2048, 7168
32, 7168, 512
32, 256, 7168
64, 7168, 1536
64, 7168, 576
64, 2048, 7168
64, 7168, 512
64, 256, 7168
96, 7168, 1536
96, 7168, 576
96, 2048, 7168
96, 7168, 512
96, 256, 7168
128, 7168, 1536
128, 7168, 576
128, 2048, 7168
128, 7168, 512
128, 256, 7168
256, 7168, 1536
256, 7168, 576
256, 2048, 7168
256, 7168, 512
256, 256, 7168
512, 7168, 1536
512, 7168, 576
512, 2048, 7168
512, 7168, 512
512, 256, 7168
1024, 7168, 1536
1024, 7168, 576
1024, 2048, 7168
1024, 7168, 512
1024, 256, 7168
2048, 7168, 1536
2048, 7168, 576
2048, 2048, 7168
2048, 7168, 512
2048, 256, 7168
4096, 7168, 1536
4096, 7168, 576
4096, 2048, 7168
4096, 7168, 512
4096, 256, 7168
8192, 7168, 1536
8192, 7168, 576
8192, 2048, 7168
8192, 7168, 512
8192, 256, 7168
16384, 7168, 1536
16384, 7168, 576
16384, 2048, 7168
16384, 7168, 512
16384, 256, 7168
5112,6912,5120
5104,5120,8192
2048,4096,5120
5120,5120,4096
5120,5120,8192
32, 2112, 7168
64, 2112, 7168
96, 2112, 7168
128, 2112, 7168
256, 2112, 7168
512, 2112, 7168
1024, 2112, 7168
2048, 2112, 7168
4096, 2112, 7168
8192, 2112, 7168
16384, 2112, 7168
11 changes: 11 additions & 0 deletions aiter/jit/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,12 @@ def get_config_file(env_name, default_file, tuned_file_name):
"AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE",
f"{AITER_ROOT_DIR}/aiter/configs/a8w8_bpreshuffle_tuned_gemm.csv",
)

AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE = os.getenv(
"AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE",
f"{AITER_ROOT_DIR}/aiter/configs/a8w8_bpreshuffle_cktile_tuned_gemm.csv",
)

AITER_CONFIG_GEMM_A8W8_BLOCKSCALE = os.getenv(
"AITER_CONFIG_GEMM_A8W8_BLOCKSCALE",
f"{AITER_ROOT_DIR}/aiter/configs/a8w8_blockscale_tuned_gemm.csv",
Expand Down Expand Up @@ -197,6 +203,11 @@ def get_config_file(env_name, default_file, tuned_file_name):
AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE,
"a8w8_bpreshuffle_tuned_gemm",
)
AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE_FILE = get_config_file(
"AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE",
AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE,
"a8w8_bpreshuffle_cktile_tuned_gemm",
)
AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_FILE = get_config_file(
"AITER_CONFIG_GEMM_A8W8_BLOCKSCALE",
AITER_CONFIG_GEMM_A8W8_BLOCKSCALE,
Expand Down
37 changes: 37 additions & 0 deletions aiter/jit/optCompilerConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,25 @@
"hip_clang_path": "os.environ.get('FLATMM_HIP_CLANG_PATH')",
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_deepgemm/gen_instances.py --working_path {{}}'"
},
"module_gemm_a8w8_bpreshuffle_cktile": {
"srcs": [
"f'{AITER_CSRC_DIR}/pybind/gemm_a8w8_bpreshuffle_cktile_pybind.cu'",
"f'{AITER_CSRC_DIR}/py_itfs_cu/gemm_common.cu'",
"f'{AITER_CSRC_DIR}/cktile_gemm_a8w8_bpreshuffle/gemm_a8w8_bpreshuffle_cktile.cu'"
],
"flags_extra_cc": [],
"flags_extra_hip": [],
"extra_ldflags": "None",
"extra_include": [
"f'{AITER_CSRC_DIR}/cktile_gemm_a8w8_bpreshuffle/include'",
"f'{CK_DIR}/example/ck_tile/18_flatmm'"
],
"is_python_module": "True",
"is_standalone": "False",
"verbose": "False",
"hip_clang_path": "os.environ.get('FLATMM_HIP_CLANG_PATH')",
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/cktile_gemm_a8w8_bpreshuffle/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE_FILE}'"
},
"module_gemm_a8w8_asm": {
"srcs": [
"f'{AITER_CSRC_DIR}/pybind/gemm_a8w8_asm_pybind.cu'",
Expand Down Expand Up @@ -582,6 +601,24 @@
"is_standalone": "False",
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a8w8_bpreshuffle/gen_instances.py --working_path {{}} --tune'"
},
"module_gemm_a8w8_bpreshuffle_cktile_tune": {
"srcs": [
"f'{AITER_CSRC_DIR}/pybind/gemm_a8w8_bpreshuffle_cktile_tune_pybind.cu'",
"f'{AITER_CSRC_DIR}/cktile_gemm_a8w8_bpreshuffle/gemm_a8w8_bpreshuffle_cktile_tune.cu'"
],
"flags_extra_cc": [],
"flags_extra_hip": [],
"extra_ldflags": "None",
"extra_include": [
"f'{AITER_CSRC_DIR}/cktile_gemm_a8w8_bpreshuffle/include'",
"f'{CK_DIR}/example/ck_tile/18_flatmm'"
],
"verbose": "False",
"hip_clang_path": "os.environ.get('FLATMM_HIP_CLANG_PATH')",
"is_python_module": "True",
"is_standalone": "False",
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/cktile_gemm_a8w8_bpreshuffle/gen_instances.py --working_path {{}} --tune'"
},
"module_aiter_operator": {
"srcs": [
"f'{AITER_CSRC_DIR}/pybind/aiter_operator_pybind.cu'",
Expand Down
Loading