Skip to content

Commit 2fb54d2

Browse files
ikawrakow and Iwan Kawrakow authored
Fuse Q and K RoPE (ggml-org#980)
Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent da5de88 commit 2fb54d2

File tree

3 files changed

439 additions and 1 deletion (+439 / -1 lines changed)

3 files changed

439 additions and 1 deletion (+439 / -1 lines changed)

ggml/src/ggml-cuda.cu

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3346,7 +3346,19 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
33463346
ggml_cuda_op_soft_cap_max(ctx, dst);
33473347
break;
33483348
case GGML_OP_ROPE:
3349-
ggml_cuda_op_rope(ctx, dst);
3349+
if (fusion && i + 2 < cgraph->n_nodes &&
3350+
cgraph->nodes[i+1]->op == GGML_OP_VIEW &&
3351+
cgraph->nodes[i+2]->op == GGML_OP_ROPE &&
3352+
ggml_cuda_op_rope_rope(ctx, dst, cgraph->nodes[i+2])) {
3353+
i += 2;
3354+
}
3355+
else if (fusion && i + 1 < cgraph->n_nodes &&
3356+
cgraph->nodes[i+1]->op == GGML_OP_ROPE &&
3357+
ggml_cuda_op_rope_rope(ctx, dst, cgraph->nodes[i+1])) {
3358+
i += 1;
3359+
} else {
3360+
ggml_cuda_op_rope(ctx, dst);
3361+
}
33503362
break;
33513363
case GGML_OP_ROPE_BACK:
33523364
ggml_cuda_op_rope_back(ctx, dst);

0 commit comments

Comments
 (0)