@@ -12009,7 +12009,7 @@ static void ggml_cl_cumsum(ggml_backend_t backend, const ggml_tensor * src0, con
1200912009 const cl_ulong nbt1 = net0*nbt0;
1201012010 const cl_ulong nbt2 = net1*nbt1;
1201112011 const cl_ulong nbt3 = net2*nbt2;
12012-
12012+
1201312013 static ggml_cl_buffer tmp_buffer;
1201412014 tmp_buffer.allocate(backend_ctx->context, net0*ne01*ne02*ne03*sizeof(float));
1201512015
@@ -12034,7 +12034,7 @@ static void ggml_cl_cumsum(ggml_backend_t backend, const ggml_tensor * src0, con
1203412034 size_t local_work_size[] = { (size_t)nth, 1, 1};
1203512035
1203612036 backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst);
12037-
12037+
1203812038 if(ne00 > nth){
1203912039 cl_ulong offsett = 0;
1204012040 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &tmp_buffer.buffer));
@@ -12057,7 +12057,7 @@ static void ggml_cl_cumsum(ggml_backend_t backend, const ggml_tensor * src0, con
1205712057 backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst);
1205812058
1205912059 kernel = backend_ctx->kernel_cumsum_add;
12060-
12060+
1206112061 CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &tmp_buffer.buffer));
1206212062 CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extrad->data_device));
1206312063 CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_ulong), &offsetd));
0 commit comments