Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
| [IBM zDNN](docs/backend/zDNN.md) | IBM Z & LinuxONE |
| [WebGPU [In Progress]](docs/build.md#webgpu) | All |
| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
| [Hexagon [In Progress]](docs/backend/hexagon/README.md) | Snapdragon |
| [Hexagon [In Progress]](docs/backend/snapdragon/README.md) | Snapdragon |
| [VirtGPU](docs/backend/VirtGPU.md) | VirtGPU APIR |

## Obtaining and quantizing models
Expand Down
21 changes: 16 additions & 5 deletions ggml/src/ggml-hexagon/ggml-hexagon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1865,15 +1865,26 @@ static bool ggml_hexagon_supported_binary(const struct ggml_hexagon_session * se
const struct ggml_tensor * src1 = op->src[1];
const struct ggml_tensor * dst = op;

if (src0->type != GGML_TYPE_F32) {
return false;
if (src0->type == GGML_TYPE_F32) {
if (src1->type != GGML_TYPE_F32) {
return false;
}
if (dst->type != GGML_TYPE_F32) {
return false;
}
}
if (src1->type != GGML_TYPE_F32) {
return false;
else if (src0->type == GGML_TYPE_F16) {
if (src1->type != GGML_TYPE_F16) {
return false;
}
if (dst->type != GGML_TYPE_F16) {
return false;
}
}
if (dst->type != GGML_TYPE_F32) {
else {
return false;
}

if (!ggml_are_same_shape(src0, dst)) {
return false;
}
Expand Down
8 changes: 3 additions & 5 deletions ggml/src/ggml-hexagon/htp/act-ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -693,8 +693,8 @@ static int execute_op_activations_f32(struct htp_ops_context * octx) {
return HTP_STATUS_NO_SUPPORT;
}

const uint32_t n_threads = octx->n_threads;
const uint32_t src0_nrows = src0->ne[1] * src0->ne[2] * src0->ne[3];
const uint32_t n_threads = MIN(octx->n_threads, src0_nrows);

size_t src0_row_size = src0->nb[1];
size_t src1_row_size = src1->nb[1]; // zero bytes if src1 is not used
Expand Down Expand Up @@ -748,13 +748,11 @@ static int execute_op_activations_f32(struct htp_ops_context * octx) {
return HTP_STATUS_OK;
}

uint32_t n_jobs = MIN(n_threads, src0_nrows);

// Prepare context
struct htp_act_context actx;
actx.octx = octx;

actx.src0_nrows_per_thread = (src0_nrows + n_jobs - 1) / n_jobs;
actx.src0_nrows_per_thread = (src0_nrows + n_threads - 1) / n_threads;

actx.src0_row_size = src0_row_size;
actx.src1_row_size = src1_row_size;
Expand Down Expand Up @@ -794,7 +792,7 @@ static int execute_op_activations_f32(struct htp_ops_context * octx) {
actx.data_src1 = data_src1;
actx.data_dst = (uint8_t *) dst->data;

worker_pool_run_func(octx->ctx->worker_pool, act_op_func, &actx, n_jobs);
worker_pool_run_func(octx->ctx->worker_pool, act_op_func, &actx, n_threads);
return HTP_STATUS_OK;
}

Expand Down
12 changes: 6 additions & 6 deletions ggml/src/ggml-hexagon/htp/argsort-ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ int op_argsort(struct htp_ops_context * octx) {
return HTP_STATUS_NO_SUPPORT;
}

const uint32_t total_rows = octx->src0.ne[1] * octx->src0.ne[2] * octx->src0.ne[3];
const uint32_t n_threads = MIN(total_rows, octx->n_threads);

// Allocate scratchpad
// We need 1 row of float + 1 row of int32 per thread.
uint32_t ne00 = octx->src0.ne[0];
Expand All @@ -251,7 +254,7 @@ int op_argsort(struct htp_ops_context * octx) {
// Make sure we round up to 256 for alignment requirements
spad_per_thread = hex_round_up(spad_per_thread, 256);

size_t total_spad_size = spad_per_thread * octx->n_threads;
size_t total_spad_size = spad_per_thread * n_threads;

if (octx->ctx->vtcm_size < total_spad_size) {
FARF(ERROR, "argsort: VTCM size too small. Needed %zu, have %zu", total_spad_size, octx->ctx->vtcm_size);
Expand All @@ -267,15 +270,12 @@ int op_argsort(struct htp_ops_context * octx) {
octx->dst.ne[0], octx->dst.ne[1], octx->dst.ne[2], octx->dst.ne[3],
octx->src0.data, octx->dst.data);

uint32_t total_rows = octx->src0.ne[1] * octx->src0.ne[2] * octx->src0.ne[3];
uint32_t n_jobs = MIN(total_rows, octx->n_threads);

struct htp_argsort_context actx;
actx.octx = octx;
actx.nrows_per_thread = (total_rows + n_jobs - 1) / n_jobs;
actx.nrows_per_thread = (total_rows + n_threads - 1) / n_threads;

// Run jobs
worker_pool_run_func(octx->ctx->worker_pool, htp_argsort_f32, &actx, n_jobs);
worker_pool_run_func(octx->ctx->worker_pool, htp_argsort_f32, &actx, n_threads);

return HTP_STATUS_OK;
}
194 changes: 129 additions & 65 deletions ggml/src/ggml-hexagon/htp/binary-ops.c

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions ggml/src/ggml-hexagon/htp/cpy-ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ static void cpy_work_func(unsigned int n, unsigned int i, void *data) {
int op_cpy(struct htp_ops_context * octx) {
cpy_preamble;

const uint32_t n_threads = MIN(nr, octx->n_threads);

struct htp_copy_context ct;
ct.octx = octx;

Expand All @@ -227,8 +229,7 @@ int op_cpy(struct htp_ops_context * octx) {
const bool transposed = (nb00 > nb01) || (nb0 > nb1);
const bool sameshape = !transposed && (ne00 == ne0 && ne01 == ne1 && ne02 == ne2 && ne03 == ne3);

const uint32_t n_jobs = MIN(nr, octx->n_threads);
ct.src0_nrows_per_thread = (nr + n_jobs - 1) / n_jobs;
ct.src0_nrows_per_thread = (nr + n_threads - 1) / n_threads;

if (sametype && sameshape) {
ct.copy = cpy_thread_sametype_sameshape;
Expand All @@ -245,7 +246,7 @@ int op_cpy(struct htp_ops_context * octx) {
return HTP_STATUS_NO_SUPPORT;
}

worker_pool_run_func(octx->ctx->worker_pool, cpy_work_func, &ct, n_jobs);
worker_pool_run_func(octx->ctx->worker_pool, cpy_work_func, &ct, n_threads);

return HTP_STATUS_OK;
}
7 changes: 4 additions & 3 deletions ggml/src/ggml-hexagon/htp/get-rows-ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ static void get_rows_thread_f32_f32(unsigned int nth, unsigned int ith, void *da
int op_get_rows(struct htp_ops_context * octx) {
get_rows_preamble;

const uint32_t n_threads = MIN(nr, octx->n_threads);

if (octx->src0.type != HTP_TYPE_F32) {
return HTP_STATUS_NO_SUPPORT;
}
Expand All @@ -103,9 +105,8 @@ int op_get_rows(struct htp_ops_context * octx) {
grctx.get_rows_div_ne10 = init_fastdiv_values(octx->src1.ne[0]);
grctx.get_rows_div_ne10_ne11 = init_fastdiv_values(octx->src1.ne[0] * octx->src1.ne[1]);

const uint32_t n_jobs = MIN(nr, octx->n_threads);
grctx.src1_nrows_per_thread = (nr + n_jobs - 1) / n_jobs;
grctx.src1_nrows_per_thread = (nr + n_threads - 1) / n_threads;

worker_pool_run_func(octx->ctx->worker_pool, get_rows_thread_f32_f32, &grctx, n_jobs);
worker_pool_run_func(octx->ctx->worker_pool, get_rows_thread_f32_f32, &grctx, n_threads);
return HTP_STATUS_OK;
}
Loading
Loading