Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f682374
ggml-blas: initial mmid impl
taronaeo Dec 11, 2025
19c8ec9
ggml-blas: fully working mmid
taronaeo Dec 11, 2025
1926e07
ggml-blas: code clean up
taronaeo Dec 11, 2025
61ee32d
tests: set tensor usage as weight for weight tensors
taronaeo Dec 14, 2025
9a14a09
ggml: rewrite ggml-blas
taronaeo Dec 14, 2025
aae6d1e
ggml-blas: fix invalid data access
taronaeo Dec 14, 2025
717531b
ggml-blas: add note
taronaeo Dec 14, 2025
4470579
ggml-blas: fix ne
taronaeo Dec 14, 2025
6dff031
ggml-blas: force dequant routine to use max logical cores
taronaeo Dec 14, 2025
e481be6
ggml-blas: move global blas n threads to set_n_threads
taronaeo Dec 14, 2025
7998d08
ggml-blas: bring back openmp
taronaeo Dec 14, 2025
75e506f
ggml-blas: clean up code
taronaeo Dec 14, 2025
10ce5e0
ggml-blas: more code formatting
taronaeo Dec 14, 2025
46dea5d
CODEOWNERS: add @taronaeo to blas backend [no ci]
taronaeo Dec 14, 2025
04ed19b
ggml-blas: further cleanup
taronaeo Dec 14, 2025
623e713
ggml-blas: fix memleak
taronaeo Dec 20, 2025
2ee4d5f
ggml-blas: fix graph realloc
taronaeo Dec 20, 2025
adbfbf9
ggml-blas: refactor backend
taronaeo Dec 20, 2025
265183d
ggml-blas: bring back out prod
taronaeo Dec 20, 2025
7729be2
ggml-blas: bring back openmp warnings
taronaeo Dec 20, 2025
dded9fe
ggml-blas: switch from cpu to blas buffer
taronaeo Dec 20, 2025
d216b62
ggml-blas: refactor min_batch to graph_compute
taronaeo Dec 21, 2025
ebef650
ggml-blas: relax batch constraints
taronaeo Dec 24, 2025
51c069a
ggml-blas: band-aid fix
taronaeo Dec 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
/ggml/cmake/ @ggerganov
/ggml/include/ @ggerganov
/ggml/src/ggml-common.h @ggerganov
/ggml/src/ggml-blas/ @taronaeo
/ggml/src/ggml-cpu/ @ggerganov
/ggml/src/ggml-cpu/spacemit/ @alex-spacemit
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
Expand Down
7 changes: 4 additions & 3 deletions ggml/src/ggml-blas/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ find_package(BLAS)
if (BLAS_FOUND)
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

ggml_add_backend_library(ggml-blas
ggml-blas.cpp
)
file(GLOB GGML_SOURCES_BLAS "*.c" "*.cpp")
file(GLOB GGML_HEADERS_BLAS "*.h" "*.hpp")

ggml_add_backend_library(ggml-blas ${GGML_HEADERS_BLAS} ${GGML_SOURCES_BLAS})

if (${GGML_BLAS_VENDOR} MATCHES "Apple")
add_compile_definitions(ACCELERATE_NEW_LAPACK)
Expand Down
67 changes: 67 additions & 0 deletions ggml/src/ggml-blas/common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#pragma once

#include "ggml.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"

#include <vector>
#include <memory>
#include <future>

#if defined(GGML_BLAS_USE_ACCELERATE)
# include <Accelerate/Accelerate.h>
#elif defined(GGML_BLAS_USE_MKL)
# include <mkl.h>
#elif defined(GGML_BLAS_USE_BLIS)
# include <blis.h>
#elif defined(GGML_BLAS_USE_NVPL)
# include <nvpl_blas.h>
#else
# include <cblas.h>
#endif

#define GGML_BLAS_NAME "BLAS"
#define GGML_BLAS_VERSION GGML_BACKEND_API_VERSION

#ifdef __cplusplus
extern "C" {
#endif

// Describes one block of dequantized data: a pointer plus its size in bytes.
//
// Both members carry default initializers so that a default-constructed
// instance is in a well-defined "empty" state (null pointer, zero size)
// instead of holding indeterminate values that could later be handed to a
// free routine. The struct is still an aggregate, so brace initialization
// such as `ggml_backend_blas_buffer{ptr, n}` keeps working.
struct ggml_backend_blas_buffer {
    void * data = nullptr; // dequantized data
    size_t size = 0;       // ggml_nelements * sizeof(float)
};

// Owns the main allocation (`data`, `size`) together with every extra
// ggml_backend_blas_buffer registered in `buffers`. All of them are released
// when the context is destroyed.
//
// Because the destructor frees raw pointers, copying a context would make two
// objects release the same memory. Copy construction and copy assignment are
// therefore deleted. The special members are only declared (defaulted or
// deleted), never user-provided, so the struct remains an aggregate under
// C++17 and existing `{data, size, ...}` initialization keeps compiling.
struct ggml_backend_blas_buffer_context {
    void * data = nullptr;
    size_t size = 0;
    std::vector<ggml_backend_blas_buffer *> buffers;

    // Declaring the deleted copy constructor suppresses the implicit default
    // constructor, so it has to be requested explicitly.
    ggml_backend_blas_buffer_context() = default;

    ggml_backend_blas_buffer_context(const ggml_backend_blas_buffer_context &) = delete;
    ggml_backend_blas_buffer_context & operator=(const ggml_backend_blas_buffer_context &) = delete;

    ~ggml_backend_blas_buffer_context() {
        ggml_aligned_free(data, size);
        for (auto * extra : buffers) {
            if (extra == nullptr) {
                continue; // nothing to release for an empty slot
            }
            ggml_aligned_free(extra->data, extra->size);
            delete extra;
        }
    }
};

// State associated with the BLAS buffer type.
struct ggml_backend_blas_buffer_type_context {
    // NOTE(review): presumably the number of worker threads used when
    // processing buffers of this type; confirm against the code that sets it.
    int n_threads;

#if !defined(GGML_USE_OPENMP)
    // Futures of asynchronous tasks; this member only exists in builds that
    // do not define GGML_USE_OPENMP.
    std::vector<std::future<void>> tasks;
#endif
};

// State attached to a BLAS backend instance.
struct ggml_backend_blas_context {
int n_threads; // NOTE(review): presumably the thread count configured for BLAS calls; confirm against the code that reads it
};

// Placeholder context type for the BLAS device; it carries no real state.
struct ggml_backend_blas_device_context {
char _dummy; // Prevent empty struct warning
};

#ifdef __cplusplus
}
#endif
Loading
Loading