Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,6 @@ poetry.toml
/run-vim.sh
/run-chat.sh
.ccache/

# emscripten
a.out.*
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just a.out* ?

6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
if (EMSCRIPTEN)
set(BUILD_SHARED_LIBS_DEFAULT OFF)

option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
if (LLAMA_BUILD_HTML)
set(CMAKE_EXECUTABLE_SUFFIX ".html")
endif()
else()
if (MINGW)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
Expand Down
2 changes: 2 additions & 0 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,8 @@ std::string fs_get_cache_directory() {
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
#elif defined(_WIN32)
cache_directory = std::getenv("LOCALAPPDATA");
#elif defined(__EMSCRIPTEN__)
GGML_ABORT("not implemented on this platform");
#else
# error Unknown architecture
#endif
Expand Down
17 changes: 15 additions & 2 deletions ggml/src/ggml-webgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,28 @@ add_dependencies(ggml-webgpu generate_shaders)
if(EMSCRIPTEN)
set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")

target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
target_link_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
if(NOT EMDAWNWEBGPU_DIR)
# default built-in port
target_compile_options(ggml-webgpu PRIVATE "--use-port=emdawnwebgpu")
target_link_options(ggml-webgpu INTERFACE "--use-port=emdawnwebgpu" "-sASYNCIFY=1")
else()
# custom port
target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
target_link_options(ggml-webgpu INTERFACE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py" "-sASYNCIFY=1")
endif()

set(DawnWebGPU_TARGET webgpu_cpp)
else()
find_package(Dawn REQUIRED)
set(DawnWebGPU_TARGET dawn::webgpu_dawn)
endif()

if (GGML_WEBGPU_DEBUG)
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
if(EMSCRIPTEN)
target_compile_options(ggml-webgpu PRIVATE "-fexceptions")
target_link_options(ggml-webgpu INTERFACE "-sASSERTIONS=2" "-fexceptions")
endif()
endif()

target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
Expand Down
23 changes: 21 additions & 2 deletions ggml/src/ggml-webgpu/ggml-webgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
#include "ggml-impl.h"
#include "ggml-wgsl-shaders.hpp"

#ifdef __EMSCRIPTEN__
#include <emscripten/emscripten.h>
#endif

#include <webgpu/webgpu_cpp.h>

#include <condition_variable>
Expand Down Expand Up @@ -1173,8 +1177,12 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t
ctx->adapter.GetInfo(&info);

// Initialize device
std::vector<wgpu::FeatureName> required_features = { wgpu::FeatureName::ShaderF16,
wgpu::FeatureName::ImplicitDeviceSynchronization };
std::vector<wgpu::FeatureName> required_features = {
wgpu::FeatureName::ShaderF16,
#ifndef __EMSCRIPTEN__
wgpu::FeatureName::ImplicitDeviceSynchronization,
#endif
};
wgpu::DeviceDescriptor dev_desc;
dev_desc.requiredLimits = &ctx->limits;
dev_desc.requiredFeatures = required_features.data();
Expand Down Expand Up @@ -1287,6 +1295,17 @@ ggml_backend_reg_t ggml_backend_webgpu_reg() {
instance_descriptor.requiredFeatures = instance_features.data();
instance_descriptor.requiredFeatureCount = instance_features.size();
webgpu_ctx->instance = wgpu::CreateInstance(&instance_descriptor);

#ifdef __EMSCRIPTEN__
#ifndef __EMSCRIPTEN_PTHREADS__
GGML_LOG_WARN("ggml_webgpu: pthread is disabled. This may cause bugs\n");
#endif

if (webgpu_ctx->instance == nullptr) {
GGML_LOG_ERROR("ggml_webgpu: Failed to create WebGPU instance. Make sure -sASYNCIFY is set\n");
return nullptr;
}
#endif
GGML_ASSERT(webgpu_ctx->instance != nullptr);

static ggml_backend_reg reg = {
Expand Down
12 changes: 9 additions & 3 deletions tests/test-backend-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,18 @@
#include <thread>
#include <vector>

#ifdef __EMSCRIPTEN__
# define N_THREADS 1
#else
# define N_THREADS std::thread::hardware_concurrency()
#endif

static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
size_t nels = ggml_nelements(tensor);
std::vector<float> data(nels);
{
// parallel initialization
static const size_t n_threads = std::thread::hardware_concurrency();
static const size_t n_threads = N_THREADS;
// static RNG initialization (revisit if n_threads stops being constant)
static std::vector<std::default_random_engine> generators = []() {
std::random_device rd;
Expand Down Expand Up @@ -104,7 +110,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
};

const size_t min_blocks_per_thread = 1;
const size_t n_threads = std::min<size_t>(std::thread::hardware_concurrency()/2,
const size_t n_threads = std::min<size_t>(N_THREADS/2,
std::max<size_t>(1, n_blocks / min_blocks_per_thread));
std::vector<std::future<void>> tasks;
tasks.reserve(n_threads);
Expand Down Expand Up @@ -6934,7 +6940,7 @@ int main(int argc, char ** argv) {
auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
if (ggml_backend_set_n_threads_fn) {
// TODO: better value for n_threads
ggml_backend_set_n_threads_fn(backend, std::thread::hardware_concurrency());
ggml_backend_set_n_threads_fn(backend, N_THREADS);
}

size_t free, total; // NOLINT
Expand Down
Loading