Skip to content

Commit a187ce8

Browse files
committed
Merge remote-tracking branch 'ggerganov/master'
* ggerganov/master: (72 commits)
  node : add win platform check for require path (ggml-org#3363)
  ci : update main-cuda.Dockerfile (ggml-org#3371)
  whisper : fixed crash in GPU device selection on multi-GPU systems (ggml-org#3372)
  wasm : change ggml model host to HF (ggml-org#3369)
  ruby : Add ruby binding for max_len (ggml-org#3365)
  stream.wasm : add language selection support (ggml-org#3354)
  whisper : reset conv scheduler when CoreML is used (ggml-org#3350)
  ggml : remove old kompute, cann (skip) (ggml-org#3349)
  talk-llama : sync llama.cpp
  sync : ggml
  vulkan : add fp16 support for the conv_2d kernel (llama/14872)
  vulkan: skip empty set_rows to avoid invalid API usage (llama/14860)
  HIP: Enable Matrix cores for MMQ Kernels, Enable stream-K for CDNA 3 (llama/14624)
  CANN: Implement GLU ops (llama/14884)
  musa: fix build warnings (unused variable) (llama/14869)
  ggml-cpu : disable GGML_NNPA by default due to instability (llama/14880)
  metal: SSM_SCAN performance (llama/14743)
  opencl: add fused `rms_norm_mul` (llama/14841)
  ggml : remove invalid portPos specifiers from dot files (llama/14838)
  rpc : check for null buffers in get/set/copy tensor endpoints (llama/14868)
  ...
2 parents 52c9807 + 040510a commit a187ce8

File tree

181 files changed

+9753
-10296
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

181 files changed

+9753
-10296
lines changed

.devops/main-cuda.Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
3-
ARG CUDA_VERSION=12.3.1
3+
ARG CUDA_VERSION=13.0.0
44
# Target the CUDA build image
55
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
66
# Target the CUDA runtime image
@@ -20,12 +20,12 @@ RUN apt-get update && \
2020
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
2121

2222
# Ref: https://stackoverflow.com/a/53464012
23-
ENV CUDA_MAIN_VERSION=12.3
23+
ENV CUDA_MAIN_VERSION=13.0
2424
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
2525

2626
COPY .. .
2727
# Enable cuBLAS
28-
RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
28+
RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES='75;80;86;90'"
2929

3030
RUN find /app/build -name "*.o" -delete && \
3131
find /app/build -name "*.a" -delete && \
@@ -34,7 +34,7 @@ RUN find /app/build -name "*.o" -delete && \
3434
rm -rf /app/build/_deps
3535

3636
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
37-
ENV CUDA_MAIN_VERSION=12.3
37+
ENV CUDA_MAIN_VERSION=13.0
3838
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
3939
WORKDIR /app
4040

.devops/main-musa.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
3-
ARG MUSA_VERSION=rc4.0.1
3+
ARG MUSA_VERSION=rc4.2.0
44
# Target the MUSA build image
5-
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-devel-ubuntu${UBUNTU_VERSION}
5+
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
66
# Target the MUSA runtime image
7-
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-runtime-ubuntu${UBUNTU_VERSION}
7+
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
88

99
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
1010
WORKDIR /app

.github/workflows/build.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,25 @@ on:
66
- master
77
tags:
88
- 'v*'
9+
paths: ['.github/workflows/build.yml',
10+
'**/CMakeLists.txt',
11+
'**/Makefile',
12+
'**/*.mk',
13+
'**/*.cmake',
14+
'**/*.in',
15+
'**/*.h',
16+
'**/*.hpp',
17+
'**/*.c',
18+
'**/*.cpp',
19+
'**/*.cu',
20+
'**/*.cuh',
21+
'**/*.cl',
22+
'**/*.swift',
23+
'**/*.m',
24+
'**/*.mm',
25+
'**/*.metal',
26+
'**/*.comp',
27+
'**/*.java']
928
pull_request:
1029
types: [opened, synchronize, reopened]
1130
workflow_dispatch:

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ Run the inference examples as usual, for example:
386386
## Moore Threads GPU support
387387

388388
With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
389-
First, make sure you have installed `MUSA SDK rc4.0.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.0.1
389+
First, make sure you have installed `MUSA SDK rc4.2.0`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.2.0
390390

391391
Now build `whisper.cpp` with MUSA support:
392392

bindings/go/Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,16 @@ BUILD_DIR := build_go
1515
MODELS_DIR := models
1616
EXAMPLES_DIR := $(wildcard examples/*)
1717
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
18-
LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src:$(abspath ../../${BUILD_DIR}/ggml/src))
18+
LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src):$(abspath ../../${BUILD_DIR}/ggml/src)
1919

2020
ifeq ($(GGML_CUDA),1)
2121
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
2222
BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
2323
endif
2424

2525
ifeq ($(UNAME_S),Darwin)
26-
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
26+
LIBRARY_PATH := $(LIBRARY_PATH):$(abspath ../../${BUILD_DIR}/ggml/src/ggml-blas):$(abspath ../../${BUILD_DIR}/ggml/src/ggml-metal)
27+
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit -lggml-metal -lggml-blas
2728
endif
2829

2930
all: clean whisper examples

bindings/go/whisper.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ import (
99
// CGO
1010

1111
/*
12-
#cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu -lm -lstdc++ -fopenmp
12+
#cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu -lm -lstdc++
13+
#cgo linux LDFLAGS: -fopenmp
14+
#cgo darwin LDFLAGS: -lggml-metal -lggml-blas
1315
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
1416
#include <whisper.h>
1517
#include <stdlib.h>

bindings/ruby/ext/ruby_whisper_params.c

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
rb_define_method(cParams, #param_name, ruby_whisper_params_get_ ## param_name, 0); \
2727
rb_define_method(cParams, #param_name "=", ruby_whisper_params_set_ ## param_name, 1);
2828

29-
#define RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT 35
29+
#define RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT 36
3030

3131
extern VALUE cParams;
3232
extern VALUE cVADParams;
@@ -49,6 +49,7 @@ static ID id_print_timestamps;
4949
static ID id_suppress_blank;
5050
static ID id_suppress_nst;
5151
static ID id_token_timestamps;
52+
static ID id_max_len;
5253
static ID id_split_on_word;
5354
static ID id_initial_prompt;
5455
static ID id_diarize;
@@ -514,6 +515,33 @@ ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value)
514515
{
515516
BOOL_PARAMS_SETTER(self, token_timestamps, value)
516517
}
518+
519+
/*
520+
* max segment length in characters.
521+
*
522+
* call-seq:
523+
* max_len -> Integer
524+
*/
525+
static VALUE
526+
ruby_whisper_params_get_max_len(VALUE self)
527+
{
528+
ruby_whisper_params *rwp;
529+
TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, rwp);
530+
return INT2NUM(rwp->params.max_len);
531+
}
532+
/*
533+
* call-seq:
534+
* max_len = length -> length
535+
*/
536+
static VALUE
537+
ruby_whisper_params_set_max_len(VALUE self, VALUE value)
538+
{
539+
ruby_whisper_params *rwp;
540+
TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, rwp);
541+
rwp->params.max_len = NUM2INT(value);
542+
return value;
543+
}
544+
517545
/*
518546
* If true, split on word rather than on token (when used with max_len).
519547
*
@@ -1137,6 +1165,7 @@ ruby_whisper_params_initialize(int argc, VALUE *argv, VALUE self)
11371165
SET_PARAM_IF_SAME(suppress_blank)
11381166
SET_PARAM_IF_SAME(suppress_nst)
11391167
SET_PARAM_IF_SAME(token_timestamps)
1168+
SET_PARAM_IF_SAME(max_len)
11401169
SET_PARAM_IF_SAME(split_on_word)
11411170
SET_PARAM_IF_SAME(initial_prompt)
11421171
SET_PARAM_IF_SAME(offset)
@@ -1271,30 +1300,31 @@ init_ruby_whisper_params(VALUE *mWhisper)
12711300
DEFINE_PARAM(suppress_blank, 8)
12721301
DEFINE_PARAM(suppress_nst, 9)
12731302
DEFINE_PARAM(token_timestamps, 10)
1274-
DEFINE_PARAM(split_on_word, 11)
1275-
DEFINE_PARAM(initial_prompt, 12)
1276-
DEFINE_PARAM(diarize, 13)
1277-
DEFINE_PARAM(offset, 14)
1278-
DEFINE_PARAM(duration, 15)
1279-
DEFINE_PARAM(max_text_tokens, 16)
1280-
DEFINE_PARAM(temperature, 17)
1281-
DEFINE_PARAM(max_initial_ts, 18)
1282-
DEFINE_PARAM(length_penalty, 19)
1283-
DEFINE_PARAM(temperature_inc, 20)
1284-
DEFINE_PARAM(entropy_thold, 21)
1285-
DEFINE_PARAM(logprob_thold, 22)
1286-
DEFINE_PARAM(no_speech_thold, 23)
1287-
DEFINE_PARAM(new_segment_callback, 24)
1288-
DEFINE_PARAM(new_segment_callback_user_data, 25)
1289-
DEFINE_PARAM(progress_callback, 26)
1290-
DEFINE_PARAM(progress_callback_user_data, 27)
1291-
DEFINE_PARAM(encoder_begin_callback, 28)
1292-
DEFINE_PARAM(encoder_begin_callback_user_data, 29)
1293-
DEFINE_PARAM(abort_callback, 30)
1294-
DEFINE_PARAM(abort_callback_user_data, 31)
1295-
DEFINE_PARAM(vad, 32)
1296-
DEFINE_PARAM(vad_model_path, 33)
1297-
DEFINE_PARAM(vad_params, 34)
1303+
DEFINE_PARAM(max_len, 11)
1304+
DEFINE_PARAM(split_on_word, 12)
1305+
DEFINE_PARAM(initial_prompt, 13)
1306+
DEFINE_PARAM(diarize, 14)
1307+
DEFINE_PARAM(offset, 15)
1308+
DEFINE_PARAM(duration, 16)
1309+
DEFINE_PARAM(max_text_tokens, 17)
1310+
DEFINE_PARAM(temperature, 18)
1311+
DEFINE_PARAM(max_initial_ts, 19)
1312+
DEFINE_PARAM(length_penalty, 20)
1313+
DEFINE_PARAM(temperature_inc, 21)
1314+
DEFINE_PARAM(entropy_thold, 22)
1315+
DEFINE_PARAM(logprob_thold, 23)
1316+
DEFINE_PARAM(no_speech_thold, 24)
1317+
DEFINE_PARAM(new_segment_callback, 25)
1318+
DEFINE_PARAM(new_segment_callback_user_data, 26)
1319+
DEFINE_PARAM(progress_callback, 27)
1320+
DEFINE_PARAM(progress_callback_user_data, 28)
1321+
DEFINE_PARAM(encoder_begin_callback, 29)
1322+
DEFINE_PARAM(encoder_begin_callback_user_data, 30)
1323+
DEFINE_PARAM(abort_callback, 31)
1324+
DEFINE_PARAM(abort_callback_user_data, 32)
1325+
DEFINE_PARAM(vad, 33)
1326+
DEFINE_PARAM(vad_model_path, 34)
1327+
DEFINE_PARAM(vad_params, 35)
12981328

12991329
rb_define_method(cParams, "on_new_segment", ruby_whisper_params_on_new_segment, 0);
13001330
rb_define_method(cParams, "on_progress", ruby_whisper_params_on_progress, 0);

bindings/ruby/sig/whisper.rbs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ module Whisper
135135
?suppress_blank: boolish,
136136
?suppress_nst: boolish,
137137
?token_timestamps: boolish,
138+
?max_len: Integer,
138139
?split_on_word: boolish,
139140
?initial_prompt: string | nil,
140141
?diarize: boolish,
@@ -222,6 +223,12 @@ module Whisper
222223
#
223224
def token_timestamps: () -> (true | false)
224225

226+
def max_len=: (Integer) -> Integer
227+
228+
# max segment length in characters.
229+
#
230+
def max_len: () -> Integer
231+
225232
def split_on_word=: (boolish) -> boolish
226233

227234
# If true, split on word rather than on token (when used with max_len).

bindings/ruby/test/test_params.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class TestParams < TestBase
1313
:suppress_blank,
1414
:suppress_nst,
1515
:token_timestamps,
16+
:max_len,
1617
:split_on_word,
1718
:initial_prompt,
1819
:diarize,
@@ -139,6 +140,13 @@ def test_token_timestamps
139140
assert !@params.token_timestamps
140141
end
141142

143+
def test_max_len
144+
@params.max_len = 42
145+
assert_equal @params.max_len, 42
146+
@params.max_len = 0
147+
assert_equal @params.max_len, 0
148+
end
149+
142150
def test_split_on_word
143151
@params.split_on_word = true
144152
assert @params.split_on_word

build-xcframework.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ GGML_METAL_EMBED_LIBRARY=ON
1515
GGML_BLAS_DEFAULT=ON
1616
GGML_METAL_USE_BF16=ON
1717
GGML_OPENMP=OFF
18+
BUILD_STATIC_XCFRAMEWORK=${BUILD_STATIC_XCFRAMEWORK:-OFF}
1819

1920
COMMON_C_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
2021
COMMON_CXX_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
@@ -327,6 +328,15 @@ combine_static_libraries() {
327328
arch_flags+=" -arch $arch"
328329
done
329330

331+
332+
if [[ "${BUILD_STATIC_XCFRAMEWORK}" == "ON" ]]; then
333+
echo "Packaging static framework for ${platform}."
334+
mkdir -p "$(dirname "${base_dir}/${output_lib}")"
335+
cp "${temp_dir}/combined.a" "${base_dir}/${output_lib}"
336+
rm -rf "${temp_dir}"
337+
return
338+
fi
339+
330340
# Create dynamic library
331341
echo "Creating dynamic library for ${platform}."
332342
xcrun -sdk $sdk clang++ -dynamiclib \
@@ -529,6 +539,20 @@ combine_static_libraries "build-tvos-device" "Release-appletvos" "tvos" "false"
529539

530540
# Create XCFramework with correct debug symbols paths
531541
echo "Creating XCFramework..."
542+
543+
if [[ "${BUILD_STATIC_XCFRAMEWORK}" == "ON" ]]; then
544+
xcodebuild -create-xcframework \
545+
-framework $(pwd)/build-ios-sim/framework/whisper.framework \
546+
-framework $(pwd)/build-ios-device/framework/whisper.framework \
547+
-framework $(pwd)/build-macos/framework/whisper.framework \
548+
-framework $(pwd)/build-visionos/framework/whisper.framework \
549+
-framework $(pwd)/build-visionos-sim/framework/whisper.framework \
550+
-framework $(pwd)/build-tvos-device/framework/whisper.framework \
551+
-framework $(pwd)/build-tvos-sim/framework/whisper.framework \
552+
-output $(pwd)/build-apple/whisper.xcframework
553+
exit 0
554+
fi
555+
532556
xcodebuild -create-xcframework \
533557
-framework $(pwd)/build-ios-sim/framework/whisper.framework \
534558
-debug-symbols $(pwd)/build-ios-sim/dSYMs/whisper.dSYM \

0 commit comments

Comments
 (0)