Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ ENV PATH=/opt/rocm/bin:${PATH}
FROM requirements-drivers AS build-requirements

ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_VERSION=3.31.10
ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH
ARG TARGETVARIANT
Expand Down
52 changes: 36 additions & 16 deletions backend/Dockerfile.llama-cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0
ARG CMAKE_FROM_SOURCE=false
ARG CMAKE_VERSION=3.26.4
# CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues
ARG CMAKE_VERSION=3.31.10

ENV MAKEFLAGS=${GRPC_MAKEFLAGS}

Expand All @@ -26,7 +27,7 @@ RUN apt-get update && \

# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
Expand All @@ -50,6 +51,13 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
rm -rf /build

FROM ${BASE_IMAGE} AS builder
ARG CMAKE_FROM_SOURCE=false
ARG CMAKE_VERSION=3.31.10
# We can target specific CUDA ARCHITECTURES like --build-arg CUDA_DOCKER_ARCH='75;86;89;120'
ARG CUDA_DOCKER_ARCH
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
ARG CMAKE_ARGS
ENV CMAKE_ARGS=${CMAKE_ARGS}
ARG BACKEND=rerankers
ARG BUILD_TYPE
ENV BUILD_TYPE=${BUILD_TYPE}
Expand All @@ -70,6 +78,7 @@ RUN apt-get update && \
ccache git \
ca-certificates \
make \
pkg-config libcurl4-openssl-dev \
curl unzip \
libssl-dev wget && \
apt-get clean && \
Expand Down Expand Up @@ -189,7 +198,7 @@ EOT

# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
Expand All @@ -205,19 +214,30 @@ COPY --from=grpc /opt/grpc /usr/local

COPY . /LocalAI

## Otherwise just run the normal build
RUN <<EOT bash
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-fallback && \
make llama-cpp-grpc && make llama-cpp-rpc-server; \
else \
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx && \
make llama-cpp-avx2 && \
make llama-cpp-avx512 && \
make llama-cpp-fallback && \
make llama-cpp-grpc && \
make llama-cpp-rpc-server; \
fi
RUN <<'EOT' bash
set -euxo pipefail

if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
fi

if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
cd /LocalAI/backend/cpp/llama-cpp
make llama-cpp-fallback
make llama-cpp-grpc
make llama-cpp-rpc-server
else
cd /LocalAI/backend/cpp/llama-cpp
make llama-cpp-avx
make llama-cpp-avx2
make llama-cpp-avx512
make llama-cpp-fallback
make llama-cpp-grpc
make llama-cpp-rpc-server
fi
EOT


Expand Down
4 changes: 2 additions & 2 deletions backend/cpp/llama-cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
set(TARGET grpc-server)
# CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues
Copy link
Copy Markdown
Owner

@mudler mudler Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really needed? If we need to do that, it requires compiling Cmake in the build process. Doable, but adds to compilation time and CI times. If there is no specific reason to do it I would avoid to do so for now.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had trouble compiling for RTX 5060 (SM_120) and the only version that consistently worked was CMake 3.31.10. I tried multiple 4.0.x versions and lower CMake versions, but none succeeded. I’d prefer we standardize on 3.31.10 for now - it looks like the safest option, and PyTorch also uses it. Also worth noting: 3.31.9 includes a fix related to CUDA 13, which may be connected to what we’re seeing.

cmake_minimum_required(VERSION 3.31.10)
set(CMAKE_CXX_STANDARD 17)
cmake_minimum_required(VERSION 3.15)
set(TARGET grpc-server)
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
Expand Down
14 changes: 7 additions & 7 deletions backend/cpp/llama-cpp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ BUILD_TYPE?=
NATIVE?=false
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
JOBS?=$(shell nproc)
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
ARCH?=$(shell uname -m)

# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
Expand Down Expand Up @@ -109,10 +109,10 @@ llama-cpp-avx: llama.cpp
$(info ${GREEN}I llama-cpp build info:avx${RESET})
ifeq ($(OS),Darwin)
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
else ifneq ($(filter $(ARCH),aarch64 arm64),)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any specific reason? I find ifeq more readable

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason here is:

  1. Skip BMI flags on Darwin (macOS)
  2. Skip BMI flags on ARM (aarch64/arm64)
  3. Add BMI flags on x86_64 (the else case)

The ifneq checks if ARCH matches aarch64 or arm64. When the filter finds a match, the result is non-empty, so we skip the flags.

I can change to ifeq if you prefer, but the logic would need to invert:

ifeq ($(OS),Darwin)
    # No BMI flags (Darwin)
else ifeq ($(filter $(ARCH),aarch64 arm64),)
    # This is x86_64 - ADD BMI flags here
else
    # This is ARM - NO BMI flags
endif

CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
else
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS=-mno-bmi2 -DCMAKE_CXX_FLAGS=-mno-bmi2" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
CFLAGS="-mno-bmi2" CXXFLAGS="-mno-bmi2" CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GGML_BMI does not exist: https://github.com/ggml-org/ggml/blob/ebc3a0f4a56be1c9424a89fbec09962ac34fde85/CMakeLists.txt#L155

Do we need also CFLAGS/CXXFLAGS? If don't let's drop it. GGML_BMI2 should be enough

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right here. -DGGML_BMI2=off alone works (as I tested in the successful build) - the CFLAGS/CXXFLAGS were just me being overly cautious after fighting with compiler flags for too long.

endif
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx

Expand All @@ -122,10 +122,10 @@ llama-cpp-fallback: llama.cpp
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
ifeq ($(OS),Darwin)
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
else ifneq ($(filter $(ARCH),aarch64 arm64),)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto about logic inversion

CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
else
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
CFLAGS="-mno-bmi -mno-bmi2" CXXFLAGS="-mno-bmi -mno-bmi2" CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto above

endif
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback

Expand All @@ -135,10 +135,10 @@ llama-cpp-grpc: llama.cpp
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
ifeq ($(OS),Darwin)
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
else ifneq ($(filter $(ARCH),aarch64 arm64),)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
else
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
CFLAGS="-mno-bmi -mno-bmi2" CXXFLAGS="-mno-bmi -mno-bmi2" CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

endif
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc

Expand Down
3 changes: 2 additions & 1 deletion backend/go/stablediffusion-ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
cmake_minimum_required(VERSION 3.12)
# CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues
cmake_minimum_required(VERSION 3.31.10)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

project(gosd LANGUAGES C CXX)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

Expand Down
2 changes: 1 addition & 1 deletion backend/go/whisper/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.12)
cmake_minimum_required(VERSION 3.31.10)
project(gowhisper LANGUAGES C CXX)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Expand Down
Loading