diff --git a/src/llama-load-tensors.cpp b/src/llama-load-tensors.cpp
index 659343b9a..7733dc2ea 100644
--- a/src/llama-load-tensors.cpp
+++ b/src/llama-load-tensors.cpp
@@ -312,7 +312,9 @@ ggml_context * create_tensors_helper::get_context_for_tensor(ggml_context * ctx,
     for (const auto * overrides = ml.tensor_buft_overrides; overrides->pattern != nullptr; ++overrides) {
         std::regex pattern(overrides->pattern);
         if (std::regex_search(name, pattern)) {
-            LLAMA_LOG_INFO("Tensor %s buffer type overriden to %s\n", name.c_str(), ggml_backend_buft_name(overrides->buft));
+            const struct ggml_tensor * cur = ml.get_tensor_meta(name.c_str());
+            const size_t nbytes = cur ? ggml_nbytes(cur) : 0;
+            LLAMA_LOG_INFO("Tensor %s (size = %.2f MiB) buffer type overridden to %s\n", name.c_str(), nbytes/1024./1024., ggml_backend_buft_name(overrides->buft));
             ctx = ctx_for_buft(overrides->buft);
             break;
         }
diff --git a/src/llama.cpp b/src/llama.cpp
index 4442e2dd9..5d37d606b 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2209,7 +2209,7 @@ static bool llm_load_tensors(

     // print memory requirements
     for (ggml_backend_buffer_t buf : model.bufs) {
-        LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
+        LLAMA_LOG_DEBUG("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
     }

     // populate tensors_by_name