diff --git a/common/common.cpp b/common/common.cpp index d4e8c7405eb..138424d14ac 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1092,10 +1092,16 @@ common_init_result::common_init_result(common_params & params) : auto cparams = common_context_params_to_llama(params); if (params.fit_params) { - LOG_INF("%s: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on\n", __func__); + const char * msg = "%s: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on\n"; + // hack: make sure this message is shown on CLI in case errors occur during fitting + if (params.verbosity == LOG_LEVEL_ERROR) { + LOG_WRN(msg, __func__); + } else { + LOG_INF(msg, __func__); + } llama_params_fit(params.model.path.c_str(), &mparams, &cparams, params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target, params.fit_params_min_ctx, - params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR); + params.verbosity >= LOG_LEVEL_DEBUG ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR); } llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams); diff --git a/common/log.cpp b/common/log.cpp index b17d2b62c35..6cf5daa8e50 100644 --- a/common/log.cpp +++ b/common/log.cpp @@ -125,6 +125,7 @@ struct common_log { file = nullptr; prefix = false; timestamps = false; + buffering = false; running = false; t_start = t_us(); @@ -156,6 +157,7 @@ struct common_log { bool prefix; bool timestamps; + bool buffering; bool running; int64_t t_start; @@ -172,7 +174,7 @@ struct common_log { void add(enum ggml_log_level level, const char * fmt, va_list args) { std::lock_guard lock(mtx); - if (!running) { + if (!running && !buffering) { // discard messages while the worker thread is paused return; } @@ -250,7 +252,7 @@ struct common_log { void resume() { std::lock_guard lock(mtx); - if (running) { + if (running || buffering) { return; } @@ -353,6 +355,26 @@ struct common_log { this->timestamps = timestamps; } + + void set_buffering(bool buffering) { + { + std::lock_guard lock(mtx); + + this->buffering = buffering; + } + if (buffering) { + pause(); + } else { + resume(); + } + } + + void drop() { + std::lock_guard lock(mtx); + + head = tail; + cv.notify_one(); + } }; // @@ -412,6 +434,14 @@ void common_log_set_colors(struct common_log * log, log_colors colors) { log->set_colors(true); } +void common_log_buffering(struct common_log * log, bool buffering) { + log->set_buffering(buffering); +} + +void common_log_drop(struct common_log * log) { + log->drop(); +} + void common_log_set_prefix(struct common_log * log, bool prefix) { log->set_prefix(prefix); } diff --git a/common/log.h b/common/log.h index f0f8471b5f4..d6681e4dabb 100644 --- a/common/log.h +++ b/common/log.h @@ -86,6 +86,22 @@ void common_log_set_prefix (struct common_log * log, bool prefix); // w void common_log_set_timestamps(struct common_log * log, bool timestamps); // whether to output timestamps in the prefix void common_log_flush (struct common_log * log); // flush all pending log messages +// Buffering log messages allows to only write log if we encouter an error later on +// This is useful for libraries where we want to avoid spamming the user with +// debug/info messages unless something goes wrong. +// +// example: +// common_log_buffering(log, true); +// ... do stuff ... +// if (error) { +// common_log_buffering(log, false); // also flushes the log +// } +// common_log_drop(log); +// common_log_buffering(log, false); + +void common_log_buffering(struct common_log * log, bool buffering); // not thread-safe +void common_log_drop (struct common_log * log); + // helper macros for logging // use these to avoid computing log arguments if the verbosity of the log is higher than the threshold // diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp index 128679d020c..76f71eab3c7 100644 --- a/tools/cli/cli.cpp +++ b/tools/cli/cli.cpp @@ -161,7 +161,9 @@ struct cli_context { int main(int argc, char ** argv) { common_params params; - params.verbosity = LOG_LEVEL_ERROR; // by default, less verbose logs + // by default, less verbose logs + auto default_log_lvl = LOG_LEVEL_ERROR; + params.verbosity = default_log_lvl; if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_CLI)) { return 1; @@ -173,6 +175,9 @@ int main(int argc, char ** argv) { console::error("please use llama-completion instead\n"); } + // TODO @ngxson: we need this to have colors in log, will it have any side effects? + common_log_set_prefix(common_log_main(), true); + common_init(); // struct that contains llama context and inference @@ -201,21 +206,43 @@ int main(int argc, char ** argv) { SetConsoleCtrlHandler(reinterpret_cast(console_ctrl_handler), true); #endif - console::log("\nLoading model... "); // followed by loading animation - console::spinner::start(); + // prepare model loading + auto curr_log_level = params.verbosity; + bool use_default_log = curr_log_level == default_log_lvl; + if (use_default_log) { + common_log_buffering(common_log_main(), true); + common_log_set_verbosity_thold(LOG_LEVEL_WARN); + console::log("\nLoading model... "); // followed by loading animation + console::spinner::start(); + } + if (!ctx_cli.ctx_server.load_model(params)) { - console::spinner::stop(); - console::error("\nFailed to load the model\n"); + if (use_default_log) { + console::error("\n----- ERROR -----\n"); + console::spinner::stop(); + console::log("\n"); + common_log_buffering(common_log_main(), false); + } + console::error("\nFailed to load the model, see logs above\n"); return 1; } - console::spinner::stop(); - console::log("\n"); + if (use_default_log) { + console::spinner::stop(); + console::log("\n"); + common_log_set_verbosity_thold(curr_log_level); + common_log_drop(common_log_main()); + common_log_buffering(common_log_main(), false); + } + // start server main loop in a separate thread std::thread inference_thread([&ctx_cli]() { ctx_cli.ctx_server.start_loop(); }); + // note: from this point onward, we're having 2 threads + // it is unsafe to call certain common_log functions that modify global state + auto inf = ctx_cli.ctx_server.get_info(); std::string modalities = "text"; if (inf.has_inp_image) {