Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1092,10 +1092,16 @@ common_init_result::common_init_result(common_params & params) :
auto cparams = common_context_params_to_llama(params);

if (params.fit_params) {
LOG_INF("%s: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on\n", __func__);
const char * msg = "%s: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on\n";
// hack: make sure this message is shown on CLI in case errors occur during fitting
if (params.verbosity == LOG_LEVEL_ERROR) {
LOG_WRN(msg, __func__);
} else {
LOG_INF(msg, __func__);
}
llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target, params.fit_params_min_ctx,
params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
params.verbosity >= LOG_LEVEL_DEBUG ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
}

llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
Expand Down
34 changes: 32 additions & 2 deletions common/log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ struct common_log {
file = nullptr;
prefix = false;
timestamps = false;
buffering = false;
running = false;
t_start = t_us();

Expand Down Expand Up @@ -156,6 +157,7 @@ struct common_log {

bool prefix;
bool timestamps;
bool buffering;
bool running;

int64_t t_start;
Expand All @@ -172,7 +174,7 @@ struct common_log {
void add(enum ggml_log_level level, const char * fmt, va_list args) {
std::lock_guard<std::mutex> lock(mtx);

if (!running) {
if (!running && !buffering) {
// discard messages while the worker thread is paused
return;
}
Expand Down Expand Up @@ -250,7 +252,7 @@ struct common_log {
void resume() {
std::lock_guard<std::mutex> lock(mtx);

if (running) {
if (running || buffering) {
return;
}

Expand Down Expand Up @@ -353,6 +355,26 @@ struct common_log {

this->timestamps = timestamps;
}

// Switch buffered mode on or off.
// The flag itself is updated under the mutex; pause()/resume() acquire the
// same mutex internally, so they must be invoked after the guard is released.
void set_buffering(bool buffering) {
    {
        std::lock_guard<std::mutex> lock(mtx);
        this->buffering = buffering;
    }
    // entering buffered mode pauses the worker, leaving it resumes the worker
    buffering ? pause() : resume();
}

// Discard all currently buffered, not-yet-written log messages by advancing
// the read position to the write position, then wake the worker thread.
// NOTE(review): assumes `head` is the consumer index and `tail` the producer
// index of the internal message ring buffer — confirm against the worker loop.
void drop() {
    std::lock_guard<std::mutex> lock(mtx);

    head = tail;
    cv.notify_one();
}
};

//
Expand Down Expand Up @@ -412,6 +434,14 @@ void common_log_set_colors(struct common_log * log, log_colors colors) {
log->set_colors(true);
}

// Public C-style wrapper: enable/disable buffered logging on `log`.
// Simply forwards to common_log::set_buffering().
void common_log_buffering(struct common_log * log, bool buffering) {
    log->set_buffering(buffering);
}

// Public C-style wrapper: discard any pending buffered messages on `log`.
// Simply forwards to common_log::drop().
void common_log_drop(struct common_log * log) {
    log->drop();
}

// Public C-style wrapper: toggle the per-line prefix in log output.
// Simply forwards to common_log::set_prefix().
void common_log_set_prefix(struct common_log * log, bool prefix) {
    log->set_prefix(prefix);
}
Expand Down
16 changes: 16 additions & 0 deletions common/log.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,22 @@ void common_log_set_prefix (struct common_log * log, bool prefix); // w
void common_log_set_timestamps(struct common_log * log, bool timestamps); // whether to output timestamps in the prefix
void common_log_flush (struct common_log * log); // flush all pending log messages

// Buffering log messages allows writing them out only if we encounter an error later on.
// This is useful for libraries where we want to avoid spamming the user with
// debug/info messages unless something goes wrong.
//
// example:
// common_log_buffering(log, true);
// ... do stuff ...
// if (error) {
// common_log_buffering(log, false); // also flushes the log
// }
// common_log_drop(log);
// common_log_buffering(log, false);

void common_log_buffering(struct common_log * log, bool buffering); // not thread-safe
void common_log_drop (struct common_log * log);

// helper macros for logging
// use these to avoid computing log arguments if the verbosity of the log is higher than the threshold
//
Expand Down
41 changes: 34 additions & 7 deletions tools/cli/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ struct cli_context {
int main(int argc, char ** argv) {
common_params params;

params.verbosity = LOG_LEVEL_ERROR; // by default, less verbose logs
// by default, less verbose logs
auto default_log_lvl = LOG_LEVEL_ERROR;
params.verbosity = default_log_lvl;

if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_CLI)) {
return 1;
Expand All @@ -173,6 +175,9 @@ int main(int argc, char ** argv) {
console::error("please use llama-completion instead\n");
}

// TODO @ngxson: we need this to have colors in log, will it have any side effects?
common_log_set_prefix(common_log_main(), true);
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JohannesGaessler the master version does not have this line so it doesn't show the color. Although technically say, the color is automatically enabled if stdout is a TTY.

Not quite sure why the color is tied to this set_prefix, probably a remnant from the past.


common_init();

// struct that contains llama context and inference
Expand Down Expand Up @@ -201,21 +206,43 @@ int main(int argc, char ** argv) {
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
#endif

console::log("\nLoading model... "); // followed by loading animation
console::spinner::start();
// prepare model loading
auto curr_log_level = params.verbosity;
bool use_default_log = curr_log_level == default_log_lvl;
if (use_default_log) {
common_log_buffering(common_log_main(), true);
common_log_set_verbosity_thold(LOG_LEVEL_WARN);
console::log("\nLoading model... "); // followed by loading animation
console::spinner::start();
}

if (!ctx_cli.ctx_server.load_model(params)) {
console::spinner::stop();
console::error("\nFailed to load the model\n");
if (use_default_log) {
console::error("\n----- ERROR -----\n");
console::spinner::stop();
console::log("\n");
common_log_buffering(common_log_main(), false);
}
console::error("\nFailed to load the model, see logs above\n");
return 1;
}

console::spinner::stop();
console::log("\n");
if (use_default_log) {
console::spinner::stop();
console::log("\n");
common_log_set_verbosity_thold(curr_log_level);
common_log_drop(common_log_main());
common_log_buffering(common_log_main(), false);
}

// start server main loop in a separate thread
std::thread inference_thread([&ctx_cli]() {
ctx_cli.ctx_server.start_loop();
});

// note: from this point onward, we're having 2 threads
// it is unsafe to call certain common_log functions that modify global state

auto inf = ctx_cli.ctx_server.get_info();
std::string modalities = "text";
if (inf.has_inp_image) {
Expand Down
Loading