-
Notifications
You must be signed in to change notification settings - Fork 15.9k
Restore clip's cb() to its rightful glory - extract common debugging elements in llama #17914
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
d4f11d5
Extract common debugging functions; plug eval-callback and mtmd's MTM…
pwilkin 8bf7120
Move to common
pwilkin 0bb0625
Remove unneeded header
pwilkin 6cc552f
Unlink from common
pwilkin b2105a1
chore: update webui build output
pwilkin d52292c
Cleanup; properly pass params to mtmd without depending on common; fa…
pwilkin eb185fc
Revert change to webapp
pwilkin d60e338
Post-merge adjust
pwilkin 14f9bd9
Apply suggestions from code review
pwilkin 7d30c50
Apply code review changes
pwilkin 0020521
Remove changes to server-context
pwilkin 34f6925
Remove mtmd.h include
pwilkin 7db7c67
Remove utility functions from header
pwilkin cb52f64
Apply suggestions from code review
pwilkin fd506e6
Rename functions
pwilkin 16ee5d6
Update tools/mtmd/clip.cpp
pwilkin 7d9e0c4
Update tools/mtmd/clip.cpp
pwilkin 450c617
Update tools/mtmd/clip.cpp
pwilkin File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,165 @@ | ||
#include "debug.h"

#include "log.h"

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <string>
|
|
||
| static std::string common_ggml_ne_string(const ggml_tensor * t) { | ||
| std::string str; | ||
| for (int i = 0; i < GGML_MAX_DIMS; ++i) { | ||
| str += std::to_string(t->ne[i]); | ||
| if (i + 1 < GGML_MAX_DIMS) { | ||
| str += ", "; | ||
| } | ||
| } | ||
| return str; | ||
| } | ||
|
|
||
| static float common_ggml_get_float_value(const uint8_t * data, | ||
| ggml_type type, | ||
| const size_t * nb, | ||
| size_t i0, | ||
| size_t i1, | ||
| size_t i2, | ||
| size_t i3) { | ||
| size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0]; | ||
| float v; | ||
| if (type == GGML_TYPE_F16) { | ||
| v = ggml_fp16_to_fp32(*(const ggml_fp16_t *) &data[i]); | ||
| } else if (type == GGML_TYPE_F32) { | ||
| v = *(const float *) &data[i]; | ||
| } else if (type == GGML_TYPE_I64) { | ||
| v = (float) *(const int64_t *) &data[i]; | ||
| } else if (type == GGML_TYPE_I32) { | ||
| v = (float) *(const int32_t *) &data[i]; | ||
| } else if (type == GGML_TYPE_I16) { | ||
| v = (float) *(const int16_t *) &data[i]; | ||
| } else if (type == GGML_TYPE_I8) { | ||
| v = (float) *(const int8_t *) &data[i]; | ||
| } else if (type == GGML_TYPE_BF16) { | ||
| v = ggml_bf16_to_fp32(*(const ggml_bf16_t *) &data[i]); | ||
| } else { | ||
| GGML_ABORT("fatal error"); | ||
| } | ||
| return v; | ||
| } | ||
|
|
||
| template <bool abort> | ||
| void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) { | ||
| GGML_ASSERT(n > 0); | ||
| float sum = 0; | ||
| for (int64_t i3 = 0; i3 < ne[3]; i3++) { | ||
| for (int64_t i2 = 0; i2 < ne[2]; i2++) { | ||
| for (int64_t i1 = 0; i1 < ne[1]; i1++) { | ||
| for (int64_t i0 = 0; i0 < ne[0]; i0++) { | ||
| const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3); | ||
| sum += v; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| for (int64_t i3 = 0; i3 < ne[3]; i3++) { | ||
| LOG_ERR(" [\n"); | ||
| for (int64_t i2 = 0; i2 < ne[2]; i2++) { | ||
| if (i2 == n && ne[2] > 2 * n) { | ||
| LOG_ERR(" ..., \n"); | ||
| i2 = ne[2] - n; | ||
| } | ||
| LOG_ERR(" [\n"); | ||
| for (int64_t i1 = 0; i1 < ne[1]; i1++) { | ||
| if (i1 == n && ne[1] > 2 * n) { | ||
| LOG_ERR(" ..., \n"); | ||
| i1 = ne[1] - n; | ||
| } | ||
| LOG_ERR(" ["); | ||
| for (int64_t i0 = 0; i0 < ne[0]; i0++) { | ||
| if (i0 == n && ne[0] > 2 * n) { | ||
| LOG_ERR("..., "); | ||
| i0 = ne[0] - n; | ||
| } | ||
| const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3); | ||
| LOG_ERR("%12.4f", v); | ||
| if (i0 < ne[0] - 1) { | ||
| LOG_ERR(", "); | ||
| } | ||
| } | ||
| LOG_ERR("],\n"); | ||
| } | ||
| LOG_ERR(" ],\n"); | ||
| } | ||
| LOG_ERR(" ]\n"); | ||
| LOG_ERR(" sum = %f\n", sum); | ||
| } | ||
|
|
||
| if constexpr (abort) { | ||
| if (std::isnan(sum)) { | ||
| LOG_ERR("encountered NaN - aborting\n"); | ||
| exit(0); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * GGML operations callback during the graph execution. | ||
| * | ||
| * @param t current tensor | ||
| * @param ask when ask is true, the scheduler wants to know if we are interested in data from this tensor | ||
| * if we return true, a follow-up call will be made with ask=false in which we can do the actual collection. | ||
| * see ggml_backend_sched_eval_callback | ||
| * @param user_data user data to pass at each call back | ||
| * @return true to receive data or continue the graph, false otherwise | ||
| */ | ||
| template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data) { | ||
| auto * cb_data = (base_callback_data *) user_data; | ||
|
|
||
| const struct ggml_tensor * src0 = t->src[0]; | ||
| const struct ggml_tensor * src1 = t->src[1]; | ||
|
|
||
| if (ask) { | ||
| return true; // Always retrieve data | ||
| } | ||
|
|
||
| bool matches_filter = cb_data->tensor_filters.empty(); | ||
|
|
||
| if (!matches_filter) { | ||
| for (const auto & filter : cb_data->tensor_filters) { | ||
| if (std::regex_search(t->name, filter)) { | ||
| matches_filter = true; | ||
| break; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| char src1_str[128] = { 0 }; | ||
| if (src1) { | ||
| snprintf(src1_str, sizeof(src1_str), "%s{%s}", src1->name, common_ggml_ne_string(src1).c_str()); | ||
| } | ||
|
|
||
| if (matches_filter) { | ||
| LOG_ERR("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type), | ||
| ggml_op_desc(t), src0->name, common_ggml_ne_string(src0).c_str(), src1 ? src1_str : "", | ||
| common_ggml_ne_string(t).c_str()); | ||
| } | ||
|
|
||
| const bool is_host = ggml_backend_buffer_is_host(t->buffer); | ||
|
|
||
| if (!is_host) { | ||
| auto n_bytes = ggml_nbytes(t); | ||
| cb_data->data.resize(n_bytes); | ||
| ggml_backend_tensor_get(t, cb_data->data.data(), 0, n_bytes); | ||
| } | ||
|
|
||
| if (!ggml_is_quantized(t->type) && matches_filter) { | ||
| uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data(); | ||
| common_debug_print_tensor<abort_on_nan>(data, t->type, t->ne, t->nb, 3); | ||
| } | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
// Explicit template instantiations: the template definitions live in this
// translation unit, so both abort/no-abort variants must be instantiated here
// for other TUs linking against the declarations in debug.h.
template bool common_debug_cb_eval<false>(ggml_tensor *, bool, void *);
template bool common_debug_cb_eval<true>(ggml_tensor *, bool, void *);
template void common_debug_print_tensor<false>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);
template void common_debug_print_tensor<true>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
#pragma once
#include "common.h"
#include <string>
#include <vector>
#include <regex>

// common debug functions and structs

// Print a tensor's detailed data
// data - the tensor's data in byte format
// type - the tensor's quantization type
// ne - the tensor dimensions array
// nb - the tensor strides array
// n - the number of rows/columns to fully print
template <bool abort_on_nan> void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n);

// Intended to use as callback for ggml_backend_sched_eval_callback
// prints tensors that are processed in the computation graph
// by default prints all tensors, but can be configured by creating a `base_callback_data` instance with
// non-empty filter_patterns. See examples/debug.cpp for possible usage patterns
// The template parameter determines whether the process should exit whenever a NaN is encountered
// in a tensor (useful for stopping debug sessions on first erroneous tensor)
// The callback data will be passed as the third parameter (user_data)
template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data);

// State shared with common_debug_cb_eval via the user_data pointer.
struct base_callback_data {
    // scratch buffer used to copy tensor data off non-host backends
    std::vector<uint8_t> data;
    // tensors whose name matches any of these (anchored) patterns are printed
    std::vector<std::regex> tensor_filters;

    base_callback_data() = default;

    // Compiles filter_patterns (each anchored with a leading '^' so it matches
    // from the start of the tensor name) and registers this object as the
    // eval callback on `params`.
    // Throws std::runtime_error if any pattern is not a valid regex.
    // NOTE(review): params.cb_eval_user_data stores `this` — the instance must
    // outlive graph evaluation and must not be moved or copied after
    // construction, or the callback will read a dangling pointer.
    base_callback_data(common_params & params, const std::vector<std::string> & filter_patterns) {
        for (const auto & pattern : filter_patterns) {
            try {
                std::string anchored_pattern = "^" + pattern;
                tensor_filters.emplace_back(anchored_pattern, std::regex::optimize);
            } catch (const std::regex_error & e) {
                throw std::runtime_error("Invalid regex pattern '" + pattern + "': " + e.what());
            }
        }
        params.cb_eval = common_debug_cb_eval<false>;
        params.cb_eval_user_data = this;
    }
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| { | ||
| { | ||
| "version": 4, | ||
| "configurePresets": [ | ||
| { | ||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.