Skip to content

Commit d98b548

Browse files
pwilkinngxson
andauthored
Restore clip's cb() to its rightful glory - extract common debugging elements in llama (#17914)
* Extract common debugging functions; plug eval-callback and mtmd's MTMD_DEBUG_GRAPH with same functionality * Move to common * Remove unneeded header * Unlink from common * chore: update webui build output * Cleanup; properly pass params to mtmd without depending on common; factorize debug.cpp to use common debug code. * Revert change to webapp * Post-merge adjust * Apply suggestions from code review Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com> * Apply code review changes * Remove changes to server-context * Remove mtmd.h include * Remove utility functions from header * Apply suggestions from code review Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com> * Rename functions * Update tools/mtmd/clip.cpp Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com> * Update tools/mtmd/clip.cpp Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com> * Update tools/mtmd/clip.cpp Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com> --------- Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
1 parent 8fb7175 commit d98b548

12 files changed

Lines changed: 259 additions & 396 deletions

File tree

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ add_library(${TARGET} STATIC
6060
common.h
6161
console.cpp
6262
console.h
63+
debug.cpp
64+
debug.h
6365
download.cpp
6466
download.h
6567
http.h

common/debug.cpp

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#include "debug.h"
2+
3+
#include "log.h"
4+
5+
#include <cmath>
6+
#include <string>
7+
8+
static std::string common_ggml_ne_string(const ggml_tensor * t) {
9+
std::string str;
10+
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
11+
str += std::to_string(t->ne[i]);
12+
if (i + 1 < GGML_MAX_DIMS) {
13+
str += ", ";
14+
}
15+
}
16+
return str;
17+
}
18+
19+
static float common_ggml_get_float_value(const uint8_t * data,
20+
ggml_type type,
21+
const size_t * nb,
22+
size_t i0,
23+
size_t i1,
24+
size_t i2,
25+
size_t i3) {
26+
size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
27+
float v;
28+
if (type == GGML_TYPE_F16) {
29+
v = ggml_fp16_to_fp32(*(const ggml_fp16_t *) &data[i]);
30+
} else if (type == GGML_TYPE_F32) {
31+
v = *(const float *) &data[i];
32+
} else if (type == GGML_TYPE_I64) {
33+
v = (float) *(const int64_t *) &data[i];
34+
} else if (type == GGML_TYPE_I32) {
35+
v = (float) *(const int32_t *) &data[i];
36+
} else if (type == GGML_TYPE_I16) {
37+
v = (float) *(const int16_t *) &data[i];
38+
} else if (type == GGML_TYPE_I8) {
39+
v = (float) *(const int8_t *) &data[i];
40+
} else if (type == GGML_TYPE_BF16) {
41+
v = ggml_bf16_to_fp32(*(const ggml_bf16_t *) &data[i]);
42+
} else {
43+
GGML_ABORT("fatal error");
44+
}
45+
return v;
46+
}
47+
48+
template <bool abort>
49+
void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
50+
GGML_ASSERT(n > 0);
51+
float sum = 0;
52+
for (int64_t i3 = 0; i3 < ne[3]; i3++) {
53+
for (int64_t i2 = 0; i2 < ne[2]; i2++) {
54+
for (int64_t i1 = 0; i1 < ne[1]; i1++) {
55+
for (int64_t i0 = 0; i0 < ne[0]; i0++) {
56+
const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
57+
sum += v;
58+
}
59+
}
60+
}
61+
}
62+
for (int64_t i3 = 0; i3 < ne[3]; i3++) {
63+
LOG_ERR(" [\n");
64+
for (int64_t i2 = 0; i2 < ne[2]; i2++) {
65+
if (i2 == n && ne[2] > 2 * n) {
66+
LOG_ERR(" ..., \n");
67+
i2 = ne[2] - n;
68+
}
69+
LOG_ERR(" [\n");
70+
for (int64_t i1 = 0; i1 < ne[1]; i1++) {
71+
if (i1 == n && ne[1] > 2 * n) {
72+
LOG_ERR(" ..., \n");
73+
i1 = ne[1] - n;
74+
}
75+
LOG_ERR(" [");
76+
for (int64_t i0 = 0; i0 < ne[0]; i0++) {
77+
if (i0 == n && ne[0] > 2 * n) {
78+
LOG_ERR("..., ");
79+
i0 = ne[0] - n;
80+
}
81+
const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
82+
LOG_ERR("%12.4f", v);
83+
if (i0 < ne[0] - 1) {
84+
LOG_ERR(", ");
85+
}
86+
}
87+
LOG_ERR("],\n");
88+
}
89+
LOG_ERR(" ],\n");
90+
}
91+
LOG_ERR(" ]\n");
92+
LOG_ERR(" sum = %f\n", sum);
93+
}
94+
95+
if constexpr (abort) {
96+
if (std::isnan(sum)) {
97+
LOG_ERR("encountered NaN - aborting\n");
98+
exit(0);
99+
}
100+
}
101+
}
102+
103+
/**
104+
* GGML operations callback during the graph execution.
105+
*
106+
* @param t current tensor
107+
* @param ask when ask is true, the scheduler wants to know if we are interested in data from this tensor
108+
* if we return true, a follow-up call will be made with ask=false in which we can do the actual collection.
109+
* see ggml_backend_sched_eval_callback
110+
* @param user_data user data to pass at each call back
111+
* @return true to receive data or continue the graph, false otherwise
112+
*/
113+
template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
114+
auto * cb_data = (base_callback_data *) user_data;
115+
116+
const struct ggml_tensor * src0 = t->src[0];
117+
const struct ggml_tensor * src1 = t->src[1];
118+
119+
if (ask) {
120+
return true; // Always retrieve data
121+
}
122+
123+
bool matches_filter = cb_data->tensor_filters.empty();
124+
125+
if (!matches_filter) {
126+
for (const auto & filter : cb_data->tensor_filters) {
127+
if (std::regex_search(t->name, filter)) {
128+
matches_filter = true;
129+
break;
130+
}
131+
}
132+
}
133+
134+
char src1_str[128] = { 0 };
135+
if (src1) {
136+
snprintf(src1_str, sizeof(src1_str), "%s{%s}", src1->name, common_ggml_ne_string(src1).c_str());
137+
}
138+
139+
if (matches_filter) {
140+
LOG_ERR("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type),
141+
ggml_op_desc(t), src0->name, common_ggml_ne_string(src0).c_str(), src1 ? src1_str : "",
142+
common_ggml_ne_string(t).c_str());
143+
}
144+
145+
const bool is_host = ggml_backend_buffer_is_host(t->buffer);
146+
147+
if (!is_host) {
148+
auto n_bytes = ggml_nbytes(t);
149+
cb_data->data.resize(n_bytes);
150+
ggml_backend_tensor_get(t, cb_data->data.data(), 0, n_bytes);
151+
}
152+
153+
if (!ggml_is_quantized(t->type) && matches_filter) {
154+
uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
155+
common_debug_print_tensor<abort_on_nan>(data, t->type, t->ne, t->nb, 3);
156+
}
157+
158+
return true;
159+
}
160+
161+
// Explicit template instantiations
// The two templates are only declared in debug.h and defined in this file, so both values of
// the bool parameter are instantiated here for use from other translation units.
162+
template bool common_debug_cb_eval<false>(ggml_tensor *, bool, void *);
163+
template bool common_debug_cb_eval<true>(ggml_tensor *, bool, void *);
164+
template void common_debug_print_tensor<false>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);
165+
template void common_debug_print_tensor<true>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);

common/debug.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#pragma once
2+
#include "common.h"
3+
#include <string>
4+
#include <vector>
5+
#include <regex>
6+
7+
// common debug functions and structs
8+
9+
// Print a tensor's detailed data
10+
// data - the tensor's data in byte format
11+
// type - the tensor's quantization type
12+
// ne - the tensor dimensions array
13+
// nb - the tensor strides array
14+
// n - the number of rows/columns to fully print
15+
template <bool abort_on_nan> void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n);
16+
17+
// Intended to use as callback for ggml_backend_sched_eval_callback
18+
// prints tensors that are processed in the computation graph
19+
// by default prints all tensors, but can be configured by creating a `base_callback_data` instance with
20+
// non-empty filter_patterns. See examples/debug.cpp for possible usage patterns
21+
// The template parameter determines whether the process is terminated whenever a NaN is encountered
22+
// in a tensor (useful for stopping debug sessions on first erroneous tensor)
23+
// The callback data will be passed as the third parameter (user_data)
24+
template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data);
25+
struct base_callback_data {
26+
std::vector<uint8_t> data;
27+
std::vector<std::regex> tensor_filters;
28+
29+
base_callback_data() = default;
30+
31+
base_callback_data(common_params & params, const std::vector<std::string> & filter_patterns) {
32+
for (const auto & pattern : filter_patterns) {
33+
try {
34+
std::string anchored_pattern = "^" + pattern;
35+
tensor_filters.emplace_back(anchored_pattern, std::regex::optimize);
36+
} catch (const std::regex_error & e) {
37+
throw std::runtime_error("Invalid regex pattern '" + pattern + "': " + e.what());
38+
}
39+
}
40+
params.cb_eval = common_debug_cb_eval<false>;
41+
params.cb_eval_user_data = this;
42+
}
43+
};

docs/backend/hexagon/CMakeUserPresets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{
1+
{
22
"version": 4,
33
"configurePresets": [
44
{

0 commit comments

Comments
 (0)