From 750ef29255dffdb65de4ad27f7a6aed957c60f9b Mon Sep 17 00:00:00 2001 From: Emanuele Torre Date: Mon, 25 Nov 2024 07:47:14 +0100 Subject: [PATCH 1/2] builtin.c: typecheck builtin cfunctions in function_list In C23 (default C standard used by GCC 15), jv (*fptr)(); has become equivalent to jv (*fptr)(void); so we can no longer assign builtin implemenations directly to the fptr member of cfunctions without generating a compile error. Since there does not seem to be any straight-forward way to tell autoconf to force the compiler to use C99 short of explicitly adding -std=c99 to CFLAGS, it is probably a cleaner solution to just make the code C23 compatible. A possible solution could have been to just redeclare cfunction.fptr as void*, but then the functions' return type would not have been type checked (e.g. if you tried to add a {printf, "printf", 2}, where printf is a function that does not return jv, the compiler wouldn't have complained.) We were already not typechecking the arguments of the functions, so e.g. {binop_plus, "_plus", 3}, /* instead of {f_plus, "_plus, 3}, */ {f_setpath, "setpath", 4}, /* instead of {f_setpath, "setpath", 3}, */ compile without errors despite not having the correct prototype. So I thought of instead improving the situation by redefining cfunction.fptr as a union of function pointers with the prototypes that the jq bytecode interpreter can call, and use a macro to add the builtin functions to function_list using to the arity argument to assign the implementation function to the appropriate union member. Now the code won't compile if the wrong arity, or an arity not supported by the bytecode interpreter (>5 = 1input+4arguments), or a prototype not jallable by the bytecode interpreter (e.g. binop_plus that doesn't expect a jq_state* argument). Also, the code now compiles with gcc -std=c23. Fixes #3206 --- src/builtin.c | 131 +++++++++++++++++++++++++------------------------ src/bytecode.h | 11 +++-- src/execute.c | 11 ++--- 3 files changed, 80 insertions(+), 73 deletions(-) diff --git a/src/builtin.c b/src/builtin.c index dbd72f3de1..d9dfdf427c 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -1790,85 +1790,88 @@ static jv f_have_decnum(jq_state *jq, jv a) { #endif } +#define CFUNC(func, name, arity) \ + {.fptr = { .a ## arity = func }, name, arity} + #define LIBM_DD(name) \ - {f_ ## name, #name, 1}, + CFUNC(f_ ## name, #name, 1), #define LIBM_DD_NO(name) LIBM_DD(name) #define LIBM_DA(name, type) LIBM_DD(name) #define LIBM_DA_NO(name, type) LIBM_DD(name) #define LIBM_DDD(name) \ - {f_ ## name, #name, 3}, + CFUNC(f_ ## name, #name, 3), #define LIBM_DDD_NO(name) LIBM_DDD(name) #define LIBM_DDDD(name) \ - {f_ ## name, #name, 4}, + CFUNC(f_ ## name, #name, 4), #define LIBM_DDDD_NO(name) LIBM_DDDD(name) static const struct cfunction function_list[] = { #include "libm.h" - {f_negate, "_negate", 1}, -#define BINOP(name) {f_ ## name, "_" #name, 3}, + CFUNC(f_negate, "_negate", 1), +#define BINOP(name) CFUNC(f_ ## name, "_" #name, 3), BINOPS #undef BINOP - {f_dump, "tojson", 1}, - {f_json_parse, "fromjson", 1}, - {f_tonumber, "tonumber", 1}, - {f_tostring, "tostring", 1}, - {f_keys, "keys", 1}, - {f_keys_unsorted, "keys_unsorted", 1}, - {f_startswith, "startswith", 2}, - {f_endswith, "endswith", 2}, - {f_string_split, "split", 2}, - {f_string_explode, "explode", 1}, - {f_string_implode, "implode", 1}, - {f_string_indexes, "_strindices", 2}, - {f_string_trim, "trim", 1}, - {f_string_ltrim, "ltrim", 1}, - {f_string_rtrim, "rtrim", 1}, - {f_setpath, "setpath", 3}, // FIXME typechecking - {f_getpath, "getpath", 2}, - {f_delpaths, "delpaths", 2}, - {f_has, "has", 2}, - {f_contains, "contains", 2}, - {f_length, "length", 1}, - {f_utf8bytelength, "utf8bytelength", 1}, - {f_type, "type", 1}, - {f_isinfinite, "isinfinite", 1}, - {f_isnan, "isnan", 1}, - {f_isnormal, "isnormal", 1}, - {f_infinite, "infinite", 1}, - {f_nan, "nan", 1}, - {f_sort, "sort", 1}, - {f_sort_by_impl, "_sort_by_impl", 2}, - {f_group_by_impl, "_group_by_impl", 2}, - {f_min, "min", 1}, - {f_max, "max", 1}, - {f_min_by_impl, "_min_by_impl", 2}, - {f_max_by_impl, "_max_by_impl", 2}, - {f_error, "error", 1}, - {f_format, "format", 2}, - {f_env, "env", 1}, - {f_halt, "halt", 1}, - {f_halt_error, "halt_error", 2}, - {f_get_search_list, "get_search_list", 1}, - {f_get_prog_origin, "get_prog_origin", 1}, - {f_get_jq_origin, "get_jq_origin", 1}, - {f_match, "_match_impl", 4}, - {f_modulemeta, "modulemeta", 1}, - {f_input, "input", 1}, - {f_debug, "debug", 1}, - {f_stderr, "stderr", 1}, - {f_strptime, "strptime", 2}, - {f_strftime, "strftime", 2}, - {f_strflocaltime, "strflocaltime", 2}, - {f_mktime, "mktime", 1}, - {f_gmtime, "gmtime", 1}, - {f_localtime, "localtime", 1}, - {f_now, "now", 1}, - {f_current_filename, "input_filename", 1}, - {f_current_line, "input_line_number", 1}, - {f_have_decnum, "have_decnum", 1}, - {f_have_decnum, "have_literal_numbers", 1}, + CFUNC(f_dump, "tojson", 1), + CFUNC(f_json_parse, "fromjson", 1), + CFUNC(f_tonumber, "tonumber", 1), + CFUNC(f_tostring, "tostring", 1), + CFUNC(f_keys, "keys", 1), + CFUNC(f_keys_unsorted, "keys_unsorted", 1), + CFUNC(f_startswith, "startswith", 2), + CFUNC(f_endswith, "endswith", 2), + CFUNC(f_string_split, "split", 2), + CFUNC(f_string_explode, "explode", 1), + CFUNC(f_string_implode, "implode", 1), + CFUNC(f_string_indexes, "_strindices", 2), + CFUNC(f_string_trim, "trim", 1), + CFUNC(f_string_ltrim, "ltrim", 1), + CFUNC(f_string_rtrim, "rtrim", 1), + CFUNC(f_setpath, "setpath", 3), + CFUNC(f_getpath, "getpath", 2), + CFUNC(f_delpaths, "delpaths", 2), + CFUNC(f_has, "has", 2), + CFUNC(f_contains, "contains", 2), + CFUNC(f_length, "length", 1), + CFUNC(f_utf8bytelength, "utf8bytelength", 1), + CFUNC(f_type, "type", 1), + CFUNC(f_isinfinite, "isinfinite", 1), + CFUNC(f_isnan, "isnan", 1), + CFUNC(f_isnormal, "isnormal", 1), + CFUNC(f_infinite, "infinite", 1), + CFUNC(f_nan, "nan", 1), + CFUNC(f_sort, "sort", 1), + CFUNC(f_sort_by_impl, "_sort_by_impl", 2), + CFUNC(f_group_by_impl, "_group_by_impl", 2), + CFUNC(f_min, "min", 1), + CFUNC(f_max, "max", 1), + CFUNC(f_min_by_impl, "_min_by_impl", 2), + CFUNC(f_max_by_impl, "_max_by_impl", 2), + CFUNC(f_error, "error", 1), + CFUNC(f_format, "format", 2), + CFUNC(f_env, "env", 1), + CFUNC(f_halt, "halt", 1), + CFUNC(f_halt_error, "halt_error", 2), + CFUNC(f_get_search_list, "get_search_list", 1), + CFUNC(f_get_prog_origin, "get_prog_origin", 1), + CFUNC(f_get_jq_origin, "get_jq_origin", 1), + CFUNC(f_match, "_match_impl", 4), + CFUNC(f_modulemeta, "modulemeta", 1), + CFUNC(f_input, "input", 1), + CFUNC(f_debug, "debug", 1), + CFUNC(f_stderr, "stderr", 1), + CFUNC(f_strptime, "strptime", 2), + CFUNC(f_strftime, "strftime", 2), + CFUNC(f_strflocaltime, "strflocaltime", 2), + CFUNC(f_mktime, "mktime", 1), + CFUNC(f_gmtime, "gmtime", 1), + CFUNC(f_localtime, "localtime", 1), + CFUNC(f_now, "now", 1), + CFUNC(f_current_filename, "input_filename", 1), + CFUNC(f_current_line, "input_line_number", 1), + CFUNC(f_have_decnum, "have_decnum", 1), + CFUNC(f_have_decnum, "have_literal_numbers", 1), }; #undef LIBM_DDDD_NO #undef LIBM_DDD_NO diff --git a/src/bytecode.h b/src/bytecode.h index 150198526c..a4055f594c 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -2,7 +2,7 @@ #define BYTECODE_H #include -#include "jv.h" +#include "jq.h" typedef enum { #define OP(name, imm, in, out) name, @@ -44,9 +44,14 @@ struct opcode_description { const struct opcode_description* opcode_describe(opcode op); -#define MAX_CFUNCTION_ARGS 10 +#define MAX_CFUNCTION_ARGS 4 struct cfunction { - jv (*fptr)(); + union { + jv (*a1)(jq_state *, jv); + jv (*a2)(jq_state *, jv, jv); + jv (*a3)(jq_state *, jv, jv, jv); + jv (*a4)(jq_state *, jv, jv, jv, jv); + } fptr; const char* name; int nargs; }; diff --git a/src/execute.c b/src/execute.c index cf255e49ec..6db30bc16a 100644 --- a/src/execute.c +++ b/src/execute.c @@ -917,14 +917,13 @@ jv jq_next(jq_state *jq) { } struct cfunction* function = &frame_current(jq)->bc->globals->cfunctions[*pc++]; switch (function->nargs) { - case 1: top = ((jv (*)(jq_state *, jv))function->fptr)(jq, in[0]); break; - case 2: top = ((jv (*)(jq_state *, jv, jv))function->fptr)(jq, in[0], in[1]); break; - case 3: top = ((jv (*)(jq_state *, jv, jv, jv))function->fptr)(jq, in[0], in[1], in[2]); break; - case 4: top = ((jv (*)(jq_state *, jv, jv, jv, jv))function->fptr)(jq, in[0], in[1], in[2], in[3]); break; - case 5: top = ((jv (*)(jq_state *, jv, jv, jv, jv, jv))function->fptr)(jq, in[0], in[1], in[2], in[3], in[4]); break; + case 1: top = function->fptr.a1(jq, in[0]); break; + case 2: top = function->fptr.a2(jq, in[0], in[1]); break; + case 3: top = function->fptr.a3(jq, in[0], in[1], in[2]); break; + case 4: top = function->fptr.a4(jq, in[0], in[1], in[2], in[3]); break; // FIXME: a) up to 7 arguments (input + 6), b) should assert // because the compiler should not generate this error. - default: return jv_invalid_with_msg(jv_string("Function takes too many arguments")); + default: return jv_invalid_with_msg(jv_string("Function takes too many arguments")) } if (jv_is_valid(top)) { From ab3173afa42fca97b1ae768aac5eb32b658b3e83 Mon Sep 17 00:00:00 2001 From: Emanuele Torre Date: Mon, 25 Nov 2024 18:59:08 +0100 Subject: [PATCH 2/2] jq_next: simplify CALL_BUILTIN implementation --- src/execute.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/src/execute.c b/src/execute.c index 6db30bc16a..356e6dfd02 100644 --- a/src/execute.c +++ b/src/execute.c @@ -341,8 +341,6 @@ static void set_error(jq_state *jq, jv value) { #define ON_BACKTRACK(op) ((op)+NUM_OPCODES) jv jq_next(jq_state *jq) { - jv cfunc_input[MAX_CFUNCTION_ARGS]; - jv_nomem_handler(jq->nomem_handler, jq->nomem_handler_data); uint16_t* pc = stack_restore(jq); @@ -909,33 +907,27 @@ jv jq_next(jq_state *jq) { case CALL_BUILTIN: { int nargs = *pc++; - jv top = stack_pop(jq); - jv* in = cfunc_input; - in[0] = top; - for (int i = 1; i < nargs; i++) { - in[i] = stack_pop(jq); - } struct cfunction* function = &frame_current(jq)->bc->globals->cfunctions[*pc++]; + jv in[MAX_CFUNCTION_ARGS]; + for (int i = 0; i < nargs; ++i) + in[i] = stack_pop(jq); + + jv top; switch (function->nargs) { case 1: top = function->fptr.a1(jq, in[0]); break; case 2: top = function->fptr.a2(jq, in[0], in[1]); break; case 3: top = function->fptr.a3(jq, in[0], in[1], in[2]); break; case 4: top = function->fptr.a4(jq, in[0], in[1], in[2], in[3]); break; - // FIXME: a) up to 7 arguments (input + 6), b) should assert - // because the compiler should not generate this error. - default: return jv_invalid_with_msg(jv_string("Function takes too many arguments")) + default: assert(0 && "Invalid number of arguments"); } - if (jv_is_valid(top)) { - stack_push(jq, top); - } else if (jv_invalid_has_msg(jv_copy(top))) { - set_error(jq, top); - goto do_backtrack; - } else { - // C-coded function returns invalid w/o msg? -> backtrack, as if - // it had returned `empty` + if (!jv_is_valid(top)) { + if (jv_invalid_has_msg(jv_copy(top))) + set_error(jq, top); goto do_backtrack; } + + stack_push(jq, top); break; }