Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion test/test-sets/ref/metrics.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"total_predict_calls":3,"total_learn_calls":3,"sfm_count_learn_example_with_shared":3,"cbea_labeled_ex":3,"cbea_predict_in_learn":0,"cbea_label_first_action":0,"cbea_label_not_first":3,"cbea_non_zero_cost":2,"cbea_min_actions":12,"cbea_max_actions":12,"total_log_calls":0,"cbea_sum_cost":-2.0,"cbea_sum_cost_baseline":0.0,"cbea_avg_feat_per_event":1156.0,"cbea_avg_actions_per_event":12.0,"cbea_avg_ns_per_event":73.0,"cbea_avg_feat_per_action":96.0,"cbea_avg_ns_per_action":6.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":0,"first_event_id":"0074434d3a3a46529f65de8a59631939","first_event_time":"","last_event_id":"9077f996581148978a0ebe2484260dab","last_event_time":""}
{"total_predict_calls":3,"total_learn_calls":3,"sfm_count_learn_example_with_shared":3,"cbea_labeled_ex":3,"cbea_predict_in_learn":0,"cbea_label_first_action":0,"cbea_label_not_first":3,"cbea_non_zero_cost":2,"cbea_min_actions":12,"cbea_max_actions":12,"total_log_calls":0,"cbea_sum_cost":-2.0,"cbea_sum_cost_baseline":0.0,"cbea_avg_feat_per_event":1156.0,"cbea_avg_actions_per_event":12.0,"cbea_avg_ns_per_event":73.0,"cbea_avg_feat_per_action":96.0,"cbea_avg_ns_per_action":6.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":0,"first_event_id":"0074434d3a3a46529f65de8a59631939","first_event_time":"","last_event_id":"9077f996581148978a0ebe2484260dab","last_event_time":"","dsjson_sum_cost_original":0.0}
2 changes: 1 addition & 1 deletion test/test-sets/ref/metrics_ccb.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"total_predict_calls":0,"total_learn_calls":59,"sfm_count_learn_example_with_shared":66,"cbea_labeled_ex":66,"cbea_predict_in_learn":0,"cbea_label_first_action":16,"cbea_label_not_first":50,"cbea_non_zero_cost":41,"cbea_min_actions":3,"cbea_max_actions":4,"total_log_calls":0,"cbea_sum_cost":-28.560001373291017,"cbea_sum_cost_baseline":-9.260000228881836,"cbea_avg_feat_per_event":0.0,"cbea_avg_actions_per_event":3.0,"cbea_avg_ns_per_event":0.0,"cbea_avg_feat_per_action":0.0,"cbea_avg_ns_per_action":0.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":0,"first_event_id":"0000000","first_event_time":"2021-04-26T17:23:28.1726275","last_event_id":"0000058","last_event_time":"2021-04-26T17:23:35.3057067"}
{"total_predict_calls":0,"total_learn_calls":59,"sfm_count_learn_example_with_shared":66,"cbea_labeled_ex":66,"cbea_predict_in_learn":0,"cbea_label_first_action":16,"cbea_label_not_first":50,"cbea_non_zero_cost":41,"cbea_min_actions":3,"cbea_max_actions":4,"total_log_calls":0,"cbea_sum_cost":-28.560001373291017,"cbea_sum_cost_baseline":-9.260000228881836,"cbea_avg_feat_per_event":0.0,"cbea_avg_actions_per_event":3.0,"cbea_avg_ns_per_event":0.0,"cbea_avg_feat_per_action":0.0,"cbea_avg_ns_per_action":0.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":0,"first_event_id":"0000000","first_event_time":"2021-04-26T17:23:28.1726275","last_event_id":"0000058","last_event_time":"2021-04-26T17:23:35.3057067","dsjson_sum_cost_original":-25.360000610351564}
2 changes: 1 addition & 1 deletion test/test-sets/ref/metrics_ccb_noevent.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"total_predict_calls":0,"total_learn_calls":59,"sfm_count_learn_example_with_shared":66,"cbea_labeled_ex":66,"cbea_predict_in_learn":0,"cbea_label_first_action":16,"cbea_label_not_first":50,"cbea_non_zero_cost":41,"cbea_min_actions":3,"cbea_max_actions":4,"total_log_calls":0,"cbea_sum_cost":-28.560001373291017,"cbea_sum_cost_baseline":-9.260000228881836,"cbea_avg_feat_per_event":0.0,"cbea_avg_actions_per_event":3.0,"cbea_avg_ns_per_event":0.0,"cbea_avg_feat_per_action":0.0,"cbea_avg_ns_per_action":0.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":0,"first_event_id":"","first_event_time":"2021-04-26T17:23:28.1726275","last_event_id":"","last_event_time":"2021-04-26T17:23:35.3057067"}
{"total_predict_calls":0,"total_learn_calls":59,"sfm_count_learn_example_with_shared":66,"cbea_labeled_ex":66,"cbea_predict_in_learn":0,"cbea_label_first_action":16,"cbea_label_not_first":50,"cbea_non_zero_cost":41,"cbea_min_actions":3,"cbea_max_actions":4,"total_log_calls":0,"cbea_sum_cost":-28.560001373291017,"cbea_sum_cost_baseline":-9.260000228881836,"cbea_avg_feat_per_event":0.0,"cbea_avg_actions_per_event":3.0,"cbea_avg_ns_per_event":0.0,"cbea_avg_feat_per_action":0.0,"cbea_avg_ns_per_action":0.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":0,"first_event_id":"","first_event_time":"2021-04-26T17:23:28.1726275","last_event_id":"","last_event_time":"2021-04-26T17:23:35.3057067","dsjson_sum_cost_original":-25.360000610351564}
2 changes: 1 addition & 1 deletion test/test-sets/ref/metrics_ccb_parse_error.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"total_predict_calls":0,"total_learn_calls":68,"sfm_count_learn_example_with_shared":75,"cbea_labeled_ex":75,"cbea_predict_in_learn":0,"cbea_label_first_action":18,"cbea_label_not_first":57,"cbea_non_zero_cost":47,"cbea_min_actions":3,"cbea_max_actions":4,"total_log_calls":2,"cbea_sum_cost":-32.81999969482422,"cbea_sum_cost_baseline":-11.260000228881836,"cbea_avg_feat_per_event":0.0,"cbea_avg_actions_per_event":3.0,"cbea_avg_ns_per_event":0.0,"cbea_avg_feat_per_action":0.0,"cbea_avg_ns_per_action":0.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":2,"first_event_id":"0000000","first_event_time":"2021-04-26T17:23:28.1726275","last_event_id":"0000058","last_event_time":"2021-04-26T17:23:35.3057067"}
{"total_predict_calls":0,"total_learn_calls":68,"sfm_count_learn_example_with_shared":75,"cbea_labeled_ex":75,"cbea_predict_in_learn":0,"cbea_label_first_action":18,"cbea_label_not_first":57,"cbea_non_zero_cost":47,"cbea_min_actions":3,"cbea_max_actions":4,"total_log_calls":2,"cbea_sum_cost":-32.81999969482422,"cbea_sum_cost_baseline":-11.260000228881836,"cbea_avg_feat_per_event":0.0,"cbea_avg_actions_per_event":3.0,"cbea_avg_ns_per_event":0.0,"cbea_avg_feat_per_action":0.0,"cbea_avg_ns_per_action":0.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":2,"first_event_id":"0000000","first_event_time":"2021-04-26T17:23:28.1726275","last_event_id":"0000058","last_event_time":"2021-04-26T17:23:35.3057067","dsjson_sum_cost_original":-29.6200008392334}
2 changes: 1 addition & 1 deletion test/test-sets/ref/metrics_skip.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"total_predict_calls":0,"total_learn_calls":0,"sfm_count_learn_example_with_shared":0,"cbea_labeled_ex":0,"cbea_predict_in_learn":0,"cbea_label_first_action":0,"cbea_label_not_first":0,"cbea_non_zero_cost":0,"total_log_calls":0,"cbea_sum_cost":0.0,"cbea_sum_cost_baseline":0.0,"number_skipped_events":1,"number_events_zero_actions":6,"line_parse_error":0,"first_event_id":"35cd1185d30a48c68fb50e0aef936afd-2NEhC","first_event_time":"2019-01-08T00:00:00.6480000Z","last_event_id":"4f237fdc7ef844de8ac7f2278a526a0b-2aw0J","last_event_time":""}
{"total_predict_calls":0,"total_learn_calls":0,"sfm_count_learn_example_with_shared":0,"cbea_labeled_ex":0,"cbea_predict_in_learn":0,"cbea_label_first_action":0,"cbea_label_not_first":0,"cbea_non_zero_cost":0,"total_log_calls":0,"cbea_sum_cost":0.0,"cbea_sum_cost_baseline":0.0,"number_skipped_events":1,"number_events_zero_actions":6,"line_parse_error":0,"first_event_id":"35cd1185d30a48c68fb50e0aef936afd-2NEhC","first_event_time":"2019-01-08T00:00:00.6480000Z","last_event_id":"4f237fdc7ef844de8ac7f2278a526a0b-2aw0J","last_event_time":"","dsjson_sum_cost_original":0.0}
2 changes: 1 addition & 1 deletion test/test-sets/ref/metrics_time.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"total_predict_calls":23,"total_learn_calls":23,"sfm_count_learn_example_with_shared":23,"cbea_labeled_ex":23,"cbea_predict_in_learn":0,"cbea_label_first_action":9,"cbea_label_not_first":14,"cbea_non_zero_cost":12,"cbea_min_actions":4,"cbea_max_actions":4,"total_log_calls":0,"cbea_sum_cost":-12.0,"cbea_sum_cost_baseline":-6.0,"cbea_avg_feat_per_event":32.0,"cbea_avg_actions_per_event":4.0,"cbea_avg_ns_per_event":14.0,"cbea_avg_feat_per_action":8.0,"cbea_avg_ns_per_action":3.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":0,"first_event_id":"13118d9b4c114f8485d9dec417e3aefe","first_event_time":"2021-02-04T16:31:29.2460000Z","last_event_id":"36ae045ca6694633ba32bd4e0ea1a263","last_event_time":"2021-02-04T16:31:47.3190000Z"}
{"total_predict_calls":23,"total_learn_calls":23,"sfm_count_learn_example_with_shared":23,"cbea_labeled_ex":23,"cbea_predict_in_learn":0,"cbea_label_first_action":9,"cbea_label_not_first":14,"cbea_non_zero_cost":12,"cbea_min_actions":4,"cbea_max_actions":4,"total_log_calls":0,"cbea_sum_cost":-12.0,"cbea_sum_cost_baseline":-6.0,"cbea_avg_feat_per_event":32.0,"cbea_avg_actions_per_event":4.0,"cbea_avg_ns_per_event":14.0,"cbea_avg_feat_per_action":8.0,"cbea_avg_ns_per_action":3.0,"number_skipped_events":0,"number_events_zero_actions":0,"line_parse_error":0,"first_event_id":"13118d9b4c114f8485d9dec417e3aefe","first_event_time":"2021-02-04T16:31:29.2460000Z","last_event_id":"36ae045ca6694633ba32bd4e0ea1a263","last_event_time":"2021-02-04T16:31:47.3190000Z","dsjson_sum_cost_original":-12.0}
1 change: 1 addition & 0 deletions vowpalwabbit/json_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ struct DecisionServiceInteraction
std::vector<unsigned> actions;
std::vector<float> probabilities;
float probabilityOfDrop = 0.f;
float originalLabelCost = 0.f;
bool skipLearn{false};
};

Expand Down
2 changes: 2 additions & 0 deletions vowpalwabbit/metrics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ void list_to_json_file(dsjson_metrics* ds_metrics, std::string filename, metric_
writer.String(ds_metrics->LastEventId.c_str());
writer.Key("last_event_time");
writer.String(ds_metrics->LastEventTime.c_str());
writer.Key("dsjson_sum_cost_original");
writer.Double(ds_metrics->DsjsonSumCostOriginal);
}

writer.EndObject();
Expand Down
28 changes: 28 additions & 0 deletions vowpalwabbit/parse_example_json.h
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,16 @@ class DefaultState : public BaseState<audit>
return &ctx.array_float_state;
}

else if (ctx.key_length == 20 && !strncmp(str, "_original_label_cost", 20))
{
if(!ctx.decision_service_data) {
THROW("_original_label_cost is only valid in DSJson");
}
ctx.float_state.output_float = &ctx.decision_service_data->originalLabelCost;
ctx.float_state.return_state = this;
return &ctx.float_state;
}

else if (ctx.key_length == 5 && !_stricmp(ctx.key, "__aid"))
{
ctx.uint_dedup_state.return_state = this;
Expand Down Expand Up @@ -1428,6 +1438,12 @@ class DecisionServiceState : public BaseState<audit>
ctx.array_float_state.return_state = this;
return &ctx.array_float_state;
}
else if (length == 20 && !strncmp(str, "_original_label_cost", 20))
{
ctx.float_state.output_float = &data->originalLabelCost;
ctx.float_state.return_state = this;
return &ctx.float_state;
}
}

// ignore unknown properties
Expand Down Expand Up @@ -1465,6 +1481,11 @@ struct Context
VW::example_factory_t example_factory;
void* example_factory_context;

// TODO: This shouldn't really exist in the Context. Once the JSON parser
// gets refactored to separate the VWJson/DSJson concepts, this should
// be moved into the DSJson version of the context
DecisionServiceInteraction* decision_service_data = nullptr;

// states
DefaultState<audit> default_state;
LabelState<audit> label_state;
Expand Down Expand Up @@ -1698,6 +1719,7 @@ bool read_line_decision_service_json(vw& all, v_array<example*>& examples, char*
VWReaderHandler<audit>& handler = parser.handler;
handler.init(&all, &examples, &ss, line + length, example_factory, ex_factory_context);
handler.ctx.SetStartStateToDecisionService(data);
handler.ctx.decision_service_data = data;

ParseResult result =
parser.reader.template Parse<kParseInsituFlag, InsituStringStream, VWReaderHandler<audit>>(ss, handler);
Expand Down Expand Up @@ -1763,6 +1785,12 @@ bool parse_line_json(vw* all, char* line, size_t num_chars, v_array<example*>& e
else
all->example_parser->metrics->LastEventTime = std::move(interaction.timestamp);
}

// Technically, the aggregation operation here is supposed to be user-defined,
// but according to Casey, the only operation used is Sum.
// The _original_label_cost element is found either at the top level OR under
// the _outcomes node (for CCB).
all->example_parser->metrics->DsjsonSumCostOriginal += interaction.originalLabelCost;
}

// TODO: In refactoring the parser to be usable standalone, we need to ensure that we
Expand Down
7 changes: 7 additions & 0 deletions vowpalwabbit/parse_slates_example_json.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ void parse_slates_example_dsjson(vw& all, v_array<example*>& examples, char* lin
auto& current_obj = outcomes[i];
auto& destination = slot_examples[i]->l.slates.probabilities;
auto& actions = current_obj["_a"];

if (actions.GetType() == rapidjson::kNumberType) { destination.push_back({actions.GetUint(), 0.f}); }
else if (actions.GetType() == rapidjson::kArrayType)
{
Expand Down Expand Up @@ -291,6 +292,12 @@ void parse_slates_example_dsjson(vw& all, v_array<example*>& examples, char* lin
{
assert(false);
}

if (current_obj.HasMember("_original_label_cost"))
{
assert(current_obj["_original_label_cost"].IsFloat());
data->originalLabelCost = current_obj["_original_label_cost"].GetFloat();
}
}

for (const auto& slot : slot_examples)
Expand Down
1 change: 1 addition & 0 deletions vowpalwabbit/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ struct dsjson_metrics
size_t NumberOfSkippedEvents = 0;
size_t NumberOfEventsZeroActions = 0;
size_t LineParseError = 0;
float DsjsonSumCostOriginal = 0.f;
std::string FirstEventId;
std::string FirstEventTime;
std::string LastEventId;
Expand Down