diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md index 381cd3010..cf2f3a51b 100644 --- a/external_parser/unit_tests/test_files/README.md +++ b/external_parser/unit_tests/test_files/README.md @@ -25,6 +25,9 @@ Residing under `valid_joined_logs` - average_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind cb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) - ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log` - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) +- ca_loop_simple.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) +- ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) +- ca_loop_simple_e2e.log: generated by running `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4` - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log` - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining - ccb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 2 --learning_mode_config 1` with the files (ccb-baseline-loopinteractions_v2.fb, ccb-baseline-loopobservations_v2.fb) and renaming the resulting default `merged.log` diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.fb b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.fb new file mode 100644 index 000000000..f6667f72f Binary files /dev/null and b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.fb differ diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.json b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.json new file mode 100644 index 000000000..716ecb081 --- /dev/null +++ b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.json @@ -0,0 +1,3 @@ +{"_label_ca":{"cost":-1.5,"pdf_value":0.0005050505278632045,"action":1.014871597290039},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"91f71c8","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"},"_skipLearn":true} +{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.464624404907227},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"75d50657","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}} +{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.43958568572998},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"e28a9ae6","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}} diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.fb b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.fb new file mode 100755 index 000000000..b894e499d Binary files /dev/null and b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.fb differ diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.json b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.json new file mode 100644 index 000000000..12d55163c --- /dev/null +++ b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.json @@ -0,0 +1,3 @@ +{"_label_ca":{"cost":-1.5,"pdf_value":0.0005050505278632045,"action":1.014871597290039},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"91f71c8","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}} +{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.464624404907227},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"75d50657","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}} +{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.43958568572998},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"e28a9ae6","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}} diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple_e2e.log b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple_e2e.log new file mode 100644 index 000000000..84cb19879 Binary files /dev/null and b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple_e2e.log differ diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_skip_learn_e2e.log b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_skip_learn_e2e.log new file mode 100644 index 000000000..e3b6d8873 Binary files /dev/null and b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_skip_learn_e2e.log differ diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc index c55d93bd6..b88fa32b5 100644 --- a/external_parser/unit_tests/test_log_converter.cc +++ b/external_parser/unit_tests/test_log_converter.cc @@ -145,6 +145,77 @@ BOOST_AUTO_TEST_CASE(ccb_payload_with_slot_id) { BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE(log_converter_ca_format) +BOOST_AUTO_TEST_CASE(ca_loop_simple) { + std::string infile_path = "valid_joined_logs/ca_loop_simple.fb"; + std::string outfile_path = "valid_joined_logs/ca_loop_simple.dsjson"; + + std::string converted_json = + get_json_event(infile_path, outfile_path, v2::ProblemType_CA); + std::string expected_json = + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045," + "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15." + "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15." + "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15." + "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; + + BOOST_CHECK_EQUAL(converted_json, expected_json); +} + +BOOST_AUTO_TEST_CASE(ca_loop_simple_e2e) { + std::string infile_path = "valid_joined_logs/ca_loop_simple_e2e.log"; + std::string outfile_path = "valid_joined_logs/ca_loop_simple_e2e.dsjson"; + + std::string converted_json = + get_json_event(infile_path, outfile_path, v2::ProblemType_CA); + std::string expected_json = + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045," + "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T16:34:38." + "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T16:34:38." + "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T16:34:38." + "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; + + BOOST_CHECK_EQUAL(converted_json, expected_json); +} + +BOOST_AUTO_TEST_CASE(ca_loop_mixed_skip_learn) { + std::string infile_path = "valid_joined_logs/ca_loop_mixed_skip_learn.fb"; + std::string outfile_path = + "valid_joined_logs/ca_loop_mixed_skip_learn.dsjson"; + + std::string converted_json = + get_json_event(infile_path, outfile_path, v2::ProblemType_CA); + std::string expected_json = + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045," + "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-25T15:36:54." + "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/" + "A\"},\"_skipLearn\":true}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-25T15:36:54." + "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-25T15:36:54." + "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; + + BOOST_CHECK_EQUAL(converted_json, expected_json); +} + BOOST_AUTO_TEST_CASE(ca_deferred_action_without_activation) { std::string infile_path = "skip_learn/ca/deferred_action_without_activation.fb"; diff --git a/external_parser/unit_tests/test_vw_external_parser.cc b/external_parser/unit_tests/test_vw_external_parser.cc index 8cc0d474e..7525ef38a 100644 --- a/external_parser/unit_tests/test_vw_external_parser.cc +++ b/external_parser/unit_tests/test_vw_external_parser.cc @@ -247,6 +247,49 @@ BOOST_AUTO_TEST_CASE(ccb_compare_dsjson_with_fb_models) { buffer_dsjson_model.end()); } +BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_simple) { + std::string input_files = get_test_files_location(); + + std::string model_name = input_files + "/test_outputs/m_average"; + + std::string file_name = input_files + "/valid_joined_logs/ca_loop_simple"; + + generate_dsjson_and_fb_models( + model_name, + "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", + file_name); + + // read the models and compare + auto buffer_fb_model = read_file(model_name + ".fb"); + auto buffer_dsjson_model = read_file(model_name + ".json"); + + BOOST_CHECK_EQUAL_COLLECTIONS(buffer_fb_model.begin(), buffer_fb_model.end(), + buffer_dsjson_model.begin(), + buffer_dsjson_model.end()); +} + +BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_mixed_skip_learn) { + std::string input_files = get_test_files_location(); + + std::string model_name = input_files + "/test_outputs/m_average"; + + std::string file_name = + input_files + "/valid_joined_logs/ca_loop_mixed_skip_learn"; + + generate_dsjson_and_fb_models( + model_name, + "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", + file_name); + + // read the models and compare + auto buffer_fb_model = read_file(model_name + ".fb"); + auto buffer_dsjson_model = read_file(model_name + ".json"); + + BOOST_CHECK_EQUAL_COLLECTIONS(buffer_fb_model.begin(), buffer_fb_model.end(), + buffer_dsjson_model.begin(), + buffer_dsjson_model.end()); +} + BOOST_AUTO_TEST_CASE(slates_compare_dsjson_with_fb_models) { std::string input_files = get_test_files_location(); diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc index 142947bac..111a81f26 100644 --- a/test_tools/example_gen/example_gen.cc +++ b/test_tools/example_gen/example_gen.cc @@ -39,6 +39,7 @@ static const char *options[] = { "ss-reward", "action-taken", "cb-loop", + "ca-loop", "ccb-loop", "ccb-baseline-loop", "slates-loop", @@ -64,6 +65,7 @@ enum options{ S_S_REWARD, ACTION_TAKEN, CB_LOOP, + CA_LOOP, CCB_LOOP, CCB_BASELINE_ACTION_LOOP, SLATES_LOOP @@ -117,9 +119,7 @@ void load_config_from_json(int action, u::configuration& config, bool enable_app } else if (action == SLATES_ACTION || action == SLATES_LOOP) { std::string args = "--slates --ccb_explore_adf --json --quiet --epsilon " + std::to_string(epsilon) + " --first_only --id N/A"; config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, args.c_str()); - } - else if (action == CA_ACTION) - { + } else if (action == CA_ACTION || action == CA_LOOP) { config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --json --quiet --id N/A"); } } @@ -250,7 +250,9 @@ void send_ccb_outcome(std::mt19937& rng, bool gen_random_reward, const char * ev } } -int take_action(r::live_model& rl, const char *event_id, int action, unsigned int action_flag, bool gen_random_reward, std::mt19937& rng) { +int take_action(r::live_model &rl, const char *event_id, int action, + unsigned int action_flag, bool gen_random_reward, + std::mt19937 &rng) { r::api_status status; float reward = gen_random_reward ? get_random_number(rng) : 1.5f; @@ -409,6 +411,35 @@ int take_action(r::live_model& rl, const char *event_id, int action, unsigned in break; }; + case CA_LOOP: { // "ca_loop", + r::continuous_action_response response; + if (rl.request_continuous_action(event_id, JSON_CA_CONTEXT, action_flag, + response, &status) != err::success) + std::cout << status.get_error_msg() << std::endl; + size_t num_of_rewards = get_random_number(rng); + for (size_t i = 0; i < num_of_rewards; i++) { + float reward = gen_random_reward ? get_random_number(rng, 0) : 1.5f; + std::cout << "report outcome: " << reward << " for event: " << event_id + << std::endl; + if (rl.report_outcome(event_id, reward, &status) != err::success) + std::cout << status.get_error_msg() << std::endl; + } + + if (action_flag == r::action_flags::DEFERRED) { + size_t rand_num = get_random_number(rng, 0 /*min*/); + if (rand_num % 2) + { + // send activation + std::cout << "sending activation for event_id: " << event_id + << std::endl; + if (rl.report_action_taken(event_id, &status) != err::success) { + std::cout << status.get_error_msg() << std::endl; + } + } + } + + break; + }; case CCB_LOOP: { // "ccb action and random number of float rewards and mix of slot ids / non slot ids / float / string rewards" // randomly decide to send either ccb with slot id's provided or random slot id's // the ccb interactions that are non-random are the ones we can use to send observations for the slot id using the slot-id string @@ -527,7 +558,9 @@ int pseudo_random(int seed) { return (int)(val & 0xFFFFFFFF); } -int run_config(int action, int count, int initial_seed, bool gen_random_reward, bool enable_apprentice_mode, int deferred_action_count, std::string config_file, std::mt19937& rng, float epsilon = 0.0f) { +int run_config(int action, int count, int initial_seed, bool gen_random_reward, + bool enable_apprentice_mode, int deferred_action_count, + std::string config_file, std::mt19937 &rng, float epsilon = 0.0f) { u::configuration config; if (config_file.empty()) @@ -560,7 +593,8 @@ int run_config(int action, int count, int initial_seed, bool gen_random_reward, auto action_flag = i < deferred_action_count ? r::action_flags::DEFERRED : r::action_flags::DEFAULT; - int r = take_action(rl, event_id, action, action_flag, gen_random_reward, rng); + int r = take_action(rl, event_id, action, action_flag, gen_random_reward, + rng); if(r) return r; } @@ -587,7 +621,7 @@ int main(int argc, char *argv[]) { ("count", po::value(), "Number of events to produce") ("seed", po::value(), "Initial seed used to produce event ids") ("epsilon", po::value(), "epsilon to be used in command line args for VW") - ("kind", po::value(), "which kind of example to generate (cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ccb-loop,ccb-baseline-loop,slates-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,action-taken)") + ("kind", po::value(), "which kind of example to generate (cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ca-loop,ccb-loop,ccb-baseline-loop,slates-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,action-taken)") ("random_reward", "Generate random float reward for observation event") ("config_file", po::value(), "json config file for rlclinetlib") ("apprentice", "Enable apprentice mode") @@ -607,7 +641,7 @@ int main(int argc, char *argv[]) { std::vector deferrable_interactions { "cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop", - "ccb-with-slot-id", "ccb-loop", "ccb-baseline-loop", "slates-loop" + "ca-loop", "ccb-with-slot-id", "ccb-loop", "ccb-baseline-loop", "slates-loop" }; if(vm.count("kind") > 0) @@ -646,7 +680,8 @@ int main(int argc, char *argv[]) { if(gen_all) { for(int i = 0; options[i]; ++i) { - if(run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, epsilon)) + if (run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, + deferred_action_count, config_file, rng, epsilon)) return -1; } return 0; @@ -666,5 +701,7 @@ int main(int argc, char *argv[]) { return -1; } - return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, epsilon); + return run_config(action, count, seed, gen_random_reward, + enable_apprentice_mode, deferred_action_count, config_file, + rng, epsilon); } \ No newline at end of file