From 3c30138a01c2e6a7f3e21a8501ca8f537a73f6cc Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Mon, 23 Aug 2021 18:07:36 -0400 Subject: [PATCH 01/11] feat: add ca-loop to example_gen --- test_tools/example_gen/example_gen.cc | 36 ++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc index c0c1dede2..3463ce5d8 100644 --- a/test_tools/example_gen/example_gen.cc +++ b/test_tools/example_gen/example_gen.cc @@ -39,6 +39,7 @@ static const char *options[] = { "ss-reward", "action-taken", "cb-loop", + "ca-loop", "ccb-loop", "ccb-baseline-loop", nullptr @@ -63,6 +64,7 @@ enum options{ S_S_REWARD, ACTION_TAKEN, CB_LOOP, + CA_LOOP, CCB_LOOP, CCB_BASELINE_ACTION_LOOP }; @@ -116,7 +118,7 @@ void load_config_from_json(int action, u::configuration& config, bool enable_app std::string args = "--slates --ccb_explore_adf --json --quiet --epsilon " + std::to_string(epsilon) + " --first_only --id N/A"; config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, args.c_str()); } - else if (action == CA_ACTION) + else if (action == CA_ACTION || action == CA_LOOP) { config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --json --quiet --id N/A"); } @@ -407,6 +409,34 @@ int take_action(r::live_model& rl, const char *event_id, int action, unsigned in break; }; + case CA_LOOP: {// "ca_loop", + r::continuous_action_response response; + if(rl.request_continuous_action(event_id, JSON_CA_CONTEXT, action_flag, response, &status) != err::success) + std::cout << status.get_error_msg() << std::endl; + size_t num_of_rewards = get_random_number(rng); + for (size_t i = 0; i < num_of_rewards; i++) + { + float reward = gen_random_reward ? get_random_number(rng, 0) : 1.5f; + std::cout << "report outcome: " << reward << " for event: " << event_id << std::endl; + if( rl.report_outcome(event_id, reward, &status) != err::success ) + std::cout << status.get_error_msg() << std::endl; + } + + if (action_flag == r::action_flags::DEFERRED) + { + size_t rand_num = get_random_number(rng, 0 /*min*/); + if (rand_num % 2) + { + // send activation + std::cout << "sending activation for event_id: " << event_id << std::endl; + if (rl.report_action_taken(event_id, &status) != err::success ) { + std::cout << status.get_error_msg() << std::endl; + } + } + } + + break; + }; case CCB_LOOP: { // "ccb action and random number of float rewards and mix of slot ids / non slot ids / float / string rewards" // randomly decide to send either ccb with slot id's provided or random slot id's // the ccb interactions that are non-random are the ones we can use to send observations for the slot id using the slot-id string @@ -555,7 +585,7 @@ int main(int argc, char *argv[]) { ("count", po::value(), "Number of events to produce") ("seed", po::value(), "Initial seed used to produce event ids") ("epsilon", po::value(), "epsilon to be used in command line args for VW") - ("kind", po::value(), "which kind of example to generate (cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,action-taken)") + ("kind", po::value(), "which kind of example to generate (cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ca-loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,action-taken)") ("random_reward", "Generate random float reward for observation event") ("config_file", po::value(), "json config file for rlclinetlib") ("apprentice", "Enable apprentice mode") @@ -574,7 +604,7 @@ int main(int argc, char *argv[]) { enable_dedup = vm.count("dedup"); std::vector deferrable_interactions { - "cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop", + "cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop", "ca-loop", "ccb-with-slot-id", "ccb-loop", "ccb-baseline-loop" }; From e8d35f4cc16275a6069e91ea2a94d7491e62adf5 Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Tue, 24 Aug 2021 11:27:44 -0400 Subject: [PATCH 02/11] add test for ca-loop --- external_parser/unit_tests/test_files/README.md | 1 + .../valid_joined_logs/ca_loop_simple.log | Bin 0 -> 2336 bytes .../unit_tests/test_log_converter.cc | 16 ++++++++++++++++ 3 files changed, 17 insertions(+) create mode 100755 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.log diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md index 83128f978..b3b73a709 100644 --- a/external_parser/unit_tests/test_files/README.md +++ b/external_parser/unit_tests/test_files/README.md @@ -25,6 +25,7 @@ Residing under `valid_joined_logs` - average_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind cb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) - ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log` - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) +- ca_loop_simple.log: generated by running `./example_gen --kind ca-loop --count 3` and performing binary join with `python joiner.py --problem_type_config 4` - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log` - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining - ccb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 2 --learning_mode_config 1` with the files (ccb-baseline-loopinteractions_v2.fb, ccb-baseline-loopobservations_v2.fb) and renaming the resulting default `merged.log` diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.log b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.log new file mode 100755 index 0000000000000000000000000000000000000000..b894e499dca5985f2449dd43a974505bfb551c5e GIT binary patch literal 2336 zcmWFwcXMK7U|nH1|Nk!lq@S{LNbsrgGwiv`!oURNTR`P+0OdJ=Y!M*l z1mdjx%)Hbhy@E<0n+?uqVBlcj0b&*)2C*4{JO;2ix`qr4%s_T(X$pfN5J&*oAp1b7 zA%GpM7RY7+TJ^sGC=AjIG96?d$PSP@L1I1Z3=9u|0yaQAgO!2d3s6D>M4JItw{F3b4vDD~0-iV8gX)C1R2D0G)d|RH Lb`=l+t!DrL3WeX* literal 0 HcmV?d00001 diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc index c55d93bd6..94652906d 100644 --- a/external_parser/unit_tests/test_log_converter.cc +++ b/external_parser/unit_tests/test_log_converter.cc @@ -145,6 +145,22 @@ BOOST_AUTO_TEST_CASE(ccb_payload_with_slot_id) { BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE(log_converter_ca_format) +BOOST_AUTO_TEST_CASE(ca_loop_simple) { + std::string infile_path = + "valid_joined_logs/ca_loop_simple.log"; + std::string outfile_path = + "valid_joined_logs/ca_loop_simple.dsjson"; + + std::string converted_json = + get_json_event(infile_path, outfile_path, v2::ProblemType_CA); + std::string expected_json = +"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" +"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" +"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; + + BOOST_CHECK_EQUAL(converted_json, expected_json); +} + BOOST_AUTO_TEST_CASE(ca_deferred_action_without_activation) { std::string infile_path = "skip_learn/ca/deferred_action_without_activation.fb"; From c111706b578f4a47063c6ae57eca04234d0e75b9 Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Tue, 24 Aug 2021 11:28:48 -0400 Subject: [PATCH 03/11] formatting --- .../unit_tests/test_log_converter.cc | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc index 94652906d..f485b714e 100644 --- a/external_parser/unit_tests/test_log_converter.cc +++ b/external_parser/unit_tests/test_log_converter.cc @@ -146,17 +146,24 @@ BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE(log_converter_ca_format) BOOST_AUTO_TEST_CASE(ca_loop_simple) { - std::string infile_path = - "valid_joined_logs/ca_loop_simple.log"; - std::string outfile_path = - "valid_joined_logs/ca_loop_simple.dsjson"; + std::string infile_path = "valid_joined_logs/ca_loop_simple.log"; + std::string outfile_path = "valid_joined_logs/ca_loop_simple.dsjson"; std::string converted_json = get_json_event(infile_path, outfile_path, v2::ProblemType_CA); std::string expected_json = -"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" -"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" -"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045," + "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15." + "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15." + "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15." + "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; BOOST_CHECK_EQUAL(converted_json, expected_json); } From f8baba1e42b056ffb7b748f7979cfe037784c36b Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Tue, 24 Aug 2021 12:47:00 -0400 Subject: [PATCH 04/11] e2e tests --- external_parser/unit_tests/test_files/README.md | 1 + .../valid_joined_logs/ca_loop_simple_e2e.log | Bin 0 -> 2432 bytes external_parser/unit_tests/test_log_converter.cc | 15 +++++++++++++++ 3 files changed, 16 insertions(+) create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple_e2e.log diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md index b3b73a709..ff5e245d6 100644 --- a/external_parser/unit_tests/test_files/README.md +++ b/external_parser/unit_tests/test_files/README.md @@ -26,6 +26,7 @@ Residing under `valid_joined_logs` - ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log` - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) - ca_loop_simple.log: generated by running `./example_gen --kind ca-loop --count 3` and performing binary join with `python joiner.py --problem_type_config 4` +- ca_loop_simple_e2e.log: generated in DS repo with `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4` - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log` - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining - ccb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 2 --learning_mode_config 1` with the files (ccb-baseline-loopinteractions_v2.fb, ccb-baseline-loopobservations_v2.fb) and renaming the resulting default `merged.log` diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple_e2e.log b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple_e2e.log new file mode 100644 index 0000000000000000000000000000000000000000..84cb198798cf1e3c28a0bedabd1f01841afd9501 GIT binary patch literal 2432 zcmWFwcXMK7U|nH1|Nk!lq@S{LNC+sYG0eHe$G`;STR`P+0OdJ=Y!M*l z1mdjx%)Hbhy@E<0n+?uqVBlcj0b&*)2C*4{JO;2ix`qr4%s_T(X$pfN5J&*oAp1b7 zA%GpM7RY7+TJ`?|P#B~aWID(^kR2d*g2bM%GcXtcaSKrJ1}g)D4iECYNO9=P6m4Th!J9 zRbY(+7UVdnT1vdXT7Dto;M`sMr?>i1$~*UxdF7-hrg;30V9^tFo{q z4!2u@MLL6}VVb#NvIPUjKZHB`2Ed&)e#E(R1Cl!<2f&@mCd9cj0$8WQ>OlDcaHpU+ zaqi>*)(isRngP)kxqKUvPWNC*r^M7P0nZ!lL3K-9DvK2@>Xw$Do;d4w Date: Tue, 24 Aug 2021 12:47:50 -0400 Subject: [PATCH 05/11] formatting --- external_parser/unit_tests/test_log_converter.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc index 55e4db8dc..4e12b37a5 100644 --- a/external_parser/unit_tests/test_log_converter.cc +++ b/external_parser/unit_tests/test_log_converter.cc @@ -152,10 +152,18 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple) { std::string converted_json = get_json_event(infile_path, outfile_path, v2::ProblemType_CA); std::string expected_json = -"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.0148716},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" -"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" -"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.439586},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; - + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045," + "\"action\":1.0148716},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\"," + "\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{" + "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.464624},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\"," + "\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{" + "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.439586},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\"," + "\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{" + "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; BOOST_CHECK_EQUAL(converted_json, expected_json); } From 1b8d5c940da48da9cc2dbb6ebf8bb41ad4c12364 Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Tue, 24 Aug 2021 13:47:14 -0400 Subject: [PATCH 06/11] fix test --- .../unit_tests/test_log_converter.cc | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc index 4e12b37a5..051fdb3b7 100644 --- a/external_parser/unit_tests/test_log_converter.cc +++ b/external_parser/unit_tests/test_log_converter.cc @@ -153,17 +153,17 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple) { get_json_event(infile_path, outfile_path, v2::ProblemType_CA); std::string expected_json = "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045," - "\"action\":1.0148716},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\"," - "\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{" - "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15." + "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," - "\"action\":12.464624},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\"," - "\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{" - "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15." + "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," - "\"action\":12.439586},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\"," - "\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{" - "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; + "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15." + "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; BOOST_CHECK_EQUAL(converted_json, expected_json); } @@ -176,15 +176,15 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple_e2e) { get_json_event(infile_path, outfile_path, v2::ProblemType_CA); std::string expected_json = "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045," - "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15." + "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T16:34:38." "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{" "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," - "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15." + "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T16:34:38." "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{" "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," - "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15." + "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T16:34:38." "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{" "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; From 208796aabd99cb963b84fdadd74dfcda4db441e5 Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Wed, 25 Aug 2021 12:13:01 -0400 Subject: [PATCH 07/11] test VW model for dsjson vs binary --- .../unit_tests/test_files/README.md | 3 +- .../ca_loop_mixed_skip_learn.fb | Bin 0 -> 2512 bytes .../ca_loop_mixed_skip_learn.json | 3 ++ .../{ca_loop_simple.log => ca_loop_simple.fb} | Bin .../valid_joined_logs/ca_loop_simple.json | 3 ++ .../ca_loop_skip_learn_e2e.log | Bin 0 -> 1280 bytes .../unit_tests/test_log_converter.cc | 16 +++++++- .../unit_tests/test_vw_external_parser.cc | 38 ++++++++++++++++++ test_tools/example_gen/example_gen.cc | 17 ++++---- 9 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.fb create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.json rename external_parser/unit_tests/test_files/valid_joined_logs/{ca_loop_simple.log => ca_loop_simple.fb} (100%) create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.json create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_skip_learn_e2e.log diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md index ff5e245d6..ecbe17df4 100644 --- a/external_parser/unit_tests/test_files/README.md +++ b/external_parser/unit_tests/test_files/README.md @@ -25,7 +25,8 @@ Residing under `valid_joined_logs` - average_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind cb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) - ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log` - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) -- ca_loop_simple.log: generated by running `./example_gen --kind ca-loop --count 3` and performing binary join with `python joiner.py --problem_type_config 4` +- ca_loop_simple.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) +- ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1 --no_loop_actions` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) - ca_loop_simple_e2e.log: generated in DS repo with `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4` - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log` - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.fb b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.fb new file mode 100644 index 0000000000000000000000000000000000000000..f6667f72fb366367f859ee43acef91e93fbd6dd1 GIT binary patch literal 2512 zcmd5;IZFdk5T2OTtawGRh$6ZM3mcCJNj51IgD8lIh*u>Z*+4AZph+PT&`vO==nv3B z)E{7JX>TJ~imGh#+EZ0dIvW%cAUW!UnRVj{X6B&sJ#za;?OR;+aL zO2%0HH9XyhzCpZJ`sJnAk`m#a69-1Yr5MLw?Koab?u_H31LKiPG48+FaolhzM)}>w zm@qM3AWutc#_A!{ka2(Ddf<_3suoqw^{^+74)Im7 zmXZwx<6FVhdU_?BR@Z{@sGQG3w zOEFG<+Hu@8G5$3#TGao^3unxwFWaAl>CY~{MQNUw^KHQMMzK&T8d{O*(t)qhh4>cn zUR!VLZJSY7Zi=6XZsgayk^TSX(8T#u-ewVd?osn#tDo`9ZgjII>wWi3-tP>ud_fW7 O?m&DgGfqStfyO6qDX)r^&D(), "json config file for rlclinetlib") ("apprentice", "Enable apprentice mode") - ("deferred_action_count", po::value(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions"); + ("deferred_action_count", po::value(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions") + ("no_loop_actions", "Flag to disable actions being taken for all outcome events"); po::positional_options_description pd; pd.add("kind", 1); @@ -602,6 +604,7 @@ int main(int argc, char *argv[]) { gen_random_reward = vm.count("random_reward"); enable_apprentice_mode = vm.count("apprentice"); enable_dedup = vm.count("dedup"); + no_loop_actions = vm.count("no_loop_actions"); std::vector deferrable_interactions { "cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop", "ca-loop", @@ -644,7 +647,7 @@ int main(int argc, char *argv[]) { if(gen_all) { for(int i = 0; options[i]; ++i) { - if(run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, epsilon)) + if(run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, no_loop_actions, epsilon)) return -1; } return 0; @@ -664,5 +667,5 @@ int main(int argc, char *argv[]) { return -1; } - return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, epsilon); + return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, no_loop_actions, epsilon); } \ No newline at end of file From b6c0efb1eaaad54fdfe995a79f99afded8e3c9f8 Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Wed, 25 Aug 2021 12:15:40 -0400 Subject: [PATCH 08/11] formatting --- .../unit_tests/test_log_converter.cc | 19 ++- .../unit_tests/test_vw_external_parser.cc | 13 +- test_tools/example_gen/example_gen.cc | 134 +++++++++--------- 3 files changed, 93 insertions(+), 73 deletions(-) diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc index 82854a158..b88fa32b5 100644 --- a/external_parser/unit_tests/test_log_converter.cc +++ b/external_parser/unit_tests/test_log_converter.cc @@ -193,14 +193,25 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple_e2e) { BOOST_AUTO_TEST_CASE(ca_loop_mixed_skip_learn) { std::string infile_path = "valid_joined_logs/ca_loop_mixed_skip_learn.fb"; - std::string outfile_path = "valid_joined_logs/ca_loop_mixed_skip_learn.dsjson"; + std::string outfile_path = + "valid_joined_logs/ca_loop_mixed_skip_learn.dsjson"; std::string converted_json = get_json_event(infile_path, outfile_path, v2::ProblemType_CA); std::string expected_json = -"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.014871597290039},\"Timestamp\":\"2021-08-25T15:36:54.000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"},\"_skipLearn\":true}\n" -"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624404907227},\"Timestamp\":\"2021-08-25T15:36:54.000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" -"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.43958568572998},\"Timestamp\":\"2021-08-25T15:36:54.000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045," + "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-25T15:36:54." + "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/" + "A\"},\"_skipLearn\":true}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-25T15:36:54." + "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n" + "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398," + "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-25T15:36:54." + "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{" + "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"; BOOST_CHECK_EQUAL(converted_json, expected_json); } diff --git a/external_parser/unit_tests/test_vw_external_parser.cc b/external_parser/unit_tests/test_vw_external_parser.cc index 70719155a..12e5fc394 100644 --- a/external_parser/unit_tests/test_vw_external_parser.cc +++ b/external_parser/unit_tests/test_vw_external_parser.cc @@ -260,10 +260,12 @@ BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_simple) { std::string model_name = input_files + "/test_outputs/m_average"; - std::string file_name = - input_files + "/valid_joined_logs/ca_loop_simple"; + std::string file_name = input_files + "/valid_joined_logs/ca_loop_simple"; - generate_dsjson_and_fb_models(model_name, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", file_name); + generate_dsjson_and_fb_models( + model_name, + "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", + file_name); // read the models and compare auto buffer_fb_model = read_file(model_name + ".fb"); @@ -282,7 +284,10 @@ BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_mixed_skip_learn) { std::string file_name = input_files + "/valid_joined_logs/ca_loop_mixed_skip_learn"; - generate_dsjson_and_fb_models(model_name, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", file_name); + generate_dsjson_and_fb_models( + model_name, + "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", + file_name); // read the models and compare auto buffer_fb_model = read_file(model_name + ".fb"); diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc index 6437df509..030a64466 100644 --- a/test_tools/example_gen/example_gen.cc +++ b/test_tools/example_gen/example_gen.cc @@ -21,31 +21,19 @@ namespace po = boost::program_options; //global var, yeah ugg bool enable_dedup = false; -static const char *options[] = { - "cb", - "invalid-cb", - "ccb", - "ccb-with-slot-id", - "ccb-baseline", - "slates", - "ca", - "f-reward", - "fi-reward", - "fi-out-of-bound-reward", - "fs-reward", - "fmix-reward", - "s-reward", - "si-reward", - "ss-reward", - "action-taken", - "cb-loop", - "ca-loop", - "ccb-loop", - "ccb-baseline-loop", - nullptr -}; - -enum options{ +static const char *options[] = {"cb", "invalid-cb", + "ccb", "ccb-with-slot-id", + "ccb-baseline", "slates", + "ca", "f-reward", + "fi-reward", "fi-out-of-bound-reward", + "fs-reward", "fmix-reward", + "s-reward", "si-reward", + "ss-reward", "action-taken", + "cb-loop", "ca-loop", + "ccb-loop", "ccb-baseline-loop", + nullptr}; + +enum options { CB_ACTION, INVALID_CB_ACTION, CCB_ACTION, @@ -117,9 +105,7 @@ void load_config_from_json(int action, u::configuration& config, bool enable_app } else if (action == SLATES_ACTION) { std::string args = "--slates --ccb_explore_adf --json --quiet --epsilon " + std::to_string(epsilon) + " --first_only --id N/A"; config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, args.c_str()); - } - else if (action == CA_ACTION || action == CA_LOOP) - { + } else if (action == CA_ACTION || action == CA_LOOP) { config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --json --quiet --id N/A"); } } @@ -250,7 +236,9 @@ void send_ccb_outcome(std::mt19937& rng, bool gen_random_reward, const char * ev } } -int take_action(r::live_model& rl, const char *event_id, int action, unsigned int action_flag, bool gen_random_reward, std::mt19937& rng, bool no_loop_actions) { +int take_action(r::live_model &rl, const char *event_id, int action, + unsigned int action_flag, bool gen_random_reward, + std::mt19937 &rng, bool no_loop_actions) { r::api_status status; float reward = gen_random_reward ? get_random_number(rng) : 1.5f; @@ -409,32 +397,32 @@ int take_action(r::live_model& rl, const char *event_id, int action, unsigned in break; }; - case CA_LOOP: {// "ca_loop", + case CA_LOOP: { // "ca_loop", r::continuous_action_response response; - if(rl.request_continuous_action(event_id, JSON_CA_CONTEXT, action_flag, response, &status) != err::success) - std::cout << status.get_error_msg() << std::endl; + if (rl.request_continuous_action(event_id, JSON_CA_CONTEXT, action_flag, + response, &status) != err::success) + std::cout << status.get_error_msg() << std::endl; size_t num_of_rewards = get_random_number(rng); - for (size_t i = 0; i < num_of_rewards; i++) - { + for (size_t i = 0; i < num_of_rewards; i++) { float reward = gen_random_reward ? get_random_number(rng, 0) : 1.5f; - std::cout << "report outcome: " << reward << " for event: " << event_id << std::endl; - if( rl.report_outcome(event_id, reward, &status) != err::success ) - std::cout << status.get_error_msg() << std::endl; + std::cout << "report outcome: " << reward << " for event: " << event_id + << std::endl; + if (rl.report_outcome(event_id, reward, &status) != err::success) + std::cout << status.get_error_msg() << std::endl; } - if (action_flag == r::action_flags::DEFERRED && !no_loop_actions) - { + if (action_flag == r::action_flags::DEFERRED && !no_loop_actions) { size_t rand_num = get_random_number(rng, 0 /*min*/); - if (rand_num % 2) - { + if (rand_num % 2) { // send activation - std::cout << "sending activation for event_id: " << event_id << std::endl; - if (rl.report_action_taken(event_id, &status) != err::success ) { + std::cout << "sending activation for event_id: " << event_id + << std::endl; + if (rl.report_action_taken(event_id, &status) != err::success) { std::cout << status.get_error_msg() << std::endl; } } } - + break; }; case CCB_LOOP: { // "ccb action and random number of float rewards and mix of slot ids / non slot ids / float / string rewards" @@ -525,7 +513,10 @@ int pseudo_random(int seed) { return (int)(val & 0xFFFFFFFF); } -int run_config(int action, int count, int initial_seed, bool gen_random_reward, bool enable_apprentice_mode, int deferred_action_count, std::string config_file, std::mt19937& rng, bool no_loop_actions, float epsilon = 0.0f) { +int run_config(int action, int count, int initial_seed, bool gen_random_reward, + bool enable_apprentice_mode, int deferred_action_count, + std::string config_file, std::mt19937 &rng, bool no_loop_actions, + float epsilon = 0.0f) { u::configuration config; if (config_file.empty()) @@ -558,7 +549,8 @@ int run_config(int action, int count, int initial_seed, bool gen_random_reward, auto action_flag = i < deferred_action_count ? r::action_flags::DEFERRED : r::action_flags::DEFAULT; - int r = take_action(rl, event_id, action, action_flag, gen_random_reward, rng, no_loop_actions); + int r = take_action(rl, event_id, action, action_flag, gen_random_reward, + rng, no_loop_actions); if(r) return r; } @@ -579,19 +571,26 @@ int main(int argc, char *argv[]) { float epsilon = 0.f; bool no_loop_actions = false; - desc.add_options() - ("help", "Produce help message") - ("all", "use all args") - ("dedup", "Enable dedup/zstd") - ("count", po::value(), "Number of events to produce") - ("seed", po::value(), "Initial seed used to produce event ids") - ("epsilon", po::value(), "epsilon to be used in command line args for VW") - ("kind", po::value(), "which kind of example to generate (cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ca-loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,action-taken)") - ("random_reward", "Generate random float reward for observation event") - ("config_file", po::value(), "json config file for rlclinetlib") - ("apprentice", "Enable apprentice mode") - ("deferred_action_count", po::value(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions") - ("no_loop_actions", "Flag to disable actions being taken for all outcome events"); + desc.add_options()("help", "Produce help message")("all", "use all args")( + "dedup", "Enable dedup/zstd")("count", po::value(), + "Number of events to produce")( + "seed", po::value(), "Initial seed used to produce event ids")( + "epsilon", po::value(), + "epsilon to be used in command line args for VW")( + "kind", po::value(), + "which kind of example to generate " + "(cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ca-" + "loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward," + "action-taken)")("random_reward", + "Generate random float reward for observation event")( + "config_file", po::value(), + "json config file for rlclinetlib")("apprentice", + "Enable apprentice mode")( + "deferred_action_count", po::value(), + "Number of deferred action for interaction events. Set the " + "deferred_action flag to true for first deferred_action_count number of " + "actions")("no_loop_actions", + "Flag to disable actions being taken for all outcome events"); po::positional_options_description pd; pd.add("kind", 1); @@ -606,10 +605,11 @@ int main(int argc, char *argv[]) { enable_dedup = vm.count("dedup"); no_loop_actions = vm.count("no_loop_actions"); - std::vector deferrable_interactions { - "cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop", "ca-loop", - "ccb-with-slot-id", "ccb-loop", "ccb-baseline-loop" - }; + std::vector deferrable_interactions{ + "cb", "invalid-cb", "ccb", + "ccb-baseline", "slates", "ca", + "cb-loop", "ca-loop", "ccb-with-slot-id", + "ccb-loop", "ccb-baseline-loop"}; if(vm.count("kind") > 0) action_name = vm["kind"].as(); @@ -647,7 +647,9 @@ int main(int argc, char *argv[]) { if(gen_all) { for(int i = 0; options[i]; ++i) { - if(run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, no_loop_actions, epsilon)) + if (run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, + deferred_action_count, config_file, rng, no_loop_actions, + epsilon)) return -1; } return 0; @@ -667,5 +669,7 @@ int main(int argc, char *argv[]) { return -1; } - return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, no_loop_actions, epsilon); + return run_config(action, count, seed, gen_random_reward, + enable_apprentice_mode, deferred_action_count, config_file, + rng, no_loop_actions, epsilon); } \ No newline at end of file From e4f8cbed8ee954906d12c85b40e0242b1b54d23f Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Fri, 27 Aug 2021 11:15:12 -0400 Subject: [PATCH 09/11] commented changes --- .../unit_tests/test_files/README.md | 2 +- test_tools/example_gen/example_gen.cc | 38 ++++++++++--------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md index ecbe17df4..af1f90c7a 100644 --- a/external_parser/unit_tests/test_files/README.md +++ b/external_parser/unit_tests/test_files/README.md @@ -27,7 +27,7 @@ Residing under `valid_joined_logs` - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) - ca_loop_simple.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) - ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1 --no_loop_actions` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) -- ca_loop_simple_e2e.log: generated in DS repo with `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4` +- ca_loop_simple_e2e.log: generated by running `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4` - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log` - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining - ccb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 2 --learning_mode_config 1` with the files (ccb-baseline-loopinteractions_v2.fb, ccb-baseline-loopobservations_v2.fb) and renaming the resulting default `merged.log` diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc index 030a64466..098c79cb7 100644 --- a/test_tools/example_gen/example_gen.cc +++ b/test_tools/example_gen/example_gen.cc @@ -238,7 +238,7 @@ void send_ccb_outcome(std::mt19937& rng, bool gen_random_reward, const char * ev int take_action(r::live_model &rl, const char *event_id, int action, unsigned int action_flag, bool gen_random_reward, - std::mt19937 &rng, bool no_loop_actions) { + std::mt19937 &rng, float activation_ratio) { r::api_status status; float reward = gen_random_reward ? get_random_number(rng) : 1.5f; @@ -384,8 +384,8 @@ int take_action(r::live_model &rl, const char *event_id, int action, if (action_flag == r::action_flags::DEFERRED) { - size_t rand_num = get_random_number(rng, 0 /*min*/); - if (rand_num % 2) + float rand_float = rand()/float(RAND_MAX); + if (rand_float < activation_ratio) { // send activation std::cout << "sending activation for event_id: " << event_id << std::endl; @@ -411,9 +411,10 @@ int take_action(r::live_model &rl, const char *event_id, int action, std::cout << status.get_error_msg() << std::endl; } - if (action_flag == r::action_flags::DEFERRED && !no_loop_actions) { - size_t rand_num = get_random_number(rng, 0 /*min*/); - if (rand_num % 2) { + if (action_flag == r::action_flags::DEFERRED) { + float rand_float = rand()/float(RAND_MAX); + if (rand_float < activation_ratio) + { // send activation std::cout << "sending activation for event_id: " << event_id << std::endl; @@ -447,8 +448,8 @@ int take_action(r::live_model &rl, const char *event_id, int action, if (action_flag == r::action_flags::DEFERRED) { - size_t rand_num = get_random_number(rng, 0 /*min*/); - if (rand_num % 2) + float rand_float = rand()/float(RAND_MAX); + if (rand_float < activation_ratio) { // send activation std::cout << "sending activation for event_id: " << event_id << std::endl; @@ -483,8 +484,8 @@ int take_action(r::live_model &rl, const char *event_id, int action, if (action_flag == r::action_flags::DEFERRED) { - size_t rand_num = get_random_number(rng, 0 /*min*/); - if (rand_num % 2) + float rand_float = rand()/float(RAND_MAX); + if (rand_float < activation_ratio) { // send activation std::cout << "sending activation for event_id: " << event_id << std::endl; @@ -515,7 +516,7 @@ int pseudo_random(int seed) { int run_config(int action, int count, int initial_seed, bool gen_random_reward, bool enable_apprentice_mode, int deferred_action_count, - std::string config_file, std::mt19937 &rng, bool no_loop_actions, + std::string config_file, std::mt19937 &rng, float activation_ratio, float epsilon = 0.0f) { u::configuration config; @@ -550,7 +551,7 @@ int run_config(int action, int count, int initial_seed, bool gen_random_reward, ? r::action_flags::DEFERRED : r::action_flags::DEFAULT; int r = take_action(rl, event_id, action, action_flag, gen_random_reward, - rng, no_loop_actions); + rng, activation_ratio); if(r) return r; } @@ -569,7 +570,7 @@ int main(int argc, char *argv[]) { bool enable_apprentice_mode = false; int deferred_action_count = 0; float epsilon = 0.f; - bool no_loop_actions = false; + float activation_ratio = 0.5f; desc.add_options()("help", "Produce help message")("all", "use all args")( "dedup", "Enable dedup/zstd")("count", po::value(), @@ -589,8 +590,8 @@ int main(int argc, char *argv[]) { "deferred_action_count", po::value(), "Number of deferred action for interaction events. Set the " "deferred_action flag to true for first deferred_action_count number of " - "actions")("no_loop_actions", - "Flag to disable actions being taken for all outcome events"); + "actions")( + "activation_ratio", po::value(), "Percent of observations to activate in loop generators"); po::positional_options_description pd; pd.add("kind", 1); @@ -603,7 +604,6 @@ int main(int argc, char *argv[]) { gen_random_reward = vm.count("random_reward"); enable_apprentice_mode = vm.count("apprentice"); enable_dedup = vm.count("dedup"); - no_loop_actions = vm.count("no_loop_actions"); std::vector deferrable_interactions{ "cb", "invalid-cb", "ccb", @@ -623,6 +623,8 @@ int main(int argc, char *argv[]) { config_file = vm["config_file"].as(); if(vm.count("deferred_action_count") > 0) deferred_action_count = vm["deferred_action_count"].as(); + if(vm.count("activation_ratio") > 0) + activation_ratio = vm["activation_ratio"].as(); if(vm.count("deferred_action_count") > 0 && !std::any_of( deferrable_interactions.begin(), @@ -648,7 +650,7 @@ int main(int argc, char *argv[]) { if(gen_all) { for(int i = 0; options[i]; ++i) { if (run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, - deferred_action_count, config_file, rng, no_loop_actions, + deferred_action_count, config_file, rng, activation_ratio, epsilon)) return -1; } @@ -671,5 +673,5 @@ int main(int argc, char *argv[]) { return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, - rng, no_loop_actions, epsilon); + rng, activation_ratio, epsilon); } \ No newline at end of file From 888a8e2dd4792d53505575b281017fc319d1f8fe Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Fri, 27 Aug 2021 12:17:44 -0400 Subject: [PATCH 10/11] fix bug --- external_parser/unit_tests/test_vw_external_parser.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external_parser/unit_tests/test_vw_external_parser.cc b/external_parser/unit_tests/test_vw_external_parser.cc index c2ab55f2b..94574221b 100644 --- a/external_parser/unit_tests/test_vw_external_parser.cc +++ b/external_parser/unit_tests/test_vw_external_parser.cc @@ -239,7 +239,7 @@ BOOST_AUTO_TEST_CASE(ccb_compare_dsjson_with_fb_models) { generate_dsjson_and_fb_models(model_name, "--ccb_explore_adf ", file_name); // read the models and compare - auto bufffb_model = read_file(model_name + ".fb"); + auto buffer_fb_model = read_file(model_name + ".fb"); auto buffer_dsjson_model = read_file(model_name + ".json"); BOOST_CHECK_EQUAL_COLLECTIONS(buffer_fb_model.begin(), buffer_fb_model.end(), From b7aa258810728f341d7bcb15cc0a12919212e7cf Mon Sep 17 00:00:00 2001 From: Griffin Bassman Date: Fri, 27 Aug 2021 12:29:55 -0400 Subject: [PATCH 11/11] revert activation_ratio arg --- .../unit_tests/test_files/README.md | 2 +- test_tools/example_gen/example_gen.cc | 34 ++++++++----------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md index 766990d44..cf2f3a51b 100644 --- a/external_parser/unit_tests/test_files/README.md +++ b/external_parser/unit_tests/test_files/README.md @@ -26,7 +26,7 @@ Residing under `valid_joined_logs` - ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log` - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) - ca_loop_simple.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) -- ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1 --activation_ratio 0` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) +- ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`) - ca_loop_simple_e2e.log: generated by running `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4` - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log` - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc index a5519a949..111a81f26 100644 --- a/test_tools/example_gen/example_gen.cc +++ b/test_tools/example_gen/example_gen.cc @@ -252,7 +252,7 @@ void send_ccb_outcome(std::mt19937& rng, bool gen_random_reward, const char * ev int take_action(r::live_model &rl, const char *event_id, int action, unsigned int action_flag, bool gen_random_reward, - std::mt19937 &rng, float activation_ratio) { + std::mt19937 &rng) { r::api_status status; float reward = gen_random_reward ? get_random_number(rng) : 1.5f; @@ -398,8 +398,8 @@ int take_action(r::live_model &rl, const char *event_id, int action, if (action_flag == r::action_flags::DEFERRED) { - float rand_float = rand()/float(RAND_MAX); - if (rand_float < activation_ratio) + size_t rand_num = get_random_number(rng, 0 /*min*/); + if (rand_num % 2) { // send activation std::cout << "sending activation for event_id: " << event_id << std::endl; @@ -426,8 +426,8 @@ int take_action(r::live_model &rl, const char *event_id, int action, } if (action_flag == r::action_flags::DEFERRED) { - float rand_float = rand()/float(RAND_MAX); - if (rand_float < activation_ratio) + size_t rand_num = get_random_number(rng, 0 /*min*/); + if (rand_num % 2) { // send activation std::cout << "sending activation for event_id: " << event_id @@ -462,8 +462,8 @@ int take_action(r::live_model &rl, const char *event_id, int action, if (action_flag == r::action_flags::DEFERRED) { - float rand_float = rand()/float(RAND_MAX); - if (rand_float < activation_ratio) + size_t rand_num = get_random_number(rng, 0 /*min*/); + if (rand_num % 2) { // send activation std::cout << "sending activation for event_id: " << event_id << std::endl; @@ -498,8 +498,8 @@ int take_action(r::live_model &rl, const char *event_id, int action, if (action_flag == r::action_flags::DEFERRED) { - float rand_float = rand()/float(RAND_MAX); - if (rand_float < activation_ratio) + size_t rand_num = get_random_number(rng, 0 /*min*/); + if (rand_num % 2) { // send activation std::cout << "sending activation for event_id: " << event_id << std::endl; @@ -560,8 +560,7 @@ int pseudo_random(int seed) { int run_config(int action, int count, int initial_seed, bool gen_random_reward, bool enable_apprentice_mode, int deferred_action_count, - std::string config_file, std::mt19937 &rng, float activation_ratio, - float epsilon = 0.0f) { + std::string config_file, std::mt19937 &rng, float epsilon = 0.0f) { u::configuration config; if (config_file.empty()) @@ -595,7 +594,7 @@ int run_config(int action, int count, int initial_seed, bool gen_random_reward, ? r::action_flags::DEFERRED : r::action_flags::DEFAULT; int r = take_action(rl, event_id, action, action_flag, gen_random_reward, - rng, activation_ratio); + rng); if(r) return r; } @@ -614,7 +613,6 @@ int main(int argc, char *argv[]) { bool enable_apprentice_mode = false; int deferred_action_count = 0; float epsilon = 0.f; - float activation_ratio = 0.5f; desc.add_options() ("help", "Produce help message") @@ -627,8 +625,7 @@ int main(int argc, char *argv[]) { ("random_reward", "Generate random float reward for observation event") ("config_file", po::value(), "json config file for rlclinetlib") ("apprentice", "Enable apprentice mode") - ("deferred_action_count", po::value(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions") - ("activation_ratio", po::value(), "Percent of observations to activate in loop generators"); + ("deferred_action_count", po::value(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions"); po::positional_options_description pd; pd.add("kind", 1); @@ -659,8 +656,6 @@ int main(int argc, char *argv[]) { config_file = vm["config_file"].as(); if(vm.count("deferred_action_count") > 0) deferred_action_count = vm["deferred_action_count"].as(); - if(vm.count("activation_ratio") > 0) - activation_ratio = vm["activation_ratio"].as(); if(vm.count("deferred_action_count") > 0 && !std::any_of( deferrable_interactions.begin(), @@ -686,8 +681,7 @@ int main(int argc, char *argv[]) { if(gen_all) { for(int i = 0; options[i]; ++i) { if (run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, - deferred_action_count, config_file, rng, activation_ratio, - epsilon)) + deferred_action_count, config_file, rng, epsilon)) return -1; } return 0; @@ -709,5 +703,5 @@ int main(int argc, char *argv[]) { return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, - rng, activation_ratio, epsilon); + rng, epsilon); } \ No newline at end of file