From 3c30138a01c2e6a7f3e21a8501ca8f537a73f6cc Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Mon, 23 Aug 2021 18:07:36 -0400
Subject: [PATCH 01/11] feat: add ca-loop to example_gen

---
 test_tools/example_gen/example_gen.cc | 36 ++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)
diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc
index c0c1dede2..3463ce5d8 100644
--- a/test_tools/example_gen/example_gen.cc
+++ b/test_tools/example_gen/example_gen.cc
@@ -39,6 +39,7 @@ static const char *options[] = {
   "ss-reward",
   "action-taken",
   "cb-loop",
+  "ca-loop",
   "ccb-loop",
   "ccb-baseline-loop",
   nullptr
@@ -63,6 +64,7 @@ enum options{
   S_S_REWARD,
   ACTION_TAKEN,
   CB_LOOP,
+  CA_LOOP,
   CCB_LOOP,
   CCB_BASELINE_ACTION_LOOP
 };
@@ -116,7 +118,7 @@ void load_config_from_json(int action, u::configuration& config, bool enable_app
     std::string args = "--slates --ccb_explore_adf --json --quiet --epsilon " + std::to_string(epsilon) + " --first_only --id N/A";
     config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, args.c_str());
   }
-  else if (action == CA_ACTION)
+  else if (action == CA_ACTION || action == CA_LOOP)
   {
     config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --json --quiet --id N/A");
   }
@@ -407,6 +409,34 @@ int take_action(r::live_model& rl, const char *event_id, int action, unsigned in
 
       break;
     };
+    case CA_LOOP: {// "ca_loop",
+      r::continuous_action_response response;
+      if(rl.request_continuous_action(event_id, JSON_CA_CONTEXT, action_flag, response, &status) != err::success)
+          std::cout << status.get_error_msg() << std::endl;
+      size_t num_of_rewards = get_random_number(rng);
+      for (size_t i = 0; i < num_of_rewards; i++)
+      {
+        float reward = gen_random_reward ? get_random_number(rng, 0) : 1.5f;
+        std::cout << "report outcome: " << reward << " for event: " << event_id << std::endl;
+        if( rl.report_outcome(event_id, reward, &status) != err::success )
+            std::cout << status.get_error_msg() << std::endl;
+      }
+
+      if (action_flag == r::action_flags::DEFERRED)
+      {
+        size_t rand_num = get_random_number(rng, 0 /*min*/);
+        if (rand_num % 2)
+        {
+          // send activation
+          std::cout << "sending activation for event_id: " << event_id << std::endl;
+          if (rl.report_action_taken(event_id, &status) != err::success ) {
+            std::cout << status.get_error_msg() << std::endl;
+          }
+        }
+      }
+      
+      break;
+    };
     case CCB_LOOP: { // "ccb action and random number of float rewards and mix of slot ids / non slot ids / float / string rewards"
       // randomly decide to send either ccb with slot id's provided or random slot id's
       // the ccb interactions that are non-random are the ones we can use to send observations for the slot id using the slot-id string
@@ -555,7 +585,7 @@ int main(int argc, char *argv[]) {
     ("count", po::value<int>(), "Number of events to produce")
     ("seed", po::value<int>(), "Initial seed used to produce event ids")
     ("epsilon", po::value<float>(), "epsilon to be used in command line args for VW")
-    ("kind", po::value<std::string>(), "which kind of example to generate (cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,action-taken)")
+    ("kind", po::value<std::string>(), "which kind of example to generate (cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ca-loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,action-taken)")
     ("random_reward", "Generate random float reward for observation event")
     ("config_file", po::value<std::string>(), "json config file for rlclinetlib")
     ("apprentice", "Enable apprentice mode")
@@ -574,7 +604,7 @@ int main(int argc, char *argv[]) {
     enable_dedup = vm.count("dedup");
 
     std::vector<std::string> deferrable_interactions {
-      "cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop",
+      "cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop", "ca-loop", 
       "ccb-with-slot-id", "ccb-loop", "ccb-baseline-loop"
     };
 

From e8d35f4cc16275a6069e91ea2a94d7491e62adf5 Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Tue, 24 Aug 2021 11:27:44 -0400
Subject: [PATCH 02/11] add test for ca-loop

---
 external_parser/unit_tests/test_files/README.md |   1 +
 .../valid_joined_logs/ca_loop_simple.log        | Bin 0 -> 2336 bytes
 .../unit_tests/test_log_converter.cc            |  16 ++++++++++++++++
 3 files changed, 17 insertions(+)
 create mode 100755 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.log

diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md
index 83128f978..b3b73a709 100644
--- a/external_parser/unit_tests/test_files/README.md
+++ b/external_parser/unit_tests/test_files/README.md
@@ -25,6 +25,7 @@ Residing under `valid_joined_logs`
 - average_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind cb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
 - ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log`
 - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
+- ca_loop_simple.log: generated by running `./example_gen --kind ca-loop --count 3` and performing binary join with `python joiner.py --problem_type_config 4`
 - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log`
 - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining
 - ccb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 2 --learning_mode_config 1` with the files (ccb-baseline-loopinteractions_v2.fb, ccb-baseline-loopobservations_v2.fb) and renaming the resulting default `merged.log`
diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.log b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.log
new file mode 100755
index 0000000000000000000000000000000000000000..b894e499dca5985f2449dd43a974505bfb551c5e
GIT binary patch
literal 2336
zcmWFwcXMK7U|<LZf&w7T0>nH1|Nk!lq@S{LNbsrgGwiv`!oURNTR`P+0OdJ=Y!M*l
z1mdjx%)Hbhy@E<0n+?uqVBlcj0b&*)2C*4{JO;2ix`qr4%s_T(X$pfN5J&*oAp1b7
zA%GpM7RY7+TJ^sGC=AjIG96?d$PSP@L1I1Z3=9u|0yaQAgO!2d3s6D<h%0~=TmU*+
z1BgR_9$W*}v*ySD|1f{ec*p`arw3{l$h{!ff?VMORj&b;<6~fl$^nHK(cQzqaKN5{
zfsa7}sD_6@0;pC7$O8jr1qcl%A--$?s)qm&Kh?-0(K0dB3?vfpbMU+I3l=x-0J<F%
zF38~sOH(00aZq{zg%OAbg)<K@)<I#^&Ef6{5=$%nYzK;0P}=a*cVv(U($z{q`APXD
zUce+)VyI+Qt&~=jnOu^YpQmJHZc$qcRDm_jSrmXW$be9oANe&n!`$37#niye)EwlO
zjNgObjXZw_zZ*ATaU;2DS1NyC(k^nkX!wg6<{iNFf}R)YS*8JtWd=*bG;_mb3kHRM
zgWG)v2D$qZfOQ~xoi&)^mjhU$3xG>>M4JItw{F3b4vDD~0-iV8gX)C1R2D0G)d|RH
Lb`=l+t!DrL3WeX*

literal 0
HcmV?d00001

diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc
index c55d93bd6..94652906d 100644
--- a/external_parser/unit_tests/test_log_converter.cc
+++ b/external_parser/unit_tests/test_log_converter.cc
@@ -145,6 +145,22 @@ BOOST_AUTO_TEST_CASE(ccb_payload_with_slot_id) {
 BOOST_AUTO_TEST_SUITE_END()
 
 BOOST_AUTO_TEST_SUITE(log_converter_ca_format)
+BOOST_AUTO_TEST_CASE(ca_loop_simple) {
+  std::string infile_path =
+      "valid_joined_logs/ca_loop_simple.log";
+  std::string outfile_path =
+      "valid_joined_logs/ca_loop_simple.dsjson";
+
+  std::string converted_json =
+      get_json_event(infile_path, outfile_path, v2::ProblemType_CA);
+  std::string expected_json =
+"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
+
+  BOOST_CHECK_EQUAL(converted_json, expected_json);
+}
+
 BOOST_AUTO_TEST_CASE(ca_deferred_action_without_activation) {
   std::string infile_path =
       "skip_learn/ca/deferred_action_without_activation.fb";

From c111706b578f4a47063c6ae57eca04234d0e75b9 Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Tue, 24 Aug 2021 11:28:48 -0400
Subject: [PATCH 03/11] formatting: re-wrap ca_loop_simple test paths and expected JSON

---
 .../unit_tests/test_log_converter.cc          | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc
index 94652906d..f485b714e 100644
--- a/external_parser/unit_tests/test_log_converter.cc
+++ b/external_parser/unit_tests/test_log_converter.cc
@@ -146,17 +146,24 @@ BOOST_AUTO_TEST_SUITE_END()
 
 BOOST_AUTO_TEST_SUITE(log_converter_ca_format)
 BOOST_AUTO_TEST_CASE(ca_loop_simple) {
-  std::string infile_path =
-      "valid_joined_logs/ca_loop_simple.log";
-  std::string outfile_path =
-      "valid_joined_logs/ca_loop_simple.dsjson";
+  std::string infile_path = "valid_joined_logs/ca_loop_simple.log";
+  std::string outfile_path = "valid_joined_logs/ca_loop_simple.dsjson";
 
   std::string converted_json =
       get_json_event(infile_path, outfile_path, v2::ProblemType_CA);
   std::string expected_json =
-"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
-"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
-"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15.000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
+      "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,"
+      "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15."
+      "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{"
+      "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+      "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
+      "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15."
+      "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{"
+      "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+      "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
+      "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15."
+      "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{"
+      "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
 
   BOOST_CHECK_EQUAL(converted_json, expected_json);
 }

From f8baba1e42b056ffb7b748f7979cfe037784c36b Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Tue, 24 Aug 2021 12:47:00 -0400
Subject: [PATCH 04/11] add ca_loop_simple_e2e fixture and log-converter test

---
 external_parser/unit_tests/test_files/README.md  |   1 +
 .../valid_joined_logs/ca_loop_simple_e2e.log     | Bin 0 -> 2432 bytes
 external_parser/unit_tests/test_log_converter.cc |  15 +++++++++++++++
 3 files changed, 16 insertions(+)
 create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple_e2e.log

diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md
index b3b73a709..ff5e245d6 100644
--- a/external_parser/unit_tests/test_files/README.md
+++ b/external_parser/unit_tests/test_files/README.md
@@ -26,6 +26,7 @@ Residing under `valid_joined_logs`
 - ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log`
 - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
 - ca_loop_simple.log: generated by running `./example_gen --kind ca-loop --count 3` and performing binary join with `python joiner.py --problem_type_config 4`
+- ca_loop_simple_e2e.log: generated in DS repo with `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4`
 - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log`
 - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining
 - ccb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 2 --learning_mode_config 1` with the files (ccb-baseline-loopinteractions_v2.fb, ccb-baseline-loopobservations_v2.fb) and renaming the resulting default `merged.log`
diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple_e2e.log b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple_e2e.log
new file mode 100644
index 0000000000000000000000000000000000000000..84cb198798cf1e3c28a0bedabd1f01841afd9501
GIT binary patch
literal 2432
zcmWFwcXMK7U|<LZf&w7T0>nH1|Nk!lq@S{LNC+sYG0eHe$G`;STR`P+0OdJ=Y!M*l
z1mdjx%)Hbhy@E<0n+?uqVBlcj0b&*)2C*4{JO;2ix`qr4%s_T(X$pfN5J&*oAp1b7
zA%GpM7RY7+TJ`?|P#B~aWID(^kR2d*g2bM%GcXtcaSKrJ1}g)D4iE<bB^LlK_yBaY
z0}!VGJ-7v`C*;Tf|1f`*-s1zCGX-iE$h{!ff?VMORj&b;<6~fl$^nHK(cQzqaKN5{
zfsa7}sD_6@0;pC7$O8jrkQpF5`Q<Gc?j7_3o6o?|U=Ok<)hJcBAhjqBq!PlnNVH5$
zH3RV(ej?mzI{<Fw+eExu*C4r-6YACjKrev82{~M0X)FXN4oW|uFayz`aOVN00#KNB
zbGSQ##L`MX+kxU4lve!o9U0_-bhT1Yeo}sk7cj|{7%Ev+E2R}>CYNO9=P6m4Th!J9
zRbY(+7UVdnT1vdXT7Dto;M`sMr<do8@VnLAG{w}w%+ws{aR!6m2)E800JkRf6YthL
zNN!CY0JqL9Bi^kYe-Pm+4t47RV0e?0wx#lEo3>?>i1$~*UxdF7-hrg;30V9^tFo{q
z4!2u@MLL6}VVb#NvIPUjKZHB`2Ed&)e#E(R1Cl!<2f&@mCd9cj0$8WQ>OlDcaHpU+
zaqi>*)(isRngP)kxqKUvPWNC*r^M7P0nZ!lL3K-9DvK2@>Xw$Do;d4w<TSks2!PID
F007kIA<X~)

literal 0
HcmV?d00001

diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc
index f485b714e..55e4db8dc 100644
--- a/external_parser/unit_tests/test_log_converter.cc
+++ b/external_parser/unit_tests/test_log_converter.cc
@@ -149,6 +149,21 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple) {
   std::string infile_path = "valid_joined_logs/ca_loop_simple.log";
   std::string outfile_path = "valid_joined_logs/ca_loop_simple.dsjson";
 
+  std::string converted_json =
+      get_json_event(infile_path, outfile_path, v2::ProblemType_CA);
+  std::string expected_json =
+"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.0148716},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.439586},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
+
+
+  BOOST_CHECK_EQUAL(converted_json, expected_json);
+}
+
+BOOST_AUTO_TEST_CASE(ca_loop_simple_e2e) {
+  std::string infile_path = "valid_joined_logs/ca_loop_simple_e2e.log";
+  std::string outfile_path = "valid_joined_logs/ca_loop_simple_e2e.dsjson";
+
   std::string converted_json =
       get_json_event(infile_path, outfile_path, v2::ProblemType_CA);
   std::string expected_json =

From 9f2abb74df6e068d783f75852da97c0e8495d509 Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Tue, 24 Aug 2021 12:47:50 -0400
Subject: [PATCH 05/11] formatting: re-wrap ca_loop_simple expected JSON

---
 external_parser/unit_tests/test_log_converter.cc | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc
index 55e4db8dc..4e12b37a5 100644
--- a/external_parser/unit_tests/test_log_converter.cc
+++ b/external_parser/unit_tests/test_log_converter.cc
@@ -152,10 +152,18 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple) {
   std::string converted_json =
       get_json_event(infile_path, outfile_path, v2::ProblemType_CA);
   std::string expected_json =
-"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.0148716},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
-"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
-"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.439586},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
-
+      "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,"
+      "\"action\":1.0148716},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\","
+      "\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{"
+      "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+      "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
+      "\"action\":12.464624},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\","
+      "\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{"
+      "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+      "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
+      "\"action\":12.439586},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\","
+      "\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{"
+      "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
 
   BOOST_CHECK_EQUAL(converted_json, expected_json);
 }

From 1b8d5c940da48da9cc2dbb6ebf8bb41ad4c12364 Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Tue, 24 Aug 2021 13:47:14 -0400
Subject: [PATCH 06/11] fix expected timestamps and action precision in ca_loop_simple tests

---
 .../unit_tests/test_log_converter.cc          | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc
index 4e12b37a5..051fdb3b7 100644
--- a/external_parser/unit_tests/test_log_converter.cc
+++ b/external_parser/unit_tests/test_log_converter.cc
@@ -153,17 +153,17 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple) {
       get_json_event(infile_path, outfile_path, v2::ProblemType_CA);
   std::string expected_json =
       "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,"
-      "\"action\":1.0148716},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\","
-      "\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{"
-      "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+      "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15."
+      "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{"
+      "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
       "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
-      "\"action\":12.464624},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\","
-      "\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{"
-      "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+      "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15."
+      "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{"
+      "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
       "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
-      "\"action\":12.439586},\"Timestamp\":\"2021-08-24T15:31:57.0000000Z\","
-      "\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{"
-      "\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
+      "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15."
+      "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{"
+      "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
 
   BOOST_CHECK_EQUAL(converted_json, expected_json);
 }
@@ -176,15 +176,15 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple_e2e) {
       get_json_event(infile_path, outfile_path, v2::ProblemType_CA);
   std::string expected_json =
       "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,"
-      "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T14:38:15."
+      "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-24T16:34:38."
       "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{"
       "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
       "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
-      "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T14:38:15."
+      "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-24T16:34:38."
       "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{"
       "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
       "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
-      "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T14:38:15."
+      "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-24T16:34:38."
       "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{"
       "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
 

From 208796aabd99cb963b84fdadd74dfcda4db441e5 Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Wed, 25 Aug 2021 12:13:01 -0400
Subject: [PATCH 07/11] test VW model for dsjson vs binary

---
 .../unit_tests/test_files/README.md           |   3 +-
 .../ca_loop_mixed_skip_learn.fb               | Bin 0 -> 2512 bytes
 .../ca_loop_mixed_skip_learn.json             |   3 ++
 .../{ca_loop_simple.log => ca_loop_simple.fb} | Bin
 .../valid_joined_logs/ca_loop_simple.json     |   3 ++
 .../ca_loop_skip_learn_e2e.log                | Bin 0 -> 1280 bytes
 .../unit_tests/test_log_converter.cc          |  16 +++++++-
 .../unit_tests/test_vw_external_parser.cc     |  38 ++++++++++++++++++
 test_tools/example_gen/example_gen.cc         |  17 ++++----
 9 files changed, 71 insertions(+), 9 deletions(-)
 create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.fb
 create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.json
 rename external_parser/unit_tests/test_files/valid_joined_logs/{ca_loop_simple.log => ca_loop_simple.fb} (100%)
 create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.json
 create mode 100644 external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_skip_learn_e2e.log

diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md
index ff5e245d6..ecbe17df4 100644
--- a/external_parser/unit_tests/test_files/README.md
+++ b/external_parser/unit_tests/test_files/README.md
@@ -25,7 +25,8 @@ Residing under `valid_joined_logs`
 - average_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind cb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
 - ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log`
 - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
-- ca_loop_simple.log: generated by running `./example_gen --kind ca-loop --count 3` and performing binary join with `python joiner.py --problem_type_config 4`
+- ca_loop_simple.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
+- ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1 --no_loop_actions` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
 - ca_loop_simple_e2e.log: generated in DS repo with `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4`
 - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log`
 - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining
diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.fb b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.fb
new file mode 100644
index 0000000000000000000000000000000000000000..f6667f72fb366367f859ee43acef91e93fbd6dd1
GIT binary patch
literal 2512
zcmd5;IZFdk5T2OTtawGRh$6ZM3mcCJNj51IgD8lIh*u>Z*+4AZph+PT&`vO==nv3B
z)E{7JX>TJ~i<W|gx9c~1WS1Za7I6nA@0fXSzL}kwm)W_IgolV`KpBhz_(Ib(KgK(s
z*xb+=p@Y^Ms=!({<u{Ovm^Xo|!K-R|O<C{BY+_zjToZ{@3l_jUC#*=nr#nRC#aziH
zsSy;wob9o;K|Wm<a{;C_2}bxV-_G~34aQjqmT{!uBYME*FgR05bOoUV9*2=*0RMo4
z)8KRSobaM)mT%#X8vU*UWNd>mGh#+EZ0dIvW%cAUW!UnRVj{X6B&sJ#za;?OR;+aL
zO2%0HH9XyhzCpZJ`sJnAk`m#a69-1Yr5MLw?Koab?u_H31LKiPG48+FaolhzM)}>w
zm@qM3AWutc#_A!{ka2(Ddf<_3suoqw^{^+74)I<p_cXwD%)MZucZk|CZUv{*Wi>m7
zmXZwx<6FVhdU_?BR@Z{@sGQG3<H!0EBuJd}TYZTiwm;A9?#bO~I2rDXgrn@sxdY>w
zOEFG<+Hu@8G5$3#TGao^3unxwFWaAl>CY~{MQNUw^KHQMMzK&T8d{O*(t)qhh4>cn
zUR!VLZJSY7Zi=6XZsgayk^TSX(8T#u-ewVd?osn#tDo`9ZgjII>wWi3-tP>ud_fW7
O<?3n9yf_8HJbeQ^!{apo

literal 0
HcmV?d00001

diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.json b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.json
new file mode 100644
index 000000000..716ecb081
--- /dev/null
+++ b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_mixed_skip_learn.json
@@ -0,0 +1,3 @@
+{"_label_ca":{"cost":-1.5,"pdf_value":0.0005050505278632045,"action":1.014871597290039},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"91f71c8","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"},"_skipLearn":true}
+{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.464624404907227},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"75d50657","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
+{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.43958568572998},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"e28a9ae6","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.log b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.fb
similarity index 100%
rename from external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.log
rename to external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.fb
diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.json b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.json
new file mode 100644
index 000000000..12d55163c
--- /dev/null
+++ b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_simple.json
@@ -0,0 +1,3 @@
+{"_label_ca":{"cost":-1.5,"pdf_value":0.0005050505278632045,"action":1.014871597290039},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"91f71c8","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
+{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.464624404907227},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"75d50657","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
+{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.43958568572998},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"e28a9ae6","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
diff --git a/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_skip_learn_e2e.log b/external_parser/unit_tests/test_files/valid_joined_logs/ca_loop_skip_learn_e2e.log
new file mode 100644
index 0000000000000000000000000000000000000000..e3b6d887386c4108b93ee6892818ca4471507c5d
GIT binary patch
literal 1280
zcmc(fK}!Nr5XZ+{+Er3CB0LC%rywl0Lz*6}2oDk=LFFMLvK1_FMeHCV6a-y6^Z`0Z
zg{L|M9X<3RI&_u3gth(W?XD}*VNdyBc=KlF&HUcu&D%f7ZhDAl52K1%g1i}q5yIS)
zl}I@4qh32e6IdrL{|EdE^cjc`a;BFnT5YM?fIc}`6Dc$Wksz!SRwVMoRU$8R?c$We
z7!l}PkE6{Yi&*FqN*UXL;=P=m^KlL4<oOVZo&eX!A@Y#6<$LcNhRvUQ42bw^_&5)9
zF%?&{&lH8u=OGFTyHH8SN(^KU(!AIifi-H|y$f)QI;)H1zsp*CFEgB5ThZcGtyba;
zGGt9HCDdY)j=vqYX^ZU^{!z9q*!Y}W!}JA_$K-~|GkPaP-HaPf-;Fy?<;UUtd@%3u
zKRI6;?l_-^IPb^6xqj}Bvw*j3ze#NjoG)p2oC-Q2Bs#?T17<DW?%*vtJyeH{-)RA2
z_Yijp_gM%QyNB{$R~hynX;O;g`48Ax-k>?m&DgGfqStfyO6qDX)r^&D<zl_8S7NC|
YveiPwpZzBd_g}~F?tDl_9Soj-0q|6kFaQ7m

literal 0
HcmV?d00001

diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc
index 051fdb3b7..82854a158 100644
--- a/external_parser/unit_tests/test_log_converter.cc
+++ b/external_parser/unit_tests/test_log_converter.cc
@@ -146,7 +146,7 @@ BOOST_AUTO_TEST_SUITE_END()
 
 BOOST_AUTO_TEST_SUITE(log_converter_ca_format)
 BOOST_AUTO_TEST_CASE(ca_loop_simple) {
-  std::string infile_path = "valid_joined_logs/ca_loop_simple.log";
+  std::string infile_path = "valid_joined_logs/ca_loop_simple.fb";
   std::string outfile_path = "valid_joined_logs/ca_loop_simple.dsjson";
 
   std::string converted_json =
@@ -191,6 +191,20 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple_e2e) {
   BOOST_CHECK_EQUAL(converted_json, expected_json);
 }
 
+BOOST_AUTO_TEST_CASE(ca_loop_mixed_skip_learn) {
+  std::string infile_path = "valid_joined_logs/ca_loop_mixed_skip_learn.fb";
+  std::string outfile_path = "valid_joined_logs/ca_loop_mixed_skip_learn.dsjson";
+
+  std::string converted_json =
+      get_json_event(infile_path, outfile_path, v2::ProblemType_CA);
+  std::string expected_json =
+"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.014871597290039},\"Timestamp\":\"2021-08-25T15:36:54.000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"},\"_skipLearn\":true}\n"
+"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624404907227},\"Timestamp\":\"2021-08-25T15:36:54.000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.43958568572998},\"Timestamp\":\"2021-08-25T15:36:54.000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
+
+  BOOST_CHECK_EQUAL(converted_json, expected_json);
+}
+
 BOOST_AUTO_TEST_CASE(ca_deferred_action_without_activation) {
   std::string infile_path =
       "skip_learn/ca/deferred_action_without_activation.fb";
diff --git a/external_parser/unit_tests/test_vw_external_parser.cc b/external_parser/unit_tests/test_vw_external_parser.cc
index 639e06d1d..70719155a 100644
--- a/external_parser/unit_tests/test_vw_external_parser.cc
+++ b/external_parser/unit_tests/test_vw_external_parser.cc
@@ -255,6 +255,44 @@ BOOST_AUTO_TEST_CASE(ccb_compare_dsjson_with_fb_models) {
                                 buffer_dsjson_model.end());
 }
 
+BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_simple) {
+  std::string input_files = get_test_files_location();
+
+  std::string model_name = input_files + "/test_outputs/m_average";
+
+  std::string file_name =
+      input_files + "/valid_joined_logs/ca_loop_simple";
+
+  generate_dsjson_and_fb_models(model_name, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", file_name);
+
+  // read the models and compare
+  auto buffer_fb_model = read_file(model_name + ".fb");
+  auto buffer_dsjson_model = read_file(model_name + ".json");
+
+  BOOST_CHECK_EQUAL_COLLECTIONS(buffer_fb_model.begin(), buffer_fb_model.end(),
+                                buffer_dsjson_model.begin(),
+                                buffer_dsjson_model.end());
+}
+
+BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_mixed_skip_learn) {
+  std::string input_files = get_test_files_location();
+
+  std::string model_name = input_files + "/test_outputs/m_average";
+
+  std::string file_name =
+      input_files + "/valid_joined_logs/ca_loop_mixed_skip_learn";
+
+  generate_dsjson_and_fb_models(model_name, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", file_name);
+
+  // read the models and compare
+  auto buffer_fb_model = read_file(model_name + ".fb");
+  auto buffer_dsjson_model = read_file(model_name + ".json");
+
+  BOOST_CHECK_EQUAL_COLLECTIONS(buffer_fb_model.begin(), buffer_fb_model.end(),
+                                buffer_dsjson_model.begin(),
+                                buffer_dsjson_model.end());
+}
+
 BOOST_AUTO_TEST_CASE(rrcr_ignore_examples_before_checkpoint) {
   std::string input_files = get_test_files_location();
 
diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc
index 3463ce5d8..6437df509 100644
--- a/test_tools/example_gen/example_gen.cc
+++ b/test_tools/example_gen/example_gen.cc
@@ -250,7 +250,7 @@ void send_ccb_outcome(std::mt19937& rng, bool gen_random_reward, const char * ev
       }
 }
 
-int take_action(r::live_model& rl, const char *event_id, int action, unsigned int action_flag, bool gen_random_reward, std::mt19937& rng) {
+int take_action(r::live_model& rl, const char *event_id, int action, unsigned int action_flag, bool gen_random_reward, std::mt19937& rng, bool no_loop_actions) {
   r::api_status status;
   float reward = gen_random_reward ? get_random_number(rng) : 1.5f;
 
@@ -422,7 +422,7 @@ int take_action(r::live_model& rl, const char *event_id, int action, unsigned in
             std::cout << status.get_error_msg() << std::endl;
       }
 
-      if (action_flag == r::action_flags::DEFERRED)
+      if (action_flag == r::action_flags::DEFERRED && !no_loop_actions)
       {
         size_t rand_num = get_random_number(rng, 0 /*min*/);
         if (rand_num % 2)
@@ -525,7 +525,7 @@ int pseudo_random(int seed) {
   return (int)(val & 0xFFFFFFFF);
 }
 
-int run_config(int action, int count, int initial_seed, bool gen_random_reward, bool enable_apprentice_mode, int deferred_action_count, std::string config_file, std::mt19937& rng, float epsilon = 0.0f) {
+int run_config(int action, int count, int initial_seed, bool gen_random_reward, bool enable_apprentice_mode, int deferred_action_count, std::string config_file, std::mt19937& rng, bool no_loop_actions, float epsilon = 0.0f) {
   u::configuration config;
 
   if (config_file.empty())
@@ -558,7 +558,7 @@ int run_config(int action, int count, int initial_seed, bool gen_random_reward,
     auto action_flag = i < deferred_action_count
       ? r::action_flags::DEFERRED : r::action_flags::DEFAULT;
 
-    int r = take_action(rl, event_id, action, action_flag, gen_random_reward, rng);
+    int r = take_action(rl, event_id, action, action_flag, gen_random_reward, rng, no_loop_actions);
     if(r)
       return r;
   }
@@ -577,6 +577,7 @@ int main(int argc, char *argv[]) {
   bool enable_apprentice_mode = false;
   int deferred_action_count = 0;
   float epsilon = 0.f;
+  bool no_loop_actions = false;
 
   desc.add_options()
     ("help", "Produce help message")
@@ -589,7 +590,8 @@ int main(int argc, char *argv[]) {
     ("random_reward", "Generate random float reward for observation event")
     ("config_file", po::value<std::string>(), "json config file for rlclinetlib")
     ("apprentice", "Enable apprentice mode")
-    ("deferred_action_count",  po::value<int>(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions");
+    ("deferred_action_count",  po::value<int>(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions")
+    ("no_loop_actions", "Flag to disable actions being taken for all outcome events");
 
   po::positional_options_description pd;
   pd.add("kind", 1);
@@ -602,6 +604,7 @@ int main(int argc, char *argv[]) {
     gen_random_reward = vm.count("random_reward");
     enable_apprentice_mode = vm.count("apprentice");
     enable_dedup = vm.count("dedup");
+    no_loop_actions = vm.count("no_loop_actions");
 
     std::vector<std::string> deferrable_interactions {
       "cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop", "ca-loop", 
@@ -644,7 +647,7 @@ int main(int argc, char *argv[]) {
 
   if(gen_all) {
     for(int i = 0; options[i]; ++i) {
-      if(run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, epsilon))
+      if(run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, no_loop_actions, epsilon))
         return -1;
     }
     return 0;
@@ -664,5 +667,5 @@ int main(int argc, char *argv[]) {
     return -1;
   }
 
-  return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, epsilon);
+  return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, no_loop_actions, epsilon);
 }
\ No newline at end of file

From b6c0efb1eaaad54fdfe995a79f99afded8e3c9f8 Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Wed, 25 Aug 2021 12:15:40 -0400
Subject: [PATCH 08/11] formatting: reflow line wrapping and brace style in example_gen and unit tests

---
 .../unit_tests/test_log_converter.cc          |  19 ++-
 .../unit_tests/test_vw_external_parser.cc     |  13 +-
 test_tools/example_gen/example_gen.cc         | 134 +++++++++---------
 3 files changed, 93 insertions(+), 73 deletions(-)

diff --git a/external_parser/unit_tests/test_log_converter.cc b/external_parser/unit_tests/test_log_converter.cc
index 82854a158..b88fa32b5 100644
--- a/external_parser/unit_tests/test_log_converter.cc
+++ b/external_parser/unit_tests/test_log_converter.cc
@@ -193,14 +193,25 @@ BOOST_AUTO_TEST_CASE(ca_loop_simple_e2e) {
 
 BOOST_AUTO_TEST_CASE(ca_loop_mixed_skip_learn) {
   std::string infile_path = "valid_joined_logs/ca_loop_mixed_skip_learn.fb";
-  std::string outfile_path = "valid_joined_logs/ca_loop_mixed_skip_learn.dsjson";
+  std::string outfile_path =
+      "valid_joined_logs/ca_loop_mixed_skip_learn.dsjson";
 
   std::string converted_json =
       get_json_event(infile_path, outfile_path, v2::ProblemType_CA);
   std::string expected_json =
-"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,\"action\":1.014871597290039},\"Timestamp\":\"2021-08-25T15:36:54.000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"},\"_skipLearn\":true}\n"
-"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.464624404907227},\"Timestamp\":\"2021-08-25T15:36:54.000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
-"{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,\"action\":12.43958568572998},\"Timestamp\":\"2021-08-25T15:36:54.000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
+      "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.0005050505278632045,"
+      "\"action\":1.014871597290039},\"Timestamp\":\"2021-08-25T15:36:54."
+      "000000Z\",\"Version\":\"1\",\"EventId\":\"91f71c8\",\"c\":{"
+      "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/"
+      "A\"},\"_skipLearn\":true}\n"
+      "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
+      "\"action\":12.464624404907227},\"Timestamp\":\"2021-08-25T15:36:54."
+      "000000Z\",\"Version\":\"1\",\"EventId\":\"75d50657\",\"c\":{"
+      "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n"
+      "{\"_label_ca\":{\"cost\":-1.5,\"pdf_value\":0.4755050539970398,"
+      "\"action\":12.43958568572998},\"Timestamp\":\"2021-08-25T15:36:54."
+      "000000Z\",\"Version\":\"1\",\"EventId\":\"e28a9ae6\",\"c\":{"
+      "\"RobotJoint1\":{\"friction\":78}},\"VWState\":{\"m\":\"N/A\"}}\n";
 
   BOOST_CHECK_EQUAL(converted_json, expected_json);
 }
diff --git a/external_parser/unit_tests/test_vw_external_parser.cc b/external_parser/unit_tests/test_vw_external_parser.cc
index 70719155a..12e5fc394 100644
--- a/external_parser/unit_tests/test_vw_external_parser.cc
+++ b/external_parser/unit_tests/test_vw_external_parser.cc
@@ -260,10 +260,12 @@ BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_simple) {
 
   std::string model_name = input_files + "/test_outputs/m_average";
 
-  std::string file_name =
-      input_files + "/valid_joined_logs/ca_loop_simple";
+  std::string file_name = input_files + "/valid_joined_logs/ca_loop_simple";
 
-  generate_dsjson_and_fb_models(model_name, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", file_name);
+  generate_dsjson_and_fb_models(
+      model_name,
+      "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ",
+      file_name);
 
   // read the models and compare
   auto buffer_fb_model = read_file(model_name + ".fb");
@@ -282,7 +284,10 @@ BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_mixed_skip_learn) {
   std::string file_name =
       input_files + "/valid_joined_logs/ca_loop_mixed_skip_learn";
 
-  generate_dsjson_and_fb_models(model_name, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ", file_name);
+  generate_dsjson_and_fb_models(
+      model_name,
+      "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ",
+      file_name);
 
   // read the models and compare
   auto buffer_fb_model = read_file(model_name + ".fb");
diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc
index 6437df509..030a64466 100644
--- a/test_tools/example_gen/example_gen.cc
+++ b/test_tools/example_gen/example_gen.cc
@@ -21,31 +21,19 @@ namespace po = boost::program_options;
 //global var, yeah ugg
 bool enable_dedup = false;
 
-static const char *options[] = {
-  "cb",
-  "invalid-cb",
-  "ccb",
-  "ccb-with-slot-id",
-  "ccb-baseline",
-  "slates",
-  "ca",
-  "f-reward",
-  "fi-reward",
-  "fi-out-of-bound-reward",
-  "fs-reward",
-  "fmix-reward",
-  "s-reward",
-  "si-reward",
-  "ss-reward",
-  "action-taken",
-  "cb-loop",
-  "ca-loop",
-  "ccb-loop",
-  "ccb-baseline-loop",
-  nullptr
-};
-
-enum options{
+static const char *options[] = {"cb",           "invalid-cb",
+                                "ccb",          "ccb-with-slot-id",
+                                "ccb-baseline", "slates",
+                                "ca",           "f-reward",
+                                "fi-reward",    "fi-out-of-bound-reward",
+                                "fs-reward",    "fmix-reward",
+                                "s-reward",     "si-reward",
+                                "ss-reward",    "action-taken",
+                                "cb-loop",      "ca-loop",
+                                "ccb-loop",     "ccb-baseline-loop",
+                                nullptr};
+
+enum options {
   CB_ACTION,
   INVALID_CB_ACTION,
   CCB_ACTION,
@@ -117,9 +105,7 @@ void load_config_from_json(int action, u::configuration& config, bool enable_app
   } else if (action == SLATES_ACTION) {
     std::string args = "--slates --ccb_explore_adf --json --quiet --epsilon " + std::to_string(epsilon) + " --first_only --id N/A";
     config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, args.c_str());
-  }
-  else if (action == CA_ACTION || action == CA_LOOP)
-  {
+  } else if (action == CA_ACTION || action == CA_LOOP) {
     config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --json --quiet --id N/A");
   }
 }
@@ -250,7 +236,9 @@ void send_ccb_outcome(std::mt19937& rng, bool gen_random_reward, const char * ev
       }
 }
 
-int take_action(r::live_model& rl, const char *event_id, int action, unsigned int action_flag, bool gen_random_reward, std::mt19937& rng, bool no_loop_actions) {
+int take_action(r::live_model &rl, const char *event_id, int action,
+                unsigned int action_flag, bool gen_random_reward,
+                std::mt19937 &rng, bool no_loop_actions) {
   r::api_status status;
   float reward = gen_random_reward ? get_random_number(rng) : 1.5f;
 
@@ -409,32 +397,32 @@ int take_action(r::live_model& rl, const char *event_id, int action, unsigned in
 
       break;
     };
-    case CA_LOOP: {// "ca_loop",
+    case CA_LOOP: { // "ca_loop",
       r::continuous_action_response response;
-      if(rl.request_continuous_action(event_id, JSON_CA_CONTEXT, action_flag, response, &status) != err::success)
-          std::cout << status.get_error_msg() << std::endl;
+      if (rl.request_continuous_action(event_id, JSON_CA_CONTEXT, action_flag,
+                                       response, &status) != err::success)
+        std::cout << status.get_error_msg() << std::endl;
       size_t num_of_rewards = get_random_number(rng);
-      for (size_t i = 0; i < num_of_rewards; i++)
-      {
+      for (size_t i = 0; i < num_of_rewards; i++) {
         float reward = gen_random_reward ? get_random_number(rng, 0) : 1.5f;
-        std::cout << "report outcome: " << reward << " for event: " << event_id << std::endl;
-        if( rl.report_outcome(event_id, reward, &status) != err::success )
-            std::cout << status.get_error_msg() << std::endl;
+        std::cout << "report outcome: " << reward << " for event: " << event_id
+                  << std::endl;
+        if (rl.report_outcome(event_id, reward, &status) != err::success)
+          std::cout << status.get_error_msg() << std::endl;
       }
 
-      if (action_flag == r::action_flags::DEFERRED && !no_loop_actions)
-      {
+      if (action_flag == r::action_flags::DEFERRED && !no_loop_actions) {
         size_t rand_num = get_random_number(rng, 0 /*min*/);
-        if (rand_num % 2)
-        {
+        if (rand_num % 2) {
           // send activation
-          std::cout << "sending activation for event_id: " << event_id << std::endl;
-          if (rl.report_action_taken(event_id, &status) != err::success ) {
+          std::cout << "sending activation for event_id: " << event_id
+                    << std::endl;
+          if (rl.report_action_taken(event_id, &status) != err::success) {
             std::cout << status.get_error_msg() << std::endl;
           }
         }
       }
-      
+
       break;
     };
     case CCB_LOOP: { // "ccb action and random number of float rewards and mix of slot ids / non slot ids / float / string rewards"
@@ -525,7 +513,10 @@ int pseudo_random(int seed) {
   return (int)(val & 0xFFFFFFFF);
 }
 
-int run_config(int action, int count, int initial_seed, bool gen_random_reward, bool enable_apprentice_mode, int deferred_action_count, std::string config_file, std::mt19937& rng, bool no_loop_actions, float epsilon = 0.0f) {
+int run_config(int action, int count, int initial_seed, bool gen_random_reward,
+               bool enable_apprentice_mode, int deferred_action_count,
+               std::string config_file, std::mt19937 &rng, bool no_loop_actions,
+               float epsilon = 0.0f) {
   u::configuration config;
 
   if (config_file.empty())
@@ -558,7 +549,8 @@ int run_config(int action, int count, int initial_seed, bool gen_random_reward,
     auto action_flag = i < deferred_action_count
       ? r::action_flags::DEFERRED : r::action_flags::DEFAULT;
 
-    int r = take_action(rl, event_id, action, action_flag, gen_random_reward, rng, no_loop_actions);
+    int r = take_action(rl, event_id, action, action_flag, gen_random_reward,
+                        rng, no_loop_actions);
     if(r)
       return r;
   }
@@ -579,19 +571,26 @@ int main(int argc, char *argv[]) {
   float epsilon = 0.f;
   bool no_loop_actions = false;
 
-  desc.add_options()
-    ("help", "Produce help message")
-    ("all", "use all args")
-    ("dedup", "Enable dedup/zstd")
-    ("count", po::value<int>(), "Number of events to produce")
-    ("seed", po::value<int>(), "Initial seed used to produce event ids")
-    ("epsilon", po::value<float>(), "epsilon to be used in command line args for VW")
-    ("kind", po::value<std::string>(), "which kind of example to generate (cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ca-loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,action-taken)")
-    ("random_reward", "Generate random float reward for observation event")
-    ("config_file", po::value<std::string>(), "json config file for rlclinetlib")
-    ("apprentice", "Enable apprentice mode")
-    ("deferred_action_count",  po::value<int>(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions")
-    ("no_loop_actions", "Flag to disable actions being taken for all outcome events");
+  desc.add_options()("help", "Produce help message")("all", "use all args")(
+      "dedup", "Enable dedup/zstd")("count", po::value<int>(),
+                                    "Number of events to produce")(
+      "seed", po::value<int>(), "Initial seed used to produce event ids")(
+      "epsilon", po::value<float>(),
+      "epsilon to be used in command line args for VW")(
+      "kind", po::value<std::string>(),
+      "which kind of example to generate "
+      "(cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ca-"
+      "loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,"
+      "action-taken)")("random_reward",
+                       "Generate random float reward for observation event")(
+      "config_file", po::value<std::string>(),
+      "json config file for rlclinetlib")("apprentice",
+                                          "Enable apprentice mode")(
+      "deferred_action_count", po::value<int>(),
+      "Number of deferred action for interaction events. Set the "
+      "deferred_action flag to true for first deferred_action_count number of "
+      "actions")("no_loop_actions",
+                 "Flag to disable actions being taken for all outcome events");
 
   po::positional_options_description pd;
   pd.add("kind", 1);
@@ -606,10 +605,11 @@ int main(int argc, char *argv[]) {
     enable_dedup = vm.count("dedup");
     no_loop_actions = vm.count("no_loop_actions");
 
-    std::vector<std::string> deferrable_interactions {
-      "cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop", "ca-loop", 
-      "ccb-with-slot-id", "ccb-loop", "ccb-baseline-loop"
-    };
+    std::vector<std::string> deferrable_interactions{
+        "cb",           "invalid-cb",       "ccb",
+        "ccb-baseline", "slates",           "ca",
+        "cb-loop",      "ca-loop",          "ccb-with-slot-id",
+        "ccb-loop",     "ccb-baseline-loop"};
 
     if(vm.count("kind") > 0)
       action_name = vm["kind"].as<std::string>();
@@ -647,7 +647,9 @@ int main(int argc, char *argv[]) {
 
   if(gen_all) {
     for(int i = 0; options[i]; ++i) {
-      if(run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, no_loop_actions, epsilon))
+      if (run_config(i, count, seed, gen_random_reward, enable_apprentice_mode,
+                     deferred_action_count, config_file, rng, no_loop_actions,
+                     epsilon))
         return -1;
     }
     return 0;
@@ -667,5 +669,7 @@ int main(int argc, char *argv[]) {
     return -1;
   }
 
-  return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, no_loop_actions, epsilon);
+  return run_config(action, count, seed, gen_random_reward,
+                    enable_apprentice_mode, deferred_action_count, config_file,
+                    rng, no_loop_actions, epsilon);
 }
\ No newline at end of file

From e4f8cbed8ee954906d12c85b40e0242b1b54d23f Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Fri, 27 Aug 2021 11:15:12 -0400
Subject: [PATCH 09/11] commented changes: replace no_loop_actions flag with activation_ratio option

---
 .../unit_tests/test_files/README.md           |  2 +-
 test_tools/example_gen/example_gen.cc         | 38 ++++++++++---------
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md
index ecbe17df4..af1f90c7a 100644
--- a/external_parser/unit_tests/test_files/README.md
+++ b/external_parser/unit_tests/test_files/README.md
@@ -27,7 +27,7 @@ Residing under `valid_joined_logs`
 - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
 - ca_loop_simple.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
 - ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1 --no_loop_actions` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
-- ca_loop_simple_e2e.log: generated in DS repo with `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4`
+- ca_loop_simple_e2e.log: generated by running `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4`
 - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log`
 - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining
 - ccb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 2 --learning_mode_config 1` with the files (ccb-baseline-loopinteractions_v2.fb, ccb-baseline-loopobservations_v2.fb) and renaming the resulting default `merged.log`
diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc
index 030a64466..098c79cb7 100644
--- a/test_tools/example_gen/example_gen.cc
+++ b/test_tools/example_gen/example_gen.cc
@@ -238,7 +238,7 @@ void send_ccb_outcome(std::mt19937& rng, bool gen_random_reward, const char * ev
 
 int take_action(r::live_model &rl, const char *event_id, int action,
                 unsigned int action_flag, bool gen_random_reward,
-                std::mt19937 &rng, bool no_loop_actions) {
+                std::mt19937 &rng, float activation_ratio) {
   r::api_status status;
   float reward = gen_random_reward ? get_random_number(rng) : 1.5f;
 
@@ -384,8 +384,8 @@ int take_action(r::live_model &rl, const char *event_id, int action,
 
       if (action_flag == r::action_flags::DEFERRED)
       {
-        size_t rand_num = get_random_number(rng, 0 /*min*/);
-        if (rand_num % 2)
+        float rand_float = rand()/float(RAND_MAX);
+        if (rand_float < activation_ratio)
         {
           // send activation
           std::cout << "sending activation for event_id: " << event_id << std::endl;
@@ -411,9 +411,10 @@ int take_action(r::live_model &rl, const char *event_id, int action,
           std::cout << status.get_error_msg() << std::endl;
       }
 
-      if (action_flag == r::action_flags::DEFERRED && !no_loop_actions) {
-        size_t rand_num = get_random_number(rng, 0 /*min*/);
-        if (rand_num % 2) {
+      if (action_flag == r::action_flags::DEFERRED) {
+        float rand_float = rand()/float(RAND_MAX);
+        if (rand_float < activation_ratio)
+        {
           // send activation
           std::cout << "sending activation for event_id: " << event_id
                     << std::endl;
@@ -447,8 +448,8 @@ int take_action(r::live_model &rl, const char *event_id, int action,
 
       if (action_flag == r::action_flags::DEFERRED)
       {
-        size_t rand_num = get_random_number(rng, 0 /*min*/);
-        if (rand_num % 2)
+        float rand_float = rand()/float(RAND_MAX);
+        if (rand_float < activation_ratio)
         {
           // send activation
           std::cout << "sending activation for event_id: " << event_id << std::endl;
@@ -483,8 +484,8 @@ int take_action(r::live_model &rl, const char *event_id, int action,
 
       if (action_flag == r::action_flags::DEFERRED)
       {
-        size_t rand_num = get_random_number(rng, 0 /*min*/);
-        if (rand_num % 2)
+        float rand_float = rand()/float(RAND_MAX);
+        if (rand_float < activation_ratio)
         {
           // send activation
           std::cout << "sending activation for event_id: " << event_id << std::endl;
@@ -515,7 +516,7 @@ int pseudo_random(int seed) {
 
 int run_config(int action, int count, int initial_seed, bool gen_random_reward,
                bool enable_apprentice_mode, int deferred_action_count,
-               std::string config_file, std::mt19937 &rng, bool no_loop_actions,
+               std::string config_file, std::mt19937 &rng, float activation_ratio,
                float epsilon = 0.0f) {
   u::configuration config;
 
@@ -550,7 +551,7 @@ int run_config(int action, int count, int initial_seed, bool gen_random_reward,
       ? r::action_flags::DEFERRED : r::action_flags::DEFAULT;
 
     int r = take_action(rl, event_id, action, action_flag, gen_random_reward,
-                        rng, no_loop_actions);
+                        rng, activation_ratio);
     if(r)
       return r;
   }
@@ -569,7 +570,7 @@ int main(int argc, char *argv[]) {
   bool enable_apprentice_mode = false;
   int deferred_action_count = 0;
   float epsilon = 0.f;
-  bool no_loop_actions = false;
+  float activation_ratio = 0.5f;
 
   desc.add_options()("help", "Produce help message")("all", "use all args")(
       "dedup", "Enable dedup/zstd")("count", po::value<int>(),
@@ -589,8 +590,8 @@ int main(int argc, char *argv[]) {
       "deferred_action_count", po::value<int>(),
       "Number of deferred action for interaction events. Set the "
       "deferred_action flag to true for first deferred_action_count number of "
-      "actions")("no_loop_actions",
-                 "Flag to disable actions being taken for all outcome events");
+      "actions")(
+      "activation_ratio", po::value<float>(), "Percent of observations to activate in loop generators");
 
   po::positional_options_description pd;
   pd.add("kind", 1);
@@ -603,7 +604,6 @@ int main(int argc, char *argv[]) {
     gen_random_reward = vm.count("random_reward");
     enable_apprentice_mode = vm.count("apprentice");
     enable_dedup = vm.count("dedup");
-    no_loop_actions = vm.count("no_loop_actions");
 
     std::vector<std::string> deferrable_interactions{
         "cb",           "invalid-cb",       "ccb",
@@ -623,6 +623,8 @@ int main(int argc, char *argv[]) {
       config_file = vm["config_file"].as<std::string>();
     if(vm.count("deferred_action_count") > 0)
       deferred_action_count = vm["deferred_action_count"].as<int>();
+    if(vm.count("activation_ratio") > 0)
+      activation_ratio = vm["activation_ratio"].as<float>();
 
     if(vm.count("deferred_action_count") > 0 && !std::any_of(
       deferrable_interactions.begin(),
@@ -648,7 +650,7 @@ int main(int argc, char *argv[]) {
   if(gen_all) {
     for(int i = 0; options[i]; ++i) {
       if (run_config(i, count, seed, gen_random_reward, enable_apprentice_mode,
-                     deferred_action_count, config_file, rng, no_loop_actions,
+                     deferred_action_count, config_file, rng, activation_ratio,
                      epsilon))
         return -1;
     }
@@ -671,5 +673,5 @@ int main(int argc, char *argv[]) {
 
   return run_config(action, count, seed, gen_random_reward,
                     enable_apprentice_mode, deferred_action_count, config_file,
-                    rng, no_loop_actions, epsilon);
+                    rng, activation_ratio, epsilon);
 }
\ No newline at end of file

From 888a8e2dd4792d53505575b281017fc319d1f8fe Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Fri, 27 Aug 2021 12:17:44 -0400
Subject: [PATCH 10/11] fix bug: misspelled buffer_fb_model variable in ccb model comparison test

---
 external_parser/unit_tests/test_vw_external_parser.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external_parser/unit_tests/test_vw_external_parser.cc b/external_parser/unit_tests/test_vw_external_parser.cc
index c2ab55f2b..94574221b 100644
--- a/external_parser/unit_tests/test_vw_external_parser.cc
+++ b/external_parser/unit_tests/test_vw_external_parser.cc
@@ -239,7 +239,7 @@ BOOST_AUTO_TEST_CASE(ccb_compare_dsjson_with_fb_models) {
   generate_dsjson_and_fb_models(model_name, "--ccb_explore_adf ", file_name);
 
   // read the models and compare
-  auto bufffb_model = read_file(model_name + ".fb");
+  auto buffer_fb_model = read_file(model_name + ".fb");
   auto buffer_dsjson_model = read_file(model_name + ".json");
 
   BOOST_CHECK_EQUAL_COLLECTIONS(buffer_fb_model.begin(), buffer_fb_model.end(),

From b7aa258810728f341d7bcb15cc0a12919212e7cf Mon Sep 17 00:00:00 2001
From: Griffin Bassman <griffinbassman@gmail.com>
Date: Fri, 27 Aug 2021 12:29:55 -0400
Subject: [PATCH 11/11] revert activation_ratio arg

---
 .../unit_tests/test_files/README.md           |  2 +-
 test_tools/example_gen/example_gen.cc         | 34 ++++++++-----------
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/external_parser/unit_tests/test_files/README.md b/external_parser/unit_tests/test_files/README.md
index 766990d44..cf2f3a51b 100644
--- a/external_parser/unit_tests/test_files/README.md
+++ b/external_parser/unit_tests/test_files/README.md
@@ -26,7 +26,7 @@ Residing under `valid_joined_logs`
 - ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log`
 - ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
 - ca_loop_simple.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
-- ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1 --activation_ratio 0` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
+- ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
 - ca_loop_simple_e2e.log: generated by running `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4`
 - cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log`
 - cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practise without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining
diff --git a/test_tools/example_gen/example_gen.cc b/test_tools/example_gen/example_gen.cc
index a5519a949..111a81f26 100644
--- a/test_tools/example_gen/example_gen.cc
+++ b/test_tools/example_gen/example_gen.cc
@@ -252,7 +252,7 @@ void send_ccb_outcome(std::mt19937& rng, bool gen_random_reward, const char * ev
 
 int take_action(r::live_model &rl, const char *event_id, int action,
                 unsigned int action_flag, bool gen_random_reward,
-                std::mt19937 &rng, float activation_ratio) {
+                std::mt19937 &rng) {
   r::api_status status;
   float reward = gen_random_reward ? get_random_number(rng) : 1.5f;
 
@@ -398,8 +398,8 @@ int take_action(r::live_model &rl, const char *event_id, int action,
 
       if (action_flag == r::action_flags::DEFERRED)
       {
-        float rand_float = rand()/float(RAND_MAX);
-        if (rand_float < activation_ratio)
+        size_t rand_num = get_random_number(rng, 0 /*min*/);
+        if (rand_num % 2)
         {
           // send activation
           std::cout << "sending activation for event_id: " << event_id << std::endl;
@@ -426,8 +426,8 @@ int take_action(r::live_model &rl, const char *event_id, int action,
       }
 
       if (action_flag == r::action_flags::DEFERRED) {
-        float rand_float = rand()/float(RAND_MAX);
-        if (rand_float < activation_ratio)
+        size_t rand_num = get_random_number(rng, 0 /*min*/);
+        if (rand_num % 2)
         {
           // send activation
           std::cout << "sending activation for event_id: " << event_id
@@ -462,8 +462,8 @@ int take_action(r::live_model &rl, const char *event_id, int action,
 
       if (action_flag == r::action_flags::DEFERRED)
       {
-        float rand_float = rand()/float(RAND_MAX);
-        if (rand_float < activation_ratio)
+        size_t rand_num = get_random_number(rng, 0 /*min*/);
+        if (rand_num % 2)
         {
           // send activation
           std::cout << "sending activation for event_id: " << event_id << std::endl;
@@ -498,8 +498,8 @@ int take_action(r::live_model &rl, const char *event_id, int action,
 
       if (action_flag == r::action_flags::DEFERRED)
       {
-        float rand_float = rand()/float(RAND_MAX);
-        if (rand_float < activation_ratio)
+        size_t rand_num = get_random_number(rng, 0 /*min*/);
+        if (rand_num % 2)
         {
           // send activation
           std::cout << "sending activation for event_id: " << event_id << std::endl;
@@ -560,8 +560,7 @@ int pseudo_random(int seed) {
 
 int run_config(int action, int count, int initial_seed, bool gen_random_reward,
                bool enable_apprentice_mode, int deferred_action_count,
-               std::string config_file, std::mt19937 &rng, float activation_ratio,
-               float epsilon = 0.0f) {
+               std::string config_file, std::mt19937 &rng, float epsilon = 0.0f) {
   u::configuration config;
 
   if (config_file.empty())
@@ -595,7 +594,7 @@ int run_config(int action, int count, int initial_seed, bool gen_random_reward,
       ? r::action_flags::DEFERRED : r::action_flags::DEFAULT;
 
     int r = take_action(rl, event_id, action, action_flag, gen_random_reward,
-                        rng, activation_ratio);
+                        rng);
     if(r)
       return r;
   }
@@ -614,7 +613,6 @@ int main(int argc, char *argv[]) {
   bool enable_apprentice_mode = false;
   int deferred_action_count = 0;
   float epsilon = 0.f;
-  float activation_ratio = 0.5f;
 
   desc.add_options()
     ("help", "Produce help message")
@@ -627,8 +625,7 @@ int main(int argc, char *argv[]) {
     ("random_reward", "Generate random float reward for observation event")
     ("config_file", po::value<std::string>(), "json config file for rlclinetlib")
     ("apprentice", "Enable apprentice mode")
-    ("deferred_action_count",  po::value<int>(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions")
-    ("activation_ratio", po::value<float>(), "Percent of observations to activate in loop generators");
+    ("deferred_action_count",  po::value<int>(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions");
 
   po::positional_options_description pd;
   pd.add("kind", 1);
@@ -659,8 +656,6 @@ int main(int argc, char *argv[]) {
       config_file = vm["config_file"].as<std::string>();
     if(vm.count("deferred_action_count") > 0)
       deferred_action_count = vm["deferred_action_count"].as<int>();
-    if(vm.count("activation_ratio") > 0)
-      activation_ratio = vm["activation_ratio"].as<float>();
 
     if(vm.count("deferred_action_count") > 0 && !std::any_of(
       deferrable_interactions.begin(),
@@ -686,8 +681,7 @@ int main(int argc, char *argv[]) {
   if(gen_all) {
     for(int i = 0; options[i]; ++i) {
       if (run_config(i, count, seed, gen_random_reward, enable_apprentice_mode,
-                     deferred_action_count, config_file, rng, activation_ratio,
-                     epsilon))
+                     deferred_action_count, config_file, rng, epsilon))
         return -1;
     }
     return 0;
@@ -709,5 +703,5 @@ int main(int argc, char *argv[]) {
 
   return run_config(action, count, seed, gen_random_reward,
                     enable_apprentice_mode, deferred_action_count, config_file,
-                    rng, activation_ratio, epsilon);
+                    rng, epsilon);
 }
\ No newline at end of file