Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions external_parser/unit_tests/test_files/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ Residing under `valid_joined_logs`
- average_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind cb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
- ccb_simple.log: generated by running `python joiner.py --problem_type_config 2` on the above files (ccb_v2.fb, fi-reward_v2.fb) and renaming the resulting default `merged.log`
- ccb_sum_reward_100_interactions.[fb|json]: generated by running `./example_gen --kind ccb-loop --random_reward --count 100` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
- ca_loop_simple.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
- ca_loop_mixed_skip_learn.[fb|json]: generated by running `./example_gen --kind ca-loop --count 3 --deferred_action_count 1 --no_loop_actions` and performing binary joining and dsjson joining (with average reward) to generate each file (`.fb` and `.json`)
- ca_loop_simple_e2e.log: generated in DS repo with `./example_gen --kind ca-loop --count 3 --config_file output/perfrlclient.json` and performing binary join with `python joiner.py --problem_type_config 4`
- cb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 1 --learning_mode_config 1` with the files (cb_v2_size_5_apprentice.fb, f-reward_v2_size_5_apprentice.fb) and renaming the resulting default `merged.log`
- cb_deferred_actions_w_activations_and_apprentice_10.[fb|json]: generated by running (with Apprentice mode enabled) `./example_gen --kind cb-loop --random_reward --count 10 --deferred_action_count 5` which should defer the first 5 actions and for those deferred actions, randomly send an activation (in practice without a seed, this sends 2 activations so we get a total of 7 learnable events) and performing binary joining
- ccb_apprentice_5.log: generated by running `python joiner.py --problem_type_config 2 --learning_mode_config 1` with the files (ccb-baseline-loopinteractions_v2.fb, ccb-baseline-loopobservations_v2.fb) and renaming the resulting default `merged.log`
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"_label_ca":{"cost":-1.5,"pdf_value":0.0005050505278632045,"action":1.014871597290039},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"91f71c8","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"},"_skipLearn":true}
{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.464624404907227},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"75d50657","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.43958568572998},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"e28a9ae6","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"_label_ca":{"cost":-1.5,"pdf_value":0.0005050505278632045,"action":1.014871597290039},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"91f71c8","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.464624404907227},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"75d50657","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
{"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.43958568572998},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"e28a9ae6","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}}
Binary file not shown.
Binary file not shown.
71 changes: 71 additions & 0 deletions external_parser/unit_tests/test_log_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,77 @@ BOOST_AUTO_TEST_CASE(ccb_payload_with_slot_id) {
BOOST_AUTO_TEST_SUITE_END()

BOOST_AUTO_TEST_SUITE(log_converter_ca_format)
// Converts the joined CA binary log `ca_loop_simple.fb` to dsjson and checks
// the output against three expected events, none of which carries
// `_skipLearn`.
// NOTE(review): get_json_event is defined elsewhere in this file; presumably it
// runs the converter and returns the produced dsjson text -- confirm.
BOOST_AUTO_TEST_CASE(ca_loop_simple) {
  std::string fb_log_path = "valid_joined_logs/ca_loop_simple.fb";
  std::string dsjson_out_path = "valid_joined_logs/ca_loop_simple.dsjson";

  // One dsjson line per event; every line is newline-terminated.
  std::string expected_json =
      R"({"_label_ca":{"cost":-1.5,"pdf_value":0.0005050505278632045,"action":1.014871597290039},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"91f71c8","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}})" "\n"
      R"({"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.464624404907227},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"75d50657","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}})" "\n"
      R"({"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.43958568572998},"Timestamp":"2021-08-24T14:38:15.000000Z","Version":"1","EventId":"e28a9ae6","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}})" "\n";

  std::string converted_json =
      get_json_event(fb_log_path, dsjson_out_path, v2::ProblemType_CA);

  BOOST_CHECK_EQUAL(converted_json, expected_json);
}

// Converts the end-to-end joined CA log `ca_loop_simple_e2e.log` to dsjson and
// checks the output against three expected events, none of which carries
// `_skipLearn`.
// NOTE(review): get_json_event is defined elsewhere in this file; presumably it
// runs the converter and returns the produced dsjson text -- confirm.
BOOST_AUTO_TEST_CASE(ca_loop_simple_e2e) {
  std::string joined_log_path = "valid_joined_logs/ca_loop_simple_e2e.log";
  std::string dsjson_out_path = "valid_joined_logs/ca_loop_simple_e2e.dsjson";

  // One dsjson line per event; every line is newline-terminated.
  std::string expected_json =
      R"({"_label_ca":{"cost":-1.5,"pdf_value":0.0005050505278632045,"action":1.014871597290039},"Timestamp":"2021-08-24T16:34:38.000000Z","Version":"1","EventId":"91f71c8","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}})" "\n"
      R"({"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.464624404907227},"Timestamp":"2021-08-24T16:34:38.000000Z","Version":"1","EventId":"75d50657","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}})" "\n"
      R"({"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.43958568572998},"Timestamp":"2021-08-24T16:34:38.000000Z","Version":"1","EventId":"e28a9ae6","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}})" "\n";

  std::string converted_json =
      get_json_event(joined_log_path, dsjson_out_path, v2::ProblemType_CA);

  BOOST_CHECK_EQUAL(converted_json, expected_json);
}

// Converts the joined CA binary log `ca_loop_mixed_skip_learn.fb` to dsjson and
// checks the output against three expected events. Only the first expected
// event ends with `"_skipLearn":true`; the other two do not carry the field.
// NOTE(review): get_json_event is defined elsewhere in this file; presumably it
// runs the converter and returns the produced dsjson text -- confirm.
BOOST_AUTO_TEST_CASE(ca_loop_mixed_skip_learn) {
  std::string fb_log_path = "valid_joined_logs/ca_loop_mixed_skip_learn.fb";
  std::string dsjson_out_path =
      "valid_joined_logs/ca_loop_mixed_skip_learn.dsjson";

  // One dsjson line per event; every line is newline-terminated.
  std::string expected_json =
      R"({"_label_ca":{"cost":-1.5,"pdf_value":0.0005050505278632045,"action":1.014871597290039},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"91f71c8","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"},"_skipLearn":true})" "\n"
      R"({"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.464624404907227},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"75d50657","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}})" "\n"
      R"({"_label_ca":{"cost":-1.5,"pdf_value":0.4755050539970398,"action":12.43958568572998},"Timestamp":"2021-08-25T15:36:54.000000Z","Version":"1","EventId":"e28a9ae6","c":{"RobotJoint1":{"friction":78}},"VWState":{"m":"N/A"}})" "\n";

  std::string converted_json =
      get_json_event(fb_log_path, dsjson_out_path, v2::ProblemType_CA);

  BOOST_CHECK_EQUAL(converted_json, expected_json);
}

BOOST_AUTO_TEST_CASE(ca_deferred_action_without_activation) {
std::string infile_path =
"skip_learn/ca/deferred_action_without_activation.fb";
Expand Down
43 changes: 43 additions & 0 deletions external_parser/unit_tests/test_vw_external_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,49 @@ BOOST_AUTO_TEST_CASE(ccb_compare_dsjson_with_fb_models) {
buffer_dsjson_model.end());
}

// Checks that the model files produced for the `ca_loop_simple` joined log are
// byte-for-byte identical between the `.fb` and `.json` outputs.
// NOTE(review): generate_dsjson_and_fb_models is defined elsewhere; presumably
// it trains one model from `<log>.fb` and one from `<log>.json` and writes them
// to `<model_name>.fb` / `<model_name>.json` -- confirm against the helper.
BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_simple) {
  std::string test_root = get_test_files_location();

  // Extension-less stem of the joined log, and stem of the model outputs.
  std::string joined_log_stem = test_root + "/valid_joined_logs/ca_loop_simple";
  std::string model_name = test_root + "/test_outputs/m_average";

  generate_dsjson_and_fb_models(
      model_name,
      "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ",
      joined_log_stem);

  // Load both model files and require element-wise equality.
  auto buffer_fb_model = read_file(model_name + ".fb");
  auto buffer_dsjson_model = read_file(model_name + ".json");

  BOOST_CHECK_EQUAL_COLLECTIONS(buffer_fb_model.begin(), buffer_fb_model.end(),
                                buffer_dsjson_model.begin(),
                                buffer_dsjson_model.end());
}

// Checks that the model files produced for the `ca_loop_mixed_skip_learn`
// joined log are byte-for-byte identical between the `.fb` and `.json` outputs.
// NOTE(review): generate_dsjson_and_fb_models is defined elsewhere; presumably
// it trains one model from `<log>.fb` and one from `<log>.json` and writes them
// to `<model_name>.fb` / `<model_name>.json` -- confirm against the helper.
BOOST_AUTO_TEST_CASE(ca_compare_dsjson_with_fb_models_mixed_skip_learn) {
  std::string test_root = get_test_files_location();

  // Extension-less stem of the joined log, and stem of the model outputs.
  std::string joined_log_stem =
      test_root + "/valid_joined_logs/ca_loop_mixed_skip_learn";
  std::string model_name = test_root + "/test_outputs/m_average";

  generate_dsjson_and_fb_models(
      model_name,
      "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --id N/A ",
      joined_log_stem);

  // Load both model files and require element-wise equality.
  auto buffer_fb_model = read_file(model_name + ".fb");
  auto buffer_dsjson_model = read_file(model_name + ".json");

  BOOST_CHECK_EQUAL_COLLECTIONS(buffer_fb_model.begin(), buffer_fb_model.end(),
                                buffer_dsjson_model.begin(),
                                buffer_dsjson_model.end());
}

BOOST_AUTO_TEST_CASE(rrcr_ignore_examples_before_checkpoint) {
std::string input_files = get_test_files_location();

Expand Down
135 changes: 86 additions & 49 deletions test_tools/example_gen/example_gen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,30 +21,19 @@ namespace po = boost::program_options;
//global var, yeah ugg
bool enable_dedup = false;

static const char *options[] = {
"cb",
"invalid-cb",
"ccb",
"ccb-with-slot-id",
"ccb-baseline",
"slates",
"ca",
"f-reward",
"fi-reward",
"fi-out-of-bound-reward",
"fs-reward",
"fmix-reward",
"s-reward",
"si-reward",
"ss-reward",
"action-taken",
"cb-loop",
"ccb-loop",
"ccb-baseline-loop",
nullptr
};

enum options{
// Table of example-kind names, terminated by nullptr.
// The "generate all" loop in main() walks this table by index and passes the
// index to run_config() as the action, so the order of the entries must stay
// in lockstep with `enum options` (e.g. "ca-loop" sits between "cb-loop" and
// "ccb-loop", just as CA_LOOP sits between CB_LOOP and CCB_LOOP).
// NOTE(review): presumably also used to resolve the --kind argument to an
// action index; that lookup is not visible in this hunk -- confirm.
static const char *options[] = {"cb", "invalid-cb",
"ccb", "ccb-with-slot-id",
"ccb-baseline", "slates",
"ca", "f-reward",
"fi-reward", "fi-out-of-bound-reward",
"fs-reward", "fmix-reward",
"s-reward", "si-reward",
"ss-reward", "action-taken",
"cb-loop", "ca-loop",
"ccb-loop", "ccb-baseline-loop",
nullptr};

enum options {
CB_ACTION,
INVALID_CB_ACTION,
CCB_ACTION,
Expand All @@ -63,6 +52,7 @@ enum options{
S_S_REWARD,
ACTION_TAKEN,
CB_LOOP,
CA_LOOP,
CCB_LOOP,
CCB_BASELINE_ACTION_LOOP
};
Expand Down Expand Up @@ -115,9 +105,7 @@ void load_config_from_json(int action, u::configuration& config, bool enable_app
} else if (action == SLATES_ACTION) {
std::string args = "--slates --ccb_explore_adf --json --quiet --epsilon " + std::to_string(epsilon) + " --first_only --id N/A";
config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, args.c_str());
}
else if (action == CA_ACTION)
{
} else if (action == CA_ACTION || action == CA_LOOP) {
config.set(r::name::MODEL_VW_INITIAL_COMMAND_LINE, "--cats 4 --min_value 1 --max_value 100 --bandwidth 1 --json --quiet --id N/A");
}
}
Expand Down Expand Up @@ -248,7 +236,9 @@ void send_ccb_outcome(std::mt19937& rng, bool gen_random_reward, const char * ev
}
}

int take_action(r::live_model& rl, const char *event_id, int action, unsigned int action_flag, bool gen_random_reward, std::mt19937& rng) {
int take_action(r::live_model &rl, const char *event_id, int action,
unsigned int action_flag, bool gen_random_reward,
std::mt19937 &rng, bool no_loop_actions) {
r::api_status status;
float reward = gen_random_reward ? get_random_number(rng) : 1.5f;

Expand Down Expand Up @@ -407,6 +397,34 @@ int take_action(r::live_model& rl, const char *event_id, int action, unsigned in

break;
};
case CA_LOOP: { // "ca-loop": continuous action, then a random number of float outcomes
  // Request the continuous action; a failure is only logged and the case
  // carries on with the outcome reports below.
  r::continuous_action_response response;
  const bool request_ok =
      rl.request_continuous_action(event_id, JSON_CA_CONTEXT, action_flag,
                                   response, &status) == err::success;
  if (!request_ok) {
    std::cout << status.get_error_msg() << std::endl;
  }

  // Number of outcomes to report for this event, drawn from the shared rng.
  size_t outcome_count = get_random_number(rng);
  size_t reported = 0;
  while (reported < outcome_count) {
    // Either a fresh random value per outcome or the fixed 1.5 reward.
    float outcome = gen_random_reward ? get_random_number(rng, 0) : 1.5f;
    std::cout << "report outcome: " << outcome << " for event: " << event_id
              << std::endl;
    if (rl.report_outcome(event_id, outcome, &status) != err::success) {
      std::cout << status.get_error_msg() << std::endl;
    }
    ++reported;
  }

  // Deferred events get an activation on an odd draw, unless activations
  // were switched off through no_loop_actions.
  const bool may_activate =
      !no_loop_actions && action_flag == r::action_flags::DEFERRED;
  if (may_activate) {
    size_t draw = get_random_number(rng, 0 /*min*/);
    if (draw % 2 != 0) {
      // send activation
      std::cout << "sending activation for event_id: " << event_id
                << std::endl;
      if (rl.report_action_taken(event_id, &status) != err::success) {
        std::cout << status.get_error_msg() << std::endl;
      }
    }
  }

  break;
};
case CCB_LOOP: { // "ccb action and random number of float rewards and mix of slot ids / non slot ids / float / string rewards"
// randomly decide to send either ccb with slot id's provided or random slot id's
// the ccb interactions that are non-random are the ones we can use to send observations for the slot id using the slot-id string
Expand Down Expand Up @@ -495,7 +513,10 @@ int pseudo_random(int seed) {
return (int)(val & 0xFFFFFFFF);
}

int run_config(int action, int count, int initial_seed, bool gen_random_reward, bool enable_apprentice_mode, int deferred_action_count, std::string config_file, std::mt19937& rng, float epsilon = 0.0f) {
int run_config(int action, int count, int initial_seed, bool gen_random_reward,
bool enable_apprentice_mode, int deferred_action_count,
std::string config_file, std::mt19937 &rng, bool no_loop_actions,
float epsilon = 0.0f) {
u::configuration config;

if (config_file.empty())
Expand Down Expand Up @@ -528,7 +549,8 @@ int run_config(int action, int count, int initial_seed, bool gen_random_reward,
auto action_flag = i < deferred_action_count
? r::action_flags::DEFERRED : r::action_flags::DEFAULT;

int r = take_action(rl, event_id, action, action_flag, gen_random_reward, rng);
int r = take_action(rl, event_id, action, action_flag, gen_random_reward,
rng, no_loop_actions);
if(r)
return r;
}
Expand All @@ -547,19 +569,28 @@ int main(int argc, char *argv[]) {
bool enable_apprentice_mode = false;
int deferred_action_count = 0;
float epsilon = 0.f;

desc.add_options()
("help", "Produce help message")
("all", "use all args")
("dedup", "Enable dedup/zstd")
("count", po::value<int>(), "Number of events to produce")
("seed", po::value<int>(), "Initial seed used to produce event ids")
("epsilon", po::value<float>(), "epsilon to be used in command line args for VW")
("kind", po::value<std::string>(), "which kind of example to generate (cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,action-taken)")
("random_reward", "Generate random float reward for observation event")
("config_file", po::value<std::string>(), "json config file for rlclinetlib")
("apprentice", "Enable apprentice mode")
("deferred_action_count", po::value<int>(), "Number of deferred action for interaction events. Set the deferred_action flag to true for first deferred_action_count number of actions");
bool no_loop_actions = false;

desc.add_options()("help", "Produce help message")("all", "use all args")(
"dedup", "Enable dedup/zstd")("count", po::value<int>(),
"Number of events to produce")(
"seed", po::value<int>(), "Initial seed used to produce event ids")(
"epsilon", po::value<float>(),
"epsilon to be used in command line args for VW")(
"kind", po::value<std::string>(),
"which kind of example to generate "
"(cb,invalid-cb,ccb,ccb-with-slot-id,ccb-baseline,slates,ca,cb-loop,ca-"
"loop,ccb-loop,ccb-baseline-loop,(f|s)(s|i|mix|i-out-of-bound)?-reward,"
"action-taken)")("random_reward",
"Generate random float reward for observation event")(
"config_file", po::value<std::string>(),
"json config file for rlclinetlib")("apprentice",
"Enable apprentice mode")(
"deferred_action_count", po::value<int>(),
"Number of deferred action for interaction events. Set the "
"deferred_action flag to true for first deferred_action_count number of "
"actions")("no_loop_actions",
"Flag to disable actions being taken for all outcome events");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure what this argument does, do you want to not send activations for some deferred events?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the purpose of this is to ensure that skiplearn is included in all examples, to test that it's being set in the dsjson conversion. It may be useful to change this option to a float instead, and use it to determine the % of events to activate. That way I could set it to 0 for this case, and add some more flexibility to example_gen for other loops

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@olgavrou I just removed the additional flag to simplify the PR


po::positional_options_description pd;
pd.add("kind", 1);
Expand All @@ -572,11 +603,13 @@ int main(int argc, char *argv[]) {
gen_random_reward = vm.count("random_reward");
enable_apprentice_mode = vm.count("apprentice");
enable_dedup = vm.count("dedup");
no_loop_actions = vm.count("no_loop_actions");

std::vector<std::string> deferrable_interactions {
"cb", "invalid-cb", "ccb", "ccb-baseline", "slates", "ca", "cb-loop",
"ccb-with-slot-id", "ccb-loop", "ccb-baseline-loop"
};
std::vector<std::string> deferrable_interactions{
"cb", "invalid-cb", "ccb",
"ccb-baseline", "slates", "ca",
"cb-loop", "ca-loop", "ccb-with-slot-id",
"ccb-loop", "ccb-baseline-loop"};

if(vm.count("kind") > 0)
action_name = vm["kind"].as<std::string>();
Expand Down Expand Up @@ -614,7 +647,9 @@ int main(int argc, char *argv[]) {

if(gen_all) {
for(int i = 0; options[i]; ++i) {
if(run_config(i, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, epsilon))
if (run_config(i, count, seed, gen_random_reward, enable_apprentice_mode,
deferred_action_count, config_file, rng, no_loop_actions,
epsilon))
return -1;
}
return 0;
Expand All @@ -634,5 +669,7 @@ int main(int argc, char *argv[]) {
return -1;
}

return run_config(action, count, seed, gen_random_reward, enable_apprentice_mode, deferred_action_count, config_file, rng, epsilon);
return run_config(action, count, seed, gen_random_reward,
enable_apprentice_mode, deferred_action_count, config_file,
rng, no_loop_actions, epsilon);
}