Skip to content
This repository was archived by the owner on Dec 21, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ namespace one_shot_object_detection {
namespace data_augmentation {

flex_dict build_annotation(ParameterSampler &parameter_sampler,
std::string label, size_t object_width,
size_t object_height, long seed) {
parameter_sampler.sample(seed);
std::string label,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can label be const?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typically this should either be just std::string if we want to pass by value and const std::string& if we want to pass by reference. See https://google.github.io/styleguide/cppguide.html#Use_of_const

For a function parameter passed by value, const has no effect on the caller, thus is not recommended in function declarations.

Here I would recommend a const reference, since we don't modify the value

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. It should be a const reference.

size_t object_width, size_t object_height,
size_t background_width, size_t background_height,
size_t seed, size_t row_number) {
parameter_sampler.sample(background_width, background_height, seed, row_number);

size_t original_top_left_x = 0;
size_t original_top_left_y = 0;
Expand Down Expand Up @@ -118,20 +120,19 @@ flex_image create_rgba_flex_image(const flex_image &object_input) {
}

std::pair<flex_image, flex_dict>
create_synthetic_image_from_background_and_starter(const flex_image &starter,
create_synthetic_image_from_background_and_starter(ParameterSampler &parameter_sampler,
const flex_image &starter,
const flex_image &background,
std::string &label,
size_t seed,
size_t row_number) {
ParameterSampler parameter_sampler =
ParameterSampler(background.m_width, background.m_height,
(background.m_width - starter.m_width) / 2,
(background.m_height - starter.m_height) / 2);

// construct annotation dictionary from parameters
flex_dict annotation =
build_annotation(parameter_sampler, label, starter.m_width,
starter.m_height, seed + row_number);
build_annotation(parameter_sampler, label,
starter.m_width, starter.m_height,
background.m_width, background.m_height,
seed, row_number);

if (background.get_image_data() == nullptr) {
log_and_throw("Background image has null image data.");
Expand All @@ -149,14 +150,14 @@ create_synthetic_image_from_background_and_starter(const flex_image &starter,
reinterpret_cast<const boost::gil::rgba8_pixel_t *>(
rgba_flex_image.get_image_data()),
rgba_flex_image.m_channels *
rgba_flex_image.m_width // row length in bytes
rgba_flex_image.m_width // row length in bytes
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to remove the space here? I recommend we just use clang-format to resolve formatting decisions and be done with it.

In this case, I believe clang-format should leave two spaces: https://google.github.io/styleguide/cppguide.html#Implementation_Comments

);

boost::gil::rgb8_image_t::const_view_t background_view = interleaved_view(
background.m_width, background.m_height,
reinterpret_cast<const boost::gil::rgb8_pixel_t *>(
background.get_image_data()),
background.m_channels * background.m_width // row length in bytes
background.m_channels * background.m_width // row length in bytes
);
flex_image synthetic_image = create_synthetic_image(
starter_image_view, background_view, parameter_sampler);
Expand Down Expand Up @@ -209,23 +210,28 @@ gl_sframe augment_data(const gl_sframe &data,
* Replacing the `for` with a `parallel_for` fails the export_coreml unit test
* with an EXC_BAD_ACCESS in the function call to boost::gil::resample_pixels
*/
for (size_t segment_id = 0; segment_id < nsegments; segment_id++) {
size_t segment_start = (segment_id * backgrounds.size()) / nsegments;
size_t segment_end = ((segment_id + 1) * backgrounds.size()) / nsegments;
size_t row_number = segment_start;
for (const auto &background_ft :
backgrounds.range_iterator(segment_start, segment_end)) {
row_number++;
flex_image flex_background =
image_util::decode_image(background_ft.to<flex_image>());
for (const auto &row : decompressed_data.range_iterator()) {
// go through all the starter images and create augmented images for
// all starter images and the respective chunk of background images
const flex_image &object = row[image_column_index].get<flex_image>();
std::string label = row[target_column_index].to<flex_string>();
for (const auto &row : decompressed_data.range_iterator()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you avoid using auto here and on line 225?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: https://google.github.io/styleguide/cppguide.html#Type_deduction

The fundamental rule is: use type deduction only to make the code clearer or safer, and do not use it merely to avoid the inconvenience of writing an explicit type.

// go through all the starter images and create augmented images for
// all starter images and the respective chunk of background images
const flex_image &object = row[image_column_index].get<flex_image>();
std::string label = row[target_column_index].to<flex_string>();
ParameterSampler parameter_sampler = ParameterSampler(
object.m_width, object.m_height, 0, 0);

for (size_t segment_id = 0; segment_id < nsegments; segment_id++) {
size_t segment_start = (segment_id * backgrounds.size()) / nsegments;
size_t segment_end = ((segment_id + 1) * backgrounds.size()) / nsegments;
size_t row_number = segment_start;
for (const auto &background_ft :
backgrounds.range_iterator(segment_start, segment_end)) {
row_number++;
flex_image flex_background =
image_util::decode_image(background_ft.to<flex_image>());

std::pair<flex_image, flex_dict> synthetic_row =
create_synthetic_image_from_background_and_starter(
object, flex_background, label, seed, row_number);
parameter_sampler, object, flex_background,
label, seed, row_number);
flex_image synthetic_image = synthetic_row.first;
flex_dict annotation = synthetic_row.second;
// write the synthetically generated image and the constructed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/

#include <core/data/image/numeric_extension/perspective_projection.hpp>
#include <limits>

#include <toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.hpp>

Expand All @@ -16,9 +17,9 @@
namespace turi {
namespace one_shot_object_detection {

ParameterSampler::ParameterSampler(size_t width, size_t height, size_t dx,
size_t dy)
: width_(width), height_(height), dx_(dx), dy_(dy) {}
ParameterSampler::ParameterSampler(size_t starter_width, size_t starter_height,
size_t dx, size_t dy)
: starter_width_(starter_width), starter_height_(starter_height), dx_(dx), dy_(dy) {}

double deg_to_rad(double angle) { return angle * M_PI / 180.0; }

Expand Down Expand Up @@ -64,33 +65,31 @@ void ParameterSampler::set_warped_corners(
/* Function to sample all the parameters needed to build a transform, and
* then also build the transform.
*/
void ParameterSampler::sample(long seed) {
void ParameterSampler::sample(size_t background_width, size_t background_height,
size_t seed, size_t row_number) {
double theta_mean, phi_mean, gamma_mean;
std::srand(seed);
theta_mean = theta_means_[std::rand() % theta_means_.size()];
std::srand(seed + 1);
phi_mean = phi_means_[std::rand() % phi_means_.size()];
std::srand(seed + 2);
gamma_mean = gamma_means_[std::rand() % gamma_means_.size()];
std::seed_seq seed_seq = {static_cast<int>(seed), static_cast<int>(row_number)};
std::mt19937 engine(seed_seq);

std::uniform_int_distribution<int> index_distribution(0, INT_MAX);
theta_mean = theta_means_[index_distribution(engine) % theta_means_.size()];
phi_mean = phi_means_[index_distribution(engine) % phi_means_.size()];
gamma_mean = gamma_means_[index_distribution(engine) % gamma_means_.size()];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems strange to take the result modulo the number of means. Why not just generate a random integer in the desired range right off the bat? Could be a useful helper function here: given a non-inclusive upper bound on the range of the random variable, and the random engine, return a random index


std::normal_distribution<double> theta_distribution(theta_mean, angle_stdev_);
std::normal_distribution<double> phi_distribution(phi_mean, angle_stdev_);
std::normal_distribution<double> gamma_distribution(gamma_mean, angle_stdev_);
std::normal_distribution<double> focal_distribution((double)width_,
std::normal_distribution<double> focal_distribution((double)background_width,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

focal_stdev_);
theta_generator_.seed(seed + 3);
theta_ = theta_distribution(theta_generator_);
phi_generator_.seed(seed + 4);
phi_ = phi_distribution(phi_generator_);
gamma_generator_.seed(seed + 5);
gamma_ = gamma_distribution(gamma_generator_);
focal_generator_.seed(seed + 6);
focal_ = focal_distribution(focal_generator_);
std::uniform_int_distribution<int> dz_distribution(std::max(width_, height_),
theta_ = theta_distribution(engine);
phi_ = phi_distribution(engine);
gamma_ = gamma_distribution(engine);
focal_ = focal_distribution(engine);
std::uniform_int_distribution<int> dz_distribution(std::max(background_width, background_height),
max_depth_);
dz_generator_.seed(seed + 7);
dz_ = focal_ + dz_distribution(dz_generator_);
dz_ = focal_ + dz_distribution(engine);
transform_ = warp_perspective::get_transformation_matrix(
width_, height_, theta_, phi_, gamma_, dx_, dy_, dz_, focal_);
starter_width_, starter_height_, theta_, phi_, gamma_, dx_, dy_, dz_, focal_);
warped_corners_.reserve(4);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace one_shot_object_detection {
*/
class ParameterSampler {
public:
ParameterSampler(size_t width, size_t height, size_t dx, size_t dy);
ParameterSampler(size_t starter_width, size_t starter_height, size_t dx, size_t dy);

/* Getters for all the parameters:
* theta: rotation around the x axis.
Expand Down Expand Up @@ -54,22 +54,18 @@ class ParameterSampler {
/* Function to sample all the parameters needed to build a transform, and
* then also build the transform.
*/
void sample(long seed);
void sample(size_t background_width, size_t background_height,
size_t seed, size_t row_number);

private:
size_t width_;
size_t height_;
size_t starter_width_;
size_t starter_height_;
size_t max_depth_ = 13000;
double angle_stdev_ = 20.0;
double focal_stdev_ = 40.0;
std::vector<double> theta_means_ = {-180.0, 0.0, 180.0};
std::vector<double> phi_means_ = {-180.0, 0.0, 180.0};
std::vector<double> gamma_means_ = {-180.0, -90.0, 0.0, 90.0, 180.0};
std::default_random_engine theta_generator_;
std::default_random_engine phi_generator_;
std::default_random_engine gamma_generator_;
std::default_random_engine dz_generator_;
std::default_random_engine focal_generator_;
double theta_;
double phi_;
double gamma_;
Expand Down