Skip to content

Commit

Permalink
Fixes for benchmark, ostream and gitignore (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
improbable-til authored Aug 4, 2020
1 parent ad17ea8 commit d959668
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 153 deletions.
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
!.gitignore
!.travis.yml
bazel-*
!bazel-*.sh
compile_commands.json
perf.data*
build

/bazel-phtree-cpp-public/
105 changes: 72 additions & 33 deletions phtree/benchmark/benchmark_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@ template <dimension_t DIM>
auto CreateDataCUBE = [](auto& points,
size_t num_entities,
std::uint32_t seed,
double world_length,
double world_mininum,
double world_maximum,
auto set_coordinate) {
std::default_random_engine random_engine{seed};
std::uniform_real_distribution<> distribution(0, world_length);
std::uniform_real_distribution<> distribution(world_mininum, world_maximum);
for (size_t i = 0; i < num_entities; ++i) {
auto& p = points[i];
for (dimension_t d = 0; d < DIM; ++d) {
Expand All @@ -48,15 +49,17 @@ template <dimension_t DIM>
auto CreateDataCLUSTER = [](auto& points,
size_t num_entities,
std::uint32_t seed,
double world_length,
double world_mininum,
double world_maximum,
auto set_coordinate) {
std::default_random_engine random_engine{seed};
std::uniform_real_distribution<> distribution(0, world_length);
std::uniform_real_distribution<> distribution(world_mininum, world_maximum);
// SIGMA = 0.0001
std::normal_distribution<> gauss_distribution(0, 0.0001);
const int NUM_PT_PER_CLUSTER = 100;
// 100 points per cluster (NUM_PT_PER_CLUSTER), minimum is 1 cluster.
size_t num_cluster = std::max(1, (int)(num_entities / NUM_PT_PER_CLUSTER));
const double world_length = world_maximum - world_mininum;

// loop over clusters
PhPointD<DIM> cp; // center point of cluster
Expand All @@ -71,33 +74,35 @@ auto CreateDataCLUSTER = [](auto& points,
for (dimension_t d = 0; d < DIM; ++d) {
// double x = (R.nextGaussian() - 0.5) * GAUSS_SIGMA; // confine to small rectangle
double x = gauss_distribution(random_engine);
x *= world_length; // stretch if domain>1.0
x += cp[d]; // offset of cluster
x *= world_length + world_mininum; // stretch if domain>1.0
x += cp[d]; // offset of cluster
set_coordinate(p, d, x);
}
}
}
};

auto CreateDuplicates = [](auto& points, size_t num_entities, std::uint32_t seed) {
std::default_random_engine random_engine{seed};
std::uniform_int_distribution<> distribution(0, points.size());
for (int i = points.size(); i < num_entities; ++i) {
// copy some random other point or box
points[i] = points[distribution(random_engine)];
}
};
// Fills positions [num_unique_entries, num_total_entities) by copying random
// entries from the first num_unique_entries slots, so a configurable fraction
// of the data set consists of exact duplicates.
// @param points             container with num_unique_entries generated entries
//                           and writable slots up to num_total_entities.
// @param num_unique_entries number of unique entries already present.
// @param num_total_entities total number of entries after duplication.
// @param seed               seed for the deterministic random engine.
auto CreateDuplicates =
    [](auto& points, size_t num_unique_entries, size_t num_total_entities, std::uint32_t seed) {
        if (num_unique_entries == 0) {
            return;  // nothing to copy from; also avoids an empty (UB) distribution range
        }
        std::default_random_engine random_engine{seed};
        // uniform_int_distribution includes BOTH bounds, so the upper bound must
        // be num_unique_entries - 1; otherwise the (still uninitialized) slot at
        // index num_unique_entries could be chosen as a copy source.
        std::uniform_int_distribution<size_t> distribution(0, num_unique_entries - 1);
        for (size_t i = num_unique_entries; i < num_total_entities; ++i) {
            // copy some random other point or box
            points[i] = points[distribution(random_engine)];
        }
    };
} // namespace

enum TestGenerator { CUBE, CLUSTER };

template <dimension_t DIM>
auto CreatePointData = [](auto& points,
TestGenerator test_generator,
size_t num_entities,
int seed,
double world_length,
double fraction_of_duplicates = 0.) {
auto CreatePointDataMinMax = [](auto& points,
TestGenerator test_generator,
size_t num_entities,
int seed,
double world_minimum,
double world_maximum,
double fraction_of_duplicates) {
auto set_coordinate_lambda = [](auto& p, dimension_t dim, auto value) { p[dim] = value; };
// Create at least 1 unique point
// Note that the following point generator is likely, but not guaranteed, to create unique
Expand All @@ -106,28 +111,30 @@ auto CreatePointData = [](auto& points,
points.reserve(num_entities);
switch (test_generator) {
case CUBE:
CreateDataCUBE<DIM>(points, num_unique_entries, seed, world_length, set_coordinate_lambda);
CreateDataCUBE<DIM>(
points, num_unique_entries, seed, world_minimum, world_maximum, set_coordinate_lambda);
break;
case CLUSTER:
CreateDataCLUSTER<DIM>(
points, num_unique_entries, seed, world_length, set_coordinate_lambda);
points, num_unique_entries, seed, world_minimum, world_maximum, set_coordinate_lambda);
break;
default:
assert(false);
}

// Create duplicates
CreateDuplicates(points, num_entities, seed);
CreateDuplicates(points, num_unique_entries, num_entities, seed);
};

template <dimension_t DIM>
auto CreateBoxData = [](auto& points,
TestGenerator test_generator,
size_t num_entities,
int seed,
double world_length,
double box_length,
double fraction_of_duplicates = 0.) {
auto CreateBoxDataMinMax = [](auto& points,
TestGenerator test_generator,
size_t num_entities,
int seed,
double world_minimum,
double world_maximum,
double box_length,
double fraction_of_duplicates) {
auto set_coordinate_lambda = [box_length](auto& p, dimension_t dim, auto value) {
p.min()[dim] = value;
p.max()[dim] = value + box_length;
Expand All @@ -139,19 +146,51 @@ auto CreateBoxData = [](auto& points,
points.reserve(num_entities);
switch (test_generator) {
case CUBE:
CreateDataCUBE<DIM>(points, num_unique_entries, seed, world_length, set_coordinate_lambda);
CreateDataCUBE<DIM>(
points, num_unique_entries, seed, world_minimum, world_maximum, set_coordinate_lambda);
break;
case CLUSTER:
CreateDataCLUSTER<DIM>(
points, num_unique_entries, seed, world_length, set_coordinate_lambda);
points, num_unique_entries, seed, world_minimum, world_maximum, set_coordinate_lambda);
break;
default:
assert(false);
}

// Create duplicates
CreateDuplicates(points, num_entities, seed);
CreateDuplicates(points, num_unique_entries, num_entities, seed);
};

// Backward-compatible entry point: generates point data in the world
// [0, world_length] per dimension by forwarding to CreatePointDataMinMax
// with a world minimum of 0.
template <dimension_t DIM>
auto CreatePointData = [](auto& points,
                          TestGenerator test_generator,
                          size_t num_entities,
                          int seed,
                          double world_length,
                          double fraction_of_duplicates = 0.) {
    CreatePointDataMinMax<DIM>(
        points, test_generator, num_entities, seed, 0., world_length, fraction_of_duplicates);
};

// Backward-compatible entry point: generates box data in the world
// [0, world_length] per dimension by forwarding to CreateBoxDataMinMax
// with a world minimum of 0.
template <dimension_t DIM>
auto CreateBoxData = [](auto& points,
                        TestGenerator test_generator,
                        size_t num_entities,
                        int seed,
                        double world_length,
                        double box_length,
                        double fraction_of_duplicates = 0.) {
    CreateBoxDataMinMax<DIM>(
        points,
        test_generator,
        num_entities,
        seed,
        0.,
        world_length,
        box_length,
        fraction_of_duplicates);
};

} // namespace improbable::phtree::phbenchmark

#endif // PHTREE_BENCHMARK_UTIL_H
97 changes: 43 additions & 54 deletions phtree/benchmark/update_box_d_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@
#include <benchmark/benchmark.h>
#include <spdlog/spdlog.h>
#include <spdlog/sinks/ansicolor_sink.h>
#include <random>

using namespace improbable;
using namespace improbable::phtree;
using namespace improbable::phtree::phbenchmark;

namespace {

constexpr int UPDATES_PER_ROUND = 1000;
constexpr double MOVE_DISTANCE = 10;

const double GLOBAL_MAX = 10000;
const double BOX_LEN = 10;

Expand All @@ -46,25 +48,26 @@ class IndexBenchmark {
benchmark::State& state,
TestGenerator data_type,
int num_entities,
int updates_per_round,
double move_distance);
int updates_per_round = UPDATES_PER_ROUND,
double move_distance = MOVE_DISTANCE);

void Benchmark(benchmark::State& state);

private:
void SetupWorld(benchmark::State& state);
void BuildUpdate(std::vector<UpdateOp<DIM>>& updates);
void UpdateWorld(benchmark::State& state, std::vector<UpdateOp<DIM>>& updates);
void BuildUpdates();
void UpdateWorld(benchmark::State& state);

const TestGenerator data_type_;
const int num_entities_;
const int updates_per_round_;
const double move_distance_;

PhTreeBoxD<DIM, scalar_t> tree_;
std::default_random_engine random_engine_;
std::uniform_real_distribution<> cube_distribution_;
std::vector<PhBoxD<DIM>> boxes_;
std::vector<UpdateOp<DIM>> updates_;
std::default_random_engine random_engine_;
std::uniform_int_distribution<> entity_id_distribution_;
};

template <dimension_t DIM>
Expand All @@ -78,9 +81,10 @@ IndexBenchmark<DIM>::IndexBenchmark(
, num_entities_(num_entities)
, updates_per_round_(updates_per_round)
, move_distance_(move_distance)
, random_engine_{1}
, cube_distribution_{0, GLOBAL_MAX}
, boxes_(num_entities) {
, boxes_(num_entities)
, updates_(updates_per_round)
, random_engine_{0}
, entity_id_distribution_{0, num_entities - 1} {
auto console_sink = std::make_shared<spdlog::sinks::ansicolor_stdout_sink_mt>();
spdlog::set_default_logger(
std::make_shared<spdlog::logger>("", spdlog::sinks_init_list({console_sink})));
Expand All @@ -91,41 +95,12 @@ IndexBenchmark<DIM>::IndexBenchmark(

template <dimension_t DIM>
void IndexBenchmark<DIM>::Benchmark(benchmark::State& state) {
std::vector<UpdateOp<DIM>> updates;
updates.reserve(updates_per_round_);
for (auto _ : state) {
state.PauseTiming();
BuildUpdate(updates);
BuildUpdates();
state.ResumeTiming();

UpdateWorld(state, updates);

state.PauseTiming();
for (auto& update : updates) {
boxes_[update.id_] = update.new_;
}
state.ResumeTiming();
}
}

template <dimension_t DIM>
void IndexBenchmark<DIM>::BuildUpdate(std::vector<UpdateOp<DIM>>& updates) {
// Use Delta to avoid moving points in insertion order (not that it matters for the PH-Tree, but
// we may test other trees as well.)
int box_id_increment = num_entities_ / updates_per_round_; // int division
int box_id = 0;
updates.clear();
for (size_t i = 0; i < updates_per_round_; ++i) {
assert(box_id >= 0);
assert(box_id < boxes_.size());
auto& old_box = boxes_[box_id];
auto update = UpdateOp<DIM>{box_id, old_box, old_box};
for (dimension_t d = 0; d < DIM; ++d) {
update.new_.min()[d] += move_distance_;
update.new_.max()[d] += move_distance_;
}
updates.emplace_back(update);
box_id += box_id_increment;
UpdateWorld(state);
}
}

Expand All @@ -143,10 +118,24 @@ void IndexBenchmark<DIM>::SetupWorld(benchmark::State& state) {
}

template <dimension_t DIM>
void IndexBenchmark<DIM>::UpdateWorld(
benchmark::State& state, std::vector<UpdateOp<DIM>>& updates) {
void IndexBenchmark<DIM>::BuildUpdates() {
for (auto& update : updates_) {
int box_id = entity_id_distribution_(random_engine_);
update.id_ = box_id;
update.old_ = boxes_[box_id];
for (dimension_t d = 0; d < DIM; ++d) {
update.new_.min()[d] = update.old_.min()[d] + move_distance_;
update.new_.max()[d] = update.old_.max()[d] + move_distance_;
}
// update reference data
boxes_[box_id] = update.new_;
}
}

template <dimension_t DIM>
void IndexBenchmark<DIM>::UpdateWorld(benchmark::State& state) {
size_t initial_tree_size = tree_.size();
for (auto& update : updates) {
for (auto& update : updates_) {
size_t result_erase = tree_.erase(update.old_);
auto result_emplace = tree_.emplace(update.new_, update.id_);
assert(result_erase == 1);
Expand All @@ -155,7 +144,7 @@ void IndexBenchmark<DIM>::UpdateWorld(

// For normal indexes we expect num_entities==size(), but the PhTree<Map<...>> index has
// size() as low as (num_entities-duplicates).
if (tree_.size() > num_entities_ || tree_.size() < initial_tree_size - updates_per_round_) {
if (tree_.size() > num_entities_ || tree_.size() + updates_per_round_ < initial_tree_size) {
spdlog::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_);
}

Expand All @@ -167,35 +156,35 @@ void IndexBenchmark<DIM>::UpdateWorld(

template <typename... Arguments>
void PhTree3D(benchmark::State& state, Arguments&&... arguments) {
IndexBenchmark<4> benchmark{state, arguments...};
IndexBenchmark<3> benchmark{state, arguments...};
benchmark.Benchmark(state);
}

// index type, scenario name, data_type, num_entities, updates_per_round, move_distance
// PhTree3D CUBE
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000, 100, 10.)
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000)
->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000, 100, 10.)
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000)
->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000, 100, 10.)
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000)
->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100, 10.)
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000)
->Unit(benchmark::kMillisecond);

// PhTree3D CLUSTER
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000, 100, 10.)
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000)
->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100, 10.)
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000)
->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000, 100, 10.)
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000)
->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100, 10.)
BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000)
->Unit(benchmark::kMillisecond);

BENCHMARK_MAIN();
Loading

0 comments on commit d959668

Please sign in to comment.