Skip to content

Commit 9d4bcb1

Browse files
updated binary_tree
1 parent 590bc1e commit 9d4bcb1

6 files changed

Lines changed: 74 additions & 10 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ add_executable(
273273
binary_tree
274274
${TF_BENCHMARK_DIR}/binary_tree/main.cpp
275275
${TF_BENCHMARK_DIR}/binary_tree/tbb.cpp
276+
${TF_BENCHMARK_DIR}/binary_tree/omp.cpp
276277
${TF_BENCHMARK_DIR}/binary_tree/taskflow.cpp
277278
)
278279
target_include_directories(binary_tree PRIVATE ${PROJECT_SOURCE_DIR}/3rd-party/CLI11)

benchmark/binary_tree/binary_tree.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <cmath>
1010
#include <atomic>
1111

12-
std::chrono::microseconds measure_time_taskflow(unsigned, unsigned);
13-
std::chrono::microseconds measure_time_tbb(unsigned, unsigned);
12+
std::chrono::microseconds measure_time_taskflow(size_t, unsigned);
13+
std::chrono::microseconds measure_time_tbb(size_t, unsigned);
14+
std::chrono::microseconds measure_time_omp(size_t, unsigned);
1415

benchmark/binary_tree/main.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
void binary_tree(
55
const std::string& model,
6+
const size_t num_layers,
67
const unsigned num_threads,
78
const unsigned num_rounds
89
) {
@@ -11,7 +12,7 @@ void binary_tree(
1112
<< std::setw(12) << "runtime"
1213
<< std::endl;
1314

14-
for(unsigned i=1; i<=21; ++i) {
15+
for(size_t i=1; i<=num_layers; ++i) {
1516

1617
double runtime {0.0};
1718

@@ -22,6 +23,9 @@ void binary_tree(
2223
else if(model == "tbb") {
2324
runtime += measure_time_tbb(i, num_threads).count();
2425
}
26+
else if(model == "omp") {
27+
runtime += measure_time_omp(i, num_threads).count();
28+
}
2529
else assert(false);
2630
}
2731

@@ -40,12 +44,15 @@ int main(int argc, char* argv[]) {
4044

4145
unsigned num_rounds {1};
4246
app.add_option("-r,--num_rounds", num_rounds, "number of rounds (default=1)");
47+
48+
size_t num_layers {25};
49+
app.add_option("-l,--num_layers", num_layers, "number of layers (default=25)");
4350

4451
std::string model = "tf";
4552
app.add_option("-m,--model", model, "model name tbb|omp|tf (default=tf)")
4653
->check([] (const std::string& m) {
47-
if(m != "tbb" && m != "tf") {
48-
return "model name should be \"tbb\" or \"tf\"";
54+
if(m != "tbb" && m != "tf" && m != "omp") {
55+
return "model name should be \"tbb\", \"omp\", or \"tf\"";
4956
}
5057
return "";
5158
});
@@ -57,7 +64,7 @@ int main(int argc, char* argv[]) {
5764
<< "num_rounds=" << num_rounds << ' '
5865
<< std::endl;
5966

60-
binary_tree(model, num_threads, num_rounds);
67+
binary_tree(model, num_layers, num_threads, num_rounds);
6168

6269
return 0;
6370
}

benchmark/binary_tree/omp.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#include "binary_tree.hpp"
2+
#include <omp.h>
3+
4+
// binary_tree_omp
5+
void binary_tree_omp(size_t num_layers, unsigned num_threads) {
6+
7+
std::atomic<size_t> counter = 0;
8+
9+
size_t N = 1 << num_layers;
10+
size_t *D = new size_t [N];
11+
12+
#pragma omp parallel num_threads(num_threads)
13+
{
14+
#pragma omp single
15+
{
16+
for(size_t i = 1; i<N; ++i) {
17+
18+
size_t p = i / 2;
19+
size_t l = i * 2;
20+
size_t r = l + 1;
21+
22+
if(l < N && r < N) {
23+
#pragma omp task firstprivate(i) depend(out:D[l], D[r]) depend(in:D[p])
24+
{
25+
//printf("%d\n", i);
26+
counter.fetch_add(1, std::memory_order_relaxed);
27+
}
28+
}
29+
else {
30+
#pragma omp task firstprivate(i) depend(in:D[p])
31+
{
32+
//printf("%d\n", i);
33+
counter.fetch_add(1, std::memory_order_relaxed);
34+
}
35+
}
36+
}
37+
}
38+
}
39+
40+
assert((counter + 1) == N);
41+
42+
delete [] D;
43+
}
44+
45+
std::chrono::microseconds measure_time_omp(
46+
size_t num_layers,
47+
unsigned num_threads
48+
) {
49+
auto beg = std::chrono::high_resolution_clock::now();
50+
binary_tree_omp(num_layers, num_threads);
51+
auto end = std::chrono::high_resolution_clock::now();
52+
return std::chrono::duration_cast<std::chrono::microseconds>(end - beg);
53+
}
54+
55+

benchmark/binary_tree/taskflow.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include <taskflow/taskflow.hpp>
33

44
// binary_tree_taskflow
5-
void binary_tree_taskflow(unsigned num_layers, unsigned num_threads) {
5+
void binary_tree_taskflow(size_t num_layers, unsigned num_threads) {
66

77
std::atomic<size_t> counter {0};
88

@@ -30,7 +30,7 @@ void binary_tree_taskflow(unsigned num_layers, unsigned num_threads) {
3030
}
3131

3232
std::chrono::microseconds measure_time_taskflow(
33-
unsigned num_layers,
33+
size_t num_layers,
3434
unsigned num_threads
3535
) {
3636
auto beg = std::chrono::high_resolution_clock::now();

benchmark/binary_tree/tbb.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include <tbb/flow_graph.h>
44

55
// binary_tree_tbb
6-
void binary_tree_tbb(unsigned num_layers, unsigned num_threads) {
6+
void binary_tree_tbb(size_t num_layers, unsigned num_threads) {
77

88
using namespace tbb;
99
using namespace tbb::flow;
@@ -44,7 +44,7 @@ void binary_tree_tbb(unsigned num_layers, unsigned num_threads) {
4444
}
4545

4646
std::chrono::microseconds measure_time_tbb(
47-
unsigned num_layers,
47+
size_t num_layers,
4848
unsigned num_threads
4949
) {
5050
auto beg = std::chrono::high_resolution_clock::now();

0 commit comments

Comments
 (0)