Skip to content

Commit da4734b

Browse files
added taskflow benchmark
1 parent ea54d5a commit da4734b

4 files changed

Lines changed: 190 additions & 124 deletions

File tree

CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ target_link_libraries(reduce Threads::Threads)
9999
add_executable(threadpool example/threadpool.cpp)
100100
target_link_libraries(threadpool Threads::Threads)
101101

102+
add_executable(taskflow example/taskflow.cpp)
103+
target_link_libraries(taskflow Threads::Threads)
104+
102105
add_executable(matrix example/matrix.cpp)
103106
target_compile_options(matrix PRIVATE ${OpenMP_CXX_FLAGS})
104107
target_link_libraries(matrix Threads::Threads OpenMP::OpenMP_CXX)
@@ -121,10 +124,11 @@ enable_testing()
121124
message(STATUS "Building unit tests ...")
122125
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/unittest)
123126
set(TF_UTEST_DIR ${PROJECT_SOURCE_DIR}/unittest)
124-
add_executable(taskflow unittest/taskflow.cpp)
125-
target_link_libraries(taskflow Threads::Threads)
126127

127128
# unittest for taskflow
129+
add_executable(taskflow_test_tmp unittest/taskflow.cpp)
130+
target_link_libraries(taskflow_test_tmp Threads::Threads)
131+
set_target_properties(taskflow_test_tmp PROPERTIES OUTPUT_NAME "taskflow")
128132
add_test(builder ${TF_UTEST_DIR}/taskflow -tc=Taskflow.Builder)
129133
add_test(dispatch ${TF_UTEST_DIR}/taskflow -tc=Taskflow.Dispatch)
130134
add_test(parallel_for ${TF_UTEST_DIR}/taskflow -tc=Taskflow.ParallelFor)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,7 @@ The folder `example/` contains several examples and is a great place to learn to
728728
| [reduce.cpp](./example/reduce.cpp)| performs reduce operations over linear containers |
729729
| [subflow.cpp](./example/subflow.cpp)| demonstrates how to create a subflow graph that spawns three dynamic tasks |
730730
| [threadpool.cpp](./example/threadpool.cpp)| benchmarks different threadpool implementations |
731+
| [taskflow.cpp](./example/taskflow.cpp)| benchmarks taskflow under different types of workloads |
731732
| [threadpool_cxx14.cpp](./example/threadpool_cxx14.cpp)| shows use of the C++14-compatible threadpool implementation, which may be used when you have no inter-task (taskflow) dependencies to express |
732733

733734
# Get Involved

example/taskflow.cpp

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// 2018/09/18 - created by Tsung-Wei Huang
2+
//
3+
// This program is used to benchmark the taskflow under different types
4+
// of workloads.
5+
6+
#include <taskflow/taskflow.hpp>

#include <atomic>
#include <cassert>
#include <chrono>
#include <climits>
#include <cstddef>
#include <functional>
#include <iostream>
#include <random>
10+
11+
// Taskflow types under benchmark: each alias instantiates tf::BasicTaskflow
// with std::function and a different threadpool type. The BENCHMARK macro
// runs every workload once per alias.
using tf_simple_t = tf::BasicTaskflow<std::function, tf::SimpleThreadpool>;
12+
using tf_proactive_t = tf::BasicTaskflow<std::function, tf::ProactiveThreadpool>;
13+
using tf_speculative_t = tf::BasicTaskflow<std::function, tf::SpeculativeThreadpool>;
14+
using tf_privatized_t = tf::BasicTaskflow<std::function, tf::PrivatizedThreadpool>;
15+
16+
// Procedure: benchmark
// Prints a banner containing TITLE, then invokes the function template F once
// for each taskflow alias (simple, proactive, speculative, privatized) and
// prints the value it returns as a time in milliseconds.
//
// The do { ... } while(0) wrapper turns the expansion into a single statement,
// so `BENCHMARK(...);` stays correct inside an unbraced if/else or loop body.
#define BENCHMARK(TITLE, F)                                                 \
  do {                                                                      \
    std::cout << "========== " << (TITLE) << " ==========\n";               \
                                                                            \
    std::cout << "Taskflow [simple + std::func] elapsed time : "            \
              << F<tf_simple_t>() << " ms\n";                               \
                                                                            \
    std::cout << "Taskflow [proactive + std::func] elapsed time : "         \
              << F<tf_proactive_t>() << " ms\n";                            \
                                                                            \
    std::cout << "Taskflow [speculative + std::func] elapsed time: "        \
              << F<tf_speculative_t>() << " ms\n";                          \
                                                                            \
    std::cout << "Taskflow [privatized + std::func] elapsed time : "        \
              << F<tf_privatized_t>() << " ms\n";                           \
  } while(0)
31+
32+
// ============================================================================
33+
// Binary Tree
34+
// ============================================================================
35+
36+
// Function: binary_tree
// Builds a complete binary tree of tasks on a taskflow of type T, waits for
// it to finish, and returns the elapsed time in milliseconds.
//
// Every task increments a shared counter exactly once, and each parent task
// precedes both of its children. The measured interval covers construction of
// the taskflow object, graph construction, wait_for_all() and the final
// sanity check on the counter.
//
// num_levels: number of tree levels including the root, so the graph holds
//             2^num_levels - 1 tasks. Must be at least 1 and smaller than the
//             bit width of std::size_t. Defaults to the original setting, 21.
template <typename T>
auto binary_tree(int num_levels = 21) {

  assert(num_levels >= 1 &&
         num_levels < static_cast<int>(sizeof(std::size_t) * CHAR_BIT));

  // steady_clock is monotonic, so the interval cannot be distorted by
  // adjustments of the system clock while the benchmark runs.
  const auto beg = std::chrono::steady_clock::now();

  T tf;

  std::atomic<std::size_t> sum {0};
  std::function<void(int, typename T::TaskType)> insert;

  // Recursively attaches two children below `parent`, whose level is `l`
  // (the root is level 1), until the tree has num_levels levels.
  insert = [&] (int l, typename T::TaskType parent) {

    if(l >= num_levels) {
      return;
    }

    auto lc = tf.silent_emplace([&] () {
      sum.fetch_add(1, std::memory_order_relaxed);
    });

    auto rc = tf.silent_emplace([&] () {
      sum.fetch_add(1, std::memory_order_relaxed);
    });

    parent.precede(lc);
    parent.precede(rc);

    insert(l+1, lc);
    insert(l+1, rc);
  };

  auto root = tf.silent_emplace([&] () {
    sum.fetch_add(1, std::memory_order_relaxed);
  });

  insert(1, root);

  // synchronize until all tasks finish
  tf.wait_for_all();

  // Every node of the complete tree must have run exactly once. The shift is
  // done in std::size_t to avoid a signed/unsigned comparison.
  assert(sum == (std::size_t{1} << num_levels) - 1);

  const auto end = std::chrono::steady_clock::now();

  return std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count();
}
84+
85+
// ============================================================================
86+
// Empty Jobs
87+
// ============================================================================
88+
89+
// Function: empty_jobs
// Emplaces `num_tasks` independent no-op tasks on a taskflow of type T, waits
// for all of them, and returns the elapsed time in milliseconds.
//
// The measured interval covers construction of the taskflow object, task
// creation and wait_for_all().
//
// num_tasks: number of empty tasks to create. Defaults to the original
//            setting, 1000000.
template <typename T>
auto empty_jobs(std::size_t num_tasks = 1000000) {

  // steady_clock is monotonic, so the interval cannot be distorted by
  // adjustments of the system clock while the benchmark runs.
  const auto beg = std::chrono::steady_clock::now();

  T tf;

  // Index and bound share one unsigned type: no signed/unsigned comparison.
  for(std::size_t i = 0; i < num_tasks; ++i) {
    tf.silent_emplace([](){});
  }

  tf.wait_for_all();

  const auto end = std::chrono::steady_clock::now();

  return std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count();
}
109+
110+
// ============================================================================
111+
// Atomic add
112+
// ============================================================================
113+
114+
// Function: atomic_add
// Emplaces `num_tasks` independent tasks on a taskflow of type T, each of
// which increments one shared atomic counter, waits for all of them, and
// returns the elapsed time in milliseconds.
//
// The measured interval covers construction of the taskflow object, task
// creation, wait_for_all() and the final sanity check on the counter.
//
// num_tasks: number of increment tasks to create. Defaults to the original
//            setting, 1000000.
template <typename T>
auto atomic_add(std::size_t num_tasks = 1000000) {

  std::atomic<std::size_t> counter {0};

  // steady_clock is monotonic, so the interval cannot be distorted by
  // adjustments of the system clock while the benchmark runs.
  const auto beg = std::chrono::steady_clock::now();

  T tf;

  // Index and bound share one unsigned type: no signed/unsigned comparison.
  for(std::size_t i = 0; i < num_tasks; ++i) {
    tf.silent_emplace([&counter](){
      counter.fetch_add(1, std::memory_order_relaxed);
    });
  }

  tf.wait_for_all();

  // Every task must have contributed exactly one increment.
  assert(counter == num_tasks);

  const auto end = std::chrono::steady_clock::now();

  return std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count();
}
136+
137+
// ----------------------------------------------------------------------------
138+
139+
// Function: main
140+
int main(int argc, char* argv[]) {
141+
142+
BENCHMARK("Empty Jobs", empty_jobs);
143+
BENCHMARK("Atomic Add", atomic_add);
144+
BENCHMARK("Binary Tree", binary_tree);
145+
146+
return 0;
147+
}
148+
149+
150+

0 commit comments

Comments
 (0)