@@ -17,18 +17,137 @@ using tf_privatized_t = tf::BasicTaskflow<std::function, tf::PrivatizedThreadpo
1717#define BENCHMARK (TITLE, F ) \
1818 std::cout << " ========== " << TITLE << " ==========\n " ; \
1919 \
20- std::cout << " Taskflow [simple + std::func] elapsed time : " \
20+ std::cout << " Taskflow [simple + std::func] elapsed time: " \
2121 << F<tf_simple_t >() << " ms\n " ; \
2222 \
23- std::cout << " Taskflow [proactive + std::func] elapsed time : " \
23+ std::cout << " Taskflow [proactive + std::func] elapsed time: " \
2424 << F<tf_proactive_t >() << " ms\n " ; \
2525 \
2626 std::cout << " Taskflow [speculative + std::func] elapsed time: " \
2727 << F<tf_speculative_t >() << " ms\n " ; \
2828 \
29- std::cout << " Taskflow [privatized + std::func] elapsed time : " \
29+ std::cout << " Taskflow [privatized + std::func] elapsed time: " \
3030 << F<tf_privatized_t >() << " ms\n " ; \
3131
32+ // ============================================================================
33+ // Map-Reduce
34+ // ============================================================================
35+
36+ // Function: map_reduce
37+ template <typename T>
38+ auto map_reduce () {
39+
40+ auto beg = std::chrono::high_resolution_clock::now ();
41+
42+ const int num_batches = 65536 ;
43+
44+ std::vector<int > C (1024 , 10 );
45+ std::atomic<size_t > sum {0 };
46+
47+ T tf;
48+
49+ std::optional<typename T::TaskType> prev;
50+
51+ for (int i=0 ; i<num_batches; ++i) {
52+
53+ auto [s, t] = tf.parallel_for (C.begin (), C.end (), [&] (int v) {
54+ sum.fetch_add (v, std::memory_order_relaxed);
55+ });
56+
57+ if (prev) {
58+ prev->precede (s);
59+ }
60+
61+ prev = t;
62+ }
63+
64+ tf.wait_for_all ();
65+
66+ assert (sum == num_batches * C.size () * 10 );
67+
68+ auto end = std::chrono::high_resolution_clock::now ();
69+
70+ return std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count ();
71+ }
72+
73+ // ============================================================================
74+ // Level Graph
75+ // ============================================================================
76+
77+ // Function: level_graph
78+ template <typename T>
79+ auto level_graph () {
80+
81+ const int num_levels = 2048 ;
82+ const int num_nodes_per_level = 1024 ;
83+
84+ auto beg = std::chrono::high_resolution_clock::now ();
85+
86+ std::atomic<size_t > sum {0 };
87+
88+ T tf;
89+
90+ std::vector< std::vector<typename T::TaskType> > tasks;
91+
92+ tasks.resize (num_levels);
93+ for (int l=0 ; l<num_levels; ++l) {
94+ for (int i=0 ; i<num_nodes_per_level; ++i) {
95+ tasks[l].push_back (tf.silent_emplace ([&] () {
96+ sum.fetch_add (1 , std::memory_order_relaxed);
97+ }));
98+ }
99+ }
100+
101+ // connections for each level l to l+1
102+ for (int l=0 ; l<num_levels-1 ; ++l) {
103+ for (int i=0 ; i<num_nodes_per_level; ++i) {
104+ for (int j=0 ; j<num_nodes_per_level; j=j+i+1 ) {
105+ tasks[l][i].precede (tasks[l+1 ][j]);
106+ }
107+ }
108+ }
109+
110+ tf.wait_for_all ();
111+
112+ assert (sum == num_levels * num_nodes_per_level);
113+
114+ auto end = std::chrono::high_resolution_clock::now ();
115+
116+ return std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count ();
117+ }
118+
119+ // ============================================================================
120+ // Linear Graph
121+ // ============================================================================
122+
123+ // Function: linear_graph
124+ template <typename T>
125+ auto linear_graph () {
126+
127+ const int num_nodes = 1000000 ;
128+
129+ auto beg = std::chrono::high_resolution_clock::now ();
130+
131+ size_t sum {0 };
132+
133+ T tf;
134+
135+ std::vector<typename T::TaskType> tasks;
136+
137+ for (int i=0 ; i<num_nodes; ++i) {
138+ tasks.push_back (tf.silent_emplace ([&] () { ++sum; }));
139+ }
140+
141+ tf.linearize (tasks);
142+ tf.wait_for_all ();
143+
144+ assert (sum == num_nodes);
145+
146+ auto end = std::chrono::high_resolution_clock::now ();
147+
148+ return std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count ();
149+ }
150+
32151// ============================================================================
33152// Binary Tree
34153// ============================================================================
@@ -138,10 +257,13 @@ auto atomic_add() {
138257
139258// Function: main
140259int main (int argc, char * argv[]) {
141-
260+
142261 BENCHMARK (" Empty Jobs" , empty_jobs);
143262 BENCHMARK (" Atomic Add" , atomic_add);
144263 BENCHMARK (" Binary Tree" , binary_tree);
264+ BENCHMARK (" Linear Graph" , linear_graph);
265+ BENCHMARK (" Level Graph" , level_graph);
266+ BENCHMARK (" Map Reduce" , map_reduce);
145267
146268 return 0 ;
147269}
0 commit comments