Skip to content

Commit e9236f9

Browse files
Merge branch 'dev' of https://github.com/cpp-taskflow/cpp-taskflow into dev
2 parents 7754cf1 + 11c92e1 commit e9236f9

14 files changed

Lines changed: 343 additions & 453 deletions

File tree

CMakeLists.txt

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -196,6 +196,9 @@ add_test(reduce_max ${TF_UTEST_DIR}/taskflow -tc=ReduceMax)
196196
add_test(joined_subflow ${TF_UTEST_DIR}/taskflow -tc=JoinedSubflow)
197197
add_test(detached_subflow ${TF_UTEST_DIR}/taskflow -tc=DetachedSubflow)
198198
add_test(framework ${TF_UTEST_DIR}/taskflow -tc=Framework)
199+
add_test(composition-1 ${TF_UTEST_DIR}/taskflow -tc=Composition-1)
200+
add_test(composition-2 ${TF_UTEST_DIR}/taskflow -tc=Composition-2)
201+
add_test(composition-3 ${TF_UTEST_DIR}/taskflow -tc=Composition-3)
199202

200203
# unittest for threadpool
201204
add_executable(threadpool_test_tmp unittest/threadpool.cpp)
@@ -222,7 +225,6 @@ add_test(threadpool_cxx14_basic ${TF_UTEST_DIR}/threadpool_cxx14 -tc=Thre
222225
add_test(threadpool_cxx14_wait_for_all ${TF_UTEST_DIR}/threadpool_cxx14 -tc=Threadpool.WaitForAll)
223226

224227
# run all examples
225-
#add_test(inttest.simple )
226228

227229
endif()
228230

@@ -295,7 +297,6 @@ target_link_libraries(
295297
)
296298
set_target_properties(graph_traversal PROPERTIES COMPILE_FLAGS ${OpenMP_CXX_FLAGS})
297299

298-
299300
## benchmark 3: MNIST
300301
message(STATUS "benchmark 3: mnist")
301302
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${TF_BENCHMARK_DIR}/mnist)
@@ -357,6 +358,23 @@ target_link_libraries(
357358
)
358359
set_target_properties(parallel_dnn PROPERTIES COMPILE_FLAGS ${OpenMP_CXX_FLAGS})
359360

361+
## benchmark 6: Jacobi Iterative Stencil
362+
message(STATUS "benchmark 6: Jacobi iterative stencil")
363+
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${TF_BENCHMARK_DIR}/jacobi_stencil)
364+
add_executable(
365+
jacobi_stencil
366+
${TF_BENCHMARK_DIR}/jacobi_stencil/main.cpp
367+
${TF_BENCHMARK_DIR}/jacobi_stencil/omp.cpp
368+
${TF_BENCHMARK_DIR}/jacobi_stencil/tbb.cpp
369+
${TF_BENCHMARK_DIR}/jacobi_stencil/seq.cpp
370+
${TF_BENCHMARK_DIR}/jacobi_stencil/taskflow.cpp
371+
)
372+
target_link_libraries(
373+
jacobi_stencil
374+
${PROJECT_NAME} Threads::Threads ${TBB_IMPORTED_TARGETS}
375+
${OpenMP_CXX_LIBRARIES} stdc++fs
376+
)
377+
360378

361379
endif()
362380

benchmark/parallel_dnn/dnn.hpp

Lines changed: 2 additions & 175 deletions
Original file line number | Diff line number | Diff line change
@@ -170,172 +170,6 @@ inline void deactivate(Eigen::MatrixXf& mat, Activation act) {
170170
}
171171

172172

173-
174-
struct MNIST {
175-
176-
// Ctor
177-
MNIST() {
178-
std::string path = std::experimental::filesystem::current_path();
179-
//path = path.substr(0, path.rfind("cpp-taskflow") + 12);
180-
//path += "/benchmark/mnist/";
181-
182-
images = read_mnist_image("./train-images.data");
183-
labels = read_mnist_label("./train-labels.data");
184-
185-
test_images = read_mnist_image("./t10k-images-idx3-ubyte");
186-
test_labels = read_mnist_label("./t10k-labels-idx1-ubyte");
187-
}
188-
189-
void add_layer(size_t in_degree, size_t out_degree, Activation act) {
190-
acts.emplace_back(act);
191-
Ys.emplace_back().resize(batch_size, out_degree);
192-
Ws.push_back(Eigen::MatrixXf::Random(in_degree, out_degree));
193-
Bs.push_back(Eigen::MatrixXf::Random(1, out_degree));
194-
195-
dW.emplace_back().resize(in_degree, out_degree);
196-
dB.emplace_back().resize(1, out_degree);
197-
}
198-
199-
void forward(size_t layer, const Eigen::MatrixXf& mat) {
200-
Ys[layer] = mat * Ws[layer] + Bs[layer].replicate(mat.rows(), 1);
201-
activate(Ys[layer], acts[layer]);
202-
}
203-
204-
void loss(const Eigen::VectorXi& labels) {
205-
delta = Ys.back();
206-
delta = (delta - delta.rowwise().maxCoeff().replicate(1, delta.cols())).array().exp().matrix();
207-
delta = delta.cwiseQuotient(delta.rowwise().sum().replicate(1, delta.cols()));
208-
for(size_t i=beg_row, j=0; j<batch_size; i++, j++) {
209-
delta(j, labels[i]) -= 1.0;
210-
}
211-
}
212-
213-
void backward(size_t layer, const Eigen::MatrixXf& Xin) {
214-
deactivate(Ys[layer], acts[layer]);
215-
delta = delta.cwiseProduct(Ys[layer]);
216-
//std::cout << Xin.rows() << "/" << Xin.cols() << " = " << delta.rows() << "/" << delta.cols() << std::endl;
217-
dB[layer] = delta.colwise().sum();
218-
dW[layer] = Xin.transpose() * delta;
219-
//dW[layer] = Xin * delta;
220-
221-
if(layer > 0) {
222-
delta = delta * Ws[layer].transpose();
223-
}
224-
}
225-
226-
void update(size_t layer) {
227-
Ws[layer] -= lrate*(dW[layer] + decay*Ws[layer]);
228-
Bs[layer] -= lrate*(dB[layer] + decay*Bs[layer]);
229-
}
230-
231-
void shuffle(Eigen::MatrixXf& mat, Eigen::VectorXi& vec, const size_t row_num) {
232-
233-
static thread_local std::mt19937 gen(0);
234-
235-
Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> p(row_num);
236-
p.setIdentity();
237-
std::shuffle(p.indices().data(), p.indices().data() + p.indices().size(), gen);
238-
239-
mat = p * mat;
240-
vec = p * vec;
241-
}
242-
243-
void validate() {
244-
Eigen::MatrixXf res = test_images;
245-
auto t1 = std::chrono::high_resolution_clock::now();
246-
for(size_t i=0; i<acts.size(); i++) {
247-
res = res * Ws[i] + Bs[i].replicate(res.rows(), 1);
248-
if(acts[i] == Activation::RELU) {
249-
relu(res);
250-
}
251-
else if(acts[i] == Activation::SIGMOID) {
252-
sigmoid(res);
253-
}
254-
}
255-
auto t2 = std::chrono::high_resolution_clock::now();
256-
std::cout << "Infer runtime: " << time_diff(t1, t2) << " ms\n";
257-
258-
size_t correct_num {0};
259-
for(int k=0; k<res.rows(); k++) {
260-
int pred ;
261-
res.row(k).maxCoeff(&pred);
262-
if(pred == test_labels[k]) {
263-
correct_num ++;
264-
}
265-
}
266-
std::cout << "Accuracy: " << correct_num << '/' << res.rows() << '\n';
267-
}
268-
269-
270-
// Parameter functions ------------------------------------------------------
271-
auto& epoch_num(unsigned e) {
272-
epoch = e;
273-
return *this;
274-
}
275-
auto& batch(size_t b) {
276-
batch_size = b;
277-
assert(images.rows()%batch_size == 0);
278-
return *this;
279-
}
280-
auto& learning_rate(float l) {
281-
lrate = l;
282-
return *this;
283-
}
284-
285-
std::vector<Eigen::MatrixXf> Ys;
286-
std::vector<Eigen::MatrixXf> Ws;
287-
std::vector<Eigen::MatrixXf> Bs;
288-
std::vector<Eigen::MatrixXf> dW;
289-
std::vector<Eigen::MatrixXf> dB;
290-
291-
std::vector<Activation> acts;
292-
293-
// Training images # = 60000 x 784 (28 x 28)
294-
Eigen::MatrixXf images;
295-
Eigen::VectorXi labels;
296-
Eigen::MatrixXf delta;
297-
298-
// Testing images # = 10000 x 784 (28 x 28)
299-
Eigen::MatrixXf test_images;
300-
Eigen::VectorXi test_labels;
301-
302-
int beg_row {0};
303-
304-
float lrate {0.01f};
305-
float decay {0.01f};
306-
307-
unsigned epoch {0};
308-
size_t batch_size {1};
309-
};
310-
311-
inline auto build_dnn(unsigned epoch) {
312-
MNIST dnn;
313-
dnn.epoch_num(epoch).batch(60).learning_rate(0.001);
314-
315-
//dnn.add_layer(784, 64, Activation::RELU);
316-
//dnn.add_layer(64, 32, Activation::RELU);
317-
//dnn.add_layer(32, 10, Activation::NONE);
318-
319-
//dnn.add_layer(784, 64, Activation::RELU);
320-
//dnn.add_layer(64, 32, Activation::RELU);
321-
//dnn.add_layer(32, 16, Activation::RELU);
322-
//dnn.add_layer(16, 8, Activation::RELU);
323-
//dnn.add_layer(8, 10, Activation::NONE);
324-
325-
//dnn.add_layer(784, 256, Activation::RELU);
326-
//dnn.add_layer(256, 128, Activation::RELU);
327-
//dnn.add_layer(128, 64, Activation::RELU);
328-
//dnn.add_layer(64, 32, Activation::RELU);
329-
//dnn.add_layer(32, 10, Activation::NONE);
330-
331-
dnn.add_layer(784, 100, Activation::RELU);
332-
dnn.add_layer(100, 30, Activation::RELU);
333-
dnn.add_layer(30, 10, Activation::NONE);
334-
335-
return dnn;
336-
}
337-
338-
339173
// ----------------------------------------------------------------------------
340174

341175
struct MNIST_DNN {
@@ -406,7 +240,7 @@ struct MNIST_DNN {
406240
correct_num ++;
407241
}
408242
}
409-
std::cout << "Accuracy: " << correct_num << '/' << res.rows() << '\n';
243+
//std::cout << "Accuracy: " << correct_num << '/' << res.rows() << '\n';
410244
}
411245

412246
// Parameter functions ------------------------------------------------------
@@ -516,7 +350,6 @@ inline void backward_task(MNIST_DNN& D, size_t i, Eigen::MatrixXf& mat) {
516350
if(D.beg_row >= IMAGES.rows()) {
517351
D.beg_row = 0;
518352
}
519-
520353
}
521354
}
522355

@@ -544,11 +377,5 @@ inline void report_runtime(std::chrono::time_point<std::chrono::high_resolution_
544377
void run_tbb(unsigned, unsigned);
545378
void run_taskflow(unsigned, unsigned);
546379
void run_omp(unsigned, unsigned);
547-
548-
//void run_omp(MNIST&, unsigned);
549-
void run_sequential(MNIST&, unsigned);
550-
void run_sequential(MNIST_DNN&, unsigned);
551-
552-
//void run_sequential2(MNIST_DNN&, unsigned);
553-
void run_sequential2(unsigned, unsigned);
380+
void run_sequential(unsigned, unsigned);
554381

benchmark/parallel_dnn/main.cpp

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ std::chrono::milliseconds measure_time_taskflow(
88
unsigned num_threads
99
) {
1010
std::puts("Taskflow");
11-
//auto dnn {build_dnn(num_epochs)};
1211
auto t1 = std::chrono::high_resolution_clock::now();
1312
run_taskflow(num_epochs, num_threads);
1413
auto t2 = std::chrono::high_resolution_clock::now();
@@ -33,10 +32,7 @@ std::chrono::milliseconds measure_time_tbb(
3332
unsigned num_threads
3433
) {
3534
std::puts("TBB");
36-
//auto dnn {build_dnn(num_epochs)};
3735
auto t1 = std::chrono::high_resolution_clock::now();
38-
//run_tbb(dnn, num_threads);
39-
//run_tbb(num_epochs, num_threads);
4036
run_tbb(num_epochs, num_threads);
4137
auto t2 = std::chrono::high_resolution_clock::now();
4238
return std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
@@ -88,10 +84,9 @@ int main(int argc, char *argv[]){
8884
double tf_time {0.0};
8985

9086
for(int j=0; j<rounds; ++j) {
91-
//omp_time += measure_time_omp(epoch, num_threads).count();
92-
//tbb_time += measure_time_tbb(epoch, num_threads).count();
93-
tf_time += measure_time_taskflow(epoch, num_threads).count();
94-
exit(1);
87+
omp_time += measure_time_omp(epoch, num_threads).count();
88+
tbb_time += measure_time_tbb(epoch, num_threads).count();
89+
tf_time += measure_time_taskflow(epoch, num_threads).count();
9590
}
9691

9792
std::cout << std::setw(12) << epoch

benchmark/parallel_dnn/omp.cpp

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ void omp_dnn(MNIST_DNN& D, unsigned num_iteration) {
1515
#pragma omp single
1616
{
1717
for(auto i=0u; i<num_iteration; i++) {
18-
//for(auto i=0u; i<2; i++) {
1918
// Forward Task
2019
if(i == 0) {
2120
#pragma omp task depend (out: dep_f[i]) shared(D, IMAGES, LABELS)
@@ -82,6 +81,7 @@ void omp_dnn(MNIST_DNN& D, unsigned num_iteration) {
8281
}
8382
} // End of omp parallel
8483

84+
8585
delete [] dep_f;
8686
delete [] dep_b;
8787
delete [] dep_u;
@@ -94,15 +94,14 @@ void run_omp(unsigned num_epochs, unsigned num_threads) {
9494
init_dnn(dnns[i]);
9595
}
9696

97-
//omp_set_num_threads(num_threads);
98-
omp_set_num_threads(4);
97+
omp_set_num_threads(num_threads);
9998

100-
auto t1 = std::chrono::high_resolution_clock::now();
99+
//auto t1 = std::chrono::high_resolution_clock::now();
101100
#pragma omp parallel
102101
{
103102
#pragma omp single
104103
{
105-
for(auto i=0u; i<100; i++) {
104+
for(auto i=0u; i<num_epochs; i++) {
106105
for(auto j=0u; j<NUM_DNNS; j++) {
107106
#pragma omp task firstprivate(j) shared(dnns)
108107
{
@@ -112,11 +111,11 @@ void run_omp(unsigned num_epochs, unsigned num_threads) {
112111
#pragma omp taskwait
113112

114113
for(auto j=0u; j<NUM_DNNS; j++) {
115-
std::cout << "Validate " << j << "th NN: ";
114+
//std::cout << "Validate " << j << "th NN: ";
116115
dnns[j].validate(TEST_IMAGES, TEST_LABELS);
117116
}
118117
shuffle(IMAGES, LABELS);
119-
report_runtime(t1);
118+
//report_runtime(t1);
120119
}
121120
}
122121
}
@@ -126,8 +125,6 @@ void run_omp(unsigned num_epochs, unsigned num_threads) {
126125

127126

128127

129-
void run_omp(MNIST& D, unsigned num_threads) {
130-
}
131128

132129
/*
133130
void run_omp(MNIST& D, unsigned num_threads) {

0 commit comments

Comments
 (0)