Skip to content

Commit 4809121

Browse files
updated wavefront cookbook
1 parent fd97fcb commit 4809121

File tree

6 files changed

+400
-312
lines changed

6 files changed

+400
-312
lines changed

doc/app/wavefront/Makefile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
tbb_include=/home/twhuang/PhD/Code/tbb/include/
2+
tbb_library=/home/twhuang/PhD/Code/tbb/build/linux_intel64_gcc_cc7_libc2.27_kernel4.15.0_release/
3+
taskflow_include=/home/twhuang/PhD/Code/cpp-taskflow/
4+
5+
default:
6+
g++ seq.cpp -O2 -o seq
7+
g++ omp.cpp -O2 -fopenmp -o omp
8+
g++ taskflow.cpp -O2 -I $(taskflow_include) -std=c++1z -lpthread -o taskflow
9+
g++ -std=c++17 -O2 tbb.cpp -o tbb -I $(tbb_include) -L $(tbb_library) -ltbb -lpthread -lrt
10+
11+
clean:
12+
rm omp seq taskflow tbb

doc/app/wavefront/omp.cpp

Lines changed: 57 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,47 @@
1-
#include <cstdio>
21
#include <iostream>
3-
#include <string>
4-
#include <array>
5-
#include <vector>
62
#include <random>
7-
#include <algorithm>
8-
#include <iterator>
9-
#include <tuple>
3+
#include <thread>
104
#include <chrono>
115
#include <cmath>
12-
6+
#include <cassert>
137
#include <omp.h>
148

15-
int M= 40000, N=40000;
9+
int M = 40000, N = 40000;
1610
int B = 160;
1711
int MB = (M/B) + (M%B>0);
1812
int NB = (N/B) + (N%B>0);
1913

20-
int **D; //dependency matrix
14+
int **D {nullptr};
15+
double **matrix {nullptr};
2116

22-
double **value;
17+
// initialize the matrix
18+
void init_matrix(){
19+
matrix = new double *[M];
20+
for (int i = 0; i < M; ++i ) {
21+
matrix[i] = new double [N];
22+
}
23+
for (int i=0; i<M; ++i){
24+
for(int j=0; j<N ; ++j){
25+
matrix[i][j] = i*N + j;
26+
}
27+
}
28+
}
2329

24-
inline double calc(double v0, double v1) {
25-
if(v0 == v1)
26-
return std::pow(v0/v1, 4.0f);
27-
else
28-
return std::max(v0,v1);
30+
// destroy the matrix
31+
void destroy_matrix() {
32+
for ( int i = 0; i < M; ++i ) {
33+
delete [] matrix[i];
34+
}
35+
delete [] matrix;
36+
}
37+
38+
// perform some nominal computations
39+
double calc(double v0, double v1) {
40+
return (v0 == v1) ? std::pow(v0/v1, 4.0f) : std::max(v0,v1);
2941
}
3042

3143
//computation given block row index i, block col index j
32-
inline void block_computation(int i, int j){
44+
void block_computation(int i, int j){
3345

3446
int start_i = i*B;
3547
int end_i = (i*B+B > M) ? M : i*B+B;
@@ -38,24 +50,33 @@ inline void block_computation(int i, int j){
3850

3951
for ( int ii = start_i; ii < end_i; ++ii ) {
4052
for ( int jj = start_j; jj < end_j; ++jj ) {
41-
double v0 = ii == 0 ? 0 : value[ii-1][jj];
42-
double v1 = jj == 0 ? 0 : value[ii][jj-1];
43-
value[ii][jj] = ii==0 && jj==0 ? 1 : calc(v0,v1);
53+
double v0 = ii == 0 ? 0 : matrix[ii-1][jj];
54+
double v1 = jj == 0 ? 0 : matrix[ii][jj-1];
55+
matrix[ii][jj] = ii==0 && jj==0 ? 1 : calc(v0,v1);
4456
}
4557
}
4658

4759
}
4860

49-
50-
void RunGraph() {
61+
// wavefront computation
62+
void wavefront() {
5163

52-
omp_set_num_threads(4);
64+
// set up the dependency matrix
65+
D = new int *[MB];
66+
for(int i=0; i<MB; ++i) D[i] = new int [NB];
67+
for(int i=0; i<MB; ++i){
68+
for(int j=0; j<NB; ++j){
69+
D[i][j] = 0;
70+
}
71+
}
72+
73+
omp_set_num_threads(std::thread::hardware_concurrency());
5374

5475
#pragma omp parallel
5576
{
5677
#pragma omp single
5778
{
58-
value[M-1][N-1] = 0;
79+
matrix[M-1][N-1] = 0;
5980
for( int k=1; k <= 2*MB-1; k++) {
6081
int i, j;
6182
if(k <= MB){
@@ -95,50 +116,32 @@ void RunGraph() {
95116
block_computation(i, j);
96117
}
97118
else{
98-
std::cout << "There is some case not covered!!!" << std::endl;
119+
assert(false);
99120
}
100-
101-
102121
}
103-
104-
// #pragma omp taskwait
105-
106122
}
107123
}
108124
}
125+
126+
for ( int i = 0; i < MB; ++i ) delete [] D[i];
127+
delete [] D;
109128
}
110129

130+
// main function
111131
int main(int argc, char *argv[]) {
112132

113-
D = new int *[MB];
114-
for(int i=0; i<MB; ++i) D[i] = new int [NB];
115-
for(int i=0; i<MB; ++i){
116-
for(int j=0; j<NB; ++j){
117-
D[i][j] = 0;
118-
}
119-
}
120-
121-
value = new double *[M];
122-
for(int i = 0; i<M; ++i) value[i] = new double [N];
123-
for(int i=0; i<M; ++i){
124-
for(int j=0; j<N ; ++j){
125-
value[i][j] = i*N + j;
126-
}
127-
}
133+
init_matrix();
128134

129135
auto beg = std::chrono::high_resolution_clock::now();
130-
131-
RunGraph();
132-
136+
wavefront();
133137
auto end = std::chrono::high_resolution_clock::now();
134-
std::cout << "OMP: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count() << " ms, ";
135-
std::cout << "The result is:" << value[M-1][N-1] << std::endl;
136138

137-
for ( int i = 0; i < M; ++i ) delete [] value[i];
138-
delete [] value;
139+
std::cout << "OpenMP wavefront elapsed time: "
140+
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count()
141+
<< " ms\n"
142+
<< "result: " << matrix[M-1][N-1] << std::endl;
139143

140-
for ( int i = 0; i < MB; ++i ) delete [] D[i];
141-
delete [] D;
144+
destroy_matrix();
142145

143146
return 0;
144147
}

doc/app/wavefront/seq.cpp

Lines changed: 55 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,83 @@
1-
#include <algorithm> // for std::max
1+
#include <algorithm>
22
#include <cstdio>
33
#include <chrono>
44
#include <iostream>
55
#include <thread>
66
#include <random>
77
#include <cmath>
88

9-
int M=40000, N=40000;
9+
int M = 40000, N = 40000;
1010
int B = 160;
1111
int MB = (M/B) + (M%B>0);
1212
int NB = (N/B) + (N%B>0);
1313

14-
double **value;
14+
double **matrix {nullptr};
1515

16-
inline double calc(double v0, double v1) {
17-
if(v0 == v1){
18-
return std::pow(v0/v1, 4.0f);
19-
}
20-
else
21-
return std::max(v0,v1);
16+
// perform some nominal computations.
17+
double calc(double v0, double v1) {
18+
return (v0 == v1) ? std::pow(v0/v1, 4.0f) : std::max(v0,v1);
2219
}
2320

24-
void init_data(){
25-
value = new double *[M];
26-
for ( int i = 0; i < M; ++i ) value[i] = new double [N];
27-
for(int i=0; i<M; ++i){
21+
// initialize the matrix
22+
void init_matrix(){
23+
matrix = new double *[M];
24+
for (int i = 0; i < M; ++i ) {
25+
matrix[i] = new double [N];
26+
}
27+
for (int i=0; i<M; ++i){
2828
for(int j=0; j<N ; ++j){
29-
value[i][j] = i*N + j;
29+
matrix[i][j] = i*N + j;
3030
}
3131
}
3232
}
3333

34-
int main(int argc, char *argv[]) {
34+
// destroy the matrix
35+
void destroy_matrix() {
36+
for ( int i = 0; i < M; ++i ) {
37+
delete [] matrix[i];
38+
}
39+
delete [] matrix;
40+
}
3541

36-
init_data();
42+
// the computation inside each block
43+
void block_computation(int i, int j){
3744

38-
auto beg = std::chrono::high_resolution_clock::now();
39-
{
40-
for( int i=0; i<MB; ++i){
41-
for( int j=0; j<NB; ++j) {
42-
int start_i = i*B;
43-
int end_i = (i*B+B > M) ? M : i*B+B;
44-
int start_j = j*B;
45-
int end_j = (j*B+B > N) ? N : j*B+B;
46-
for ( int ii = start_i; ii < end_i; ++ii ) {
47-
for ( int jj = start_j; jj < end_j; ++jj ) {
48-
double v0 = (ii == 0) ? 0 : value[ii-1][jj];
49-
double v1 = (jj == 0) ? 0 : value[ii][jj-1];
50-
value[ii][jj] = (ii==0 && jj==0) ? 1 : calc(v0,v1);
51-
}
52-
}
53-
}
45+
int start_i = i*B;
46+
int end_i = (i*B+B > M) ? M : i*B+B;
47+
int start_j = j*B;
48+
int end_j = (j*B+B > N) ? N : j*B+B;
49+
50+
for ( int ii = start_i; ii < end_i; ++ii ) {
51+
for ( int jj = start_j; jj < end_j; ++jj ) {
52+
double v0 = ii == 0 ? 0 : matrix[ii-1][jj];
53+
double v1 = jj == 0 ? 0 : matrix[ii][jj-1];
54+
matrix[ii][jj] = ii==0 && jj==0 ? 1 : calc(v0,v1);
5455
}
5556
}
57+
}
58+
59+
// wavefront computation
60+
void wavefront() {
61+
for( int i=0; i<MB; ++i){
62+
for( int j=0; j<NB; ++j) {
63+
block_computation(i, j);
64+
}
65+
}
66+
}
67+
68+
// main function
69+
int main(int argc, char *argv[]) {
70+
71+
init_matrix();
72+
73+
auto beg = std::chrono::high_resolution_clock::now();
74+
wavefront();
5675
auto end = std::chrono::high_resolution_clock::now();
57-
std::cout << "Seq: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count() << " ms, Result = "
58-
<< value[M-1][N-1] << '\n';
76+
std::cout << "sequential program elapsed time: "
77+
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count()
78+
<< " ms\n"
79+
<< "result: " << matrix[M-1][N-1] << '\n';
5980

60-
for ( int i = 0; i < M; ++i ) delete [] value[i];
61-
delete [] value;
6281
return 0;
6382
}
6483

0 commit comments

Comments
 (0)