Skip to content

Commit 1dadd91

Browse files
updated doc/app/wavefront
1 parent bedb18a commit 1dadd91

8 files changed

Lines changed: 481 additions & 47 deletions

File tree

doc/app/wavefront/.seq.cpp.swp

12 KB
Binary file not shown.

doc/app/wavefront/omp.cpp

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#include <cstdio>
2+
#include <iostream>
3+
#include <string>
4+
#include <array>
5+
#include <vector>
6+
#include <random>
7+
#include <algorithm>
8+
#include <iterator>
9+
#include <tuple>
10+
#include <chrono>
11+
#include <cmath>
12+
13+
#include <omp.h>
14+
15+
int M= 40000, N=40000;
16+
int B = 160;
17+
int MB = (M/B) + (M%B>0);
18+
int NB = (N/B) + (N%B>0);
19+
20+
int **D; //dependency matrix
21+
22+
double **value;
23+
24+
inline double calc(double v0, double v1) {
25+
if(v0 == v1)
26+
return std::pow(v0/v1, 4.0f);
27+
else
28+
return std::max(v0,v1);
29+
}
30+
31+
//computation given block row index i, block col index j
32+
inline void block_computation(int i, int j){
33+
34+
int start_i = i*B;
35+
int end_i = (i*B+B > M) ? M : i*B+B;
36+
int start_j = j*B;
37+
int end_j = (j*B+B > N) ? N : j*B+B;
38+
39+
for ( int ii = start_i; ii < end_i; ++ii ) {
40+
for ( int jj = start_j; jj < end_j; ++jj ) {
41+
double v0 = ii == 0 ? 0 : value[ii-1][jj];
42+
double v1 = jj == 0 ? 0 : value[ii][jj-1];
43+
value[ii][jj] = ii==0 && jj==0 ? 1 : calc(v0,v1);
44+
}
45+
}
46+
47+
}
48+
49+
50+
void RunGraph() {
51+
52+
omp_set_num_threads(4);
53+
54+
#pragma omp parallel
55+
{
56+
#pragma omp single
57+
{
58+
value[M-1][N-1] = 0;
59+
for( int k=1; k <= 2*MB-1; k++) {
60+
int i, j;
61+
if(k <= MB){
62+
i = k-1;
63+
j = 0;
64+
}
65+
else{
66+
//assume matrix is square
67+
i = MB-1;
68+
j = k-MB;
69+
}
70+
71+
for(; (k <= MB && i>=0) || (k > MB && j <= NB-1) ; i--, j++){
72+
73+
if(i > 0 && j > 0){
74+
#pragma omp task depend(in:D[i-1][j], D[i][j-1]) depend(out:D[i][j]) firstprivate(i, j)
75+
block_computation(i, j);
76+
}
77+
//top left corner
78+
else if(i == 0 && j == 0){
79+
#pragma omp task depend(out:D[i][j]) firstprivate(i, j)
80+
block_computation(i, j);
81+
}
82+
//top edge
83+
else if(j+1 <= NB && i == 0 && j > 0){
84+
#pragma omp task depend(in:D[i][j-1]) depend(out:D[i][j]) firstprivate(i, j)
85+
block_computation(i, j);
86+
}
87+
//left edge
88+
else if(i+1 <= MB && i > 0 && j == 0){
89+
#pragma omp task depend(in:D[i-1][j]) depend(out:D[i][j]) firstprivate(i, j)
90+
block_computation(i, j);
91+
}
92+
//bottom right corner
93+
else if(i == MB-1 && j == NB-1){
94+
#pragma omp task depend(in:D[i-1][j] ,D[i][j-1]) firstprivate(i, j)
95+
block_computation(i, j);
96+
}
97+
else{
98+
std::cout << "There is some case not covered!!!" << std::endl;
99+
}
100+
101+
102+
}
103+
104+
// #pragma omp taskwait
105+
106+
}
107+
}
108+
}
109+
}
110+
111+
int main(int argc, char *argv[]) {
112+
113+
D = new int *[MB];
114+
for(int i=0; i<MB; ++i) D[i] = new int [NB];
115+
for(int i=0; i<MB; ++i){
116+
for(int j=0; j<NB; ++j){
117+
D[i][j] = 0;
118+
}
119+
}
120+
121+
value = new double *[M];
122+
for(int i = 0; i<M; ++i) value[i] = new double [N];
123+
for(int i=0; i<M; ++i){
124+
for(int j=0; j<N ; ++j){
125+
value[i][j] = i*N + j;
126+
}
127+
}
128+
129+
auto beg = std::chrono::high_resolution_clock::now();
130+
131+
RunGraph();
132+
133+
auto end = std::chrono::high_resolution_clock::now();
134+
std::cout << "OMP: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count() << " ms, ";
135+
std::cout << "The result is:" << value[M-1][N-1] << std::endl;
136+
137+
for ( int i = 0; i < M; ++i ) delete [] value[i];
138+
delete [] value;
139+
140+
for ( int i = 0; i < MB; ++i ) delete [] D[i];
141+
delete [] D;
142+
143+
return 0;
144+
}
145+
146+
147+
148+

doc/app/wavefront/seq.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include <algorithm> // for std::max
2+
#include <cstdio>
3+
#include <chrono>
4+
#include <iostream>
5+
#include <thread>
6+
#include <random>
7+
#include <cmath>
8+
9+
int M=40000, N=40000;
10+
int B = 160;
11+
int MB = (M/B) + (M%B>0);
12+
int NB = (N/B) + (N%B>0);
13+
14+
double **value;
15+
16+
inline double calc(double v0, double v1) {
17+
if(v0 == v1){
18+
return std::pow(v0/v1, 4.0f);
19+
}
20+
else
21+
return std::max(v0,v1);
22+
}
23+
24+
void init_data(){
25+
value = new double *[M];
26+
for ( int i = 0; i < M; ++i ) value[i] = new double [N];
27+
for(int i=0; i<M; ++i){
28+
for(int j=0; j<N ; ++j){
29+
value[i][j] = i*N + j;
30+
}
31+
}
32+
}
33+
34+
int main(int argc, char *argv[]) {
35+
36+
init_data();
37+
38+
auto beg = std::chrono::high_resolution_clock::now();
39+
{
40+
for( int i=0; i<MB; ++i){
41+
for( int j=0; j<NB; ++j) {
42+
int start_i = i*B;
43+
int end_i = (i*B+B > M) ? M : i*B+B;
44+
int start_j = j*B;
45+
int end_j = (j*B+B > N) ? N : j*B+B;
46+
for ( int ii = start_i; ii < end_i; ++ii ) {
47+
for ( int jj = start_j; jj < end_j; ++jj ) {
48+
double v0 = (ii == 0) ? 0 : value[ii-1][jj];
49+
double v1 = (jj == 0) ? 0 : value[ii][jj-1];
50+
value[ii][jj] = (ii==0 && jj==0) ? 1 : calc(v0,v1);
51+
}
52+
}
53+
}
54+
}
55+
}
56+
auto end = std::chrono::high_resolution_clock::now();
57+
std::cout << "Seq: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count() << " ms, Result = "
58+
<< value[M-1][N-1] << '\n';
59+
60+
//for( int i=0; i<M; ++i){
61+
// for( int j=0; j<N; ++j) {
62+
// std::cout << value[i][j] << ' ';
63+
// }
64+
// std::cout << '\n';
65+
//}
66+
67+
68+
for ( int i = 0; i < M; ++i ) delete [] value[i];
69+
delete [] value;
70+
return 0;
71+
}
72+

doc/app/wavefront/taskflow.cpp

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#include <algorithm> // for std::max
2+
#include <cstdio>
3+
#include <cmath>
4+
#include <taskflow/taskflow.hpp>
5+
6+
7+
int M=40000, N=40000;
8+
int B = 160;
9+
int MB = (M/B) + (M%B>0);
10+
int NB = (N/B) + (N%B>0);
11+
12+
double **value;
13+
14+
15+
inline double calc(double v0, double v1) {
16+
if(v0 == v1)
17+
return std::pow(v0/v1, 4.0f);
18+
else
19+
return std::max(v0,v1);
20+
}
21+
22+
23+
void BuildGraph(std::vector<std::vector<tf::Task>>& node) {
24+
value[M-1][N-1] = 0;
25+
for( int i=MB; --i>=0; ) {
26+
for( int j=NB; --j>=0; ) {
27+
node[i][j].work(
28+
[i=i, j=j]() {
29+
int start_i = i*B;
30+
int end_i = (i*B+B > M) ? M : i*B+B;
31+
int start_j = j*B;
32+
int end_j = (j*B+B > N) ? N : j*B+B;
33+
for ( int ii = start_i; ii < end_i; ++ii ) {
34+
for ( int jj = start_j; jj < end_j; ++jj ) {
35+
double v0 = ii == 0 ? 0 : value[ii-1][jj];
36+
double v1 = jj == 0 ? 0 : value[ii][jj-1];
37+
value[ii][jj] = ii==0 && jj==0 ? 1 : calc(v0,v1);
38+
}
39+
}
40+
}
41+
);
42+
43+
if(j+1 < NB) node[i][j].precede(node[i][j+1]);
44+
if(i+1 < MB) node[i][j].precede(node[i+1][j]);
45+
}
46+
}
47+
}
48+
49+
double EvaluateGraph(tf::Taskflow &tf) {
50+
tf.wait_for_all();
51+
return value[M-1][N-1];
52+
}
53+
54+
void CleanupGraph(std::vector<std::vector<tf::Task>>& node) {
55+
node.clear();
56+
}
57+
58+
void init_data(){
59+
value = new double *[M];
60+
for ( int i = 0; i < M; ++i ) value[i] = new double [N];
61+
for(int i=0; i<M; ++i){
62+
for(int j=0; j<N ; ++j){
63+
value[i][j] = i*N + j;
64+
}
65+
}
66+
}
67+
68+
69+
int main(int argc, char *argv[]) {
70+
init_data();
71+
72+
double result;
73+
auto beg = std::chrono::high_resolution_clock::now();
74+
{
75+
tf::Taskflow tf(std::thread::hardware_concurrency());
76+
std::vector<std::vector<tf::Task>> node(MB);
77+
for(auto &n : node){
78+
for(size_t i=0; i<NB; i++){
79+
n.emplace_back(tf.placeholder());
80+
}
81+
}
82+
BuildGraph(node);
83+
result = EvaluateGraph(tf);
84+
CleanupGraph(node);
85+
}
86+
auto end = std::chrono::high_resolution_clock::now();
87+
std::cout << "Taskflow: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count() << " ms, Result=" <<
88+
result << "\n";
89+
90+
91+
92+
93+
for ( int i = 0; i < M; ++i ) delete [] value[i];
94+
delete [] value;
95+
return 0;
96+
}
97+
98+
99+
100+

0 commit comments

Comments
 (0)