1- #include < cstdio>
21#include < iostream>
3- #include < string>
4- #include < array>
5- #include < vector>
62#include < random>
7- #include < algorithm>
8- #include < iterator>
9- #include < tuple>
3+ #include < thread>
104#include < chrono>
115#include < cmath>
12-
6+ # include < cassert >
137#include < omp.h>
148
15- int M= 40000 , N= 40000 ;
9+ int M = 40000 , N = 40000 ;
1610int B = 160 ;
1711int MB = (M/B) + (M%B>0 );
1812int NB = (N/B) + (N%B>0 );
1913
20- int **D; // dependency matrix
14+ int **D {nullptr };
15+ double **matrix {nullptr };
2116
22- double **value;
17+ // initialize the matrix
18+ void init_matrix (){
19+ matrix = new double *[M];
20+ for (int i = 0 ; i < M; ++i ) {
21+ matrix[i] = new double [N];
22+ }
23+ for (int i=0 ; i<M; ++i){
24+ for (int j=0 ; j<N ; ++j){
25+ matrix[i][j] = i*N + j;
26+ }
27+ }
28+ }
2329
24- inline double calc (double v0, double v1) {
25- if (v0 == v1)
26- return std::pow (v0/v1, 4 .0f );
27- else
28- return std::max (v0,v1);
30+ // destroy the matrix
31+ void destroy_matrix () {
32+ for ( int i = 0 ; i < M; ++i ) {
33+ delete [] matrix[i];
34+ }
35+ delete [] matrix;
36+ }
37+
38+ // perform some nominal computations
39+ double calc (double v0, double v1) {
40+ return (v0 == v1) ? std::pow (v0/v1, 4 .0f ) : std::max (v0,v1);
2941}
3042
3143// computation given block row index i, block col index j
32- inline void block_computation (int i, int j){
44+ void block_computation (int i, int j){
3345
3446 int start_i = i*B;
3547 int end_i = (i*B+B > M) ? M : i*B+B;
@@ -38,24 +50,33 @@ inline void block_computation(int i, int j){
3850
3951 for ( int ii = start_i; ii < end_i; ++ii ) {
4052 for ( int jj = start_j; jj < end_j; ++jj ) {
41- double v0 = ii == 0 ? 0 : value [ii-1 ][jj];
42- double v1 = jj == 0 ? 0 : value [ii][jj-1 ];
43- value [ii][jj] = ii==0 && jj==0 ? 1 : calc (v0,v1);
53+ double v0 = ii == 0 ? 0 : matrix [ii-1 ][jj];
54+ double v1 = jj == 0 ? 0 : matrix [ii][jj-1 ];
55+ matrix [ii][jj] = ii==0 && jj==0 ? 1 : calc (v0,v1);
4456 }
4557 }
4658
4759}
4860
49-
50- void RunGraph () {
61+ // wavefront computation
62+ void wavefront () {
5163
52- omp_set_num_threads (4 );
64+ // set up the dependency matrix
65+ D = new int *[MB];
66+ for (int i=0 ; i<MB; ++i) D[i] = new int [NB];
67+ for (int i=0 ; i<MB; ++i){
68+ for (int j=0 ; j<NB; ++j){
69+ D[i][j] = 0 ;
70+ }
71+ }
72+
73+ omp_set_num_threads (std::thread::hardware_concurrency ());
5374
5475 #pragma omp parallel
5576 {
5677 #pragma omp single
5778 {
58- value [M-1 ][N-1 ] = 0 ;
79+ matrix [M-1 ][N-1 ] = 0 ;
5980 for ( int k=1 ; k <= 2 *MB-1 ; k++) {
6081 int i, j;
6182 if (k <= MB){
@@ -95,50 +116,32 @@ void RunGraph() {
95116 block_computation (i, j);
96117 }
97118 else {
98- std::cout << " There is some case not covered!!! " << std::endl ;
119+ assert ( false ) ;
99120 }
100-
101-
102121 }
103-
104- // #pragma omp taskwait
105-
106122 }
107123 }
108124 }
125+
126+ for ( int i = 0 ; i < MB; ++i ) delete [] D[i];
127+ delete [] D;
109128}
110129
130+ // main function
111131int main (int argc, char *argv[]) {
112132
113- D = new int *[MB];
114- for (int i=0 ; i<MB; ++i) D[i] = new int [NB];
115- for (int i=0 ; i<MB; ++i){
116- for (int j=0 ; j<NB; ++j){
117- D[i][j] = 0 ;
118- }
119- }
120-
121- value = new double *[M];
122- for (int i = 0 ; i<M; ++i) value[i] = new double [N];
123- for (int i=0 ; i<M; ++i){
124- for (int j=0 ; j<N ; ++j){
125- value[i][j] = i*N + j;
126- }
127- }
133+ init_matrix ();
128134
129135 auto beg = std::chrono::high_resolution_clock::now ();
130-
131- RunGraph ();
132-
136+ wavefront ();
133137 auto end = std::chrono::high_resolution_clock::now ();
134- std::cout << " OMP: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count () << " ms, " ;
135- std::cout << " The result is:" << value[M-1 ][N-1 ] << std::endl;
136138
137- for ( int i = 0 ; i < M; ++i ) delete [] value[i];
138- delete [] value;
139+ std::cout << " OpenMP wavefront elapsed time: "
140+ << std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count ()
141+ << " ms\n "
142+ << " result: " << matrix[M-1 ][N-1 ] << std::endl;
139143
140- for ( int i = 0 ; i < MB; ++i ) delete [] D[i];
141- delete [] D;
144+ destroy_matrix ();
142145
143146 return 0 ;
144147}
0 commit comments