Skip to content

Commit 5bf7965

Browse files
Merge branch 'dev'
2 parents 6308dd1 + 8a660cf commit 5bf7965

File tree

15 files changed

+480
-29
lines changed

15 files changed

+480
-29
lines changed

docs/Cookbook.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
<a class="el" href="chapter3.html">C3: Dynamic Tasking</a> <br />
110110
<a class="el" href="chapter4.html">C4: Conditional Tasking</a> <br />
111111
<a class="el" href="chapter5.html">C5: Composable Tasking</a> <br />
112+
<a class="el" href="chapter6.html">C6: CPU-GPU Tasking</a> <br />
112113
</p>
113114
</div></div><!-- contents -->
114115
</div><!-- doc-content -->

docs/Cookbook.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,8 @@ var Cookbook =
3333
[ "C5: Composable Tasking", "chapter5.html", [
3434
[ "Composes a Taskflow", "chapter5.html#C5_ComposesATaskflow", null ],
3535
[ "Module Task", "chapter5.html#C5_ModuleTask", null ]
36+
] ],
37+
[ "C6: CPU-GPU Tasking", "chapter6.html", [
38+
[ "Create a cudaFlow", "chapter6.html#C6_Create_a_cudaFlow", null ]
3639
] ]
3740
];

docs/chapter6.html

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
<!-- HTML header for doxygen 1.8.13-->
2+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3+
<html xmlns="http://www.w3.org/1999/xhtml">
4+
<head>
5+
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
6+
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
7+
<meta name="generator" content="Doxygen 1.8.14"/>
8+
<meta name="viewport" content="width=device-width, initial-scale=1"/>
9+
<title>Cpp-Taskflow</title>
10+
<link href="tabs.css" rel="stylesheet" type="text/css"/>
11+
<link rel="icon" type="image/x-icon" href="favicon.ico" />
12+
<script type="text/javascript" src="jquery.js"></script>
13+
<script type="text/javascript" src="dynsections.js"></script>
14+
<link href="navtree.css" rel="stylesheet" type="text/css"/>
15+
<script type="text/javascript" src="resize.js"></script>
16+
<script type="text/javascript" src="navtreedata.js"></script>
17+
<script type="text/javascript" src="navtree.js"></script>
18+
<script type="text/javascript">
19+
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
20+
$(document).ready(initResizable);
21+
/* @license-end */</script>
22+
<link href="search/search.css" rel="stylesheet" type="text/css"/>
23+
<script type="text/javascript" src="search/searchdata.js"></script>
24+
<script type="text/javascript" src="search/search.js"></script>
25+
<script type="text/javascript">
26+
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
27+
$(document).ready(function() { init_search(); });
28+
/* @license-end */
29+
</script>
30+
<link href="doxygen.css" rel="stylesheet" type="text/css" />
31+
</head>
32+
<body>
33+
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
34+
<div id="titlearea">
35+
<table cellspacing="0" cellpadding="0">
36+
<tbody>
37+
<tr style="height: 56px;">
38+
<td id="projectalign" style="padding-left: 0.5em;">
39+
<div id="projectname"><a href="https://github.com/cpp-taskflow/cpp-taskflow">Cpp-Taskflow</a>
40+
&#160;<span id="projectnumber">2.3.0</span>
41+
</div>
42+
</td>
43+
<td> <div id="MSearchBox" class="MSearchBoxInactive">
44+
<span class="left">
45+
<img id="MSearchSelect" src="search/mag_sel.png"
46+
onmouseover="return searchBox.OnSearchSelectShow()"
47+
onmouseout="return searchBox.OnSearchSelectHide()"
48+
alt=""/>
49+
<input type="text" id="MSearchField" value="Search" accesskey="S"
50+
onfocus="searchBox.OnSearchFieldFocus(true)"
51+
onblur="searchBox.OnSearchFieldFocus(false)"
52+
onkeyup="searchBox.OnSearchFieldChange(event)"/>
53+
</span><span class="right">
54+
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
55+
</span>
56+
</div>
57+
</td>
58+
</tr>
59+
</tbody>
60+
</table>
61+
</div>
62+
<!-- end header part -->
63+
<!-- Generated by Doxygen 1.8.14 -->
64+
<script type="text/javascript">
65+
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
66+
var searchBox = new SearchBox("searchBox", "search",false,'Search');
67+
/* @license-end */
68+
</script>
69+
</div><!-- top -->
70+
<div id="side-nav" class="ui-resizable side-nav-resizable">
71+
<div id="nav-tree">
72+
<div id="nav-tree-contents">
73+
<div id="nav-sync" class="sync"></div>
74+
</div>
75+
</div>
76+
<div id="splitbar" style="-moz-user-select:none;"
77+
class="ui-resizable-handle">
78+
</div>
79+
</div>
80+
<script type="text/javascript">
81+
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
82+
$(document).ready(function(){initNavTree('chapter6.html','');});
83+
/* @license-end */
84+
</script>
85+
<div id="doc-content">
86+
<!-- window showing the filter options -->
87+
<div id="MSearchSelectWindow"
88+
onmouseover="return searchBox.OnSearchSelectShow()"
89+
onmouseout="return searchBox.OnSearchSelectHide()"
90+
onkeydown="return searchBox.OnSearchSelectKey(event)">
91+
</div>
92+
93+
<!-- iframe showing the search results (closed by default) -->
94+
<div id="MSearchResultsWindow">
95+
<iframe src="javascript:void(0)" frameborder="0"
96+
name="MSearchResults" id="MSearchResults">
97+
</iframe>
98+
</div>
99+
100+
<div class="header">
101+
<div class="headertitle">
102+
<div class="title">C6: CPU-GPU Tasking </div> </div>
103+
</div><!--header-->
104+
<div class="contents">
105+
<div class="textblock"><p>Modern scientific computing typically leverages GPU-powered parallel processing cores to speed up large-scale applications. This chapter discusses how to implement heterogeneous decomposition algorithms using CPU-GPU collaborative tasking.</p>
106+
<h1><a class="anchor" id="C6_Create_a_cudaFlow"></a>
107+
Create a cudaFlow</h1>
108+
<p>Cpp-Taskflow enables concurrent CPU-GPU tasking by leveraging <a href="https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__GRAPH.html">Nvidia CUDA Graph</a>. The tasking interface is referred to as <em>cudaFlow</em>. A <a class="el" href="classtf_1_1cudaFlow.html" title="Building methods of a cuda task dependency graph. ">tf::cudaFlow</a> is a graph object created at runtime similar to dynamic tasking. It manages a task node in a taskflow and associates it with a CUDA Graph. To create a cudaFlow, emplace a callable with an argument of type <a class="el" href="classtf_1_1cudaFlow.html" title="Building methods of a cuda task dependency graph. ">tf::cudaFlow</a>. The following example implements the canonical saxpy (A·X Plus Y) task graph.</p>
109+
<div class="fragment"><div class="line"> 1: #include &lt;taskflow/taskflow.hpp&gt;</div><div class="line"> 2: </div><div class="line"> 3: <span class="comment">// saxpy (single-precision A·X Plus Y) kernel</span></div><div class="line"> 4: __global__ <span class="keywordtype">void</span> saxpy(<span class="keywordtype">int</span> n, <span class="keywordtype">float</span> a, <span class="keywordtype">float</span> *x, <span class="keywordtype">float</span> *y) {</div><div class="line"> 5: <span class="keywordtype">int</span> i = blockIdx.x*blockDim.x + threadIdx.x;</div><div class="line"> 6: <span class="keywordflow">if</span> (i &lt; n) {</div><div class="line"> 7: y[i] = a*x[i] + y[i];</div><div class="line"> 8: }</div><div class="line"> 9: }</div><div class="line">10:</div><div class="line">11: <span class="comment">// main function begins</span></div><div class="line">12: <span class="keywordtype">int</span> main() {</div><div class="line">13:</div><div class="line">14: <a class="code" href="classtf_1_1Taskflow.html">tf::Taskflow</a> taskflow;</div><div class="line">15: <a class="code" href="classtf_1_1Executor.html">tf::Executor</a> executor;</div><div class="line">16: </div><div class="line">17: <span class="keyword">const</span> <span class="keywordtype">unsigned</span> N = 1&lt;&lt;20; <span class="comment">// size of the vector</span></div><div class="line">18:</div><div class="line">19: <a class="codeRef" doxygen="/Users/twhuang/PhD/Code/cpp-taskflow/doxygen/cppreference-doxygen-web.tag.xml:http://en.cppreference.com/w/" href="http://en.cppreference.com/w/cpp/container/vector.html">std::vector&lt;float&gt;</a> hx(N, 1.0f); <span class="comment">// x vector at host</span></div><div class="line">20: <a class="codeRef" doxygen="/Users/twhuang/PhD/Code/cpp-taskflow/doxygen/cppreference-doxygen-web.tag.xml:http://en.cppreference.com/w/" href="http://en.cppreference.com/w/cpp/container/vector.html">std::vector&lt;float&gt;</a> hy(N, 2.0f); <span 
class="comment">// y vector at host</span></div><div class="line">21:</div><div class="line">22: <span class="keywordtype">float</span> *dx{<span class="keyword">nullptr</span>}; <span class="comment">// x vector at device</span></div><div class="line">23: <span class="keywordtype">float</span> *dy{<span class="keyword">nullptr</span>}; <span class="comment">// y vector at device</span></div><div class="line">24: </div><div class="line">25: <a class="code" href="classtf_1_1Task.html">tf::Task</a> allocate_x = taskflow.<a class="code" href="classtf_1_1FlowBuilder.html#a796e29175380f70246cf2a5639adc437">emplace</a>(</div><div class="line">26: [&amp;](){ cudaMalloc(&amp;dx, N*<span class="keyword">sizeof</span>(<span class="keywordtype">float</span>));}</div><div class="line">27: );</div><div class="line">28:</div><div class="line">29: <a class="code" href="classtf_1_1Task.html">tf::Task</a> allocate_y = taskflow.<a class="code" href="classtf_1_1FlowBuilder.html#a796e29175380f70246cf2a5639adc437">emplace</a>(</div><div class="line">30: [&amp;](){ cudaMalloc(&amp;dy, N*<span class="keyword">sizeof</span>(<span class="keywordtype">float</span>));}</div><div class="line">31: );</div><div class="line">32:</div><div class="line">33: <a class="code" href="classtf_1_1Task.html">tf::Task</a> cudaflow = taskflow.<a class="code" href="classtf_1_1FlowBuilder.html#a796e29175380f70246cf2a5639adc437">emplace</a>([&amp;](<a class="code" href="classtf_1_1cudaFlow.html">tf::cudaFlow</a>&amp; cf) {</div><div class="line">34: <span class="comment">// create data transfer tasks</span></div><div class="line">35: <a class="code" href="classtf_1_1cudaTask.html">tf::cudaTask</a> h2d_x = cf.<a class="code" href="classtf_1_1cudaFlow.html#a0f36d1b7eb0bc77deb83966b394bd1d7">copy</a>(dx, hx.data(), N); <span class="comment">// host-to-device x data transfer</span></div><div class="line">36: <a class="code" href="classtf_1_1cudaTask.html">tf::cudaTask</a> h2d_y = cf.<a class="code" 
href="classtf_1_1cudaFlow.html#a0f36d1b7eb0bc77deb83966b394bd1d7">copy</a>(dy, hy.data(), N); <span class="comment">// host-to-device y data transfer</span></div><div class="line">37: <a class="code" href="classtf_1_1cudaTask.html">tf::cudaTask</a> d2h_x = cf.<a class="code" href="classtf_1_1cudaFlow.html#a0f36d1b7eb0bc77deb83966b394bd1d7">copy</a>(hx.data(), dx, N); <span class="comment">// device-to-host x data transfer</span></div><div class="line">38: <a class="code" href="classtf_1_1cudaTask.html">tf::cudaTask</a> d2h_y = cf.<a class="code" href="classtf_1_1cudaFlow.html#a0f36d1b7eb0bc77deb83966b394bd1d7">copy</a>(hy.data(), dy, N); <span class="comment">// device-to-host y data transfer</span></div><div class="line">39:</div><div class="line">40: <span class="comment">// launch saxpy&lt;&lt;&lt;(N+255)/256, 256, 0&gt;&gt;&gt;(N, 2.0f, dx, dy)</span></div><div class="line">41: <a class="code" href="classtf_1_1cudaTask.html">tf::cudaTask</a> kernel = cf.<a class="code" href="classtf_1_1cudaFlow.html#adb731be71bdd436dfb5e36e6213a9a17">kernel</a>((N+255)/256, 256, 0, saxpy, N, 2.0f, dx, dy);</div><div class="line">42:</div><div class="line">43: kernel.<a class="code" href="classtf_1_1cudaTask.html#a4a9ca1a34bac47e4c9b04eb4fb2f7775">succeed</a>(h2d_x, h2d_y)</div><div class="line">44: .<a class="code" href="classtf_1_1cudaTask.html#abdd68287ec4dff4216af34d1db44d1b4">precede</a>(d2h_x, d2h_y);</div><div class="line">45: });</div><div class="line">46: cudaflow.<a class="code" href="classtf_1_1Task.html#a331b1b726555072e7c7d10941257f664">succeed</a>(allocate_x, allocate_y); <span class="comment">// overlap data allocations</span></div><div class="line">47: </div><div class="line">48: executor.<a class="code" href="classtf_1_1Executor.html#a81f35d5b0a20ac0646447eb80d97c0aa">run</a>(taskflow).wait();</div><div class="line">49:</div><div class="line">50: taskflow.<a class="code" href="classtf_1_1Taskflow.html#ac433018262e44b12c4cc9f0c4748d758">dump</a>(<a 
class="codeRef" doxygen="/Users/twhuang/PhD/Code/cpp-taskflow/doxygen/cppreference-doxygen-web.tag.xml:http://en.cppreference.com/w/" href="http://en.cppreference.com/w/cpp/io/basic_ostream.html">std::cout</a>); <span class="comment">// dump the taskflow</span></div><div class="line">51: }</div></div><!-- fragment --><div class="image">
110+
<object type="image/svg+xml" data="saxpy.svg" width="50%">saxpy.svg</object>
111+
</div>
112+
<p>Debrief:</p>
113+
<ul>
114+
<li>Lines 3-9 define a saxpy kernel using CUDA </li>
115+
<li>Lines 19-20 declare two host vectors, <code>hx</code> and <code>hy</code> </li>
116+
<li>Lines 22-23 declare two device vector pointers, <code>dx</code> and <code>dy</code> </li>
117+
<li>Lines 25-31 declare two tasks to allocate memory for <code>dx</code> and <code>dy</code> on device, each of <code>N*sizeof(float)</code> bytes </li>
118+
<li>Lines 33-45 create a <a class="el" href="classtf_1_1cudaFlow.html" title="Building methods of a cuda task dependency graph. ">tf::cudaFlow</a> to capture kernel work in a graph (two host-to-device data transfer tasks, one saxpy kernel task, and two device-to-host data transfer tasks) </li>
119+
<li>Lines 46-48 define the task dependency between host tasks and the cudaFlow tasks and execute the taskflow</li>
120+
</ul>
121+
<p>Cpp-Taskflow does not expend unnecessary effort on kernel programming but focuses on tasking CUDA operations with CPU work. We give users full privileges to craft a CUDA kernel that is commensurate with their domain knowledge. Users focus on developing high-performance kernels using a native CUDA toolkit, while leaving difficult task parallelism to Cpp-Taskflow. </p>
122+
</div></div><!-- contents -->
123+
</div><!-- doc-content -->
124+
<!-- start footer part -->
125+
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
126+
<ul>
127+
<li class="footer">Generated by
128+
<a href="http://www.doxygen.org/index.html">
129+
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
130+
</ul>
131+
</div>
132+
</body>
133+
</html>

docs/navtreedata.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ var NAVTREE =
8181
var NAVTREEINDEX =
8282
[
8383
".html",
84-
"structis__static__task.html"
84+
"structis__cudaflow__task.html"
8585
];
8686

8787
var SYNCONMSG = 'click to disable panel synchronisation';

docs/navtreeindex0.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ var NAVTREEINDEX0 =
5151
"chapter5.html":[2,5],
5252
"chapter5.html#C5_ComposesATaskflow":[2,5,0],
5353
"chapter5.html#C5_ModuleTask":[2,5,1],
54+
"chapter6.html":[2,6],
55+
"chapter6.html#C6_Create_a_cudaFlow":[2,6,0],
5456
"classes.html":[5,1],
5557
"classtf_1_1Executor.html":[5,0,0,2],
5658
"classtf_1_1Executor.html#a10e0189c6403293c276b72bb1158557e":[5,0,0,2,0],
@@ -206,8 +208,8 @@ var NAVTREEINDEX0 =
206208
"functions.html":[5,3,0],
207209
"functions_func.html":[5,3,1],
208210
"hierarchy.html":[5,2],
209-
"index.html":[0],
210211
"index.html":[],
212+
"index.html":[0],
211213
"index.html#ASimpleFirstProgram":[0,2],
212214
"index.html#CompileAndRunYourFirstProgram":[0,3],
213215
"index.html#ComposableTasking":[0,0,2],
@@ -247,7 +249,5 @@ var NAVTREEINDEX0 =
247249
"release-2-3-0.html#release-2-3-0_deprecated_items":[1,1,3],
248250
"release-2-3-0.html#release-2-3-0_download":[1,1,0],
249251
"release-2-3-0.html#release-2-3-0_new_features":[1,1,1],
250-
"structis__condition__task.html":[5,0,1],
251-
"structis__cudaflow__task.html":[5,0,2],
252-
"structis__dynamic__task.html":[5,0,3]
252+
"structis__condition__task.html":[5,0,1]
253253
};

docs/navtreeindex1.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
var NAVTREEINDEX1 =
22
{
3+
"structis__cudaflow__task.html":[5,0,2],
4+
"structis__dynamic__task.html":[5,0,3],
35
"structis__static__task.html":[5,0,4],
46
"task_8hpp_source.html":[6,0,7],
57
"taskflow_8hpp_source.html":[6,0,8],

docs/pages.html

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,9 @@
117117
<tr id="row_1_3_" class="even"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><a class="el" href="chapter3.html" target="_self">C3: Dynamic Tasking</a></td><td class="desc"></td></tr>
118118
<tr id="row_1_4_"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><a class="el" href="chapter4.html" target="_self">C4: Conditional Tasking</a></td><td class="desc"></td></tr>
119119
<tr id="row_1_5_" class="even"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><a class="el" href="chapter5.html" target="_self">C5: Composable Tasking</a></td><td class="desc"></td></tr>
120-
<tr id="row_2_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a class="el" href="FAQ.html" target="_self">Frequently Asked Questions</a></td><td class="desc"></td></tr>
121-
<tr id="row_3_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a class="el" href="Reference.html" target="_self">Reference</a></td><td class="desc"></td></tr>
120+
<tr id="row_1_6_"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><a class="el" href="chapter6.html" target="_self">C6: CPU-GPU Tasking</a></td><td class="desc"></td></tr>
121+
<tr id="row_2_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a class="el" href="FAQ.html" target="_self">Frequently Asked Questions</a></td><td class="desc"></td></tr>
122+
<tr id="row_3_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a class="el" href="Reference.html" target="_self">Reference</a></td><td class="desc"></td></tr>
122123
</table>
123124
</div><!-- directory -->
124125
</div><!-- contents -->

0 commit comments

Comments
 (0)