Remove target node from Topology

clin99 · clin99 · commit 344afc00a03f · 2019-02-11T16:11:01.000-06:00
diff --git a/example/framework.cpp b/example/framework.cpp
@@ -51,7 +51,7 @@ int main(){
   std::cout << std::endl;
 
   std::cout << "Execute the framework 4 times with a callback\n";
-  tf.run_n(f, 4, [i=0] () mutable { std::cout << "-> run #" << ++i << " finished\n"; }).get();
+  tf.run_n(f, 4, [] () mutable { std::cout << "The framework finishes\n"; }).get();
   std::cout << std::endl;
 
   std::cout << "Silently run the framework\n";
diff --git a/example/framework_dynamic_tasking.cpp b/example/framework_dynamic_tasking.cpp
@@ -82,13 +82,16 @@ void tf_traversal(std::vector<Node*>& src, Node nodes[], size_t num_nodes) {
 
   tf::Taskflow tf(4);
   tf::Framework framework;
-  for(size_t i=0; i<src.size(); i++) {
-    framework.silent_emplace([i=i, &src](auto& subflow){ traverse(src[i], subflow); });
-  }
-  tf.silent_run_n(framework, 100, [&, iteration=0]() mutable {
+  // Add a target to verify the traversal and reset nodes in each iteration
+  auto target = framework.silent_emplace([&](){
     validate(nodes, num_nodes);
     reset(nodes, num_nodes);
   });
+  for(size_t i=0; i<src.size(); i++) {
+    framework.silent_emplace([i=i, &src](auto& subflow){ traverse(src[i], subflow); }).precede(target);
+  }
+  tf.silent_run_n(framework, 100);
+
   tf.wait_for_all();  // block until finished
   
   auto end = std::chrono::system_clock::now();
diff --git a/taskflow/graph/basic_taskflow.hpp b/taskflow/graph/basic_taskflow.hpp
@@ -310,117 +310,87 @@ std::shared_future<void> BasicTaskflow<E>::run_n(Framework& f, size_t repeat, C&
   auto &tpg = _topologies.emplace_front(f, repeat);
   f._topologies.push_back(&tpg);
   
-  // PV 1/31 (twhuang): Lambda in C++17 is by default inline - no need for static
-  static const auto setup_topology = [](auto& f, auto& tpg) {
-
+  const auto setup_topology = [](auto& f, auto& tpg) {
+    tpg._num_sinks = 0;
     for(auto& n: f._graph) {
-      
-      // PR 1/31 (twhuang): I don't think we need check the emptiness
-      // of the successors over here
-      // Also, when do you clean up dynamic tasking nodes?
-      //
-      if(!n._successors.empty()) {
-        for(size_t i=0; i<n._successors.size(); i++) {
-          if(n._successors[i] == f._last_target) {
-            std::swap(n._successors[i], n._successors.back());
-            n._successors.pop_back();
-            break;
-          }
-        }
-      } 
-
       // reset the target links
       n._topology = &tpg;
       if(n.num_dependents() == 0) {
         tpg._sources.push_back(&n);
       }
       if(n.num_successors() == 0) {
-        n.precede(tpg._target);
+        tpg._num_sinks ++;       
       }
     }
   };
 
-  // PV 1/31 (twhuang): single worker - we need to remove topologies?
 
   // Iterative execution to avoid stack overflow
   if(num_workers() == 0) {
     // Clear last execution data & Build precedence between nodes and target
     setup_topology(f, tpg);
 
-    tpg._target._work = std::forward<C>(c);
-
-    // PR 1/31 (twhuang): redundant tgt_predecessors
-    const int tgt_predecessor = tpg._target._predecessors.size();
-
+    const int tgt_predecessor = tpg._num_sinks; 
     for(size_t i=0; i<repeat; i++) {
-
       _schedule(tpg._sources);
-      
-      // PR 1/31 (twhuang): why do we need to set the dependents again?
-      // Reset target 
-      f._topologies.front()->_target._predecessors.resize(tgt_predecessor);
-      f._topologies.front()->_target._dependents = tgt_predecessor;
+      f._topologies.front()->_num_sinks = tgt_predecessor;
     }
 
-    f._last_target = &tpg._target;
-    tpg._promise.set_value();
+    std::invoke(c);
+    auto &p = f._topologies.front()->_promise;
+    f._topologies.pop_front();
+    p.set_value();
   }
   else { 
     // case 1: the previous execution is still running
     if(f._topologies.size() > 1) {
-      tpg._target._work = std::forward<C>(c);
+      tpg._work = std::forward<C>(c);
     }
     // case 2: this epoch should run
     else {
       setup_topology(f, tpg);
 
       //Set up target node's work
-      tpg._target._work = [&f, c=std::function<void()>{std::forward<C>(c)}, 
-        tgt_predecessor = tpg._target._predecessors.size(), this]() mutable {
+      tpg._work = [&f, c=std::function<void()>{std::forward<C>(c)}, 
+        tgt_predecessor = tpg._num_sinks.load(std::memory_order_relaxed), this]() mutable {
 
-        std::invoke(c);
-        
         // PV 1/31 (twhuang): thread safety? 
         // case 1: we still need to run the topology again
         if(--f._topologies.front()->_repeat != 0) {
-
-          // Reset target 
-          f._topologies.front()->_target._predecessors.resize(tgt_predecessor);
-          f._topologies.front()->_target._dependents = tgt_predecessor;
-
+          f._topologies.front()->_num_sinks = tgt_predecessor;
           _schedule(f._topologies.front()->_sources); 
         }
         // case 2: the final run of this topology
         // notice that there can be another new run request before we acquire the lock
         else {
+          std::invoke(c);
+
           f._mtx.lock();
-       
+
           // If there is another run
           if(f._topologies.size() > 1) {
 
             // Set the promise
             f._topologies.front()->_promise.set_value();
 
             auto next_tpg = std::next(f._topologies.begin());
-            c = std::move(std::get<StaticWork>((*next_tpg)->_target._work));
+            //c = std::move(std::get<StaticWork>((*next_tpg)->_target._work));
+            c = std::move((*next_tpg)->_work);
 
             f._topologies.front()->_repeat = (*next_tpg)->_repeat;
             f._topologies.front()->_promise = std::move((*next_tpg)->_promise);
             f._topologies.erase(next_tpg);
 
             f._mtx.unlock();
-
-            // Reset target 
-            f._topologies.front()->_target._predecessors.resize(tgt_predecessor);
-            f._topologies.front()->_target._dependents = tgt_predecessor;
+            // The graph should be exactly the same as in the previous dispatch
+            f._topologies.front()->_num_sinks = tgt_predecessor;
 
             _schedule(f._topologies.front()->_sources);
           }
           else {
             // Need to back up the promise first here becuz framework might be 
             // destroy before taskflow leaves
             auto &p = f._topologies.front()->_promise; 
-            f._last_target = &(f._topologies.front()->_target);
             f._topologies.pop_front();
             f._mtx.unlock();
            
@@ -461,11 +431,8 @@ void BasicTaskflow<E>::Closure::operator () () const {
   // subflow node type 
   else {
     
-    // PV 1/31 (twhuang): emplace is enough
-    //
     // Clear the subgraph before the task execution
     if(!node->_spawned) {
-      node->_subgraph.reset();
       node->_subgraph.emplace();
     }
    
@@ -483,7 +450,12 @@ void BasicTaskflow<E>::Closure::operator () () const {
           n->_topology = node->_topology;
           n->_subtask = true;
           if(n->num_successors() == 0) {
-            n->precede(fb.detached() ? node->_topology->_target : *node);
+            if(fb.detached()) {
+              node->_topology->_num_sinks ++;
+            }
+            else {
+              n->precede(*node);
+            }
           }
           if(n->num_dependents() == 0) {
             src.emplace_back(&(*n));
@@ -506,9 +478,13 @@ void BasicTaskflow<E>::Closure::operator () () const {
   // Recover the runtime change due to dynamic tasking except the target & spawn tasks 
   // This must be done before scheduling the successors, otherwise this might cause 
   // race condition on the _dependents
-  if(num_successors && !node->_subtask) {
-    while(!node->_predecessors.empty() && node->_predecessors.back()->_subtask) {
-      node->_predecessors.pop_back();
+  //if(num_successors && !node->_subtask) {
+  if(!node->_subtask) {
+    // Only dynamic tasking needs to restore _predecessors
+    if(node->_work.index() == 1 &&  !node->_subgraph->empty()) {
+      while(!node->_predecessors.empty() && node->_predecessors.back()->_subtask) {
+        node->_predecessors.pop_back();
+      }
     }
     node->_dependents = node->_predecessors.size();
     node->_spawned = false;
@@ -520,6 +496,21 @@ void BasicTaskflow<E>::Closure::operator () () const {
       taskflow->_schedule(*(node->_successors[i]));
     }
   }
+
+  // A node without any successor must check whether its topology has finished
+  if(num_successors == 0) {
+    if(--(node->_topology->_num_sinks) == 0) {
+
+      // This is the last executing node 
+      bool is_framework = node->_topology->_handle.index() == 1;
+      if(node->_topology->_work != nullptr) {
+        std::invoke(node->_topology->_work);
+      }
+      if(!is_framework) {
+        node->_topology->_promise.set_value();
+      }
+    }
+  }
 }
 
 // ============================================================================
diff --git a/taskflow/graph/framework.hpp b/taskflow/graph/framework.hpp
@@ -38,7 +38,6 @@ class Framework : public FlowBuilder {
 
     std::mutex _mtx;
     std::list<Topology*> _topologies;
-    Node* _last_target {nullptr};   
 };
 
 // Constructor
diff --git a/taskflow/graph/topology.hpp b/taskflow/graph/topology.hpp
@@ -30,13 +30,14 @@ class Topology {
     std::variant<Graph, Framework*> _handle;
 
     std::promise <void> _promise;
-    size_t _repeat;
+    size_t _repeat {0};
 
     std::shared_future<void> _future;
 
     std::vector<Node*> _sources;
 
-    Node _target;
+    std::atomic<int> _num_sinks {0};
+    std::function<void()> _work {nullptr};
 };
 
 
@@ -51,15 +52,9 @@ inline Topology::Topology(Framework& f, size_t repeat): _handle(&f), _repeat(rep
 // Constructor
 inline Topology::Topology(Graph&& t) : 
   _handle(std::move(t)) {
-
-  _target._topology = this;
   
   _future = _promise.get_future().share();
 
-  _target._work = [this] () mutable { 
-    this->_promise.set_value(); 
-  };
-
   // Build the super source and super target.
   for(auto& node : std::get<Graph>(_handle)) {
 
@@ -70,7 +65,7 @@ inline Topology::Topology(Graph&& t) :
     }
 
     if(node.num_successors() == 0) {
-      node.precede(_target);
+      _num_sinks ++;
     }
   }
 }
@@ -81,14 +76,9 @@ template <typename C>
 inline Topology::Topology(Graph&& t, C&& c) : 
   _handle(std::move(t)) {
 
-  _target._topology = this;
-  
   _future = _promise.get_future().share();
 
-  _target._work = [this, c{std::forward<C>(c)}] () mutable { 
-    this->_promise.set_value();
-    c();
-  };
+  _work = std::forward<C>(c);
 
   // Build the super source and super target.
   for(auto& node : std::get<Graph>(_handle)) {
@@ -100,18 +90,15 @@ inline Topology::Topology(Graph&& t, C&& c) :
     }
 
     if(node.num_successors() == 0) {
-      node.precede(_target);
+      _num_sinks ++;
     }
   }
 }
 
 // Procedure: dump
 inline void Topology::dump(std::ostream& os) const {
-
-  assert(!(_target._subgraph));
   
-  os << "digraph Topology {\n"
-     << _target.dump();
+  os << "digraph Topology {\n";
 
   std::visit(Functors{
     [&] (const Graph& graph) {