Merge pull request #107 from chainer/todo-comments
Adding comments
shu65 authored Aug 17, 2017
2 parents 588ed03 + 9730427 commit 8f57b59
Showing 3 changed files with 6 additions and 4 deletions.
5 changes: 4 additions & 1 deletion chainermn/communicators/_base.py
@@ -96,7 +96,10 @@ def __init__(self, mpi_comm, use_nccl):
 
         self._init_ranks()
 
-        # TODO(akiba): write why we delay initializing comms
+        # We have to delay the initialization of the communicators because
+        # NCCL communicators use the CUDA devices that are current at the
+        # time of initialization. Therefore, we have to initialize them
+        # after users have set the devices to use.
         self.inter_mpi_comm = None
         self.intra_mpi_comm = None
         if self.use_nccl:
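
The deferred-initialization pattern that the new comment describes can be illustrated with a short sketch. This is not ChainerMN's actual code: the class `DeferredCommunicator`, the method `_ensure_nccl_comm`, and the direct use of CuPy's `cupy.cuda.nccl` bindings are illustrative assumptions. The point is only that an NCCL communicator binds to whichever CUDA device is current when it is constructed, so construction must wait until after the user selects a GPU.

```python
# A minimal sketch (not ChainerMN's code) of deferred NCCL initialization,
# assuming mpi4py and CuPy's cupy.cuda.nccl bindings are available.
from cupy.cuda import nccl


class DeferredCommunicator(object):
    def __init__(self, mpi_comm):
        self.mpi_comm = mpi_comm
        # Deferred on purpose: at construction time the user may not have
        # called cupy.cuda.Device(...).use() yet, and an NCCL communicator
        # binds to the CUDA device that is current when it is created.
        self.nccl_comm = None

    def _ensure_nccl_comm(self):
        if self.nccl_comm is not None:
            return
        # By the time a collective is first invoked, the user has selected
        # a device, so the communicator attaches to the intended GPU.
        uid = nccl.get_unique_id() if self.mpi_comm.rank == 0 else None
        uid = self.mpi_comm.bcast(uid, root=0)
        self.nccl_comm = nccl.NcclCommunicator(
            self.mpi_comm.size, uid, self.mpi_comm.rank)
```

In this sketch, a caller would first run `cupy.cuda.Device(rank).use()`, and the first collective operation would then trigger `_ensure_nccl_comm()`.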
2 changes: 0 additions & 2 deletions chainermn/communicators/hierarchical_communicator.py
@@ -38,8 +38,6 @@ def allreduce_grad(self, model):
             self.gpu_buffer_a.ptr(), self.gpu_buffer_b.ptr(), n_elems_total,
             nccl.NCCL_FLOAT, nccl.NCCL_SUM, 0, stream.ptr)
 
-        # TODO(akiba): sync necessary?
-
         # Inter-node allreduce
         if self.intra_rank == 0:
             _communication_utility.inter_allreduce_gpu(
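
For context, the hierarchical scheme this hunk belongs to (intra-node reduce to the node leader, inter-node allreduce among leaders, then an intra-node broadcast) can be sketched with plain mpi4py and NumPy in place of NCCL. The communicator names and the NumPy buffers are assumptions for illustration, not ChainerMN's implementation.

```python
# Hedged sketch of a two-level allreduce, assuming intra_comm groups the
# processes on one node and inter_comm connects the intra-rank-0 leaders
# (both could be obtained via comm.Split in mpi4py).
from mpi4py import MPI
import numpy as np


def hierarchical_allreduce(arr, intra_comm, inter_comm):
    result = np.empty_like(arr)

    # Step 1: intra-node reduce; the node leader collects the local sum.
    intra_comm.Reduce(arr, result, op=MPI.SUM, root=0)

    # Step 2: inter-node allreduce among the node leaders only.
    if intra_comm.rank == 0:
        inter_comm.Allreduce(MPI.IN_PLACE, result, op=MPI.SUM)

    # Step 3: intra-node broadcast of the global sum back to every rank.
    intra_comm.Bcast(result, root=0)
    return result
```

The design pay-off is that only one process per node participates in the inter-node step, so the slower network link carries one message per node rather than one per GPU.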
3 changes: 2 additions & 1 deletion chainermn/dataset.py
@@ -25,7 +25,8 @@ def scatter_dataset(dataset, comm):
     assert hasattr(comm, 'send')
     assert hasattr(comm, 'recv')
 
-    # TODO(akiba): write why we do not use mpi_comm.scatter
+    # We do not use `mpi_comm.scatter` because of a bug in MPI4py:
+    # for large datasets, `mpi_comm.scatter` raises MemoryError.
     if comm.rank == 0:
         mine = None
         n_total_samples = len(dataset)
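
The workaround the new comment refers to (slicing on rank 0 and shipping each slice with point-to-point messages instead of one large `scatter`) looks roughly like the following. The function name `scatter_dataset_p2p` and the even-slicing scheme are assumptions for illustration; ChainerMN's actual `scatter_dataset` differs in detail.

```python
# Hedged sketch of scattering a dataset without mpi_comm.scatter,
# assuming an mpi4py-style comm exposing rank, size, send, and recv.
def scatter_dataset_p2p(dataset, comm):
    if comm.rank == 0:
        n_total = len(dataset)
        n_sub = (n_total + comm.size - 1) // comm.size  # ceiling division
        mine = None
        for i in range(comm.size):
            b, e = n_sub * i, min(n_sub * (i + 1), n_total)
            subset = list(dataset[b:e])
            if i == 0:
                mine = subset              # rank 0 keeps its own share
            else:
                comm.send(subset, dest=i)  # one modest message per rank
        return mine
    else:
        return comm.recv(source=0)
```

Because each rank's slice is pickled and sent separately, no single message has to hold the whole dataset, which sidesteps the MemoryError that a one-shot `mpi_comm.scatter` can hit on large datasets.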
