def loss(self, x, samples):
    """Return the summed negative log-weight of ``samples`` under the proposal.

    Calls ``self.forward(x)``, which must return exactly two items; the
    second one is used as the proposal output. For every sample, the
    element-wise product of the log proposal row and the sample's ``value``
    is summed and subtracted from the running total.

    Args:
        x: Input handed unchanged to ``self.forward``.
        samples: Sequence of objects exposing a ``value`` tensor. Entry ``i``
            is paired with row ``i`` of the proposal output. The original
            author notes that ``value`` is one-hot.

    Returns:
        The accumulated loss. When ``samples`` is empty the loop never runs
        and the plain integer ``0`` is returned.
    """
    _, proposal_output = self.forward(x)
    # util.epsilon is added before the logarithm (presumably to keep log(0)
    # from producing -inf -- confirm against util).
    log_probs = torch.log(proposal_output + util.epsilon)

    total = 0
    for index, sample in enumerate(samples):
        # The target takes no gradient; only the proposal side is differentiated.
        target = Variable(sample.value, requires_grad=False)  # value is one-hot
        total -= torch.sum(log_probs[index] * target)

    # Should we average this over dimensions?
    # See http://pytorch.org/docs/nn.html#torch.nn.KLDivLoss
    return total
def forward(self, x):
    """Feed ``x`` through ``block1``, ``block2`` and ``block3`` in that order.

    Args:
        x: Input accepted by ``self.block1``.

    Returns:
        The output of ``self.block3`` applied to the chained result.
    """
    after_block1 = self.block1(x)
    after_block2 = self.block2(after_block1)
    return self.block3(after_block2)

########################################################################
# The code does not need to be changed in CPU-mode.
#
# The documentation for DataParallel is
# `here <http://pytorch.org/docs/nn.html#torch.nn.DataParallel>`_.
#
# **Primitives on which DataParallel is implemented:**
#
#
# In general, PyTorch's `nn.parallel` primitives can be used independently.
# We have implemented simple MPI-like primitives:
#
# - replicate: replicate a Module on multiple devices
# - scatter: distribute the input in the first-dimension
# - gather: gather and concatenate the input in the first-dimension
# - parallel\_apply: apply a set of already-distributed inputs to a set of
#   already-distributed models.
#
# For better clarity, here is the function ``data_parallel`` composed using
# these collectives