Python torch 模块,div() 实例源码
我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用torch.div()。
def validate(models, dataset, arg, cuda=False):
    """Return the mean MSE of the averaged ensemble output over ``dataset``.

    Args:
        models: iterable of ``(model, _)`` pairs; only the model is used.
        dataset: object whose ``get_batcher(shuffle=..., augment=...)``
            yields ``(x, y)`` numpy batches.
        arg: not used by this function.
        cuda: not used by this function; batches are always moved to the GPU.

    Returns:
        ``np.mean`` of the per-batch loss values.
    """
    mse = nn.MSELoss()
    batch_losses = []
    batches = dataset.get_batcher(shuffle=True, augment=False)
    for inputs, targets in batches:
        inputs = V(th.from_numpy(inputs).float()).cuda()
        targets = V(th.from_numpy(targets).float()).cuda()
        # Ensemble average: accumulate every member's output, then divide
        # by the number of members.
        total = None
        for member, _ in models:
            member.eval()
            if total is None:
                total = member(inputs)
            else:
                total = total + member(inputs)
        averaged = th.div(total, len(models))
        batch_losses.append(mse(averaged, targets).data[0])
    return np.mean(batch_losses)
def predict(models, dataset, arg, cuda=False):
    """Write the ensemble-averaged prediction of every batch to a text file.

    One line is written to ``save/predictions.txt`` per batch, holding the
    element ``[0][0]`` of the averaged model output.

    Args:
        models: iterable of ``(model, _)`` pairs; only the model is used.
        dataset: object whose ``get_batcher(shuffle=..., augment=...)``
            yields ``(x, _)`` numpy batches.
        arg: options object; ``arg.verbose`` enables a progress line every
            100 batches.
        cuda: not used by this function; batches are always moved to the GPU.
    """
    # The context manager closes (and therefore flushes) the file even when
    # a batch raises; previously the handle was never closed.
    with open('save/predictions.txt', 'w') as prediction_file:
        batcher = dataset.get_batcher(shuffle=False, augment=False)
        for b, (x, _) in enumerate(batcher, 1):
            x = V(th.from_numpy(x).float()).cuda()
            # Ensemble average
            logit = None
            for model, _ in models:
                model.eval()
                logit = model(x) if logit is None else logit + model(x)
            logit = th.div(logit, len(models))
            prediction = logit.cpu().data[0][0]
            prediction_file.write('%s\n' % prediction)
            if arg.verbose and b % 100 == 0:
                print('[predict] [b]:%s - prediction: %s' % (b, prediction))
def bn_hat_z_layers(self, hat_z_layers, z_pre_layers):
    """Normalise each ``hat_z`` with statistics taken from the matching ``z_pre``.

    For every layer pair the result is
    ``(hat_z - mean(z_pre)) / sqrt(var(z_pre + noise) + 1e-10)``, where the
    mean and variance are taken over dimension 0 and ``noise`` is freshly
    drawn Gaussian noise.  The arithmetic runs on the CPU; when
    ``self.use_cuda`` is set the result is moved back to the GPU.

    Args:
        hat_z_layers: sequence of 2-D tensors to normalise.
        z_pre_layers: sequence of 2-D tensors supplying the statistics; must
            have the same length as ``hat_z_layers``.

    Returns:
        List of normalised tensors, one per input layer.

    Raises:
        AssertionError: if the two sequences differ in length.
    """
    # TODO: Calculate batchnorm using GPU Tensors.
    assert len(hat_z_layers) == len(z_pre_layers)
    hat_z_layers_normalized = []
    for hat_z, z_pre in zip(hat_z_layers, z_pre_layers):
        # Column of ones used to tile the per-feature statistics over the
        # batch through a matrix product.  It is only ever consumed on the
        # CPU, so it is created there directly.
        ones = Variable(torch.ones(z_pre.size()[0], 1))
        mean = torch.mean(z_pre, 0)
        noise_var = np.random.normal(loc=0.0, scale=1 - 1e-10, size=z_pre.size())
        # ``.cpu()`` is a no-op for tensors that already live on the CPU, so
        # a single expression serves both device configurations.
        var = np.var(z_pre.data.cpu().numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
        var = Variable(torch.FloatTensor(var))
        if self.use_cuda:
            hat_z = hat_z.cpu()
            mean = mean.cpu()
        # Reductions drop the reduced dimension on newer torch releases;
        # ``mm`` needs an explicit (1, features) row vector either way.
        mean = mean.view(1, -1)
        hat_z_normalized = torch.div(hat_z - ones.mm(mean), ones.mm(torch.sqrt(var + 1e-10)))
        if self.use_cuda:
            hat_z_normalized = hat_z_normalized.cuda()
        hat_z_layers_normalized.append(hat_z_normalized)
    return hat_z_layers_normalized
def train(epoch):
    """Run ``epoch`` passes over the key/value training data.

    Relies on names bound outside this function: ``model``, ``optimizer``,
    ``loss_function``, ``config``, ``adjust_learning_rate`` and the
    ``train_*`` sequences.  Uses Python 2 ``print`` statements.
    """
    for e_ in range(epoch):
        # Hand the optimizer to adjust_learning_rate on every 10th epoch.
        if (e_ + 1) % 10 == 0:
            adjust_learning_rate(optimizer, e_)
        cnt = 0
        # Running loss of the current pseudo-batch.
        loss = Variable(torch.Tensor([0]))
        for i_q, i_k, i_v, i_cand, i_a in zip(train_q, train_key,train_value, train_cand, train_a):
            cnt += 1
            i_q = i_q.unsqueeze(0) # add dimension
            probs = model.forward(i_q, i_k, i_v,i_cand)
            i_a = Variable(i_a)
            curr_loss = loss_function(probs, i_a)
            loss = torch.add(loss, torch.div(curr_loss, config.batch_size))
            # Naive batch implementation: each sample's loss is divided by the
            # batch size and accumulated; one optimizer step is taken per
            # config.batch_size samples.
            if cnt % config.batch_size == 0:
                print "Training loss", loss.data.sum()
                loss.backward()
                optimizer.step()
                loss = Variable(torch.Tensor([0]))
                model.zero_grad()
            # NOTE(review): eval() is called without arguments, so it is
            # presumably a project function shadowing the builtin -- confirm.
            if cnt % config.valid_every == 0:
                print "Accuracy:",eval()
def train(epoch):
    """Run ``epoch`` passes over the question/word/entity training data.

    Relies on names bound outside this function: ``model``, ``optimizer``,
    ``loss_function``, ``config``, ``adjust_learning_rate`` and the
    ``train_*`` sequences.  Uses Python 2 ``print`` statements.
    """
    for e_ in range(epoch):
        # Hand the optimizer to adjust_learning_rate on every 10th epoch.
        if (e_ + 1) % 10 == 0:
            adjust_learning_rate(optimizer, e_)
        cnt = 0
        # Running loss of the current pseudo-batch.
        loss = Variable(torch.Tensor([0]))
        for i_q, i_w, i_e_p, i_a in zip(train_q, train_w, train_e_p, train_a):
            cnt += 1
            i_q = i_q.unsqueeze(0) # add dimension
            probs = model.forward(i_q, i_w, i_e_p)
            i_a = Variable(i_a)
            curr_loss = loss_function(probs, i_a)
            loss = torch.add(loss, torch.div(curr_loss, config.batch_size))
            # Naive batch implementation: each sample's loss is divided by the
            # batch size and accumulated; one optimizer step is taken per
            # config.batch_size samples.
            if cnt % config.batch_size == 0:
                print "Training loss", loss.data.sum()
                loss.backward()
                optimizer.step()
                loss = Variable(torch.Tensor([0]))
                model.zero_grad()
            # NOTE(review): eval() is called without arguments, so it is
            # presumably a project function shadowing the builtin -- confirm.
            if cnt % config.valid_every == 0:
                print "Accuracy:",eval()
def updateOutput(self, input):
    """Divide each row of the 2-D ``input`` by its Lp norm and return the result.

    ``self.p`` selects the norm; ``float('inf')`` uses the largest absolute
    value of the row.  ``self.eps`` is added before the division.  The torch
    calls appear to use the legacy calling convention in which the result
    tensor is passed first -- confirm against the targeted torch version.
    """
    assert input.dim() == 2
    input_size = input.size()
    # Lazily create the work buffers with the same tensor type as the input.
    self._output = self._output or input.new()
    self.norm = self.norm or input.new()
    self.buffer = self.buffer or input.new()
    self._output.resize_as_(input)
    # specialization for the infinity norm
    if self.p == float('inf'):
        if not self._indices:
            self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
                else torch.LongTensor()
        # norm = max(|input|) along dim 1, plus eps
        torch.abs(self.buffer, input)
        torch.max(self.norm, self._indices, self.buffer, 1)
        self.norm.add_(self.eps)
    else:
        self.normp = self.normp or input.new()
        # abs() is applied only when p is not a multiple of 2.
        if self.p % 2 != 0:
            torch.abs(self.buffer, input).pow_(self.p)
        else:
            torch.pow(self.buffer, input, self.p)
        # normp = sum(|x|^p) + eps ; norm = normp^(1/p)
        torch.sum(self.normp, self.buffer, 1).add_(self.eps)
        torch.pow(self.norm, self.normp, 1./self.p)
    torch.div(self._output, input, self.norm.view(-1, 1).expand_as(input))
    self.output = self._output.view(input_size)
    return self.output
def updateGradInput(self, input, gradOutput):
    """Compute ``self.gradInput`` for a 2-D ``input`` and return it.

    Returns ``None`` without doing any work when ``self.gradInput`` is falsy.
    The torch calls appear to use the legacy result-first calling
    convention -- confirm against the targeted torch version.
    """
    if not self.gradInput:
        return
    # Lazily create the work buffers.
    self._div = self._div or input.new()
    self._output = self._output or self.output.new()
    self._gradOutput = self._gradOutput or input.new()
    self._expand3 = self._expand3 or input.new()
    # Recompute the forward pass unless fastBackward says it can be reused.
    if not self.fastBackward:
        self.updateOutput(input)
    inputSize, outputSize = self.weight.size(0), self.weight.size(1)
    """
dy_j -2 * (w_j - x) x - w_j
---- = ---------------- = -------
dx 2 || w_j - x || y_j
"""
    # to prevent div by zero (NaN) bugs
    self._output.resize_as_(self.output).copy_(self.output).add_(0.0000001)
    self._view(self._gradOutput, gradOutput, gradOutput.size())
    # _div = gradOutput / (output + 1e-7)
    torch.div(self._div, gradOutput, self._output)
    assert input.dim() == 2
    batchSize = input.size(0)
    self._div.resize_(batchSize, 1, outputSize)
    self._expand3 = self._div.expand(batchSize, inputSize, outputSize)
    # NOTE(review): self._repeat / self._repeat2 are not set here; presumably
    # updateOutput fills them -- confirm.
    if torch.typename(input) == 'torch.cuda.FloatTensor':
        self._repeat2.resize_as_(self._expand3).copy_(self._expand3)
        self._repeat2.mul_(self._repeat)
    else:
        torch.mul(self._repeat2, self._repeat, self._expand3)
    # Reduce over dimension 2 and give the gradient the input's shape.
    torch.sum(self.gradInput, self._repeat2, 2)
    self.gradInput.resize_as_(input)
    return self.gradInput
def forward(self, input1, input2, y):
    """Cosine-embedding loss between the rows of ``input1`` and ``input2``.

    A cosine similarity ``cos`` is computed per row.  Rows with ``y == 1``
    contribute ``1 - cos``; rows with ``y == -1`` contribute
    ``max(0, cos - self.margin)``.  The contributions are summed and, when
    ``self.size_average`` is set, divided by ``y.size(0)``.

    Returns a one-element tensor created with ``input1.new``.
    """
    # Work buffers, kept on self (presumably for the backward pass -- confirm).
    self.w1 = input1.new()
    self.w22 = input1.new()
    self.w = input1.new()
    self.w32 = input1.new()
    self._outputs = input1.new()
    _idx = input1.new().byte()
    # w1 = <input1, input2> per row
    buffer = torch.mul(input1, input2)
    torch.sum(buffer, 1, out=self.w1)
    epsilon = 1e-12
    # w22 = 1 / (|input1|^2 + eps)
    torch.mul(input1, input1, out=buffer)
    torch.sum(buffer, 1, out=self.w22).add_(epsilon)
    # _outputs temporarily holds a tensor of ones used as the numerator.
    self._outputs.resize_as_(self.w22).fill_(1)
    torch.div(self._outputs, self.w22, out=self.w22)
    self.w.resize_as_(self.w22).copy_(self.w22)
    # w32 = 1 / (|input2|^2 + eps)
    torch.mul(input2, input2, out=buffer)
    torch.sum(buffer, 1, out=self.w32).add_(epsilon)
    torch.div(self._outputs, self.w32, out=self.w32)
    # w = 1 / (|input1| * |input2|) ; _outputs = cosine similarity
    self.w.mul_(self.w32)
    self.w.sqrt_()
    torch.mul(self.w1, self.w, out=self._outputs)
    self._outputs = self._outputs.select(1, 0)
    # Dissimilar pairs: hinge on (cos - margin).
    torch.eq(y, -1, out=_idx)
    self._outputs[_idx] = self._outputs[_idx].add_(-self.margin).clamp_(min=0)
    # Similar pairs: 1 - cos.
    torch.eq(y, 1, out=_idx)
    self._outputs[_idx] = self._outputs[_idx].mul_(-1).add_(1)
    output = self._outputs.sum()
    if self.size_average:
        output = output / y.size(0)
    self.save_for_backward(input1, input2, y)
    return input1.new((output,))
def updateOutput(self, input):
    """Divide each row of the 2-D ``input`` by its Lp norm and return the result.

    ``self.p`` selects the norm; ``float('inf')`` uses the largest absolute
    value of the row.  ``self.eps`` is added before the division.
    """
    assert input.dim() == 2
    input_size = input.size()
    # Lazily create the work buffers with the same tensor type as the input.
    if self._output is None:
        self._output = input.new()
    if self.norm is None:
        self.norm = input.new()
    if self.buffer is None:
        self.buffer = input.new()
    self._output.resize_as_(input)
    # specialization for the infinity norm
    if self.p == float('inf'):
        if not self._indices:
            self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
                else torch.LongTensor()
        # norm = max(|input|) along dim 1, plus eps
        torch.abs(input, out=self.buffer)
        torch.max(self._indices, self.buffer, 1, out=self.norm)
        self.norm.add_(self.eps)
    else:
        if self.normp is None:
            self.normp = input.new()
        # abs() is applied only when p is not a multiple of 2.
        if self.p % 2 != 0:
            torch.abs(input, out=self.buffer).pow_(self.p)
        else:
            torch.pow(input, self.p, out=self.buffer)
        # normp = sum(|x|^p) + eps ; norm = normp^(1/p)
        torch.sum(self.buffer, 1, out=self.normp).add_(self.eps)
        torch.pow(self.normp, 1. / self.p, out=self.norm)
    torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output)
    self.output = self._output.view(input_size)
    return self.output
def forward(self, input1, input2, y):
    """Cosine-embedding loss between the rows of ``input1`` and ``input2``.

    A cosine similarity ``cos`` is computed per row.  Rows with ``y == 1``
    contribute ``1 - cos``; rows with ``y == -1`` contribute
    ``max(0, cos - self.margin)``.  The contributions are summed and, when
    ``self.size_average`` is set, divided by ``y.size(0)``.

    Returns a one-element tensor created with ``input1.new``.
    """
    # Work buffers, kept on self (presumably for the backward pass -- confirm).
    self.w1 = input1.new()
    self.w22 = input1.new()
    self.w = input1.new()
    self.w32 = input1.new()
    self._outputs = input1.new()
    _idx = input1.new().byte()
    # w1 = <input1, input2> per row; keepdim=True keeps a trailing size-1
    # dimension that select(1, 0) removes further down.
    buffer = torch.mul(input1, input2)
    torch.sum(buffer, 1, out=self.w1, keepdim=True)
    epsilon = 1e-12
    # w22 = 1 / (|input1|^2 + eps)
    torch.mul(input1, input1, out=buffer)
    torch.sum(buffer, 1, out=self.w22, keepdim=True).add_(epsilon)
    # _outputs temporarily holds a tensor of ones used as the numerator.
    self._outputs.resize_as_(self.w22).fill_(1)
    torch.div(self._outputs, self.w22, out=self.w22)
    self.w.resize_as_(self.w22).copy_(self.w22)
    # w32 = 1 / (|input2|^2 + eps)
    torch.mul(input2, input2, out=buffer)
    torch.sum(buffer, 1, out=self.w32, keepdim=True).add_(epsilon)
    torch.div(self._outputs, self.w32, out=self.w32)
    # w = 1 / (|input1| * |input2|) ; _outputs = cosine similarity
    self.w.mul_(self.w32)
    self.w.sqrt_()
    torch.mul(self.w1, self.w, out=self._outputs)
    self._outputs = self._outputs.select(1, 0)
    # Dissimilar pairs: hinge on (cos - margin).
    torch.eq(y, -1, out=_idx)
    self._outputs[_idx] = self._outputs[_idx].add_(-self.margin).clamp_(min=0)
    # Similar pairs: 1 - cos.
    torch.eq(y, 1, out=_idx)
    self._outputs[_idx] = self._outputs[_idx].mul_(-1).add_(1)
    output = self._outputs.sum()
    if self.size_average:
        output = output / y.size(0)
    self.save_for_backward(input1, input2, y)
    return input1.new((output,))
def updateOutput(self, input):
    """Divide each row of the 2-D ``input`` by its Lp norm and return the result.

    ``self.p`` selects the norm; ``float('inf')`` uses the largest absolute
    value of the row.  ``self.eps`` is added before the division.
    """
    assert input.dim() == 2
    input_size = input.size()
    # Lazily create the work buffers with the same tensor type as the input.
    if self._output is None:
        self._output = input.new()
    if self.norm is None:
        self.norm = input.new()
    if self.buffer is None:
        self.buffer = input.new()
    self._output.resize_as_(input)
    # specialization for the infinity norm
    if self.p == float('inf'):
        if not self._indices:
            self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
                else torch.LongTensor()
        # norm = max(|input|) along dim 1 (reduced dimension kept), plus eps
        torch.abs(input, out=self.buffer)
        torch.max(self._indices, self.buffer, 1, out=self.norm, keepdim=True)
        self.norm.add_(self.eps)
    else:
        if self.normp is None:
            self.normp = input.new()
        # abs() is applied only when p is not a multiple of 2.
        if self.p % 2 != 0:
            torch.abs(input, out=self.buffer).pow_(self.p)
        else:
            torch.pow(input, self.p, out=self.buffer)
        # normp = sum(|x|^p) + eps ; norm = normp^(1/p)
        torch.sum(self.buffer, 1, out=self.normp, keepdim=True).add_(self.eps)
        torch.pow(self.normp, 1. / self.p, out=self.norm)
    torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output)
    self.output = self._output.view(input_size)
    return self.output
def forward(self, dec_state, context, mask=None):
    """Additive attention of one decoder state over an encoded sequence.

    :param dec_state: batch x dec_dim
    :param context: batch x T x enc_dim
    :param mask: optional batch x T tensor multiplied into the
        exponentiated energies; positions where it is zero receive zero
        weight.
    :return: Weighted context, batch x enc_dim
             Alpha weights (viz), batch x T
    :raises AssertionError: if the last dimension of ``context`` is not
        ``self.enc_dim``.
    """
    batch, source_l, enc_dim = context.size()
    assert enc_dim == self.enc_dim
    # W*s over the entire batch (batch, attn_dim)
    dec_contrib = self.decoder_in(dec_state)
    # W*h over the entire length & batch (batch, source_l, attn_dim)
    enc_contribs = self.encoder_in(
        context.view(-1, self.enc_dim)).view(batch, source_l, self.attn_dim)
    # tanh( Wh*hj + Ws s_{i-1} ) (batch, source_l, dim)
    pre_attn = F.tanh(enc_contribs + dec_contrib.unsqueeze(1).expand_as(enc_contribs))
    # v^T*pre_attn for all batches/lengths (batch, source_l)
    energy = self.att_linear(pre_attn.view(-1, self.attn_dim)).view(batch, source_l)
    if mask is not None:
        # Masked softmax.  The row maximum is subtracted before exp() for
        # numerical stability.  Both reductions are reshaped to (batch, 1)
        # explicitly so that expand_as works whether or not the installed
        # torch keeps the reduced dimension.
        shift = energy.max(1)[0].view(batch, 1)
        energy_exp = (energy - shift.expand_as(energy)).exp() * mask
        normaliser = energy_exp.sum(1).view(batch, 1)
        alpha = torch.div(energy_exp, normaliser.expand_as(energy_exp))
    else:
        alpha = F.softmax(energy)
    weighted_context = torch.bmm(alpha.unsqueeze(1), context).squeeze(1)  # (batch, dim)
    return weighted_context, alpha
def rotation_error(input, target):
    """Return the mean angle, in degrees, between matching rows of two batches.

    Every row of ``input`` and ``target`` is scaled to unit length; the angle
    of a pair is ``2 * acos(|dot|)`` converted to degrees.  The original code
    hard-coded four components per row (presumably quaternions -- confirm);
    any row width is accepted now.

    Args:
        input: 2-D tensor, one vector per row.
        target: tensor of the same shape as ``input``.

    Returns:
        Tensor holding the mean angle over the batch.
    """
    # keepdim=True lets the norm expand across all components of a row
    # without stacking a fixed number of copies.
    x1 = torch.div(input, torch.norm(input, dim=1, keepdim=True).expand_as(input))
    x2 = torch.div(target, torch.norm(target, dim=1, keepdim=True).expand_as(target))
    d = torch.abs(torch.sum(x1 * x2, dim=1))
    # Rounding can push |dot| marginally above 1, where acos returns NaN and
    # poisons the mean; clamp it back into the valid domain.
    d = torch.clamp(d, max=1.0)
    theta = 2 * torch.acos(d) * 180 / math.pi
    return torch.mean(theta)
def rotation_error(input, target):
    """Return the mean angle, in degrees, between matching rows of two batches.

    Every row of ``input`` and ``target`` is scaled to unit length; the angle
    of a pair is ``2 * acos(|dot|)`` converted to degrees.  The original code
    hard-coded four components per row (presumably quaternions -- confirm);
    any row width is accepted now.

    Args:
        input: 2-D tensor, one vector per row.
        target: tensor of the same shape as ``input``.

    Returns:
        Tensor holding the mean angle over the batch.
    """
    # keepdim=True lets the norm expand across all components of a row
    # without stacking a fixed number of copies.
    x1 = torch.div(input, torch.norm(input, dim=1, keepdim=True).expand_as(input))
    x2 = torch.div(target, torch.norm(target, dim=1, keepdim=True).expand_as(target))
    d = torch.abs(torch.sum(x1 * x2, dim=1))
    # Rounding can push |dot| marginally above 1, where acos returns NaN and
    # poisons the mean; clamp it back into the valid domain.
    d = torch.clamp(d, max=1.0)
    theta = 2 * torch.acos(d) * 180 / math.pi
    return torch.mean(theta)
def forward(self, inpt):
    """Regress translation, scale and rotation from the first two frames.

    Frames 0 and 1 along dimension 1 of ``inpt`` are passed through
    ``self.features``, flattened per sample, concatenated and fed to
    ``self.regressor``; three heads then read the shared representation.
    The original also carried a disabled five-frame recurrent path and a
    disabled translation normalisation; neither is executed.

    Returns:
        Tuple ``(trans, scale, rotation)``.
    """
    n = self.batch_size
    first = self.features(inpt[:, 0]).view(n, -1)
    second = self.features(inpt[:, 1]).view(n, -1)
    shared = self.regressor(torch.cat((first, second), dim=1))
    trans = self.trans_regressor(shared)
    scale = self.scale_regressor(shared)
    rotation = self.rotation_regressor(shared)
    return trans, scale, rotation
def updateOutput(self, input):
    """Divide each row of the 2-D ``input`` by its Lp norm and return the result.

    ``self.p`` selects the norm; ``float('inf')`` uses the largest absolute
    value of the row.  ``self.eps`` is added before the division.
    """
    assert input.dim() == 2
    input_size = input.size()
    # Lazily create the work buffers with the same tensor type as the input.
    if self._output is None:
        self._output = input.new()
    if self.norm is None:
        self.norm = input.new()
    if self.buffer is None:
        self.buffer = input.new()
    self._output.resize_as_(input)
    # specialization for the infinity norm
    if self.p == float('inf'):
        if not self._indices:
            self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
                else torch.LongTensor()
        # norm = max(|input|) along dim 1 (reduced dimension kept), plus eps
        torch.abs(input, out=self.buffer)
        torch.max(self._indices, self.buffer, 1, out=self.norm, keepdim=True)
        self.norm.add_(self.eps)
    else:
        if self.normp is None:
            self.normp = input.new()
        # abs() is applied only when p is not a multiple of 2.
        if self.p % 2 != 0:
            torch.abs(input, out=self.buffer).pow_(self.p)
        else:
            torch.pow(input, self.p, out=self.buffer)
        # normp = sum(|x|^p) + eps ; norm = normp^(1/p)
        torch.sum(self.buffer, 1, out=self.normp, keepdim=True).add_(self.eps)
        torch.pow(self.normp, 1. / self.p, out=self.norm)
    torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output)
    self.output = self._output.view(input_size)
    return self.output
def l2_norm(self, input):
    """Scale every row of ``input`` to (approximately) unit L2 length.

    ``1e-10`` is added to the squared length before the square root.
    The result has the same size as ``input``.
    """
    squared_length = input.pow(2).sum(1).add_(1e-10)
    length = squared_length.sqrt()
    scaled = input / length.view(-1, 1).expand_as(input)
    return scaled.view(input.size())
def l2_norm(self, input):
    """Return ``input`` with each row divided by its L2 norm.

    A constant of ``1e-10`` is added under the square root.  The returned
    tensor is viewed with the original size of ``input``.
    """
    original_size = input.size()
    energy = torch.sum(input * input, 1)
    energy.add_(1e-10)
    row_norm = energy.sqrt().view(-1, 1)
    normalized = input.div(row_norm.expand_as(input))
    return normalized.view(original_size)
def normalize_batch(batch):
    """Scale ``batch`` from 0-255 to 0-1, then standardise channels 0-2.

    The per-channel constants are the usual ImageNet mean and standard
    deviation.  ``batch`` is indexed as ``[:, channel, :, :]`` and is not
    modified; a new tensor is returned.
    """
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)
    mean = batch.data.new(batch.data.size())
    std = batch.data.new(batch.data.size())
    for channel in range(3):
        mean[:, channel, :, :] = channel_mean[channel]
        std[:, channel, :, :] = channel_std[channel]
    scaled = torch.div(batch, 255.0)
    scaled -= Variable(mean)
    return scaled / Variable(std)
def batch_norm_scattering(x, m, v):
    """Return ``(x - m) / v`` with ``m`` and ``v`` expanded to the size of ``x``."""
    offset = m.expand_as(x)
    scale = v.expand_as(x)
    centered = torch.add(x, -offset)
    return centered / scale
def forward(self, input):
    """Run the layer stack and return the unit-length embedding.

    Inputs whose last dimension is 128 are first passed through
    ``self.resize1`` and ``self.resize2``.  The activations after
    ``layer22`` are flattened to 736 features per sample, projected by
    ``layer25`` and scaled to unit L2 length.

    Returns:
        Tuple ``(embedding, x_736)`` where ``x_736`` is the flattened
        736-feature tensor taken before ``layer25``.
    """
    x = input
    if x.data.is_cuda and self.gpuDevice != 0:
        x = x.cuda(self.gpuDevice)
    if x.size()[-1] == 128:
        x = self.resize2(self.resize1(x))
    # The trunk is applied strictly in this order; there is no layer20.
    trunk = (
        self.layer1, self.layer2, self.layer3, self.layer4,
        self.layer5, self.layer6, self.layer7, self.layer8,
        self.layer9, self.layer10, self.layer11, self.layer12, self.layer13,
        self.layer14, self.layer15, self.layer16, self.layer17,
        self.layer18, self.layer19, self.layer21, self.layer22,
    )
    for stage in trunk:
        x = stage(x)
    x_736 = x.view((-1, 736))
    projected = self.layer25(x_736)
    length = torch.sqrt(torch.sum(projected ** 2, 1) + 1e-6)
    embedding = projected / length.view(-1, 1).expand_as(projected)
    return (embedding, x_736)
def updateOutput(self, input):
    """Divide each row of the 2-D ``input`` by its Lp norm and return the result.

    ``self.p`` selects the norm; ``float('inf')`` uses the largest absolute
    value of the row.  ``self.eps`` is added before the division.
    """
    assert input.dim() == 2
    input_size = input.size()
    # Lazily create the work buffers with the same tensor type as the input.
    if self._output is None:
        self._output = input.new()
    if self.norm is None:
        self.norm = input.new()
    if self.buffer is None:
        self.buffer = input.new()
    self._output.resize_as_(input)
    # specialization for the infinity norm
    if self.p == float('inf'):
        if not self._indices:
            self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
                else torch.LongTensor()
        # norm = max(|input|) along dim 1 (reduced dimension kept), plus eps
        torch.abs(input, out=self.buffer)
        torch.max(self._indices, self.buffer, 1, out=self.norm, keepdim=True)
        self.norm.add_(self.eps)
    else:
        if self.normp is None:
            self.normp = input.new()
        # abs() is applied only when p is not a multiple of 2.
        if self.p % 2 != 0:
            torch.abs(input, out=self.buffer).pow_(self.p)
        else:
            torch.pow(input, self.p, out=self.buffer)
        # normp = sum(|x|^p) + eps ; norm = normp^(1/p)
        torch.sum(self.buffer, 1, out=self.normp, keepdim=True).add_(self.eps)
        torch.pow(self.normp, 1. / self.p, out=self.norm)
    torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output)
    self.output = self._output.view(input_size)
    return self.output
def columnwise_cosine_similarity(matrix1, matrix2):
    """Return the cosine similarity of matching vectors in two tensors.

    Expects tensors of dimension (batch_size, seq_len, hidden).
    Returns a tensor of size (batch_size, seq_len) containing the cosine
    similarity of ``matrix1[b, t, :]`` and ``matrix2[b, t, :]``.

    Raises:
        AssertionError: if the two tensors differ in size.
    """
    assert matrix1.size() == matrix2.size(), 'matrix sizes do not match'
    # Norms over the hidden dimension.
    n_m1 = torch.norm(matrix1, 2, 2)
    n_m2 = torch.norm(matrix2, 2, 2)
    col_norm = torch.mul(n_m1, n_m2)
    # Dot product over the hidden dimension.
    colsum = torch.sum(torch.mul(matrix1, matrix2), 2)
    cosine_sim = torch.div(colsum, col_norm)
    # Drop only the reduced trailing dimension, and only when the installed
    # torch kept it.  A bare squeeze() would also remove batch_size == 1 or
    # seq_len == 1 and break the documented (batch_size, seq_len) shape.
    if cosine_sim.dim() == 3:
        cosine_sim = cosine_sim.squeeze(2)
    return cosine_sim
def full_cosine_similarity(matrix1, matrix2):
    """Return all pairwise cosine similarities between the columns of two matrices.

    Expects 2 matrices P and Q of dimension (d, n1) and (d, n2) respectively.
    Returns a matrix A of dimension (n1, n2); A[i, j] is the cosine
    similarity between vectors P[:, i] and Q[:, j].

    Raises:
        AssertionError: if the matrices differ in their first dimension.
    """
    n1 = matrix1.size(1)
    n2 = matrix2.size(1)
    d = matrix1.size(0)
    assert d == matrix2.size(0)
    # -> (d, n1, 1) -> (d, n1, n2)
    t1 = matrix1.view(d, n1, 1).repeat(1, 1, n2)
    # -> (d, 1, n2) -> (d, n1, n2)
    t2 = matrix2.view(d, 1, n2).repeat(1, n1, 1).contiguous()
    t1_x_t2 = torch.mul(t1, t2)  # (d, n1, n2)
    # view(n1, n2) removes the reduced dimension when the installed torch
    # keeps it, and -- unlike a bare squeeze() -- preserves n1 == 1 or
    # n2 == 1.
    dotprod = torch.sum(t1_x_t2, 0).view(n1, n2)
    norm1 = torch.norm(t1, 2, 0)
    norm2 = torch.norm(t2, 2, 0)
    col_norm = torch.mul(norm1, norm2).view(n1, n2)
    return torch.div(dotprod, col_norm)  # (n1, n2)
def batch_full_cosine_similarity(tensor1, tensor2):
    """Return, per batch element, all pairwise cosine similarities.

    Expects 2 tensors of dimension (batch_size, seq_len_p, hidden) and
    (batch_size, seq_len_q, hidden) respectively.
    Returns a tensor A of dimension (batch_size, seq_len_p, seq_len_q) where
    A[k, i, j] is the cosine similarity between ``tensor1[k, i, :]`` and
    ``tensor2[k, j, :]``.

    Raises:
        AssertionError: if the batch sizes or hidden sizes differ.
    """
    batch_size = tensor1.size(0)
    seq_len_p = tensor1.size(1)
    seq_len_q = tensor2.size(1)
    hidden = tensor1.size(2)
    assert batch_size == tensor2.size(0)
    assert hidden == tensor2.size(2)
    out_shape = (batch_size, seq_len_p, seq_len_q)
    # -> (batch_size, seq_len_p, 1, hidden) -> (batch_size, seq_len_p, seq_len_q, hidden)
    t1 = tensor1.unsqueeze(2).repeat(1, 1, seq_len_q, 1)
    # -> (batch_size, 1, seq_len_q, hidden) -> (batch_size, seq_len_p, seq_len_q, hidden)
    t2 = tensor2.unsqueeze(1).repeat(1, seq_len_p, 1, 1)
    t1_x_t2 = torch.mul(t1, t2)
    # The reductions over ``hidden`` are reshaped explicitly: squeeze(3)
    # raises when the installed torch already dropped the reduced dimension.
    dotprod = torch.sum(t1_x_t2, 3).view(*out_shape)
    norm1 = torch.norm(t1, 2, 3)
    norm2 = torch.norm(t2, 2, 3)
    col_norm = torch.mul(norm1, norm2).view(*out_shape)
    return torch.div(dotprod, col_norm)  # (batch_size, seq_len_p, seq_len_q)
def l2norm(X):
    """Divide ``X`` by its L2 norm taken along dimension 1.

    The norm is computed with ``keepdim=True`` so it lines up with ``X``
    for the division.  No epsilon is added.
    """
    length = X.pow(2).sum(dim=1, keepdim=True).sqrt()
    return X / length
def forward(self, input_n, hidden, phi, nh):
    """Compute the next hidden state from node features and their aggregate.

    ``hidden`` and ``input_n`` are concatenated along dimension 2.  The
    joint features are aggregated with ``bmm(phi, .) / nh``; both the joint
    and the aggregated features are projected (``self.W1`` / ``self.W2``),
    summed with the bias ``self.b`` and passed through a sigmoid.

    Returns:
        Tensor of shape (self.batch_size, -1, self.hidden_size).
    """
    width = self.hidden_size + self.input_size
    joint = torch.cat((hidden, input_n), 2)
    # Aggregate representations
    aggregated = torch.bmm(phi, joint) / nh
    flat_joint = joint.view(-1, width)
    flat_aggregated = aggregated.view(-1, width)
    own_term = flat_joint.mm(self.W1).view(self.batch_size, -1, self.hidden_size)
    neighbour_term = flat_aggregated.mm(self.W2).view(self.batch_size, -1, self.hidden_size)
    bias = self.b.unsqueeze(0).unsqueeze(1).expand_as(neighbour_term)
    return torch.sigmoid(own_term + neighbour_term + bias)
def forward(self, input_n, hidden, phi, nh):
    """Return the updated hidden state of every node.

    The concatenation of ``hidden`` and ``input_n`` (dimension 2) is used
    twice: directly, projected by ``self.W1``, and averaged through
    ``bmm(phi, .) / nh``, projected by ``self.W2``.  The two projections and
    the bias ``self.b`` are summed and squashed with a sigmoid.

    Returns:
        Tensor of shape (self.batch_size, -1, self.hidden_size).
    """
    feature_dim = self.hidden_size + self.input_size
    out_shape = (self.batch_size, -1, self.hidden_size)
    stacked = torch.cat((hidden, input_n), 2)
    # Aggregate representations
    pooled = torch.div(torch.bmm(phi, stacked), nh)
    m1 = torch.mm(stacked.view(-1, feature_dim), self.W1).view(*out_shape)
    m2 = torch.mm(pooled.view(-1, feature_dim), self.W2).view(*out_shape)
    m3 = self.b.unsqueeze(0).unsqueeze(1).expand_as(m2)
    activated = torch.sigmoid(m1 + m2 + m3)
    return activated
def forward(self, input_n, hidden, phi, nh):
    """Compute the next hidden state from node features and their aggregate.

    Same scheme as the plain layer, except that the concatenated feature
    width is ``hidden_size + input_size + 2``.

    Returns:
        Tensor of shape (self.batch_size, -1, self.hidden_size).
    """
    width = self.hidden_size + self.input_size + 2
    joint = torch.cat((hidden, input_n), 2)
    # Aggregate representations
    aggregated = torch.bmm(phi, joint) / nh
    flat_joint = joint.view(-1, width)
    flat_aggregated = aggregated.view(-1, width)
    own_term = flat_joint.mm(self.W1).view(self.batch_size, -1, self.hidden_size)
    neighbour_term = flat_aggregated.mm(self.W2).view(self.batch_size, -1, self.hidden_size)
    bias = self.b.unsqueeze(0).unsqueeze(1).expand_as(neighbour_term)
    return torch.sigmoid(own_term + neighbour_term + bias)
def forward(self, input_n, hidden, phi, nh):
    """Compute the next hidden state from node features and their aggregate.

    ``self.batch_size`` is refreshed from ``input_n`` on every call.  The
    concatenation of ``hidden`` and ``input_n`` (dimension 2) is projected
    by ``self.W1``; its ``bmm(phi, .) / nh`` aggregate is projected by
    ``self.W2``; both are summed with ``self.b`` and passed through a
    sigmoid.

    Returns:
        Tensor of shape (self.batch_size, -1, self.hidden_size).
    """
    self.batch_size = input_n.size()[0]
    width = self.hidden_size + self.input_size
    joint = torch.cat((hidden, input_n), 2)
    # Aggregate representations
    aggregated = torch.bmm(phi, joint) / nh
    flat_joint = joint.view(-1, width)
    flat_aggregated = aggregated.view(-1, width)
    own_term = flat_joint.mm(self.W1).view(self.batch_size, -1, self.hidden_size)
    neighbour_term = flat_aggregated.mm(self.W2).view(self.batch_size, -1, self.hidden_size)
    bias = self.b.unsqueeze(0).unsqueeze(1).expand_as(neighbour_term)
    return torch.sigmoid(own_term + neighbour_term + bias)
def backward(self, grad_output):
    """Return the gradient of the Dice-style loss with respect to the input.

    Uses ``self.intersect``, ``self.union`` and ``self.target_`` (presumably
    stored by the forward pass -- confirm).  With
    ``dDice = 2 * target / union - 4 * input[:, 1] * intersect / union**2``
    the result is ``-dDice * g`` followed by ``dDice * g`` concatenated
    along dimension 0, where ``g = grad_output[0]``.

    Returns:
        Tuple ``(grad_input, None)``.
    """
    prediction, _ = self.saved_tensors
    overlap, total = self.intersect, self.union
    upstream = grad_output[0]
    target_term = torch.div(self.target_, total).mul(2)
    prediction_term = torch.mul(prediction[:, 1], overlap / (total * total)).mul(-4)
    d_dice = target_term + prediction_term
    halves = (d_dice * -upstream, d_dice * upstream)
    return torch.cat(halves, 0), None
def forward(self, input1, input2, y):
    """Cosine-embedding loss between the rows of ``input1`` and ``input2``.

    A cosine similarity ``cos`` is computed per row.  Rows with ``y == 1``
    contribute ``1 - cos``; rows with ``y == -1`` contribute
    ``max(0, cos - self.margin)``.  The contributions are summed and, when
    ``self.size_average`` is set, divided by ``y.size(0)``.  The torch calls
    appear to use the legacy result-first calling convention -- confirm
    against the targeted torch version.

    Returns a one-element tensor created with ``input1.new``.
    """
    # Work buffers, kept on self (presumably for the backward pass -- confirm).
    self.w1 = input1.new()
    self.w22 = input1.new()
    self.w = input1.new()
    self.w32 = input1.new()
    self._outputs = input1.new()
    buffer = input1.new()
    _idx = self._new_idx(input1)
    # w1 = <input1, input2> per row
    torch.mul(buffer, input1, input2)
    torch.sum(self.w1, buffer, 1)
    epsilon = 1e-12
    # w22 = 1 / (|input1|^2 + eps)
    torch.mul(buffer, input1, input1)
    torch.sum(self.w22, buffer, 1).add_(epsilon)
    # _outputs temporarily holds a tensor of ones used as the numerator.
    self._outputs.resize_as_(self.w22).fill_(1)
    torch.div(self.w22, self._outputs, self.w22)
    self.w.resize_as_(self.w22).copy_(self.w22)
    # w32 = 1 / (|input2|^2 + eps)
    torch.mul(buffer, input2, input2)
    torch.sum(self.w32, buffer, 1).add_(epsilon)
    torch.div(self.w32, self._outputs, self.w32)
    # w = 1 / (|input1| * |input2|) ; _outputs = cosine similarity
    self.w.mul_(self.w32)
    self.w.sqrt_()
    torch.mul(self._outputs, self.w1, self.w)
    self._outputs = self._outputs.select(1, 0)
    # Dissimilar pairs: hinge on (cos - margin).
    torch.eq(_idx, y, -1)
    self._outputs[_idx] = self._outputs[_idx].add_(-self.margin).cmax_(0)
    # Similar pairs: 1 - cos.
    torch.eq(_idx, y, 1)
    self._outputs[_idx] = self._outputs[_idx].mul_(-1).add_(1)
    output = self._outputs.sum()
    if self.size_average:
        output = output / y.size(0)
    self.save_for_backward(input1, input2, y)
    return input1.new((output,))
def updateOutput(self, input, y):
    """Cosine-embedding criterion for the pair ``input[0]``, ``input[1]``.

    A cosine similarity ``cos`` is computed per row.  Rows with ``y == 1``
    contribute ``1 - cos``; rows with ``y == -1`` contribute
    ``max(0, cos - self.margin)``.  The sum is stored in ``self.output``
    (divided by ``y.size(0)`` when ``self.sizeAverage`` is set) and returned.
    The torch calls appear to use the legacy result-first calling
    convention -- confirm against the targeted torch version.
    """
    input1, input2 = input[0], input[1]
    # keep backward compatibility
    if not self.buffer:
        self.buffer = input1.new()
        self.w1 = input1.new()
        self.w22 = input1.new()
        self.w = input1.new()
        self.w32 = input1.new()
        self._outputs = input1.new()
        # comparison operators behave differently from cuda/c implementations
        # TODO: verify name
        if input1.type() == 'torch.cuda.FloatTensor':
            self._idx = torch.cuda.ByteTensor()
        else:
            self._idx = torch.ByteTensor()
    # w1 = <input1, input2> per row
    torch.mul(self.buffer, input1, input2)
    torch.sum(self.w1, self.buffer, 1)
    epsilon = 1e-12
    # w22 = 1 / (|input1|^2 + eps)
    torch.mul(self.buffer, input1, input1)
    torch.sum(self.w22, self.buffer, 1).add_(epsilon)
    # self._outputs is also used as a temporary buffer
    self._outputs.resize_as_(self.w22).fill_(1)
    torch.div(self.w22, self._outputs, self.w22)
    self.w.resize_as_(self.w22).copy_(self.w22)
    # w32 = 1 / (|input2|^2 + eps)
    torch.mul(self.buffer, input2, input2)
    torch.sum(self.w32, self.buffer, 1).add_(epsilon)
    torch.div(self.w32, self._outputs, self.w32)
    # w = 1 / (|input1| * |input2|) ; _outputs = cosine similarity
    self.w.mul_(self.w32)
    self.w.sqrt_()
    torch.mul(self._outputs, self.w1, self.w)
    self._outputs = self._outputs.select(1, 0)
    # Dissimilar pairs: hinge on (cos - margin).
    torch.eq(self._idx, y, -1)
    self._outputs[self._idx] = self._outputs[self._idx].add_(-self.margin).cmax_(0)
    # Similar pairs: 1 - cos.
    torch.eq(self._idx, y, 1)
    self._outputs[self._idx] = self._outputs[self._idx].mul_(-1).add_(1)
    self.output = self._outputs.sum()
    if self.sizeAverage:
        self.output = self.output / y.size(0)
    return self.output
def updateGradInput(self, input, gradOutput):
    """Compute ``self.gradInput`` for a 1-D or 2-D ``input`` and return it.

    Returns ``None`` without doing any work when ``self.gradInput`` is
    falsy.  The torch calls appear to use the legacy result-first calling
    convention -- confirm against the targeted torch version.

    Raises:
        RuntimeError: if ``input`` is neither 1-D nor 2-D.
    """
    if not self.gradInput:
        return
    # Lazily create the work buffers.
    self._div = self._div or input.new()
    self._output = self._output or self.output.new()
    self._expand4 = self._expand4 or input.new()
    self._gradOutput = self._gradOutput or input.new()
    # Recompute the forward pass unless fastBackward says it can be reused.
    if not self.fastBackward:
        self.updateOutput(input)
    inputSize, outputSize = self.weight.size(0), self.weight.size(1)
    # Gradient being implemented:
    #
    #   dy_j   -2 * c_j * c_j * (w_j - x)   c_j * c_j * (x - w_j)
    #   ---- = -------------------------- = ---------------------
    #    dx     2 || c_j * (w_j - x) ||              y_j
    #
    # to prevent div by zero (NaN) bugs
    self._output.resize_as_(self.output).copy_(self.output).add_(1e-7)
    self._view(self._gradOutput, gradOutput, gradOutput.size())
    torch.div(self._div, gradOutput, self._output)
    if input.dim() == 1:
        self._div.resize_(1, outputSize)
        self._expand4 = self._div.expand_as(self.weight)
        # ``torch.type`` is not a torch function; query the tensor type the
        # same way the 2-D branch below does.
        if input.type() == 'torch.cuda.FloatTensor':
            self._repeat2.resize_as_(self._expand4).copy_(self._expand4)
            self._repeat2.mul_(self._repeat)
        else:
            self._repeat2.mul_(self._repeat, self._expand4)
        self._repeat2.mul_(self.diagCov)
        torch.sum(self.gradInput, self._repeat2, 1)
        self.gradInput.resize_as_(input)
    elif input.dim() == 2:
        batchSize = input.size(0)
        self._div.resize_(batchSize, 1, outputSize)
        self._expand4 = self._div.expand(batchSize, inputSize, outputSize)
        if input.type() == 'torch.cuda.FloatTensor':
            self._repeat2.resize_as_(self._expand4).copy_(self._expand4)
            self._repeat2.mul_(self._repeat)
            self._repeat2.mul_(self._repeat3)
        else:
            torch.mul(self._repeat2, self._repeat, self._expand4)
            self._repeat2.mul_(self._expand3)
        torch.sum(self.gradInput, self._repeat2, 2)
        self.gradInput.resize_as_(input)
    else:
        raise RuntimeError("1D or 2D input expected")
    return self.gradInput
def pre_train_r(self):
    """Pre-train the refiner ``self.R`` with the self-regularization loss only.

    When ``cfg.ref_pre_path`` is set the weights are loaded from it and no
    training happens.  Otherwise ``cfg.r_pretrain`` optimisation steps are
    run.  At every logging step a comparison figure is written and the
    current weights are saved to ``models/R_pre.pkl``.
    """
    print('=' * 50)
    if cfg.ref_pre_path:
        print('Loading R_pre from %s' % cfg.ref_pre_path)
        self.R.load_state_dict(torch.load(cfg.ref_pre_path))
        return
    # we first train the R network with just self-regularization loss for 1,000 steps
    print('pre-training the refiner network %d times...' % cfg.r_pretrain)
    for index in range(cfg.r_pretrain):
        # next(iter(...)) works on both Python 2 and 3, whereas the
        # iterator's ``.next()`` method only exists on Python 2 style
        # iterators.  A fresh iterator is still created for every step.
        syn_image_batch, _ = next(iter(self.syn_train_loader))
        syn_image_batch = Variable(syn_image_batch).cuda(cfg.cuda_num)
        self.R.train()
        ref_image_batch = self.R(syn_image_batch)
        r_loss = self.self_regularization_loss(ref_image_batch, syn_image_batch)
        r_loss = torch.mul(r_loss, self.delta)
        self.opt_R.zero_grad()
        r_loss.backward()
        self.opt_R.step()
        # log every `cfg.r_pre_per` steps, and on the last step
        if (index % cfg.r_pre_per == 0) or (index == cfg.r_pretrain - 1):
            print('[%d/%d] (R)reg_loss: %.4f' % (index, cfg.r_pretrain, r_loss.data[0]))
            syn_image_batch, _ = next(iter(self.syn_train_loader))
            syn_image_batch = Variable(syn_image_batch, volatile=True).cuda(cfg.cuda_num)
            real_image_batch, _ = next(iter(self.real_loader))
            real_image_batch = Variable(real_image_batch, volatile=True)
            # Switch to eval mode for the preview, then back to training.
            self.R.eval()
            ref_image_batch = self.R(syn_image_batch)
            figure_path = os.path.join(cfg.train_res_path, 'refined_image_batch_pre_train_%d.png' % index)
            generate_img_batch(syn_image_batch.data.cpu(), ref_image_batch.data.cpu(),
                               real_image_batch.data, figure_path)
            self.R.train()
            # NOTE(review): checkpointing is placed inside the logging
            # branch; confirm this nesting against the upstream project.
            print('Save R_pre to models/R_pre.pkl')
            torch.save(self.R.state_dict(), 'models/R_pre.pkl')
def pre_train_d(self):
    """Pre-train the discriminator ``self.D`` on real and refined batches.

    When ``cfg.disc_pre_path`` is set the weights are loaded from it and no
    training happens.  Otherwise ``cfg.d_pretrain`` optimisation steps are
    run with ``self.R`` in eval mode, and the resulting weights are saved
    to ``models/D_pre.pkl``.

    Raises:
        AssertionError: if a real batch and a synthetic batch differ in size.
    """
    print('=' * 50)
    if cfg.disc_pre_path:
        print('Loading D_pre from %s' % cfg.disc_pre_path)
        self.D.load_state_dict(torch.load(cfg.disc_pre_path))
        return
    # and D for 200 steps (one mini-batch for refined images, another for real)
    # The banner reports the discriminator's own step count; it used to
    # print the refiner's (cfg.r_pretrain) by mistake.
    print('pre-training the discriminator network %d times...' % cfg.d_pretrain)
    self.D.train()
    self.R.eval()
    for index in range(cfg.d_pretrain):
        # next(iter(...)) works on both Python 2 and 3, whereas the
        # iterator's ``.next()`` method only exists on Python 2 style
        # iterators.  A fresh iterator is still created for every step.
        real_image_batch, _ = next(iter(self.real_loader))
        real_image_batch = Variable(real_image_batch).cuda(cfg.cuda_num)
        syn_image_batch, _ = next(iter(self.syn_train_loader))
        syn_image_batch = Variable(syn_image_batch).cuda(cfg.cuda_num)
        assert real_image_batch.size(0) == syn_image_batch.size(0)
        # ============ real image D ====================================================
        d_real_pred = self.D(real_image_batch).view(-1, 2)
        # Class 0 labels real patches, class 1 labels refined patches.
        d_real_y = Variable(torch.zeros(d_real_pred.size(0)).type(torch.LongTensor)).cuda(cfg.cuda_num)
        d_ref_y = Variable(torch.ones(d_real_pred.size(0)).type(torch.LongTensor)).cuda(cfg.cuda_num)
        acc_real = calc_acc(d_real_pred, 'real')
        d_loss_real = self.local_adversarial_loss(d_real_pred, d_real_y)
        # ============ syn image D ====================================================
        ref_image_batch = self.R(syn_image_batch)
        d_ref_pred = self.D(ref_image_batch).view(-1, 2)
        acc_ref = calc_acc(d_ref_pred, 'refine')
        d_loss_ref = self.local_adversarial_loss(d_ref_pred, d_ref_y)
        d_loss = d_loss_real + d_loss_ref
        self.opt_D.zero_grad()
        d_loss.backward()
        self.opt_D.step()
        # log every `cfg.d_pre_per` steps, and on the last step
        if (index % cfg.d_pre_per == 0) or (index == cfg.d_pretrain - 1):
            print('[%d/%d] (D)d_loss:%f acc_real:%.2f%% acc_ref:%.2f%%'
                  % (index, cfg.d_pretrain, d_loss.data[0], acc_real, acc_ref))
    print('Save D_pre to models/D_pre.pkl')
    torch.save(self.D.state_dict(), 'models/D_pre.pkl')
def updateOutput(self, input, y):
    """Cosine-embedding criterion for the pair ``input[0]``, ``input[1]``.

    A cosine similarity ``cos`` is computed per row.  Rows with ``y == 1``
    contribute ``1 - cos``; rows with ``y == -1`` contribute
    ``max(0, cos - self.margin)``.  The sum is stored in ``self.output``
    (divided by ``y.size(0)`` when ``self.sizeAverage`` is set) and returned.
    """
    input1, input2 = input[0], input[1]
    # keep backward compatibility
    if self.buffer is None:
        self.buffer = input1.new()
        self.w1 = input1.new()
        self.w22 = input1.new()
        self.w = input1.new()
        self.w32 = input1.new()
        self._outputs = input1.new()
        # comparison operators behave differently from cuda/c implementations
        # TODO: verify name
        if input1.type() == 'torch.cuda.FloatTensor':
            self._idx = torch.cuda.ByteTensor()
        else:
            self._idx = torch.ByteTensor()
    # w1 = <input1, input2> per row
    torch.mul(input1, input2, out=self.buffer)
    torch.sum(self.buffer, 1, out=self.w1)
    epsilon = 1e-12
    # w22 = 1 / (|input1|^2 + eps)
    torch.mul(input1, input1, out=self.buffer)
    torch.sum(self.buffer, 1, out=self.w22).add_(epsilon)
    # self._outputs is also used as a temporary buffer
    self._outputs.resize_as_(self.w22).fill_(1)
    torch.div(self._outputs, self.w22, out=self.w22)
    self.w.resize_as_(self.w22).copy_(self.w22)
    # w32 = 1 / (|input2|^2 + eps)
    torch.mul(input2, input2, out=self.buffer)
    torch.sum(self.buffer, 1, out=self.w32).add_(epsilon)
    torch.div(self._outputs, self.w32, out=self.w32)
    # w = 1 / (|input1| * |input2|) ; _outputs = cosine similarity
    self.w.mul_(self.w32)
    self.w.sqrt_()
    torch.mul(self.w1, self.w, out=self._outputs)
    self._outputs = self._outputs.select(1, 0)
    # Dissimilar pairs: hinge on (cos - margin).
    torch.eq(y, -1, out=self._idx)
    self._outputs[self._idx] = self._outputs[self._idx].add_(-self.margin).clamp_(min=0)
    # Similar pairs: 1 - cos.
    torch.eq(y, 1, out=self._idx)
    self._outputs[self._idx] = self._outputs[self._idx].mul_(-1).add_(1)
    self.output = self._outputs.sum()
    if self.sizeAverage:
        self.output = self.output / y.size(0)
    return self.output
def updateGradInput(self, input, gradOutput):
    """Compute ``self.gradInput`` for a batched (2D) ``input``.

    The gradient follows

        dy_j   -2 * (w_j - x)     x - w_j
        ---- = ---------------- = -------
         dx    2 || w_j - x ||      y_j

    ``gradOutput`` is divided element-wise by the (slightly shifted)
    forward output, expanded over the input dimension, multiplied by
    ``self._repeat`` and summed over the last dimension.

    Args:
        input: 2D tensor (asserted).
        gradOutput: gradient with respect to ``self.output``.

    Returns:
        ``self.gradInput`` resized like ``input``, or ``None`` when
        ``self.gradInput`` is ``None``.
    """
    if self.gradInput is None:
        return

    # Scratch buffers are created lazily on first use.
    for scratch_name in ('_div', '_gradOutput', '_expand3'):
        if getattr(self, scratch_name) is None:
            setattr(self, scratch_name, input.new())
    if self._output is None:
        self._output = self.output.new()

    if not self.fastBackward:
        self.updateOutput(input)

    n_in = self.weight.size(0)
    n_out = self.weight.size(1)

    # to prevent div by zero (NaN) bugs
    shifted = self._output.resize_as_(self.output)
    shifted.copy_(self.output)
    shifted.add_(0.0000001)

    self._view(self._gradOutput, gradOutput, gradOutput.size())
    torch.div(gradOutput, self._output, out=self._div)

    assert input.dim() == 2
    n_batch = input.size(0)
    self._div.resize_(n_batch, 1, n_out)
    self._expand3 = self._div.expand(n_batch, n_in, n_out)

    # NOTE(review): self._repeat / self._repeat2 are not set here;
    # presumably cached by updateOutput -- confirm.
    on_cuda = torch.typename(input) == 'torch.cuda.FloatTensor'
    if not on_cuda:
        torch.mul(self._repeat, self._expand3, out=self._repeat2)
    else:
        self._repeat2.resize_as_(self._expand3).copy_(self._expand3)
        self._repeat2.mul_(self._repeat)

    torch.sum(self._repeat2, 2, out=self.gradInput)
    self.gradInput.resize_as_(input)
    return self.gradInput
def updateGradInput(self, input, gradOutput):
    """Compute ``self.gradInput`` for a 1D or batched 2D ``input``.

    The gradient follows

        dy_j   -2 * c_j * c_j * (w_j - x)   c_j * c_j * (x - w_j)
        ---- = -------------------------- = ---------------------
         dx     2 || c_j * (w_j - x) ||              y_j

    Args:
        input: 1D or 2D tensor.
        gradOutput: gradient with respect to ``self.output``.

    Returns:
        ``self.gradInput`` resized like ``input``, or ``None`` when
        ``self.gradInput`` is ``None``.

    Raises:
        RuntimeError: if ``input`` is neither 1D nor 2D.
    """
    if self.gradInput is None:
        return

    # Scratch buffers are created lazily on first use.
    if self._div is None:
        self._div = input.new()
    if self._output is None:
        self._output = self.output.new()
    if self._expand4 is None:
        self._expand4 = input.new()
    if self._gradOutput is None:
        self._gradOutput = input.new()

    if not self.fastBackward:
        self.updateOutput(input)

    inputSize, outputSize = self.weight.size(0), self.weight.size(1)

    # to prevent div by zero (NaN) bugs
    self._output.resize_as_(self.output).copy_(self.output).add_(1e-7)
    self._view(self._gradOutput, gradOutput, gradOutput.size())
    torch.div(gradOutput, self._output, out=self._div)

    # NOTE(review): self._repeat, self._repeat2, self._repeat3 and
    # self._expand3 are not set here; presumably cached by updateOutput
    # -- confirm.
    if input.dim() == 1:
        self._div.resize_(1, outputSize)
        self._expand4 = self._div.expand_as(self.weight)

        # Same type test and out-of-place product as the 2D branch: the
        # previous `torch.type(input)` is not a torch function and the
        # in-place `mul_` does not accept two operands.
        if input.type() == 'torch.cuda.FloatTensor':
            self._repeat2.resize_as_(self._expand4).copy_(self._expand4)
            self._repeat2.mul_(self._repeat)
        else:
            torch.mul(self._repeat, self._expand4, out=self._repeat2)

        self._repeat2.mul_(self.diagCov)
        torch.sum(self._repeat2, 1, out=self.gradInput)
        self.gradInput.resize_as_(input)
    elif input.dim() == 2:
        batchSize = input.size(0)

        self._div.resize_(batchSize, 1, outputSize)
        self._expand4 = self._div.expand(batchSize, inputSize, outputSize)

        if input.type() == 'torch.cuda.FloatTensor':
            self._repeat2.resize_as_(self._expand4).copy_(self._expand4)
            self._repeat2.mul_(self._repeat)
            self._repeat2.mul_(self._repeat3)
        else:
            torch.mul(self._repeat, self._expand4, out=self._repeat2)
            self._repeat2.mul_(self._expand3)

        torch.sum(self._repeat2, 2, out=self.gradInput)
        self.gradInput.resize_as_(input)
    else:
        raise RuntimeError("1D or 2D input expected")

    return self.gradInput
def updateOutput(self, input, y):
    """Compute a margin-based cosine loss for a pair of batched inputs.

    For every row the cosine similarity ``cos`` of ``input[0]`` and
    ``input[1]`` is computed.  Rows with ``y == -1`` contribute
    ``max(0, cos - self.margin)``; rows with ``y == 1`` contribute
    ``1 - cos``.  The contributions are summed and, when
    ``self.sizeAverage`` is set, divided by ``y.size(0)``.

    Args:
        input: sequence whose first two items are the tensors to compare.
        y: tensor of ``1`` / ``-1`` labels, one per row.

    Returns:
        The loss, which is also stored in ``self.output``.
    """
    first = input[0]
    second = input[1]

    # keep backward compatibility: scratch buffers are created on first use
    if self.buffer is None:
        for scratch_name in ('buffer', 'w1', 'w22', 'w', 'w32', '_outputs'):
            setattr(self, scratch_name, first.new())

        # comparison operators behave differently from cuda/c implementations
        # TODO: verify name
        if first.type() == 'torch.cuda.FloatTensor':
            self._idx = torch.cuda.ByteTensor()
        else:
            self._idx = torch.ByteTensor()

    eps = 1e-12
    scratch = self.buffer

    # w1 = <x1, x2> per row
    torch.mul(first, second, out=scratch)
    torch.sum(scratch, 1, out=self.w1, keepdim=True)

    # w22 = 1 / (||x1||^2 + eps); self._outputs doubles as a column of ones
    torch.mul(first, first, out=scratch)
    torch.sum(scratch, 1, out=self.w22, keepdim=True).add_(eps)
    self._outputs.resize_as_(self.w22).fill_(1)
    torch.div(self._outputs, self.w22, out=self.w22)
    self.w.resize_as_(self.w22).copy_(self.w22)

    # w32 = 1 / (||x2||^2 + eps)
    torch.mul(second, second, out=scratch)
    torch.sum(scratch, 1, out=self.w32, keepdim=True).add_(eps)
    torch.div(self._outputs, self.w32, out=self.w32)

    # w = 1 / (||x1|| * ||x2||)
    self.w.mul_(self.w32)
    self.w.sqrt_()

    # cosine similarity, flattened from (N, 1) to (N,)
    torch.mul(self.w1, self.w, out=self._outputs)
    self._outputs = self._outputs.select(1, 0)

    # y == -1: hinge on (cos - margin)
    torch.eq(y, -1, out=self._idx)
    negatives = self._outputs[self._idx]
    negatives.add_(-self.margin).clamp_(min=0)
    self._outputs[self._idx] = negatives

    # y == 1: 1 - cos
    torch.eq(y, 1, out=self._idx)
    positives = self._outputs[self._idx]
    positives.mul_(-1).add_(1)
    self._outputs[self._idx] = positives

    total = self._outputs.sum()
    self.output = total / y.size(0) if self.sizeAverage else total
    return self.output
def updateGradInput(self, input, gradOutput):
    """Compute ``self.gradInput`` for a batched (2D) ``input``.

    The gradient follows

        dy_j   -2 * (w_j - x)     x - w_j
        ---- = ---------------- = -------
         dx    2 || w_j - x ||      y_j

    ``gradOutput`` is divided element-wise by the (slightly shifted)
    forward output, expanded over the input dimension, multiplied by
    ``self._repeat`` and summed over the last dimension.

    Args:
        input: 2D tensor (asserted).
        gradOutput: gradient with respect to ``self.output``.

    Returns:
        ``self.gradInput`` resized like ``input``, or ``None`` when
        ``self.gradInput`` is ``None``.
    """
    if self.gradInput is None:
        return

    # Scratch buffers are created lazily on first use.
    for scratch_name in ('_div', '_gradOutput', '_expand3'):
        if getattr(self, scratch_name) is None:
            setattr(self, scratch_name, input.new())
    if self._output is None:
        self._output = self.output.new()

    if not self.fastBackward:
        self.updateOutput(input)

    n_in = self.weight.size(0)
    n_out = self.weight.size(1)

    # to prevent div by zero (NaN) bugs
    shifted = self._output.resize_as_(self.output)
    shifted.copy_(self.output)
    shifted.add_(0.0000001)

    self._view(self._gradOutput, gradOutput, gradOutput.size())
    torch.div(gradOutput, self._output, out=self._div)

    assert input.dim() == 2
    n_batch = input.size(0)
    self._div.resize_(n_batch, 1, n_out)
    self._expand3 = self._div.expand(n_batch, n_in, n_out)

    # NOTE(review): self._repeat / self._repeat2 are not set here;
    # presumably cached by updateOutput -- confirm.
    on_cuda = torch.typename(input) == 'torch.cuda.FloatTensor'
    if not on_cuda:
        torch.mul(self._repeat, self._expand3, out=self._repeat2)
    else:
        self._repeat2.resize_as_(self._expand3).copy_(self._expand3)
        self._repeat2.mul_(self._repeat)

    # The reduced dimension is kept, then dropped by the resize below.
    torch.sum(self._repeat2, 2, True, out=self.gradInput)
    self.gradInput.resize_as_(input)
    return self.gradInput
def updateGradInput(self, input, gradOutput):
    """Compute ``self.gradInput`` for a 1D or batched 2D ``input``.

    The gradient follows

        dy_j   -2 * c_j * c_j * (w_j - x)   c_j * c_j * (x - w_j)
        ---- = -------------------------- = ---------------------
         dx     2 || c_j * (w_j - x) ||              y_j

    Args:
        input: 1D or 2D tensor.
        gradOutput: gradient with respect to ``self.output``.

    Returns:
        ``self.gradInput`` resized like ``input``, or ``None`` when
        ``self.gradInput`` is ``None``.

    Raises:
        RuntimeError: if ``input`` is neither 1D nor 2D.
    """
    if self.gradInput is None:
        return

    # Scratch buffers are created lazily on first use.
    if self._div is None:
        self._div = input.new()
    if self._output is None:
        self._output = self.output.new()
    if self._expand4 is None:
        self._expand4 = input.new()
    if self._gradOutput is None:
        self._gradOutput = input.new()

    if not self.fastBackward:
        self.updateOutput(input)

    inputSize, outputSize = self.weight.size(0), self.weight.size(1)

    # to prevent div by zero (NaN) bugs
    self._output.resize_as_(self.output).copy_(self.output).add_(1e-7)
    self._view(self._gradOutput, gradOutput, gradOutput.size())
    torch.div(gradOutput, self._output, out=self._div)

    # NOTE(review): self._repeat, self._repeat2, self._repeat3 and
    # self._expand3 are not set here; presumably cached by updateOutput
    # -- confirm.
    if input.dim() == 1:
        self._div.resize_(1, outputSize)
        self._expand4 = self._div.expand_as(self.weight)

        # Same type test and out-of-place product as the 2D branch: the
        # previous `torch.type(input)` is not a torch function and the
        # in-place `mul_` does not accept two operands.
        if input.type() == 'torch.cuda.FloatTensor':
            self._repeat2.resize_as_(self._expand4).copy_(self._expand4)
            self._repeat2.mul_(self._repeat)
        else:
            torch.mul(self._repeat, self._expand4, out=self._repeat2)

        self._repeat2.mul_(self.diagCov)
        torch.sum(self._repeat2, 1, True, out=self.gradInput)
        self.gradInput.resize_as_(input)
    elif input.dim() == 2:
        batchSize = input.size(0)

        self._div.resize_(batchSize, 1, outputSize)
        self._expand4 = self._div.expand(batchSize, inputSize, outputSize)

        if input.type() == 'torch.cuda.FloatTensor':
            self._repeat2.resize_as_(self._expand4).copy_(self._expand4)
            self._repeat2.mul_(self._repeat)
            self._repeat2.mul_(self._repeat3)
        else:
            torch.mul(self._repeat, self._expand4, out=self._repeat2)
            self._repeat2.mul_(self._expand3)

        torch.sum(self._repeat2, 2, True, out=self.gradInput)
        self.gradInput.resize_as_(input)
    else:
        raise RuntimeError("1D or 2D input expected")

    return self.gradInput
def forward(ctx, input1, input2, y, margin, size_average):
    """Compute a margin-based cosine loss and stash state on ``ctx``.

    For every row the cosine similarity ``cos`` of ``input1`` and
    ``input2`` is computed.  Rows with ``y == -1`` contribute
    ``max(0, cos - margin)``; rows with ``y == 1`` contribute ``1 - cos``.
    The contributions are summed and, when ``size_average`` is truthy,
    divided by ``y.size(0)``.

    Args:
        ctx: context object; receives ``margin``, ``size_average``, the
            intermediate buffers and the saved inputs.
        input1, input2: tensors to compare (reduced over dim 1).
        y: tensor of ``1`` / ``-1`` labels, one per row.
        margin: margin applied to rows labelled ``-1``.
        size_average: whether to divide the summed loss by ``y.size(0)``.

    Returns:
        A one-element tensor, created with ``input1.new``, holding the loss.
    """
    ctx.margin = margin
    ctx.size_average = size_average
    for scratch_name in ('w1', 'w22', 'w', 'w32', '_outputs'):
        setattr(ctx, scratch_name, input1.new())
    mask = input1.new().byte()
    eps = 1e-12

    # w1 = <x1, x2> per row
    scratch = torch.mul(input1, input2)
    torch.sum(scratch, 1, out=ctx.w1, keepdim=True)

    # w22 = 1 / (||x1||^2 + eps); ctx._outputs doubles as a column of ones
    torch.mul(input1, input1, out=scratch)
    torch.sum(scratch, 1, out=ctx.w22, keepdim=True).add_(eps)
    ctx._outputs.resize_as_(ctx.w22).fill_(1)
    torch.div(ctx._outputs, ctx.w22, out=ctx.w22)
    ctx.w.resize_as_(ctx.w22).copy_(ctx.w22)

    # w32 = 1 / (||x2||^2 + eps)
    torch.mul(input2, input2, out=scratch)
    torch.sum(scratch, 1, out=ctx.w32, keepdim=True).add_(eps)
    torch.div(ctx._outputs, ctx.w32, out=ctx.w32)

    # w = 1 / (||x1|| * ||x2||)
    ctx.w.mul_(ctx.w32)
    ctx.w.sqrt_()

    # cosine similarity, flattened from (N, 1) to (N,)
    torch.mul(ctx.w1, ctx.w, out=ctx._outputs)
    ctx._outputs = ctx._outputs.select(1, 0)

    # y == -1: hinge on (cos - margin)
    torch.eq(y, -1, out=mask)
    negatives = ctx._outputs[mask]
    negatives.add_(-ctx.margin).clamp_(min=0)
    ctx._outputs[mask] = negatives

    # y == 1: 1 - cos
    torch.eq(y, 1, out=mask)
    positives = ctx._outputs[mask]
    positives.mul_(-1).add_(1)
    ctx._outputs[mask] = positives

    loss = ctx._outputs.sum()
    if ctx.size_average:
        loss = loss / y.size(0)

    ctx.save_for_backward(input1, input2, y)
    return input1.new((loss,))
def updateOutput(self, input, y):
    """Compute a margin-based cosine loss for a pair of batched inputs.

    For every row the cosine similarity ``cos`` of ``input[0]`` and
    ``input[1]`` is computed.  Rows with ``y == -1`` contribute
    ``max(0, cos - self.margin)``; rows with ``y == 1`` contribute
    ``1 - cos``.  The contributions are summed and, when
    ``self.sizeAverage`` is set, divided by ``y.size(0)``.

    Args:
        input: sequence whose first two items are the tensors to compare.
        y: tensor of ``1`` / ``-1`` labels, one per row.

    Returns:
        The loss, which is also stored in ``self.output``.
    """
    first = input[0]
    second = input[1]

    # keep backward compatibility: scratch buffers are created on first use
    if self.buffer is None:
        for scratch_name in ('buffer', 'w1', 'w22', 'w', 'w32', '_outputs'):
            setattr(self, scratch_name, first.new())

        # comparison operators behave differently from cuda/c implementations
        # TODO: verify name
        if first.type() == 'torch.cuda.FloatTensor':
            self._idx = torch.cuda.ByteTensor()
        else:
            self._idx = torch.ByteTensor()

    eps = 1e-12
    scratch = self.buffer

    # w1 = <x1, x2> per row
    torch.mul(first, second, out=scratch)
    torch.sum(scratch, 1, out=self.w1, keepdim=True)

    # w22 = 1 / (||x1||^2 + eps); self._outputs doubles as a column of ones
    torch.mul(first, first, out=scratch)
    torch.sum(scratch, 1, out=self.w22, keepdim=True).add_(eps)
    self._outputs.resize_as_(self.w22).fill_(1)
    torch.div(self._outputs, self.w22, out=self.w22)
    self.w.resize_as_(self.w22).copy_(self.w22)

    # w32 = 1 / (||x2||^2 + eps)
    torch.mul(second, second, out=scratch)
    torch.sum(scratch, 1, out=self.w32, keepdim=True).add_(eps)
    torch.div(self._outputs, self.w32, out=self.w32)

    # w = 1 / (||x1|| * ||x2||)
    self.w.mul_(self.w32)
    self.w.sqrt_()

    # cosine similarity, flattened from (N, 1) to (N,)
    torch.mul(self.w1, self.w, out=self._outputs)
    self._outputs = self._outputs.select(1, 0)

    # y == -1: hinge on (cos - margin)
    torch.eq(y, -1, out=self._idx)
    negatives = self._outputs[self._idx]
    negatives.add_(-self.margin).clamp_(min=0)
    self._outputs[self._idx] = negatives

    # y == 1: 1 - cos
    torch.eq(y, 1, out=self._idx)
    positives = self._outputs[self._idx]
    positives.mul_(-1).add_(1)
    self._outputs[self._idx] = positives

    total = self._outputs.sum()
    self.output = total / y.size(0) if self.sizeAverage else total
    return self.output
def updateGradInput(self, input, gradOutput):
    """Compute ``self.gradInput`` for a batched (2D) ``input``.

    The gradient follows

        dy_j   -2 * (w_j - x)     x - w_j
        ---- = ---------------- = -------
         dx    2 || w_j - x ||      y_j

    ``gradOutput`` is divided element-wise by the (slightly shifted)
    forward output, expanded over the input dimension, multiplied by
    ``self._repeat`` and summed over the last dimension.

    Args:
        input: 2D tensor (asserted).
        gradOutput: gradient with respect to ``self.output``.

    Returns:
        ``self.gradInput`` resized like ``input``, or ``None`` when
        ``self.gradInput`` is ``None``.
    """
    if self.gradInput is None:
        return

    # Scratch buffers are created lazily on first use.
    for scratch_name in ('_div', '_gradOutput', '_expand3'):
        if getattr(self, scratch_name) is None:
            setattr(self, scratch_name, input.new())
    if self._output is None:
        self._output = self.output.new()

    if not self.fastBackward:
        self.updateOutput(input)

    n_in = self.weight.size(0)
    n_out = self.weight.size(1)

    # to prevent div by zero (NaN) bugs
    shifted = self._output.resize_as_(self.output)
    shifted.copy_(self.output)
    shifted.add_(0.0000001)

    self._view(self._gradOutput, gradOutput, gradOutput.size())
    torch.div(gradOutput, self._output, out=self._div)

    assert input.dim() == 2
    n_batch = input.size(0)
    self._div.resize_(n_batch, 1, n_out)
    self._expand3 = self._div.expand(n_batch, n_in, n_out)

    # NOTE(review): self._repeat / self._repeat2 are not set here;
    # presumably cached by updateOutput -- confirm.
    on_cuda = torch.typename(input) == 'torch.cuda.FloatTensor'
    if not on_cuda:
        torch.mul(self._repeat, self._expand3, out=self._repeat2)
    else:
        self._repeat2.resize_as_(self._expand3).copy_(self._expand3)
        self._repeat2.mul_(self._repeat)

    # The reduced dimension is kept, then dropped by the resize below.
    torch.sum(self._repeat2, 2, True, out=self.gradInput)
    self.gradInput.resize_as_(input)
    return self.gradInput
def updateGradInput(self, input, gradOutput):
    """Compute ``self.gradInput`` for a 1D or batched 2D ``input``.

    The gradient follows

        dy_j   -2 * c_j * c_j * (w_j - x)   c_j * c_j * (x - w_j)
        ---- = -------------------------- = ---------------------
         dx     2 || c_j * (w_j - x) ||              y_j

    Args:
        input: 1D or 2D tensor.
        gradOutput: gradient with respect to ``self.output``.

    Returns:
        ``self.gradInput`` resized like ``input``, or ``None`` when
        ``self.gradInput`` is ``None``.

    Raises:
        RuntimeError: if ``input`` is neither 1D nor 2D.
    """
    if self.gradInput is None:
        return

    # Scratch buffers are created lazily on first use.
    if self._div is None:
        self._div = input.new()
    if self._output is None:
        self._output = self.output.new()
    if self._expand4 is None:
        self._expand4 = input.new()
    if self._gradOutput is None:
        self._gradOutput = input.new()

    if not self.fastBackward:
        self.updateOutput(input)

    inputSize, outputSize = self.weight.size(0), self.weight.size(1)

    # to prevent div by zero (NaN) bugs
    self._output.resize_as_(self.output).copy_(self.output).add_(1e-7)
    self._view(self._gradOutput, gradOutput, gradOutput.size())
    torch.div(gradOutput, self._output, out=self._div)

    # NOTE(review): self._repeat, self._repeat2, self._repeat3 and
    # self._expand3 are not set here; presumably cached by updateOutput
    # -- confirm.
    if input.dim() == 1:
        self._div.resize_(1, outputSize)
        self._expand4 = self._div.expand_as(self.weight)

        # Same type test and out-of-place product as the 2D branch: the
        # previous `torch.type(input)` is not a torch function and the
        # in-place `mul_` does not accept two operands.
        if input.type() == 'torch.cuda.FloatTensor':
            self._repeat2.resize_as_(self._expand4).copy_(self._expand4)
            self._repeat2.mul_(self._repeat)
        else:
            torch.mul(self._repeat, self._expand4, out=self._repeat2)

        self._repeat2.mul_(self.diagCov)
        torch.sum(self._repeat2, 1, True, out=self.gradInput)
        self.gradInput.resize_as_(input)
    elif input.dim() == 2:
        batchSize = input.size(0)

        self._div.resize_(batchSize, 1, outputSize)
        self._expand4 = self._div.expand(batchSize, inputSize, outputSize)

        if input.type() == 'torch.cuda.FloatTensor':
            self._repeat2.resize_as_(self._expand4).copy_(self._expand4)
            self._repeat2.mul_(self._repeat)
            self._repeat2.mul_(self._repeat3)
        else:
            torch.mul(self._repeat, self._expand4, out=self._repeat2)
            self._repeat2.mul_(self._expand3)

        torch.sum(self._repeat2, 2, True, out=self.gradInput)
        self.gradInput.resize_as_(input)
    else:
        raise RuntimeError("1D or 2D input expected")

    return self.gradInput
def bn_hat_z_layers(self, hat_z_layers, z_pre_layers):
    """Normalize each ``hat_z`` with statistics taken from its ``z_pre``.

    For every pair the column mean of ``z_pre`` is subtracted from
    ``hat_z`` and the result is divided by the square root of the column
    variance of ``z_pre`` plus freshly drawn Gaussian noise (and 1e-10).
    The arithmetic runs on the CPU; results are moved back to the GPU when
    ``self.use_cuda`` is set.

    Args:
        hat_z_layers: sequence of 2D tensors to normalize.
        z_pre_layers: sequence of 2D tensors, same length, providing the
            statistics.

    Returns:
        List with one normalized tensor per input pair.
    """
    # TODO: Calculate batchnorm using GPU Tensors.
    assert len(hat_z_layers) == len(z_pre_layers)
    hat_z_layers_normalized = []
    for hat_z, z_pre in zip(hat_z_layers, z_pre_layers):
        batch_size, num_features = z_pre.size()[0], z_pre.size()[1]

        # Everything below is computed on the CPU, so the helper column of
        # ones is created there directly instead of round-tripping via GPU.
        ones = Variable(torch.ones(batch_size, 1))
        mean = torch.mean(z_pre, 0).cpu().unsqueeze(0)  # (1, num_features)

        # Variance is taken over the noise-perturbed activations.
        noise_var = np.random.normal(loc=0.0, scale=1 - 1e-10, size=z_pre.size())
        var = np.var(z_pre.data.cpu().numpy() + noise_var, axis=0).reshape(1, num_features)
        var = Variable(torch.FloatTensor(var))

        # ones.mm(row) broadcasts a (1, num_features) row over the batch.
        centered = hat_z.cpu() - ones.mm(mean)
        scale = ones.mm(torch.sqrt(var + 1e-10))
        hat_z_normalized = torch.div(centered, scale)

        if self.use_cuda:
            hat_z_normalized = hat_z_normalized.cuda()
        hat_z_layers_normalized.append(hat_z_normalized)
    return hat_z_layers_normalized
def reward(sample_solution, USE_CUDA=False):
    """
    Score a sorting attempt by its longest strictly increasing run of
    consecutive elements, normalized by the sequence length and negated.
    Input sequences must all be the same length.

    Example:

    input       | output
    ====================
    [1 4 3 5 2] | [5 1 2 3 4]

    The output has a longest sorted run of 4 out of 5, i.e. 0.8, so the
    returned value is -0.8.  The magnitude lies in [1/sourceL, 1].

    Args:
        sample_solution: list of len sourceL of [batch_size] Tensors
        USE_CUDA: move the running counters to the GPU when True

    Returns:
        [batch_size] containing the negated, normalized run lengths
    """
    n_batch = sample_solution[0].size(0)
    seq_len = len(sample_solution)

    best = Variable(torch.ones(n_batch), requires_grad=False)
    run = Variable(torch.ones(n_batch), requires_grad=False)
    if USE_CUDA:
        best, run = best.cuda(), run.cuda()

    for step in range(1, seq_len):
        # True where the element at step-1 is smaller than the one at step
        ascending = torch.lt(sample_solution[step - 1], sample_solution[step])
        # extend the current run where the order holds ...
        run += ascending.float()
        # ... and restart it at 1 where it does not
        run[torch.eq(ascending, 0)] = 1
        # remember the best run seen so far for each batch entry
        improved = torch.gt(run, best)
        best[improved] = run[improved]

    return -torch.div(best, seq_len)
def forward(ctx, input1, input2, y, margin, size_average):
    """Compute a margin-based cosine loss and stash state on ``ctx``.

    For every row the cosine similarity ``cos`` of ``input1`` and
    ``input2`` is computed.  Rows with ``y == -1`` contribute
    ``max(0, cos - margin)``; rows with ``y == 1`` contribute ``1 - cos``.
    The contributions are summed and, when ``size_average`` is truthy,
    divided by ``y.size(0)``.

    Args:
        ctx: context object; receives ``margin``, ``size_average``, the
            intermediate buffers and the saved inputs.
        input1, input2: tensors to compare (reduced over dim 1).
        y: tensor of ``1`` / ``-1`` labels, one per row.
        margin: margin applied to rows labelled ``-1``.
        size_average: whether to divide the summed loss by ``y.size(0)``.

    Returns:
        A one-element tensor, created with ``input1.new``, holding the loss.
    """
    ctx.margin = margin
    ctx.size_average = size_average
    for scratch_name in ('w1', 'w22', 'w', 'w32', '_outputs'):
        setattr(ctx, scratch_name, input1.new())
    mask = input1.new().byte()
    eps = 1e-12

    # w1 = <x1, x2> per row
    scratch = torch.mul(input1, input2)
    torch.sum(scratch, 1, out=ctx.w1, keepdim=True)

    # w22 = 1 / (||x1||^2 + eps); ctx._outputs doubles as a column of ones
    torch.mul(input1, input1, out=scratch)
    torch.sum(scratch, 1, out=ctx.w22, keepdim=True).add_(eps)
    ctx._outputs.resize_as_(ctx.w22).fill_(1)
    torch.div(ctx._outputs, ctx.w22, out=ctx.w22)
    ctx.w.resize_as_(ctx.w22).copy_(ctx.w22)

    # w32 = 1 / (||x2||^2 + eps)
    torch.mul(input2, input2, out=scratch)
    torch.sum(scratch, 1, out=ctx.w32, keepdim=True).add_(eps)
    torch.div(ctx._outputs, ctx.w32, out=ctx.w32)

    # w = 1 / (||x1|| * ||x2||)
    ctx.w.mul_(ctx.w32)
    ctx.w.sqrt_()

    # cosine similarity, flattened from (N, 1) to (N,)
    torch.mul(ctx.w1, ctx.w, out=ctx._outputs)
    ctx._outputs = ctx._outputs.select(1, 0)

    # y == -1: hinge on (cos - margin)
    torch.eq(y, -1, out=mask)
    negatives = ctx._outputs[mask]
    negatives.add_(-ctx.margin).clamp_(min=0)
    ctx._outputs[mask] = negatives

    # y == 1: 1 - cos
    torch.eq(y, 1, out=mask)
    positives = ctx._outputs[mask]
    positives.mul_(-1).add_(1)
    ctx._outputs[mask] = positives

    loss = ctx._outputs.sum()
    if ctx.size_average:
        loss = loss / y.size(0)

    ctx.save_for_backward(input1, input2, y)
    return input1.new((loss,))
def updateOutput(self, input, y):
    """Compute a margin-based cosine loss for a pair of batched inputs.

    For every row the cosine similarity ``cos`` of ``input[0]`` and
    ``input[1]`` is computed.  Rows with ``y == -1`` contribute
    ``max(0, cos - self.margin)``; rows with ``y == 1`` contribute
    ``1 - cos``.  The contributions are summed and, when
    ``self.sizeAverage`` is set, divided by ``y.size(0)``.

    Args:
        input: sequence whose first two items are the tensors to compare.
        y: tensor of ``1`` / ``-1`` labels, one per row.

    Returns:
        The loss, which is also stored in ``self.output``.
    """
    first = input[0]
    second = input[1]

    # keep backward compatibility: scratch buffers are created on first use
    if self.buffer is None:
        for scratch_name in ('buffer', 'w1', 'w22', 'w', 'w32', '_outputs'):
            setattr(self, scratch_name, first.new())

        # comparison operators behave differently from cuda/c implementations
        # TODO: verify name
        if first.type() == 'torch.cuda.FloatTensor':
            self._idx = torch.cuda.ByteTensor()
        else:
            self._idx = torch.ByteTensor()

    eps = 1e-12
    scratch = self.buffer

    # w1 = <x1, x2> per row
    torch.mul(first, second, out=scratch)
    torch.sum(scratch, 1, out=self.w1, keepdim=True)

    # w22 = 1 / (||x1||^2 + eps); self._outputs doubles as a column of ones
    torch.mul(first, first, out=scratch)
    torch.sum(scratch, 1, out=self.w22, keepdim=True).add_(eps)
    self._outputs.resize_as_(self.w22).fill_(1)
    torch.div(self._outputs, self.w22, out=self.w22)
    self.w.resize_as_(self.w22).copy_(self.w22)

    # w32 = 1 / (||x2||^2 + eps)
    torch.mul(second, second, out=scratch)
    torch.sum(scratch, 1, out=self.w32, keepdim=True).add_(eps)
    torch.div(self._outputs, self.w32, out=self.w32)

    # w = 1 / (||x1|| * ||x2||)
    self.w.mul_(self.w32)
    self.w.sqrt_()

    # cosine similarity, flattened from (N, 1) to (N,)
    torch.mul(self.w1, self.w, out=self._outputs)
    self._outputs = self._outputs.select(1, 0)

    # y == -1: hinge on (cos - margin)
    torch.eq(y, -1, out=self._idx)
    negatives = self._outputs[self._idx]
    negatives.add_(-self.margin).clamp_(min=0)
    self._outputs[self._idx] = negatives

    # y == 1: 1 - cos
    torch.eq(y, 1, out=self._idx)
    positives = self._outputs[self._idx]
    positives.mul_(-1).add_(1)
    self._outputs[self._idx] = positives

    total = self._outputs.sum()
    self.output = total / y.size(0) if self.sizeAverage else total
    return self.output