Python torch 模块,max() 实例源码
我们从Python开源项目中,提取了以下49个代码示例,用于说明如何使用torch.max()。
def pad_batch(mini_batch):
    """Zero-pad a batch of sequence pairs into two integer matrices.

    Args:
        mini_batch: sequence of items where ``item[0]`` and ``item[1]`` are
            the two integer sequences of a pair.

    Returns:
        A two-element list of ``Variable``-wrapped int64 tensors with shapes
        ``(len(mini_batch), longest item[0])`` and
        ``(len(mini_batch), longest item[1])``; unused cells stay 0.
    """
    mini_batch_size = len(mini_batch)
    max_sent_len1 = int(np.max([len(x[0]) for x in mini_batch]))
    max_sent_len2 = int(np.max([len(x[1]) for x in mini_batch]))
    # np.int64 replaces the ``np.int`` alias, which newer NumPy no longer has.
    main_matrix1 = np.zeros((mini_batch_size, max_sent_len1), dtype=np.int64)
    main_matrix2 = np.zeros((mini_batch_size, max_sent_len2), dtype=np.int64)
    for row, pair in enumerate(mini_batch):
        # Index by (row, column) position. The previous code indexed with the
        # batch item and the token value themselves and silently swallowed the
        # resulting IndexError, so cells were left empty or misplaced.
        for col, token in enumerate(pair[0]):
            main_matrix1[row, col] = token
        for col, token in enumerate(pair[1]):
            main_matrix2[row, col] = token
    main_matrix1_t = Variable(torch.from_numpy(main_matrix1))
    main_matrix2_t = Variable(torch.from_numpy(main_matrix2))
    return [main_matrix1_t, main_matrix2_t]
# return [Variable(torch.cat((main_matrix1_t, main_matrix2_t), 0))
# def pad_batch(mini_batch):
# # print mini_batch
# # print type(mini_batch)
# # print mini_batch.shape
# # for i, _ in enumerate(mini_batch):
# # print i, _
# return [Variable(torch.from_numpy(np.asarray(_))) for _ in mini_batch[0]]
def package(data, volatile=False):
    """Package data for training / evaluation.

    Args:
        data: iterable of JSON strings, each decoding to a mapping with a
            ``'text'`` list of words and a ``'label'``.
        volatile: forwarded to ``Variable`` for both returned tensors.

    Returns:
        ``(dat.t(), targets)`` where ``dat`` is the LongTensor of word ids,
        truncated/padded to the longest sequence (capped at 500), and
        ``targets`` is the LongTensor of labels.
    """
    # Build real lists: on Python 3 ``map`` returns a one-shot iterator, which
    # breaks the ``len()`` / indexing / second pass the code below relies on.
    data = [json.loads(x) for x in data]
    dat = [[dictionary.word2idx[y] for y in x['text']] for x in data]
    maxlen = 0
    for item in dat:
        maxlen = max(maxlen, len(item))
    targets = [x['label'] for x in data]
    maxlen = min(maxlen, 500)
    for i, item in enumerate(dat):
        if maxlen < len(item):
            dat[i] = item[:maxlen]
        elif len(item) < maxlen:
            # Right-pad short sequences with the '<pad>' id.
            item.extend([dictionary.word2idx['<pad>']] * (maxlen - len(item)))
    dat = Variable(torch.LongTensor(dat), volatile=volatile)
    targets = Variable(torch.LongTensor(targets), volatile=volatile)
    return dat.t(), targets
def evaluate():
    """Evaluate the model on ``data_val`` while training.

    Uses the module-level ``model``, ``data_val``, ``args``, ``criterion`` and
    ``package``. Returns a pair: the accumulated batch loss divided by
    ``len(data_val) // args.batch_size``, and the number of correct
    predictions divided by ``len(data_val)``.
    """
    model.eval()  # turn on the eval() switch to disable dropout
    loss_sum = 0
    hits = 0
    for start in range(0, len(data_val), args.batch_size):
        stop = min(len(data_val), start + args.batch_size)
        data, targets = package(data_val[start:stop], volatile=True)
        if args.cuda:
            data = data.cuda()
            targets = targets.cuda()
        hidden = model.init_hidden(data.size(1))
        output, attention = model.forward(data, hidden)
        flat = output.view(data.size(1), -1)
        loss_sum += criterion(flat, targets).data
        _, prediction = torch.max(flat, 1)
        hits += torch.sum((prediction == targets).float())
    n_batches = len(data_val) // args.batch_size
    return loss_sum[0] / n_batches, hits.data[0] / len(data_val)
def update_hyper_param(self):
    """Write the current momentum and learning rate into every param group.

    For each group of the wrapped optimizer, ``momentum`` is set to
    ``self._mu_t`` and ``lr`` is derived from ``self._lr_t`` or ``self._lr``
    (scaled by ``self._lr_factor``), depending on ``self._force_non_inc_step``.
    """
    # NOTE(review): the indentation of this block was reconstructed; the
    # ``elif`` is assumed to pair with the ``_force_non_inc_step`` check
    # (its comment talks about forcing a non-increasing step) -- confirm.
    for group in self._optimizer.param_groups:
        group['momentum'] = self._mu_t
        #group['momentum'] = max(self._mu, self._mu_t)
        if self._force_non_inc_step == False:
            group['lr'] = self._lr_t * self._lr_factor
            # a loose clamping to prevent catastrophically large move. If the
            # move (lr * gradient norm) reaches the threshold, lr is rescaled
            # to threshold / gradient norm.
            if self._adapt_clip and (group['lr'] * np.sqrt(self._global_state['grad_norm_squared']) >= self._catastrophic_move_thresh):
                group['lr'] = self._catastrophic_move_thresh / np.sqrt(self._global_state['grad_norm_squared'] + eps)
                if self._verbose:
                    logging.warning("clip catastropic move!")
        elif self._iter > self._curv_win_width:
            # force to guarantee lr * grad_norm not increasing dramatically.
            # Not necessary for basic use. Please refer to the comments
            # in YFOptimizer.__init__ for more details
            self.lr_grad_norm_avg()
            debias_factor = self.zero_debias_factor()
            # lr is capped by twice the tracked minimum of lr * grad norm,
            # divided by the debiased average gradient norm.
            group['lr'] = min(self._lr * self._lr_factor,
                2.0 * self._global_state["lr_grad_norm_avg_min"] \
                / (np.sqrt(np.exp(self._global_state['grad_norm_squared_avg_log'] / debias_factor) ) + eps) )
    return
def test_accuracy_full_batch(tokens, features, mini_batch_size, word_attn, sent_attn, th=0.5):
    """Collect flattened predictions for every mini-batch.

    Args:
        tokens, features, mini_batch_size: forwarded to ``gen_minibatch1``.
        word_attn, sent_attn: forwarded to ``get_predictions``.
        th: unused here; kept so existing callers keep working.

    Returns:
        1-D ``numpy`` array with the predictions of all batches concatenated
        in iteration order.
    """
    p = []
    cnt = 0
    g = gen_minibatch1(tokens, features, mini_batch_size, False)
    for token, feature in g:
        if cnt % 100 == 0:
            # Call form prints the same on Python 2 and is valid on Python 3.
            print(cnt)
        cnt += 1
        y_pred = get_predictions(token, feature, word_attn, sent_attn)
        p.append(np.ndarray.flatten(y_pred.data.cpu().numpy()))
    p = [item for sublist in p for item in sublist]
    p = np.array(p)
    return p
def evaluate_stats(net, testloader):
    """Measure classification accuracy and wall-clock time over a loader.

    Args:
        net: callable mapping a batch of images to class scores.
        testloader: iterable of ``(images, labels)`` batches.

    Returns:
        dict with ``'accuracy'`` (fraction of correct predictions, a float)
        and ``'eval_time'`` (seconds spent in this function's loop).
    """
    stats = {}
    correct = 0
    total = 0
    before = time.time()
    for images, labels in testloader:
        if use_gpu:
            # ``async`` became a reserved word in Python 3.7; ``non_blocking``
            # is the keyword PyTorch provides in its place.
            images, labels = images.cuda(), labels.cuda(non_blocking=True)
        outputs = net(Variable(images))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    # float() avoids integer division when both operands are integral.
    accuracy = float(correct) / total
    stats['accuracy'] = accuracy
    stats['eval_time'] = time.time() - before
    print('Accuracy on test images: %f' % accuracy)
    return stats
def intersect(box_a, box_b):
    """Compute the pairwise intersection area of two sets of boxes.

    Both inputs are viewed as [A,B,2] through unsqueeze + expand
    ([A,2] -> [A,1,2] -> [A,B,2] and [B,2] -> [1,B,2] -> [A,B,2]).
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    num_a = box_a.size(0)
    num_b = box_b.size(0)
    a_high = box_a[:, 2:].unsqueeze(1).expand(num_a, num_b, 2)
    b_high = box_b[:, 2:].unsqueeze(0).expand(num_a, num_b, 2)
    a_low = box_a[:, :2].unsqueeze(1).expand(num_a, num_b, 2)
    b_low = box_b[:, :2].unsqueeze(0).expand(num_a, num_b, 2)
    # Overlap spans from the larger low corner to the smaller high corner.
    upper = torch.min(a_high, b_high)
    lower = torch.max(a_low, b_low)
    overlap = torch.clamp(upper - lower, min=0)
    return overlap[:, :, 0] * overlap[:, :, 1]
def forward(self, tokens: torch.Tensor, mask: torch.Tensor = None):  #pylint: disable=arguments-differ
    """Sum (or average, when ``self._averaged``) ``tokens`` over dimension 1.

    When ``mask`` is given, tokens are multiplied by it before summing, the
    divisor is the per-row length from ``get_lengths_from_binary_sequence_mask``
    (clamped to at least 1), and rows of length 0 are multiplied by 0.
    Without a mask the divisor is ``tokens.size(1)``.
    """
    masked = mask is not None
    if masked:
        tokens = tokens * mask.unsqueeze(-1).float()

    # Our input has shape `(batch_size, num_tokens, embedding_dim)`, so we sum
    # out the `num_tokens` dimension.
    pooled = tokens.sum(1)
    if not self._averaged:
        return pooled

    if masked:
        lengths = get_lengths_from_binary_sequence_mask(mask)
        nonempty = (lengths > 0)
        # Replace any length 0 by 1 so the division below is safe.
        floor = Variable(lengths.data.new().resize_(1).fill_(1))
        lengths = torch.max(lengths, floor)
    else:
        lengths = Variable(tokens.data.new().resize_(1).fill_(tokens.size(1)), requires_grad=False)
        nonempty = None

    pooled = pooled / lengths.unsqueeze(-1).float()
    if nonempty is not None:
        pooled = pooled * (nonempty > 0).float().unsqueeze(-1)
    return pooled
def logsumexp(tensor: torch.Tensor,
              dim: int = -1,
              keepdim: bool = False) -> torch.Tensor:
    """
    Numerically stable logsumexp: the maximum along ``dim`` is subtracted before
    exponentiating and added back afterwards. Mathematically equivalent to
    `tensor.exp().sum(dim, keep=keepdim).log()`; typically used for summing log
    probabilities.

    Parameters
    ----------
    tensor : torch.FloatTensor, required.
        A tensor of arbitrary size.
    dim : int, optional (default = -1)
        The dimension of the tensor to apply the logsumexp to.
    keepdim: bool, optional (default = False)
        Whether to retain a dimension of size one at the dimension we reduce over.
    """
    peak, _ = tensor.max(dim, keepdim=keepdim)
    # Re-insert the reduced dimension when it was dropped so the
    # subtraction broadcasts against ``tensor``.
    offset = peak if keepdim else peak.unsqueeze(dim)
    shifted = tensor - offset
    log_sum = shifted.exp().sum(dim, keepdim=keepdim).log()
    return peak + log_sum
def accuracy(self, predicted, ground_truth):
    """
    Utility function for calculating the accuracy of the model.

    Params
    ------
    - predicted: (torch.FloatTensor) per-class scores; the class is the
      argmax over dimension 1.
    - ground_truth: (torch.LongTensor)

    Returns
    -------
    - acc: (float) % accuracy.
    """
    predicted = torch.max(predicted, 1)[1]
    total = len(ground_truth)
    correct = (predicted == ground_truth).sum()
    # float() keeps the ratio exact: with integer operands the division can
    # truncate to 0, and the documented return type is a plain float.
    acc = 100.0 * float(correct) / total
    return acc
def train():
    """Run one training pass of ``densenet`` over ``training_data``.

    Uses module-level ``densenet``, ``training_data``, ``optimizer``,
    ``criterion``, ``use_cuda`` and ``training_size``. Returns the accumulated
    loss divided by ``training_size``, the number of correct predictions, and
    that number as a percentage of ``training_size``.
    """
    densenet.train()
    loss_sum = 0
    n_correct = 0
    batches = tqdm(training_data, mininterval=1,
                   desc='Train Processing', leave=False)
    for data, label in batches:
        data = Variable(data)
        label = Variable(label)
        if use_cuda:
            data, label = data.cuda(), label.cuda()

        optimizer.zero_grad()
        target = densenet(data)
        loss = criterion(target, label)
        loss.backward()
        optimizer.step()

        loss_sum += loss.data
        predicted = torch.max(target, 1)[1].view(label.size())
        n_correct += (predicted.data == label.data).sum()

    return loss_sum[0]/training_size, n_correct, n_correct/training_size * 100.0
# ##############################################################################
# Save Model
# ##############################################################################
def max_along_time(inputs, lengths):
    """
    Max-pool each sequence over its first ``lengths[i]`` time steps.

    :param inputs: [T * B * D]
    :param lengths: [B]
    :return: [B * D] max_along_time
    """
    pooled = []
    for batch_idx, seq_len in enumerate(list(lengths)):
        # Only the first ``seq_len`` steps of this batch entry take part.
        values, _ = inputs[:seq_len, batch_idx, :].max(dim=0)
        pooled.append(values.squeeze())
    return torch.stack(pooled)
def eval(data_iter, model, args):
    """Evaluate ``model`` over ``data_iter`` and print loss and accuracy.

    The summed (``size_average=False``) cross-entropy of every batch is
    accumulated and divided by ``len(data_iter.dataset)``; accuracy is the
    percentage of matching argmax predictions. The model is switched back to
    training mode before printing.
    """
    model.eval()
    n_correct = 0
    loss_total = 0
    for batch in data_iter:
        feature = batch.text
        target = batch.label
        # batch first, index align (both in place)
        feature.data.t_()
        target.data.sub_(1)
        if args.cuda:
            feature, target = feature.cuda(), target.cuda()

        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)
        loss_total += loss.data[0]
        predicted = torch.max(logit, 1)[1].view(target.size())
        n_correct += (predicted.data == target.data).sum()

    size = len(data_iter.dataset)
    loss_total = loss_total/size
    accuracy = 100.0 * n_correct/size
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(loss_total,
                                                                       accuracy,
                                                                       n_correct,
                                                                       size))
def intersect(box_a, box_b):
    """Return the [A,B] matrix of intersection areas between two box sets.

    Corner slices are broadcast to [A,B,2] with unsqueeze + expand
    ([A,2] -> [A,1,2] -> [A,B,2] and [B,2] -> [1,B,2] -> [A,B,2]).
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    A = box_a.size(0)
    B = box_b.size(0)

    def _rows(corners):
        # [A,2] -> [A,1,2] -> [A,B,2]
        return corners.unsqueeze(1).expand(A, B, 2)

    def _cols(corners):
        # [B,2] -> [1,B,2] -> [A,B,2]
        return corners.unsqueeze(0).expand(A, B, 2)

    far_corner = torch.min(_rows(box_a[:, 2:]), _cols(box_b[:, 2:]))
    near_corner = torch.max(_rows(box_a[:, :2]), _cols(box_b[:, :2]))
    sides = torch.clamp((far_corner - near_corner), min=0)
    return sides[:, :, 0] * sides[:, :, 1]
def test_forward_backward(self):
    """Check OIMLoss forward value and input gradient against softmax.

    With the lookup table set to the identity, the loss is compared with the
    negated sum of the diagonal log-probabilities, and the gradient with
    ``softmax(x) - I``.
    """
    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable
    from reid.loss import OIMLoss
    criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
    criterion.lut = torch.eye(3)
    x = Variable(torch.randn(3, 3), requires_grad=True)
    y = Variable(torch.range(0, 2).long())
    loss = criterion(x, y)
    loss.backward()
    probs = F.softmax(x)
    grads = probs.data - torch.eye(3)
    abs_diff = torch.abs(grads - x.grad.data)
    # assertEqual instead of the deprecated ``assertEquals`` alias, which
    # newer unittest versions no longer provide.
    self.assertEqual(torch.log(probs).diag().sum(), -loss)
    self.assertTrue(torch.max(abs_diff) < 1e-6)
def eval(data_iter, model, args):
    """Evaluate ``model`` over ``data_iter`` and print mean loss and accuracy.

    Args:
        data_iter: iterable of batches exposing ``text`` and ``label``; its
            ``dataset`` attribute gives the number of examples.
        model: callable mapping features to logits; switched to eval mode for
            the pass and back to train mode afterwards.
        args: needs a boolean ``cuda`` attribute.
    """
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.label
        feature.data.t_(), target.data.sub_(1)  # batch first, index align
        if args.cuda:
            feature, target = feature.cuda(), target.cuda()
        logit = model(feature)
        # Sum the loss per batch so that dividing the accumulated total by
        # the dataset size gives the per-example mean. The previous code
        # reported only the last batch's mean loss divided by the dataset
        # size and ignored the accumulator.
        loss = F.cross_entropy(logit, target, size_average=False)
        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
    size = len(data_iter.dataset)
    avg_loss = avg_loss/size
    accuracy = 100.0 * corrects/size
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss,
                                                                       accuracy,
                                                                       corrects,
                                                                       size))
def intersect(box_a, box_b):
    """Intersection area for every (box_a[i], box_b[j]) pair.

    Inputs are expanded to a common [A,B,2] view
    ([A,2] -> [A,1,2] -> [A,B,2] and [B,2] -> [1,B,2] -> [A,B,2]).
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    count_a, count_b = box_a.size(0), box_b.size(0)
    grid = (count_a, count_b, 2)
    max_xy = torch.min(
        box_a[:, 2:].unsqueeze(1).expand(*grid),
        box_b[:, 2:].unsqueeze(0).expand(*grid),
    )
    min_xy = torch.max(
        box_a[:, :2].unsqueeze(1).expand(*grid),
        box_b[:, :2].unsqueeze(0).expand(*grid),
    )
    # Negative extents mean the boxes do not overlap on that axis.
    extent = torch.clamp((max_xy - min_xy), min=0)
    widths = extent[:, :, 0]
    heights = extent[:, :, 1]
    return widths * heights
def compute_accuracy(self, y, t):
    """Return the mean of arc accuracy and label accuracy.

    ``y`` unpacks into ``(arc_logits, label_logits)`` and ``t.T`` into
    ``(true_arcs, true_labels)``. For each pair the argmax over dimension 2
    is compared with the gold values padded with -1; the denominator is the
    number of positions minus the number of -1 entries.
    """
    arc_logits, label_logits = y
    true_arcs, true_labels = t.T

    def _score(logits, gold):
        # Accuracy of argmax(logits) against ``gold`` over non-padded cells.
        batch, length, _ = logits.size()
        guessed = logits.data.max(2)[1].cpu()
        gold = pad_sequence(gold, padding=-1, dtype=np.int64)
        hits = guessed.eq(gold).cpu().sum()
        n_padding = np.sum(gold.cpu().numpy() == -1)
        return (hits /
                (batch * length - n_padding))

    arc_accuracy = _score(arc_logits, true_arcs)
    label_accuracy = _score(label_logits, true_labels)
    return (arc_accuracy + label_accuracy) / 2
def train(epoch):
    """Train ``net`` for one epoch over ``trainloader``, reporting progress.

    Uses module-level ``net``, ``trainloader``, ``optimizer``, ``criterion``,
    ``use_cuda`` and ``progress_bar``. After each batch the running mean loss
    and running accuracy are passed to ``progress_bar``.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs = Variable(inputs)
        targets = Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.data[0]
        predicted = torch.max(outputs.data, 1)[1]
        n_seen += targets.size(0)
        n_correct += predicted.eq(targets.data).cpu().sum()

        message = 'Loss: %.3f | Acc: %.3f%% (%d/%d)' % (
            running_loss/(batch_idx+1), 100.*n_correct/n_seen, n_correct, n_seen)
        progress_bar(batch_idx, len(trainloader), message)
def _forward_alg(self, feats):
    """Run the forward recursion in the log domain and return Z(x).

    ``feats`` is iterated once per step (len(sentence) * tagset_size per the
    original note). Each step subtracts the row maximum before
    exponentiating and adds it back after the log.
    """
    # Start with every score at -10000. except the START tag, which is 0.
    start_scores = torch.Tensor(1, self.tagset_size).fill_(-10000.)
    start_scores[0][self.tag_to_ix[START_TAG]] = 0.
    forward_var = autograd.Variable(start_scores)
    if self.use_gpu:
        forward_var = forward_var.cuda()

    for feat in feats:
        emission = feat.view(-1, 1)
        scores = forward_var + self.transitions + emission
        row_max, _ = torch.max(scores, dim=1)
        shifted = scores - row_max.view(-1, 1)
        log_norm = torch.log(torch.sum(torch.exp(shifted), dim=1))
        forward_var = row_max + log_norm.view(1, -1)

    stop_scores = self.transitions[self.tag_to_ix[STOP_TAG]]
    terminal_var = (forward_var + stop_scores).view(1, -1)
    # Z(x)
    return log_sum_exp(terminal_var)
def eval(self):
    """Predict an answer span for every dev batch and score the predictions.

    For each example the argmax begin/end positions select a slice of the
    tokenized passage, which is joined with spaces and stored under the
    question id. The model is put back in training mode before the result of
    ``evaluate(self.dev_dataset, pred_result)`` is returned.
    """
    self.model.eval()
    pred_result = {}
    for batch in self.dataloader_dev:
        question_ids, questions, passages, passage_tokenized = batch
        questions.variable(volatile=True)
        passages.variable(volatile=True)
        begin_, end_ = self.model(questions, passages)  # batch x seq
        pred_begin = torch.max(begin_, 1)[1]
        pred_end = torch.max(end_, 1)[1]
        spans = torch.stack([pred_begin, pred_end], dim=1).cpu().data.numpy()
        for idx, (begin, end) in enumerate(spans):
            answer_tokens = passage_tokenized[idx][begin:end + 1]
            pred_result[question_ids[idx]] = " ".join(answer_tokens)
    self.model.train()
    return evaluate(self.dev_dataset, pred_result)
def _forward(self, batch):
    """Run the model on one batch and return ``(loss, exact_match)``.

    The loss is the sum of ``self.loss_fn`` on the begin and end outputs;
    exact match is the number of examples whose argmax begin and end both
    equal the answer, divided by the batch size.
    """
    _, questions, passages, answers, _ = batch
    batch_num = questions.tensor.size(0)
    questions.variable()
    passages.variable()

    begin_, end_ = self.model(questions, passages)  # batch x seq
    assert begin_.size(0) == batch_num

    answers = Variable(answers)
    if torch.cuda.is_available():
        answers = answers.cuda()
    begin = answers[:, 0]
    end = answers[:, 1]
    loss = self.loss_fn(begin_, begin) + self.loss_fn(end_, end)

    pred_begin = torch.max(begin_, 1)[1]
    pred_end = torch.max(end_, 1)[1]
    both_right = (pred_begin == begin) * (pred_end == end)
    exact_correct_num = torch.sum(both_right)
    em = exact_correct_num.data[0] / batch_num
    return loss, em
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
    """Take one optimisation step on a batch and return ``(loss, accuracy)``.

    The NLL criterion and the Adam optimizer are created on first use from
    ``self.options['LR']`` and ``self.options['L2']``.
    """
    if not hasattr(self, 'criterion'):
        self.criterion = nn.NLLLoss()
    if not hasattr(self, 'optimizer'):
        self.optimizer = optim.Adam(
            self.parameters(),
            lr=self.options['LR'],
            betas=(0.9, 0.999),
            eps=1e-08,
            weight_decay=self.options['L2']
        )

    self.optimizer.zero_grad()
    preds = self.__call__(premise_batch, hypothesis_batch, training=True)
    loss = self.criterion(preds, y_batch)
    loss.backward()
    self.optimizer.step()

    # Predicted label = argmax over the last dimension (dimension kept).
    pred_labels = torch.max(preds, dim=-1, keepdim=True)[1]
    y_true = self._get_numpy_array_from_variable(y_batch)
    y_pred = self._get_numpy_array_from_variable(pred_labels)
    acc = accuracy_score(y_true, y_pred)

    ret_loss = self._get_numpy_array_from_variable(loss)[0]
    return ret_loss, acc
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
    """Fit the model on a single batch.

    Returns the batch loss (first element of the loss converted to numpy)
    and the accuracy of the argmax predictions against ``y_batch``. The
    criterion and optimizer attributes are set up lazily.
    """
    needs_criterion = not hasattr(self, 'criterion')
    if needs_criterion:
        self.criterion = nn.NLLLoss()
    needs_optimizer = not hasattr(self, 'optimizer')
    if needs_optimizer:
        learning_rate = self.options['LR']
        l2_penalty = self.options['L2']
        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate,
                                    betas=(0.9, 0.999), eps=1e-08,
                                    weight_decay=l2_penalty)

    self.optimizer.zero_grad()
    preds = self.__call__(premise_batch, hypothesis_batch, training=True)
    loss = self.criterion(preds, y_batch)
    loss.backward()
    self.optimizer.step()

    _, best_labels = torch.max(preds, dim=-1, keepdim=True)
    to_numpy = self._get_numpy_array_from_variable
    y_true = to_numpy(y_batch)
    y_pred = to_numpy(best_labels)
    acc = accuracy_score(y_true, y_pred)
    return to_numpy(loss)[0], acc
def test_view(self):
    """Exercise ``view`` / ``view_as``: shapes, shared storage, bad sizes."""
    tensor = torch.rand(15)
    template = torch.rand(3, 5)
    empty = torch.Tensor()
    target = template.size()

    # Every spelling of the 3x5 shape must give the template's size.
    self.assertEqual(tensor.view_as(template).size(), target)
    for shape in ((3, 5), (torch.Size([3, 5]),), (-1, 5), (3, -1)):
        self.assertEqual(tensor.view(*shape).size(), target)

    # Writing through the view must be visible in the source tensor.
    tensor_view = tensor.view(5, 3)
    tensor_view.fill_(random.uniform(0, 1))
    self.assertEqual((tensor_view - tensor).abs().max(), 0)

    self.assertEqual(empty.view_as(empty), empty)
    self.assertEqual(empty.view(0), empty)

    for bad_shape in ((15, 0), (7, -1), (15, -1, -1)):
        self.assertRaises(RuntimeError, lambda: tensor.view(*bad_shape))
def intersect(box_a, box_b):
    """Area of overlap between each box in ``box_a`` and each in ``box_b``.

    No copies are made for the pairing: each corner slice is expanded to
    [A,B,2] ([A,2] -> [A,1,2] -> [A,B,2] and [B,2] -> [1,B,2] -> [A,B,2]).
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    n_a = box_a.size(0)
    n_b = box_b.size(0)
    a_start, a_end = box_a[:, :2], box_a[:, 2:]
    b_start, b_end = box_b[:, :2], box_b[:, 2:]
    end_xy = torch.min(a_end.unsqueeze(1).expand(n_a, n_b, 2),
                       b_end.unsqueeze(0).expand(n_a, n_b, 2))
    start_xy = torch.max(a_start.unsqueeze(1).expand(n_a, n_b, 2),
                         b_start.unsqueeze(0).expand(n_a, n_b, 2))
    span = torch.clamp((end_xy - start_xy), min=0)
    return span[:, :, 0] * span[:, :, 1]
def forward(self,x):
    """Run ``self.Scale`` at 1x, 0.75x and 0.5x input size and fuse by max.

    The two reduced-scale outputs are resized to the full-scale output size
    (taken from dimension 2) before the element-wise maximum. Returns
    ``[output, output75, output5, out_max]``. The three ``nn.Upsample``
    modules are stored on ``self`` as ``interp1``..``interp3``.
    """
    full = self.Scale(x)  # for original scale
    out_side = full.size()[2]
    in_side = x.size()[2]
    side75 = int(in_side*0.75)+1
    side50 = int(in_side*0.5)+1

    self.interp1 = nn.Upsample(size=(side75, side75), mode='bilinear')
    self.interp2 = nn.Upsample(size=(side50, side50), mode='bilinear')
    self.interp3 = nn.Upsample(size=(out_side, out_side), mode='bilinear')

    scaled75 = self.interp3(self.Scale(self.interp1(x)))  # for 0.75x scale
    scaled50 = self.interp3(self.Scale(self.interp2(x)))  # for 0.5x scale

    fused = torch.max(torch.max(full, scaled75), scaled50)
    return [full, scaled75, scaled50, fused]
def test_view(self):
    """Check ``Tensor.view`` and ``view_as`` on valid and invalid shapes."""
    tensor = torch.rand(15)
    template = torch.rand(3, 5)
    empty = torch.Tensor()
    expected = template.size()

    self.assertEqual(tensor.view_as(template).size(), expected)
    self.assertEqual(tensor.view(3, 5).size(), expected)
    self.assertEqual(tensor.view(torch.Size([3, 5])).size(), expected)
    self.assertEqual(tensor.view(-1, 5).size(), expected)
    self.assertEqual(tensor.view(3, -1).size(), expected)

    # The view aliases the original data, so the difference stays zero
    # after filling the view with a random constant.
    fill_value = random.uniform(0, 1)
    alias = tensor.view(5, 3)
    alias.fill_(fill_value)
    self.assertEqual((alias - tensor).abs().max(), 0)

    self.assertEqual(empty.view_as(empty), empty)
    self.assertEqual(empty.view(0), empty)

    with self.assertRaises(RuntimeError):
        tensor.view(15, 0)
    with self.assertRaises(RuntimeError):
        tensor.view(7, -1)
    with self.assertRaises(RuntimeError):
        tensor.view(15, -1, -1)
def intersect(box_a, box_b):
    """Pairwise overlap area of two box tensors.

    Both corner sets are resized to [A,B,2] without new allocation:
    [A,2] -> [A,1,2] -> [A,B,2] and [B,2] -> [1,B,2] -> [A,B,2].
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    rows = box_a.size(0)
    cols = box_b.size(0)
    a_wide = box_a.unsqueeze(1)  # [A,1,4]
    b_wide = box_b.unsqueeze(0)  # [1,B,4]
    inner_max = torch.min(a_wide[:, :, 2:].expand(rows, cols, 2),
                          b_wide[:, :, 2:].expand(rows, cols, 2))
    inner_min = torch.max(a_wide[:, :, :2].expand(rows, cols, 2),
                          b_wide[:, :, :2].expand(rows, cols, 2))
    inter = torch.clamp((inner_max - inner_min), min=0)
    return inter[:, :, 0] * inter[:, :, 1]
def test(info):
    """Evaluate the global ``net`` on ``testloader`` and store totals in ``info``.

    On return ``info[0]`` is the number of correct predictions, ``info[1]``
    the number of samples seen and ``info[2]`` the summed batch loss.
    """
    global net
    n_correct = 0
    loss_total = 0.
    n_seen = 0
    for inputs, labels in testloader:
        inputs = Variable(inputs)
        labels = Variable(labels)
        if global_cuda_available:
            inputs, labels = inputs.cuda(), labels.cuda()
        outputs = net(inputs)
        predicted = torch.max(outputs.data, 1)[1]
        n_seen += labels.size()[0]
        n_correct += (predicted == labels.data).sum()
        loss = criterion(outputs, labels)
        loss_total += loss.data[0]
    info[0] = n_correct
    info[1] = n_seen
    info[2] = loss_total
def forward(self, inp, hidden):
    """Encode ``inp`` and pool the recurrent output per ``self.pooling``.

    'mean' and 'max' reduce over dimension 0 and squeeze; 'all' and
    'all-word' swap dimensions 0 and 1; any other value leaves the output
    unchanged. Returns ``(pooled_output, embeddings)``.
    """
    emb = self.drop(self.encoder(inp))
    outp = self.bilstm(emb, hidden)[0]
    mode = self.pooling
    if mode == 'mean':
        outp = torch.mean(outp, 0).squeeze()
    elif mode == 'max':
        values = torch.max(outp, 0)[0]
        outp = values.squeeze()
    elif mode in ('all', 'all-word'):
        outp = torch.transpose(outp, 0, 1).contiguous()
    return outp, emb
def __init__(self, config):
    """Build the encoder and classification layers from ``config``.

    'mean'/'max' pooling uses ``BiLSTM`` with a ``nhid * 2`` wide input to
    ``fc``; 'all' uses ``SelfAttentiveEncoder`` with
    ``nhid * 2 * attention-hops``. Any other pooling value raises.
    """
    super(Classifier, self).__init__()
    pooling = config['pooling']
    if pooling in ('mean', 'max'):
        self.encoder = BiLSTM(config)
        fc_inputs = config['nhid'] * 2
    elif pooling == 'all':
        self.encoder = SelfAttentiveEncoder(config)
        fc_inputs = config['nhid'] * 2 * config['attention-hops']
    else:
        raise Exception('Error when initializing Classifier')
    self.fc = nn.Linear(fc_inputs, config['nfc'])
    self.drop = nn.Dropout(config['dropout'])
    self.tanh = nn.Tanh()
    self.pred = nn.Linear(config['nfc'], config['class-number'])
    self.dictionary = config['dictionary']
    # self.init_weights()
def grad_variance(self):
    """Update ``self._grad_var`` from the running gradient averages.

    Each parameter's ``grad_avg`` state is updated in place as an
    exponential moving average with factor ``self._beta``; the summed
    squares of these averages are subtracted (after debiasing) from
    ``grad_norm_squared_avg`` and the result is floored at ``eps``.
    """
    global_state = self._global_state
    beta = self._beta
    self._grad_var = np.array(0.0, dtype=np.float32)
    for group in self._optimizer.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            state = self._optimizer.state[p]

            if self._iter == 0:
                state["grad_avg"] = grad.new().resize_as_(grad).zero_()
                state["grad_avg_squared"] = 0.0
            running = state["grad_avg"]
            running.mul_(beta).add_(1 - beta, grad)
            self._grad_var += torch.sum(running * running)

    debias_factor = self.zero_debias_factor() if self._zero_debias else 1.0

    self._grad_var /= -(debias_factor**2)
    self._grad_var += global_state['grad_norm_squared_avg'] / debias_factor
    # in case of negative variance: the two term are using different debias factors
    self._grad_var = max(self._grad_var, eps)
    if self._sparsity_debias:
        self._grad_var *= self._sparsity_avg
    return
def get_mu(self):
    """Set ``self._mu_t`` to the larger of root**2 and the ratio-based bound.

    The ratio ``(h_max + eps) / (h_min + eps)`` is floored at ``1.0 + eps``
    before its square root is used.
    """
    root = self.get_cubic_root()
    ratio = (self._h_max + eps) / (self._h_min + eps)
    dr = max(ratio, 1.0 + eps)
    sqrt_dr = np.sqrt(dr)
    bound = ((sqrt_dr - 1) / (sqrt_dr + 1))**2
    self._mu_t = max(root**2, bound)
    return
def pad_batch(mini_batch):
    """Zero-pad a batch of sequence pairs into two integer matrices.

    Args:
        mini_batch: sequence of items where ``item[0]`` and ``item[1]`` are
            the two integer sequences of a pair.

    Returns:
        A two-element list of ``Variable``-wrapped int64 tensors with shapes
        ``(len(mini_batch), longest item[0])`` and
        ``(len(mini_batch), longest item[1])``; unused cells stay 0.
    """
    mini_batch_size = len(mini_batch)
    max_sent_len1 = int(np.max([len(x[0]) for x in mini_batch]))
    max_sent_len2 = int(np.max([len(x[1]) for x in mini_batch]))
    # np.int64 replaces the ``np.int`` alias, which newer NumPy no longer has.
    main_matrix1 = np.zeros((mini_batch_size, max_sent_len1), dtype=np.int64)
    main_matrix2 = np.zeros((mini_batch_size, max_sent_len2), dtype=np.int64)
    for row, pair in enumerate(mini_batch):
        # Index by (row, column) position. The previous code indexed with the
        # batch item and the token value themselves and silently swallowed the
        # resulting IndexError, so cells were left empty or misplaced.
        for col, token in enumerate(pair[0]):
            main_matrix1[row, col] = token
        for col, token in enumerate(pair[1]):
            main_matrix2[row, col] = token
    main_matrix1_t = Variable(torch.from_numpy(main_matrix1))
    main_matrix2_t = Variable(torch.from_numpy(main_matrix2))
    return [main_matrix1_t, main_matrix2_t]
# return [Variable(torch.cat((main_matrix1_t, main_matrix2_t), 0))
# def pad_batch(mini_batch):
# # print mini_batch
# # print type(mini_batch)
# # print mini_batch.shape
# # for i, _ in enumerate(mini_batch):
# # print i, _
# return [Variable(torch.from_numpy(np.asarray(_))) for _ in mini_batch[0]]
def grad_variance(self):
    """Recompute ``self._grad_var`` for the current iteration.

    Per-parameter ``grad_avg`` buffers (created on iteration 0) are updated
    in place with factor ``self._beta``. The accumulated sum of their squared
    entries is divided by ``-(debias_factor**2)``, the debiased
    ``grad_norm_squared_avg`` is added, and the value is floored at ``eps``
    and optionally scaled by ``self._sparsity_avg``.
    """
    global_state = self._global_state
    beta = self._beta
    self._grad_var = np.array(0.0, dtype=np.float32)
    optimizer_state = self._optimizer.state
    for param_group in self._optimizer.param_groups:
        for param in param_group['params']:
            if param.grad is None:
                continue
            grad = param.grad.data
            state = optimizer_state[param]

            first_iteration = self._iter == 0
            if first_iteration:
                state["grad_avg"] = grad.new().resize_as_(grad).zero_()
                state["grad_avg_squared"] = 0.0
            state["grad_avg"].mul_(beta).add_(1 - beta, grad)
            squared = state["grad_avg"] * state["grad_avg"]
            self._grad_var += torch.sum(squared)

    if self._zero_debias:
        debias_factor = self.zero_debias_factor()
    else:
        debias_factor = 1.0

    self._grad_var /= -(debias_factor**2)
    self._grad_var += global_state['grad_norm_squared_avg'] / debias_factor
    # in case of negative variance: the two term are using different debias factors
    self._grad_var = max(self._grad_var, eps)
    if self._sparsity_debias:
        self._grad_var *= self._sparsity_avg
    return
def get_mu(self):
    """Set ``self._mu_t`` from the cubic root and the h_max / h_min ratio.

    ``_mu_t`` becomes the larger of ``root**2`` and
    ``((sqrt(dr) - 1) / (sqrt(dr) + 1))**2`` with
    ``dr = (h_max + eps) / (h_min + eps)``.
    """
    root = self.get_cubic_root()
    dr = (self._h_max + eps) / (self._h_min + eps)
    sqrt_dr = np.sqrt(dr)
    from_ratio = ((sqrt_dr - 1) / (sqrt_dr + 1))**2
    from_root = root**2
    self._mu_t = max(from_root, from_ratio)
    return
def load_data(resize):
    """Build the train and validation loaders for the 'PlantVillage' folder.

    The crop side is ``max(resize)``; validation images are first scaled to
    ``int(max(resize)/224*256)`` and centre-cropped. Both loaders shuffle and
    use the module-level ``batch_size``. Returns ``(train_loader, val_loader)``.
    """
    side = max(resize)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    train_steps = [
        transforms.RandomSizedCrop(side),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
    val_steps = [
        #Higher scale-up for inception
        transforms.Scale(int(side/224*256)),
        transforms.CenterCrop(side),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
    data_transforms = {
        'train': transforms.Compose(train_steps),
        'val': transforms.Compose(val_steps),
    }

    data_dir = 'PlantVillage'
    splits = ['train', 'val']
    dsets = {}
    for split in splits:
        dsets[split] = datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    dset_loaders = {}
    for split in splits:
        dset_loaders[split] = torch.utils.data.DataLoader(dsets[split], batch_size=batch_size,
                                                          shuffle=True)
    # Computed as in the original but not used in this function.
    dset_sizes = {split: len(dsets[split]) for split in splits}
    dset_classes = dsets['train'].classes
    return dset_loaders['train'], dset_loaders['val']
def vec_to_classnum(onehot):
    """Return the first entry of the argmax indices over the last dimension."""
    _, indices = torch.max(onehot, -1)
    return indices[0]
def evaluate(model, testloader, args, use_cuda=False):
    """Print overall and per-class accuracy (2 classes) and return accuracy.

    Args:
        model: network; switched to eval mode before the pass.
        testloader: iterable of ``(inputs, onehot_targets)`` batches.
        args: kept for interface compatibility; the per-class loop now takes
            the batch length from the targets instead of ``args.batch_size``.
        use_cuda: move data to the GPU when the module-level ``cuda_ava``
            flag is also set.

    Returns:
        ``correct / total`` over all batches.
    """
    correct = 0
    total = 0
    class_correct = [0. for _ in range(2)]
    class_total = [0. for _ in range(2)]
    model.eval()
    for i, data in enumerate(testloader, 0):
        inputs, targets = data
        inputs = inputs.unsqueeze(1)
        targets = target_onehot_to_classnum_tensor(targets)
        if use_cuda and cuda_ava:
            inputs = Variable(inputs.float().cuda())
            targets = targets.cuda()
        else:
            inputs = Variable(inputs.float())
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum()
        # Flatten rather than squeeze so a one-sample batch stays indexable.
        c = (predicted == targets).view(-1)
        # Iterate over the samples actually in this batch: the last batch can
        # be smaller than args.batch_size, which used to raise IndexError.
        for k in range(targets.size(0)):
            target = int(targets[k])
            class_correct[target] += float(c[k])
            class_total[target] += 1
    print("Accuracy of the network is: %.5f %%" % (correct / total * 100))
    for i in range(2):
        if class_total[i] == 0:
            print("Accuracy of %1s : %1s %% (%1d / %1d)" % (classes[i], "NaN", class_correct[i], class_total[i]))
        else:
            print("Accuracy of %1s : %.5f %% (%1d / %1d)" % (classes[i], class_correct[i] / class_total[i] * 100, class_correct[i], class_total[i]))
    return correct / total
def vec_to_classnum(onehot):
    """Argmax along the last dimension; returns element 0 of the indices."""
    max_result = torch.max(onehot, -1)
    class_indices = max_result[1]
    return class_indices[0]
def evaluate(model, testloader, args, use_cuda=False):
    """Print overall and per-class accuracy (3 classes) on up to 100 batches.

    Args:
        model: callable mapping inputs to class scores.
        testloader: iterable of ``(inputs, onehot_targets)`` batches; only
            the first 100 are used.
        args: kept for interface compatibility; the per-class loop now takes
            the batch length from the targets instead of ``args.batch_size``.
        use_cuda: move data to the GPU when the module-level ``cuda_ava``
            flag is also set.

    Returns:
        ``correct / total`` over the evaluated batches.
    """
    correct = 0
    total = 0
    class_correct = [0. for _ in range(3)]
    class_total = [0. for _ in range(3)]
    for i, data in enumerate(testloader, 0):
        if i == 100:
            break
        inputs, targets = data
        inputs = inputs.unsqueeze(1)
        targets = target_onehot_to_classnum_tensor(targets)
        if use_cuda and cuda_ava:
            inputs = Variable(inputs.float().cuda())
            targets = targets.cuda()
        else:
            inputs = Variable(inputs.float())
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum()
        # Flatten rather than squeeze so a one-sample batch stays indexable.
        c = (predicted == targets).view(-1)
        # Iterate over the samples actually in this batch: the last batch can
        # be smaller than args.batch_size, which used to raise IndexError.
        # A separate index name also stops shadowing the batch counter ``i``.
        for k in range(targets.size(0)):
            target = int(targets[k])
            class_correct[target] += float(c[k])
            class_total[target] += 1
    print("Accuracy of the network is: %.5f %%" % (correct / total * 100))
    for i in range(3):
        if class_total[i] == 0:
            print("Accuracy of %1s : %1s %% (%1d / %1d)" % (classes[i], "NaN", class_correct[i], class_total[i]))
        else:
            print("Accuracy of %1s : %.5f %% (%1d / %1d)" % (classes[i], class_correct[i] / class_total[i] * 100, class_correct[i], class_total[i]))
    return correct / total
def vec_to_classnum(onehot):
    """Pick index 0 of the positions of the maxima over the last dimension."""
    positions = torch.max(onehot, -1)[1]
    first = positions[0]
    return first
def vec_to_classnum(onehot):
    """Return ``argmax(onehot, dim=-1)[0]`` computed via ``torch.max``."""
    _values, argmax = torch.max(onehot, -1)
    return argmax[0]
def evaluate(model, testloader, args, use_cuda=False):
    """Print overall and per-class accuracy (2 classes) on up to 20 batches.

    Args:
        model: network; switched to eval mode before the pass.
        testloader: iterable of ``(inputs, onehot_targets)`` batches; only
            the first 20 are used.
        args: kept for interface compatibility; the per-class loop now takes
            the batch length from the targets instead of ``args.batch_size``.
        use_cuda: move data to the GPU when the module-level ``cuda_ava``
            flag is also set.

    Returns:
        ``correct / total`` over the evaluated batches.
    """
    correct = 0
    total = 0
    class_correct = [0. for _ in range(2)]
    class_total = [0. for _ in range(2)]
    model.eval()
    for i, data in enumerate(testloader, 0):
        if i == 20:
            break
        inputs, targets = data
        inputs = inputs.unsqueeze(1)
        targets = target_onehot_to_classnum_tensor(targets)
        if use_cuda and cuda_ava:
            inputs = Variable(inputs.float().cuda())
            targets = targets.cuda()
        else:
            inputs = Variable(inputs.float())
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum()
        # Flatten rather than squeeze so a one-sample batch stays indexable.
        c = (predicted == targets).view(-1)
        # Iterate over the samples actually in this batch: the last batch can
        # be smaller than args.batch_size, which used to raise IndexError.
        # A separate index name also stops shadowing the batch counter ``i``.
        for k in range(targets.size(0)):
            target = int(targets[k])
            class_correct[target] += float(c[k])
            class_total[target] += 1
    print("Accuracy of the network is: %.5f %%" % (correct / total * 100))
    for i in range(2):
        if class_total[i] == 0:
            print("Accuracy of %1s : %1s %% (%1d / %1d)" % (classes[i], "NaN", class_correct[i], class_total[i]))
        else:
            print("Accuracy of %1s : %.5f %% (%1d / %1d)" % (classes[i], class_correct[i] / class_total[i] * 100, class_correct[i], class_total[i]))
    return correct / total
def vec_to_classnum(onehot):
    """Take ``torch.max`` over the last dimension and return its first index."""
    maxima = torch.max(onehot, -1)
    return maxima[1][0]
def vec_to_classnum(onehot):
    """Return element 0 of the index tensor from ``torch.max(onehot, -1)``."""
    last_dim = -1
    _, winners = torch.max(onehot, last_dim)
    return winners[0]
def evaluate(model, testloader, args, use_cuda=False):
    """Print overall and per-class accuracy (2 classes) on up to 20 batches.

    Args:
        model: callable mapping inputs to class scores.
        testloader: iterable of ``(inputs, onehot_targets)`` batches; only
            the first 20 are used.
        args: kept for interface compatibility; the per-class loop now takes
            the batch length from the targets instead of ``args.batch_size``.
        use_cuda: move data to the GPU when the module-level ``cuda_ava``
            flag is also set.

    Returns:
        ``correct / total`` over the evaluated batches.
    """
    correct = 0
    total = 0
    class_correct = [0. for _ in range(2)]
    class_total = [0. for _ in range(2)]
    for i, data in enumerate(testloader, 0):
        if i == 20:
            break
        inputs, targets = data
        inputs = inputs.unsqueeze(1)
        targets = target_onehot_to_classnum_tensor(targets)
        if use_cuda and cuda_ava:
            inputs = Variable(inputs.float().cuda())
            targets = targets.cuda()
        else:
            inputs = Variable(inputs.float())
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum()
        # Flatten rather than squeeze so a one-sample batch stays indexable.
        c = (predicted == targets).view(-1)
        # Iterate over the samples actually in this batch: the last batch can
        # be smaller than args.batch_size, which used to raise IndexError.
        # A separate index name also stops shadowing the batch counter ``i``.
        for k in range(targets.size(0)):
            target = int(targets[k])
            class_correct[target] += float(c[k])
            class_total[target] += 1
    print("Accuracy of the network is: %.5f %%" % (correct / total * 100))
    for i in range(2):
        if class_total[i] == 0:
            print("Accuracy of %1s : %1s %% (%1d / %1d)" % (classes[i], "NaN", class_correct[i], class_total[i]))
        else:
            print("Accuracy of %1s : %.5f %% (%1d / %1d)" % (classes[i], class_correct[i] / class_total[i] * 100, class_correct[i], class_total[i]))
    return correct / total