Python torch 模块,sort() 实例源码
我们从Python开源项目中,提取了以下48个代码示例,用于说明如何使用torch.sort()。
def assertIsOrdered(self, order, x, mxx, ixx, task):
    """Assert that ``(mxx, ixx)`` is a consistent ``torch.sort`` result for ``x``.

    Args:
        order: ``'descending'`` or ``'ascending'``.
        x: the unsorted input, indexed as ``x[row][col]``.
        mxx: the sorted values.
        ixx: the indices returned alongside ``mxx``.
        task: label interpolated into the failure messages.

    Raises:
        ValueError: if ``order`` is not one of the two supported strings.
    """
    SIZE = 4
    if order == 'descending':
        def check_order(a, b):
            return a >= b
    elif order == 'ascending':
        def check_order(a, b):
            return a <= b
    else:
        # Previously this called an undefined ``error`` helper, which surfaced
        # as a NameError instead of the intended message.
        raise ValueError(
            'unknown order "{}", must be "ascending" or "descending"'.format(order))

    # Only the leading SIZE x SIZE corner of the values is order-checked.
    for j, k in product(range(SIZE), range(1, SIZE)):
        self.assertTrue(check_order(mxx[j][k - 1], mxx[j][k]),
                        'torch.sort ({}) values unordered for {}'.format(order, task))

    seen = set()
    size = x.size(x.dim() - 1)
    for k in range(size):
        seen.clear()
        for j in range(size):
            self.assertEqual(x[k][ixx[k][j]], mxx[k][j],
                             'torch.sort ({}) indices wrong for {}'.format(order, task))
            # Store a plain int: tensor elements hash by identity, which would
            # make the uniqueness check below pass vacuously.
            seen.add(int(ixx[k][j]))
        # Every index of the row must appear exactly once.
        self.assertEqual(len(seen), size)
def _prune_and_sort_spans(mention_scores: torch.FloatTensor,
num_spans_to_keep: int) -> torch.IntTensor:
"""
The indices of the top-k scoring spans according to span_scores. We return the
indices in their original order, not ordered by score, so that we can rely on
the ordering to consider the previous k spans as antecedents for each span later.
Parameters
----------
mention_scores : ``torch.FloatTensor``, required.
The mention score for every candidate, with shape (batch_size, num_spans, 1).
num_spans_to_keep : ``int``, required.
The number of spans to keep when pruning.
Returns
-------
top_span_indices : ``torch.IntTensor``, required.
The indices of the top-k scoring spans. Has shape (batch_size, num_spans_to_keep).
"""
# Shape: (batch_size, num_spans_to_keep, 1)
_, top_span_indices = mention_scores.topk(num_spans_to_keep, 1)
top_span_indices, _ = torch.sort(top_span_indices, 1)
# Shape: (batch_size, num_spans_to_keep)
top_span_indices = top_span_indices.squeeze(-1)
return top_span_indices
def compute_precision_mapping(pt):
    """Build threshold -> precision lookup tables for the first 1000 columns.

    For each column of ``pt['details']['score']`` / ``pt['details']['precision']``
    the scores are sorted and de-duplicated, the precisions are replaced by
    their running maximum in score order, and both are padded with sentinel
    rows so that any score in [-1000, 1000] falls inside the table.

    Args:
        pt: mapping whose ``'details'`` entry holds 2-D arrays ``'score'`` and
            ``'precision'`` with at least 1000 columns.

    Returns:
        dict with keys ``'thresh'`` and ``'prec'``, each a list of 1000 column
        vectors of shape (n_unique + 2, 1).
    """
    thresh_all = []
    prec_all = []
    # ``range`` rather than the Python-2-only ``xrange`` so this runs on both.
    for jj in range(1000):
        thresh = pt['details']['score'][:, jj]
        prec = pt['details']['precision'][:, jj]

        ind = np.argsort(thresh)
        thresh = thresh[ind]
        # First occurrence of every distinct threshold, in ascending order.
        indexes = np.sort(np.unique(thresh, return_index=True)[1])
        thresh = thresh[indexes]
        # Sentinel rows guarantee the table spans at least [-1000, 1000].
        thresh = np.vstack((min(-1000, min(thresh) - 1),
                            thresh[:, np.newaxis],
                            max(1000, max(thresh) + 1)))

        # ``prec[ind]`` is a copy, so the running max does not touch ``pt``.
        prec = prec[ind]
        for i in range(1, len(prec)):
            prec[i] = max(prec[i], prec[i - 1])
        prec = prec[indexes]
        prec = np.vstack((prec[0], prec[:, np.newaxis], prec[-1]))

        thresh_all.append(thresh)
        prec_all.append(prec)
    precision_score = {'thresh': thresh_all, "prec": prec_all}
    return precision_score
def compute_precision_score_mapping(thresh, prec, score):
    """Map ``score`` to a precision by linear interpolation over a table.

    The (threshold, precision) pairs are sorted by threshold, de-duplicated,
    made monotone with a running maximum over precision, and padded with
    sentinel rows at (at most) -1000 and (at least) 1000 before interpolating.

    Args:
        thresh: 1-D array of score thresholds.
        prec: 1-D array of precisions aligned with ``thresh``.
        score: scalar or array of scores to look up.

    Returns:
        The interpolated precision(s), as returned by ``interp1d``.
    """
    ind = np.argsort(thresh)
    thresh = thresh[ind]
    # First occurrence of every distinct threshold, in ascending order.
    indexes = np.sort(np.unique(thresh, return_index=True)[1])
    thresh = thresh[indexes]
    thresh = np.vstack((min(-1000, min(thresh) - 1),
                        thresh[:, np.newaxis],
                        max(1000, max(thresh) + 1)))

    # ``prec[ind]`` is a copy, so the caller's array is left untouched.
    prec = prec[ind]
    # ``range`` rather than the Python-2-only ``xrange``.
    for i in range(1, len(prec)):
        prec[i] = max(prec[i], prec[i - 1])
    prec = prec[indexes]
    prec = np.vstack((prec[0], prec[:, np.newaxis], prec[-1]))

    f = interp1d(thresh[:, 0], prec[:, 0])
    val = f(score)
    return val
def backward(ctx, grad_outputs):
    """Route output gradients back to the rows of the original input.

    The rows of ``grad_outputs`` are re-ordered with the argsort of
    ``ctx.rev_segm_sorted``, consumed group by group (one group per entry of
    ``ctx.num_lengths``), and finally re-ordered with the argsort of
    ``ctx.lengths_sorted``.

    Returns:
        ``(grads, None, None, None)``.
    """
    width = grad_outputs.size(1)
    # presumably rev_segm_sorted is a permutation, so its argsort inverts it
    segm_order = torch.sort(ctx.rev_segm_sorted)[1]
    grad_outputs = torch.index_select(grad_outputs, 0, segm_order)
    # One-element list so the nested function can advance the read position.
    cursor = [ctx.num_zeros]

    def backward_segment(l, n):
        rows = n // l
        chunk = grad_outputs.narrow(0, cursor[0], rows)
        if l > 1:
            chunk = _MyMax.backward(ctx.maxes[l], chunk)[0].view(n, width)
        cursor[0] += rows
        return chunk

    pieces = []
    for l, n in enumerate(ctx.num_lengths):
        if n > 0:
            pieces.append(backward_segment(l, n))
    grads = torch.cat(pieces, 0)

    length_order = torch.sort(ctx.lengths_sorted)[1]
    grads = torch.index_select(grads, 0, length_order)
    return grads, None, None, None
def prune(self, size):
    """Return a dictionary restricted to the ``size`` most frequent entries.

    Returns ``self`` unchanged when it already holds at most ``size``
    entries; otherwise builds a new ``Dict`` containing every special entry
    plus the ``size`` highest-frequency labels.
    """
    if size >= self.size():
        return self

    # Only keep the `size` most frequent entries.
    freq = torch.Tensor(
        [self.frequencies[i] for i in range(len(self.frequencies))])
    _, idx = torch.sort(freq, 0, True)

    newDict = Dict()

    # Add special entries in all cases.
    for i in self.special:
        newDict.addSpecial(self.idxToLabel[i])

    # Look labels up with plain ints: tensor elements hash by identity and
    # would miss integer keys (presumably idxToLabel is keyed by int).
    for i in idx[:size].tolist():
        newDict.add(self.idxToLabel[i])

    return newDict
# Convert `labels` to indices. Use `unkWord` if not found.
# Optionally insert `bosWord` at the beginning and `eosWord` at the end.
def nms(boxes, nms_thresh):
    """Greedy non-maximum suppression.

    Boxes are visited from highest to lowest confidence (``box[4]``). A
    visited box whose confidence is still positive is kept, and every
    lower-ranked box overlapping it by more than ``nms_thresh`` has its
    confidence set to 0 in place.

    Returns:
        The list of kept boxes (``boxes`` itself when it is empty).
    """
    if len(boxes) == 0:
        return boxes

    # Sorting (1 - confidence) ascending visits the most confident box first.
    det_confs = torch.zeros(len(boxes))
    for pos, box in enumerate(boxes):
        det_confs[pos] = 1 - box[4]
    order = torch.sort(det_confs)[1]

    out_boxes = []
    total = len(boxes)
    for rank in range(total):
        keeper = boxes[order[rank]]
        if not keeper[4] > 0:
            # Already suppressed by a more confident box.
            continue
        out_boxes.append(keeper)
        for later in range(rank + 1, total):
            rival = boxes[order[later]]
            if bbox_iou(keeper, rival, x1y1x2y2=False) > nms_thresh:
                rival[4] = 0
    return out_boxes
def prune(self, size):
    """Return a new dictionary with the `size` most frequent entries.

    Returns ``self`` unchanged when it already holds at most ``size``
    entries. The new dictionary inherits ``lower`` and always contains the
    special entries.
    """
    if size >= self.size():
        return self

    # Only keep the `size` most frequent entries.
    freq = torch.Tensor(
        [self.frequencies[i] for i in range(len(self.frequencies))])
    _, idx = torch.sort(freq, 0, True)

    newDict = Dict()
    newDict.lower = self.lower

    # Add special entries in all cases.
    for i in self.special:
        newDict.addSpecial(self.idxToLabel[i])

    # Look labels up with plain ints: tensor elements hash by identity and
    # would miss integer keys (presumably idxToLabel is keyed by int).
    for i in idx[:size].tolist():
        newDict.add(self.idxToLabel[i])

    return newDict
def forward(ctx, pred, labels, is_positive, ohem_ratio, group_size):
    """Hinge loss with online hard example mining over fixed-size groups.

    Each sample's loss is ``max(0, 1 - is_positive * pred[i, labels[i] - 1])``
    (labels are 1-based). Samples are split into groups of ``group_size``;
    only the ``int(group_size * ohem_ratio)`` largest losses per group are
    summed.

    Stores ``loss_ind``, ``labels``, ``slopes``, ``shape``, ``group_size`` and
    ``num_group`` on ``ctx``.

    Returns:
        A 1-element tensor holding the summed loss; on CUDA when a GPU is
        available, otherwise on the CPU.
    """
    n_sample = pred.size()[0]
    assert n_sample == len(labels), "mismatch between sample size and label size"

    losses = torch.zeros(n_sample)
    slopes = torch.zeros(n_sample)
    for i in range(n_sample):
        losses[i] = max(0, 1 - is_positive * pred[i, labels[i] - 1])
        # Slope of the hinge w.r.t. the prediction; zero where it is inactive.
        slopes[i] = -is_positive if losses[i] != 0 else 0

    losses = losses.view(-1, group_size).contiguous()
    sorted_losses, indices = torch.sort(losses, dim=1, descending=True)
    keep_num = int(group_size * ohem_ratio)

    loss = torch.zeros(1)
    # Previously an unconditional ``.cuda()``, which raised on CPU-only hosts.
    if torch.cuda.is_available():
        loss = loss.cuda()
    for i in range(losses.size(0)):
        loss += sorted_losses[i, :keep_num].sum()

    ctx.loss_ind = indices[:, :keep_num]
    ctx.labels = labels
    ctx.slopes = slopes
    ctx.shape = pred.size()
    ctx.group_size = group_size
    ctx.num_group = losses.size(0)
    return loss
def forward(self, input):
    """Pool each channel by averaging its k highest (and k lowest) activations.

    ``input`` is read as (batch, channels, h, w). Per channel, the ``kmax``
    largest of the h*w values are averaged; when ``kmin > 0`` and ``alpha`` is
    non-zero, ``alpha`` times the mean of the ``kmin`` smallest values is added
    and the result is halved.

    Stores ``indices_max`` (and ``indices_min`` when used) on ``self`` and
    saves ``input`` for the backward pass.

    Returns:
        Tensor of shape (batch, channels).
    """
    batch_size = input.size(0)
    num_channels = input.size(1)
    h = input.size(2)
    w = input.size(3)

    n = h * w  # number of regions

    kmax = self.get_positive_k(self.kmax, n)
    kmin = self.get_positive_k(self.kmin, n)

    # Named ``sorted_scores`` to avoid shadowing the builtin ``sorted``.
    sorted_scores, indices = torch.sort(
        input.view(batch_size, num_channels, n), dim=2, descending=True)

    self.indices_max = indices.narrow(2, 0, kmax)
    output = sorted_scores.narrow(2, 0, kmax).sum(2).div_(kmax)

    # Value comparison: ``is not 0`` tested identity, so ``alpha == 0.0``
    # wrongly took this branch.
    if kmin > 0 and self.alpha != 0:
        self.indices_min = indices.narrow(2, n - kmin, kmin)
        output.add_(sorted_scores.narrow(2, n - kmin, kmin).sum(2).mul_(self.alpha / kmin)).div_(2)

    self.save_for_backward(input)
    return output.view(batch_size, num_channels)
def value(self):
    """Returns the model's average precision for each class

    Return:
        ap (FloatTensor): 1xK tensor, with avg precision for each class k,
        or the integer 0 when no scores have been recorded.
    """
    if self.scores.numel() == 0:
        return 0
    ap = torch.zeros(self.scores.size(1))
    # compute average precision for each class
    # (a previously computed, never-read ``rg`` range tensor was dropped)
    for k in range(self.scores.size(1)):
        scores = self.scores[:, k]
        targets = self.targets[:, k]
        ap[k] = AveragePrecisionMeter.average_precision(scores, targets, self.difficult_examples)
    return ap
def MAP(ground_label: torch.FloatTensor, predict_label: torch.FloatTensor):
    """Average precision of one ranking.

    Items with a non-zero entry in ``ground_label`` are relevant. Items are
    ranked by descending ``predict_label``; the precision at every relevant
    item's rank is averaged.

    Raises:
        AssertionError: if no item is relevant.
    """
    relevant = {idx_ for idx_, glab in enumerate(ground_label) if glab != 0}
    _, ranking = torch.sort(predict_label, 0, True)

    precision_sum = 0
    hits = 0
    # Plain ints are needed for the membership test: tensor elements hash by
    # identity and would never be found in ``relevant``.
    for rank, idx_ in enumerate(ranking.view(-1).tolist(), start=1):
        if idx_ in relevant:
            hits += 1
            precision_sum += hits / rank
    assert (hits != 0)
    return precision_sum / hits
def MRR(ground_label: torch.FloatTensor, predict_label: torch.FloatTensor):
    """Reciprocal rank of the first relevant item.

    Items with a non-zero entry in ``ground_label`` are relevant; items are
    ranked by descending ``predict_label``.

    Raises:
        AssertionError: if no item is relevant.
    """
    relevant = {idx_ for idx_, glab in enumerate(ground_label) if glab != 0}
    _, ranking = torch.sort(predict_label, 0, True)

    mrr = 0
    # Plain ints are needed for the membership test: tensor elements hash by
    # identity and would never be found in ``relevant``.
    for rank, idx_ in enumerate(ranking.view(-1).tolist(), start=1):
        if idx_ in relevant:
            mrr = 1.0 / rank
            break
    assert (mrr != 0)
    return mrr
def prune(self, size):
    """Return a dictionary restricted to the ``size`` most frequent entries.

    Returns ``self`` unchanged when it already holds at most ``size``
    entries. The new dictionary inherits ``lower`` and always contains the
    special entries.
    """
    if size >= self.size():
        return self

    # Only keep the `size` most frequent entries.
    freq = torch.Tensor(
        [self.frequencies[i] for i in range(len(self.frequencies))])
    _, idx = torch.sort(freq, 0, True)

    newDict = Dict()
    newDict.lower = self.lower

    # Add special entries in all cases.
    for i in self.special:
        newDict.addSpecial(self.idxToLabel[i])

    # Look labels up with plain ints: tensor elements hash by identity and
    # would miss integer keys (presumably idxToLabel is keyed by int).
    for i in idx[:size].tolist():
        newDict.add(self.idxToLabel[i])

    return newDict
# Convert `labels` to indices. Use `unkWord` if not found.
# Optionally insert `bosWord` at the beginning and `eosWord` at the end.
def eval_model(dataset_loader, encoding, model):
    """Run ``model`` over ``dataset_loader`` and accumulate top-1 / top-5 evaluators.

    Python 2 code (print statements). Requires CUDA: every input batch is
    moved to the GPU.

    Returns:
        ``(top1, top5)``, the two ``imSituTensorEvaluation`` accumulators.
    """
    model.eval()
    print "evaluating model..."
    top1 = imSituTensorEvaluation(1, 3, encoding)
    top5 = imSituTensorEvaluation(5, 3, encoding)
    mx = len(dataset_loader)
    for i, (index, input, target) in enumerate(dataset_loader):
        # Trailing comma plus "\r": progress is rewritten on a single line.
        print "{}/{} batches\r".format(i+1,mx) ,
        input_var = torch.autograd.Variable(input.cuda(), volatile = True)
        # NOTE(review): target_var is never read below.
        target_var = torch.autograd.Variable(target.cuda(), volatile = True)
        (scores,predictions) = model.forward_max(input_var)
        # Rank along dim 1 by descending score; only the indices are used.
        (s_sorted, idx) = torch.sort(scores, 1, True)
        top1.add_point(target, predictions.data, idx.data)
        top5.add_point(target, predictions.data, idx.data)
    print "\ndone."
    return (top1, top5)
def nms(boxes, nms_thresh):
    """Greedy non-maximum suppression.

    Boxes are visited from highest to lowest confidence (``box[4]``). A
    visited box whose confidence is still positive is kept, and every
    lower-ranked box overlapping it by more than ``nms_thresh`` has its
    confidence set to 0 in place.

    Returns:
        The list of kept boxes (``boxes`` itself when it is empty).
    """
    if len(boxes) == 0:
        return boxes

    # Sorting (1 - confidence) ascending visits the most confident box first.
    det_confs = torch.zeros(len(boxes))
    for pos, box in enumerate(boxes):
        det_confs[pos] = 1 - box[4]
    order = torch.sort(det_confs)[1]

    out_boxes = []
    total = len(boxes)
    for rank in range(total):
        keeper = boxes[order[rank]]
        if not keeper[4] > 0:
            # Already suppressed by a more confident box.
            continue
        out_boxes.append(keeper)
        for later in range(rank + 1, total):
            rival = boxes[order[later]]
            if bbox_iou(keeper, rival, x1y1x2y2=False) > nms_thresh:
                rival[4] = 0
    return out_boxes
def forward(self, anchor, positive, negative):
    """Triplet hinge loss, optionally averaged over the hardest examples only.

    ``dist_type`` selects the measure: 0 uses ``F.pairwise_distance``, 1 uses
    ``cosine_similarity``. The per-sample hinge is
    ``clamp(d(anchor, positive) - d(anchor, negative) + margin, min=0)``.
    With ``use_ohem`` the mean is taken over the ``ohem_bs`` largest hinge
    values, otherwise over all of them.

    Raises:
        ValueError: if ``dist_type`` is neither 0 nor 1.
    """
    if self.dist_type == 0:
        dist_p = F.pairwise_distance(anchor, positive)
        dist_n = F.pairwise_distance(anchor, negative)
    elif self.dist_type == 1:
        # NOTE(review): a similarity is used where the hinge expects a
        # distance; confirm the sign convention is intended.
        dist_p = cosine_similarity(anchor, positive)
        # Was misspelled ``disp_n``, leaving ``dist_n`` undefined on this path.
        dist_n = cosine_similarity(anchor, negative)
    else:
        raise ValueError('unsupported dist_type: {}'.format(self.dist_type))

    dist_hinge = torch.clamp(dist_p - dist_n + self.margin, min=0.0)
    if self.use_ohem:
        v, idx = torch.sort(dist_hinge, descending=True)
        loss = torch.mean(v[0:self.ohem_bs])
    else:
        loss = torch.mean(dist_hinge)
    return loss
def value(self):
    """Returns the model's average precision for each class

    Return:
        ap (FloatTensor): 1xK tensor, with avg precision for each class k,
        or the integer 0 when no scores have been recorded.
    """
    if self.scores.numel() == 0:
        return 0
    ap = torch.zeros(self.scores.size(1))
    # compute average precision for each class
    # (a previously computed, never-read ``rg`` range tensor was dropped)
    for k in range(self.scores.size(1)):
        scores = self.scores[:, k]
        targets = self.targets[:, k]
        ap[k] = AveragePrecisionMeter.average_precision(scores, targets, self.difficult_examples)
    return ap
def reindex_target(self, target, e):
    """Reindex ``target`` so it is coherent with the ordering induced by ``e``.

    The permutation that sorts ``e`` along dim 1 is inverted and applied to
    the non-negative entries of ``target``; negative entries are treated as
    padding and come out as 0.

    Args:
        target: numpy array of shape (batch_size, n); negative means padding.
        e: tensor sorted along dim 1 to obtain the permutation.

    Returns:
        The reindexed target as a float numpy array.
    """
    ind = torch.sort(e, 1)[1].squeeze()
    # target = new_target(ind) -> new_target = target(ind_inv)
    # invert permutation (argsort of a permutation is its inverse)
    ind_inv = torch.sort(ind, 1)[1]
    mask = (target >= 0).astype(float)
    target = target * mask
    # ``range`` rather than the Python-2-only ``xrange``.
    for example in range(self.batch_size):
        tar = target[example].astype(int)
        ind_inv_n = ind_inv[example].data.cpu().numpy()
        tar = ind_inv_n[tar]
        # Compact the valid (non-padding) entries to the front of the row.
        tar_aux = tar[np.where(mask[example] == 1)[0]]
        tar[:tar_aux.shape[0]] = tar_aux
        target[example] = tar
    # Zero the padding positions again.
    target = target * mask
    return target
def eliminate_rows(self, prob_sc, ind, phis):
    """ eliminate rows of phis and prob_matrix scale

    Positions whose first ``prob_sc`` channel exceeds 0.85 are flagged,
    moved to the end of dim 1 by a sort, and zeroed out of ``phis``.

    Returns ``(prob_sc, ind, phis_out, active)``.
    NOTE(review): depends on ``dtype`` and ``Variable`` defined outside this
    block; tensor shapes are inferred from the indexing only.
    """
    length = prob_sc.size()[1]
    # 1.0 where channel 0 of prob_sc is above the 0.85 cut-off, else 0.0.
    mask = (prob_sc[:, :, 0] > 0.85).type(dtype)
    # Position indices 0..length-1, broadcast to the shape of ``mask``.
    rang = (Variable(torch.range(0, length - 1).unsqueeze(0)
            .expand_as(mask)).
            type(dtype))
    # Flagged positions get key ``length`` (larger than any index), so the
    # ascending sort moves them to the end.
    ind_sc = torch.sort(rang * (1-mask) + length * mask, 1)[1]
    # permute prob_sc
    m = mask.unsqueeze(2).expand_as(prob_sc)
    mm = m.clone()
    mm[:, :, 1:] = 0
    # Flagged rows are replaced by a row with 1 in channel 0 and 0 elsewhere.
    prob_sc = (torch.gather(prob_sc * (1 - m) + mm, 1,
               ind_sc.unsqueeze(2).expand_as(prob_sc)))
    # compose permutations
    ind = torch.gather(ind, 1, ind_sc)
    # 1.0 for positions that were not flagged, in the new order.
    active = torch.gather(1-mask, 1, ind_sc)
    # permute phis
    active1 = active.unsqueeze(2).expand_as(phis)
    ind1 = ind.unsqueeze(2).expand_as(phis)
    active2 = active.unsqueeze(1).expand_as(phis)
    ind2 = ind.unsqueeze(1).expand_as(phis)
    # Reorder and mask along dim 1, then along dim 2.
    phis_out = torch.gather(phis, 1, ind1) * active1
    phis_out = torch.gather(phis_out, 2, ind2) * active2
    return prob_sc, ind, phis_out, active
def plot_norm_points(self, Inputs_N, e, Perms, scales, fig=1):
    """Scatter-plot the first example's points, one subplot per permutation.

    Saves the figure as ``visualize_example.png`` under ``self.path``.
    Python 2 code (``xrange``).
    NOTE(review): the source's indentation was lost; ``ee //= 2`` is assumed
    to run once per subplot, after the inner loop. Confirm against upstream.
    """
    # Only the first element of the batch is visualised.
    input = Inputs_N[0][0].data.cpu().numpy()
    e = torch.sort(e, 1)[0][0].data.cpu().numpy()
    Perms = [perm[0].data.cpu().numpy() for perm in Perms]
    plt.figure(fig)
    plt.clf()
    ee = e.copy()
    for i, perm in enumerate(Perms):
        plt.subplot(1, len(Perms), i + 1)
        # One colour per node: 2 ** (scales - i) nodes in subplot i.
        colors = cm.rainbow(np.linspace(0, 1, 2 ** (scales - i)))
        # Keep positive entries and shift them to 0-based indices.
        perm = perm[np.where(perm > 0)[0]] - 1
        points = input[perm]
        e_scale = ee[perm]
        for node in xrange(2 ** (scales - i)):
            ind = np.where(e_scale == node)[0]
            pts = points[ind]
            plt.scatter(pts[:, 0], pts[:, 1], c=colors[node])
        # Halve the node ids for the next subplot.
        ee //= 2
    path = os.path.join(self.path, 'visualize_example.png')
    plt.savefig(path)
def prepare_batch(xs, lens, gpu=True):
    """Sort a batch by descending length and wrap it for inference.

    ``xs`` is re-ordered along dim 1 to match the sorted ``lens``. All three
    results are wrapped in volatile ``Variable`` objects and moved to the GPU
    when ``gpu`` is true.

    Returns:
        ``(xs, lens, ridx)`` where ``ridx`` is the argsort of the sort order,
        usable to restore the original ordering.
    """
    sorted_lens, order = lens.sort(0, True)
    restore = order.sort(0)[1]
    # Broadcast the order over dims 0 and 2 so it can drive a gather on dim 1.
    gather_index = order.unsqueeze(0).unsqueeze(-1).expand_as(xs)
    batch = [
        Variable(torch.gather(xs, 1, gather_index), volatile=True),
        Variable(sorted_lens, volatile=True),
        Variable(restore, volatile=True),
    ]
    if gpu:
        batch = [item.cuda() for item in batch]
    xs, lens, ridx = batch
    return xs, lens, ridx
def test_median(self):
    """Check ``torch.median`` against the middle column/row of ``torch.sort``.

    Covers the default dim, writing into result tensors, dim 0, and that the
    input is left unmodified. Uses the legacy API in which ``torch.median(x)``
    returns a ``(values, indices)`` pair.
    """
    for size in (155, 156):
        x = torch.rand(size, size)
        x0 = x.clone()

        res1val, res1ind = torch.median(x)
        res2val, res2ind = torch.sort(x)
        # Position of the (lower) median within a sorted row.
        ind = int(math.floor((size+1)/2) - 1)

        self.assertEqual(res2val.select(1, ind), res1val.select(1, 0), 0)
        # The value assertion used to be repeated here; check the indices
        # instead, mirroring the dim-0 checks below.
        self.assertEqual(res2ind.select(1, ind), res1ind.select(1, 0), 0)

        # Test use of result tensor
        res2val = torch.Tensor()
        res2ind = torch.LongTensor()
        torch.median(res2val, res2ind, x)
        self.assertEqual(res2val, res1val, 0)
        self.assertEqual(res2ind, res1ind, 0)

        # Test non-default dim
        res1val, res1ind = torch.median(x, 0)
        res2val, res2ind = torch.sort(x, 0)
        self.assertEqual(res1val[0], res2val[ind], 0)
        self.assertEqual(res1ind[0], res2ind[ind], 0)

        # input unchanged
        self.assertEqual(x, x0, 0)
def value(self):
    """Compute the area under the ROC curve of the recorded scores.

    Returns:
        ``0.5`` when nothing has been recorded; otherwise ``(area, tpr, fpr)``
        where ``tpr`` and ``fpr`` hold the curve sampled after each example in
        order of decreasing score (with a leading origin point).
    """
    # case when number of elements added are 0
    if self.scores.shape[0] == 0:
        return 0.5

    # sorting the arrays
    ranked = torch.sort(torch.from_numpy(self.scores), dim=0, descending=True)
    scores = ranked[0].numpy()
    sortind = ranked[1].numpy()

    # creating the roc curve
    n_points = scores.size + 1
    tpr = np.zeros(shape=n_points, dtype=np.float64)
    fpr = np.zeros(shape=n_points, dtype=np.float64)
    for i in range(1, n_points):
        # Each example moves the curve one step up (positive) or right.
        if self.targets[sortind[i - 1]] == 1:
            step_tp, step_fp = 1, 0
        else:
            step_tp, step_fp = 0, 1
        tpr[i] = tpr[i - 1] + step_tp
        fpr[i] = fpr[i - 1] + step_fp

    # Normalise the counts by the number of positives / negatives.
    tpr /= (self.targets.sum() * 1.0)
    fpr /= ((self.targets - 1.0).sum() * -1.0)

    # calculating area under curve using trapezoidal rule
    h = np.diff(fpr)
    sum_h = np.zeros(fpr.shape)
    sum_h[:-1] = h
    sum_h[1:] += h
    area = (sum_h * tpr).sum() / 2.0

    return (area, tpr, fpr)
def compute_precision_score_mapping_torch(thresh, prec, score):
    """Unfinished torch variant of the precision/score mapping.

    Sorts ``thresh`` and ``prec`` (numpy arrays) in ascending order with
    torch, but no interpolation is implemented: always returns ``None``.
    """
    thresh, ind_thresh = torch.from_numpy(thresh).sort(0, False)
    prec, ind_prec = torch.from_numpy(prec).sort(0, False)
    return None
def forward(self, inputs, lengths=None, start_state=None):
    """Run the bidirectional LSTM over a (batch-first) padded batch.

    When ``lengths`` is given the batch is sorted by descending length,
    packed, run through the LSTM, unpacked, and restored to its original
    order. When the module was not built with an externally supplied start
    state, the learned start vectors are expanded over the batch.

    Args:
        inputs: tensor with the batch on dim 0.
        lengths: optional 1-D tensor of sequence lengths.
        start_state: optional ``(h_0, c_0)``; only used when
            ``self._start_state_given`` is true.

    Returns:
        ``(output, (h_n, c_n))`` in the original batch order.
    """
    if not self._start_state_given:
        batch_size = inputs.size(0)
        start_hidden = self._lstm_start_hidden.unsqueeze(1).expand(2, batch_size, self._size).contiguous()
        start_state = self._lstm_start_state.unsqueeze(1).expand(2, batch_size, self._size).contiguous()
        start_state = (start_hidden, start_state)

    if lengths is not None:
        new_lengths, indices = torch.sort(lengths, dim=0, descending=True)
        inputs = torch.index_select(inputs, 0, indices)
        if self._start_state_given:
            # Keep a caller-provided state aligned with the re-ordered batch.
            start_state = (torch.index_select(start_state[0], 1, indices),
                           torch.index_select(start_state[1], 1, indices))
        # ``tolist`` replaces per-element ``l.data[0]``, which fails on
        # 0-dim tensors.
        new_lengths = new_lengths.data.tolist()
        inputs = nn.utils.rnn.pack_padded_sequence(inputs, new_lengths, batch_first=True)

    output, (h_n, c_n) = self._bilstm(inputs, start_state)

    if lengths is not None:
        output = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)[0]
        # The argsort of the sort order undoes the re-ordering.
        _, back_indices = torch.sort(indices, dim=0)
        output = torch.index_select(output, 0, back_indices)
        h_n = torch.index_select(h_n, 1, back_indices)
        c_n = torch.index_select(c_n, 1, back_indices)

    return output, (h_n, c_n)
def segment_max(inputs, segment_ids, num_segments=None, default=0.0):
    """Segment-wise maximum of ``inputs`` rows grouped by ``segment_ids``.

    The rows are first sorted by segment id, then handed to
    ``SegmentMax.apply`` together with ``num_segments`` and ``default``.
    """
    # highly optimized to decrease the amount of actual invocation of pytorch calls
    # assumes that most segments have 1 or 0 elements
    ordered_ids, order = torch.sort(segment_ids)
    ordered_inputs = torch.index_select(inputs, 0, order)
    return SegmentMax.apply(ordered_inputs, ordered_ids, num_segments, default)
def prepare_batch(self, x, x_lens):
    """Sort a batch by descending length and wrap it for inference.

    Returns:
        ``(x_var, x_lens, x_ridx)`` as volatile ``Variable`` objects (on the
        GPU when ``self.is_cuda``); ``x_ridx`` is the argsort of the sort
        order, usable to restore the original ordering.
    """
    sorted_lens, order = x_lens.sort(0, True)
    restore = order.sort()[1]

    x_var = Variable(x[order], volatile=True)
    x_lens = Variable(sorted_lens, volatile=True)
    x_ridx = Variable(restore.long(), volatile=True)

    if not self.is_cuda:
        return x_var, x_lens, x_ridx
    return x_var.cuda(), x_lens.cuda(), x_ridx.cuda()
def prepare_batch(self, batch_data, volatile=False):
    """Sort contexts and responses by length and wrap them as ``Variable``s.

    Args:
        batch_data: ``(x, x_lens, ys, ys_lens)``.
        volatile: forwarded to every ``Variable``.

    Returns:
        ``(x, x_lens, ys_i, ys_t, ys_lens, xys_idx)`` where ``ys_i`` / ``ys_t``
        are ``ys`` without its last / first step along the final dim,
        ``ys_lens`` is reduced by one accordingly, and ``xys_idx`` maps each
        sorted ``ys`` entry to the position of its ``x`` in the sorted ``x``.
    """
    x, x_lens, ys, ys_lens = batch_data
    batch_dim = 0 if self.batch_first else 1
    context_dim = 1 if self.batch_first else 0

    x_lens, x_idx = torch.sort(x_lens, 0, True)
    # Argsort of the sort order: position of each original x after sorting.
    _, x_ridx = torch.sort(x_idx)
    ys_lens, ys_idx = torch.sort(ys_lens, batch_dim, True)

    x_ridx_exp = x_ridx.unsqueeze(context_dim).expand_as(ys_idx)
    xys_idx = torch.gather(x_ridx_exp, batch_dim, ys_idx)

    x = x[x_idx]
    ys = torch.gather(ys, batch_dim, ys_idx.unsqueeze(-1).expand_as(ys))

    x = Variable(x, volatile=volatile)
    x_lens = Variable(x_lens, volatile=volatile)
    ys_i = Variable(ys[..., :-1], volatile=volatile).contiguous()
    ys_t = Variable(ys[..., 1:], volatile=volatile).contiguous()
    ys_lens = Variable(ys_lens - 1, volatile=volatile)
    xys_idx = Variable(xys_idx, volatile=volatile)

    if self.is_cuda:
        # ``non_blocking`` replaces the ``async`` keyword argument, which is
        # a reserved word (SyntaxError) on Python 3.7+.
        x = x.cuda(non_blocking=True)
        x_lens = x_lens.cuda(non_blocking=True)
        ys_i = ys_i.cuda(non_blocking=True)
        ys_t = ys_t.cuda(non_blocking=True)
        ys_lens = ys_lens.cuda(non_blocking=True)
        xys_idx = xys_idx.cuda(non_blocking=True)

    return x, x_lens, ys_i, ys_t, ys_lens, xys_idx
def advance(self, word_lk):
    """Update the status and check for finished or not.

    Adds the running beam scores to ``word_lk`` (except on the first step,
    where only row 0 is used), keeps the ``self.size`` best continuations,
    and records their scores, source beams and words.

    Returns:
        ``self.done``, set to True once the top of the beam is EOS.
    """
    num_words = word_lk.size(1)

    # Sum the previous scores.
    if len(self.prev_ks) > 0:
        beam_lk = word_lk + self.scores.unsqueeze(1).expand_as(word_lk)
    else:
        beam_lk = word_lk[0]

    flat_beam_lk = beam_lk.view(-1)

    # A second, identical ``topk`` call used to follow this one; it was
    # redundant and has been removed.
    best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True)

    self.all_scores.append(self.scores)
    self.scores = best_scores

    # bestScoresId is flattened beam x word array, so calculate which
    # word and beam each score came from
    # Floor division keeps the beam indices integral; ``/`` produces floats
    # under true-division semantics.
    prev_k = best_scores_id // num_words
    self.prev_ks.append(prev_k)
    self.next_ys.append(best_scores_id - prev_k * num_words)

    # End condition is when top-of-beam is EOS.
    if self.next_ys[-1][0] == Constants.EOS:
        self.done = True
        self.all_scores.append(self.scores)

    return self.done
def sort_scores(self):
    """Return ``self.scores`` sorted in descending order along dim 0, as (values, indices)."""
    ranked = self.scores.sort(0, True)
    return ranked
def sample_from_probs(probs, top_n=10):
    """
    Truncated weighted random choice.

    Zeroes, in place, every entry of ``probs`` except the ``top_n`` largest,
    then draws one index from the remaining weights.
    """
    ascending = probs.sort()[1]
    # set probabilities after top_n to 0
    dropped = ascending.data[:-top_n]
    probs[dropped] = 0
    return torch.multinomial(probs, 1)
def __init__(self, tensor, lengths):
    """Store ``tensor`` re-ordered by descending sequence length.

    Keeps the original lengths, the sort order, the sorted lengths, the
    indices produced by ``sort_idx`` for that order, and a 0/1 mask with the
    first ``length`` positions of row ``i`` set for the i-th original length.
    """
    self.original_lengths = lengths
    lengths_desc, order = torch.LongTensor(lengths).sort(dim=0, descending=True)
    self.sorted_idx = order
    self.tensor = tensor.index_select(dim=0, index=order)
    self.lengths = list(lengths_desc)
    self.original_idx = torch.LongTensor(sort_idx(order))

    mask = torch.zeros(*self.tensor.size())
    self.mask_original = mask
    for row, length in enumerate(self.original_lengths):
        mask[row][:length].fill_(1)
def sort_batch(data, seq_len):
    """Sort a batch by descending sequence length.

    Returns:
        ``(sorted_data, sorted_seq_len, reverse_idx)``. ``reverse_idx`` is the
        argsort of the sort order and restores the original ordering. The
        last two are moved to the GPU when one is available; ``sorted_data``
        stays on the device of ``data``.
    """
    sorted_seq_len, sorted_idx = torch.sort(seq_len, dim=0, descending=True)
    sorted_data = data[sorted_idx.data]
    _, reverse_idx = torch.sort(sorted_idx, dim=0, descending=False)
    # Previously an unconditional ``.cuda()``, which raised on CPU-only hosts.
    if torch.cuda.is_available():
        sorted_seq_len = sorted_seq_len.cuda()
        reverse_idx = reverse_idx.cuda()
    return sorted_data, sorted_seq_len, reverse_idx
def _prepare_corpora(self, corpora, bpe_encoder, src_vocab, trg_vocab):
    """Encode parallel corpora into index sequences ordered by source length.

    Every sentence pair is BPE-encoded and converted to vocabulary indices;
    pairs with an empty side are counted as ignored and skipped. The kept
    pairs are shuffled and then sorted by source length.

    Returns:
        ``(src, trg)`` lists of encoded sentences.
    """
    src, trg, sizes = [], [], []
    count = 0
    ignored = 0

    for corpus in corpora:
        with corpus.reader([self._source_lang, self._target_lang]) as reader:
            for source, target in reader:
                src_words = bpe_encoder.encode_line(source, is_source=True)
                trg_words = bpe_encoder.encode_line(target, is_source=False)

                if len(src_words) == 0 or len(trg_words) == 0:
                    ignored += 1
                else:
                    src.append(src_vocab.convertToIdx(src_words,
                                                      onmt.Constants.UNK_WORD))
                    trg.append(trg_vocab.convertToIdx(trg_words,
                                                      onmt.Constants.UNK_WORD,
                                                      onmt.Constants.BOS_WORD,
                                                      onmt.Constants.EOS_WORD))
                    sizes.append(len(src_words))

                count += 1
                if count % 100000 == 0:
                    self._logger.info(' %d sentences prepared' % count)

    self._logger.info('Shuffling sentences')
    shuffle_order = torch.randperm(len(src))
    src = [src[idx] for idx in shuffle_order]
    trg = [trg[idx] for idx in shuffle_order]
    sizes = [sizes[idx] for idx in shuffle_order]

    self._logger.info('Sorting sentences by size')
    size_order = torch.sort(torch.Tensor(sizes))[1]
    src = [src[idx] for idx in size_order]
    trg = [trg[idx] for idx in size_order]

    self._logger.info('Prepared %d sentences (%d ignored due to length == 0)' % (len(src), ignored))

    return src, trg
def sortBest(self):
    """Return ``self.scores`` sorted in descending order along dim 0, as (values, indices)."""
    return self.scores.sort(0, True)
# Get the score of the best in the beam.
def assertIsOrdered(self, order, x, mxx, ixx, task):
    """Assert that ``(mxx, ixx)`` is a consistent ``torch.sort`` result for ``x``.

    Args:
        order: ``'descending'`` or ``'ascending'``.
        x: the unsorted input, indexed as ``x[row][col]``.
        mxx: the sorted values.
        ixx: the indices returned alongside ``mxx``.
        task: label interpolated into the failure messages.

    Raises:
        ValueError: if ``order`` is not one of the two supported strings.
    """
    SIZE = 4
    if order == 'descending':
        def check_order(a, b):
            return a >= b
    elif order == 'ascending':
        def check_order(a, b):
            return a <= b
    else:
        # Previously this called an undefined ``error`` helper, which surfaced
        # as a NameError instead of the intended message.
        raise ValueError(
            'unknown order "{}", must be "ascending" or "descending"'.format(order))

    # Only the leading SIZE x SIZE corner of the values is order-checked.
    for j, k in product(range(SIZE), range(1, SIZE)):
        self.assertTrue(check_order(mxx[j][k - 1], mxx[j][k]),
                        'torch.sort ({}) values unordered for {}'.format(order, task))

    seen = set()
    size = x.size(x.dim() - 1)
    for k in range(size):
        seen.clear()
        for j in range(size):
            self.assertEqual(x[k][ixx[k][j]], mxx[k][j],
                             'torch.sort ({}) indices wrong for {}'.format(order, task))
            # Store a plain int: tensor elements hash by identity, which would
            # make the uniqueness check below pass vacuously.
            seen.add(int(ixx[k][j]))
        # Every index of the row must appear exactly once.
        self.assertEqual(len(seen), size)
def test_median(self):
    """Check ``torch.median`` against the middle column/row of ``torch.sort``.

    Covers the default dim, writing into ``out=`` tensors, dim 0, and that the
    input is left unmodified. Uses the legacy API in which ``torch.median(x)``
    returns a ``(values, indices)`` pair.
    """
    for size in (155, 156):
        x = torch.rand(size, size)
        x0 = x.clone()

        res1val, res1ind = torch.median(x)
        res2val, res2ind = torch.sort(x)
        # Position of the (lower) median within a sorted row.
        ind = int(math.floor((size + 1) / 2) - 1)

        self.assertEqual(res2val.select(1, ind), res1val.select(1, 0), 0)
        # The value assertion used to be repeated here; check the indices
        # instead, mirroring the dim-0 checks below.
        self.assertEqual(res2ind.select(1, ind), res1ind.select(1, 0), 0)

        # Test use of result tensor
        res2val = torch.Tensor()
        res2ind = torch.LongTensor()
        torch.median(x, out=(res2val, res2ind))
        self.assertEqual(res2val, res1val, 0)
        self.assertEqual(res2ind, res1ind, 0)

        # Test non-default dim
        res1val, res1ind = torch.median(x, 0)
        res2val, res2ind = torch.sort(x, 0)
        self.assertEqual(res1val[0], res2val[ind], 0)
        self.assertEqual(res1ind[0], res2ind[ind], 0)

        # input unchanged
        self.assertEqual(x, x0, 0)
def sort_best(self):
    """Sort the beam: return scores in descending order along dim 0, as (values, indices)."""
    beam_scores = self.scores
    return beam_scores.sort(0, True)
# Get the score of the best in the beam.
def assertIsOrdered(self, order, x, mxx, ixx, task):
    """Assert that ``(mxx, ixx)`` is a consistent ``torch.sort`` result for ``x``.

    Args:
        order: ``'descending'`` or ``'ascending'``.
        x: the unsorted input, indexed as ``x[row][col]``.
        mxx: the sorted values.
        ixx: the indices returned alongside ``mxx``.
        task: label interpolated into the failure messages.

    Raises:
        ValueError: if ``order`` is not one of the two supported strings.
    """
    SIZE = 4
    if order == 'descending':
        def check_order(a, b):
            return a >= b
    elif order == 'ascending':
        def check_order(a, b):
            return a <= b
    else:
        # Previously this called an undefined ``error`` helper, which surfaced
        # as a NameError instead of the intended message.
        raise ValueError(
            'unknown order "{}", must be "ascending" or "descending"'.format(order))

    # Only the leading SIZE x SIZE corner of the values is order-checked.
    for j, k in product(range(SIZE), range(1, SIZE)):
        self.assertTrue(check_order(mxx[j][k - 1], mxx[j][k]),
                        'torch.sort ({}) values unordered for {}'.format(order, task))

    seen = set()
    size = x.size(x.dim() - 1)
    for k in range(size):
        seen.clear()
        for j in range(size):
            self.assertEqual(x[k][ixx[k][j]], mxx[k][j],
                             'torch.sort ({}) indices wrong for {}'.format(order, task))
            # Store a plain int: tensor elements hash by identity, which would
            # make the uniqueness check below pass vacuously.
            seen.add(int(ixx[k][j]))
        # Every index of the row must appear exactly once.
        self.assertEqual(len(seen), size)
def test_median(self):
    """Check ``torch.median`` against the middle column/row of ``torch.sort``.

    Covers the default dim, writing into ``out=`` tensors, dim 0, and that the
    input is left unmodified. All calls pass ``keepdim=False``.
    """
    for size in (155, 156):
        x = torch.rand(size, size)
        x0 = x.clone()

        res1val, res1ind = torch.median(x, keepdim=False)
        res2val, res2ind = torch.sort(x)
        # Position of the (lower) median within a sorted row.
        ind = int(math.floor((size + 1) / 2) - 1)

        self.assertEqual(res2val.select(1, ind), res1val, 0)
        # The value assertion used to be repeated here; check the indices
        # instead, mirroring the dim-0 checks below.
        self.assertEqual(res2ind.select(1, ind), res1ind, 0)

        # Test use of result tensor
        res2val = torch.Tensor()
        res2ind = torch.LongTensor()
        torch.median(x, keepdim=False, out=(res2val, res2ind))
        self.assertEqual(res2val, res1val, 0)
        self.assertEqual(res2ind, res1ind, 0)

        # Test non-default dim
        res1val, res1ind = torch.median(x, 0, keepdim=False)
        res2val, res2ind = torch.sort(x, 0)
        self.assertEqual(res1val, res2val[ind], 0)
        self.assertEqual(res1ind, res2ind[ind], 0)

        # input unchanged
        self.assertEqual(x, x0, 0)
def eval_model(dataset, dataset_loader, standard_encoding, model_encoding, model, trustedEncoder = False, image_group = {}):
    """Run ``model`` over ``dataset_loader`` and accumulate top-1 / top-5 evaluators.

    When ``trustedEncoder`` is false, predictions are converted from
    ``model_encoding`` to ``standard_encoding`` before scoring.
    Python 2 code (print statements). Requires CUDA.

    NOTE(review): ``image_group`` is a mutable default shared across calls;
    confirm ``imSituTensorEvaluation`` does not mutate it.

    Returns:
        ``(top1, top5)``, the two ``imSituTensorEvaluation`` accumulators.
    """
    model.eval()
    print "evaluating model..."
    if trustedEncoder == False:
        print "not using trusted encoder. This may take signficantly longer as predictions are converted to other encoding."
    mx = len(dataset_loader)
    # NOTE(review): batches is never read below.
    batches = []
    top1 = imSituTensorEvaluation(1, 3, image_group)
    top5 = imSituTensorEvaluation(5, 3, image_group)
    for i, (indexes, input, target) in enumerate(dataset_loader):
        # ``True or`` makes the progress line print on every batch.
        if True or i % 10 == 0: print "batch {} out of {}\r".format(i+1,mx),
        input_var = torch.autograd.Variable(input.cuda(), volatile = True)
        #target_var = torch.autograd.Variable(target.cuda(), volatile = True)
        (scores,predictions) = model.forward_max(input_var)
        # Rank along dim 1 by descending score; only the indices are used.
        (s_sorted, idx) = torch.sort(scores, 1, True)
        if not trustedEncoder:
            # Round-trip through situations to re-encode the predictions.
            predictions = standard_encoding.to_tensor(model_encoding.to_situation(predictions), False, False)
            predictions = predictions.view(target.size()[0], standard_encoding.n_verbs(), -1)
        else:
            predictions = predictions.data
        #(s_sorted, idx) = torch.sort(scores, 1, True)
        top1.add_point(target, predictions, idx.data, dataset.index_image(indexes))
        top5.add_point(target, predictions, idx.data, dataset.index_image(indexes))
    return (top1, top5)
#assumes the predictions are grouped by image, and sorted
def predict_human_readable(dataset_loader, simple_dataset, model, outdir, top_k):
    """Write the ``top_k`` highest-scoring predictions of every image as JSON.

    For each image one file is written to ``outdir`` (plain string
    concatenation, so ``outdir`` needs its trailing separator). The file name
    is the image name with its extension replaced by ``predictions``.

    NOTE(review): relies on an ``encoder`` object defined outside this
    function. Requires CUDA.
    """
    model.eval()
    # Single-argument, parenthesised prints behave the same on Python 2 and 3.
    print("predicting...")
    mx = len(dataset_loader)
    for i, (input, index) in enumerate(dataset_loader):
        print("{}/{} batches".format(i+1, mx))
        input_var = torch.autograd.Variable(input.cuda(), volatile=True)
        (scores, predictions) = model.forward_max(input_var)
        human = encoder.to_situation(predictions)
        (b, p, d) = predictions.size()
        for _b in range(0, b):
            items = []
            # ``human`` is flat: p consecutive entries per image.
            offset = _b * p
            for _p in range(0, p):
                items.append(human[offset + _p])
                items[-1]["score"] = scores.data[_b][_p]
            items = sorted(items, key=lambda x: -x["score"])[:top_k]
            name = simple_dataset.images[index[_b][0]].split(".")[:-1]
            name.append("predictions")
            outfile = outdir + ".".join(name)
            # Context manager so the handle is closed (and flushed) for every
            # file; it was previously left open.
            with open(outfile, "w") as handle:
                json.dump(items, handle)
def assertIsOrdered(self, order, x, mxx, ixx, task):
    """Assert that ``(mxx, ixx)`` is a consistent ``torch.sort`` result for ``x``.

    Args:
        order: ``'descending'`` or ``'ascending'``.
        x: the unsorted input, indexed as ``x[row][col]``.
        mxx: the sorted values.
        ixx: the indices returned alongside ``mxx``.
        task: label interpolated into the failure messages.

    Raises:
        ValueError: if ``order`` is not one of the two supported strings.
    """
    SIZE = 4
    if order == 'descending':
        def check_order(a, b):
            return a >= b
    elif order == 'ascending':
        def check_order(a, b):
            return a <= b
    else:
        # Previously this called an undefined ``error`` helper, which surfaced
        # as a NameError instead of the intended message.
        raise ValueError(
            'unknown order "{}", must be "ascending" or "descending"'.format(order))

    # Only the leading SIZE x SIZE corner of the values is order-checked.
    for j, k in product(range(SIZE), range(1, SIZE)):
        self.assertTrue(check_order(mxx[j][k - 1], mxx[j][k]),
                        'torch.sort ({}) values unordered for {}'.format(order, task))

    seen = set()
    size = x.size(x.dim() - 1)
    for k in range(size):
        seen.clear()
        for j in range(size):
            self.assertEqual(x[k][ixx[k][j]], mxx[k][j],
                             'torch.sort ({}) indices wrong for {}'.format(order, task))
            # Store a plain int: tensor elements hash by identity, which would
            # make the uniqueness check below pass vacuously.
            seen.add(int(ixx[k][j]))
        # Every index of the row must appear exactly once.
        self.assertEqual(len(seen), size)
def test_median(self):
    """Check ``torch.median`` against the middle element of ``torch.sort``.

    Covers the median over all elements, over dim 1, writing into ``out=``
    tensors, dim 0, and that the input is left unmodified.
    """
    for size in (155, 156):
        x = torch.rand(size, size)
        x0 = x.clone()

        # Median over all elements vs. the middle of the flattened sort.
        nelem = x.nelement()
        res1val = torch.median(x)
        res2val, _ = torch.sort(x.view(nelem))
        ind = int(math.floor((nelem + 1) / 2) - 1)

        self.assertEqual(res2val[ind], res1val, 0)

        res1val, res1ind = torch.median(x, dim=1, keepdim=False)
        res2val, res2ind = torch.sort(x)
        # Position of the (lower) median within a sorted row.
        ind = int(math.floor((size + 1) / 2) - 1)

        self.assertEqual(res2val.select(1, ind), res1val, 0)
        # The value assertion used to be repeated here; check the indices
        # instead, mirroring the dim-0 checks below.
        self.assertEqual(res2ind.select(1, ind), res1ind, 0)

        # Test use of result tensor
        res2val = torch.Tensor()
        res2ind = torch.LongTensor()
        torch.median(x, keepdim=False, out=(res2val, res2ind))
        self.assertEqual(res2val, res1val, 0)
        self.assertEqual(res2ind, res1ind, 0)

        # Test non-default dim
        res1val, res1ind = torch.median(x, 0, keepdim=False)
        res2val, res2ind = torch.sort(x, 0)
        self.assertEqual(res1val, res2val[ind], 0)
        self.assertEqual(res1ind, res2ind[ind], 0)

        # input unchanged
        self.assertEqual(x, x0, 0)
def sort_pack_tensors(ft, tar, lens):
    """Sort two batch-first tensors by descending length and pack both.

    Returns:
        ``(ft_packed, tar_packed)``, both packed with the same sorted lengths.
    """
    order = lens.sort(dim=0, descending=True)[1]
    sorted_lens = list(lens[order])
    ft_packed = rnn_utils.pack_padded_sequence(ft[order], sorted_lens, batch_first=True)
    tar_packed = rnn_utils.pack_padded_sequence(tar[order], sorted_lens, batch_first=True)
    return ft_packed, tar_packed
def append_zeros_all(fls1, fls2, mode):
    """Sort two collections by descending length and zero-pad to a common length.

    Args:
        fls1, fls2: sequences of numpy arrays; dim 0 is the length.
        mode: ``'audio'`` (1-D arrays) or ``'specs'`` (2-D arrays, padded
            along dim 0 only).

    Returns:
        ``(fls1, fls2, mixes, lens1, lens2)``: the padded arrays as lists, the
        element-wise sums ``fls1[i] + fls2[i]`` (filled in ``'audio'`` mode
        only), and the original lengths in descending order.

    Raises:
        ValueError: if ``mode`` is not supported.

    NOTE(review): the two collections are sorted independently, so the i-th
    entries paired in ``mixes`` need not be the i-th input pair. Confirm
    this is intended.
    """
    lens1, lens2 = [], []
    for fl1, fl2 in zip(fls1, fls2):
        # Both supported modes measure length along dim 0.
        if mode not in ('audio', 'specs'):
            raise ValueError('Whaaat?')
        lens1.append(fl1.shape[0])
        lens2.append(fl2.shape[0])

    inds1, lens1 = list(np.flip(np.argsort(lens1), 0)), np.flip(np.sort(lens1), 0)
    inds2, lens2 = list(np.flip(np.argsort(lens2), 0)), np.flip(np.sort(lens2), 0)
    # Re-order as plain lists: ``np.array`` on arrays of differing lengths
    # raises on recent NumPy releases.
    fls1 = [fls1[i] for i in inds1]
    fls2 = [fls2[i] for i in inds2]
    maxlen = max([max(lens1), max(lens2)])

    mixes = []
    for i, (fl1, fl2) in enumerate(zip(fls1, fls2)):
        if mode == 'audio':
            fls1[i] = np.pad(fl1, (0, maxlen - fl1.shape[0]), 'constant')
            fls2[i] = np.pad(fl2, (0, maxlen - fl2.shape[0]), 'constant')
            mixes.append(fls1[i] + fls2[i])
        elif mode == 'specs':
            fls1[i] = np.pad(fl1, ((0, maxlen - fl1.shape[0]), (0, 0)), 'constant')
            fls2[i] = np.pad(fl2, ((0, maxlen - fl2.shape[0]), (0, 0)), 'constant')
        else:
            raise ValueError('Whaaat?')

    return list(fls1), list(fls2), mixes, lens1, lens2