Python numpy module: sort() example source code
The following 50 code examples, collected from open-source Python projects, illustrate how to use numpy.sort().
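Before the project examples, here is a minimal, self-contained sketch of the most common numpy.sort() call patterns; the array values are purely illustrative.

import numpy as np

a = np.array([[3, 1, 2], [9, 7, 8]])
np.sort(a)                 # sorts along the last axis: [[1, 2, 3], [7, 8, 9]]
np.sort(a, axis=0)         # sorts each column independently
np.sort(a, axis=None)      # sorts the flattened array: [1, 2, 3, 7, 8, 9]
np.sort(a, kind='stable')  # selects the algorithm ('quicksort', 'mergesort', 'heapsort', 'stable')
np.sort(a)[:, ::-1]        # descending order per row; np.sort itself has no reverse flag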
def test_partition_cdtype(self):
d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
('Lancelot', 1.9, 38)],
dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
tgt = np.sort(d, order=['age', 'height'])
assert_array_equal(np.partition(d, range(d.size),
order=['age', 'height']),
tgt)
assert_array_equal(d[np.argpartition(d, range(d.size),
order=['age', 'height'])],
tgt)
for k in range(d.size):
assert_equal(np.partition(d, k, order=['age', 'height'])[k],
tgt[k])
assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
tgt[k])
d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
tgt = np.sort(d)
assert_array_equal(np.partition(d, range(d.size)), tgt)
for k in range(d.size):
assert_equal(np.partition(d, k)[k], tgt[k])
assert_equal(d[np.argpartition(d, k)][k], tgt[k])
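The test above compares np.partition against np.sort on a structured dtype; as a standalone sketch (field names and values are illustrative), sorting by one or more named fields looks like this:

import numpy as np

d = np.array([('Arthur', 41), ('Galahad', 38), ('Lancelot', 38)], dtype=[('name', 'U10'), ('age', '<i4')])
np.sort(d, order='age')            # sort by a single field
np.sort(d, order=['age', 'name'])  # ties in 'age' are broken by 'name'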
def plot_histogram_metric(chart, sample_properties, sample_data, **kwargs):
""" Plot a HistogramMetric from the summary json """
summary_data = sample_data.summary
items = summary_data.get(kwargs['metric_name'], {}).items()
if len(items) < 1:
return None
ordering = kwargs.get('order_by', shared_constants.HISTOGRAM_METRIC_DEFAULT_ORDERING)
if ordering == shared_constants.HISTOGRAM_METRIC_ORDER_INTEGER_BIN:
items.sort(key=lambda x: convert_to_int_gracefully(x[0]))
elif ordering == shared_constants.HISTOGRAM_METRIC_ORDER_DECREASING_FREQUENCY:
items.sort(key=lambda x: -convert_to_int_gracefully(x[1]))
elif ordering == shared_constants.HISTOGRAM_METRIC_ORDER_DECREASING_PROPORTION:
items.sort(key=lambda x: -convert_to_float_gracefully(x[1]))
x, y = zip(*items)
chart['data'][0].update({'x': x, 'y': y})
return chart
def preprocess_matrix(matrix, num_bcs=None, use_bcs=None, use_genes=None, force_cells=None):
if force_cells is not None:
bc_counts = matrix.get_reads_per_bc()
bc_indices, _, _ = cr_stats.filter_cellular_barcodes_fixed_cutoff(bc_counts, force_cells)
matrix = matrix.select_barcodes(bc_indices)
elif use_bcs is not None:
bc_seqs = cr_utils.load_csv_rownames(use_bcs)
bc_indices = matrix.bcs_to_ints(bc_seqs)
matrix = matrix.select_barcodes(bc_indices)
elif num_bcs is not None and num_bcs < matrix.bcs_dim:
bc_indices = np.sort(np.random.choice(np.arange(matrix.bcs_dim), size=num_bcs, replace=False))
matrix = matrix.select_barcodes(bc_indices)
if use_genes is not None:
gene_ids = cr_utils.load_csv_rownames(use_genes)
gene_indices = matrix.gene_ids_to_ints(gene_ids)
matrix = matrix.select_genes(gene_indices)
matrix, _, _ = matrix.select_nonzero_axes()
return matrix
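In the num_bcs branch above, the randomly chosen barcode indices are passed through np.sort, presumably so that the subsequent selection keeps the matrix's original ordering; a standalone sketch of that pattern with illustrative sizes:

import numpy as np

rng = np.random.RandomState(0)
n_total, n_keep = 1000, 50
indices = np.sort(rng.choice(np.arange(n_total), size=n_keep, replace=False))
# indexing an array with `indices` now yields a random subset in its original order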
def create_training_test_sets(self):
# training set
scale = self.data_interval_right - self.data_interval_left
train_x = sp.stats.truncnorm.rvs(-2, 2, scale=0.25 * scale, size=self.data_size).astype(np.float32)
train_x = np.sort(train_x)
train_y = self.true_f(train_x) + 0.2 * np.random.randn(self.data_size)
self.train_x = [train_x.reshape((train_x.shape[0], 1))]
self.train_y = [train_y.reshape((train_y.shape[0], 1))]
# test set
# scale = self.test_data_interval_right - self.test_data_interval_left
# test_x = sp.stats.truncnorm.rvs(-2, 2, scale=0.25 * scale, size=self.test_data_size).astype(np.float32)
# test_x = np.sort(test_x)
# test_y = self.true_f(test_x)
self.test_x = np.arange(self.view_xrange[0], self.view_xrange[1], 0.01, dtype=np.float32)
self.test_y = self.true_f(self.test_x)
self.test_x = [self.test_x.reshape((self.test_x.shape[0], 1))]
self.test_y = [self.test_y.reshape((self.test_y.shape[0], 1))]
def create_training_test_sets(self):
# training set
train_x = np.random.uniform(self.data_interval_left, self.data_interval_right, size=self.data_size)
train_x = np.sort(train_x)
train_y = self.true_f(train_x) + 3. * np.random.randn(self.data_size)
self.train_x = [train_x.reshape((train_x.shape[0], 1))]
self.train_y = [train_y.reshape((train_y.shape[0], 1))]
# test set for visualisation
self.test_x = np.arange(self.view_xrange[0], self.view_xrange[1], 0.01, dtype=np.float32)
self.test_x = np.reshape(self.test_x, (self.test_x.shape[0], 1))
self.test_y = self.true_f(self.test_x)
self.test_y = np.reshape(self.test_y, (self.test_y.shape[0], 1))
self.test_x = [self.test_x]
self.test_y = [self.test_y]
def iter_keys_values(self, keys, inds=None, verbose=False):
for key in keys:
if key not in self.keys_:
raise RuntimeError('Key %s not found in dataset. keys: %s' % (key, self.keys_))
idx, ii = 0, 0
total_chunks = len(self.meta_file_.chunks)
inds = np.sort(inds) if inds is not None else None
for chunk_idx, chunk in enumerate(progressbar(self.meta_file_.chunks, size=total_chunks, verbose=verbose)):
data = AttrDict.load(self.get_chunk_filename(chunk_idx))
# if inds is None:
items = (data[key] for key in keys)
for item in izip(*items):
yield item
# else:
# for i, item in enumerate(data[key]):
# if inds[ii] == idx + i:
# yield item
# ii += 1
# if ii >= len(inds): break
# idx += len(data[key])
def get_best_split(X, y):
""" Obtain the best splitting point and resulting children for the data set X, y
Args:
X, y (numpy.ndarray, data set)
Returns:
dict {index: index of the feature, value: feature value, children: left and right children}
"""
best_index, best_value, best_score, children = None, None, 1e10, None
for index in range(len(X[0])):
for value in np.sort(np.unique(X[:, index])):
groups = split_node(X, y, index, value)
impurity = weighted_mse([groups[0][1], groups[1][1]])
if impurity < best_score:
best_index, best_value, best_score, children = index, value, impurity, groups
return {'index': best_index, 'value': best_value, 'children': children}
def get_best_split(X, y, criterion):
""" Obtain the best splitting point and resulting children for the data set X, y
Args:
X, y (numpy.ndarray, data set)
criterion (gini or entropy)
Returns:
dict {index: index of the feature, value: feature value, children: left and right children}
"""
best_index, best_value, best_score, children = None, None, 1, None
for index in range(len(X[0])):
for value in np.sort(np.unique(X[:, index])):
groups = split_node(X, y, index, value)
impurity = weighted_impurity([groups[0][1], groups[1][1]], criterion)
if impurity < best_score:
best_index, best_value, best_score, children = index, value, impurity, groups
return {'index': best_index, 'value': best_value, 'children': children}
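Both versions of get_best_split wrap np.unique in np.sort when enumerating candidate split values; np.unique already returns its values in sorted order, so the outer sort is redundant (though harmless). A quick check:

import numpy as np

col = np.array([3, 1, 3, 2, 1])
vals = np.unique(col)                       # array([1, 2, 3]), already sorted
assert np.array_equal(vals, np.sort(vals))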
def update_image_property(self, property_name, property_data, erase_property=False):
if isinstance(property_data,list) or isinstance(property_data,np.ndarray):
assert len(property_data) == len(self._labels)
property_keys = self._labels
elif isinstance(property_data,dict) or isinstance(property_data,array_dict):
property_keys = np.sort(property_data.keys())
property_data = [property_data[l] for l in property_keys]
if property_name in self._properties.keys():
if erase_property:
self._properties[property_name] = array_dict(property_data,keys=property_keys)
else:
for l,v in zip(property_keys,property_data):
self._properties[property_name][l] = v
else:
print "Creating property ",property_name," on image"
self._properties[property_name] = array_dict(property_data,keys=property_keys)
def testBsearch(self, dtype=dtype):
testarray = range(1,101)
random.shuffle(testarray)
a = numpy.array(testarray[:50], dtype)
b = numpy.array([0] + testarray[50:] + range(101,103), dtype)
a = numpy.sort(a)
self.assertEqual(mapped_struct.bsearch(a, 0), 0)
self.assertEqual(mapped_struct.bsearch(a, 101), len(a))
self.assertEqual(mapped_struct.bsearch(a, 102), len(a))
for x in a:
ix = mapped_struct.bsearch(a, x)
self.assertLess(ix, len(a))
self.assertEqual(a[ix], x)
self.assertTrue(mapped_struct.sorted_contains(a, x))
for x in b:
ix = mapped_struct.bsearch(a, x)
self.assertTrue(ix >= len(a) or a[ix] != x)
self.assertFalse(mapped_struct.sorted_contains(a, x))
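mapped_struct.bsearch in the test above is project-specific; on an ordinary sorted NumPy array, the same lookups can be done with np.searchsorted, roughly as follows:

import numpy as np

a = np.sort(np.array([7, 3, 5, 1]))     # array([1, 3, 5, 7])
ix = np.searchsorted(a, 5)              # insertion index that keeps `a` sorted
found = ix < len(a) and a[ix] == 5      # membership test against a sorted array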
def get_score_bounds_from_range(Z_min, Z_max, rho_lb, rho_ub, L0_max = None):
"global variables: L0_reg_ind"
edge_values = np.vstack([Z_min * rho_lb,
Z_max * rho_lb,
Z_min * rho_ub,
Z_max * rho_ub])
if L0_max is None or L0_max == Z_min.shape[0]:
s_min = np.sum(np.min(edge_values, axis = 0))
s_max = np.sum(np.max(edge_values, axis = 0))
else:
min_values = np.min(edge_values, axis = 0)
s_min_reg = np.sum(np.sort(min_values[L0_reg_ind])[0:L0_max])
s_min_no_reg = np.sum(min_values[~L0_reg_ind])
s_min = s_min_reg + s_min_no_reg
max_values = np.max(edge_values, axis = 0)
s_max_reg = np.sum(-np.sort(-max_values[L0_reg_ind])[0:L0_max])
s_max_no_reg = np.sum(max_values[~L0_reg_ind])
s_max = s_max_reg + s_max_no_reg
return s_min, s_max
#setup weights
def get_score_bounds(Z_min, Z_max, rho_lb, rho_ub, L0_reg_ind = None, L0_max = None):
edge_values = np.vstack([Z_min * rho_lb,
Z_max * rho_lb,
Z_min * rho_ub,
Z_max * rho_ub])
if (L0_max is None) or (L0_reg_ind is None) or (L0_max == Z_min.shape[0]):
s_min = np.sum(np.min(edge_values, axis=0))
s_max = np.sum(np.max(edge_values, axis=0))
else:
min_values = np.min(edge_values, axis=0)
s_min_reg = np.sum(np.sort(min_values[L0_reg_ind])[0:L0_max])
s_min_no_reg = np.sum(min_values[~L0_reg_ind])
s_min = s_min_reg + s_min_no_reg
max_values = np.max(edge_values, axis=0)
s_max_reg = np.sum(-np.sort(-max_values[L0_reg_ind])[0:L0_max])
s_max_no_reg = np.sum(max_values[~L0_reg_ind])
s_max = s_max_reg + s_max_no_reg
return s_min, s_max
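Both bound helpers sum the L0_max smallest (or largest) regularized terms via np.sort; the same partial sums can be computed with np.partition, which avoids a full sort. A sketch with illustrative values:

import numpy as np

values = np.array([4.0, -2.0, 7.0, 0.5, -1.0])
k = 3
sum_k_smallest = np.sum(np.sort(values)[:k])
sum_k_largest = np.sum(-np.sort(-values)[:k])
# np.partition only guarantees that the k smallest (or largest) values occupy the first k slots
sum_k_smallest_fast = np.sum(np.partition(values, k - 1)[:k])
sum_k_largest_fast = np.sum(-np.partition(-values, k - 1)[:k])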
def round_solution_pool(pool, constraints):
pool.distinct().sort()
P = pool.P
L0_reg_ind = np.isnan(constraints['coef_set'].C_0j)
L0_max = constraints['L0_max']
rounded_pool = SolutionPool(P)
for solution in pool.solutions:
# sort from largest to smallest coefficients
feature_order = np.argsort([-abs(x) for x in solution])
rounded_solution = np.zeros(shape=(1, P))
l0_norm_count = 0
for k in range(0, P):
j = feature_order[k]
if not L0_reg_ind[j]:
rounded_solution[0, j] = np.round(solution[j], 0)
elif l0_norm_count < L0_max:
rounded_solution[0, j] = np.round(solution[j], 0)
l0_norm_count += L0_reg_ind[j]
rounded_pool.add(objvals=np.nan, solutions=rounded_solution)
rounded_pool.distinct().sort()
return rounded_pool
def top_uncer_items(adata, pp, n, flag = None):
"""
Return indices of the top n most uncertain items that are not flagged.
"""
uncertain = np.abs(pp[:,0] - 0.5)
if flag is not None:
addition = np.asarray(flag, dtype=int) * 10  # flagged items are not considered; inflate their value so they are never selected
uncertain = uncertain + addition
if len(uncertain) <= n:
return np.nonzero(uncertain <= 10000000)[0]
sorted_uncertain = np.sort(uncertain)
thresh = sorted_uncertain[n]
return np.nonzero(uncertain <= thresh)[0]
def items_for_expert(adata, pp, n, flag):
"""
Take n items for the expert to consider.
"""
combined_prob = 0.8*np.asarray(adata.taken_crowd_prob) + 0.2*pp[:,1]
uncertain = np.abs(combined_prob - 0.5)
if flag is not None:
addition = np.asarray(flag, dtype=int) * 10  # flagged items are not considered; inflate their value so they are never selected
uncertain = uncertain + addition
if len(uncertain) <= n:
return np.nonzero(uncertain <= 10000000)[0]
sorted_uncertain = np.sort(uncertain)
thresh = sorted_uncertain[n]
return np.nonzero(uncertain <= thresh)[0]
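Both helpers above select roughly the n most uncertain items by thresholding at the n-th order statistic of the uncertainty score; if only the indices are needed, np.argpartition gives them directly. A rough sketch with synthetic scores:

import numpy as np

uncertain = np.abs(np.random.rand(100) - 0.5)
n = 10
top_n_idx = np.argpartition(uncertain, n)[:n]  # indices of the n smallest scores, i.e. the most uncertain items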
def flush():
prints = []
for name, vals in _since_last_flush.items():
prints.append("{}\t{}".format(name, np.mean(list(vals.values()))))
_since_beginning[name].update(vals)
x_vals = np.sort(list(_since_beginning[name].keys()))
y_vals = [_since_beginning[name][x] for x in x_vals]
plt.clf()
plt.plot(x_vals, y_vals)
plt.xlabel('iteration')
plt.ylabel(name)
plt.savefig('generated/'+name.replace(' ', '_')+'.jpg')
print("iter {}\t{}".format(_iter[0], "\t".join(prints)))
_since_last_flush.clear()
with open('log.pkl', 'wb') as f:
pickle.dump(dict(_since_beginning), f, 4)
def plot_feature_importances(feature_names, feature_importances, N=30):
importances = list(zip(feature_names, list(feature_importances)))
importances = pd.DataFrame(importances, columns=["Feature", "Importance"])
importances = importances.set_index("Feature")
# Sort by the absolute value of the importance of the feature
importances["sort"] = abs(importances["Importance"])
importances = importances.sort(columns="sort", ascending=False).drop("sort", axis=1)
importances = importances[0:N]
# Show the most important positive feature at the top of the graph
importances = importances.sort(columns="Importance", ascending=True)
with plt.style.context(('ggplot')):
fig, ax = plt.subplots(figsize=(16,12))
ax.tick_params(labelsize=16)
importances.plot(kind="barh", legend=False, ax=ax)
ax.set_frame_on(False)
ax.set_xlabel("Relative importance", fontsize=20)
ax.set_ylabel("Feature name", fontsize=20)
plt.tight_layout()
plt.title("Most important features for attack", fontsize=20).set_position([.5, 0.99])
return fig
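The DataFrame.sort(columns=...) calls above use an older pandas API; in current pandas the equivalent method is sort_values(by=...). A sketch of the same two sorting steps on an illustrative frame:

import pandas as pd

importances = pd.DataFrame({"Importance": [0.5, -2.0, 1.2]}, index=["f1", "f2", "f3"])
importances["sort"] = importances["Importance"].abs()
importances = importances.sort_values(by="sort", ascending=False).drop("sort", axis=1)
importances = importances.sort_values(by="Importance", ascending=True)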
def test_swap_random(data, seed):
a, b = data
np.random.seed(seed)
a_orig, b_orig = original.swap_random(a, b)
dcst_private._seed_numba(seed)
a_out, b_out = dcst.swap_random(a, b)
assert len(a_out) == len(b_out) == len(a) == len(b)
# Each entry should be present same number of times
ab = np.sort(np.concatenate((a, b)))
ab_out = np.sort(np.concatenate((a_out, b_out)))
assert np.allclose(ab, ab_out, atol=atol, equal_nan=True)
# Check for swaps matching
for i in range(len(a)):
ab = np.array([a[i], b[i]])
ab_out = np.array([a_out[i], b_out[i]])
assert ab[0] in ab_out
assert ab[1] in ab_out
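The test above checks that the swap preserved the multiset of values by comparing sorted concatenations; the same idea works as a generic permutation check:

import numpy as np

a = np.array([3, 1, 2, 2])
b = np.array([2, 2, 1, 3])
same_multiset = np.array_equal(np.sort(a), np.sort(b))  # True: b is a permutation of a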
def _hpd_interval(self, x, width):
"""
Code adapted from pymc3.stats.calc_min_interval:
https://github.com/pymc-devs/pymc3/blob/master/pymc3/stats.py
"""
x = np.sort(x)
n = len(x)
interval_idx_inc = int(np.floor(width * n))
n_intervals = n - interval_idx_inc
interval_width = x[interval_idx_inc:] - x[:n_intervals]
if len(interval_width) == 0:
raise ValueError('Too few elements for interval calculation')
min_idx = np.argmin(interval_width)
hdi_min = x[min_idx]
hdi_max = x[min_idx + interval_idx_inc]
index = ['hpd{}_{}'.format(width, x) for x in ['lower', 'upper']]
return pd.Series([hdi_min, hdi_max], index=index)
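The HPD computation above finds the narrowest window that contains a given fraction of the sorted samples; a straight-line version of the same idea on synthetic data might look like this:

import numpy as np

x = np.sort(np.random.randn(5000))
width = 0.95
n = len(x)
inc = int(np.floor(width * n))      # number of samples inside the interval
widths = x[inc:] - x[:n - inc]      # width of every candidate interval
i = np.argmin(widths)               # narrowest candidate
hdi_min, hdi_max = x[i], x[i + inc]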
def _random_curve(self, nr_curves):
curves = []
for i in range(nr_curves-1):
curve = [(0,0)]
# exclude the 0 and 255
_x = numpy.sort(random.sample(range(1, 255), 32))
_y = numpy.sort(random.sample(range(1, 255), 32))
#_x = numpy.sort(numpy.random.randint(1, 255, 2))
#_y = numpy.sort(numpy.random.randint(1, 255, 2))
# _x[0] and _x[1] can't be the same
curve.append((_x[0], _y[0]))
curve.append((_x[1], _y[1]))
curve.append((255,255))
curves.append(curve)
curves.append([(255,255)])
return curves
def test_randomized_svd(rows, cols, rank, dtype, transpose, n_iter, target_gen,
rgen):
rank = min(rows, cols) - 2 if rank == 'fullrank' else rank
A = target_gen(rows, cols, rank=rank, randstate=rgen, dtype=dtype)
U_ref, s_ref, V_ref = utils.truncated_svd(A, k=rank)
U, s, V = em.randomized_svd(A, rank, transpose=transpose, randstate=rgen,
n_iter=n_iter)
error_U = np.abs(U.conj().T.dot(U_ref)) - np.eye(rank)
assert_allclose(np.linalg.norm(error_U), 0, atol=1e-3)
error_V = np.abs(V.dot(V_ref.conj().T)) - np.eye(rank)
assert_allclose(np.linalg.norm(error_V), 0, atol=1e-3)
assert_allclose(s.ravel() - s_ref, 0, atol=1e-3)
# Check that singular values are returned in descending order
assert_array_equal(s, np.sort(s)[::-1])
def ecdf(x):
''' Computes the empirical cumulative distribution function of a dataset
Args:
x (`iterable`): Data.
Returns:
tuple containing:
`numpy.ndarray`: sorted data.
`numpy.ndarray`: cumulative distribution function of the data.
'''
xs = np.sort(x)
ys = np.arange(1, len(xs) + 1) / float(len(xs))
return xs, ys
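The two lines above are the whole empirical CDF; inlined on synthetic data, the result can be read off directly:

import numpy as np

x = np.random.randn(1000)
xs = np.sort(x)
ys = np.arange(1, len(xs) + 1) / float(len(xs))
# ys[i] is the fraction of samples <= xs[i]; at the sample median it is ~0.5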
def sort_xy(x, y):
''' Sorts a pair of x and y iterables, returning arrays in order of
ascending x.
Args:
x (`iterable`): a list, numpy ndarray, or other iterable to sort by.
y (`iterable`): a list, numpy ndarray, or other iterable that is y=f(x).
Returns:
tuple containing:
`iterable`: an iterable containing the sorted x elements.
`iterable`: an iterable containing the sorted y elements.
'''
# zip x and y, sort by the 0th element (x) of each tuple in zip()
_ = sorted(zip(x, y), key=itemgetter(0))
sorted_x, sorted_y = zip(*_)
return sorted_x, sorted_y
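sort_xy above works on generic iterables via sorted(zip(...)); when x and y are already NumPy arrays, np.argsort does the same job and keeps the results as arrays:

import numpy as np

x = np.array([3.0, 1.0, 2.0])
y = np.array([30.0, 10.0, 20.0])
order = np.argsort(x)
sorted_x, sorted_y = x[order], y[order]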
def compute_group(cls, data, scales, **params):
data = data.sort_values('x')
n = params['n']
x_unique = data['x'].unique()
if len(x_unique) < 2:
# Not enough data to fit
return pd.DataFrame()
if data['x'].dtype.kind == 'i':
if params['fullrange']:
xseq = scales.x.dimension()
else:
xseq = np.sort(x_unique)
else:
if params['fullrange']:
rangee = scales.x.dimension()
else:
rangee = [data['x'].min(), data['x'].max()]
xseq = np.linspace(rangee[0], rangee[1], n)
return predictdf(data, xseq, **params)
def bootstrap_statistics(series, statistic, n_samples=1000,
confidence_interval=0.95, random_state=None):
"""
Default parameters taken from
R's Hmisc smean.cl.boot
"""
if random_state is None:
random_state = np.random
alpha = 1 - confidence_interval
size = (n_samples, len(series))
inds = random_state.randint(0, len(series), size=size)
samples = series.values[inds]
means = np.sort(statistic(samples, axis=1))
return pd.DataFrame({'ymin': means[int((alpha/2)*n_samples)],
'ymax': means[int((1-alpha/2)*n_samples)],
'y': [statistic(series)]})
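bootstrap_statistics above reads the confidence bounds off the sorted bootstrap means; up to interpolation details, np.percentile on the unsorted means gives the same bounds:

import numpy as np

means = np.random.randn(1000)  # stand-in for the bootstrap statistics
alpha = 0.05
ymin, ymax = np.percentile(means, [100 * alpha / 2, 100 * (1 - alpha / 2)])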
def sort_base_rules(self):
""" Sort the population lexicographically by truth vector.
This should help speed up likelihood calculations.
Note, resets the filter.
"""
# np.lexsort will sort columns by rows, with the last
# row as the primary sort key, etc; so we rotate the
# truth array by 90 degrees to get it to do what we want.
new_order = np.lexsort(np.rot90(self.base_flat_truth))
self._reordering_cache = new_order
self.base_flat_durations = self.base_flat_durations[new_order]
self.base_flat_variable_weights = self.base_flat_variable_weights[new_order]
new_flat_rules = [self.base_flat_rules[i] for i in new_order]
self.base_flat_rules = new_flat_rules
self.base_flat_truth = self.base_flat_truth[new_order]
self.base_primitive_index = {
t:i for i,t in enumerate(new_flat_rules)
}
self.reset_filter()
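np.lexsort above orders the rule population by its truth columns; as a smaller illustration, lexsort sorts by several keys at once, with the last key acting as the primary one:

import numpy as np

primary = np.array([1, 0, 1, 0])
secondary = np.array([9, 3, 2, 7])
order = np.lexsort((secondary, primary))  # sort by `primary`, break ties with `secondary` -> [1, 3, 2, 0]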
def number_classes(Yin, omitLabels=[]):
"""Remaps class labels to contiguous natural numbers starting at 0.
In many frameworks (e.g. caffe) class labels are mapped to indices at
the output of the CNN; hence this remapping.
Any pixels that should be ignored will have class label of -1.
"""
if Yin is None: return None
yAll = np.sort(np.unique(Yin))
yAll = [y for y in yAll if y not in omitLabels]
Yout = -1*np.ones(Yin.shape, dtype=Yin.dtype)
for yIdx, y in enumerate(yAll):
Yout[Yin==y] = yIdx
return Yout
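number_classes above builds the remapping by looping over the sorted unique labels; when no labels need to be omitted, np.unique with return_inverse=True produces the same contiguous numbering in one call:

import numpy as np

Yin = np.array([[10, 10, 40], [40, 20, 10]])
vals, inv = np.unique(Yin, return_inverse=True)
Yout = inv.reshape(Yin.shape)  # labels remapped to 0..len(vals)-1, following the sorted label order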
def test_sort_flexible(self):
# Test sort on flexible dtype.
a = array(
data=[(3, 3), (3, 2), (2, 2), (2, 1), (1, 0), (1, 1), (1, 2)],
mask=[(0, 0), (0, 1), (0, 0), (0, 0), (1, 0), (0, 0), (0, 0)],
dtype=[('A', int), ('B', int)])
test = sort(a)
b = array(
data=[(1, 1), (1, 2), (2, 1), (2, 2), (3, 3), (3, 2), (1, 0)],
mask=[(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 1), (1, 0)],
dtype=[('A', int), ('B', int)])
assert_equal(test, b)
assert_equal(test.mask, b.mask)
test = sort(a, endwith=False)
b = array(
data=[(1, 0), (1, 1), (1, 2), (2, 1), (2, 2), (3, 2), (3, 3), ],
mask=[(1, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 1), (0, 0), ],
dtype=[('A', int), ('B', int)])
assert_equal(test, b)
assert_equal(test.mask, b.mask)
def compute_precision_mapping(pt):
thresh_all = []
prec_all = []
for jj in xrange(1000):
thresh = pt['details']['score'][:, jj]
prec = pt['details']['precision'][:, jj]
ind = np.argsort(thresh); # thresh, ind = torch.sort(thresh)
thresh = thresh[ind];
indexes = np.unique(thresh, return_index=True)[1]
indexes = np.sort(indexes);
thresh = thresh[indexes]
thresh = np.vstack((min(-1000, min(thresh) - 1), thresh[:, np.newaxis], max(1000, max(thresh) + 1)));
prec = prec[ind];
for i in xrange(1, len(prec)):
prec[i] = max(prec[i], prec[i - 1]);
prec = prec[indexes]
prec = np.vstack((prec[0], prec[:, np.newaxis], prec[-1]));
thresh_all.append(thresh)
prec_all.append(prec)
precision_score = {'thresh': thresh_all, "prec": prec_all}
return precision_score
def compute_precision_score_mapping(thresh, prec, score):
ind = np.argsort(thresh); # thresh, ind = torch.sort(thresh)
thresh = thresh[ind];
indexes = np.unique(thresh, return_index=True)[1]
indexes = np.sort(indexes);
thresh = thresh[indexes]
thresh = np.vstack((min(-1000, min(thresh) - 1), thresh[:, np.newaxis], max(1000, max(thresh) + 1)));
prec = prec[ind];
for i in xrange(1, len(prec)):
prec[i] = max(prec[i], prec[i - 1]);
prec = prec[indexes]
prec = np.vstack((prec[0], prec[:, np.newaxis], prec[-1]));
f = interp1d(thresh[:, 0], prec[:, 0])
val = f(score)
return val
def argsort(a, axis=-1):
"""Returns the indices that would sort an array with a stable sorting.
Args:
a (cupy.ndarray): Array to sort.
axis (int or None): Axis along which to sort. Default is -1, which
means sort along the last axis. If None is supplied, the array is
flattened before sorting.
Returns:
cupy.ndarray: Array of indices that sort ``a``.
.. note::
For implementation reasons, ``cupy.argsort`` does not support the
``kind`` and ``order`` parameters.
.. seealso:: :func:`numpy.argsort`
"""
return a.argsort(axis=axis)
def msort(a):
"""Returns a copy of an array sorted along the first axis.
Args:
a (cupy.ndarray): Array to be sorted.
Returns:
cupy.ndarray: Array of the same type and shape as ``a``.
.. note::
``cupy.msort(a)``, the CuPy counterpart of ``numpy.msort(a)``, is
equivalent to ``cupy.sort(a, axis=0)``.
.. seealso:: :func:`numpy.msort`
"""
# TODO(takagi): Support float16 and bool.
return sort(a, axis=0)
# TODO(okuta): Implement sort_complex
def create_component_sframe(g, baseid_name='page_id', layer_name='layer'):
"""Get component SFrame enriched with structural properties for each component"""
columns = g.vertices.column_names()
columns.remove('__id')
columns.remove('component_id')
# Append s to have unique column names (required by graphlab)
gb_dict = {c + 's': gl.aggregate.CONCAT(c) for c in columns}
gb_dict['nids'] = gl.aggregate.CONCAT('__id')
gb_dict['node_count'] = gl.aggregate.COUNT('__id')
comps = g.vertices.groupby('component_id', gb_dict)
comps['width'] = comps.apply(lambda x: len(np.unique(x[layer_name + 's'])))
comps['height'] = comps.apply(lambda x: len(np.unique(x[baseid_name + 's'])))
edges = g.edges.groupby('component_id', {'src': gl.aggregate.CONCAT('__src_id'),
'tgt': gl.aggregate.CONCAT('__dst_id')})
comps = comps.join(edges, 'component_id')
return comps.sort('node_count', False)
def test_multicollinearity(df, target_name, r2_threshold = 0.89):
'''Tests whether any feature can be predicted from the others with R2 >= r2_threshold (default 0.89)
input: dataframe, name of target (to exclude)
'''
r2s = pd.DataFrame()
for feature in df.columns.difference([target_name]):
model = sk.linear_model.Ridge()
model.fit(df[df.columns.difference([target_name,feature])], df[feature])
pos = np.in1d(model.coef_, np.sort(model.coef_)[-5:])
r2s = r2s.append(pd.DataFrame({'r2':sk.metrics.r2_score(df[feature],\
model.predict(df[df.columns.difference([target_name, feature])])),\
'predictors' : str(df.columns.difference([target_name, feature])[np.ravel(np.argwhere(pos == True))].tolist())}, index = [feature]))
print('Testing', feature)
print('-----------------')
if len(r2s[r2s['r2'] >= r2_threshold]) > 0:
print('Multicollinearity detected')
print(r2s[r2s['r2'] >= r2_threshold])
else:
print('No multicollinearity')
def wsparsify(w_gpu, percentage):
"""
Keeps only as many entries nonzero as specified by percentage.
"""
w = w_gpu.get()
vals = sort(w)[::-1]
idx = int(floor(prod(w.shape) * percentage/100))
zw_gpu = cua.zeros_like(w_gpu) # gpu array filled with zeros
tw_gpu = cua.empty_like(w_gpu) # gpu array containing threshold
tw_gpu.fill(vals[idx])
w_gpu = cua.if_positive(w_gpu > tw_gpu, w_gpu, zw_gpu)
del zw_gpu
del tw_gpu
return w_gpu
def sparsify(x, percentage):
"""
Keeps only as many entries nonzero as specified by percentage.
Note that only the largest values are kept.
--------------------------------------------------------------------------
Usage:
Call: y = sparsify(x, percentage)
Input: x input ndarray x
percentage percentage of nonzero entries in y
Output: sparsified version of x
--------------------------------------------------------------------------
Copyright (C) 2011 Michael Hirsch
"""
vals = np.sort(x.flatten())[::-1]
idx = int(np.floor(np.prod(x.shape) * percentage/100))
x[x < vals[idx]] = 0
return x
def buckets(x, y, size=50):
assert len(x[0]) == len(y[0])
num_inputs = len(x)
samples = x + y
num_items = len(samples)
xy = zip(*samples)
xy.sort(key=lambda i: len(i[0]))
t_len = size
idx = 0
bucks = [[[]] for _ in range(num_items)]
for item in xy:
if len(item[0]) > t_len:
if len(bucks[0][idx]) > 0:
for buck in bucks:
buck.append([])
idx += 1
while len(item[0]) > t_len:
t_len += size
for i in range(num_items):
#print item[i]
bucks[i][idx].append(item[i])
return bucks[:num_inputs], bucks[num_inputs:]
def biased_out(prediction, bias):
out = []
b_pres = []
for pre in prediction:
b_pres.append(pre[:,0] - pre[:,1])
props = np.concatenate(b_pres)
props = np.sort(props)[::-1]
idx = int(bias*len(props))
if idx == len(props):
idx -= 1
th = props[idx]
print 'threshold: ', th, 1 / (1 + np.exp(-th))
for pre in b_pres:
pre[pre >= th] = 0
pre[pre != 0] = 1
out.append(pre)
return out
def ecdf(x):
"""Empirical cumulative distribution function
Given a 1D array of values, returns a function f(q) that outputs the
fraction of values less than or equal to q.
Parameters
----------
x : 1D array
values for which to compute CDF
Returns
----------
ecdf_fun: Callable[[float], float]
function that returns the value of the CDF at a given point
"""
xp = np.sort(x)
yp = np.arange(len(xp) + 1) / len(xp)
def ecdf_fun(q):
return yp[np.searchsorted(xp, q, side="right")]
return ecdf_fun
def calc_volume(roi):
# oar and ptv are lists using str(z) as keys
# each item is an ordered list of points representing a polygon
# polygon n is inside polygon n-1, then the current accumulated polygon is
# polygon n subtracted from the accumulated polygon up to and including polygon n-1
# Same method DICOM uses to handle rings and islands
volume = 0.
all_z_values = [round(float(z), 2) for z in list(roi)]
all_z_values = np.sort(all_z_values)
thicknesses = np.abs(np.diff(all_z_values))
thicknesses = np.append(thicknesses, np.min(thicknesses))
all_z_values = all_z_values.tolist()
for z in list(roi):
# z in coord will not necessarily go in order of z, convert z to float to lookup thickness
# also used to check for top and bottom slices, to add area of those contours
thickness = thicknesses[all_z_values.index(round(float(z), 2))]
shapely_roi = points_to_shapely_polygon(roi[z])
if shapely_roi:
volume += shapely_roi.area * thickness
return round(volume / 1000., 2)
def __init__(self,p=[-0.9594,4.294],pprior=None,
N=50,x=None,**kwargs):
f=lambda t,s: np.array([t-s*abs(t),t+s*abs(t)])
if pprior is None:
self.pprior={'p'+str(i) : f(t,10) for i,t in enumerate(p) }
self.label=self.pprior.keys()
self.ndim=len(p)
self.p=p
if x is None:
self.N=N
self.x = np.sort(10*np.random.rand(N))
else:
self.N=len(x)
self.x=x
self.y,self.yerr=self.data(**kwargs)
# As the prior, we assume a uniform prior (i.e. constant prob. density)
def test_encode_data_roundtrip():
minrand, maxrand = np.sort(np.random.randint(-427, 8848, 2))
testdata = np.round((np.sum(
np.dstack(
np.indices((512, 512),
dtype=np.float64)),
axis=2) / (511. + 511.)) * maxrand, 2) + minrand
baseval = -1000
interval = 0.1
rtripped = _decode(data_to_rgb(testdata.copy(), baseval, interval), baseval, interval)
assert testdata.min() == rtripped.min()
assert testdata.max() == rtripped.max()
def projsplx_multi(Y):
n, m = Y.shape
if n==1:
X = projsplx(Y)
else:
Y1 = -np.sort(-Y,axis=1)
tmpsum = np.zeros(n)
tmax = np.zeros(n)
bget = np.zeros(n, dtype=bool)
for ii in xrange(0,m-1):
active = (bget==False)
tmpsum[active] = tmpsum[active] + Y1[active][:,ii]
tmax[active] = (tmpsum[active] - 1)/(ii+1)
deactivate = (tmax>=Y1[:,ii+1]) & active
bget[deactivate] = True
active = (bget==False)
tmax[active] = (tmpsum[active] + Y1[active][:,m-1] - 1)/m
X = (Y.transpose() - tmax).transpose()
X[X<0.0] = 0.0
return X
def projsplx(y):
y1 = np.array(y, copy=True)
m = y1.shape[1]
bget = False
y1[0][::-1].sort()
tmpsum = 0
for ii in xrange(0,m-1):
tmpsum = tmpsum + y1[0][ii]
tmax = (tmpsum - 1)/(ii+1)  # ii is zero-based here; dividing by ii would fail on the first iteration
if tmax >= y1[0][ii+1]:
bget = True
break
if not bget:
tmax = (tmpsum + y1[0][m-1] - 1)/m  # last element; index m would be out of bounds
y1 = y1 - tmax
y1[y1<0.0] = 0.0
return y1
def cond_projsplx_multi(Y,a_mat):
n, m = Y.shape
A = a_mat
s = -np.sort(-(A*Y),axis=1)
index = np.argsort(-(A*Y), axis=1)
tmpsum = np.zeros(n)
tmpsumdom = np.zeros(n)
bget = np.zeros(n, dtype=bool)
A_sort = A[np.arange(np.shape(A)[0])[:,np.newaxis], index]
cond_s = s/(A_sort**2)
tmax = np.zeros(n)
for ii in xrange(0,m-1):
active = (bget==False)
tmpsum[active] = tmpsum[active] + cond_s[active][:,ii]
tmpsumdom[active] = tmpsumdom[active]+ 1.0/A_sort[active][:,ii]**2
tmax[active] = (tmpsum[active] - 1)/tmpsumdom[active]
deactivate = (tmax >= s[:,ii+1]) & active
bget[deactivate] = True
active = (bget==False)
tmax[active] = (tmpsum[active] + cond_s[active][:,m-1] - 1)/(tmpsumdom[active]+1.0/(A_sort[active][:,m-1])**2)
X = (Y - np.matlib.repmat(tmax.reshape(n,1),1,m)*1.0/A)
X[X<0.0] = 0.0
X = X/A
return X
def get_symmetry_code_tri(pts):
if len(pts) == 1:
return '_s3()'
elif len(pts) == 3:
# Symmetry group [[a, a, b], [a, b, a], [b, a, a]].
# Find the equal value `a`.
tol = 1.0e-12
beta = pts[0] - pts[0][0]
ct = numpy.count_nonzero(abs(beta) < tol)
assert ct in [1, 2], beta
val = pts[0][0] if ct == 2 else pts[0][1]
return '_s21({:.15e})'.format(val)
# Symmetry group [[a, b, c], [c, a, b], ...].
assert len(pts) == 6
# Take the two largest value from a, b, c.
pt0 = numpy.sort(pts[0])
return '_s111({:.15e}, {:.15e})'.format(pt0[2], pt0[1])
def get_quadrature_points(order):
"""
Returns the quadrature points for Gauss-Lobatto quadrature
as a function of the order of the polynomial we want to
represent.
See: https://en.wikipedia.org/wiki/Gaussian_quadrature
"""
return np.sort(np.concatenate((np.array([-1,1]),
poly.basis(order).deriv().roots())))
def trataGroups(objeto):
current = list(filter(None.__ne__, objeto))
current = np.sort(current, axis=0)
for i in range(len(current[0])):
current_ = [j[i] for j in current]
mean_ = np.round(np.mean(current_, axis=0), 4)
deviation_ = np.round(np.std(current_, axis=0, ddof=1), 4)
return [mean_, deviation_]
def PA(samples, variables):
datasets = 5000
eig_vals = []
for i in range(datasets):
data = np.random.standard_normal((variables, samples))
cor_ = np.corrcoef(data)
eig_vals.append(np.sort(np.linalg.eig(cor_)[0])[::-1])
quantile = (np.round(np.percentile(eig_vals, 95.0, axis=0), 4))
mean_ = (np.round(np.mean(eig_vals, axis=0), 4))
return quantile