Python numpy module: bincount() example source code
The following 47 code examples, extracted from open-source Python projects, illustrate how to use numpy.bincount().
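As a quick orientation before the project examples (a minimal sketch, not taken from any of the projects below): np.bincount counts how often each non-negative integer occurs, can weight each occurrence, and pads the result to at least minlength bins.
import numpy as np
x = np.array([0, 1, 1, 3, 2, 1, 7])
print(np.bincount(x))                # [1 3 1 1 0 0 0 1]
print(np.bincount(x, minlength=10))  # same counts, padded to length 10
w = np.array([0.5, 1.0, 1.0, 2.0, 0.25, 1.0, 1.0])
print(np.bincount(x, weights=w))     # per-bin sums of w instead of raw counts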
def hog(img):
h, w = img.shape
gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
mag, ang = cv2.cartToPolar(gx, gy)
bins = np.int32(bin_n*ang/(2*np.pi)) # quantizing binvalues in (0...16)
bin_cells = ()
mag_cells = ()
for i in range(wc):
for j in range(hc):
bin_cells += (bins[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)  # integer division keeps the slice indices integral
mag_cells += (mag[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
# np.bincount() returns how many times each value appears (weighted here by gradient magnitude)
hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
hist = np.hstack(hists) # hist is a 16*wc*hc vector
return hist
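The weighted bincount above is the heart of the HOG cell histogram: each pixel's quantized orientation picks the bin and its gradient magnitude is the weight. Below is a standalone sketch of that single step on synthetic data; bin_n, wc and hc are module-level constants in the original project, and the values used here are assumptions.
import numpy as np
bin_n = 16                                    # assumed number of orientation bins
ang = np.random.uniform(0, 2*np.pi, (8, 8))   # synthetic orientation map
mag = np.random.rand(8, 8)                    # synthetic magnitude map
bins = np.int32(bin_n * ang / (2*np.pi)) % bin_n   # quantize, keep values in [0, bin_n)
cell_hist = np.bincount(bins.ravel(), weights=mag.ravel(), minlength=bin_n)
print(cell_hist.shape)                        # (bin_n,) magnitude-weighted histogram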
def log_likelihood(self, data):
nks = np.bincount(self.labels_, minlength=self.n_clusters) # number of points in each cluster
n, d = data.shape
log_likelihood = 0
covar_matrices = self.covariances(self.labels_, cluster_centers=self.cluster_centers_, data=data)
covar_matrix_det_v = np.linalg.det(covar_matrices)
self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
for k, nk in enumerate(nks):
if self.verbose == 1:
print('log_likelihood: covar_matrix_det = {}'.format(covar_matrix_det_v[k]))
term_1 = nk * (np.log(float(nk)/n) - 0.5 * d * np.log(2*np.pi) - 0.5 * np.log(abs(covar_matrix_det_v[k])))
cdist_result = cdist(data[self.labels_ == k], np.array([self.cluster_centers_[k]]), metric='mahalanobis', VI=self._inv_covar_matrices[k])
cdist_no_nan = cdist_result[~np.isnan(cdist_result)] # to deal with nans returned by cdist
term_2 = -0.5 * (np.sum(cdist_no_nan))
k_sum = term_1 + term_2
log_likelihood += k_sum
if np.isnan(log_likelihood) or log_likelihood == float('inf'):
raise Exception('ll is nan or inf')
return log_likelihood
def generate_environment_assignments(n, num_sources):
'''Randomly assign `n` counts to one of `num_sources` environments.
Parameters
----------
n : int
Number of environment assignments to generate.
num_sources : int
Number of possible environment states (this includes the 'Unknown').
Returns
-------
seq_env_assignments : np.array
1D vector of length `n`. The ith entry is the environment assignment of
the ith feature.
envcounts : np.array
1D vector of length `num_sources`. The ith entry is the total number of
entries in `seq_env_assignments` which are equal to i.
'''
seq_env_assignments = np.random.choice(np.arange(num_sources), size=n,
replace=True)
envcounts = np.bincount(seq_env_assignments, minlength=num_sources)
return seq_env_assignments, envcounts
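A quick usage sketch of the function above (the names are those from the snippet itself; the seed is only for reproducibility):
import numpy as np
np.random.seed(0)
assignments, envcounts = generate_environment_assignments(10, 3)
print(assignments)            # 10 random environment labels in {0, 1, 2}
print(envcounts)              # per-environment totals, length 3
print(envcounts.sum() == 10)  # every assignment is counted exactly once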
def getPixelIoU(gtImg,submImg):
#TODO TEST THOROUGHLY
def compress(img):
intImg=np.empty(img.shape[:2],dtype='int32')
if len(img.shape)==3:
intImg[:,:]=img[:,:,0]
intImg[:,:]+=(256*img[:,:,1])
intImg[:,:]+=((256**2)*img[:,:,2])  # pack the third colour channel into the high byte
else:
intImg[:,:]=img[:,:]
un=np.unique(intImg)
idx=np.zeros(un.max()+1,dtype='int32')  # integer lookup table so compressed labels stay integral
idx[un]=np.arange(un.shape[0],dtype='int32')
return idx[intImg],un.max()+1
if gtImg.shape[:2]!=submImg.shape[:2]:
raise Exception("gtImg and submImg must have the same size")
gt,maxGt=compress(gtImg)
subm,maxSubm=compress(submImg)
comb=gt*maxSubm+subm
intMatrix=np.bincount(comb.reshape(-1),minlength=maxGt*maxSubm).reshape([maxGt,maxSubm])
uMatrix=np.zeros(intMatrix.shape)
uMatrix[:,:]+=intMatrix.sum(axis=0)[None,:]
uMatrix[:,:]+=intMatrix.sum(axis=1)[:,None]
uMatrix-=intMatrix
return intMatrix/uMatrix.astype('float64'),intMatrix,uMatrix
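The flat-index trick used above (gt*maxSubm+subm, then bincount and reshape) can be seen in isolation on two tiny label maps; this sketch is independent of the colour-compression helper:
import numpy as np
gt   = np.array([[0, 0, 1], [1, 1, 2]])    # ground-truth region labels
subm = np.array([[0, 1, 1], [1, 1, 2]])    # submitted region labels
n_gt, n_subm = gt.max() + 1, subm.max() + 1
comb = gt * n_subm + subm                   # one unique code per (gt, subm) pair
inter = np.bincount(comb.ravel(), minlength=n_gt * n_subm).reshape(n_gt, n_subm)
print(inter)    # inter[i, j] = number of pixels with gt label i and subm label j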
def test_with_incorrect_minlength(self):
x = np.array([], dtype=int)
assert_raises_regex(TypeError, "an integer is required",
lambda: np.bincount(x, minlength="foobar"))
assert_raises_regex(ValueError, "must be positive",
lambda: np.bincount(x, minlength=-1))
assert_raises_regex(ValueError, "must be positive",
lambda: np.bincount(x, minlength=0))
x = np.arange(5)
assert_raises_regex(TypeError, "an integer is required",
lambda: np.bincount(x, minlength="foobar"))
assert_raises_regex(ValueError, "minlength must be positive",
lambda: np.bincount(x, minlength=-1))
assert_raises_regex(ValueError, "minlength must be positive",
lambda: np.bincount(x, minlength=0))
def add(self, arr):
if not isinstance(arr, np.ndarray):
arr = np.array(arr)
arr = arr.flatten()
self.min = min(self.min, arr.min())
self.max = max(self.max, arr.max())
self.sum += arr.sum()
self.num += len(arr)
self.sum_squares += (arr ** 2).sum()
indices = np.searchsorted(self.bucket_limits, arr, side='right')
new_counts = np.bincount(indices, minlength=self.buckets.shape[0])
if new_counts.shape[0] > self.buckets.shape[0]:
# This should only happen with nans and extremely large values
assert new_counts.shape[0] == self.buckets.shape[0] + 1, new_counts.shape
new_counts = new_counts[:self.buckets.shape[0]]
self.buckets += new_counts
def add(self, arr):
if not isinstance(arr, np.ndarray):
arr = np.array(arr)
arr = arr.flatten()
self.min = min(self.min, arr.min())
self.max = max(self.max, arr.max())
self.sum += arr.sum()
self.num += len(arr)
self.sum_squares += (arr ** 2).sum()
indices = np.searchsorted(self.bucket_limits, arr, side='right')
new_counts = np.bincount(indices, minlength=self.buckets.shape[0])
if new_counts.shape[0] > self.buckets.shape[0]:
# This should only happen with nans and extremely large values
assert new_counts.shape[0] == self.buckets.shape[0] + 1, new_counts.shape
new_counts = new_counts[:self.buckets.shape[0]]
self.buckets += new_counts
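A minimal standalone view of the searchsorted-plus-bincount bucketing used in add() above; bucket_limits here is an assumption standing in for the histogram's real bucket edges:
import numpy as np
bucket_limits = np.array([0.0, 1.0, 2.0, 4.0])        # assumed right edges of four buckets
values = np.array([-0.5, 0.1, 0.9, 1.5, 3.0, 3.9])
indices = np.searchsorted(bucket_limits, values, side='right')
counts = np.bincount(indices, minlength=len(bucket_limits))
print(indices)   # bucket index chosen for each value
print(counts)    # how many values landed in each bucket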
def hog(img):
h, w = img.shape
gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
mag, ang = cv2.cartToPolar(gx, gy)
bins = np.int32(bin_n*ang/(2*np.pi)) # quantizing binvalues in (0...16)
bin_cells = ()
mag_cells = ()
for i in range(wc):
for j in range(hc):
bin_cells += (bins[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
mag_cells += (mag[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
hist = np.hstack(hists) # hist is a 16*wc*hc vector
return hist
def hog(img):
h, w = img.shape
gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
mag, ang = cv2.cartToPolar(gx, gy)
bins = np.int32(bin_n*ang/(2*np.pi)) # quantizing binvalues in (0...bin_n)
bin_cells = ()
mag_cells = ()
for i in range(wc):
for j in range(hc):
bin_cells += (bins[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
mag_cells += (mag[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
hist = np.hstack(hists) # hist is a bin_n*wc*hc vector
return hist
def hog(img):
h, w = img.shape
gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
mag, ang = cv2.cartToPolar(gx, gy)
bins = np.int32(bin_n*ang/(2*np.pi)) # quantizing binvalues in (0...16)
bin_cells = ()
mag_cells = ()
for i in range(wc):
for j in range(hc):
bin_cells += (bins[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
mag_cells += (mag[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
hist = np.hstack(hists) # hist is a 16*wc*hc vector
return hist
def selected_features(self):
"""Get the number of times a feature was selected
"""
if len(self.best_estimator_):
# Get selected features from the best estimator :
iterator = product(range(self._rep), range(self._nfolds))
fselected = []
featrange = np.arange(self._nfeat)[np.newaxis, ...]
for k, i in iterator:
estimator = self.best_estimator_[k][i].get_params()['features']
fselected.extend(list(estimator.transform(featrange).ravel().astype(int)))
# Get the count for each feature :
bins = np.bincount(np.array(fselected))
selectedBins = np.zeros((self._nfeat,), dtype=int)
selectedBins[np.arange(len(bins))] = bins
# Put everything in a Dataframe :
resum = pd.DataFrame({'Name':self._name, 'Count':selectedBins,
'Percent':100*selectedBins/selectedBins.sum()}, columns=['Name', 'Count', 'Percent'])
return resum
else:
print('You must run the fit() method first')
def get_confusion_matrix(self, gt_label, pred_label, class_num):
"""
Calculate the confusion matrix from the given labels and predictions
:param gt_label: the ground truth label
:param pred_label: the pred label
:param class_num: the number of classes
:return: the confusion matrix
"""
index = (gt_label * class_num + pred_label).astype('int32')
label_count = np.bincount(index)
confusion_matrix = np.zeros((class_num, class_num))
for i_label in range(class_num):
for i_pred_label in range(class_num):
cur_index = i_label * class_num + i_pred_label
if cur_index < len(label_count):
confusion_matrix[i_label, i_pred_label] = label_count[cur_index]
return confusion_matrix
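Note that the double loop above only copies the bincount result into a square array; the same confusion matrix can be read off directly with a reshape (a sketch, not the project's code):
import numpy as np
gt   = np.array([0, 0, 1, 1, 2, 2])
pred = np.array([0, 1, 1, 1, 2, 0])
class_num = 3
index = gt * class_num + pred
cm = np.bincount(index, minlength=class_num**2).reshape(class_num, class_num)
print(cm)   # cm[i, j] = number of samples with true label i predicted as j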
def bm25_weight(X, K1=100, B=0.8):
""" Weighs each row of a sparse matrix X by BM25 weighting """
# calculate idf per term (user)
X = coo_matrix(X)
N = float(X.shape[0])
idf = log(N / (1 + bincount(X.col)))
# calculate length_norm per document (artist)
row_sums = numpy.ravel(X.sum(axis=1))
average_length = row_sums.mean()
length_norm = (1.0 - B) + B * row_sums / average_length
# weight matrix rows by bm25
X.data = X.data * (K1 + 1.0) / (K1 * length_norm[X.row] + X.data) * idf[X.col]
return X
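A usage sketch for bm25_weight on a tiny play-count matrix; it assumes the imports the snippet relies on (import numpy, from numpy import bincount, log, and from scipy.sparse import coo_matrix):
import numpy
from numpy import bincount, log
from scipy.sparse import coo_matrix
plays = coo_matrix(numpy.array([[3., 0., 1.],
                                [0., 2., 0.],
                                [5., 1., 0.]]))
weighted = bm25_weight(plays, K1=100, B=0.8)
print(weighted.toarray())   # rows reweighted by BM25; rarer columns receive a higher idf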
def predict(self, X):
y_preds = np.empty((X.shape[0], len(self.trees)))
# Let each tree make a prediction on the data
for i, tree in enumerate(self.trees):
# Indices of the features that the tree has trained on
idx = tree.feature_indices
# Make a prediction based on those features
prediction = tree.predict(X[:, idx])
y_preds[:, i] = prediction
y_pred = []
# For each sample
for sample_predictions in y_preds:
# Select the most common class prediction
y_pred.append(np.bincount(sample_predictions.astype('int')).argmax())
return y_pred
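The per-sample vote above reduces to bincount(...).argmax(); a tiny standalone check:
import numpy as np
# each row holds every tree's prediction for one sample
tree_votes = np.array([[1, 1, 0, 2, 1],
                       [0, 0, 2, 0, 1]])
majority = [np.bincount(row.astype('int')).argmax() for row in tree_votes]
print(majority)   # [1, 0]: the most common class per sample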
def _class_frequencies(X, y):
"""Count the number of non-zero values for each class y in sparse X."""
labels = np.unique(y)
if len(labels) > 2:
raise ValueError("Delta works only with binary classification problems")
# Indices for each type of labels in y
N1 = np.where(y == labels[0])[0]
N2 = np.where(y == labels[1])[0]
# Number of positive documents that each term appears on
df1 = np.bincount(X[N1].nonzero()[1], minlength=X.shape[1])
# Number of negative documents that each term appears on
df2 = np.bincount(X[N2].nonzero()[1], minlength=X.shape[1])
return N1.shape[0], df1, N2.shape[0], df2
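A small check of the per-class document-frequency counts computed above (scipy provides the sparse matrix; the data is made up):
import numpy as np
from scipy.sparse import csr_matrix
X = csr_matrix(np.array([[1, 0, 2],
                         [0, 0, 1],
                         [3, 1, 0],
                         [0, 2, 0]]))
y = np.array([0, 0, 1, 1])
n1, df1, n2, df2 = _class_frequencies(X, y)
print(n1, df1)   # documents in the first class and, per term, how many of them contain it
print(n2, df2)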
def get_his_std_qi( data_pixel_qi, max_cts=None):
'''
YG. Dec 16, 2016
Calculate the photon-count histogram for one q, given the parameters below
Parameters:
data_pixel_qi: 2-D array (frames x pixels) of photon counts
max_cts: for bin max, bin will be [0,1,2,..., max_cts]
Return:
bins
his
std
kmean: the average photon count
'''
if max_cts is None:
max_cts = np.max( data_pixel_qi ) +1
bins = np.arange(max_cts)
dqn, dqm = data_pixel_qi.shape
#get histogram here
H = np.apply_along_axis(np.bincount, 1, np.int_(data_pixel_qi), minlength= max_cts )/dqm
#do average for different frame
his = np.average( H, axis=0)
std = np.std( H, axis=0 )
# compute the average photon count
kmean= np.average(data_pixel_qi )
return bins, his, std, kmean
def get_confusion_matrix(self, gt_label, pred_label, class_num):
"""
Calculate the confusion matrix from the given labels and predictions
:param gt_label: the ground truth label
:param pred_label: the pred label
:param class_num: the number of classes
:return: the confusion matrix
"""
index = (gt_label * class_num + pred_label).astype('int32')
label_count = np.bincount(index)
confusion_matrix = np.zeros((class_num, class_num))
for i_label in range(class_num):
for i_pred_label in range(class_num):
cur_index = i_label * class_num + i_pred_label
if cur_index < len(label_count):
confusion_matrix[i_label, i_pred_label] = label_count[cur_index]
return confusion_matrix
def get_confusion_matrix(self, gt_label, pred_label, class_num):
"""
Calculate the confusion matrix from the given labels and predictions
:param gt_label: the ground truth label
:param pred_label: the pred label
:param class_num: the number of classes
:return: the confusion matrix
"""
index = (gt_label * class_num + pred_label).astype('int32')
label_count = np.bincount(index)
confusion_matrix = np.zeros((class_num, class_num))
for i_label in range(class_num):
for i_pred_label in range(class_num):
cur_index = i_label * class_num + i_pred_label
if cur_index < len(label_count):
confusion_matrix[i_label, i_pred_label] = label_count[cur_index]
return confusion_matrix
def test_with_incorrect_minlength(self):
x = np.array([], dtype=int)
assert_raises_regex(TypeError, "an integer is required",
lambda: np.bincount(x, minlength="foobar"))
assert_raises_regex(ValueError, "must be positive",
lambda: np.bincount(x, minlength=-1))
assert_raises_regex(ValueError, "must be positive",
lambda: np.bincount(x, minlength=0))
x = np.arange(5)
assert_raises_regex(TypeError, "an integer is required",
lambda: np.bincount(x, minlength="foobar"))
assert_raises_regex(ValueError, "minlength must be positive",
lambda: np.bincount(x, minlength=-1))
assert_raises_regex(ValueError, "minlength must be positive",
lambda: np.bincount(x, minlength=0))
def test_particle_octree_counts():
np.random.seed(int(0x4d3d3d3))
# Eight times as many!
data = {}
bbox = []
for i, ax in enumerate('xyz'):
DW = DRE[i] - DLE[i]
LE = DLE[i]
data["particle_position_%s" % ax] = \
np.random.normal(0.5, scale=0.05, size=(NPART*8)) * DW + LE
bbox.append( [DLE[i], DRE[i]] )
bbox = np.array(bbox)
for n_ref in [16, 32, 64, 512, 1024]:
ds = load_particles(data, 1.0, bbox = bbox, n_ref = n_ref)
dd = ds.all_data()
bi = dd["io","mesh_id"]
v = np.bincount(bi.astype("intp"))
assert_equal(v.max() <= n_ref, True)
bi2 = dd["all","mesh_id"]
assert_equal(bi, bi2)
def _parse_output(self):
unique_ids = np.unique(self.tags)
counts = np.bincount(self.tags + 1)
sort_indices = np.argsort(self.tags)
grab_indices = np.indices(self.tags.shape).ravel()[sort_indices]
dens = self.densities[sort_indices]
cp = 0
for i in unique_ids:
cp_c = cp + counts[i + 1]
if i == -1:
cp += counts[i + 1]
continue
group_indices = grab_indices[cp:cp_c]
self._groups.append(self._halo_class(self, i, group_indices,
ptype=self.ptype))
md_i = np.argmax(dens[cp:cp_c])
px, py, pz = \
[self.particle_fields['particle_position_%s' % ax][group_indices]
for ax in 'xyz']
self._max_dens[i] = (dens[cp:cp_c][md_i], px[md_i],
py[md_i], pz[md_i])
cp += counts[i + 1]
def _setup_particles(self, x, y, z, setup_fields=None):
"""
Assigns grids to particles and sets up particle positions. *setup_fields* is
a dict of fields other than the particle positions to set up.
"""
particle_grids, particle_grid_inds = self.ds.index._find_points(x, y, z)
idxs = np.argsort(particle_grid_inds)
self.particles[:, self.posx_index] = x[idxs]
self.particles[:, self.posy_index] = y[idxs]
self.particles[:, self.posz_index] = z[idxs]
self.NumberOfParticles = np.bincount(particle_grid_inds.astype("intp"),
minlength=self.num_grids)
if self.num_grids > 1:
np.add.accumulate(self.NumberOfParticles.squeeze(),
out=self.ParticleGridIndices[1:])
else:
self.ParticleGridIndices[1] = self.NumberOfParticles.squeeze()
if setup_fields is not None:
for key, value in setup_fields.items():
field = (self.ptype, key) if isinstance(key, string_types) else key
if field not in self.default_fields:
self.particles[:,self.field_list.index(field)] = value[idxs]
def train_test_split_per_class(X, y, train_size=None, test_size=None):
sh = np.array(X.shape)
num_classes = len(np.bincount(y))
sh[0] = 0
X_train_arr = np.zeros(sh, dtype=X.dtype)
X_test_arr = np.zeros(sh, dtype=X.dtype)
y_train_arr = np.zeros((0), dtype=y.dtype)
y_test_arr = np.zeros((0), dtype=y.dtype)
for i in range(num_classes):
X_train, X_test, y_train, y_test = train_test_split(X[y==i], y[y==i],
train_size=train_size,
test_size=test_size)
X_train_arr = np.append(X_train_arr, X_train, axis=0)
X_test_arr = np.append(X_test_arr, X_test, axis=0)
y_train_arr = np.append(y_train_arr, y_train)
y_test_arr = np.append(y_test_arr, y_test)
return X_train_arr, X_test_arr, y_train_arr, y_test_arr
def check_generate_valid_indexes(self, num_examples, batch_size):
T = 90
scheme = EpochwiseShuffledInfiniteScheme(num_examples, batch_size)
uniquenesses = []
all_indexes = []
for i in range(T):
indexes = next(scheme)
is_unique = len(indexes) == len(np.unique(indexes))
uniquenesses.append(is_unique)
all_indexes.append(indexes)
assert np.all(uniquenesses)
counts = np.bincount(np.concatenate(all_indexes).ravel())
expected_counts = [batch_size * T // num_examples] * num_examples
assert np.array_equal(counts, expected_counts)
def entropy_score(labels):
"""
entropy = sum(p*log(1/p))
"""
n_labels = labels.shape[0]
if n_labels <= 1:
return 0.0
counts = np.bincount(labels)
probs = counts / float(n_labels)
n_classes = np.count_nonzero(probs)
if n_classes <= 1:
return 0.0
entropy = 0.0
for p in probs:
if p > 0: entropy -= p*np.log(p)  # skip empty bins: 0*log(0) would give nan
return entropy
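A quick sanity check of entropy_score: a 50/50 split of two classes gives log(2), a single class gives 0.
import numpy as np
print(entropy_score(np.array([0, 0, 1, 1])))   # log(2), roughly 0.693, for a 50/50 split
print(entropy_score(np.array([1, 1, 1, 1])))   # 0.0: only one class present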
def split(self, X, y, groups=None):
splits = super(BalancedKFold, self).split(X, y, groups)
y = np.array(y)
for train_index, test_index in splits:
split_y = y[test_index]
classes_y, y_inversed = np.unique(split_y, return_inverse=True)
min_y = min(np.bincount(y_inversed))
new_index = np.zeros(min_y * len(classes_y), dtype=int)
for cls in classes_y:
cls_index = test_index[split_y == cls]
if len(cls_index) > min_y:
cls_index = np.random.choice(
cls_index, size=min_y, replace=False)
new_index[cls * min_y:(cls + 1) * min_y] = cls_index
yield train_index, new_index
def test_univariate_categorical():
# This test generates univariate data from a nominal variable with 6 levels
# and probability vector p_theory, and performs a chi-square test on
# posterior samples from MvKde.
rng = gu.gen_rng(2)
N_SAMPLES = 1000
p_theory = [.3, .1, .2, .15, .15, .1]
samples_test = rng.choice(range(6), p=p_theory, size=N_SAMPLES)
kde = MultivariateKde(
[7], None, distargs={O: {ST: [C], SA:[{'k': 6}]}}, rng=rng)
# Incorporate observations.
for rowid, x in enumerate(samples_test):
kde.incorporate(rowid, {7: x})
kde.transition()
# Posterior samples.
samples_gen = kde.simulate(-1, [7], N=N_SAMPLES)
f_obs = np.bincount([s[7] for s in samples_gen])
f_exp = np.bincount(samples_test)
_, pval = chisquare(f_obs, f_exp)
assert 0.05 < pval
# Get some coverage on logpdf_score.
assert kde.logpdf_score() < 0
def test_crp_decrement(N, alpha, seed):
A = gu.simulate_crp(N, alpha, rng=gu.gen_rng(seed))
Nk = list(np.bincount(A))
# Decrement all counts by 1.
Nk = [n-1 if n > 1 else n for n in Nk]
# Decrement rowids.
crp = simulate_crp_gpm(N, alpha, rng=gu.gen_rng(seed))
targets = [c for c in crp.counts if crp.counts[c] > 1]
seen = set([])
for r, c in crp.data.items():
if c in targets and c not in seen:
seen.add(c)
crp.unincorporate(r)
if len(seen) == len(targets):
break
assert_crp_equality(alpha, Nk, crp)
def test_conditional_real(state):
# Simulate from the conditional Z|X
fig, axes = plt.subplots(2,3)
fig.suptitle('Conditional Simulation Of Indicator Z Given Data X')
# Compute representative data sample for each indicator.
means = [np.mean(DATA[DATA[:,1]==t], axis=0)[0] for t in INDICATORS]
for mean, indicator, ax in zip(means, INDICATORS, axes.ravel('F')):
samples_subpop = [s[1] for s in
state.simulate(-1, [1], {0:mean}, None, N_SAMPLES)]
ax.hist(samples_subpop, color='g', alpha=.4)
ax.set_title('True Indicator %d' % indicator)
ax.set_xlabel('Simulated Indicator')
ax.set_xticks(INDICATORS)
ax.set_ylabel('Frequency')
ax.set_ylim([0, ax.get_ylim()[1]+10])
ax.grid()
# Check that the simulated indicator agrees with true indicator.
true_ind_a = indicator
true_ind_b = indicator-1 if indicator % 2 else indicator+1
counts = np.bincount(samples_subpop)
frac = sum(counts[[true_ind_a, true_ind_b]])/float(sum(counts))
assert .8 < frac
def plot_dist_discrete(X, output, clusters, ax=None, Y=None, hist=True):
# Create a new axis?
if ax is None:
_, ax = plt.subplots()
# Set up x axis.
X = np.asarray(X, dtype=int)
x_max = max(X)
Y = range(int(x_max)+1)
X_hist = np.bincount(X) / float(len(X))
ax.bar(Y, X_hist, color='gray', edgecolor='none')
# Compute weighted pdfs
pdf = np.zeros((len(clusters), len(Y)))
W = [log(clusters[k].N) - log(float(len(X))) for k in clusters]
for i, k in enumerate(clusters):
pdf[i,:] = np.exp(
[W[i] + clusters[k].logpdf(None, {output:y}) for y in Y])
color, alpha = gu.curve_color(i)
ax.bar(Y, pdf[i,:], color=color, edgecolor='none', alpha=alpha)
# Plot the sum of pdfs.
ax.bar(
Y, np.sum(pdf, axis=0), color='none', edgecolor='black', linewidth=3)
ax.set_xlim([0, x_max+1])
# Title.
ax.set_title(clusters.values()[0].name())
return ax
def onehot(self, data, min_length=None):
if min_length is None:
min_length = self.vocab_size
return np.bincount(data, minlength=min_length)
def test_sample_from_probs2_gof(size):
set_random_seed(size)
probs = np.exp(2 * np.random.random(size)).astype(np.float32)
counts = np.zeros(size, dtype=np.int32)
num_samples = 2000 * size
probs2 = np.tile(probs, (num_samples, 1))
samples = sample_from_probs2(probs2)
probs /= probs.sum() # Normalize afterwards.
counts = np.bincount(samples, minlength=size)
print(counts)
print(probs * num_samples)
gof = multinomial_goodness_of_fit(probs, counts, num_samples, plot=True)
assert 1e-2 < gof
def count_pairs(assignments, v1, v2, M):
"""Construct sufficient statistics for (v1, v2) pairs.
Args:
assignments: An _ x V assignment matrix with values in range(M).
v1, v2: Column ids of the assignments matrix.
M: The number of possible assignment bins.
Returns:
An M x M array of counts.
"""
assert v1 != v2
pairs = assignments[:, v1].astype(np.int32) * M + assignments[:, v2]
return np.bincount(pairs, minlength=M * M).reshape((M, M))
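A tiny standalone check of count_pairs: the pair code v1*M + v2 is counted with bincount and reshaped into an M x M table.
import numpy as np
assignments = np.array([[0, 1],
                        [0, 1],
                        [2, 0]])
pairs = count_pairs(assignments, 0, 1, M=3)
print(pairs)   # pairs[a, b] = number of rows with column 0 == a and column 1 == b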
def _fast_hist(self, label_true, label_pred, n_class):
mask = (label_true >= 0) & (label_true < n_class)
hist = np.bincount(
n_class * label_true[mask].astype(int) +
label_pred[mask], minlength=n_class**2).reshape(n_class, n_class)
return hist
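_fast_hist is the same bincount-and-reshape confusion matrix, but it first masks labels outside [0, n_class), e.g. an ignore index; a sketch with made-up arrays:
import numpy as np
label_true = np.array([0, 1, 2, 255, 1])   # 255 is an ignore label outside [0, n_class)
label_pred = np.array([0, 2, 2,   0, 1])
n_class = 3
mask = (label_true >= 0) & (label_true < n_class)
hist = np.bincount(n_class * label_true[mask].astype(int) + label_pred[mask],
                   minlength=n_class**2).reshape(n_class, n_class)
print(hist)   # the ignored entry is simply dropped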
def relabel_by_size(labels):
""" Relabel clusters so they are sorted by number of members, descending.
Args: labels (np.array(int)): 1-based cluster labels """
order = np.argsort(np.argsort(-np.bincount(labels)))
return 1 + order[labels]
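relabel_by_size expects 1-based labels (bin 0 of the bincount is the unused label 0); a quick example:
import numpy as np
labels = np.array([2, 2, 2, 1, 1, 3])     # 1-based cluster labels
print(relabel_by_size(labels))            # [1 1 1 2 2 3]: the largest cluster becomes 1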
def get_cluster_sizes(clustering):
""" Returns a numpy array containing cell-counts for each cluster """
return np.bincount(clustering.clusters)[1:]
def add_many(self, elems):
self.active = True
elems = np.copy(elems).astype(np.int_)
elems[elems > self.max_value] = 1 + self.max_value
self.counts += np.bincount(elems, minlength=len(self.counts))
def get_cdna_mol_counts_per_gene(self, gene_index, remove_none_gene=True):
mol_genes = self.get_column('gene')
num_genes = len(gene_index.get_genes())
gene_counts = np.bincount(mol_genes, minlength=num_genes + 1)
if remove_none_gene:
gene_counts = gene_counts[:num_genes]
return gene_counts
def get_leaf(labels):
# Obtain the leaf as the majority of the labels
return np.bincount(labels).argmax()
def compute_class_frequencies(segment,num_classes):
if isinstance(segment,list):
segment = np.asarray(segment)
f = 1.0 * np.bincount(segment.reshape(-1,).astype(int),minlength=num_classes) / np.prod(segment.shape)
return f
def compute_centralvoxel_frequencies(segment,minlength):
if isinstance(segment,list):
segment = np.asarray(segment)
shape = segment.shape[-3:]
middle_coordinate = np.zeros(3,int)
for it_coordinate,coordinate in enumerate(shape):
if coordinate%2==0:
middle_coordinate[it_coordinate] = coordinate / 2 - 1
else:
middle_coordinate[it_coordinate] = coordinate/2
segment = segment.reshape((-1,) + shape)
f = 1.0 * np.bincount(segment[:,middle_coordinate[0],middle_coordinate[1],middle_coordinate[2]].reshape(-1,).astype(int),minlength=minlength) / np.prod(segment.shape[:-3])
return f
def get_class_distribution(self, subject_list):
class_frequencies = np.zeros(self.n_classes)
for subj in subject_list:
labels = subj.load_labels()
mask = subj.load_ROI_mask()
class_frequencies += np.bincount(labels.flatten().astype('int'), weights=mask.flatten(),
minlength=self.n_classes)
return class_frequencies
def get_class_weights(self,subject_list, mask_bool = True):
class_frequencies = np.zeros(self.n_classes)
for subj in subject_list:
labels = subj.load_labels()
if mask_bool == 'ROI':
mask = subj.load_ROI_mask()
class_frequencies += np.bincount(labels.flatten().astype('int'), weights=mask.flatten().astype('int'),
minlength=self.n_classes)
elif mask_bool == 'labels':
mask = np.zeros_like(labels)
mask[labels > 0] = 1
# print(np.bincount(labels.flatten().astype('int'), weights=mask.flatten().astype('int'),
# minlength=self.n_classes))
class_frequencies += np.bincount(labels.flatten().astype('int'), weights=mask.flatten().astype('int'),
minlength=self.n_classes+1)[1:]
else :
class_frequencies += np.bincount(labels.flatten().astype('int'),
minlength=self.n_classes)
class_frequencies = class_frequencies / np.sum(class_frequencies)
class_weight = np.sort(class_frequencies)[int(np.ceil(1.0*self.n_classes/2))] / class_frequencies
class_weight[np.where(class_frequencies == 0)[0]] = 0  # avoid infinite weights for empty classes
return class_weight
def epoch_voting(Y, chunk_size):
Y_new = Y.copy()
for i in range(1 + len(Y_new)//chunk_size):
epoch = Y_new[i*chunk_size:(i+1)*chunk_size]
if len(epoch) != 0: winner = np.bincount(epoch).argmax()
Y_new[i*chunk_size:(i+1)*chunk_size] = winner
return Y_new
def est_pmf(self, samples, normalize=True, eps=1e-10):
"""Estimate probability mass function from samples
:param np.ndarray samples: `(n_samples, len(self.nsoutdims))`
array of samples
:param bool normalize: True: Return normalized probability
estimates (default). False: Return integer outcome counts.
:returns: Estimated probabilities as ndarray `est_pmf` with
shape `self.nsoutdims`
`n_samples * est_pmf[i1, ..., ik]` provides the number of
occurences of outcome `(i1, ..., ik)` in `samples`.
"""
n_samples = samples.shape[0]
n_out = np.prod(self.nsoutdims)
if samples.ndim > 1:
samples = self.pack_samples(samples)
counts = np.bincount(samples, minlength=n_out)
assert counts.shape == (n_out,)
counts = counts.reshape(self.nsoutdims)
assert counts.sum() == n_samples
if normalize:
return counts / n_samples
else:
return counts
def fit(self, data):
"""
Run K-Means on data n_init times.
Parameters
----------
data: numpy array
Returns
-------
No value is returned.
Function sets the following two object params:
self.labels_
self.cluster_centers_
"""
data = np.array(data)
labels, cluster_centers = [], []
for i in range(self.n_init):
if not self.warm_start:
self.cluster_centers_ = None
self._global_covar_matrices = None
self._inv_covar_matrices = None
self._fit(data)
labels += [self.labels_]
cluster_centers += [self.cluster_centers_]
self.inertias_ += [self._inertia(data)]
self.log_likelihoods_ += [self.log_likelihood(data)]
best_idx = np.argmin(self.inertias_)
self.labels_ = labels[best_idx]
self.all_labels_ = labels
self.best_log_likelihood_ = self.log_likelihoods_[best_idx]
self.best_inertia_ = self.inertias_[best_idx]
self.cluster_centers_ = cluster_centers[best_idx]
if self.verbose == 1:
print('fit: n_clusters: {}, label bin count: {}'.format(self.n_clusters, np.bincount(self.labels_, minlength=self.n_clusters)))