Python numpy module: unique() example source code
We extracted the following 50 code examples from open-source Python projects to illustrate how to use numpy.unique().
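Before the project examples, here is a minimal self-contained sketch (the array below is made up) of what numpy.unique returns and what its optional flags mean:
import numpy as np

a = np.array([3, 1, 2, 3, 1, 3])
print(np.unique(a))  # sorted unique values: [1 2 3]

# return_index   -> index of the first occurrence of each unique value
# return_inverse -> indices that rebuild `a` from the unique array
# return_counts  -> how often each unique value occurs
vals, first_idx, inverse, counts = np.unique(
    a, return_index=True, return_inverse=True, return_counts=True)
print(vals[inverse])  # [3 1 2 3 1 3], the original array
print(counts)         # [2 1 3]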
def __init__(self, filename, target_map, classifier='svm'):
self.seed_ = 0
self.filename_ = filename
self.target_map_ = target_map
    self.target_ids_ = np.unique(list(target_map.keys())).astype(np.int32)  # list() so this also works with Python 3 dict views
self.epoch_no_ = 0
self.st_time_ = time.time()
# Setup classifier
print('-------------------------------')
print('====> Building Classifier, setting class weights')
if classifier == 'svm':
self.clf_hyparams_ = {'C':[0.01, 0.1, 1.0, 10.0, 100.0], 'class_weight': ['balanced']}
self.clf_base_ = LinearSVC(random_state=self.seed_)
elif classifier == 'sgd':
self.clf_hyparams_ = {'alpha':[0.0001, 0.001, 0.01, 0.1, 1.0, 10.0], 'class_weight':['auto']} # 'loss':['hinge'],
self.clf_ = SGDClassifier(loss='log', penalty='l2', shuffle=False, random_state=self.seed_,
warm_start=True, n_jobs=-1, n_iter=1, verbose=4)
else:
raise Exception('Unknown classifier type %s. Choose from [sgd, svm, gradient-boosting, extra-trees]'
% classifier)
def silhouette_score(series, clusters):
distances = np.zeros((series.shape[0], series.shape[0]))
for idx_a, metric_a in enumerate(series):
for idx_b, metric_b in enumerate(series):
distances[idx_a, idx_b] = _sbd(metric_a, metric_b)[0]
labels = np.zeros(series.shape[0])
    for i, (cluster, indices) in enumerate(clusters):
        for index in indices:
            labels[index] = i
# silhouette is only defined, if we have 2 clusters with assignments at
# minimum
if len(np.unique(labels)) == 1 or (len(np.unique(labels)) >= distances.shape[0]):
#if len(np.unique(labels)) == 1:
return labels, -1
else:
return labels, _silhouette_score(distances, labels, metric='precomputed')
def transform(self, img, lbl):
img = img[:, :, ::-1]
img = img.astype(np.float64)
img -= self.mean
img = m.imresize(img, (self.img_size[0], self.img_size[1]))
# Resize scales images from 0 to 255, thus we need
# to divide by 255.0
img = img.astype(float) / 255.0
# NHWC -> NCWH
img = img.transpose(2, 0, 1)
lbl = self.encode_segmap(lbl)
classes = np.unique(lbl)
lbl = lbl.astype(float)
lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
lbl = lbl.astype(int)
assert(np.all(classes == np.unique(lbl)))
img = torch.from_numpy(img).float()
lbl = torch.from_numpy(lbl).long()
return img, lbl
def get_normalized_dispersion(mat_mean, mat_var, nbins=20):
mat_disp = (mat_var - mat_mean) / np.square(mat_mean)
quantiles = np.percentile(mat_mean, np.arange(0, 100, 100 / nbins))
quantiles = np.append(quantiles, mat_mean.max())
# merge bins with no difference in value
quantiles = np.unique(quantiles)
if len(quantiles) <= 1:
# pathological case: the means are all identical. just return raw dispersion.
return mat_disp
# calc median dispersion per bin
(disp_meds, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, mat_disp, statistic='median', bins=quantiles)
# calc median absolute deviation of dispersion per bin
disp_meds_arr = disp_meds[disp_bins-1] # 0th bin is empty since our quantiles start from 0
disp_abs_dev = abs(mat_disp - disp_meds_arr)
(disp_mads, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, disp_abs_dev, statistic='median', bins=quantiles)
# calculate normalized dispersion
disp_mads_arr = disp_mads[disp_bins-1]
disp_norm = (mat_disp - disp_meds_arr) / disp_mads_arr
return disp_norm
def gl_init(self,array_table):
self.gl_hide = False
self.gl_vertex_array = gl.VertexArray()
glBindVertexArray(self.gl_vertex_array)
self.gl_vertex_buffer = gl.Buffer()
glBindBuffer(GL_ARRAY_BUFFER,self.gl_vertex_buffer)
self.gl_element_count = 3*gl_count_triangles(self)
self.gl_element_buffer = gl.Buffer()
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,self.gl_element_buffer)
vertex_type = numpy.dtype([array_table[attribute].field() for attribute in self.attributes])
vertex_count = sum(len(primitive.vertices) for primitive in self.primitives)
vertex_array = numpy.empty(vertex_count,vertex_type)
for attribute in self.attributes:
array_table[attribute].load(self,vertex_array)
vertex_array,element_map = numpy.unique(vertex_array,return_inverse=True)
element_array = gl_create_element_array(self,element_map,self.gl_element_count)
glBufferData(GL_ARRAY_BUFFER,vertex_array.nbytes,vertex_array,GL_STATIC_DRAW)
glBufferData(GL_ELEMENT_ARRAY_BUFFER,element_array.nbytes,element_array,GL_STATIC_DRAW)
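The gl_init method above relies on numpy.unique(vertex_array, return_inverse=True): the unique vertices become the vertex buffer and the inverse mapping becomes the element (index) buffer. A minimal sketch of that deduplication idea, using a 1-D stream of made-up vertex ids in place of the structured vertex records:
import numpy as np

stream = np.array([10, 20, 10, 30, 20, 30])  # vertex stream with repeats
unique_vals, element_map = np.unique(stream, return_inverse=True)
# element_map plays the role of the element buffer: indexing the unique
# values with it reproduces the original stream.
assert np.array_equal(unique_vals[element_map], stream)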
def get_best_split(X, y, criterion):
""" Obtain the best splitting point and resulting children for the data set X, y
Args:
X, y (numpy.ndarray, data set)
criterion (gini or entropy)
Returns:
dict {index: index of the feature, value: feature value, children: left and right children}
"""
best_index, best_value, best_score, children = None, None, 1, None
for index in range(len(X[0])):
for value in np.sort(np.unique(X[:, index])):
groups = split_node(X, y, index, value)
impurity = weighted_impurity([groups[0][1], groups[1][1]], criterion)
if impurity < best_score:
best_index, best_value, best_score, children = index, value, impurity, groups
return {'index': best_index, 'value': best_value, 'children': children}
def consideronlylabels(self, list2consider, verbose = False):
"""
    Keep only the labels in list2consider: every other label present in the image is added to the ignored-labels set, and the self._labels cache is updated.
"""
if isinstance(list2consider, int):
list2consider = [list2consider]
toignore = set(np.unique(self.image))-set(list2consider)
integers = np.vectorize(lambda x : int(x))
toignore = integers(list(toignore)).tolist()
if verbose: print 'Adding labels', toignore,'to the list of labels to ignore...'
self._ignoredlabels.update(toignore)
if verbose: print 'Updating labels list...'
self._labels = self.__labels()
def main(max_iter):
# prepare
npdl.utils.random.set_seed(1234)
# data
digits = load_digits()
X_train = digits.data
X_train /= np.max(X_train)
Y_train = digits.target
n_classes = np.unique(Y_train).size
# model
model = npdl.model.Model()
model.add(npdl.layers.Dense(n_out=500, n_in=64, activation=npdl.activations.ReLU()))
model.add(npdl.layers.Dense(n_out=n_classes, activation=npdl.activations.Softmax()))
model.compile(loss=npdl.objectives.SCCE(), optimizer=npdl.optimizers.SGD(lr=0.005))
# train
model.fit(X_train, npdl.utils.data.one_hot(Y_train), max_iter=max_iter, validation_split=0.1)
def get_weighted_mask(self, image_shape, mask_shape,ROI_mask=None, labels_mask=None):
if labels_mask is None:
raise ValueError('SamplingScheme error: please specify a labels_mask for this sampling scheme')
print(np.unique(labels_mask))
mask_boundaries = self.get_mask_boundaries(image_shape, mask_shape,ROI_mask)
final_mask = np.zeros((self.n_categories,) + labels_mask.shape, dtype="int16")
for index_cat in range(self.n_categories):
        final_mask[index_cat] = (labels_mask == index_cat) * mask_boundaries  # restrict the class mask to the valid boundary region
final_mask = 1.0 * final_mask / np.reshape(np.sum(np.reshape(final_mask,(self.n_categories,-1)),axis=1),(self.n_categories,)+(1,)*len(image_shape))
print(np.sum(np.reshape(final_mask,(self.n_categories,-1)),axis=1))
return final_mask
def get_channel_id_by_file_name(self, filename):
"""
    Check the parameters of the NCS, NSE and NTT files for the given
    filename and return the channel_id if the result is consistent.
:param filename:
:return:
"""
channel_ids = []
channel_ids += [k for k in self.parameters_ncs if
self.parameters_ncs[k]['filename'] == filename]
channel_ids += [k for k in self.parameters_nse if
self.parameters_nse[k]['filename'] == filename]
channel_ids += [k for k in self.parameters_ntt if
self.parameters_ntt[k]['filename'] == filename]
if len(np.unique(np.asarray(channel_ids))) == 1:
return channel_ids[0]
elif len(channel_ids) > 1:
raise ValueError(
'Ambiguous channel ids detected. Filename %s is associated'
' to different channels of NCS and NSE and NTT %s'
'' % (filename, channel_ids))
else: # if filename was not detected
return None
def __read_unit(self, unit_id, channel_idx):
"""
Creates unit with unit id for given channel id.
"""
# define a name for spiketrain
# (unique identifier: 1000 * elid + unit_nb)
name = "Unit {0}".format(1000 * channel_idx + unit_id)
# define description for spiketrain
desc = 'Unit from channel: {0}, id: {1}'.format(
channel_idx, self.__get_unit_classification(unit_id))
un = Unit(
name=name,
description=desc,
file_origin='.'.join([self._filenames['nev'], 'nev']))
# add additional annotations
un.annotate(ch_idx=int(channel_idx))
un.annotate(unit_id=int(unit_id))
return un
def __draw_pk2(self):
self.__cleanPk2()
if self.units is not None:
unique_units = np.unique(self.units)
unique_units = unique_units.tolist()
pca_1,pca_2 = self.PCAusedList.currentText().split("-")
pca_1 = np.int(pca_1)-1
pca_2 = np.int(pca_2)-1
if self.wavePCAs[0].shape[0]>2:
xs = self.wavePCAs[:,pca_1]
ys = self.wavePCAs[:,pca_2]
self.PcaScatterItem = []
seg_num = 5000
for i,ite_unit in enumerate(unique_units):
mask = self.units==ite_unit
temp_xs = xs[mask]
temp_ys = ys[mask]
segs = int(ceil(temp_xs.shape[0]/float(seg_num)))
for j in range(segs):
temp_xs_j = temp_xs[j*seg_num:(j+1)*seg_num]
temp_ys_j = temp_ys[j*seg_num:(j+1)*seg_num]
self.PcaScatterItem.append(pg.ScatterPlotItem(temp_xs_j,temp_ys_j,pen=self.colors[ite_unit],brush=self.colors[ite_unit],size=3,symbol="o"))
for i in range(len(self.PcaScatterItem)):
self.pk2.addItem(self.PcaScatterItem[i])
def cal_event_count(timestamps):
"""Calculate event count based on timestamps.
Parameters
----------
timestamps : numpy.ndarray
timestamps array in 1D array
Returns
-------
event_arr : numpy.ndarray
        an array with 2 rows: the first row contains the unique timestamps,
        the second row the corresponding event count at each timestamp
"""
event_ts, event_count = np.unique(timestamps, return_counts=True)
return np.asarray((event_ts, event_count))
def recode_groups(groups, propensity):
# Code groups as 0 and 1
groups = (groups == groups.unique()[0])
N = len(groups)
N1 = groups[groups == 1].index
N2 = groups[groups == 0].index
g1 = propensity[groups == 1]
g2 = propensity[groups == 0]
# Check if treatment groups got flipped - the smaller should correspond to N1/g1
if len(N1) > len(N2):
N1, N2, g1, g2 = N2, N1, g2, g1
return groups, N1, N2, g1, g2
################################################################################
############################# Base Matching Class ##############################
################################################################################
def minScalErr(stec,el,z,thisBias):
"""
this determines the slope of the vTEC vs. Elevation line, which
should be minimized in the minimum scalloping technique for
receiver bias removal
inputs:
stec - time indexed Series of slant TEC values
el - corresponding elevation values, also Series
z - mapping function values to convert to vTEC from entire file, may
contain nans, Series
thisBias - the bias to be tested and minimized
"""
intel=np.asarray(el[stec.index],int) # bin the elevation values into int
sTEC=np.asarray(stec,float)
zmap = z[stec.index]
c=np.array([(i,np.average((sTEC[intel==i]-thisBias)
/zmap[intel==i])) for i in np.unique(intel) if i>30])
return np.polyfit(c[:,0],c[:,1],1)[0]
def filter_sort_unique(self, max_objval=float('Inf')):
# filter
if max_objval < float('inf'):
good_idx = self.objvals <= max_objval
self.objvals = self.objvals[good_idx]
self.solutions = self.solutions[good_idx]
if len(self.objvals) > 0:
sort_idx = np.argsort(self.objvals)
self.objvals = self.objvals[sort_idx]
self.solutions = self.solutions[sort_idx]
# unique
b = np.ascontiguousarray(self.solutions).view(
np.dtype((np.void, self.solutions.dtype.itemsize * self.P)))
_, unique_idx = np.unique(b, return_index=True)
self.objvals = self.objvals[unique_idx]
self.solutions = self.solutions[unique_idx]
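The void-view trick in filter_sort_unique predates numpy.unique's axis argument; on NumPy 1.13 and later the same row-wise deduplication can be written directly. A sketch on a made-up array (not the class above):
import numpy as np

solutions = np.array([[1, 0, 1],
                      [0, 1, 1],
                      [1, 0, 1]])
# Indices of the first occurrence of each unique row
_, unique_idx = np.unique(solutions, axis=0, return_index=True)
unique_rows = solutions[unique_idx]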
def reset(self):
""" Resets the state of the generator"""
self.step = 0
Y = np.argmax(self.Y,1)
labels = np.unique(Y)
idx = []
smallest = len(Y)
for i,label in enumerate(labels):
where = np.where(Y==label)[0]
if smallest > len(where):
self.slabel = i
smallest = len(where)
idx.append(where)
self.idx = idx
self.labels = labels
self.n_per_class = int(self.batch_size // len(labels))
self.n_batches = int(np.ceil((smallest//self.n_per_class)))+1
self.update_probabilities()
def __init__(self, X, Y, batch_size,cropsize=0, truncate=False, sequential=False,
random=True, val=False, class_weights=None):
assert len(X) == len(Y), 'X and Y must be the same length {}!={}'.format(len(X),len(Y))
if sequential: print('Using sequential mode')
print ('starting normal generator')
self.X = X
self.Y = Y
self.rnd_idx = np.arange(len(Y))
self.Y_last_epoch = []
self.val = val
self.step = 0
self.i = 0
self.cropsize=cropsize
self.truncate = truncate
self.random = False if sequential or val else random
self.batch_size = int(batch_size)
self.sequential = sequential
self.c_weights = class_weights if class_weights else dict(zip(np.unique(np.argmax(Y,1)),np.ones(len(np.argmax(Y,1)))))
assert set(np.argmax(Y,1)) == set([int(x) for x in self.c_weights.keys()]), 'not all labels in class weights'
self.n_batches = int(len(X)//batch_size if truncate else np.ceil(len(X)/batch_size))
if self.random: self.randomize()
def next_normal(self):
x_batch = self.X[self.step*self.batch_size:(self.step+1)*self.batch_size]
y_batch = self.Y[self.step*self.batch_size:(self.step+1)*self.batch_size]
diff = len(x_batch[0]) - self.cropsize
if self.cropsize!=0 and not self.val:
start = np.random.choice(np.arange(0,diff+5,5), len(x_batch))
x_batch = [x[start[i]:start[i]+self.cropsize,:] for i,x in enumerate(x_batch)]
elif self.cropsize !=0 and self.val:
x_batch = [x[diff//2:diff//2+self.cropsize] for i,x in enumerate(x_batch)]
x_batch = np.array(x_batch, dtype=np.float32)
y_batch = np.array(y_batch, dtype=np.int32)
self.step+=1
if self.val:
self.Y_last_epoch.extend(y_batch)
return x_batch # for validation generator, save the new y_labels
else:
weights = np.ones(len(y_batch))
for t in np.unique(np.argmax(y_batch,1)):
weights[np.argmax(y_batch,1)==t] = self.c_weights[t]
return (x_batch,y_batch)
def get_preds_true_for_task(self,train_tasks, test_tasks, param_dict):
t = param_dict['task_num']
X = train_tasks[t]['X']
y = train_tasks[t]['Y']
test_X = test_tasks[t]['X']
true_y = list(test_tasks[t]['Y'].flatten())
if len(y)==0 or len(X)==0 or len(test_X) == 0 or len(true_y)==0:
return None, None
if self.cant_train_with_one_class and len(np.unique(y))==1:
preds = list(np.unique(y)[0]*np.ones(len(true_y)))
else:
preds = self.train_and_predict_task(t, X, y, test_X, param_dict)
return preds, true_y
def getClasses(labels):
"""
Get unique values from a column of labels.
Parameters
----------
labels: array-like of shape = [number_samples] or [number_samples, number_outputs]
The target values (class labels in classification).
Return
----------
classes: ndarray
The sorted unique labels
ids: ndarray
        The inverse mapping: indices into classes that reconstruct the original labels array (as returned by return_inverse).
"""
uniques, ids = numpy.unique(labels, return_inverse=True)
return uniques, ids
def grid_spacing(self):
interval = [1,10]
p1 = Parameter('A', 'integer', lower=interval[0], upper=interval[1])
p2 = Parameter('B', 'continuous', lower=interval[0], upper=interval[1])
p3 = Parameter('C', 'categorical', possible_values=['Bla1', 'Bla2'])
p4 = Parameter('D', 'boolean')
grid_sizes = {'A': 5, 'B': 6}
grid_search = GridSearchOptimizer(model, [p1, p2, p3, p4], clf_score, grid_sizes)
grid = grid_search.grid
for params in grid:
self.assertIn(params['A'], range(*interval))
        self.assertTrue(params['B'] >= interval[0])
        self.assertTrue(params['B'] <= interval[1])
self.assertIn(params['C'], ['Bla1', 'Bla2'])
self.assertIn(params['D'], ['True', 'False'])
lenA = len(np.unique([params['A'] for params in grid]))
lenB = len(np.unique([params['B'] for params in grid]))
lenC = len(np.unique([params['C'] for params in grid]))
lenD = len(np.unique([params['D'] for params in grid]))
self.assertTrue((lenA==grid_sizes['A']) or (lenA==grid_sizes['A']+1))
self.assertTrue((lenB==grid_sizes['B']) or (lenB==grid_sizes['B']+1))
    # grid_sizes has no entries for the categorical and boolean parameters;
    # their grids should contain every possible value (2 each)
    self.assertTrue(lenC == 2)
    self.assertTrue(lenD == 2)
def logscale_spec(spec, sr=44100, factor=20.):
timebins, freqbins = np.shape(spec)
scale = np.linspace(0, 1, freqbins) ** factor
scale *= (freqbins-1)/max(scale)
    scale = np.unique(np.round(scale)).astype(int)  # integer bin edges, usable as slice indices below
# create spectrogram with new freq bins
newspec = np.complex128(np.zeros([timebins, len(scale)]))
for i in range(0, len(scale)):
if i == len(scale)-1:
newspec[:,i] = np.sum(spec[:,scale[i]:], axis=1)
else:
newspec[:,i] = np.sum(spec[:,scale[i]:scale[i+1]], axis=1)
# list center freq of bins
allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
freqs = []
for i in range(0, len(scale)):
if i == len(scale)-1:
freqs += [np.mean(allfreqs[scale[i]:])]
else:
freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])]
return newspec, freqs
def free_parameters(self, data):
"""
Compute free parameters for the model fit using K-Means
"""
K = np.unique(self.labels_).shape[0] # number of clusters
n, d = data.shape
r = (K - 1) + (K * d)
if self.metric == 'euclidean':
r += 1 # one parameter for variance
elif self.metric == 'mahalanobis':
if self.covar_type == 'full' and self.covar_tied:
r += (d * (d + 1) * 0.5) # half of the elements (including diagonal) in the matrix
if self.covar_type == 'full' and not self.covar_tied:
r += (d * (d + 1) * 0.5 * K) # half of the elements (including diagonal) in the matrix
if self.covar_type == 'diag' and self.covar_tied:
r += d # diagonal elements of the matrix
if self.covar_type == 'diag' and not self.covar_tied:
r += (d * K) # diagonal elements of the matrix
if self.covar_type == 'spher' and self.covar_tied:
r += 1 # all diagonal elements are equal
if self.covar_type == 'spher' and not self.covar_tied:
r += K # all diagonal elements are equal
return r
def sim_target_supervised(target_data, target_labels, sigma, idx, target_params):
cur_labels = target_labels[idx]
N = cur_labels.shape[0]
N_labels = len(np.unique(cur_labels))
Gt, mask = np.zeros((N, N)), np.zeros((N, N))
for i in range(N):
for j in range(N):
if cur_labels[i] == cur_labels[j]:
Gt[i, j] = 0.8
mask[i, j] = 1
else:
Gt[i, j] = 0.1
mask[i, j] = 0.8 / (N_labels - 1)
return np.float32(Gt), np.float32(mask)
def get_Surface_Potentials(mtrue, survey, src, field_obj):
phi = field_obj['phi']
CCLoc = mesh.gridCC
XLoc = np.unique(mesh.gridCC[:, 0])
surfaceInd, zsurfaceLoc = get_Surface(mtrue, XLoc)
phiSurface = phi[surfaceInd]
phiScale = 0.
if(survey == "Pole-Dipole" or survey == "Pole-Pole"):
refInd = Utils.closestPoints(mesh, [xmax+60., 0.], gridLoc='CC')
# refPoint = CCLoc[refInd]
# refSurfaceInd = np.where(xSurface == refPoint[0])
# phiScale = np.median(phiSurface)
phiScale = phi[refInd]
phiSurface = phiSurface - phiScale
return XLoc, phiSurface, phiScale
def Plot_ChargesDensity(XYZ, sig0, sig1, R, E0, ax):
xr, yr, zr = np.unique(XYZ[:, 0]), np.unique(XYZ[:, 1]), np.unique(XYZ[:, 2])
xcirc = xr[np.abs(xr) <= R]
Et, Ep, Es = get_ElectricField(XYZ, sig0, sig1, R, E0)
rho = get_ChargesDensity(XYZ, sig0, sig1, R, Et, Ep)
ax.set_xlim([xr.min(), xr.max()])
ax.set_ylim([yr.min(), yr.max()])
ax.set_aspect('equal')
Cplot = ax.pcolor(xr, yr, rho.reshape(xr.size, yr.size))
cb1 = plt.colorbar(Cplot, ax=ax)
cb1.set_label(label= 'Charge Density ($C/m^2$)', size=ftsize_label) #weight='bold')
cb1.ax.tick_params(labelsize=ftsize_axis)
ax.plot(xcirc, np.sqrt(R**2-xcirc**2), '--k', xcirc, -np.sqrt(R**2-xcirc**2), '--k')
ax.set_ylabel('Y coordinate ($m$)', fontsize=ftsize_label)
ax.set_xlabel('X coordinate ($m$)', fontsize=ftsize_label)
ax.tick_params(labelsize=ftsize_axis)
ax.set_title('Charges Density', fontsize=ftsize_title)
return ax
def unique(eq):
eq = eqsize(eq)
    c1 = [None] * eq.size
    for i in range(0, eq.size):
        c1[i] = hash(eq[i])
c1 = np.asarray(c1)
if c1.ndim == 1:
_, ia, ic = np.unique(c1, return_index=True, return_inverse=True)
ia = (ia[:, ]).conj().T
ic = (ic[:, ]).conj().T
u = eq[ia]
else:
a = c1
b = np.ascontiguousarray(a).view(
np.dtype((np.void, a.dtype.itemsize * a.shape[1])))
        _, ia, ic = np.unique(b, return_index=True, return_inverse=True)
        u = eq[ia]
    return u, ia, ic
def getTypeProblem (self, solution_filename):
''' Get the type of problem directly from the solution file (in case we do not have an info file)'''
if 'task' not in self.info.keys():
solution = np.array(data_converter.file_to_array(solution_filename))
target_num = solution.shape[1]
self.info['target_num']=target_num
if target_num == 1: # if we have only one column
solution = np.ravel(solution) # flatten
nbr_unique_values = len(np.unique(solution))
if nbr_unique_values < len(solution)/8:
# Classification
self.info['label_num'] = nbr_unique_values
if nbr_unique_values == 2:
self.info['task'] = 'binary.classification'
self.info['target_type'] = 'Binary'
else:
self.info['task'] = 'multiclass.classification'
self.info['target_type'] = 'Categorical'
else:
# Regression
self.info['label_num'] = 0
self.info['task'] = 'regression'
self.info['target_type'] = 'Numerical'
else:
# Multilabel or multiclass
self.info['label_num'] = target_num
self.info['target_type'] = 'Binary'
if any(item > 1 for item in map(np.sum,solution.astype(int))):
self.info['task'] = 'multilabel.classification'
else:
self.info['task'] = 'multiclass.classification'
return self.info['task']
def tiedrank(a):
''' Return the ranks (with base 1) of a list resolving ties by averaging.
This works for numpy arrays.'''
m=len(a)
# Sort a in ascending order (sa=sorted vals, i=indices)
i=a.argsort()
sa=a[i]
# Find unique values
uval=np.unique(a)
# Test whether there are ties
R=np.arange(m, dtype=float)+1 # Ranks with base 1
if len(uval)!=m:
# Average the ranks for the ties
oldval=sa[0]
newval=sa[0]
k0=0
for k in range(1,m):
newval=sa[k]
if newval==oldval:
# moving average
R[k0:k+1]=R[k-1]*(k-k0)/(k-k0+1)+R[k]/(k-k0+1)
else:
k0=k;
oldval=newval
# Invert the index
S=np.empty(m)
S[i]=R
return S
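For reference, tiedrank above computes the same average ranks as scipy.stats.rankdata with its default 'average' tie-handling (assuming SciPy is available):
import numpy as np
from scipy.stats import rankdata

a = np.array([10., 20., 20., 30.])
print(tiedrank(a))                    # [1.  2.5 2.5 4. ]
print(rankdata(a, method='average'))  # [1.  2.5 2.5 4. ]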
def binarization (array):
''' Takes a binary-class datafile and turn the max value (positive class) into 1 and the min into 0'''
array = np.array(array, dtype=float) # conversion needed to use np.inf after
if len(np.unique(array)) > 2:
raise ValueError ("The argument must be a binary-class datafile. {} classes detected".format(len(np.unique(array))))
    # manipulation which aims at avoiding errors in data with, for example, classes '1' and '2'
array[array == np.amax(array)] = np.inf
array[array == np.amin(array)] = 0
array[array == np.inf] = 1
return np.array(array, dtype=int)
def __init__(self, images, labels, fake_data=False):
if fake_data:
self._num_examples = 10000
else:
assert images.shape[0] == labels.shape[0], (
"images.shape: %s labels.shape: %s" % (images.shape,
labels.shape))
self._num_examples = images.shape[0]
# Convert shape from [num examples, rows, columns, depth]
# to [num examples, rows*columns] (assuming depth == 1)
self.imageShape = images.shape[1:]
self.imageChannels = self.imageShape[2]
images = images.reshape(images.shape[0],
images.shape[1] * images.shape[2] * images.shape[3])
# Convert from [0, 255] -> [0.0, 1.0].
images = images.astype(numpy.float32)
images = numpy.multiply(images, 1.0 / 255.0)
self._images = images
self._labels = labels
try:
if len(numpy.shape(self._labels)) == 1:
self._labels = dense_to_one_hot(self._labels,len(numpy.unique(self._labels)))
except:
traceback.print_exc()
self._epochs_completed = 0
self._index_in_epoch = 0
def cluster_service(path, service, cluster_size, prev_metadata=None):
filename = os.path.join(path, service["preprocessed_filename"])
df = pd.read_csv(filename, sep="\t", index_col='time', parse_dates=True)
initial_idx = None
if prev_metadata:
initial_idx = get_initial_clustering(service["name"], prev_metadata, df.columns)
# adjust cluster_size if an initial assigment has been found
if initial_idx is not None:
cluster_size = len(np.unique(initial_idx))
prefix = "%s/%s-cluster-%d" % (path, service["name"], cluster_size)
if os.path.exists(prefix + "_1.png"):
print("skip " + prefix)
return (None, None)
cluster_metrics, score, filenames = do_kshape(prefix, df, cluster_size, initial_idx)
if cluster_size < 2:
# no silhouette_score for cluster size 1
return (None, None)
print("silhouette_score: %f" % score)
# protect the write access to the metadata file
metadata_lock.acquire()
with metadata.update(path) as data:
for srv in data["services"]:
if srv["name"] == service["name"]:
if "clusters" not in srv:
srv["clusters"] = {}
d = dict(silhouette_score=score, filenames=filenames, metrics=cluster_metrics)
srv["clusters"][cluster_size] = d
metadata_lock.release()
return (service["name"], cluster_size)
def view_waveforms_clusters(data, halo, threshold, templates, amps_lim, n_curves=200, save=False):
nb_templates = templates.shape[1]
n_panels = numpy.ceil(numpy.sqrt(nb_templates))
mask = numpy.where(halo > -1)[0]
clust_idx = numpy.unique(halo[mask])
fig = pylab.figure()
square = True
    center = (len(data[0]) - 1)//2
for count, i in enumerate(xrange(nb_templates)):
if square:
pylab.subplot(n_panels, n_panels, count + 1)
if (numpy.mod(count, n_panels) != 0):
pylab.setp(pylab.gca(), yticks=[])
if (count < n_panels*(n_panels - 1)):
pylab.setp(pylab.gca(), xticks=[])
subcurves = numpy.where(halo == clust_idx[count])[0]
for k in numpy.random.permutation(subcurves)[:n_curves]:
pylab.plot(data[k], '0.5')
pylab.plot(templates[:, count], 'r')
pylab.plot(amps_lim[count][0]*templates[:, count], 'b', alpha=0.5)
pylab.plot(amps_lim[count][1]*templates[:, count], 'b', alpha=0.5)
xmin, xmax = pylab.xlim()
pylab.plot([xmin, xmax], [-threshold, -threshold], 'k--')
pylab.plot([xmin, xmax], [threshold, threshold], 'k--')
#pylab.ylim(-1.5*threshold, 1.5*threshold)
ymin, ymax = pylab.ylim()
pylab.plot([center, center], [ymin, ymax], 'k--')
pylab.title('Cluster %d' %i)
if nb_templates > 0:
pylab.tight_layout()
if save:
pylab.savefig(os.path.join(save[0], 'waveforms_%s' %save[1]))
pylab.close()
else:
pylab.show()
del fig
def check_consistent_length(*arrays):
"""Check that all arrays have consistent first dimensions.
Checks whether all objects in arrays have the same shape or length.
Parameters
----------
*arrays : list or tuple of input objects.
Objects that will be checked for consistent length.
"""
uniques = np.unique([_num_samples(X) for X in arrays if X is not None])
if len(uniques) > 1:
raise ValueError("Found arrays with inconsistent numbers of samples: "
"%s" % str(uniques))
def transform(self, img, lbl):
"""transform
:param img:
:param lbl:
"""
img = img[:, :, ::-1]
img = img.astype(np.float64)
img -= self.mean
img = m.imresize(img, (self.img_size[0], self.img_size[1]))
# Resize scales images from 0 to 255, thus we need
# to divide by 255.0
img = img.astype(float) / 255.0
# NHWC -> NCWH
img = img.transpose(2, 0, 1)
classes = np.unique(lbl)
lbl = lbl.astype(float)
lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
lbl = lbl.astype(int)
if not np.all(classes == np.unique(lbl)):
print("WARN: resizing labels yielded fewer classes")
if not np.all(np.unique(lbl) < self.n_classes):
raise ValueError("Segmentation map contained invalid class values")
img = torch.from_numpy(img).float()
lbl = torch.from_numpy(lbl).long()
return img, lbl
def fit(self, X, C, y, regions, kernelType, reml=True, maxiter=100):
#construct a list of kernel names (one for each region)
if (kernelType == 'adapt'): kernelNames = self.buildKernelAdapt(X, C, y, regions, reml, maxiter)
else: kernelNames = [kernelType] * len(regions)
#perform optimization
kernelObj, hyp_kernels, sig2e, fixedEffects = self.optimize(X, C, y, kernelNames, regions, reml, maxiter)
#compute posterior distribution
Ktraintrain = kernelObj.getTrainKernel(hyp_kernels)
post = self.infExact_scipy_post(Ktraintrain, C, y, sig2e, fixedEffects)
#fix intercept if phenotype is binary
if (len(np.unique(y)) == 2):
controls = (y<y.mean())
cases = ~controls
meanVec = C.dot(fixedEffects)
mu, var = self.getPosteriorMeanAndVar(np.diag(Ktraintrain), Ktraintrain, post, meanVec)
fixedEffects[0] -= optimize.minimize_scalar(self.getNegLL, args=(mu, np.sqrt(sig2e+var), controls, cases), method='brent').x
#construct trainObj
trainObj = dict([])
trainObj['sig2e'] = sig2e
trainObj['hyp_kernels'] = hyp_kernels
trainObj['fixedEffects'] = fixedEffects
trainObj['kernelNames'] = kernelNames
return trainObj
def load_scan(path):
slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
#slices.sort(key = lambda x: int(x.InstanceNumber))
acquisitions = [x.AcquisitionNumber for x in slices]
vals, counts = np.unique(acquisitions, return_counts=True)
    vals = vals[::-1]  # reverse the order so the later acquisitions come first (np.unique always returns the values sorted, e.g. 1, 2, ...)
    counts = counts[::-1]
    ## take the acquisition that has more entries; if the counts are identical, take the later entry
acq_val_sel = vals[np.argmax(counts)]
##acquisitions = sorted(np.unique(acquisitions), reverse=True)
if len(vals) > 1:
print ("WARNING ##########: MULTIPLE acquisitions & counts, acq_val_sel, path: ", vals, counts, acq_val_sel, path)
slices2= [x for x in slices if x.AcquisitionNumber == acq_val_sel]
slices = slices2
    ## ONE path includes 2 acquisitions (2 sets); take the latter acquisition only, which typically is better than the first/previous ones.
## example of the '../input/stage1/b8bb02d229361a623a4dc57aa0e5c485'
#slices.sort(key = lambda x: int(x.ImagePositionPatient[2])) # from v 8, BUG should be float
slices.sort(key = lambda x: float(x.ImagePositionPatient[2])) # from v 9
try:
slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
except:
slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
for s in slices:
s.SliceThickness = slice_thickness
return slices
def largest_label_volume(im, bg=-1):
vals, counts = np.unique(im, return_counts=True)
counts = counts[vals != bg]
vals = vals[vals != bg]
if len(counts) > 0:
return vals[np.argmax(counts)]
else:
return None
#image=sample_image
def get_chunks_by_gem_group(self):
""" Return exactly one chunk per gem group."""
gem_group_arr = self.get_column('gem_group')
# verify gem groups are sorted
assert np.all(np.diff(gem_group_arr)>=0)
unique_ggs = np.unique(gem_group_arr)
gg_key = lambda i: gem_group_arr[i]
chunk_iter = self.get_chunks_from_partition(unique_ggs, gg_key)
for (gg, chunk) in zip(unique_ggs, chunk_iter):
yield (gg, chunk[0], chunk[1])
def compute_readpairs_per_umi_threshold(reads, subsample_rate):
''' Compute a threshold above which the UMIs are unlikely to be PCR off-products.
reads (np.array(int)) - Read pairs for each UMI
subsample_rate (float) - Subsample reads to this fraction.
Returns threshold (int) - The RPPU threshold in the subsampled space '''
if len(np.unique(reads)) < 2:
print 'Skipping RPPU threshold calculation.'
return 1
print 'RPPU subsample rate: %0.4f' % subsample_rate
reads = np.random.binomial(reads, subsample_rate)
reads = reads[reads > 0]
if len(np.unique(reads)) < 2:
print 'Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.'
return 1
new_n50 = tk_stats.NX(reads, 0.5)
print 'New N50: %d:' % new_n50
# Log-transform counts
log_reads = np.log(reads)
# Run K-Means. Reshape necessary because kmeans takes a matrix.
kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1,1)))
kmeans.predict(log_reads.reshape((-1,1)))
# Take the cluster with the smallest mean
min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]
print 'RPPU component means: ' + str(list(iter(np.exp(kmeans.cluster_centers_))))
print 'RPPU component members: ' + str(np.bincount(kmeans.labels_))
# Take the max element in the min-cluster
threshold = np.max(reads[kmeans.labels_ == min_cluster])
return threshold
def append_data_column(ds, column):
# Extend the dataset to fit the new data
new_count = column.shape[0]
existing_count = ds.shape[0]
ds.resize((existing_count + new_count,))
levels = get_levels(ds)
if levels is not None:
# update levels if we have new unique values
if type(column.values) == p.Categorical:
added_levels = set(column.values.categories) - set(levels)
elif len(column) == 0:
# Workaround for bug in pandas - get a crash in .unique() for an empty series
added_levels = set([])
else:
added_levels = set(column.unique()) - set(levels)
new_levels = list(levels)
new_levels.extend(added_levels)
# Check if the new categorical column has more levels
# than the current bit width supports.
# If so, rewrite the existing column data w/ more bits
if len(new_levels) > np.iinfo(ds.dtype).max:
new_dtype = pick_cat_dtype(len(new_levels))
ds = widen_cat_column(ds, new_dtype)
new_levels = np.array(new_levels, dtype=np.object)
new_data = make_index_array(new_levels, column.values, ds.dtype)
clear_levels(ds)
create_levels(ds, new_levels)
else:
new_data = column
# Append new data
ds[existing_count:(existing_count + new_count)] = new_data
def _label2rgb_avg(label_field, image, bg_label=0, bg_color=(0, 0, 0)):
"""Visualise each segment in `label_field` with its mean color in `image`.
Parameters
----------
label_field : array of int
A segmentation of an image.
image : array, shape ``label_field.shape + (3,)``
A color image of the same spatial shape as `label_field`.
bg_label : int, optional
A value in `label_field` to be treated as background.
bg_color : 3-tuple of int, optional
The color for the background label
Returns
-------
out : array, same shape and type as `image`
The output visualization.
"""
out = np.zeros_like(image)
labels = np.unique(label_field)
bg = (labels == bg_label)
if bg.any():
labels = labels[labels != bg_label]
out[bg] = bg_color
for label in labels:
mask = (label_field == label).nonzero()
color = image[mask].mean(axis=0)
out[mask] = color
return out