Python numpy module: int() example source code
The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.int().
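For orientation, here is a minimal, self-contained sketch of what numpy.int() does. Note that np.int was simply an alias for the built-in int; it was deprecated in NumPy 1.20 and removed in NumPy 1.24, so the examples on this page assume an older NumPy release.
import numpy as np
# np.int is an alias for Python's built-in int (deprecated in NumPy 1.20, removed in 1.24).
x = np.int(3.7)                       # -> 3, identical to int(3.7)
a = np.zeros((2, 3), dtype=np.int)    # platform-default integer array
# On modern NumPy, use int, np.int_ or an explicit width such as np.int64 instead.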
def pad_batch(mini_batch):
mini_batch_size = len(mini_batch)
# print mini_batch.shape
# print mini_batch
max_sent_len1 = int(np.max([len(x[0]) for x in mini_batch]))
max_sent_len2 = int(np.max([len(x[1]) for x in mini_batch]))
# print max_sent_len1, max_sent_len2
# max_token_len = int(np.mean([len(val) for sublist in mini_batch for val in sublist]))
main_matrix1 = np.zeros((mini_batch_size, max_sent_len1), dtype=np.int)
main_matrix2 = np.zeros((mini_batch_size, max_sent_len2), dtype=np.int)
for idx1, i in enumerate(mini_batch):
for idx2, j in enumerate(i[0]):
try:
main_matrix1[idx1, idx2] = j
except IndexError:
pass
for idx1, i in enumerate(mini_batch):
for idx2, j in enumerate(i[1]):
try:
main_matrix2[idx1, idx2] = j
except IndexError:
pass
main_matrix1_t = Variable(torch.from_numpy(main_matrix1))
main_matrix2_t = Variable(torch.from_numpy(main_matrix2))
# print main_matrix1_t.size()
# print main_matrix2_t.size()
return [main_matrix1_t, main_matrix2_t]
# return [Variable(torch.cat((main_matrix1_t, main_matrix2_t), 0))
# def pad_batch(mini_batch):
# # print mini_batch
# # print type(mini_batch)
# # print mini_batch.shape
# # for i, _ in enumerate(mini_batch):
# # print i, _
# return [Variable(torch.from_numpy(np.asarray(_))) for _ in mini_batch[0]]
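A minimal sketch of how pad_batch might be called. The toy batch below is hypothetical; it assumes an older NumPy where np.int still exists and an older PyTorch where torch.autograd.Variable is available.
import numpy as np
import torch
from torch.autograd import Variable
toy_batch = [([1, 2, 3], [4, 5]), ([6], [7, 8, 9])]   # two (sentence1, sentence2) pairs of token ids
left, right = pad_batch(toy_batch)
print(left.size(), right.size())   # each side is zero-padded to its longest sentence: (2, 3) and (2, 3)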
def getTrainTestKernel(self, params, Xtest):
self.checkParams(params)
params_kernels = params[len(self.kernels):]
#compute Kd and EE
Kd = np.zeros((self.n, Xtest[0].shape[0], len(self.kernels)))
params_ind = 0
kernel_paramsArr = params[len(self.kernels):]
for k_i, k in enumerate(self.kernels):
numHyp = k.getNumParams()
kernelParams_range = np.array(xrange(params_ind, params_ind+numHyp), dtype=np.int)
kernel_params = kernel_paramsArr[kernelParams_range]
Kd[:,:,k_i] = k.getTrainTestKernel(kernel_params, Xtest[k_i])
params_ind += numHyp
EE = elsympol(Kd, len(self.kernels))
#compute K
K=0
for i in xrange(len(self.kernels)): K += np.exp(2*params[i]) * EE[:,:,i+1]
return K
def getTestKernelDiag(self, params, Xtest):
self.checkParams(params)
params_kernels = params[len(self.kernels):]
#compute Kd and EE
Kd = np.zeros((Xtest[0].shape[0], 1, len(self.kernels)))
params_ind = 0
kernel_paramsArr = params[len(self.kernels):]
for k_i, k in enumerate(self.kernels):
numHyp = k.getNumParams()
kernelParams_range = np.array(xrange(params_ind, params_ind+numHyp), dtype=np.int)
kernel_params = kernel_paramsArr[kernelParams_range]
Kd[:,0,k_i] = k.getTestKernelDiag(kernel_params, Xtest[k_i])
params_ind += numHyp
EE = elsympol(Kd, len(self.kernels))
#compute K
K=0
for i in xrange(len(self.kernels)): K += np.exp(2*params[i]) * EE[:,:,i+1]
return K
def Kdim(self, kdimParams):
if (self.prevKdimParams is not None and np.max(np.abs(kdimParams-self.prevKdimParams)) < self.epsilon): return self.cache['Kdim']
K = np.zeros((self.n, self.n, len(self.kernels)))
params_ind = 0
for k_i, k in enumerate(self.kernels):
numHyp = k.getNumParams()
kernelParams_range = np.array(xrange(params_ind, params_ind+numHyp), dtype=np.int)
kernel_params = kdimParams[kernelParams_range]
if ((numHyp == 0 and 'Kdim' in self.cache) or (numHyp>0 and self.prevKdimParams is not None and np.max(np.abs(kernel_params-self.prevKdimParams[kernelParams_range])) < self.epsilon)):
K[:,:,k_i] = self.cache['Kdim'][:,:,k_i]
else:
K[:,:,k_i] = k.getTrainKernel(kernel_params)
params_ind += numHyp
self.prevKdimParams = kdimParams.copy()
self.cache['Kdim'] = K
return K
def get_depth_info_json(info):
fixed_info = {int(x): y for (x, y) in info.iteritems()}
total_depth_counts = sum(fixed_info.values())
median_depth = None
sorted_depths = sorted(fixed_info.keys())
seen_depth_count = 0
mean_depth = 0.0
for depth in sorted_depths:
seen_depth_count += fixed_info[depth]
mean_depth += float(depth*fixed_info[depth])/float(total_depth_counts)
if seen_depth_count > total_depth_counts/2 and median_depth is None:
median_depth = depth
zero_cov_fract = tk_stats.robust_divide(float(fixed_info.get(0, 0.0)), float(total_depth_counts))
return (mean_depth, median_depth, zero_cov_fract)
def frame_to_json(bboxes, targets):
"""
{'polygon': [{'x': [1,2,3], 'y': [2,3,4], 'object': 3}]}
Also decorated (see decorate_frame with pretty_names, polygons, targets)
"""
assert(len(bboxes) == len(targets))
if len(bboxes):
bb = bboxes.astype(np.int32)
return {'polygon':
[{'x': [int(b[0]), int(b[0]), int(b[2]), int(b[2])],
'y': [int(b[1]), int(b[3]), int(b[3]), int(b[1])],
'object': int(object_id)} \
for object_id, b in zip(targets, bb)]}
else:
return {}
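A quick, hypothetical call to frame_to_json, showing how the float boxes are cast to plain Python ints for JSON serialization:
import numpy as np
bboxes = np.array([[10.2, 20.7, 30.1, 40.9]])   # one [x1, y1, x2, y2] box
targets = [7]                                    # one object id
print(frame_to_json(bboxes, targets))
# {'polygon': [{'x': [10, 10, 30, 30], 'y': [20, 40, 40, 20], 'object': 7}]}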
def plot_confusion_matrix(cm, clf_target_names, title='Confusion matrix', cmap=plt.cm.jet):
target_names = map(lambda key: key.replace('_','-'), clf_target_names)
for idx in range(len(cm)):
cm[idx,:] = (cm[idx,:] * 100.0 / np.sum(cm[idx,:])).astype(np.int)
plt.imshow(cm, interpolation='nearest', cmap=cmap)
# plt.matshow(cm)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(clf_target_names))
plt.xticks(tick_marks, target_names, rotation=45)
plt.yticks(tick_marks, target_names)
# plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
def load_ROI_mask(self):
proxy = nib.load(self.FLAIR_FILE)
image_array = np.asarray(proxy.dataobj)
mask = np.ones_like(image_array)
mask[np.where(image_array < 90)] = 0
# img = nib.Nifti1Image(mask, proxy.affine)
# nib.save(img, join(modalities_path,'mask.nii.gz'))
struct_element_size = (20, 20, 20)
mask_augmented = np.pad(mask, [(21, 21), (21, 21), (21, 21)], 'constant', constant_values=(0, 0))
mask_augmented = binary_closing(mask_augmented, structure=np.ones(struct_element_size, dtype=bool)).astype(
np.int)
return mask_augmented[21:-21, 21:-21, 21:-21].astype('bool')
def __draw_pk2(self):
self.__cleanPk2()
if self.units is not None:
unique_units = np.unique(self.units)
unique_units = unique_units.tolist()
pca_1,pca_2 = self.PCAusedList.currentText().split("-")
pca_1 = np.int(pca_1)-1
pca_2 = np.int(pca_2)-1
if self.wavePCAs[0].shape[0]>2:
xs = self.wavePCAs[:,pca_1]
ys = self.wavePCAs[:,pca_2]
self.PcaScatterItem = []
seg_num = 5000
for i,ite_unit in enumerate(unique_units):
mask = self.units==ite_unit
temp_xs = xs[mask]
temp_ys = ys[mask]
segs = int(ceil(temp_xs.shape[0]/float(seg_num)))
for j in range(segs):
temp_xs_j = temp_xs[j*seg_num:(j+1)*seg_num]
temp_ys_j = temp_ys[j*seg_num:(j+1)*seg_num]
self.PcaScatterItem.append(pg.ScatterPlotItem(temp_xs_j,temp_ys_j,pen=self.colors[ite_unit],brush=self.colors[ite_unit],size=3,symbol="o"))
for i in range(len(self.PcaScatterItem)):
self.pk2.addItem(self.PcaScatterItem[i])
def __unitsNumChanged(self):
if hasattr(self,"auto_result"):
if self.autoSortThisCheck.isChecked():
self.saveChannelCheck.setChecked(False)
self.chnResultPool.pop(self.selectChan,None)
self.units = self.auto_result.copy()
self.units[self.units>int(self.unitsNumWgt.currentText())] = 0
self.__draw_pk3()
self.__update_pk3_roi()
self.__draw_pk2()
if self.pca_3d is True:
self.__draw_3D_PCA()
# The SpikeSorting class uses this class to draw multiple lines quickly in a memory-efficient way.
def __read_condition(self):
'''
Read the parameter values for a single stimulus condition.
Returns nothing.
'''
# float32 -- SpikeTrain length in ms
self.__t_stop = np.fromfile(self._fsrc, dtype=np.float32, count=1)[0]
# float32 -- number of stimulus parameters
numelements = int(np.fromfile(self._fsrc, dtype=np.float32,
count=1)[0])
# [float32] * numelements -- stimulus parameter values
paramvals = np.fromfile(self._fsrc, dtype=np.float32,
count=numelements).tolist()
# organize the parameters into a dictionary with arbitrary names
paramnames = ['Param%s' % i for i in range(len(paramvals))]
self.__params = dict(zip(paramnames, paramvals))
def _load_spike_times(self, fetfilename):
"""Reads and returns the spike times and features"""
f = file(fetfilename, 'r')
# Number of clustering features is integer on first line
nbFeatures = int(f.readline().strip())
# Each subsequent line consists of nbFeatures values, followed by
# the spike time in samples.
names = ['fet%d' % n for n in xrange(nbFeatures)]
names.append('spike_time')
# Load into recarray
data = mlab.csv2rec(f, names=names, skiprows=1, delimiter=' ')
f.close()
# get features
features = np.array([data['fet%d' % n] for n in xrange(nbFeatures)])
# Return the spike_time column
return data['spike_time'], features.transpose()
def parse_audio_files(parent_dir,sub_dirs,file_ext='*.wav'):
ignored = 0
features, labels, name = np.empty((0,161)), np.empty(0), np.empty(0)
for label, sub_dir in enumerate(sub_dirs):
print sub_dir
for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
try:
mfccs, chroma, mel, contrast, tonnetz = extract_features(fn)
ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
features = np.vstack([features,ext_features])
l = [fn.split('-')[1]] * (mfccs.shape[0])
labels = np.append(labels, l)
except (KeyboardInterrupt, SystemExit):
raise
except:
ignored += 1
print "Ignored files: ", ignored
return np.array(features), np.array(labels, dtype = np.int)
def shorten_motifs( contig_motifs, highscore_motifs ):
"""
Keep only the shortest, most concise version of the high scoring
motifs (reduces redundancy).
"""
keeper_motifs = set(highscore_motifs.keys())
if len(highscore_motifs)>0:
shortest_contiguous = min([len(m.split("-")[0]) for m in highscore_motifs.keys()])
# (1) Sort by keys; shortest motif to longest
motifs_s = sorted(highscore_motifs, key=len)
# (2) For each motif, check if it's contained in a longer version of other motifs
for m in motifs_s:
motif_str = m.split("-")[0]
motif_idx = int(m.split("-")[1])
for remaining in list(keeper_motifs):
remaining_str = remaining.split("-")[0]
remaining_idx = int(remaining.split("-")[1])
match = re.search(motif_str, remaining_str)
if match != None and (motif_idx + match.start()) == remaining_idx and len(remaining_str) > len(motif_str):
# 3. If True, remove the longer version
keeper_motifs.remove(remaining)
return keeper_motifs
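A small illustration with hypothetical motif keys of the form 'MOTIF-index' (re is used inside shorten_motifs, so it must be imported in the same module). The longer motif that merely embeds the shorter one at the same offset is dropped:
import re   # required by shorten_motifs above
high = {'GATC-2': 12.0, 'TGATC-3': 11.5}    # 'TGATC' contains 'GATC' one position later
print(shorten_motifs(None, high))           # -> {'GATC-2'}; contig_motifs is unused here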
def spontaneousnode_count(
infecting_vec,
infected_vec,
node_vec,
D,
):
'''
Returns a vector with the count of spontaneous infections for nodes.
Arguments:
infecting_vec - vector of infecting event ids
infected_vec - vector of event ids
node_vec - vector of infected node ids
D - number of nodes
'''
spontaneous_nodes = node_vec[infecting_vec == infected_vec]
updates = np.zeros((D, 1))
for node in spontaneous_nodes:
updates[int(node)] += 1
return updates
def spontaneousmeme_count(
infecting_vec,
infected_vec,
eventmemes,
M,
):
'''
Returns a vector with the count of spontaneous infections for memes.
Arguments:
infecting_vec - vector of infecting event ids
infected_vec - vector of event ids
eventmemes - vector of meme ids
M - number of memes
'''
spontaneous_memes = eventmemes[infecting_vec == infected_vec]
updates = np.zeros((M, 1))
for meme in spontaneous_memes:
updates[int(meme)] += 1
return updates
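A worked toy example for the two counting helpers above (the event, node, and meme ids are hypothetical); an event is spontaneous when its infecting id equals its own id:
import numpy as np
infecting_vec = np.array([0, 0, 2, 2])
infected_vec  = np.array([0, 1, 2, 3])   # events 0 and 2 infect themselves
node_vec      = np.array([1, 0, 1, 2])
print(spontaneousnode_count(infecting_vec, infected_vec, node_vec, 3).ravel())
# -> [0. 2. 0.]  (node 1 produced both spontaneous events); spontaneousmeme_count works the same way per meme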
def infecting_node(infected_vec, infecting_vec, node_vec):
'''
Returns a vector of nodes of infecting events.
Arguments:
infecting_vec - vector of infecting event ids
infected_vec - vector of event ids
node_vec - vector of infected node ids
'''
infecting_node_vec = []
eventid_to_node = {}
for (evid, inf_evid, nodeid) in izip(infected_vec, infecting_vec,
node_vec):
eventid_to_node[int(evid)] = nodeid
infecting_node_vec.append(eventid_to_node[int(inf_evid)])
infecting_node_vec = np.array(infecting_node_vec).flatten()
return (infecting_node_vec, eventid_to_node)
def parse_args():
"""
Parse input arguments
"""
parser = argparse.ArgumentParser(description='Train SVMs (old skool)')
parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
default=0, type=int)
parser.add_argument('--def', dest='prototxt',
help='prototxt file defining the network',
default=None, type=str)
parser.add_argument('--net', dest='caffemodel',
help='model to test',
default=None, type=str)
parser.add_argument('--cfg', dest='cfg_file',
help='optional config file', default=None, type=str)
parser.add_argument('--imdb', dest='imdb_name',
help='dataset to train on',
default='voc_2007_trainval', type=str)
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
return args
def load_Y(X_signals_paths):
"""
Given attribute (train or test) of feature, read all 9 features into an
np ndarray of shape [sample_sequence_idx, time_step, feature_num]
argument: X_signals_paths str attribute of feature: 'train' or 'test'
return: np ndarray, tensor of features
"""
X_signals = []
for signal_type_path in X_signals_paths:
file = open(signal_type_path, 'rb')
# Read dataset from disk, dealing with text files' syntax
X_signals.append(
[np.array(serie, dtype=np.int) for serie in [
row.strip().split(' ') for row in file
]]
)
file.close()
return np.concatenate((X_signals[0],X_signals[1],X_signals[2],X_signals[3],X_signals[4],X_signals[5],X_signals[6]),axis=0)
def plot2D(data, title=None, viz_type=None, fs=None, line_names=None):
"""Visualize 2D data using plotly.
Parameters
----------
data : array_like
Data to be plotted, separated along the first dimension (rows).
title : string
Add title to be displayed on plot
viz_type : string{None, 'time', 'linFFT', 'logFFT'}
Type of data to be displayed. [Default: None]
fs : int
Sampling rate in Hz. [Default: 44100]
"""
layout = layout_2D(viz_type, title)
traces = prepare_2D_traces(data, viz_type, fs, line_names=line_names)
showTrace(traces, layout=layout, title=title)
def sph_harm_all(nMax, az, el, type='complex'):
'''Compute all spherical harmonic coefficients up to degree nMax.
Parameters
----------
nMax : (int)
Maximum degree of coefficients to be returned. n >= 0
az: (float), array_like
Azimuthal (longitudinal) coordinate [0, 2pi], also called Theta.
el : (float), array_like
Elevation (colatitudinal) coordinate [0, pi], also called Phi.
Returns
-------
y_mn : (complex float), array_like
Complex spherical harmonics of degrees n [0 ... nMax] and all corresponding
orders m [-n ... n], sampled at [az, el]. dim1 corresponds to az/el pairs,
dim2 to order/degree (m, n) pairs like 0/0, -1/1, 0/1, 1/1, -2/2, -1/2 ...
'''
m, n = mnArrays(nMax)
mA, azA = _np.meshgrid(m, az)
nA, elA = _np.meshgrid(n, el)
return sph_harm(mA, nA, azA, elA, type=type)
def kr_full_spec(fs, radius, NFFT, temperature=20):
"""Returns full spectrum kr
Parameters
----------
fs : int
Sampling rate in Hertz
radius : float
Radius
NFFT : int
Number of frequency bins
temperature : float, optional
Temperature in degrees Celsius (Default: 20 C)
Returns
-------
kr : array_like
kr vector of length NFFT/2 + 1 spanning the frequencies of 0:fs/2
"""
freqs = _np.linspace(0, fs / 2, NFFT / 2 + 1)
return kr(freqs, radius, temperature)
# DEBUG
def whiteNoise(fftData, noiseLevel=80):
'''Adds White Gaussian Noise of approx. 16dB crest to an FFT block.
Parameters
----------
fftData : array of complex floats
Input fftData block (e.g. from F/D/T or S/W/G)
noiseLevel : int, optional
Average noise Level in dB [Default: -80dB]
Returns
-------
noisyData : array of complex floats
Output fftData block including white gaussian noise
'''
dimFactor = 10**(noiseLevel / 20)
fftData = _np.atleast_2d(fftData)
channels = fftData.shape[0]
NFFT = fftData.shape[1] * 2 - 2
nNoise = _np.random.rand(channels, NFFT)
nNoise = dimFactor * nNoise / _np.mean(_np.abs(nNoise))
nNoiseSpectrum = _np.fft.rfft(nNoise, axis=1)
return fftData + nNoiseSpectrum
def radial_filter_fullspec(max_order, NFFT, fs, array_configuration, amp_maxdB=40):
"""Generate NFFT/2 + 1 modal radial filter of orders 0:max_order for frequencies 0:fs/2, wraps radial_filter()
Parameters
----------
max_order : int
Maximum order
NFFT : int
Order of FFT (number of bins), should be a power of 2.
fs : int
Sampling frequency
array_configuration : ArrayConfiguration
List/Tuple/ArrayConfiguration, see io.ArrayConfiguration
amp_maxdB : int, optional
Maximum modal amplification limit in dB [Default: 40]
Returns
-------
dn : array_like
Vector of modal frequency domain filter of shape [max_order + 1 x NFFT / 2 + 1]
"""
freqs = _np.linspace(0, fs / 2, NFFT / 2 + 1)
orders = _np.r_[0:max_order + 1]
return radial_filter(orders, freqs, array_configuration, amp_maxdB=amp_maxdB)
def spherical_noise(gridData=None, order_max=8, spherical_harmonic_bases=None):
''' Returns order-limited random weights on a spherical surface
Parameters
----------
gridData : io.SphericalGrid
SphericalGrid containing azimuth and colatitude
order_max : int, optional
Spherical order limit [Default: 8]
Returns
-------
noisy_weights : array_like, complex
Noisy weights
'''
if spherical_harmonic_bases is None:
if gridData is None:
raise TypeError('Either a grid or the spherical harmonic bases have to be provided.')
gridData = SphericalGrid(*gridData)
spherical_harmonic_bases = sph_harm_all(order_max, gridData.azimuth, gridData.colatitude)
else:
order_max = _np.int(_np.sqrt(spherical_harmonic_bases.shape[1]) - 1)
return _np.inner(spherical_harmonic_bases, _np.random.randn((order_max + 1) ** 2) + 1j * _np.random.randn((order_max + 1) ** 2))
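The order_max recovery above relies on a full spherical harmonic basis having (order_max + 1)**2 columns; a quick sanity check of that arithmetic (81 bases correspond to order 8):
import numpy as np
n_bases = 81
order_max = int(np.sqrt(n_bases) - 1)   # -> 8, since (8 + 1)**2 == 81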
def place_graph(graphs, half_block_width, img, i, j, x, y):
"""
Rendering neuron block
:param graphs : Neuron graphs
:param half_block_width : int(neuron_block_width / 2)
:param img : Canvas
:param i : i-th hidden layer
:param j : j-th neuron in i-th hidden layer
:param x : (x, y) is the center of the neuron graph on the canvas
:param y : (x, y) is the center of the neuron graph on the canvas
:return : None
"""
############################################################
# Write your code here! #
############################################################
pass
############################################################
# End #
############################################################
def place_graph(graphs, half_block_width, img, i, j, x, y):
"""
Render neuron graph
:param graphs : Neuron graphs
:param half_block_width : int(neuron_graph_width / 2)
:param img : Canvas
:param i : i-th hidden layer
:param j : j-th neuron in i-th hidden layer
:param x : (x, y) is the center of the neuron graph on the canvas
:param y : (x, y) is the center of the neuron graph on the canvas
"""
############################################################
# Write your code here! #
############################################################
graph = graphs[i][j]
img[y - half_block_width:y + half_block_width, x - half_block_width:x + half_block_width] = graph
############################################################
# End #
############################################################
def construct_train_valid_sets(self, data, p=1/3.):
''' Construct train/valid sets from a dataset by selecting a fraction p of the samples for validation and the rest for training.
data: tuple (X,Y)
p: real in (0, 1), the fraction of samples to take as validation.
The samples will be selected randomly.
'''
x = data[0]
y = data[1]
nbr = x.shape[0]
nbr_vald = int(nbr * p)
index = np.arange(nbr)
# shuffle the index
np.random.shuffle(index)
idx_valid = index[:nbr_vald]
idx_train = index[nbr_vald:]
x_train = x[idx_train]
y_train = y[idx_train]
x_valid = x[idx_valid]
y_valid = y[idx_valid]
return [(x_train, y_train), (x_valid, y_valid)]
def project_shapes(self, set_y, bbox, window):
"""Project shapes from original plane to a new one [-1,+1].
"""
nbr = set_y.shape[0]
nfids = int(set_y.shape[1]/2)
std_w= window[0]
std_h = window[1]
for i in xrange(nbr):
XY=set_y[i,:]
X = XY[:nfids]
Y = XY[nfids:]
bb = bbox[i,:]
center_x = bb[0] + bb[2]/2 # x_up + w/2
center_y = bb[1] + bb[3]/2 # y_up + h/2
X = (X - center_x) / (std_w/2)
Y = (Y - center_y) / (std_h /2)
XY=np.concatenate((X, Y))
set_y[i] = XY
return set_y
def GenCR(MCMCPar,pCR):
if type(pCR) is np.ndarray:
p=np.ndarray.tolist(pCR)[0]
else:
p=pCR
CR=np.zeros((MCMCPar.seq * MCMCPar.steps),dtype=np.float)
L = np.random.multinomial(MCMCPar.seq * MCMCPar.steps, p, size=1)
L2 = np.concatenate((np.zeros((1),dtype=np.int), np.cumsum(L)),axis=0)
r = np.random.permutation(MCMCPar.seq * MCMCPar.steps)
for zz in range(0,MCMCPar.nCR):
i_start = L2[zz]
i_end = L2[zz+1]
idx = r[i_start:i_end]
CR[idx] = np.float(zz+1)/MCMCPar.nCR
CR = np.reshape(CR,(MCMCPar.seq,MCMCPar.steps))
return CR, L
def binarization (array):
''' Takes a binary-class datafile and turn the max value (positive class) into 1 and the min into 0'''
array = np.array(array, dtype=float) # conversion needed to use np.inf after
if len(np.unique(array)) > 2:
raise ValueError ("The argument must be a binary-class datafile. {} classes detected".format(len(np.unique(array))))
# manipulation which aims at avoiding errors in data with, for example, classes '1' and '2'.
array[array == np.amax(array)] = np.inf
array[array == np.amin(array)] = 0
array[array == np.inf] = 1
return np.array(array, dtype=int)
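A quick check of binarization on a hypothetical two-class label vector; the larger class label becomes 1 and the smaller becomes 0:
import numpy as np
print(binarization(np.array([2, 1, 2, 1])))   # -> [1 0 1 0]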
def sparse_file_to_sparse_list (filename, verbose=True):
''' Converts a sparse data file to a sparse list, so that :
sparse_list[i][j] = (a,b) means matrix[i][a]=b'''
data_file = open(filename, "r")
if verbose: print ("Reading {}...".format(filename))
lines = data_file.readlines()
if verbose: print ("Converting {} to correct array")
data = [lines[i].split(' ') for i in range (len(lines))]
del lines #djajetic 11.11.2015 questionable
if verbose: print ("Converting {} to sparse list".format (filename))
return [[tuple(map(int, data[i][j].rstrip().split(':'))) for j in range(len(data[i])) if data[i][j] != '\n'] for i in range (len(data))]
def convert_to_bin(Ycont, nval, verbose=True):
''' Convert numeric vector to binary (typically classification target values)'''
if verbose: print ("\t_______ Converting to binary representation")
Ybin=[[0]*nval for x in xrange(len(Ycont))]
for i in range(len(Ybin)):
line = Ybin[i]
line[np.int(Ycont[i])]=1
Ybin[i] = line
return Ybin
def galfit_getcentralcoordinate(modelfile,coordorigin=1,verbose=True):
"""
Return the central coordinates of a GALFIT model extracted using the reference image WCS and the FITSECT keyword
--- INPUT ---
modelfile Path and name to GALFIT model fits file to retrieve central coordinates for
coordorigin Origin of coordinates in reference image to use when converting pixels to degrees (skycoord)
verbose Toggle verbosity
--- EXAMPLE OF USE ---
fileG = '/Volumes/DATABCKUP2/TDOSEextractions/models_cutouts/model8685multicomponent/model_acs_814w_candels-cdfs-02_cut_v1.0_id8685_cutout7p0x7p0arcsec.fits' # Gauss components
fileS = '/Volumes/DATABCKUP2/TDOSEextractions/models_cutouts/model8685multicomponent/model_acs_814w_candels-cdfs-02_cut_v1.0_id9262_cutout2p0x2p0arcsec.fits' # Sersic components
xpix, ypix, ra_model, dec_model = tu.galfit_getcentralcoordinate(fileG,coordorigin=1)
"""
if verbose: print ' - Will extract central coordinates from '+modelfile
refimg_hdr = pyfits.open(modelfile)[1].header
model_hdr = pyfits.open(modelfile)[2].header
imgwcs = wcs.WCS(tu.strip_header(refimg_hdr.copy()))
fit_region = model_hdr['FITSECT']
cutrange_low_x = int(float(fit_region.split(':')[0].split('[')[-1]))
cutrange_low_y = int(float(fit_region.split(',')[-1].split(':')[0]))
xsize = model_hdr['NAXIS1']
ysize = model_hdr['NAXIS2']
xpix = cutrange_low_x + int(xsize/2.)
ypix = cutrange_low_y + int(ysize/2.)
if verbose: print ' - Converting pixel position to coordinates using a pixel origin='+str(coordorigin)
skycoord = wcs.utils.pixel_to_skycoord(xpix,ypix,imgwcs,origin=coordorigin)
ra_model = skycoord.ra.value
dec_model = skycoord.dec.value
return xpix,ypix,ra_model,dec_model
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
def do_kshape(name_prefix, df, cluster_size, initial_clustering=None):
columns = df.columns
matrix = []
for c in columns:
matrix.append(zscore(df[c]))
res = kshape(matrix, cluster_size, initial_clustering)
labels, score = silhouette_score(np.array(matrix), res)
# keep a reference of which metrics are in each cluster
cluster_metrics = defaultdict(list)
# we keep it in a dict: cluster_metrics[<cluster_nr>]{<metric_a>, <metric_b>}
for i, col in enumerate(columns):
cluster_metrics[int(labels[i])].append(col)
filenames = []
for i, (centroid, assigned_series) in enumerate(res):
d = {}
for serie in assigned_series:
d[columns[serie]] = pd.Series(matrix[serie], index=df.index)
d["centroid"] = pd.Series(centroid, index=df.index)
df2 = pd.DataFrame(d)
figure = df2.plot()
figure.legend(loc='center left', bbox_to_anchor=(1, 0.5))
name = "%s_%d" % (name_prefix, (i+1))
filename = name + ".tsv.gz"
print(filename)
df2.to_csv(filename, sep="\t", compression='gzip')
filenames.append(os.path.basename(filename))
graphs.write(df2, name + ".png")
return cluster_metrics, score, filenames
def get_initial_clustering(service, metadata, metrics):
s_score, cluster_metrics = msu.get_cluster_metrics(metadata, service)
if not cluster_metrics:
return None
common_metrics_all = set()
initial_idx = np.zeros(len(metrics), dtype=np.int)
for key, value in cluster_metrics.iteritems():
other_metrics = value['other_metrics']
common_metrics = set(metrics) & set(other_metrics)
for metric in list(common_metrics):
initial_idx[list(metrics).index(metric)] = int(key)
common_metrics_all = (common_metrics_all | common_metrics)
# assign remaining metrics to a new cluster (if any)
remaining_metrics = list(metrics - common_metrics_all)
remaining_cluster = max(initial_idx) + 1
if remaining_metrics:
for metric in remaining_metrics:
initial_idx[list(metrics).index(metric)] = remaining_cluster
return initial_idx
def deriveKernel(self, params, i):
self.checkParamsI(params, i)
params_kernels = params[len(self.kernels):]
#sf2 derivatives
if (i < len(self.kernels)):
EE = self.getEE(params_kernels)
#if (i==2): Z = 2*np.exp(2*params[i]) * EE[:,:,i+1]; print i, Z[:5, :5]; sys.exit(0)
return 2*np.exp(2*params[i]) * EE[:,:,i+1]
#params_kernel derivatives
else:
params_ind = len(self.kernels)
for k_i, k in enumerate(self.kernels):
numHyp = k.getNumParams()
if (i not in xrange(params_ind, params_ind+numHyp)):
params_ind += numHyp
continue
#now we found our kernel
dKj = k.deriveKernel(params[params_ind:params_ind+numHyp], i-params_ind)
Kd = self.Kdim(params_kernels)
range1 = np.array(xrange(0,k_i), dtype=np.int)
range2 = np.array(xrange(k_i+1, len(self.kernels)), dtype=np.int)
Kd_nocov = Kd[:, :, np.concatenate((range1, range2))]
E = elsympol(Kd_nocov, len(self.kernels)-1) #R-1th elementary sym polyn
K=0
for ii in xrange(len(self.kernels)):
K += E[:,:,ii]*np.exp(2*params[ii])
#if (i==5): Z = dKj * K; print i, Z[:5, :5]; sys.exit(0)
return dKj * K
raise Exception('Invalid parameter')
def load_scan(path):
slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
#slices.sort(key = lambda x: int(x.InstanceNumber))
acquisitions = [x.AcquisitionNumber for x in slices]
vals, counts = np.unique(acquisitions, return_counts=True)
vals = vals[::-1] # reverse order so the later acquisitions come first (np.unique always returns the values in ascending order)
counts = counts[::-1]
## take the acquisition that has more entries; if the counts are identical, take the later one
acq_val_sel = vals[np.argmax(counts)]
##acquisitions = sorted(np.unique(acquisitions), reverse=True)
if len(vals) > 1:
print ("WARNING ##########: MULTIPLE acquisitions & counts, acq_val_sel, path: ", vals, counts, acq_val_sel, path)
slices2= [x for x in slices if x.AcquisitionNumber == acq_val_sel]
slices = slices2
## ONE path includes 2 acquisitions (2 sets); take the latter acquisition only, which typically is better than the first/previous ones.
## example of the '../input/stage1/b8bb02d229361a623a4dc57aa0e5c485'
#slices.sort(key = lambda x: int(x.ImagePositionPatient[2])) # from v 8, BUG should be float
slices.sort(key = lambda x: float(x.ImagePositionPatient[2])) # from v 9
try:
slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
except:
slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
for s in slices:
s.SliceThickness = slice_thickness
return slices
def generate_markers_3d(image):
#Creation of the internal Marker
marker_internal = image < -400
marker_internal_labels = np.zeros(image.shape).astype(np.int16)
for i in range(marker_internal.shape[0]):
marker_internal[i] = segmentation.clear_border(marker_internal[i])
marker_internal_labels[i] = measure.label(marker_internal[i])
#areas = [r.area for r in measure.regionprops(marker_internal_labels)]
areas = [r.area for i in range(marker_internal.shape[0]) for r in measure.regionprops(marker_internal_labels[i])]
for i in range(marker_internal.shape[0]):
areas = [r.area for r in measure.regionprops(marker_internal_labels[i])]
areas.sort()
if len(areas) > 2:
for region in measure.regionprops(marker_internal_labels[i]):
if region.area < areas[-2]:
for coordinates in region.coords:
marker_internal_labels[i, coordinates[0], coordinates[1]] = 0
marker_internal = marker_internal_labels > 0
#Creation of the external Marker
# 3x3 structuring element with connectivity 1, used by default
struct1 = ndimage.generate_binary_structure(2, 1)
struct1 = struct1[np.newaxis,:,:] # expand by z axis .
external_a = ndimage.binary_dilation(marker_internal, structure=struct1, iterations=10)
external_b = ndimage.binary_dilation(marker_internal, structure=struct1, iterations=55)
marker_external = external_b ^ external_a
#Creation of the Watershed Marker matrix
#marker_watershed = np.zeros((512, 512), dtype=np.int) # origi
marker_watershed = np.zeros((marker_external.shape), dtype=np.int)
marker_watershed += marker_internal * 255
marker_watershed += marker_external * 128
return marker_internal, marker_external, marker_watershed
def seq(start, stop, step=1):
n = int(round((stop - start)/float(step)))
if n > 1:
return([start + step*i for i in range(n+1)])
else:
return([])
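seq builds an inclusive range of offsets that draw_circles below sweeps over; a quick sanity check with hypothetical values:
print(seq(-2.0, 2.0, 1.0))   # -> [-2.0, -1.0, 0.0, 1.0, 2.0]
print(seq(0, 0.4, 1))        # -> [] when fewer than two steps fit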
def draw_circles(image,cands,origin,spacing):
#make empty matrix, which will be filled with the mask
image_mask = np.zeros(image.shape, dtype=np.int16)
#run over all the nodules in the lungs
for ca in cands.values:
#get middle x-, y- and z-world coordinate of the nodule
#radius = np.ceil(ca[4])/2 ## original: replaced the ceil with a very minor increase of 1% ....
radius = (ca[4])/2 + 0.51 * spacing[0] # increase by roughly half of the z-direction spacing (trying to capture a wider region/border for learning and to address the coarse net)
coord_x = ca[1]
coord_y = ca[2]
coord_z = ca[3]
image_coord = np.array((coord_z,coord_y,coord_x))
#determine voxel coordinate given the worldcoordinate
image_coord = world_2_voxel(image_coord,origin,spacing)
#determine the range of the nodule
#noduleRange = seq(-radius, radius, RESIZE_SPACING[0]) # original, uniform spacing
noduleRange_z = seq(-radius, radius, spacing[0])
noduleRange_y = seq(-radius, radius, spacing[1])
noduleRange_x = seq(-radius, radius, spacing[2])
#x = y = z = -2
#create the mask
for x in noduleRange_x:
for y in noduleRange_y:
for z in noduleRange_z:
coords = world_2_voxel(np.array((coord_z+z,coord_y+y,coord_x+x)),origin,spacing)
#if (np.linalg.norm(image_coord-coords) * RESIZE_SPACING[0]) < radius: ### original (constrained to a uniform RESIZE)
if (np.linalg.norm((image_coord-coords) * spacing)) < radius:
image_mask[int(np.round(coords[0])),int(np.round(coords[1])),int(np.round(coords[2]))] = int(1)
return image_mask
def _unpack_header_adjunct(hdr):
"""
Unpacks the adjunct from a BLUE file header dictionary. Expects
the 'adjunct' field to be a 256 byte raw string, or None (in which
case it returns with default values).
"""
# Convert the adjunct (file type specific) part of the header
file_class = int(hdr['type'] / 1000)
endian = _rep_tran[hdr['head_rep']]
hdr.update(_unpack_blue_struct(hdr['adjunct'],
_bluestructs['T%dADJUNCT' % file_class],
endian))
del hdr['adjunct']
if file_class == 3:
hdr['subr'] = _unpack_blue_struct_array(hdr['subr'],
_bluestructs['SUBRECSTRUCT'],
hdr['subrecords'], endian)
elif file_class == 5:
hdr['comp'] = _unpack_blue_struct_array(hdr['comp'],
_bluestructs['COMPSTRUCT'],
hdr['components'], endian)
hdr['quadwords'] = _unpack_blue_struct(hdr['quadwords'],
_bluestructs['T5QUADWORDS'],
endian)
elif file_class == 6:
# For Type 6000 files, the hdr['subr'] field gets filled in from
# information in the extended header, which is not unpacked
# until after the main header and header adjunct are unpacked.
# See _open_t6subrecords(), which gets called by readheader().
# Just initialize it to an empty list here.
hdr['subr'] = []
def pack_header(hdr, structured=0):
"""
Returns the header in the form of a string.
Pack the given BLUE header dictionary to its binary format for
storage on disk. All keywords, main and extended, are packed
and updated in the hdr dict that is passed in.
If you intend on writing this header out to disk, do not forget to
write out the extended header as well. It should be written at
512*hdr['ext_start'] bytes offset from the beginning of the file
and the extended header itself should be padded out to a multiple
of 512 bytes.
If there is an extended header present, setting the optional
<structured> argument to true (default is false) will cause any
Python dictionaries, lists, and tuples to be packed with their
structures intact. See the doc string for pack_keywords() for
more details.
"""
# Leave the user's copy alone
hdr = hdr.copy()
# Pack the main header keywords
pack_main_header_keywords(hdr)
# We have to pack the adjunct part explicitly because it is a union
endian = _rep_tran[hdr['head_rep']]
# Recalculate class from current type (user may have changed)
file_class = int(hdr['type'] / 1000)
hdr['adjunct'] = _pack_blue_struct(hdr, _bluestructs['T%dADJUNCT' %
file_class], endian)
# Pack the extended header if present
if hdr.has_key('ext_header'):
pack_ext_header(hdr, structured=structured)
return _pack_blue_struct(hdr, _bluestructs['HEADER'], endian)