Python numpy 模块,mean() 实例源码
我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用numpy.mean()。
def pad_batch(mini_batch):
    """Zero-pad a mini-batch of sentence pairs into two index matrices.

    Each element of ``mini_batch`` is read as ``x[0]`` (first sentence) and
    ``x[1]`` (second sentence); both are sequences of integer token ids.
    Every sentence is right-padded with zeros up to the longest sentence on
    its side of the pair.

    :param mini_batch: non-empty sequence of ``(sentence1, sentence2)`` pairs.
    :return: ``[matrix1, matrix2]`` -- two ``Variable``-wrapped int64 tensors
        of shape ``(batch, max_len1)`` and ``(batch, max_len2)``.
    """
    mini_batch_size = len(mini_batch)
    max_sent_len1 = int(np.max([len(x[0]) for x in mini_batch]))
    max_sent_len2 = int(np.max([len(x[1]) for x in mini_batch]))
    # ``np.int`` no longer exists in NumPy; int64 converts to a LongTensor.
    main_matrix1 = np.zeros((mini_batch_size, max_sent_len1), dtype=np.int64)
    main_matrix2 = np.zeros((mini_batch_size, max_sent_len2), dtype=np.int64)
    for row, pair in enumerate(mini_batch):
        # Index by position (row, column).  The previous code indexed with
        # the sample and the token id themselves and silently swallowed the
        # resulting IndexError, so the matrices were never filled correctly.
        for col, token in enumerate(pair[0]):
            main_matrix1[row, col] = token
        for col, token in enumerate(pair[1]):
            main_matrix2[row, col] = token
    main_matrix1_t = Variable(torch.from_numpy(main_matrix1))
    main_matrix2_t = Variable(torch.from_numpy(main_matrix2))
    return [main_matrix1_t, main_matrix2_t]
# def pad_batch(mini_batch):
# # print mini_batch
# # print type(mini_batch)
# # print mini_batch.shape
# # for i, _ in enumerate(mini_batch):
# # print i, _
# return [Variable(torch.from_numpy(np.asarray(_))) for _ in mini_batch[0]]
def _cascade_evaluation(self, X_test, y_test):
    """Score the cascade on the given samples and report its accuracy.

    :param X_test: np.array
        Test input samples; must have the same shape as the training data.

    :param y_test: np.array
        Test target values.

    :return: float
        Accuracy of the cascade predictions against ``y_test``.
    """
    # Average whatever ``cascade_forest`` returns along axis 0, then pick
    # the highest-scoring column for every sample.
    forest_outputs = self.cascade_forest(X_test)
    mean_proba = np.mean(forest_outputs, axis=0)
    predicted = np.argmax(mean_proba, axis=1)
    accuracy = accuracy_score(y_true=y_test, y_pred=predicted)
    print('Layer validation accuracy = {}'.format(accuracy))
    return accuracy
def evaluate(self, dataset):
    """Compute macro precision/recall, accuracy and F1 on a labelled dataset.

    Column 0 of ``dataset`` is passed to ``self.predict``; column 1 holds the
    true labels.

    :param dataset: 2-D array-like indexable as ``dataset[:, k]``.
    :return: tuple ``(precision, recall, accuracy, f1)`` where precision and
        recall are unweighted means over the classes and ``f1`` is computed
        from those two means.
    """
    predictions = self.predict(dataset[:, 0])
    confusion_matrix = sklearn_confusion_matrix(dataset[:, 1], predictions, labels=self.__classes)
    precisions = []
    recalls = []
    for gender in self.__classes:
        idx = self.__classes_indexes[gender]
        true_positives = float(confusion_matrix[idx][idx])
        # Assumes the sklearn.metrics.confusion_matrix layout: rows are true
        # labels, columns are predictions.  The column sum is therefore the
        # precision denominator and the row sum the recall denominator (the
        # previous code had the two swapped).
        predicted_total = np.sum(confusion_matrix[:, idx])
        actual_total = np.sum(confusion_matrix[idx, :])
        # A class that never occurs keeps the neutral score 1, as before.
        precision = true_positives / predicted_total if predicted_total > 0 else 1
        recall = true_positives / actual_total if actual_total > 0 else 1
        precisions.append(precision)
        recalls.append(recall)
    precision = np.mean(precisions)
    recall = np.mean(recalls)
    if precision + recall > 0:
        f1 = (2 * (precision * recall)) / float(precision + recall)
    else:
        # Both means are zero: define F1 as 0 instead of dividing by zero.
        f1 = 0.0
    accuracy = np.sum(confusion_matrix.diagonal()) / float(np.sum(confusion_matrix))
    return precision, recall, accuracy, f1
def reshape_array(array, newsize, pixcombine='sum'):
    """
    Rebin a 2-D array to a smaller size by combining blocks of pixels.

    The old array dimensions have to be multiples of the new array
    dimensions; otherwise the internal reshape raises ValueError.

    --- INPUT ---
    array         Array to reshape (combine pixels)
    newsize       New size of array, as (rows, columns)
    pixcombine    How to combine the binned pixels: 'sum', 'mean' or 'median'

    --- OUTPUT ---
    Array of shape ``newsize``.  Raises ValueError for an unknown
    ``pixcombine``.
    """
    sh = newsize[0], array.shape[0]//newsize[0], newsize[1], array.shape[1]//newsize[1]
    blocks = array.reshape(sh)
    if pixcombine == 'sum':
        reshapedarray = blocks.sum(-1).sum(1)
    elif pixcombine == 'mean':
        reshapedarray = blocks.mean(-1).mean(1)
    elif pixcombine == 'median':
        # ndarray has no .median() method; take the median over both
        # within-block axes at once so it is the median of the binned pixels.
        import numpy as np
        reshapedarray = np.median(blocks, axis=(1, 3))
    else:
        raise ValueError(
            "pixcombine must be 'sum', 'mean' or 'median', got %r" % (pixcombine,))
    return reshapedarray
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
def compute_nystrom(ds_name, use_node_labels, embedding_dim, community_detection_method, kernels):
    """Embed the communities of a graph dataset with one Nystrom model per kernel.

    :param ds_name: dataset name; "SYNTHETIC" selects ``generate_synthetic()``,
        anything else is loaded with ``load_data``.
    :param use_node_labels: forwarded to ``load_data`` and ``compute_communities``.
    :param embedding_dim: ``n_components`` of every Nystrom model.
    :param community_detection_method: forwarded to ``compute_communities``.
    :param kernels: iterable of kernels, one Nystrom model is fitted per kernel.
    :return: ``(Q, subgraphs, labels, shape)`` where ``Q`` is the list of
        embedding matrices (each with an extra all-zero first row) and
        ``shape`` is the shape of the last of them.
    :raises ValueError: if ``kernels`` is empty.
    """
    if ds_name == "SYNTHETIC":
        graphs, labels = generate_synthetic()
    else:
        graphs, labels = load_data(ds_name, use_node_labels)
    communities, subgraphs = compute_communities(graphs, use_node_labels, community_detection_method)

    print("Number of communities: ", len(communities))
    lens = [community.number_of_nodes() for community in communities]
    print("Average size: %.2f" % np.mean(lens))

    Q = []
    for k in kernels:
        model = Nystrom(k, n_components=embedding_dim)
        model.fit(communities)
        Q_t = model.transform(communities)
        # Prepend an all-zero row to the embedding matrix.
        Q_t = np.vstack([np.zeros(embedding_dim), Q_t])
        Q.append(Q_t)
    if not Q:
        # Without any kernel there is no embedding whose shape could be returned.
        raise ValueError("kernels must contain at least one kernel")
    return Q, subgraphs, labels, Q[-1].shape
def xloads(self):
    """Return the cross-loadings table of manifests against latent scores.

    Entry (m, l) is the Pearson correlation between column ``m`` of
    ``self.data_`` and column ``l`` of ``self.fscores``.  The result is a
    DataFrame indexed by ``self.manifests`` with columns ``self.latent``.
    """
    indicators = self.data_.transpose().values
    scores = self.fscores.transpose().values
    # Centre every variable (one variable per row after the transpose).
    indicators_c = indicators - indicators.mean(axis=1, keepdims=True)
    scores_c = scores - scores.mean(axis=1, keepdims=True)
    ss_indicators = np.sum(indicators_c ** 2, axis=1)
    ss_scores = np.sum(scores_c ** 2, axis=1)
    denominator = np.sqrt(np.outer(ss_indicators, ss_scores))
    correlations = np.dot(indicators_c, scores_c.T) / denominator
    return pd.DataFrame(correlations, index=self.manifests, columns=self.latent)
def encode_and_store(batch_x, output_dir, file_name):
    """
    Encode a batch of images with the auto-encoder and dump the result as JSON.

    Args:
        1. batch_x:    Batch of 32*32 images which will go inside our autoencoder.
        2. output_dir: Dir path for storing all encoded features for given `batch_x`.
                       Features will be stored in the form of JSON file.
        3. file_name:  File name of JSON file (without the ``.json`` suffix).

    The auto-encoder is loaded lazily through ``load_AE()`` on first use.
    """
    global AUTO_ENCODER
    import os

    if AUTO_ENCODER is None:
        load_AE()

    # Standardise every image on its own (zero mean, unit variance).
    norm_batch = np.zeros(batch_x.shape)
    for i, image in enumerate(batch_x):
        std = np.std(image)
        # A constant image has std 0; divide by 1 so it maps to zeros
        # instead of NaN/inf.
        norm_batch[i] = (image - np.mean(image)) / (std if std > 0 else 1.0)

    output_dict = {
        'name': file_name,
        'encoded': AUTO_ENCODER.transform(norm_batch).tolist()}

    # os.path.join gives the same path as before when output_dir ends with a
    # separator and inserts the missing separator when it does not.
    with open(os.path.join(output_dir, file_name + '.json'), 'w') as f:
        json.dump(output_dict, f)
def plot_qual(ax, quallist, invert=False):
    '''
    Draw a FastQC-like "Per base sequence quality" curve on ``ax``.

    The mean quality is plotted for every read position.  Positions are
    paired up with zip, so the curve ends where the shortest read ends.
    With ``invert`` set, every read is reversed first, so positions are
    counted from the end of the read and the tick labels are negated.

    Returns the first object returned by ``ax.plot``.
    '''
    sns.set_style("darkgrid")
    if invert:
        reads = [list(reversed(read)) for read in quallist]
    else:
        reads = quallist
    mean_quality = np.array([np.mean(position) for position in zip(*reads)])
    l_Q, = ax.plot(mean_quality, 'orange', label="Quality")
    if invert:
        ax.set_xlabel('Position in read from end')
        ax.set_xticklabels(-1 * ax.get_xticks().astype(int))
    else:
        ax.set_xlabel('Position in read from start')
    return l_Q
def update_data_sort_order(self, new_sort_order=None):
    """Re-sort the displayed data image and refresh the dependent plot items.

    :param new_sort_order: if given, stored as ``self.current_order`` before
        the sort indices are recomputed with ``self.update_sort_idcs()``.
    """
    if new_sort_order is not None:
        self.current_order = new_sort_order
    self.update_sort_idcs()
    self.data_image.set_extent((self.raw_lags[0], self.raw_lags[-1],
                                0, len(self.sort_idcs)))
    self.data_ax.set_ylim(0, len(self.sort_idcs))
    # Normalise each row by (1 + its mean) into a NEW array.  The previous
    # in-place ``/=`` operated on an alias of ``self.raw_data`` and therefore
    # rescaled the stored raw data again on every call.
    all_raw_data = self.raw_data / (1 + self.raw_data.mean(1)[:, np.newaxis])
    if len(all_raw_data) > 0:
        cmax = 0.5*all_raw_data.max()
        cmin = 0.5*all_raw_data.min()
        all_raw_data = all_raw_data[self.sort_idcs, :]
    else:
        # Nothing to show: fall back to a fixed colour range.
        cmin = 0
        cmax = 1
    self.data_image.set_data(all_raw_data)
    self.data_image.set_clim(cmin, cmax)
    self.data_selection.set_y(len(self.sort_idcs)-len(self.selected_points))
    self.data_selection.set_height(len(self.selected_points))
    self.update_data_plot()
def get_color_medio(self, roi, a,b,imprimir = False):
    """Return the mean HSV colour of ``roi`` when the region is uniform enough.

    :param roi: 3-channel image region; converted with the RGB->YUV and
        RGB->HSV OpenCV conversions.
    :param a: unused.
    :param b: unused.
    :param imprimir: unused.
    :return: ``None`` when any YUV channel has a standard deviation of 12 or
        more; ``[-10, 0, 0]`` (the marker used for white) when the mean
        saturation is below 60; otherwise the per-channel HSV mean.
    """
    xl, yl, ch = roi.shape
    roiyuv = cv2.cvtColor(roi, cv2.COLOR_RGB2YUV)
    roihsv = cv2.cvtColor(roi, cv2.COLOR_RGB2HSV)
    h, s, v = cv2.split(roihsv)
    # Hues below 5 are replaced by 200 before averaging.
    h[h < 5] = 200
    roihsv = cv2.merge((h, s, v))
    std = np.std(roiyuv.reshape(xl*yl, 3), axis=0)
    if not (std[0] < 12 and std[1] < 12 and std[2] < 12):
        # Region is not uniform enough to have a single colour.
        return None
    media = np.mean(roihsv.reshape(xl*yl, 3), axis=0)
    # (original note, translated: yellow has a saturation of 65 and above 200)
    if media[1] < 60:
        # white
        return [-10, 0, 0]
    return media
def information_ratio(algorithm_returns, benchmark_returns):
    """
    Mean excess return over the benchmark divided by its sample deviation.

    http://en.wikipedia.org/wiki/Information_ratio

    Args:
        algorithm_returns (np.array-like):
            All returns during algorithm lifetime.
        benchmark_returns (np.array-like):
            All benchmark returns during algo lifetime.

    Returns:
        float. Information ratio; 0.0 when the deviation of the relative
        returns is (tolerantly) zero or NaN.
    """
    active_returns = algorithm_returns - benchmark_returns
    tracking_error = active_returns.std(ddof=1)
    degenerate = (zp_math.tolerant_equals(tracking_error, 0)
                  or np.isnan(tracking_error))
    if degenerate:
        return 0.0
    return np.mean(active_returns) / tracking_error
def mypsd(Rates,time_range,bin_w = 5., nmax = 4000):
    """Power spectrum of ``Rates`` and the frequency of its strongest peak.

    :param Rates: 1-D signal; its mean is removed before the FFT.
    :param time_range: only its length is used, as the number of samples
        for the frequency axis.
    :param bin_w: unused, kept for interface compatibility.
    :param nmax: unused, kept for interface compatibility.
    :return: ``(px, freq, peak_freq, freq2, peak_power)`` where ``px`` is the
        first half of the spectrum, ``freq`` the matching frequencies in
        cycles per sample, ``peak_freq`` the frequency of the largest
        non-DC component, ``freq2`` the full ``fftfreq`` axis and
        ``peak_power`` the power at the peak(s), as an array.
    """
    n_bins = len(np.arange(0, len(time_range), 1))
    ff = (1. / n_bins) * abs(np.fft.fft(Rates - np.mean(Rates)))**2
    # Integer division: slice bounds must be ints under Python 3.
    half = len(ff) // 2 + 1
    all_freqs = np.fft.fftfreq(n_bins)
    # NOTE(review): the original slice bound was ``len(bins/2)+1``, which is
    # past the end of the array, so freq2 has always been the full axis;
    # ``len(bins)/2+1`` may have been intended -- behaviour kept as it was.
    freq2 = all_freqs[0:n_bins + 1]
    freq = all_freqs[:half]
    px = ff[0:half]
    max_px = np.max(px[1:])  # skip the DC component
    # np.flatnonzero replaces pylab.find, which was removed from matplotlib.
    peak_idx = np.flatnonzero(px == max_px)
    corr_freq = freq[peak_idx]
    max_pow = px[peak_idx]
    return px, freq, corr_freq[0], freq2, max_pow
def testStartStopModulation(self):
    """Start a modulation, check mode and waveform, then stop it again.

    The waveform of generator 1 must average to the centre of the first
    axis and peak at centre + radius; after stopping, the target position
    must be back at the centre.
    """
    radiusInMilliRad = 12.4
    frequencyInHz = 100.
    centerInMilliRad = [-10, 15]
    self._tt.setTargetPosition(centerInMilliRad)
    self._tt.startModulation(radiusInMilliRad, frequencyInHz, centerInMilliRad)

    startStopMode = self._ctrl.getWaveGeneratorStartStopMode()
    self.assertTrue(np.allclose([1, 1, 0], startStopMode))

    waveform = self._ctrl.getWaveform(1)
    checks = (
        (centerInMilliRad[0], np.mean),
        (centerInMilliRad[0] + radiusInMilliRad, np.max),
    )
    for milliRad, reduceWaveform in checks:
        wants = self._tt._milliRadToGcsUnitsOneAxis(milliRad, self._tt.AXIS_A)
        got = reduceWaveform(waveform)
        self.assertAlmostEqual(
            wants, got, msg="wants %g, got %g" % (wants, got))

    self._tt.stopModulation()
    targetPosition = self._tt.getTargetPosition()
    self.assertTrue(np.allclose(centerInMilliRad, targetPosition))
def monitor(data_feeder):
    """
    Evaluate ``test_fn`` over one dataset section and time the run.

    Pass only one of `valid_feeder` or `test_feeder`.
    Don't pass `train_feed`.

    :returns:
        Mean cost over the input dataset (data_feeder)
        Total time spent
    """
    started_at = time()
    # Both recurrent states start from zeros and are carried from batch
    # to batch.
    h0 = numpy.zeros((BATCH_SIZE, N_RNN, H0_MULT*DIM), dtype='float32')
    big_h0 = numpy.zeros((BATCH_SIZE, N_RNN, H0_MULT*BIG_DIM), dtype='float32')
    costs = []
    for seqs, reset, mask in load_data(data_feeder):
        cost, big_h0, h0 = test_fn(seqs, big_h0, h0, reset, mask)
        costs.append(cost)
    return numpy.mean(costs), time() - started_at
def monitor(data_feeder):
    """
    Evaluate ``test_fn`` over one dataset section and time the run.

    Pass only one of `valid_feeder` or `test_feeder`.
    Don't pass `train_feed`.

    :returns:
        Mean cost over the input dataset (data_feeder)
        Total time spent
    """
    started_at = time()
    # The recurrent state starts from zeros and is carried from batch to batch.
    h0 = numpy.zeros((BATCH_SIZE, N_RNN, H0_MULT*DIM), dtype='float32')
    costs = []
    for seqs, reset, mask in load_data(data_feeder):
        cost, h0 = test_fn(seqs, h0, reset, mask)
        costs.append(cost)
    return numpy.mean(costs), time() - started_at
def monitor(data_feeder):
    """
    Evaluate ``test_fn`` over one dataset section and time the run.

    Pass only one of `valid_feeder` or `test_feeder`.
    Don't pass `train_feed`.

    :returns:
        Mean cost over the input dataset (data_feeder)
        Total time spent
    """
    started_at = time()
    # The recurrent state starts from zeros and is carried from batch to batch.
    h0 = numpy.zeros((BATCH_SIZE, N_RNN, H0_MULT*DIM), dtype='float32')
    costs = []
    for seqs, reset, mask in load_data(data_feeder):
        cost, h0 = test_fn(seqs, h0, reset, mask)
        costs.append(cost)
    return numpy.mean(costs), time() - started_at
def doesnt_match(self, words):
    """
    Pick the word from the list that fits least with the others.

    Words missing from ``self.vocab`` are dropped first; ValueError is
    raised when none remain.

    Example::

      >>> trained_model.doesnt_match("breakfast cereal dinner lunch".split())
      'cereal'

    """
    known = [word for word in words if word in self.vocab]  # drop OOV words
    logger.debug("using words %s" % known)
    if not known:
        raise ValueError("cannot select a word from an empty list")
    # The odd one out is the vector least similar to the mean direction.
    rows = [self.vocab[word].index for word in known]
    vectors = self.syn0norm[rows]
    centroid = np.mean(vectors, axis=0)
    unit_centroid = centroid / np.linalg.norm(centroid)
    similarities = np.dot(vectors, unit_centroid)
    return known[np.argmin(similarities)]
def effective_sample_size(x, mu, var, logger):
    """
    Estimate the effective sample size of an MCMC-generated sequence.

    :param x: array of shape (batch size, time, dimension)
    :param mu: mean of the variable
    :param var: variance of the variable
    :param logger: logger used to report the max / min estimate
    :return: effective sample size of the sequence, one value per dimension

    Make sure that `mu` and `var` are correct!
    """
    threshold = 0.05
    _, t, d = x.shape
    ess_ = np.ones([d])
    for lag in range(1, t):
        rho = auto_correlation_time(x, lag, mu, var)
        if np.sum(rho > threshold) == 0:
            # No dimension is above the threshold any more: stop summing.
            break
        weight = 1.0 - float(lag) / t
        for dim in range(d):
            if rho[dim] > threshold:
                ess_[dim] += 2.0 * rho[dim] * weight
    logger.info('ESS: max [%f] min [%f] / [%d]' % (t / np.min(ess_), t / np.max(ess_), t))
    return t / ess_
def apply(self, referenceSamples=None, testSamples=None, gaussianCenters=None) :
    """
    Calculates the alpha-relative Pearson divergence score.

    Returns the tuple ``(PE_divergence, r_alpha_Xtest)``, the second entry
    being the estimated density ratio for the test samples.
    """
    alpha = self.alphaConstraint
    estimator = AlphaRelativeDensityRatioEstimator(alpha,
                                                   self.sigmaWidth,
                                                   self.lambdaRegularizer,
                                                   self.kernelBasis)

    # Estimate the alpha-relative density ratio on both sample sets.
    r_ref, r_test = estimator.apply(referenceSamples, testSamples, gaussianCenters)

    linear_term = numpy.mean(r_ref)
    quadratic_term = 0.5 * (alpha * numpy.mean(r_ref ** 2) +
                            (1.0 - alpha) * numpy.mean(r_test ** 2))
    PE_divergence = linear_term - quadratic_term - 0.5

    return (PE_divergence, r_test)
def test(self, input_path, output_path):
    """Run the loaded model on every input file and write one HDF5 file each.

    :param input_path: iterable of input file paths.
    :param output_path: directory receiving ``<roi[1]>_<basename>`` files.
    :raises Exception: when ``self.load()`` reports that no model was found.

    For a negative ``self.roi[0]`` the arg-max labels are stored under
    ``'predictions'``; otherwise channel 1 of the probabilities is stored
    under ``'probs'``.
    """
    if not self.load()[0]:
        raise Exception("No model is found, please train first")

    mean, std = self.sess.run([self.mean, self.std])

    images = np.empty((1, self.im_size[0], self.im_size[1], self.im_size[2], 1), dtype=np.float32)
    for f in input_path:
        images[0, ..., 0], read_info = read_testing_inputs(f, self.roi[0], self.im_size, output_path)

        # NOTE(review): is_training is fed as True at inference time --
        # confirm this is intentional.
        probs = self.sess.run(self.probs, feed_dict={self.images: (images - mean) / std,
                                                     self.is_training: True,
                                                     self.keep_prob: 1})

        output_file = os.path.join(output_path, self.roi[1] + '_' + os.path.basename(f))
        # The context manager closes the file even if restore_labels raises.
        with h5py.File(output_file, 'w') as f_h5:
            if self.roi[0] < 0:
                f_h5['predictions'] = restore_labels(np.argmax(probs[0], 3), self.roi[0], read_info)
            else:
                f_h5['probs'] = restore_labels(probs[0, ..., 1], self.roi[0], read_info)
def transfer_color(content, style):
    """Recolour ``style`` so its colour statistics match those of ``content``.

    Both arguments are (height, width, channels) arrays.  A linear map
    ``x -> A x + b`` is applied to every pixel of ``style`` so that the
    per-channel mean and the channel covariance of the result equal those
    of ``content``.  The returned array has the shape of ``style``.
    """
    import scipy.linalg as sl

    def _mean_and_covariance(image):
        # Per-channel mean and (biased) channel covariance over all pixels.
        mean = np.mean(image, axis=(0, 1))
        centered = np.reshape(image - mean, (-1, image.shape[2]))
        covariance = np.matmul(centered.T, centered) / (centered.shape[0])
        return mean, covariance

    content_mean, content_covariance = _mean_and_covariance(content)
    style_mean, style_covariance = _mean_and_covariance(style)

    # A = C_content^(1/2) C_style^(-1/2),  b = mu_content - A mu_style
    A = np.matmul(sl.sqrtm(content_covariance), sl.inv(sl.sqrtm(style_covariance)))
    b = content_mean - np.matmul(A, style_mean)

    flat_style = np.reshape(style, (-1, style.shape[2]))
    recoloured = np.matmul(A, flat_style.T).T
    return np.reshape(recoloured, style.shape) + b
def get_selective_mirrors(self, number=None):
    """get mirror genotypic directions from worst solutions.

    Details:

    To be called after the mean has been updated.

    Takes the last ``number=sp.lam_mirr`` entries in the
    ``self.pop[self.fit.idx]`` as solutions to be mirrored.

    Do not take a mirror if it is suspected to stem from a
    previous mirror in order to not go endlessly back and forth.

    Returns the list of directions ``self.mean_old - solution``.
    """
    if number is None:
        number = self.sp.lam_mirr
    if not hasattr(self, '_indices_of_selective_mirrors'):
        self._indices_of_selective_mirrors = []

    def _may_mirror(index):
        # With 'all-selective-mirrors' set every candidate is accepted.
        return ('all-selective-mirrors' in self.opts['vv']
                or index not in self._indices_of_selective_mirrors)

    candidates = (self.fit.idx[-i] for i in range(1, number + 1))
    res = [self.mean_old - self.pop[index]
           for index in candidates if _may_mirror(index)]
    assert len(res) >= number - len(self._indices_of_selective_mirrors)
    return res
# ____________________________________________________________
def result(self):
    """return a `CMAEvolutionStrategyResult` `namedtuple`.

    :See: `cma.evolution_strategy.CMAEvolutionStrategyResult`
        or try ``help(...result)`` on the ``result`` property
        of an `CMAEvolutionStrategy` instance or on the
        `CMAEvolutionStrategyResult` instance itself.

    Falls back to a plain tuple when `CMAEvolutionStrategyResult`
    is not defined.
    """
    # TODO: how about xcurrent?
    best_triple = self.best.get()  # (x, f, evals) triple
    stds = (self.gp.scales * self.sigma * self.sigma_vec.scaling *
            self.dC**0.5)
    res = best_triple + (self.countevals,
                         self.countiter,
                         self.gp.pheno(self.mean),
                         stds)
    try:
        return CMAEvolutionStrategyResult(*res)
    except NameError:
        return res
def result_pretty(self, number_of_runs=0, time_str=None,
                  fbestever=None):
    """pretty print result.

    Prints the termination conditions, the final and best-ever f-values,
    the incumbent solution and the standard deviations (only the first
    eight entries when the dimension is 9 or more).

    Returns `result` of ``self``.
    """
    if fbestever is None:
        fbestever = self.best.f
    if number_of_runs:
        plural = 's' if number_of_runs > 1 else ''
        s = (' after %i restart' + plural) % number_of_runs
    else:
        s = ''
    for k, v in self.stop().items():
        suffix = s + (' (%s)' % time_str if time_str else '')
        print('termination on %s=%s%s' % (k, str(v), suffix))

    print('final/bestever f-value = %e %e' % (self.best.last.f,
                                              fbestever))
    few_dimensions = self.N < 9
    incumbent = self.gp.pheno(self.mean, into_bounds=self.boundary_handler.repair)
    if few_dimensions:
        print('incumbent solution: ' + str(list(incumbent)))
    else:
        print('incumbent solution: %s ...]' % (str(incumbent[:8])[:-1]))
    stds = self.sigma * self.sigma_vec.scaling * np.sqrt(self.dC) * self.gp.scales
    if few_dimensions:
        print('std deviation: ' + str(list(stds)))
    else:
        print('std deviations: %s ...]' % (str(stds[:8])[:-1]))
    return self.result
def isotropic_mean_shift(self):
    """normalized last mean shift, under random selection N(0,I)

    distributed.

    Caveat: while it is finite and close to sqrt(n) under random
    selection, the length of the normalized mean shift under
    *systematic* selection (e.g. on a linear function) tends to
    infinity for mueff -> infty. Hence it must be used with great
    care for large mueff.
    """
    scaled_shift = (self.mean - self.mean_old) / self.sigma_vec.scaling
    z = self.sm.transform_inverse(scaled_shift)
    # works unless a re-parametrisation has been done
    # Normalise in place by the step-size and mean learning rate, then
    # scale by sqrt(mueff).
    z /= self.sigma * self.sp.cmean
    z *= self.sp.weights.mueff**0.5
    return z
def sample(self, number, lazy_update_gap=None, same_length=False):
    """Draw ``number`` samples, returned as the rows of an array.

    ``self.update_now(lazy_update_gap)`` is called first.  If
    ``same_length`` is true, every standard-normal vector is rescaled to
    one common length before being transformed: ``self.chiN`` when
    ``same_length is True``, otherwise the value of ``same_length``.
    """
    self.update_now(lazy_update_gap)
    arz = self.randn(number, self.dimension)
    if same_length:
        # a non-True value is presumably N**0.5, useful if self.opts['CSA_squared']
        len_ = self.chiN if same_length is True else same_length
        for i in rglen(arz):
            ss = sum(arz[i]**2)
            # The original guard ``1 < 3 or ...`` is always true, so every
            # vector is rescaled.
            arz[i] *= len_ / ss**0.5
    scaled = (self.D * arz).T
    return np.dot(self.B, scaled).T
def norm(self, x):
    """compute the Mahalanobis norm that is induced by the
    statistical model / sample distribution, specifically by
    covariance matrix ``C``. The expected Mahalanobis norm is
    about ``sqrt(dimension)``.

    Example
    -------
    >>> import cma, numpy as np
    >>> sm = cma.sampler.GaussFullSampler(np.ones(10))
    >>> x = np.random.randn(10)
    >>> d = sm.norm(x)

    `d` is the norm "in" the true sample distribution,
    sampled points have a typical distance of ``sqrt(2*sm.dim)``,
    where ``sm.dim`` is the dimension, and an expected distance of
    close to ``dim**0.5`` to the sample mean zero. In the example,
    `d` is the Euclidean distance, because C = I.
    """
    # Project x with B.T and divide each coordinate by D.
    coordinates = np.dot(self.B.T, x) / self.D
    return sum(coordinates**2)**0.5
def sample(self, number, same_length=False):
    """Draw ``number`` samples, returned as the rows of an array.

    If ``same_length`` is true, every standard-normal vector is rescaled to
    one common length before scaling: ``self.chin`` when
    ``same_length is True``, otherwise the value of ``same_length``.
    The vectors are then multiplied element-wise by ``self.C**0.5``.
    """
    arz = self.randn(number, self.dimension)
    if same_length:
        # a non-True value is presumably N**0.5, useful if self.opts['CSA_squared']
        len_ = self.chin if same_length is True else same_length
        for i in rglen(arz):
            ss = sum(arz[i]**2)
            # The original guard ``1 < 3 or ...`` is always true, so every
            # vector is rescaled.
            arz[i] *= len_ / ss**0.5
    return self.C**0.5 * arz
def norm(self, x):
    """compute the Mahalanobis norm that is induced by the
    statistical model / sample distribution, specifically by
    covariance matrix ``C``. The expected Mahalanobis norm is
    about ``sqrt(dimension)``.

    Example
    -------
    >>> import cma, numpy as np
    >>> sm = cma.sampler.GaussFullSampler(np.ones(10))
    >>> x = np.random.randn(10)
    >>> d = sm.norm(x)

    `d` is the norm "in" the true sample distribution,
    sampled points have a typical distance of ``sqrt(2*sm.dim)``,
    where ``sm.dim`` is the dimension, and an expected distance of
    close to ``dim**0.5`` to the sample mean zero. In the example,
    `d` is the Euclidean distance, because C = I.
    """
    # Here ``C`` divides the squared coordinates element-wise.
    weighted_squares = np.asarray(x)**2 / self.C
    return sum(weighted_squares)**0.5
def update_measure(self):
    """updated noise level measure using two fitness lists ``self.fit`` and
    ``self.fitre``, return ``self.noiseS, all_individual_measures``.

    Assumes that ``self.idx`` contains the indices where the fitness
    lists differ.
    """
    lam = len(self.fit)
    order = np.argsort(self.fit + self.fitre)
    ranks = np.argsort(order).reshape((2, lam))
    rank_diff = ranks[0] - ranks[1]
    rankDelta = rank_diff - np.sign(rank_diff)

    # compute rank change limits using both ranks[0] and ranks[1]
    r = np.arange(1, 2 * lam)  # the values 1 .. 2 * lam - 1
    percentile = self.theta * 50
    limits = []
    for i in self.idx:
        first = ranks[0, i] + 1 - (ranks[0, i] > ranks[1, i])
        second = ranks[1, i] + 1 - (ranks[1, i] > ranks[0, i])
        limits.append(0.5 * (Mh.prctile(np.abs(r - first), percentile) +
                             Mh.prctile(np.abs(r - second), percentile)))

    # compute measurement
    # max: 1 rankchange in 2*lambda is always fine
    s = np.abs(rankDelta[self.idx]) - Mh.amax(limits, 1)  # lives roughly in 0..2*lambda
    # Smooth the stored noise measure towards the mean of the new one.
    self.noiseS += self.cum * (np.mean(s) - self.noiseS)
    return self.noiseS, s
def mse(ypredict, ytrue):
    """
    Mean of the squared differences between prediction and truth.

    >>> mse(1.0, 3.0)
    4.0
    """
    return np.mean(np.square(ypredict - ytrue))
def compute_score(self, gts, res):
    """
    Computes Rouge-L score given a set of reference and candidate sentences for the dataset
    Invoked by evaluate_captions.py
    :param gts: dict : reference MS-COCO sentences with "image name" key and "tokenized sentences" as values
    :param res: dict : candidate / test sentences with "image name" key and "tokenized sentences" as values
    :returns: average_score: float (mean ROUGE-L score computed by averaging scores for all the images),
              followed by the array of per-image scores
    """
    assert(gts.keys() == res.keys())

    score = []
    for img_id in gts.keys():
        hypo = res[img_id]
        ref = gts[img_id]

        # Sanity check -- done before scoring so that malformed input is
        # reported here rather than from inside calc_score.
        assert(isinstance(hypo, list))
        assert(len(hypo) == 1)
        assert(isinstance(ref, list))
        assert(len(ref) > 0)

        score.append(self.calc_score(hypo, ref))

    scores = np.array(score)
    average_score = np.mean(scores)
    return average_score, scores
def analytic_convolution_gaussian(mu1,covar1,mu2,covar2):
    """
    The analytic convolution of two Gaussians is simply the sum of the two
    mean vectors and the sum of the two covariance matrices.

    --- INPUT ---
    mu1         The mean of the first gaussian
    covar1      The covariance matrix of the first gaussian
    mu2         The mean of the second gaussian
    covar2      The covariance matrix of the second gaussian

    --- OUTPUT ---
    The tuple (mean, covariance matrix) of the convolved gaussian
    """
    return mu1 + mu2, covar1 + covar2
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
def smooth_colors(src, dst, src_l):
    """Scale ``dst`` by the ratio of the blurred ``src`` to the blurred ``dst``.

    The (odd) Gaussian kernel size is ``BLUR_FRACTION`` times the distance
    between the mean points of ``src_l[LEFT_EYE_IDX]`` and
    ``src_l[RIGHT_EYE_IDX]``.  The result is a float64 array.
    """
    left_eye_center = np.mean(src_l[LEFT_EYE_IDX], axis = 0)
    right_eye_center = np.mean(src_l[RIGHT_EYE_IDX], axis = 0)
    kernel_size = int(BLUR_FRACTION * np.linalg.norm(left_eye_center - right_eye_center))
    # GaussianBlur is given an odd kernel size.
    if kernel_size % 2 == 0:
        kernel_size += 1
    kernel = (kernel_size, kernel_size)
    src_blur = cv2.GaussianBlur(src, kernel, 0)
    dst_blur = cv2.GaussianBlur(dst, kernel, 0)
    # Lift values <= 1 by 128 before they are used as a divisor.
    dst_blur += (128 * (dst_blur <= 1.0)).astype(dst_blur.dtype)
    return np.float64(dst) * np.float64(src_blur) / np.float64(dst_blur)
def get_tm_opp(pts1, pts2):
    """Estimate the similarity transform that maps ``pts1`` onto ``pts2``.

    Transformation matrix - ( Translation + Scaling + Rotation ) obtained
    with Procrustes analysis.

    :param pts1: (N, 2) array-like or ``np.matrix`` of source points.
    :param pts2: (N, 2) array-like or ``np.matrix`` of target points.
    :return: 3x3 ``np.matrix`` ``[[s*R, t], [0, 0, 1]]`` in homogeneous
        coordinates.

    The inputs are copied, never modified.  All matrix products are written
    explicitly, so plain ndarrays behave the same as ``np.matrix`` input
    (the previous code relied on ``*`` being a matrix product).
    """
    pts1 = np.array(pts1, dtype=np.float64)
    pts2 = np.array(pts2, dtype=np.float64)
    m1 = np.mean(pts1, axis=0)
    m2 = np.mean(pts2, axis=0)
    # Removing translation
    pts1 -= m1
    pts2 -= m2
    std1 = np.std(pts1)
    std2 = np.std(pts2)
    std_r = std2 / std1
    # Removing scaling
    pts1 /= std1
    pts2 /= std2
    U, S, Vt = np.linalg.svd(np.dot(pts1.T, pts2))
    # Finding the rotation matrix
    R = np.dot(U, Vt).T
    translation = m2.reshape(-1, 1) - std_r * np.dot(R, m1.reshape(-1, 1))
    top = np.hstack((std_r * R, translation))
    return np.matrix(np.vstack([top, np.array([[0.0, 0.0, 1.0]])]))
def fit(self, x):
    """Estimate the ZCA whitening transform from the samples in ``x``.

    ``x`` is flattened to (samples, features) on a copy.  The feature mean,
    the whitening matrix and its inverse are stored as shared variables in
    ``self.mean``, ``self.ZCA_mat`` and ``self.inv_ZCA_mat``.
    """
    shape = x.shape
    flat = x.copy().reshape((shape[0], np.prod(shape[1:])))
    m = np.mean(flat, axis=0)
    flat -= m
    sigma = np.dot(flat.T, flat) / flat.shape[0]
    U, S, V = linalg.svd(sigma)
    # Regularise the singular values before taking (inverse) square roots.
    regularized = S + self.regularization
    whiten = np.dot(np.dot(U, np.diag(1. / np.sqrt(regularized))), U.T)
    unwhiten = np.dot(np.dot(U, np.diag(np.sqrt(regularized))), U.T)
    float_type = th.config.floatX
    self.ZCA_mat = th.shared(whiten.astype(float_type))
    self.inv_ZCA_mat = th.shared(unwhiten.astype(float_type))
    self.mean = th.shared(m.astype(float_type))
def apply(self, x):
    """Whiten ``x`` with the fitted mean and ZCA matrix.

    Works on numpy arrays and Theano TensorVariables; the result has the
    shape of ``x``.  Any other type raises NotImplementedError.
    """
    s = x.shape
    if isinstance(x, np.ndarray):
        flat = x.reshape((s[0], np.prod(s[1:])))
        centered = flat - self.mean.get_value()
        return np.dot(centered, self.ZCA_mat.get_value()).reshape(s)
    if isinstance(x, T.TensorVariable):
        centered = x.flatten(2) - self.mean.dimshuffle('x', 0)
        return T.dot(centered, self.ZCA_mat).reshape(s)
    raise NotImplementedError("Whitening only implemented for numpy arrays or Theano TensorVariables")
def invert(self, x):
    """Undo the whitening: multiply by the inverse ZCA matrix, add the mean.

    Works on numpy arrays and Theano TensorVariables; the result has the
    shape of ``x``.  Any other type raises NotImplementedError.
    """
    s = x.shape
    if isinstance(x, np.ndarray):
        flat = x.reshape((s[0], np.prod(s[1:])))
        restored = np.dot(flat, self.inv_ZCA_mat.get_value()) + self.mean.get_value()
        return restored.reshape(s)
    if isinstance(x, T.TensorVariable):
        restored = T.dot(x.flatten(2), self.inv_ZCA_mat) + self.mean.dimshuffle('x', 0)
        return restored.reshape(s)
    raise NotImplementedError("Whitening only implemented for numpy arrays or Theano TensorVariables")
# T.nnet.relu has some issues with very large inputs, this is more stable
def softmax_loss(p_true, output_before_softmax):
    """Mean softmax cross-entropy computed from pre-softmax outputs.

    ``p_true`` is either a 2-D matrix of target probabilities or a vector
    of target class indices.  The row-wise maximum is subtracted from the
    outputs before exponentiating.
    """
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    log_partition = T.log(T.sum(T.exp(output_before_softmax), axis=1))
    if p_true.ndim == 2:
        target_term = T.sum(p_true*output_before_softmax, axis=1)
    else:
        target_term = output_before_softmax[T.arange(p_true.shape[0]), p_true]
    return T.mean(log_partition - target_term)
def get_output_for(self, input, **kwargs):
    """Return the mean of ``input`` over axes 2 and 3; ``kwargs`` are ignored."""
    pooled_axes = (2, 3)
    return T.mean(input, axis=pooled_axes)
def run_epoch_doc(docs, labels, tags, tm, pad_id, cf):
    """Classify all documents batch by batch and print the accuracy.

    :param docs, labels, tags: data forwarded to ``get_batch_doc``.
    :param tm: model exposing ``sup_probs`` and the placeholders ``doc``,
        ``label``, ``sup_mask`` and ``tag``.
    :param pad_id: padding id forwarded to ``get_batch_doc``.
    :param cf: configuration with ``batch_size``, ``doc_len`` and ``tag_len``.

    Uses the module-level ``sess``.  Only the first ``num_docs`` entries of
    each batch count towards the accuracy.
    """
    batches = int(math.ceil(float(len(docs))/cf.batch_size))
    accs = []
    # ``range`` and the function form of ``print`` run on Python 2 and 3.
    for b in range(batches):
        d, y, m, t, num_docs = get_batch_doc(docs, labels, tags, b, cf.doc_len, cf.tag_len, cf.batch_size, pad_id)
        prob = sess.run(tm.sup_probs, {tm.doc: d, tm.label: y, tm.sup_mask: m, tm.tag: t})
        pred = np.argmax(prob, axis=1)
        accs.extend(pred[:num_docs] == y[:num_docs])

    print("\ntest classification accuracy = %.3f" % np.mean(accs))
def print_corpus_stats(name, sents, docs, stats):
    """Print a short summary of a corpus.

    :param name: corpus name, printed as the heading.
    :param sents: pair ``(topic model sequences, language model sequences)``;
        the token count of each item is ``len(item[2]) - 1``.
    :param docs: sequence whose first element holds the documents.
    :param stats: sentence mean/max/min length at indices 0-2 and document
        mean/max/min length at indices 3-5.

    Every line is a single formatted string passed to ``print(...)``, so the
    output is identical on Python 2 and Python 3.
    """
    print(name + ":")
    print("\tno. of docs = %s" % (len(docs[0]),))
    if len(sents[0]) > 0:
        print("\ttopic model no. of sequences = %s" % (len(sents[0]),))
        print("\ttopic model no. of tokens = %s"
              % (sum(len(item[2]) - 1 for item in sents[0]),))
        print("\toriginal doc mean len = %s" % (stats[3],))
        print("\toriginal doc max len = %s" % (stats[4],))
        print("\toriginal doc min len = %s" % (stats[5],))
    if len(sents[1]) > 0:
        print("\tlanguage model no. of sequences = %s" % (len(sents[1]),))
        print("\tlanguage model no. of tokens = %s"
              % (sum(len(item[2]) - 1 for item in sents[1]),))
        print("\toriginal sent mean len = %s" % (stats[0],))
        print("\toriginal sent max len = %s" % (stats[1],))
        print("\toriginal sent min len = %s" % (stats[2],))
def MSE(self, responses):
    """Mean squared deviation of ``responses`` from their axis-0 mean."""
    centered = responses - np.mean(responses, axis=0)
    return np.mean(centered ** 2)
def make_leaf(self, responses):
    """Store the axis-0 mean of ``responses`` as ``self.leaf``."""
    leaf_value = np.mean(responses, axis=0)
    self.leaf = leaf_value
def predict(self, point):
    """Average the predictions of the first ``self.ntrees`` trees for ``point``."""
    responses = [self.trees[i].predict(point) for i in range(self.ntrees)]
    return np.mean(responses, axis=0)
def normaliza(self, X):
    """Centre the columns of ``X`` and divide them by a corrected std.

    The divisor is the column-wise ``np.std`` multiplied by
    ``sqrt((n - 1) / n)``, with ``n = len(X)``.
    """
    n_rows = len(X)
    correction = np.sqrt((n_rows - 1) / n_rows)  # std correction factor
    column_mean = np.mean(X, 0)
    column_std = np.std(X, 0)
    return (X - column_mean) / (column_std * correction)
def gof(self):
    """Return ``sqrt(weighted mean AVE * mean R2)``.

    The AVE of every latent variable is weighted by the number of its
    measurement columns; the R2 mean is taken over the entries selected by
    ``self.endoexo()[0]``.
    """
    r2_columns = self.endoexo()[0]
    r2mean = np.mean(self.r2.T[r2_columns].values)

    weighted_ave = self.AVE().copy()
    n_indicators = 0
    for i in range(self.lenlatent):
        latent_name = self.latent[i]
        indicator_names = self.Variables['measurement'][
            self.Variables['latent'] == latent_name]
        block_size = len(self.data_[indicator_names].columns.values)
        n_indicators += block_size
        weighted_ave[latent_name] = weighted_ave[latent_name] * block_size

    ave_mean = np.sum(weighted_ave) / n_indicators
    return np.sqrt(ave_mean * r2mean)
def srmr(self):
    """Root mean square of the entries of ``empirical() - implied()``."""
    residuals = self.empirical() - self.implied()
    mean_square = (residuals.values ** 2).mean()
    return np.sqrt(mean_square)
def dataInfo(self):
    """Return descriptive statistics for every column of ``self.data``.

    :return: list ``[mean, std, skewness, kurtosis, w]`` where ``w`` holds
        the first value returned by ``scipy.stats.shapiro`` for each column.
    """
    sd_ = np.std(self.data, 0)
    mean_ = np.mean(self.data, 0)
    skew = scipy.stats.skew(self.data)
    kurtosis = scipy.stats.kurtosis(self.data)
    # ``.ix`` was removed from pandas; the columns are addressed by position.
    w = [scipy.stats.shapiro(self.data.iloc[:, i])[0]
         for i in range(len(self.data.columns))]
    return [mean_, sd_, skew, kurtosis, w]
def htmt(self):
    """Return the lower-triangular table of HTMT-style correlation ratios.

    For every pair of latent variables the mean correlation between their
    two indicator blocks is divided by the geometric mean of the two mean
    within-block correlations (presumably the heterotrait-monotrait
    ratio).  NaN ratios are replaced by 0 and only the strictly lower
    triangle is kept.

    :return: DataFrame indexed by ``self.latent`` on both axes.
    """
    htmt_ = pd.DataFrame(pd.DataFrame.corr(self.data_),
                         index=self.manifests, columns=self.manifests)

    mean = []
    allBlocks = []
    for i in range(self.lenlatent):
        block_ = self.Variables['measurement'][
            self.Variables['latent'] == self.latent[i]]
        names = list(block_.values)
        allBlocks.append(names)
        # ``.ix`` was removed from pandas; these lookups are label based.
        block = htmt_.loc[names, names].values
        # Work on a private float copy: blank the diagonal, then ignore
        # zero entries when averaging.
        mean_ = np.array(block - np.diag(np.diag(block)), dtype=float)
        mean_[mean_ == 0] = np.nan
        mean.append(np.nanmean(mean_))

    comb = [[k, j] for k in range(self.lenlatent)
            for j in range(self.lenlatent)]

    comb_ = [np.sqrt(mean[j] * mean[k]) for k, j in comb]

    comb__ = [np.nanmean(htmt_.loc[allBlocks[j], allBlocks[k]].values)
              for k, j in comb]

    htmt__ = np.divide(comb__, comb_)
    htmt__[np.isnan(htmt__)] = 0

    return pd.DataFrame(
        np.tril(htmt__.reshape((self.lenlatent, self.lenlatent)), k=-1),
        index=self.latent, columns=self.latent)