Python numpy 模块,std() 实例源码
我们从Python开源项目中,提取了以下49个代码示例,用于说明如何使用numpy.std()。
def classification_metrics(y, y_pred, threshold):
    """Collect binary-classification diagnostics for one batch.

    Block nesting was reconstructed from a listing that had lost its
    indentation.

    Args:
        y: Boolean ground-truth labels (array-like supporting ``==``).
        y_pred: Predicted scores, compared against ``threshold``.
        threshold: Decision threshold used for fpr / precision / recall.

    Returns:
        dict of metric name -> value. ``'threshold'`` holds the value
        returned by ``threshold_from_predictions(y, y_pred, 0)`` (not the
        ``threshold`` argument). ``'fpr'`` is present only when at least one
        label is negative; ``'auc'`` only when both classes occur;
        ``'precision'``/``'recall'`` additionally require that the
        thresholded predictions are not all identical.
    """
    results = {
        'threshold': threshold_from_predictions(y, y_pred, 0),
        'np.std(y_pred)': np.std(y_pred),
        # ``== True`` is an element-wise comparison on arrays, not a style slip.
        'positive_frac_batch': float(np.count_nonzero(y == True)) / len(y),
    }

    is_negative = y == False
    flagged = y_pred >= threshold
    n_negative = np.count_nonzero(is_negative)
    n_false_alarm = np.count_nonzero(np.logical_and(is_negative, flagged))
    if n_negative > 0:
        results['fpr'] = float(n_false_alarm) / float(n_negative)

    has_both_classes = any(y) and not all(y)
    if has_both_classes:
        results['auc'] = roc_auc_score(y, y_pred)
        if any(flagged) and not all(flagged):
            results['precision'] = precision_score(np.array(y, dtype=np.float32), flagged)
            results['recall'] = recall_score(y, flagged)
    return results
def clipped_linscale_img(img_array,
                         cap=255.0,
                         lomult=2.0,
                         himult=2.0):
    '''
    Clip an image around its median and rescale it linearly.

    The pixel values are limited to the interval

        [median(img_array) - lomult*stdev(img_array),
         median(img_array) + himult*stdev(img_array)]

    and the clipped image is then multiplied by ``cap`` and divided by the
    upper clip limit.
    '''
    centre = np.median(img_array)
    spread = np.std(img_array)
    lower_limit = centre-lomult*spread
    upper_limit = centre+himult*spread
    clipped = np.clip(img_array, lower_limit, upper_limit)
    return cap*clipped/upper_limit
def __SubDoWavelets(self,waveforms):
    """Select the wavelet coefficients that deviate most from normality.

    Each row of ``waveforms`` is decomposed with a 4-level Haar transform.
    For the first ``waveforms.shape[1]`` coefficient columns, values within
    three sample standard deviations of the column mean are scored with
    ``self.__test_ks``; columns with 10 or fewer such values score 0.
    The 10 highest-scoring columns are returned.

    Args:
        waveforms: 2-D array, one waveform per row.

    Returns:
        2-D array holding the selected coefficient columns for every row.
    """
    n_levels = 4
    n_keep = 10
    _, ls = waveforms.shape
    coeffs = np.hstack(
        pywt.wavedec(waveforms, "haar", mode="symmetric", level=n_levels, axis=-1))

    scores = []
    for col in range(ls):
        column = coeffs[:, col]
        half_width = np.std(column, ddof=1)*3
        lower = np.mean(column)-half_width
        upper = np.mean(column)+half_width
        inliers = column[(column > lower) & (column < upper)]
        scores.append(self.__test_ks(inliers) if inliers.size > 10 else 0)

    # argsort is ascending; reverse it so the largest scores come first.
    ranked = np.argsort(scores)[::-1]
    return coeffs[:, ranked[:n_keep]]
def encode_and_store(batch_x, output_dir, file_name):
    """
    Encode a batch with the module-level autoencoder and dump it as JSON.

    Args:
        1. batch_x: Batch of 32*32 images which will go inside our autoencoder.
        2. output_dir: Dir path for storing all encoded features for given `batch_x`.
                       Features will be stored in the form of JSON file.
                       It is concatenated directly with `file_name`, so it
                       must already end with a path separator.
        3. file_name: File name of JSON file (without the `.json` suffix).
    """
    global AUTO_ENCODER
    if AUTO_ENCODER is None:
        load_AE()

    # Standardise every sample on its own (zero mean, unit std).
    standardized = np.zeros(batch_x.shape)
    for idx, sample in enumerate(batch_x):
        standardized[idx] = (sample - np.mean(sample)) / np.std(sample)

    payload = {
        'name' : file_name,
        'encoded': AUTO_ENCODER.transform(standardized).tolist()}
    with open(output_dir+file_name+'.json', 'w') as f:
        json.dump(payload, f)
def get_color_medio(self, roi, a,b,imprimir = False):
    """Return the mean HSV colour of ``roi`` when the region is uniform enough.

    The region is converted (from RGB) to YUV and HSV. Hue values below 5 are
    replaced by 200 before averaging. If the per-channel standard deviation
    of the YUV image is below 12 on all three channels, the mean HSV colour
    is returned, or ``[-10, 0, 0]`` when the mean saturation is below 60
    (treated as white). Otherwise ``None`` is returned.

    Args:
        roi: 3-channel RGB image region of shape ``(rows, cols, 3)``.
        a, b, imprimir: Not used by this method; kept for interface
            compatibility.

    Returns:
        Mean ``[h, s, v]`` array, the list ``[-10, 0, 0]``, or ``None``.
    """
    xl, yl, _ = roi.shape
    roiyuv = cv2.cvtColor(roi, cv2.COLOR_RGB2YUV)
    roihsv = cv2.cvtColor(roi, cv2.COLOR_RGB2HSV)

    h, s, v = cv2.split(roihsv)
    h[h < 5] = 200
    roihsv = cv2.merge((h, s, v))

    std = np.std(roiyuv.reshape(xl*yl, 3), axis=0)
    if not (std[0] < 12 and std[1] < 12 and std[2] < 12):
        # Region is not uniform enough to assign a single colour.
        return None

    media = np.mean(roihsv.reshape(xl*yl, 3), axis=0)
    # Original note: yellow has a saturation of 65 and above 200.
    if media[1] < 60:
        # white
        return [-10, 0, 0]
    return media
def metrics(self, X, y):
    """Evaluate the classifier on ``(X, y)`` and return a dict of diagnostics.

    Block nesting was reconstructed from a listing that had lost its
    indentation.

    Args:
        X: Inputs, forwarded to ``self.predict_proba_with_loss`` and
            ``self.threshold_from_data``.
        y: Boolean ground-truth labels (array supporting ``==``).

    Returns:
        dict with ``'loss'``, ``'threshold'`` and ``'np.std(y_pred)'`` always
        set. ``'fpr'`` is set only when ``y`` contains at least one negative
        label (previously this case raised ``ZeroDivisionError``);
        ``'auc'`` only when both classes occur; ``'precision'``/``'recall'``
        additionally require that the thresholded predictions are not all
        identical.
    """
    results = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # From softmax pair to prob of catastrophe
    results['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    results['threshold'] = threshold
    results['np.std(y_pred)'] = np.std(y_pred)

    # ``== False`` is an element-wise comparison on arrays.
    is_negative = y == False
    flagged = y_pred >= threshold
    denom = np.count_nonzero(is_negative)
    num = np.count_nonzero(np.logical_and(is_negative, flagged))
    if denom > 0:
        # The false-positive rate is undefined without negative examples.
        results['fpr'] = float(num) / float(denom)

    if any(y) and not all(y):
        results['auc'] = roc_auc_score(y, y_pred)
        if any(flagged) and not all(flagged):
            results['precision'] = precision_score(np.array(y, dtype=np.float32), flagged)
            results['recall'] = recall_score(y, flagged)
    return results
def classification_metrics(y, y_pred, threshold):
    """Collect binary-classification diagnostics for one batch.

    Block nesting was reconstructed from a listing that had lost its
    indentation.

    Args:
        y: Boolean ground-truth labels (array-like supporting ``==``).
        y_pred: Predicted scores, compared against ``threshold``.
        threshold: Decision threshold used for fpr / precision / recall.

    Returns:
        dict of metric name -> value. ``'threshold'`` holds the value
        returned by ``threshold_from_predictions(y, y_pred, 0)`` (not the
        ``threshold`` argument). ``'fpr'`` is present only when at least one
        label is negative; ``'auc'`` only when both classes occur;
        ``'precision'``/``'recall'`` additionally require that the
        thresholded predictions are not all identical.
    """
    results = {
        'threshold': threshold_from_predictions(y, y_pred, 0),
        'np.std(y_pred)': np.std(y_pred),
        # ``== True`` is an element-wise comparison on arrays, not a style slip.
        'positive_frac_batch': float(np.count_nonzero(y == True)) / len(y),
    }

    is_negative = y == False
    flagged = y_pred >= threshold
    n_negative = np.count_nonzero(is_negative)
    n_false_alarm = np.count_nonzero(np.logical_and(is_negative, flagged))
    if n_negative > 0:
        results['fpr'] = float(n_false_alarm) / float(n_negative)

    has_both_classes = any(y) and not all(y)
    if has_both_classes:
        results['auc'] = roc_auc_score(y, y_pred)
        if any(flagged) and not all(flagged):
            results['precision'] = precision_score(np.array(y, dtype=np.float32), flagged)
            results['recall'] = recall_score(y, flagged)
    return results
def metrics(self, X, y):
    """Evaluate the classifier on ``(X, y)`` and return a dict of diagnostics.

    Block nesting was reconstructed from a listing that had lost its
    indentation.

    Args:
        X: Inputs, forwarded to ``self.predict_proba_with_loss`` and
            ``self.threshold_from_data``.
        y: Boolean ground-truth labels (array supporting ``==``).

    Returns:
        dict with ``'loss'``, ``'threshold'`` and ``'np.std(y_pred)'`` always
        set. ``'fpr'`` is set only when ``y`` contains at least one negative
        label (previously this case raised ``ZeroDivisionError``);
        ``'auc'`` only when both classes occur; ``'precision'``/``'recall'``
        additionally require that the thresholded predictions are not all
        identical.
    """
    results = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # From softmax pair to prob of catastrophe
    results['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    results['threshold'] = threshold
    results['np.std(y_pred)'] = np.std(y_pred)

    # ``== False`` is an element-wise comparison on arrays.
    is_negative = y == False
    flagged = y_pred >= threshold
    denom = np.count_nonzero(is_negative)
    num = np.count_nonzero(np.logical_and(is_negative, flagged))
    if denom > 0:
        # The false-positive rate is undefined without negative examples.
        results['fpr'] = float(num) / float(denom)

    if any(y) and not all(y):
        results['auc'] = roc_auc_score(y, y_pred)
        if any(flagged) and not all(flagged):
            results['precision'] = precision_score(np.array(y, dtype=np.float32), flagged)
            results['recall'] = recall_score(y, flagged)
    return results
def metrics(self, X, y):
    """Evaluate the classifier on ``(X, y)`` and return a dict of diagnostics.

    Block nesting was reconstructed from a listing that had lost its
    indentation.

    Args:
        X: Inputs, forwarded to ``self.predict_proba_with_loss`` and
            ``self.threshold_from_data``.
        y: Boolean ground-truth labels (array supporting ``==``).

    Returns:
        dict with ``'loss'``, ``'threshold'`` and ``'np.std(y_pred)'`` always
        set. ``'fpr'`` is set only when ``y`` contains at least one negative
        label (previously this case raised ``ZeroDivisionError``);
        ``'auc'`` only when both classes occur; ``'precision'``/``'recall'``
        additionally require that the thresholded predictions are not all
        identical.
    """
    results = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # From softmax pair to prob of catastrophe
    results['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    results['threshold'] = threshold
    results['np.std(y_pred)'] = np.std(y_pred)

    # ``== False`` is an element-wise comparison on arrays.
    is_negative = y == False
    flagged = y_pred >= threshold
    denom = np.count_nonzero(is_negative)
    num = np.count_nonzero(np.logical_and(is_negative, flagged))
    if denom > 0:
        # The false-positive rate is undefined without negative examples.
        results['fpr'] = float(num) / float(denom)

    if any(y) and not all(y):
        results['auc'] = roc_auc_score(y, y_pred)
        if any(flagged) and not all(flagged):
            results['precision'] = precision_score(np.array(y, dtype=np.float32), flagged)
            results['recall'] = recall_score(y, flagged)
    return results
def metrics(self, X, y):
    """Evaluate the classifier on ``(X, y)`` and return a dict of diagnostics.

    Block nesting was reconstructed from a listing that had lost its
    indentation.

    Args:
        X: Inputs, forwarded to ``self.predict_proba_with_loss`` and
            ``self.threshold_from_data``.
        y: Boolean ground-truth labels (array supporting ``==``).

    Returns:
        dict with ``'loss'``, ``'threshold'`` and ``'np.std(y_pred)'`` always
        set. ``'fpr'`` is set only when ``y`` contains at least one negative
        label (previously this case raised ``ZeroDivisionError``);
        ``'auc'`` only when both classes occur; ``'precision'``/``'recall'``
        additionally require that the thresholded predictions are not all
        identical.
    """
    results = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # From softmax pair to prob of catastrophe
    results['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    results['threshold'] = threshold
    results['np.std(y_pred)'] = np.std(y_pred)

    # ``== False`` is an element-wise comparison on arrays.
    is_negative = y == False
    flagged = y_pred >= threshold
    denom = np.count_nonzero(is_negative)
    num = np.count_nonzero(np.logical_and(is_negative, flagged))
    if denom > 0:
        # The false-positive rate is undefined without negative examples.
        results['fpr'] = float(num) / float(denom)

    if any(y) and not all(y):
        results['auc'] = roc_auc_score(y, y_pred)
        if any(flagged) and not all(flagged):
            results['precision'] = precision_score(np.array(y, dtype=np.float32), flagged)
            results['recall'] = recall_score(y, flagged)
    return results
def classification_metrics(y, y_pred, threshold):
    """Collect binary-classification diagnostics for one batch.

    Block nesting was reconstructed from a listing that had lost its
    indentation.

    Args:
        y: Boolean ground-truth labels (array-like supporting ``==``).
        y_pred: Predicted scores, compared against ``threshold``.
        threshold: Decision threshold used for fpr / precision / recall.

    Returns:
        dict of metric name -> value. ``'threshold'`` holds the value
        returned by ``threshold_from_predictions(y, y_pred, 0)`` (not the
        ``threshold`` argument). ``'fpr'`` is present only when at least one
        label is negative; ``'auc'`` only when both classes occur;
        ``'precision'``/``'recall'`` additionally require that the
        thresholded predictions are not all identical.
    """
    results = {
        'threshold': threshold_from_predictions(y, y_pred, 0),
        'np.std(y_pred)': np.std(y_pred),
        # ``== True`` is an element-wise comparison on arrays, not a style slip.
        'positive_frac_batch': float(np.count_nonzero(y == True)) / len(y),
    }

    is_negative = y == False
    flagged = y_pred >= threshold
    n_negative = np.count_nonzero(is_negative)
    n_false_alarm = np.count_nonzero(np.logical_and(is_negative, flagged))
    if n_negative > 0:
        results['fpr'] = float(n_false_alarm) / float(n_negative)

    has_both_classes = any(y) and not all(y)
    if has_both_classes:
        results['auc'] = roc_auc_score(y, y_pred)
        if any(flagged) and not all(flagged):
            results['precision'] = precision_score(np.array(y, dtype=np.float32), flagged)
            results['recall'] = recall_score(y, flagged)
    return results
def fit(self, X_train, y_train, X_valid, y_valid, X_test, y_test, steps=400):
"""Run ``steps`` training iterations, printing progress and metrics.

NOTE(review): this listing has lost its indentation, so which statements
belong to the ``for`` loop and to the ``if i % 10 == 0`` branch cannot be
read off the text -- confirm against the upstream file before re-indenting.
NOTE(review): ``sess`` is not defined in this block (it is not
``self.sess``); presumably a module-level session -- confirm.
"""
tf.global_variables_initializer().run()
# Capture output written to STDERR while training; collected via stop() below.
redirect=FDRedirector(STDERR)
for i in range(steps):
redirect.start()
# Feed the labels plus every feature tensor with its matching training column.
feed_dict = {self.labels:y_train}
for key, tensor in self.features.items():
feed_dict[tensor] = X_train[key]
# NOTE(review): the second fetched value (from self.train_op) is used as the loss below -- confirm.
predictions, loss = sess.run([self.prediction, self.train_op], feed_dict=feed_dict)
if i % 10 == 0:
print("step:{} loss:{:.3g} np.std(predictions):{:.3g}".format(i, loss, np.std(predictions)))
# Use the smaller of the validation-derived and training-derived thresholds.
self.threshold = float(min(self.threshold_from_data(X_valid, y_valid), self.threshold_from_data(X_train, y_train)))
# Store the threshold in the graph's "threshold" collection as well.
tf.get_collection_ref("threshold")[0] = self.threshold
self.print_metrics(X_train, y_train, "Training")
self.print_metrics(X_valid, y_valid, "Validation")
# Print whatever was captured from STDERR, if anything.
errors = redirect.stop()
if errors:
print(errors)
self.print_metrics(X_test, y_test, "Test")
def metrics(self, X, y):
    """Evaluate the classifier on ``(X, y)`` and return a dict of diagnostics.

    Block nesting was reconstructed from a listing that had lost its
    indentation.

    Args:
        X: Inputs, forwarded to ``self.predict_proba_with_loss`` and
            ``self.threshold_from_data``.
        y: Boolean ground-truth labels (array supporting ``==``).

    Returns:
        dict with ``'loss'``, ``'threshold'`` and ``'np.std(y_pred)'`` always
        set. ``'fpr'`` is set only when ``y`` contains at least one negative
        label (previously this case raised ``ZeroDivisionError``);
        ``'auc'`` only when both classes occur; ``'precision'``/``'recall'``
        additionally require that the thresholded predictions are not all
        identical.
    """
    results = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # From softmax pair to prob of catastrophe
    results['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    results['threshold'] = threshold
    results['np.std(y_pred)'] = np.std(y_pred)

    # ``== False`` is an element-wise comparison on arrays.
    is_negative = y == False
    flagged = y_pred >= threshold
    denom = np.count_nonzero(is_negative)
    num = np.count_nonzero(np.logical_and(is_negative, flagged))
    if denom > 0:
        # The false-positive rate is undefined without negative examples.
        results['fpr'] = float(num) / float(denom)

    if any(y) and not all(y):
        results['auc'] = roc_auc_score(y, y_pred)
        if any(flagged) and not all(flagged):
            results['precision'] = precision_score(np.array(y, dtype=np.float32), flagged)
            results['recall'] = recall_score(y, flagged)
    return results
def classification_metrics(y, y_pred, threshold):
    """Collect binary-classification diagnostics for one batch.

    Block nesting was reconstructed from a listing that had lost its
    indentation.

    Args:
        y: Boolean ground-truth labels (array-like supporting ``==``).
        y_pred: Predicted scores, compared against ``threshold``.
        threshold: Decision threshold used for fpr / precision / recall.

    Returns:
        dict of metric name -> value. ``'threshold'`` holds the value
        returned by ``threshold_from_predictions(y, y_pred, 0)`` (not the
        ``threshold`` argument). ``'fpr'`` is present only when at least one
        label is negative; ``'auc'`` only when both classes occur;
        ``'precision'``/``'recall'`` additionally require that the
        thresholded predictions are not all identical.
    """
    results = {
        'threshold': threshold_from_predictions(y, y_pred, 0),
        'np.std(y_pred)': np.std(y_pred),
        # ``== True`` is an element-wise comparison on arrays, not a style slip.
        'positive_frac_batch': float(np.count_nonzero(y == True)) / len(y),
    }

    is_negative = y == False
    flagged = y_pred >= threshold
    n_negative = np.count_nonzero(is_negative)
    n_false_alarm = np.count_nonzero(np.logical_and(is_negative, flagged))
    if n_negative > 0:
        results['fpr'] = float(n_false_alarm) / float(n_negative)

    has_both_classes = any(y) and not all(y)
    if has_both_classes:
        results['auc'] = roc_auc_score(y, y_pred)
        if any(flagged) and not all(flagged):
            results['precision'] = precision_score(np.array(y, dtype=np.float32), flagged)
            results['recall'] = recall_score(y, flagged)
    return results
def information_ratio(algorithm_returns, benchmark_returns):
    """
    Mean of the returns in excess of the benchmark divided by their sample
    standard deviation.

    http://en.wikipedia.org/wiki/Information_ratio

    Args:
        algorithm_returns (np.array-like):
            All returns during algorithm lifetime.
        benchmark_returns (np.array-like):
            All benchmark returns during algo lifetime.

    Returns:
        float. Information ratio, or 0.0 when the deviation of the relative
        returns is (tolerantly) zero or NaN.
    """
    active_returns = algorithm_returns - benchmark_returns
    tracking_error = active_returns.std(ddof=1)

    undefined = zp_math.tolerant_equals(tracking_error, 0) or \
        np.isnan(tracking_error)
    if undefined:
        return 0.0
    return np.mean(active_returns) / tracking_error
def _normalise_data(self):
    """Standardise the train/test inputs and the training targets in place.

    The normalisation statistics are always initialised to the identity
    transform (zero mean, unit std). When ``self.normalise_data`` is true
    they are replaced by the column-wise mean/std of the training data and
    ``self.train_x``, ``self.test_x`` and ``self.train_y`` are rescaled.
    ``self.test_x`` is scaled with the *training* statistics.

    Zero standard deviations are replaced by 1 so constant columns do not
    cause a division by zero. The previous target-side check
    (``if self.train_y_std == 0`` followed by item assignment) raised for
    both 1-D targets (scalar std) and multi-column targets (ambiguous truth
    value); ``np.where`` handles both.
    """
    self.train_x_mean = np.zeros(self.input_dim)
    self.train_x_std = np.ones(self.input_dim)
    self.train_y_mean = np.zeros(self.output_dim)
    self.train_y_std = np.ones(self.output_dim)

    if not self.normalise_data:
        return

    self.train_x_mean = np.mean(self.train_x, axis=0)
    x_std = np.std(self.train_x, axis=0)
    self.train_x_std = np.where(x_std == 0, 1., x_std)

    # np.full broadcasts the statistics to the data shape and casts them to float32.
    self.train_x = (self.train_x - np.full(self.train_x.shape, self.train_x_mean, dtype=np.float32)) / \
        np.full(self.train_x.shape, self.train_x_std, dtype=np.float32)
    self.test_x = (self.test_x - np.full(self.test_x.shape, self.train_x_mean, dtype=np.float32)) / \
        np.full(self.test_x.shape, self.train_x_std, dtype=np.float32)

    self.train_y_mean = np.mean(self.train_y, axis=0)
    y_std = np.std(self.train_y, axis=0)
    self.train_y_std = np.where(y_std == 0, 1., y_std)
    self.train_y = (self.train_y - self.train_y_mean) / self.train_y_std
def test(self, input_path, output_path):
    """Run the trained model on every input file and write one HDF5 file each.

    Args:
        input_path: Iterable of input file paths.
        output_path: Directory for the result files. Each result is named
            ``<self.roi[1]>_<basename of the input>``.

    Raises:
        Exception: If ``self.load()`` reports that no model was found.

    For ``self.roi[0] < 0`` the arg-max label map is stored under
    ``'predictions'``; otherwise channel 1 of the probabilities is stored
    under ``'probs'``.
    """
    if not self.load()[0]:
        raise Exception("No model is found, please train first")

    mean, std = self.sess.run([self.mean, self.std])
    # Single-image batch buffer that is refilled for every input file.
    images = np.empty((1, self.im_size[0], self.im_size[1], self.im_size[2], 1), dtype=np.float32)
    for f in input_path:
        images[0, ..., 0], read_info = read_testing_inputs(f, self.roi[0], self.im_size, output_path)
        # NOTE(review): is_training is fed as True at inference time -- confirm this is intentional.
        probs = self.sess.run(self.probs, feed_dict={self.images: (images - mean) / std,
                                                     self.is_training: True,
                                                     self.keep_prob: 1})
        output_file = os.path.join(output_path, self.roi[1] + '_' + os.path.basename(f))
        # Context manager closes the file even if restoring/writing fails.
        with h5py.File(output_file, 'w') as f_h5:
            if self.roi[0] < 0:
                f_h5['predictions'] = restore_labels(np.argmax(probs[0], 3), self.roi[0], read_info)
            else:
                f_h5['probs'] = restore_labels(probs[0, ..., 1], self.roi[0], read_info)
def lowpass_random(n_samples, cutoff, n_dim=None, rng = None, normalize = False, slope=0):
    """
    Return a random lowpass-filtered signal.

    :param n_samples: Number of samples to draw.
    :param cutoff: Filter cutoff, between 0 (pure DC) and 1 (sample frequency).
    :param n_dim: If given, draw an (n_samples, n_dim) signal instead of a 1-D one.
    :param rng: Passed through get_rng to obtain the random generator.
    :param normalize: If true, divide the filtered signal by its standard deviation.
    :param slope: If non-zero, add a linear ramp of slope*sample_index.
    :return: The filtered (and optionally normalized / ramped) signal.
    """
    rng = get_rng(rng)
    assert 0<=cutoff<=1, "Cutoff must be in the range 0 (pure DC) to 1 (sample frequency)"

    if n_dim is None:
        noise = rng.randn(n_samples)
    else:
        noise = rng.randn(n_samples, n_dim)

    signal = lowpass(noise, cutoff)
    if normalize:
        signal = signal/np.std(signal)
    if slope != 0:
        ramp = slope*np.arange(len(signal))
        if n_dim is not None:
            # Column vector so the ramp broadcasts across all dimensions.
            ramp = ramp[:, None]
        signal = signal+ramp
    return signal
def __test_ks(self,x):
    """Return the maximum gap between the empirical CDF of ``x`` and a normal CDF.

    NaNs are dropped, the remaining values are z-scored with their own mean
    and sample standard deviation, and the empirical step function is
    compared against the standard normal CDF on both sides of every step.

    Args:
        x: 1-D numpy array of samples (not modified).

    Returns:
        The largest absolute CDF difference.
    """
    sample = np.sort(x[~np.isnan(x)])
    n = sample.size
    ecdf = np.arange(1, n+1)/float(n)

    # Keep only the last element of every run of equal values.
    is_run_end = np.hstack([np.diff(sample, 1), [1]]) > 0
    support = sample[is_run_end]
    steps = np.hstack([[0], ecdf[is_run_end]])

    z = (support-np.mean(sample))/np.std(sample, ddof=1)
    mu = 0
    sigma = 1
    model_cdf = 0.5*erfc(-(z-mu)/(np.sqrt(2)*sigma))

    below = steps[:-1]-model_cdf
    above = steps[1:]-model_cdf
    return np.abs(np.hstack([below, above])).max()
def __SubDoWavelets(self,waveforms):
    """Select the wavelet coefficients that deviate most from normality.

    Each row of ``waveforms`` is decomposed with a 4-level Haar transform.
    For the first ``waveforms.shape[1]`` coefficient columns, values within
    three sample standard deviations of the column mean are scored with
    ``self.__test_ks``; columns with 10 or fewer such values score 0.
    The 10 highest-scoring columns are returned.

    Args:
        waveforms: 2-D array, one waveform per row.

    Returns:
        2-D array holding the selected coefficient columns for every row.
    """
    n_levels = 4
    n_keep = 10
    _, ls = waveforms.shape
    coeffs = np.hstack(
        pywt.wavedec(waveforms, "haar", mode="symmetric", level=n_levels, axis=-1))

    scores = []
    for col in range(ls):
        column = coeffs[:, col]
        half_width = np.std(column, ddof=1)*3
        lower = np.mean(column)-half_width
        upper = np.mean(column)+half_width
        inliers = column[(column > lower) & (column < upper)]
        scores.append(self.__test_ks(inliers) if inliers.size > 10 else 0)

    # argsort is ascending; reverse it so the largest scores come first.
    ranked = np.argsort(scores)[::-1]
    return coeffs[:, ranked[:n_keep]]
def __test_ks(self,x):
    """Return the maximum gap between the empirical CDF of ``x`` and a normal CDF.

    NaNs are dropped, the remaining values are z-scored with their own mean
    and sample standard deviation, and the empirical step function is
    compared against the standard normal CDF on both sides of every step.

    Args:
        x: 1-D numpy array of samples (not modified).

    Returns:
        The largest absolute CDF difference.
    """
    sample = np.sort(x[~np.isnan(x)])
    n = sample.size
    ecdf = np.arange(1, n+1)/float(n)

    # Keep only the last element of every run of equal values.
    is_run_end = np.hstack([np.diff(sample, 1), [1]]) > 0
    support = sample[is_run_end]
    steps = np.hstack([[0], ecdf[is_run_end]])

    z = (support-np.mean(sample))/np.std(sample, ddof=1)
    mu = 0
    sigma = 1
    model_cdf = 0.5*erfc(-(z-mu)/(np.sqrt(2)*sigma))

    below = steps[:-1]-model_cdf
    above = steps[1:]-model_cdf
    return np.abs(np.hstack([below, above])).max()
def zscore(x):
    """Z-score a vector: subtract the mean and divide by the standard deviation.

    When the standard deviation is exactly 0 an all-zero array shaped like
    ``x`` is returned instead of dividing by zero.

    Parameters
    ----------
    x: list of int
        Input time-series

    Returns
    -------
    z: list of float
        Z-score normalized time-series
    """
    spread = np.std(x)
    if spread == 0:
        return np.zeros_like(x)
    return (x - np.mean(x))/spread
def process(dic, p, s = 0, normalize = 1.0):
    """Print the mean score of each algorithm and paired t-tests between them.

    Args:
        dic: Mapping from ``(p, algo)`` to a sequence of result tuples, for
            ``algo`` in ``'vs_true'``, ``'vs_false'``, ``'tc'``, ``'mv'``.
        p: First component of the keys to look up.
        s: Index of the tuple field to analyse.
        normalize: Divisor applied to the values before reporting.

    Output is produced with single pre-formatted strings so it is identical
    under the Python 2 print statement and the Python 3 print function.
    """
    algos = ['vs_true', 'vs_false', 'tc', 'mv']
    data = {}
    for algo in algos:
        # Transpose the list of tuples and pick field ``s``; list() is needed
        # because zip() is not subscriptable on Python 3.
        y = list(zip(*dic[(p, algo)]))[s]
        m = np.mean(y)
        print("%s %s %.4f" % (p, algo, m/normalize))
        data[algo] = np.asarray(y) * 1.0 / normalize

    # One-sample t-tests on the paired differences against a mean of 0.
    print('vsfalse %s' % (scipy.stats.ttest_1samp(data['tc'] - data['vs_false'], 0),))
    print('tc %s' % (scipy.stats.ttest_1samp(data['tc'] - data['vs_true'], 0),))
    print('mv %s' % (scipy.stats.ttest_1samp(data['mv'] - data['vs_true'], 0),))
def mean_variance_normalisation(h5f, mvn_h5f, vad=None):
    """Do mean variance normalization. Optionally use a vad.

    The ``features`` dataset of the first group in ``h5f`` is normalised
    with its global mean and standard deviation; ``h5f`` is then copied to
    ``mvn_h5f`` and the copy's ``features`` dataset is overwritten with the
    normalised values.

    Parameters:
    ----------
    h5f: str. h5features file name
    mvn_h5f: str, h5features output name
    vad: must be None; VAD support is not implemented.

    Raises:
        NotImplementedError: If ``vad`` is given.
    """
    # Open read-only and close deterministically; list() keeps the key
    # lookup working where keys() returns a non-subscriptable view.
    with h5py.File(h5f, 'r') as source:
        dset = list(source.keys())[0]
        if vad is not None:
            raise NotImplementedError
        data = source[dset]['features'][:]

    # Machine epsilon guards against a zero standard deviation.
    epsilon = np.finfo(data.dtype).eps
    mvn_features = (data - np.mean(data)) / (np.std(data) + epsilon)

    shutil.copy(h5f, mvn_h5f)
    # 'r+' is required explicitly: the copy must exist and be writable.
    with h5py.File(mvn_h5f, 'r+') as target:
        target[dset]['features'][:] = mvn_features
def update_summary(
    var_up,
    var,
    start,
    end,
):
    """Summarise the difference between an updated value and a reference.

    Args:
        var_up: Updated value(s).
        var: Reference value(s); also the denominator of the relative
            difference.
        start: Start time (``datetime``).
        end: End time (``datetime``).

    Returns:
        Tuple ``(mean |diff|, std |diff|, mean rel. diff, std rel. diff,
        (end - start).microseconds)``. NaN relative differences (e.g. 0/0)
        are excluded from the relative statistics.
    """
    diff = np.abs(var_up - var)
    # Division by zero is expected here; the resulting NaNs are filtered below.
    with np.errstate(divide='ignore', invalid='ignore'):
        reldiff = diff / var
    # filter out nan's
    try:
        reldiff = reldiff[~np.isnan(reldiff)]
    except (TypeError, IndexError):
        # Inputs that cannot be mask-indexed (e.g. plain scalars) are used as-is.
        pass
    return (np.mean(diff), np.std(diff), np.mean(reldiff),
            np.std(reldiff), (end - start).microseconds)
def apply_metric_results_macro_average(results, metric,
                                       print_full_result=False):
    """Replace every list of result pairs by its ``mean \\pm std`` summary.

    Args:
        results: ``{method: {train_perc: [(a, b), ...]}}``. Modified in
            place: each list becomes ``(summary_string, n_samples)``.
        metric: Callable ``metric(a, b, train_perc=..., max_train=...)``
            returning a number. It is evaluated once per pair.
        print_full_result: If true, also print the individual scores as
            ``train_perc:method,score,score,...``.
    """
    for method in results:
        per_method = results[method]
        max_train = max(per_method.keys())
        # sorted() copies the keys, so replacing values while looping is safe.
        for train_perc in sorted(per_method.keys()):
            pairs = per_method[train_perc]
            samples = len(pairs)
            scores = [metric(a, b, train_perc=train_perc, max_train=max_train)
                      for (a, b) in pairs]
            if print_full_result:
                print(':'.join(map(str, [train_perc, method])) + ','
                      + ','.join('{:.2f}'.format(x) for x in scores))
            metric_val = ' '.join(['%.2f' % np.mean(scores), r"\pm",
                                   '%.2f' % np.std(scores)])
            per_method[train_perc] = (metric_val, samples)
def test_bootstrap_replicate_1d(data, seed):
    """Check ``dcst.bootstrap_replicate_1d`` against the reference version.

    For the mean, median and standard deviation, both implementations are
    run with the same random seed; the reference one receives ``data`` with
    NaNs removed. The results must both be NaN or agree within ``atol``.
    """
    for stat in (np.mean, np.median, np.std):
        np.random.seed(seed)
        x = dcst.bootstrap_replicate_1d(data, stat)
        np.random.seed(seed)
        x_correct = original.bootstrap_replicate_1d(data[~np.isnan(data)], stat)
        # np.isnan takes no atol/equal_nan keywords; passing them raised
        # TypeError whenever the first operand was NaN.
        assert (np.isnan(x) and np.isnan(x_correct)) \
            or np.isclose(x, x_correct, atol=atol, equal_nan=True)
def inspect(self, output = True):
    ''' Summarise the image data: mean, standard deviation, max, min and
        shape of ``self.data``.

        With ``output`` true the formatted summary string is printed to the
        console and returned; otherwise the raw values are returned as the
        tuple ``(mean, std, max, min, shape)``.
    '''
    pixels = self.data
    mean_val = np.mean(pixels)
    std_val = np.std(pixels)
    max_val = np.max(pixels)
    min_val = np.min(pixels)
    dims = pixels.shape

    if not output:
        return (mean_val, std_val, max_val, min_val, dims)

    template = "Mean: {0:.2f} | Std: {1:.2f} | Max: {2:.2f}|Min: {3:.2f} | Dim: {4[0]}x{4[1]}"
    summary = template.format(mean_val, std_val, max_val, min_val, dims)
    print(summary)
    return summary
def csvwrite(_imagefile, _feature_data, write_dir):
    """Write the per-cluster features to a CSV file and move it to ``write_dir``.

    The CSV is first written next to the image (the image path without its
    extension, with ``"IR"`` replaced by ``"Features"``, plus ``.csv``) and
    then renamed to ``<write_dir>/output.csv``.

    Args:
        _imagefile: Path of the source image; determines the temporary CSV name.
        _feature_data: Iterable of dicts, one per cluster. Keys that are not
            CSV columns are dropped.
        write_dir: Destination directory for ``output.csv``.
    """
    print("Writing FEATURE.CSV file...")
    feature_file = os.path.splitext(_imagefile)[0]
    feature_file = feature_file.replace("IR", "Features")
    name = feature_file + '.csv'
    with open(name, 'w') as csvfile:
        fieldnames = ['mean_value', 'euler_number', 'major_axis', 'area', 'solidity', 'std', 'eccentricity',
                      'eq_diameter', 'minor_axis']
        fieldnames.extend(getHistFeatureKeys())
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for cluster in _feature_data:
            data = {key: value for key, value in cluster.items() if key in fieldnames}
            writer.writerow(data)
    print(write_dir)
    # os.path.join uses the platform's separator instead of a hard-coded backslash.
    os.rename(name, os.path.join(write_dir, "output.csv"))
    print("FEATURE.CSV file is Written")
def updatePlot(self, data):
""" Update the plot with new data.

Each column of ``data`` is centred, scaled by its standard deviation and
by ``self.chRange/5.0``, shifted by the matching entry of ``self.offsets``
and written to the line registered for that channel name.

NOTE(review): this listing has lost its indentation, so it cannot be told
whether ``plt.draw()`` runs once after the loop or once per channel --
confirm against the upstream file.
"""
# Make this object's figure the current one.
plt.figure(self.fig.number)
#assert (data.shape[1] == self.nbCh), 'new data does not have the same number of channels'
#assert (data.shape[0] == self.nbPoints), 'new data does not have the same number of points'
# Remove the per-column mean.
data = data - np.mean(data,axis=0)
std_data = np.std(data,axis=0)
# Avoid dividing by zero for constant columns.
std_data[np.where(std_data == 0)] = 1
data = data/std_data*self.chRange/5.0
for i, chName in enumerate(self.chNames):
self.chLinesDict[chName].set_ydata(data[:,i]+self.offsets[i])
plt.draw()
def normalise_images(X):
    '''
    Helper for making the images zero mean and unit standard deviation i.e. `white`

    Every image ``X[i]`` of the 4-D batch is normalised with its own mean
    and standard deviation. The result is returned as float32.
    '''
    whitened = np.zeros(X.shape, dtype=np.float32)
    for idx, image in enumerate(X):
        whitened[idx, :, :, :] = np.divide(image - image.mean(), image.std())
    return whitened.astype(np.float32)
def get_tm_opp(pts1, pts2):
    """Estimate the similarity transform mapping ``pts1`` onto ``pts2``.

    Transformation matrix (translation + scaling + rotation) obtained with
    Procrustes analysis.

    Args:
        pts1: Source points, one point per row.
        pts2: Target points, same layout as ``pts1``.
            NOTE(review): the ``*`` products below are matrix products only
            for ``np.matrix`` inputs -- presumably that is what callers
            pass; confirm.

    Returns:
        The transform in homogeneous form with a final row ``[0, 0, 1]``.

    The inputs are copied explicitly: the centring and scaling below work in
    place, and ``np.float64(array)`` does not guarantee a fresh array, so
    the caller's data could otherwise be modified.
    """
    pts1 = np.array(pts1, dtype=np.float64, subok=True)
    pts2 = np.array(pts2, dtype=np.float64, subok=True)
    m1 = np.mean(pts1, axis = 0)
    m2 = np.mean(pts2, axis = 0)
    # Removing translation
    pts1 -= m1
    pts2 -= m2
    std1 = np.std(pts1)
    std2 = np.std(pts2)
    std_r = std2/std1
    # Removing scaling
    pts1 /= std1
    pts2 /= std2
    U, S, V = np.linalg.svd(np.transpose(pts1) * pts2)
    # Finding the rotation matrix
    R = np.transpose(U * V)
    return np.vstack([np.hstack((std_r * R,
        np.transpose(m2) - std_r * R * np.transpose(m1))), np.matrix([0.0, 0.0, 1.0])])
def normaliza(self, X):
    """Centre the columns of ``X`` and scale them by a corrected std.

    Each column is divided by ``np.std(column) * sqrt((n - 1) / n)`` where
    ``n = len(X)``.

    Args:
        X: 2-D array-like, one observation per row.

    Returns:
        The centred and scaled data.
    """
    # float() keeps the ratio a true division on Python 2 as well; integer
    # division would make the correction 0 and the scaling divide by zero.
    n_rows = float(len(X))
    correction = np.sqrt((n_rows - 1) / n_rows)  # std factor correction
    mean_ = np.mean(X, 0)
    scale_ = np.std(X, 0)
    X = X - mean_
    X = X / (scale_ * correction)
    return X
def dataInfo(self):
    """Return descriptive statistics for every column of ``self.data``.

    Returns:
        ``[mean, std, skewness, kurtosis, w]`` where the first four entries
        are computed column-wise and ``w`` is the list of Shapiro-Wilk W
        statistics, one per column.
    """
    sd_ = np.std(self.data, 0)
    mean_ = np.mean(self.data, 0)
    skew = scipy.stats.skew(self.data)
    kurtosis = scipy.stats.kurtosis(self.data)
    # Positional column access; DataFrame.ix is no longer available in pandas.
    w = [scipy.stats.shapiro(self.data.iloc[:, i])[0]
         for i in range(len(self.data.columns))]
    return [mean_, sd_, skew, kurtosis, w]
def normaliza(X):
    """Standardise the columns of ``X``: subtract the column mean and divide
    by the column standard deviation (population std, ``ddof=0``)."""
    centred = X - np.mean(X, 0)
    return centred / (np.std(X, 0))
# FOC = predictors (X)
# HOC = response (Y)
# T as scores
def bench_on(runner, sym, Ns, trials, dtype=None):
"""Benchmark the kernel named ``sym`` for every size in ``Ns``.

Returns the module-level ``out`` array, allocated here with shape
``(len(Ns), trials)``. State is shared with helpers through the globals
declared below.

NOTE(review): ``dtype`` is not referenced in this body.
NOTE(review): ``body``, ``empty_work``, ``prepare_default`` and ``log`` are
defined outside this block; presumably ``body(trials)`` produces the
callable that fills ``out`` -- confirm against their definitions.
"""
global args, kernel, out, mkl_layer
# Optional per-symbol overrides are looked up by name in the module globals;
# without a kernel override, np.linalg.<sym> is benchmarked.
prepare = globals().get("prepare_"+sym, prepare_default)
kernel = globals().get("kernel_"+sym, None)
if not kernel:
kernel = getattr(np.linalg, sym)
# The text before the first '.' of each docstring is used as a label.
out_lvl = runner.__doc__.split('.')[0].strip()
func_s = kernel.__doc__.split('.')[0].strip()
log.debug('Preparing input data for %s (%s).. ' % (sym, func_s))
# One prepared argument tuple per problem size.
args = [prepare(int(i)) for i in Ns]
it = range(len(Ns))
# pprint(Ns)
out = np.empty(shape=(len(Ns), trials))
b = body(trials)
tic, toc = (0, 0)
log.debug('Warming up %s (%s).. ' % (sym, func_s))
# Warm-up: exercise the runner and call the kernel once before timing.
runner(range(1000), empty_work)
kernel(*args[0])
runner(range(1000), empty_work)
log.debug('Benchmarking %s on %s: ' % (func_s, out_lvl))
gc_old = gc.isenabled()
# gc.disable()
# Wall-clock time of the whole benchmark run.
tic = time.time()
runner(it, b)
toc = time.time() - tic
# gc.disable() above is commented out, so this only re-enables a collector
# that was already on.
if gc_old:
gc.enable()
if 'reused_pool' in globals():
del globals()['reused_pool']
#calculate average time and min time and also keep track of outliers (max time in the loop)
# NOTE(review): these four statistics are only used by the commented-out
# lines below.
min_time = np.amin(out)
max_time = np.amax(out)
mean_time = np.mean(out)
stdev_time = np.std(out)
#print("Min = %.5f, Max = %.5f, Mean = %.5f, stdev = %.5f " % (min_time, max_time, mean_time, stdev_time))
#final_times = [min_time, max_time, mean_time, stdev_time]
print('## %s: Outter:%s, Inner:%s, Wall seconds:%f\n' % (sym, out_lvl, mkl_layer, float(toc)))
return out
def prewhiten(x):
    """Subtract the global mean of ``x`` and divide by its standard deviation.

    The divisor is never smaller than ``1/sqrt(x.size)``, which keeps the
    result finite for (nearly) constant inputs.
    """
    centred = np.subtract(x, np.mean(x))
    floor = 1.0/np.sqrt(x.size)
    std_adj = np.maximum(np.std(x), floor)
    return np.multiply(centred, 1/std_adj)
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    """Cross-validated validation rate at a target false-accept rate.

    For every fold, the threshold whose training-split FAR equals
    ``far_target`` is found by interpolating over ``thresholds`` (0.0 is
    used when no threshold reaches the target), and ``calculate_val_far``
    is then evaluated on the test split with that threshold.

    Args:
        thresholds: Candidate distance thresholds.
        embeddings1, embeddings2: Paired embeddings with identical shapes.
        actual_issame: Ground-truth flag per pair.
        far_target: Desired false-accept rate.
        nrof_folds: Number of cross-validation folds.

    Returns:
        ``(val_mean, val_std, far_mean)`` over the folds.
    """
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    n_pairs = min(len(actual_issame), embeddings1.shape[0])
    n_thresholds = len(thresholds)
    folds = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    # Squared Euclidean distance between the paired embeddings.
    dist = np.sum(np.square(np.subtract(embeddings1, embeddings2)), 1)
    pair_ids = np.arange(n_pairs)

    for fold_idx, (train_set, test_set) in enumerate(folds.split(pair_ids)):
        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(n_thresholds)
        for t_idx, candidate in enumerate(thresholds):
            _, far_train[t_idx] = calculate_val_far(candidate, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            far_to_threshold = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = far_to_threshold(far_target)
        else:
            threshold = 0.0
        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    return np.mean(val), np.std(val), np.mean(far)
def std_normalize(batch):
    """Standardise every sample of ``batch`` with its own mean and std.

    Returns a new float array with the same shape as ``batch``.
    """
    standardized = np.zeros(batch.shape)
    for row, sample in enumerate(batch):
        standardized[row] = (sample - np.mean(sample)) / np.std(sample)
    return standardized
# Argument parser. This script expects 2 necessary positional args.
def standardize(self, x):
"""Apply the configured normalisation steps to a single image and return it.

The steps run in this fixed order, each only if its option is set:
preprocessing function, rescale, sample-wise centring, sample-wise std
normalisation, feature-wise centring, feature-wise std normalisation,
ZCA whitening. The feature-wise and ZCA steps need statistics stored on
``self``; when those are ``None`` a warning is issued and the step is
skipped.

Several steps use in-place operators (``*=``, ``-=``, ``/=``), so they
modify the array object they operate on.
"""
if self.preprocessing_function:
x = self.preprocessing_function(x)
if self.rescale:
x *= self.rescale
# x is a single image, so it doesn't have image number at index 0
img_channel_axis = self.channel_axis - 1
if self.samplewise_center:
x -= np.mean(x, axis=img_channel_axis, keepdims=True)
if self.samplewise_std_normalization:
# The small constant guards against division by zero.
x /= (np.std(x, axis=img_channel_axis, keepdims=True) + 1e-7)
if self.featurewise_center:
if self.mean is not None:
x -= self.mean
else:
warnings.warn('This ImageDataGenerator specifies '
'`featurewise_center`, but it hasn\'t'
'been fit on any training data. Fit it '
'first by calling `.fit(numpy_data)`.')
if self.featurewise_std_normalization:
if self.std is not None:
x /= (self.std + 1e-7)
else:
warnings.warn('This ImageDataGenerator specifies '
'`featurewise_std_normalization`, but it hasn\'t'
'been fit on any training data. Fit it '
'first by calling `.fit(numpy_data)`.')
if self.zca_whitening:
if self.principal_components is not None:
# Flatten, project onto the stored components, restore the 3-D shape.
flatx = np.reshape(x, (x.size))
whitex = np.dot(flatx, self.principal_components)
x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2]))
else:
warnings.warn('This ImageDataGenerator specifies '
'`zca_whitening`, but it hasn\'t'
'been fit on any training data. Fit it '
'first by calling `.fit(numpy_data)`.')
return x
def get_regression_data(name, split, data_path=data_path):
    """Load a regression dataset from CSV, split it and whiten the inputs.

    The file ``<data_path><name>.csv`` is downloaded first when it does not
    exist. The last column is the target, except for ``'energy'`` and
    ``'naval'`` which carry two targets; only the first of them is used.

    Returns:
        ``(X, Y, Xs, Ys)``: whitened train inputs, train targets as a
        column, whitened test inputs (using the train statistics) and test
        targets as a column.
    """
    path = '{}{}.csv'.format(data_path, name)
    if not os.path.isfile(path):
        download(name +'.csv', data_path=data_path)
    data = pandas.read_csv(path, header=None).values

    # there are two Ys for 'energy' and 'naval', but take only the first
    n_targets = 2 if name in ['energy', 'naval'] else 1
    X_full = data[:, :-n_targets]
    Y_full = data[:, -n_targets]

    X, Y, Xs, Ys = make_split(X_full, Y_full, split)

    ############# whiten inputs
    X_mean = np.average(X, 0)
    X_std = np.std(X, 0)+1e-6
    X = (X - X_mean)/X_std
    Xs = (Xs - X_mean)/X_std
    return X, Y[:, None], Xs, Ys[:, None]
def test_against_numpy_std(self):
    """The final value yielded by ``istd`` must match ``np.std`` of the
    stacked stream for every tested combination of axis and ddof."""
    stream = [np.random.random((16, 7, 3)) for _ in range(10)]
    stacked = np.stack(stream, axis = -1)

    with catch_warnings():
        simplefilter('ignore')
        for axis in (0, 1, 2, None):
            for ddof in range(4):
                label = 'axis = {}, ddof = {}'.format(axis, ddof)
                with self.subTest(label):
                    expected = np.std(stacked, axis = axis, ddof = ddof)
                    streamed = last(istd(stream, axis = axis, ddof = ddof))
                    self.assertSequenceEqual(expected.shape, streamed.shape)
                    self.assertTrue(np.allclose(streamed, expected))
def calculate_volatility(self, daily_returns):
    """Annualised volatility: sample std of ``daily_returns`` times sqrt(252).

    Returns 0.0 when fewer than two returns are available.
    """
    has_enough_data = len(daily_returns) > 1
    if not has_enough_data:
        return 0.0
    daily_vol = np.std(daily_returns, ddof=1)
    return daily_vol * math.sqrt(252)
def calculate_volatility(self, daily_returns):
    """Sample std of ``daily_returns`` scaled by sqrt(``self.num_trading_days``)."""
    daily_vol = np.std(daily_returns, ddof=1)
    annualisation = math.sqrt(self.num_trading_days)
    return daily_vol * annualisation
def downside_risk(algorithm_returns, mean_returns, normalization_factor):
    """Sample std of the returns that fall below the mean returns, scaled by
    sqrt(``normalization_factor``).

    Both inputs are rounded to 8 decimals before comparison. Returns 0.0
    when at most one return lies below its reference value.
    """
    rets = algorithm_returns.round(8)
    mar = mean_returns.round(8)
    below = rets < mar
    shortfall = rets[below] - mar[below]
    if len(shortfall) > 1:
        return np.std(shortfall, ddof=1) * math.sqrt(normalization_factor)
    return 0.0
def test_stddev(context, data):
    """
    Tests the stddev transform by manually keeping track of the prices
    in a naive way and asserting that our stddev is the same.
    This accounts for the corrected ddof.
    """
    # Number of trailing bars covered by the last ``context.days`` days.
    window = sum(context.mins_for_days[-context.days:])
    for sid in data:
        actual = data[sid].stddev(context.days)
        expected = np.std(context.price_bars[sid][-window:], ddof=1)
        assert_allclose(actual, expected)
def summarize_bootstrapped_top_n(top_n_boot):
    """Summarise bootstrap replicates of a count.

    Returns a dict with the variance, the coefficient of variation
    (std / mean via ``tk_stats.robust_divide``), the rounded 2.5% and 97.5%
    quantiles of a normal distribution with the sample mean and std, and
    the rounded mean.
    """
    mean = np.mean(top_n_boot)
    sd = np.std(top_n_boot)
    return {
        'filtered_bcs_var': np.var(top_n_boot),
        'filtered_bcs_cv': tk_stats.robust_divide(sd, mean),
        'filtered_bcs_lb': round(scipy.stats.norm.ppf(0.025, mean, sd)),
        'filtered_bcs_ub': round(scipy.stats.norm.ppf(0.975, mean, sd)),
        'filtered_bcs': round(mean),
    }
def __compute_bnn_training_error(self):
    """Compute BNN training error on most recent episode.

    The mean and standard deviation of the per-sample errors returned by
    ``self.network.get_td_error`` are stored in ``self.mean_episode_errors``
    and ``self.std_episode_errors`` at ``[instance_iter, episode_iter]``.
    The mean is printed when ``self.print_output`` is set.
    """
    exp = np.reshape(self.episode_buffer_bnn, (len(self.episode_buffer_bnn), -1))
    # Iterating the rows directly avoids the Python-2-only ``xrange``.
    # Fields 0 and 1 of every row form the input, field 3 the target.
    episode_X = np.array([np.hstack([row[0], row[1]]) for row in exp])
    episode_Y = np.array([row[3] for row in exp])
    if self.state_diffs:
        # subtract previous state
        episode_Y -= episode_X[:, :self.num_dims]
    # The weight set is appended to every input row.
    network_input = np.hstack([episode_X, np.tile(self.weight_set, (episode_X.shape[0], 1))])
    l2_errors = self.network.get_td_error(network_input, episode_Y, 0.0, 1.0)
    self.mean_episode_errors[self.instance_iter, self.episode_iter] = np.mean(l2_errors)
    self.std_episode_errors[self.instance_iter, self.episode_iter] = np.std(l2_errors)
    if self.print_output:
        print('BNN Error: {}'.format(self.mean_episode_errors[self.instance_iter, self.episode_iter]))
def __repr__(self):
    """Multi-line summary of the sampler settings and normalisation statistics."""
    parts = [
        f'Sampler: {self.sampler_type}\n',
        f'Train size: {self.train_size}\n',
        f'Test size: {self.test_size}\n',
        f'Normalise: {self.normalise_data}\n',
        f'X: mean={self.train_x_mean}, std={self.train_x_std}\n',
        f'Y: mean={self.train_y_mean}, std={self.train_y_std}\n',
    ]
    return ''.join(parts)