Python numpy module: count_nonzero() example source code
We collected the following code examples from open-source Python projects to illustrate how numpy.count_nonzero() is used in practice.
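Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below): np.count_nonzero returns the number of non-zero elements of an array, optionally per axis, and is often applied to a boolean condition to count matching elements.

import numpy as np

a = np.array([[0, 1, 7, 0],
              [3, 0, 2, 19]])

print(np.count_nonzero(a))          # 5: total number of non-zero elements
print(np.count_nonzero(a, axis=0))  # [1 1 2 1]: per-column counts (axis= requires numpy >= 1.12)
print(np.count_nonzero(a > 2))      # 3: count of elements satisfying a condition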
def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
def action_label_counts(directory, data_loader, n_actions=18, n=None):
    episode_paths = frame.episode_paths(directory)
    label_counts = [0, 0]
    action_label_counts = [[0, 0] for i in range(n_actions)]
    if n is not None:
        np.random.shuffle(episode_paths)
        episode_paths = episode_paths[:n]
    for episode_path in tqdm.tqdm(episode_paths):
        try:
            features, labels = data_loader.load_features_and_labels([episode_path])
        except:
            traceback.print_exc()
        else:
            for label in range(len(label_counts)):
                label_counts[label] += np.count_nonzero(labels == label)
                for action in range(n_actions):
                    actions = np.reshape(np.array(features["action"]), [-1])
                    action_label_counts[action][label] += np.count_nonzero(
                        np.logical_and(labels == label, actions == action))
    return label_counts, action_label_counts
def metrics(self, X, y):
    metrics = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  ## From softmax pair to prob of catastrophe
    metrics['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    metrics['threshold'] = threshold
    metrics['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
def _set_seq_qual_metrics(self, seq, qual, seq_type, cache):
    cache.seq_types.add(seq_type)
    qvs = tk_fasta.get_qvs(qual)
    num_bases_q30 = np.count_nonzero(qvs >= 30)
    # Don't count no-calls towards Q30 denominator.
    # Assume no-calls get Q <= 2
    num_bases_called = np.count_nonzero(qvs > 2)
    num_bases = len(seq)
    num_bases_n = seq.count('N')
    cache.total_bases[seq_type] += num_bases
    cache.called_bases[seq_type] += num_bases_called
    cache.q30_bases[seq_type] += num_bases_q30
    cache.n_bases[seq_type] += num_bases_n
def test_fill_missing():
    info = CloudVolume.create_new_info(
        num_channels=1,  # Increase this number when we add more tests for RGB
        layer_type='image',
        data_type='uint8',
        encoding='raw',
        resolution=[1, 1, 1],
        voxel_offset=[0, 0, 0],
        volume_size=[128, 128, 64],
        mesh='mesh',
        chunk_size=[64, 64, 64],
    )
    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0, info=info)
    vol.commit_info()

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0, fill_missing=True)
    assert np.count_nonzero(vol[:]) == 0

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0, fill_missing=True, cache=True)
    assert np.count_nonzero(vol[:]) == 0
    assert np.count_nonzero(vol[:]) == 0

    vol.flush_cache()
    delete_layer('/tmp/cloudvolume/empty_volume')
def _constrained_sum_sample_pos(n, total):
    # In this setting, no empty groups will be generated by this function.
    n = int(n)
    total = int(total)
    normalized_list = [int(total) + 1]
    while sum(normalized_list) > total and np.greater_equal(normalized_list, np.zeros(n)).all():
        indicator = True
        while indicator:
            normalized_list = list(map(round, map(lambda x: x * total, np.random.dirichlet(np.ones(n), 1).tolist()[0])))
            normalized_list = list(map(int, normalized_list))
            indicator = len(normalized_list) - np.count_nonzero(normalized_list) != 0
        sum_ = 0
        for ind, q in enumerate(normalized_list):
            if ind < len(normalized_list) - 1:
                sum_ += q
        # TODO: there is a bug here; sometimes -1 is assigned to the last element yet the while condition still passes
        normalized_list[len(normalized_list) - 1] = abs(total - sum_)
    assert sum(normalized_list) == total, "ERROR: the constrainedSumSamplePos-sampled list does not sum to #edges."
    return map(str, normalized_list)
def calculateCoM(self, dpt):
    """
    Calculate the center of mass
    :param dpt: depth image
    :return: (x,y,z) center of mass
    """
    dc = dpt.copy()
    dc[dc < self.minDepth] = 0
    dc[dc > self.maxDepth] = 0
    cc = ndimage.measurements.center_of_mass(dc > 0)
    num = numpy.count_nonzero(dc)
    com = numpy.array((cc[1]*num, cc[0]*num, dc.sum()), numpy.float)
    if num == 0:
        return numpy.array((0, 0, 0), numpy.float)
    else:
        return com/num
def compute_test_accuracy(X_test, Y_test, model, prediction_type, cellgroup_map_array):
    prediction = model.predict(X_test)
    auc = []
    if prediction_type == "cellgroup":
        prediction = np.dot(prediction, cellgroup_map_array)
        Y_test = np.dot(Y_test, cellgroup_map_array)
    mask = ~np.logical_or(Y_test.sum(1) == 0, Y_test.sum(1) == Y_test.shape[1])
    for y, pred in zip(Y_test.T, prediction.T):
        pos = np.logical_and(mask, y == 1)
        neg = np.logical_and(mask, y == 0)
        try:
            U = stats.mannwhitneyu(pred[pos], pred[neg])[0]
            auc.append(1. - U / (np.count_nonzero(pos) * np.count_nonzero(neg)))
        except ValueError:
            auc.append(0.5)
    return auc
def aePredict(self, graph):
    self.initCG()
    graph = graph.cleaned()
    carriers = self.getLSTMFeatures(graph.nodes)
    beamconf = AEBeamConfiguration(len(graph.nodes), 1, np.array(graph.heads), self.stack_features, self.buffer_features)
    beamconf.initconf(0, self.root_first)
    while not beamconf.isComplete(0):
        valid = beamconf.validTransitions(0)
        if np.count_nonzero(valid) < 1:
            break
        scores, exprs = self._aeEvaluate(beamconf.extractFeatures(0), carriers)
        best, bestscore = max(((i, s) for i, s in enumerate(scores) if valid[i]), key=itemgetter(1))
        beamconf.makeTransition(0, best)
    graph.heads = [i if i > 0 else 0 for i in list(beamconf.getHeads(0))]
    return graph
def test_nonzero_twodim(self):
    x = np.array([[0, 1, 0], [2, 0, 3]])
    assert_equal(np.count_nonzero(x), 3)
    assert_equal(np.nonzero(x), ([0, 1, 1], [1, 0, 2]))

    x = np.eye(3)
    assert_equal(np.count_nonzero(x), 3)
    assert_equal(np.nonzero(x), ([0, 1, 2], [0, 1, 2]))

    x = np.array([[(0, 1), (0, 0), (1, 11)],
                  [(1, 1), (1, 0), (0, 0)],
                  [(0, 0), (1, 5), (0, 1)]], dtype=[('a', 'f4'), ('b', 'u1')])
    assert_equal(np.count_nonzero(x['a']), 4)
    assert_equal(np.count_nonzero(x['b']), 5)
    assert_equal(np.nonzero(x['a']), ([0, 1, 1, 2], [2, 0, 1, 1]))
    assert_equal(np.nonzero(x['b']), ([0, 0, 1, 2, 2], [0, 2, 0, 1, 2]))

    assert_(not x['a'].T.flags.aligned)
    assert_equal(np.count_nonzero(x['a'].T), 4)
    assert_equal(np.count_nonzero(x['b'].T), 5)
    assert_equal(np.nonzero(x['a'].T), ([0, 1, 1, 2], [1, 1, 2, 0]))
    assert_equal(np.nonzero(x['b'].T), ([0, 0, 1, 2, 2], [0, 1, 2, 0, 2]))
def test_zeros(self):
    types = np.typecodes['AllInteger'] + np.typecodes['AllFloat']
    for dt in types:
        d = np.zeros((13,), dtype=dt)
        assert_equal(np.count_nonzero(d), 0)
        # true for ieee floats
        assert_equal(d.sum(), 0)
        assert_(not d.any())

    d = np.zeros(2, dtype='(2,4)i4')
    assert_equal(np.count_nonzero(d), 0)
    assert_equal(d.sum(), 0)
    assert_(not d.any())

    d = np.zeros(2, dtype='4i4')
    assert_equal(np.count_nonzero(d), 0)
    assert_equal(d.sum(), 0)
    assert_(not d.any())

    d = np.zeros(2, dtype='(2,4)i4, (2,4)i4')
    assert_equal(np.count_nonzero(d), 0)
def decode(self, vec, pretty=False, strict=True):
    # TODO: Whether we should use 'strict' mode depends on whether the model
    # we got this vector from does softmax sampling of visibles. Anywhere this
    # is called on fantasy samples, we should use the model to set this param.
    if issparse(vec):
        vec = vec.toarray().reshape(-1)
    assert vec.shape == (self.nchars * self.maxlen,)
    chars = []
    for position_index in range(self.maxlen):
        # Hack - insert a tab between name parts in binomial mode
        if isinstance(self, BinomialShortTextCodec) and pretty and position_index == self.maxlen/2:
            chars.append('\t')
        subarr = vec[position_index * self.nchars:(position_index + 1) * self.nchars]
        if np.count_nonzero(subarr) != 1 and strict:
            char = self.MYSTERY
        else:
            char_index = np.argmax(subarr)
            char = self.alphabet[char_index]
            if pretty and char == self.FILLER:
                # Hack
                char = ' ' if isinstance(self, BinomialShortTextCodec) else ''
        chars.append(char)
    return ''.join(chars)
def count_per_month(career_months_array):
    '''Month_Form
    Returns the number of employees remaining (not yet retired) for each month.

    Counts the elements of the career_months_array input (np array) that are
    greater than or equal to each incremental loop month number.

    Note: an alternate method to this function is a value count of mnums:
        df_actives_each_month = pd.DataFrame(df_idx.mnum.value_counts())
        df_actives_each_month.columns = ['count']

    input
        career_months_array
            output of the career_months function.  This input is an array
            containing the number of months each employee will work until
            retirement.
    '''
    max_career = career_months_array.max() + 1
    emp_count_array = np.zeros(max_career)
    for i in range(0, max_career):
        emp_count_array[i] = np.count_nonzero(career_months_array >= i)
    return emp_count_array.astype(int)
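A quick usage sketch for count_per_month with made-up data (not from the source project):

import numpy as np

career_months = np.array([2, 5, 5, 1])  # months each employee works until retirement
print(count_per_month(career_months))   # [4 4 3 2 2 2]: active employees in months 0..5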
# GENERATE MONTH SKELETON
def describe_numeric_1d(series, **kwargs):
    stats = {'mean': series.mean(), 'std': series.std(), 'variance': series.var(), 'min': series.min(),
             'max': series.max()}
    stats['range'] = stats['max'] - stats['min']
    for x in np.array([0.05, 0.25, 0.5, 0.75, 0.95]):
        stats[pretty_name(x)] = series.dropna().quantile(x)  # The dropna() is a workaround for https://github.com/pydata/pandas/issues/13098
    stats['iqr'] = stats['75%'] - stats['25%']
    stats['kurtosis'] = series.kurt()
    stats['skewness'] = series.skew()
    stats['sum'] = series.sum()
    stats['mad'] = series.mad()
    stats['cv'] = stats['std'] / stats['mean'] if stats['mean'] else np.NaN
    stats['type'] = "NUM"
    stats['n_zeros'] = (len(series) - np.count_nonzero(series))
    stats['p_zeros'] = stats['n_zeros'] / len(series)
    # Histograms
    stats['histogram'] = histogram(series, **kwargs)
    stats['mini_histogram'] = mini_histogram(series, **kwargs)
    return pd.Series(stats, name=series.name)
def get_symmetry_code_tri(pts):
    if len(pts) == 1:
        return '_s3()'
    elif len(pts) == 3:
        # Symmetry group [[a, a, b], [a, b, a], [b, a, a]].
        # Find the equal value `a`.
        tol = 1.0e-12
        beta = pts[0] - pts[0][0]
        ct = numpy.count_nonzero(abs(beta) < tol)
        assert ct in [1, 2], beta
        val = pts[0][0] if ct == 2 else pts[0][1]
        return '_s21({:.15e})'.format(val)
    # Symmetry group [[a, b, c], [c, a, b], ...].
    assert len(pts) == 6
    # Take the two largest values from a, b, c.
    pt0 = numpy.sort(pts[0])
    return '_s111({:.15e}, {:.15e})'.format(pt0[2], pt0[1])
def score(self):
    'Return score from B perspective. If W is winning, score is negative.'
    working_board = np.copy(self.board)
    while EMPTY in working_board:
        unassigned_spaces = np.where(working_board == EMPTY)
        c = unassigned_spaces[0][0], unassigned_spaces[1][0]
        territory, borders = find_reached(working_board, c)
        border_colors = set(working_board[b] for b in borders)
        X_border = BLACK in border_colors
        O_border = WHITE in border_colors
        if X_border and not O_border:
            territory_color = BLACK
        elif O_border and not X_border:
            territory_color = WHITE
        else:
            territory_color = UNKNOWN  # dame, or seki
        place_stones(working_board, territory_color, territory)
    return np.count_nonzero(working_board == BLACK) - np.count_nonzero(working_board == WHITE) - self.komi
def step(self):
    """
    Perform one half-step of the k-means algorithm.
    """
    if self.step_completed:
        d = self.data.X
        points = [d[self.clusters == i] for i in range(len(self.centroids))]
        for i in range(len(self.centroids)):
            c_points = points[i]
            self.centroids[i, :] = (np.average(c_points, axis=0)
                                    if len(c_points) > 0 else np.nan)
        # reinitialize empty centroids
        nan_c = np.isnan(self.centroids).any(axis=1)
        if np.count_nonzero(nan_c) > 0:
            self.centroids[nan_c] = self.random_positioning(
                np.count_nonzero(nan_c))
        self.centroids_moved = True
    else:
        self.clusters = self.find_clusters(self.centroids)
        self.centroids_moved = False
    self.step_no += 1
    self.centroids_history = self.set_list(
        self.centroids_history, self.step_no, np.copy(self.centroids))
def rmse(self, tid_counts):
    error = np.zeros(shape=[self.cls_nb])
    err_nb = 0
    self._progress('\ntid \t true_count \t obs_count \t difference',
                   end='\n', verbosity=VERBOSITY.VERBOSE)
    for tid in tid_counts:
        true_counts = self.tid_counts[tid]
        obs_counts = tid_counts[tid]
        diff = np.asarray(true_counts) - np.asarray(obs_counts)
        err_nb += np.count_nonzero(diff)
        error += diff*diff
        if diff.any():
            self._progress('{} \t{} \t{} \t{}'.format(tid, true_counts, obs_counts, diff),
                           end='\n', verbosity=VERBOSITY.VERBOSE)
    error /= len(tid_counts)
    rmse = np.sqrt(error).sum() / self.cls_nb
    error_fraction = err_nb / (len(tid_counts) * self.cls_nb)
    return rmse, error_fraction
def test_rank_archimedean_spiral():
    def archimedean_spiral(n_steps=100, max_radius=1.0, turns=4.0):
        r = np.linspace(0.0, max_radius, n_steps)
        angle = r * 2.0 * np.pi * turns / max_radius
        x = r * np.cos(angle)
        y = r * np.sin(angle)
        return np.hstack((x[:, np.newaxis], y[:, np.newaxis])), r

    X_train, r_train = archimedean_spiral(n_steps=100)
    X_test, r_test = archimedean_spiral(n_steps=1000, max_radius=1.1)
    rsvm = RankingSVM(random_state=0)
    rsvm.fit(X_train)
    y_train = rsvm.predict(X_train)
    y_test = rsvm.predict(X_test)
    assert_true(np.all(y_train[1:] < y_train[:-1]))
    assert_greater(np.count_nonzero(y_test[1:] < y_test[:-1]), 970)
def ser(x, y):
    """Measure symbol error rate between symbols in x and y.

    :param x: symbol array #1
    :param y: symbol array #2
    :returns: symbol error rate

    >>> import arlpy
    >>> arlpy.comms.ser([0,1,2,3], [0,1,2,2])
    0.25
    """
    x = _np.asarray(x, dtype=_np.int)
    y = _np.asarray(y, dtype=_np.int)
    n = _np.product(_np.shape(x))
    e = _np.count_nonzero(x ^ y)
    return float(e)/n
def tokenize(self, file_name):
    """Tokenizes the file and produces a dataset."""
    lines = read_lines(file_name)
    random.shuffle(lines)

    unk = self.word_dict.get_idx('<unk>')
    dataset, total, unks = [], 0, 0
    for line in lines:
        tokens = line.split()
        input_idxs = self.context_dict.w2i(get_tag(tokens, 'input'))
        word_idxs = self.word_dict.w2i(get_tag(tokens, 'dialogue'))
        item_idxs = self.item_dict.w2i(get_tag(tokens, 'output'))
        dataset.append((input_idxs, word_idxs, item_idxs))
        # compute statistics
        total += len(input_idxs) + len(word_idxs) + len(item_idxs)
        unks += np.count_nonzero([idx == unk for idx in word_idxs])

    if self.verbose:
        print('dataset %s, total %d, unks %s, ratio %0.2f%%' % (
            file_name, total, unks, 100. * unks / total))
    return dataset
def polyfit_baseline(bands, intensities, poly_order=5, num_stdv=3.,
                     max_iter=200):
    '''Iteratively fits a polynomial, discarding far away points as peaks.
    Similar in spirit to ALS and related methods.
    Automated method for subtraction of fluorescence from biological Raman spectra
    Lieber & Mahadevan-Jansen 2003
    '''
    fit_pts = intensities.copy()
    # precalculate [x^p, x^p-1, ..., x^1, x^0]
    poly_terms = bands[:, None] ** np.arange(poly_order, -1, -1)
    for _ in range(max_iter):
        coefs = np.polyfit(bands, fit_pts.T, poly_order)
        baseline = poly_terms.dot(coefs).T
        diff = fit_pts - baseline
        thresh = diff.std(axis=-1) * num_stdv
        mask = diff > np.array(thresh, copy=False)[..., None]
        unfitted = np.count_nonzero(mask)
        if unfitted == 0:
            break
        fit_pts[mask] = baseline[mask]  # these points are peaks, discard
    else:
        print("Warning: polyfit_baseline didn't converge in %d iters" % max_iter)
    return baseline
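A rough usage sketch for polyfit_baseline on synthetic data (the axis, toy signal, and parameter choice here are made up for illustration):

import numpy as np

bands = np.linspace(400.0, 1800.0, 500)                        # hypothetical wavenumber axis
signal = 0.002 * bands + np.exp(-((bands - 1000.0) / 5.0)**2)  # slow baseline plus one sharp peak
baseline = polyfit_baseline(bands, signal, poly_order=2)
corrected = signal - baseline                                  # peak remains, slow trend is largely removed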
def restore_shape(arry, step, r):
    '''Reduces and adjusts the shape and content of `arry` according to r.

    Args:
      arry: A 2d array with shape of [T, C]
      step: An int. Overlapping span.
      r: Reduction factor

    Returns:
      A 2d array with shape of [-1, C*r]
    '''
    T, C = arry.shape
    sliced = np.split(arry, list(range(step, T, step)), axis=0)
    started = False
    for s in sliced:
        if not started:
            restored = np.vstack(np.split(s, r, axis=1))
            started = True
        else:
            restored = np.vstack((restored, np.vstack(np.split(s, r, axis=1))))

    # Trim zero paddings
    restored = restored[:np.count_nonzero(restored.sum(axis=1))]
    return restored
def get_index_first_non_zero_slice(self, dimension):
    """Get the index of the first non-zero slice in this map.

    Args:
        dimension (int): the dimension to search in

    Returns:
        int: the index of the first slice with non-zero values.
    """
    slice_index = [slice(None)] * (self.max_dimension() + 1)

    if dimension > len(slice_index) - 1:
        raise ValueError('The given dimension {} is not supported.'.format(dimension))

    for index in range(self.shape[dimension]):
        slice_index[dimension] = index
        if np.count_nonzero(self.data[slice_index]) > 0:
            return index
    return 0
def test_get_mask():
    chunk = test_get_chunks(n_chunks=1)[0]
    distance = 3
    n_side = 32
    mask = get_mask(distance, chunk.shape, dims=(2, 1, 0))
    n_side_shell = n_side - 2*distance
    count_exp = 2*n_side_shell**2 + (n_side_shell - 1)*4*(n_side_shell - 2)
    count_got = np.count_nonzero(mask)
    print('DEBUG: non-zeros exp: {} | got: {}'.format(count_exp, count_got))
    assert count_exp == count_got

    distance = 5
    n_side_shell = n_side - 2*distance
    mask = get_mask(distance, chunk.shape, dims=(2, 1))
    count_exp = (n_side_shell - 1)*4*n_side
    count_got = np.count_nonzero(mask)
    print('DEBUG: non-zeros exp: {} | got: {}'.format(count_exp, count_got))
    assert count_exp == count_got
def get_padded_seq_lengths(padded):
    """Returns the number of non-nan elements (seq_len) per sequence.

    :param padded: 2d or 3d tensor with dim 2 the time dimension
    """
    if len(padded.shape) == 2:
        # (n_seqs, n_timesteps)
        seq_lengths = np.count_nonzero(~np.isnan(padded), axis=1)
    elif len(padded.shape) == 3:
        # (n_seqs, n_timesteps, n_features, ...)
        seq_lengths = np.count_nonzero(~np.isnan(padded[:, :, 0]), axis=1)
    else:
        print('not yet implemented')
        # TODO
    return seq_lengths
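A quick sketch of the expected behavior of get_padded_seq_lengths, with hypothetical NaN-padded input:

import numpy as np

padded = np.array([[1.0, 2.0, np.nan, np.nan],
                   [3.0, 4.0, 5.0, np.nan]])
print(get_padded_seq_lengths(padded))  # [2 3]: non-NaN timesteps per sequence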
def import_data(data_csvs_in,
                types_csv_in,
                values_csv_in,
                groups_csv_in,
                dataset_out,
                encoding='utf-8'):
    """Import a comma-delimited list of csv files into internal treecat format.

    Common encodings include: utf-8, cp1252.
    """
    schema = load_schema(types_csv_in, values_csv_in, groups_csv_in, encoding)
    data = np.concatenate([
        load_data(schema, data_csv_in, encoding)
        for data_csv_in in data_csvs_in.split(',')
    ])
    data.flags.writeable = False
    print('Imported data shape: [{}, {}]'.format(data.shape[0], data.shape[1]))
    ragged_index = schema['ragged_index']
    for v, name in enumerate(schema['feature_names']):
        beg, end = ragged_index[v:v + 2]
        count = np.count_nonzero(data[:, beg:end].max(1))
        if count == 0:
            print('WARNING: No values found for feature {}'.format(name))
    feature_types = [TY_MULTINOMIAL] * len(schema['feature_names'])
    table = Table(feature_types, ragged_index, data)
    dataset = {
        'schema': schema,
        'table': table,
    }
    pickle_dump(dataset, dataset_out)
def build_feature_files(base_directory,
                        new_directory,
                        data_loader,
                        n=None,
                        negative_example_keep_prob=1.0):
    os.makedirs(new_directory, exist_ok=False)
    episode_paths = frame.episode_paths(base_directory)
    label_counts = [0, 0]
    if n is not None:
        np.random.shuffle(episode_paths)
        episode_paths = episode_paths[:n]
    for episode_path in tqdm.tqdm(episode_paths):
        try:
            features, labels = data_loader.load_features_and_labels([episode_path])
        except:
            traceback.print_exc()
        else:
            keep = np.logical_or(labels, (np.less(
                np.random.rand(len(labels)), negative_example_keep_prob)))
            labels = labels[keep]
            for i in range(len(label_counts)):
                label_counts[i] += np.count_nonzero(labels == i)
            features = {k: v[keep] for k, v in features.items()}
            new_path = path_relative_to_new_directory(base_directory, new_directory, episode_path,
                                                      ".features")
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            with open(new_path, 'wb') as f:
                pickle.dump((features, labels), f)
    return label_counts
def threshold_from_data(self, X, y):
    y_bool = y == 1.  ## true if x is a catast
    y_pred = self.predict_proba(X)
    if np.count_nonzero(y) == 0:
        return np.max(y_pred)
    return np.min(y_pred[y_bool][:, 1])  # TODO CHANGED FROM WILL CODE
def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determines a threshold for classifying examples as positive.

    Args:
        y: labels
        y_pred: scores from the classifier
        recall: Threshold is set to classify at least this fraction of positive
            labelled examples as positive
        false_positive_margin: Threshold is set to achieve the desired recall, and
            then is extended to include an additional fraction of negative
            labelled examples equal to false_positive_margin (this allows adding
            a buffer to the threshold while maintaining a constant "cost")
    """
    n_positive = np.count_nonzero(y)
    n_negative = len(y) - n_positive
    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        return np.min(y_pred[y])
    ind = np.argsort(y_pred)
    y_pred_sorted = y_pred[ind]
    y_sorted = y[ind]
    so_far = [0, 0]
    j = 0
    for i in reversed(range(len(y_sorted))):
        so_far[y_sorted[i]] += 1
        if so_far[1] >= int(np.floor(recall * n_positive)):
            j = i
            break
    so_far = [0, 0]
    if false_positive_margin == 0:
        return y_pred_sorted[j]
    k = 0
    for i in reversed(range(j)):
        so_far[y_sorted[i]] += 1
        if so_far[0] >= false_positive_margin * n_negative:
            k = i
            break
    return y_pred_sorted[k]
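A minimal usage sketch for threshold_from_predictions with made-up labels and scores; note the function assumes y is a boolean array so that y_pred[y] acts as a mask:

import numpy as np

y = np.array([False, False, True, True, False])    # labels
y_pred = np.array([0.10, 0.40, 0.35, 0.90, 0.20])  # classifier scores
print(threshold_from_predictions(y, y_pred))       # 0.35: lowest score among positive examples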
def predict_proba(self, features):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict_raw(features))
    return np.count_nonzero(predictions)

def predict_proba_raw(self, obs=None, action=None):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict_raw(obs, action))
    return np.count_nonzero(predictions)

def predict_raw(self, obs=None, action=None):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict_raw(obs, action))
    return self.apply_threshold(np.count_nonzero(predictions))

def predict_raw_with_score(self, obs=None, action=None):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict_raw(obs, action))
    return self.apply_threshold(np.count_nonzero(predictions)), np.count_nonzero(predictions)

def predict(self, features):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict(features))
    return self.apply_threshold(np.count_nonzero(predictions))