Python numpy 模块,delete() 实例源码
我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用numpy.delete()。
def get(self, X):
    """Return a copy of ``X`` whose NaN entries are replaced by predictions.

    ``X`` is converted to an array and a copy of it is passed through
    ``self.meanImput``.  When more than one estimator is available, column
    ``i`` is handled by ``self.estimators_[i]``: every row that has a NaN in
    that column is predicted, one row at a time, from the remaining columns
    of the imputed data.  With a single estimator the array is returned
    unchanged.
    """
    X = np.array(X)
    missing = np.isnan(X)
    filled = self.meanImput(X.copy())
    if len(self.estimators_) <= 1:
        return X
    for col, model in enumerate(self.estimators_):
        # Predictors for this column are all the *other* columns.
        other_columns = np.delete(filled, col, 1)
        rows_missing = missing[:, col]
        unknown_rows = other_columns[rows_missing]
        if len(unknown_rows) == 0:
            continue
        X[rows_missing, col] = [model.predict(row) for row in unknown_rows]
    return X
def loadLogoSet(path, rows, cols, test_data_rate=0.15):
    """Load the grayscale logo images and split them into train/test sets.

    Image ids are taken from ``readItems('data.txt')`` and labels from
    ``modelDict(path)``.  Each image is read from
    ``path + 'logo/' + <id> + '.jpg'`` as grayscale and resized to
    ``(rows, cols)``.  Images that cannot be read, or that contain fewer
    than 1000 pixels, are dropped together with their label.

    :param path: dataset root; used as a plain string prefix
    :param rows: target image height
    :param cols: target image width
    :param test_data_rate: fraction of the usable images put in the test set
    :return: ``(x_train, y_train), (x_test, y_test)``
    """
    random.seed(612)  # fixed seed so the split is reproducible
    _, imgID = readItems('data.txt')
    y, _ = modelDict(path)
    nPics = len(y)
    faceassset = np.zeros((nPics, rows, cols), dtype=np.uint8)  # gray images
    noImg = []
    for i in range(nPics):
        temp = cv2.imread(path + 'logo/' + imgID[i] + '.jpg', 0)
        # cv2.imread signals failure by returning None.  The identity test
        # is required: ``temp == None`` on an array compares elementwise and
        # cannot be used as a condition.
        if temp is None or temp.size < 1000:
            noImg.append(i)
        else:
            faceassset[i, :, :] = cv2.resize(
                temp, (cols, rows), interpolation=cv2.INTER_CUBIC)
    y = np.delete(y, noImg, 0)
    faceassset = np.delete(faceassset, noImg, 0)
    nPics = len(y)
    # random.sample needs a real sequence; a NumPy array is not accepted by
    # current Python versions, ``range`` is.
    index = random.sample(range(nPics), int(nPics * test_data_rate))
    x_test = faceassset[index, :, :]
    x_train = np.delete(faceassset, index, 0)
    y_test = y[index]
    y_train = np.delete(y, index, 0)
    return (x_train, y_train), (x_test, y_test)
def repeat(tensor: tf.Tensor, repeats: int, axis: int) -> tf.Tensor:
    """
    Repeat every element of ``tensor`` ``repeats`` times along ``axis``.

    .. note::
        Chaining of this op may produce TF warnings although the performance seems to be unaffected.

    :param tensor: TF tensor to be repeated
    :param repeats: number of repeats
    :param axis: axis to repeat
    :return: tensor with repeated elements
    """
    static_shape = tensor.get_shape().as_list()
    axes = np.arange(len(tensor.shape))
    # Bring ``axis`` to the front so that ``tf.gather`` duplicates along it.
    to_front = np.hstack(([axis], np.delete(axes, axis)))
    # Inverse permutation: move the leading axis back to position ``axis``.
    to_original = np.hstack((axes[1:axis+1], [0], axes[axis+1:]))
    # Index k of the output reads element floor(k / repeats) of the input.
    positions = tf.range(0, static_shape[axis]*repeats)/tf.constant(repeats)
    gather_indices = tf.cast(tf.floor(positions), 'int32')
    front_first = tf.transpose(tensor, to_front)
    duplicated = tf.gather(front_first, gather_indices)
    return tf.transpose(duplicated, to_original)
def main():
    """Fit a decision tree on iris minus three held-out rows and print the
    predictions for those rows."""
    iris = load_iris()
    held_out = [0, 50, 100]

    # Held-out samples (testing data).
    test_target = iris.target[held_out]
    test_data = iris.data[held_out]

    # Everything else is training data.
    train_data = np.delete(iris.data, held_out, axis=0)
    train_target = np.delete(iris.target, held_out)

    # Train the classifier and report what it predicts for the held-out rows.
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(train_data, train_target)
    print(clf.predict(test_data))
# Run main
def _calc_B_for_tetra3d11(nodes,volume):
    """Assemble the 6x12 matrix ``B`` of a four-node tetrahedron.

    A 4x4 matrix is built with rows ``[1, *node.coord]``.  For every node the
    signed 3x3 minors obtained by removing the node's row and column 1, 2 or
    3 give the coefficients ``belta``, ``gama`` and ``delta``; these are
    placed in ``B`` node by node (three columns per node) and the result is
    scaled by ``1 / (6 * volume)``.
    """
    A = np.ones((4, 4))
    for row, nd in enumerate(nodes):
        A[row, 1:] = nd.coord

    belta = np.zeros(4)
    gama = np.zeros(4)
    delta = np.zeros(4)
    for i in range(4):
        without_row = np.delete(A, i, 0)
        belta[i] = (-1)**(i+1)*np.linalg.det(np.delete(without_row, 1, 1))
        gama[i] = (-1)**(i+2)*np.linalg.det(np.delete(without_row, 2, 1))
        delta[i] = (-1)**(i+1)*np.linalg.det(np.delete(without_row, 3, 1))

    pattern = np.zeros((6, 12))
    for n in range(4):
        c = 3*n  # first of the three columns that belong to node n
        pattern[0, c] = belta[n]
        pattern[1, c+1] = gama[n]
        pattern[2, c+2] = delta[n]
        pattern[3, c], pattern[3, c+1] = gama[n], belta[n]
        pattern[4, c+1], pattern[4, c+2] = delta[n], gama[n]
        pattern[5, c], pattern[5, c+2] = delta[n], belta[n]
    B = 1./(6.*volume)*pattern
    return B
def append_neg_and_retrain(self, feat=None, force=False):
    """Add negative features to the cache and retrain when it is due.

    The rows of ``feat`` (if given) are stacked onto ``self.neg``.  Once more
    than ``self.retrain_limit`` rows have been added since the last
    retraining, or when ``force`` is true, ``self.train()`` is run and only
    the negatives scoring at least ``self.evict_thresh`` are kept (the cache
    is left as it is when none qualifies).

    Returns the first element of the ``self.train()`` result, or ``None``
    when no retraining took place.
    """
    if feat is not None:
        added = feat.shape[0]
        self.neg = np.vstack((self.neg, feat))
        self.num_neg_added += added

    if not (self.num_neg_added > self.retrain_limit or force):
        return None

    self.num_neg_added = 0
    new_w_b, pos_scores, neg_scores = self.train()
    # Row indices of the negatives that are *not* easy, i.e. that score at
    # or above the eviction threshold.
    hard_rows = np.where(neg_scores >= self.evict_thresh)[0]
    if len(hard_rows) > 0:
        self.neg = self.neg[hard_rows, :]
    print(' Pruning easy negatives')
    print(' Cache holds {} pos examples and {} neg examples'.
          format(self.pos.shape[0], self.neg.shape[0]))
    print(' {} pos support vectors'.format((pos_scores <= 1).sum()))
    print(' {} neg support vectors'.format((neg_scores >= -1).sum()))
    return new_w_b
def tune_tal(mono_phi_score, tal_list):
    """Pick the entry of ``tal_list`` with the lowest leave-one-out error.

    For every candidate ``tal`` each row of ``mono_phi_score`` is held out in
    turn: ``train_predict_regression`` is called with the remaining rows, the
    held-out row without its last column and ``tal``; the squared difference
    between the returned value and the held-out row's last column is
    recorded.  The mean of these squared errors is printed per candidate.

    :param mono_phi_score: 2-D array-like; the last column is the target
    :param tal_list: candidate values to try
    :return: the candidate with the smallest mean squared error
        (the first one on ties)
    """
    tals = list(tal_list)
    errs = []
    for tal in tals:
        squared_errors = []
        for i, held_out in enumerate(mono_phi_score):
            remaining = numpy.delete(mono_phi_score, i, axis=0)
            value_h, _alpha = train_predict_regression(
                remaining, held_out[:-1], tal)
            squared_errors.append((value_h - held_out[-1])**2)
        err = numpy.mean(squared_errors)
        errs.append(err)
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('regression tal: {} err {}'.format(tal, err))
    return tals[numpy.argmin(errs)]
def append_neg_and_retrain(self, feat=None, force=False):
    """Add negative features to the cache and retrain when it is due.

    The rows of ``feat`` (if given) are stacked onto ``self.neg``.  Once more
    than ``self.retrain_limit`` rows have been added since the last
    retraining, or when ``force`` is true, ``self.train()`` is run and only
    the negatives scoring at least ``self.evict_thresh`` are kept (the cache
    is left as it is when none qualifies).

    Returns the first element of the ``self.train()`` result, or ``None``
    when no retraining took place.
    """
    if feat is not None:
        added = feat.shape[0]
        self.neg = np.vstack((self.neg, feat))
        self.num_neg_added += added

    if not (self.num_neg_added > self.retrain_limit or force):
        return None

    self.num_neg_added = 0
    new_w_b, pos_scores, neg_scores = self.train()
    # Row indices of the negatives that are *not* easy, i.e. that score at
    # or above the eviction threshold.
    hard_rows = np.where(neg_scores >= self.evict_thresh)[0]
    if len(hard_rows) > 0:
        self.neg = self.neg[hard_rows, :]
    print(' Pruning easy negatives')
    print(' Cache holds {} pos examples and {} neg examples'.
          format(self.pos.shape[0], self.neg.shape[0]))
    print(' {} pos support vectors'.format((pos_scores <= 1).sum()))
    print(' {} neg support vectors'.format((neg_scores >= -1).sum()))
    return new_w_b
def append_neg_and_retrain(self, feat=None, force=False):
    """Add negative features to the cache and retrain when it is due.

    The rows of ``feat`` (if given) are stacked onto ``self.neg``.  Once more
    than ``self.retrain_limit`` rows have been added since the last
    retraining, or when ``force`` is true, ``self.train()`` is run and only
    the negatives scoring at least ``self.evict_thresh`` are kept (the cache
    is left as it is when none qualifies).

    Returns the first element of the ``self.train()`` result, or ``None``
    when no retraining took place.
    """
    if feat is not None:
        added = feat.shape[0]
        self.neg = np.vstack((self.neg, feat))
        self.num_neg_added += added

    if not (self.num_neg_added > self.retrain_limit or force):
        return None

    self.num_neg_added = 0
    new_w_b, pos_scores, neg_scores = self.train()
    # Row indices of the negatives that are *not* easy, i.e. that score at
    # or above the eviction threshold.
    hard_rows = np.where(neg_scores >= self.evict_thresh)[0]
    if len(hard_rows) > 0:
        self.neg = self.neg[hard_rows, :]
    print(' Pruning easy negatives')
    print(' Cache holds {} pos examples and {} neg examples'.
          format(self.pos.shape[0], self.neg.shape[0]))
    print(' {} pos support vectors'.format((pos_scores <= 1).sum()))
    print(' {} neg support vectors'.format((neg_scores >= -1).sum()))
    return new_w_b
def eliminate_overlapping_locations(f, separation):
    """ Makes sure that no two positions are within `separation` of each
    other, by deleting one member of every pair that is too close.
    """
    separation = validate_tuple(separation, f.shape[1])
    assert np.greater(separation, 0).all()
    # In rescaled coordinates a pair is "too close" below a distance of 1.
    f = f / separation
    close_pairs = cKDTree(f, 30).query_pairs(1)
    while len(close_pairs) != 0:
        # Drop the second member of every close pair, then look again: the
        # removal may or may not have resolved all conflicts.
        f = np.delete(f, [second for _, second in close_pairs], 0)
        close_pairs = cKDTree(f, 30).query_pairs(1)
    return f * separation
def setdiff(eq1, eq2):
    """Return the elements of ``eq1`` whose hash does not occur in ``eq2``.

    Both inputs are first passed through ``eqsize``.  Elements are compared
    by ``hash``; the order of ``eq1`` is preserved.

    The previous implementation could not run: it multiplied a list by a
    shape tuple, subscripted ``list.append``, hashed ``eq2`` for both sides,
    relied on the removed ``np.alen`` and used ``np.searchsorted`` on
    unsorted hashes.  Plain set membership replaces all of that.

    :param eq1: array whose elements are filtered
    :param eq2: array of elements to exclude
    :return: ``(p, ia)`` - the surviving elements ``eq1[ia]`` and their
        indices into ``eq1``
    """
    eq1, eq2 = eqsize(eq1, eq2)
    hashes1 = [hash(eq1[i]) for i in range(eq1.size)]
    hashes2 = {hash(eq2[i]) for i in range(eq2.size)}
    ia = np.array([i for i, h in enumerate(hashes1) if h not in hashes2],
                  dtype=int)
    p = eq1[ia]
    return p, ia
def McCormack(x_nods_quantity, grid, transfer_velocity, time_step, x_step):
    """Advance ``grid`` by one time step and return the new values.

    Nodes ``2 .. x_nods_quantity - 2`` are updated.  The sign of
    ``transfer_velocity[0]`` selects the stencil: backward differences
    (nodes ``m-1`` and ``m-2``) for a positive velocity, forward differences
    (nodes ``m+1`` and ``m+2``) otherwise.  The remaining nodes are returned
    unchanged.

    The result is written to a copy.  Previously ``new_grid`` was an alias
    of ``grid``, so the backward stencil read nodes that had already been
    advanced within the same step, and the caller's array was overwritten.

    :param x_nods_quantity: number of nodes used for the loop bounds
    :param grid: values at the current time level (must provide ``copy()``)
    :param transfer_velocity: per-node transfer velocity
    :param time_step: time step
    :param x_step: spatial step
    :return: values at the next time level, same length as ``grid``
    """
    new_grid = grid.copy()
    backward = transfer_velocity[0] > 0
    for m in range(2, x_nods_quantity - 1):
        sigma = transfer_velocity[m] * time_step / x_step
        if backward:
            new_grid[m] = grid[m] - np.dot(sigma, (grid[m] - grid[m-1])) + \
                np.dot(sigma**2, (grid[m] - grid[m-2]))
        else:
            new_grid[m] = grid[m] - np.dot(sigma, (grid[m+1] - grid[m])) + \
                np.dot(sigma**2, (grid[m+2] - grid[m]))
    # The additional node and the border condition are still part of the
    # returned array.
    return new_grid
def test_silence_frame_removal_given_hts_labels():
    """Linguistic features with the silence frames removed must match the
    stored reference file."""
    binary_dict, continuous_dict = hts.load_question_set(
        join(DATA_DIR, "questions-radio_dnn_416.hed"))
    labels = hts.load(
        join(DATA_DIR, "label_state_align", "arctic_a0001.lab"))
    features = fe.linguistic_features(labels,
                                      binary_dict,
                                      continuous_dict,
                                      add_frame_features=True,
                                      subphone_features="full"
                                      )

    # Remove silence frames
    features = np.delete(features, labels.silence_frame_indices(), axis=0)

    # The reference is stored as flat float32 values; restore the rows.
    reference_path = join(DATA_DIR, "nn_no_silence_lab_425",
                          "arctic_a0001.lab")
    y = np.fromfile(reference_path, dtype=np.float32)
    y = y.reshape(-1, features.shape[-1])
    assert features.shape == y.shape
    assert np.allclose(features, y)
# Make sure we can get same results with Merlin
def validate(self):
    """Drop every entry whose wav file is missing.

    Looks for ``<name>.wav`` inside ``<data_root>/<subset>/wav`` for each
    entry of ``self.names``.  Missing files are reported with a single
    warning, and the corresponding entries are removed from both
    ``self.names`` and ``self.transcriptions``.

    Raises ``RuntimeError`` if the wav directory does not exist.
    """
    wav_dir = join(self.data_root, self.subset, "wav")
    if not isdir(wav_dir):
        raise RuntimeError("{} doesn't exist.".format(wav_dir))

    miss_indices = [idx for idx, name in enumerate(self.names)
                    if not exists(join(wav_dir, name + ".wav"))]
    if miss_indices:
        warn("{}/{} wav files were missing in subset {}.".format(
            len(miss_indices), len(self.names), self.subset))

    self.names = np.delete(self.names, miss_indices)
    self.transcriptions = np.delete(self.transcriptions, miss_indices)
def data_split(arr):
    """Split the module-level DataFrame ``df`` by the row labels in ``arr``.

    :param arr: iterable of row labels of ``df``
    :return: ``(df2, df3)`` where ``df2`` is ``df`` without the rows listed
        in ``arr`` and ``df3`` is ``df`` without every label in
        ``0 .. len(df) - 1`` that is not listed in ``arr``
        (presumably ``df`` uses the default 0..n-1 index, in which case
        ``df3`` holds exactly the rows of ``arr`` - confirm with the caller)
    """
    selected = list(arr)
    # Set membership keeps the complement computation linear in len(df).
    selected_set = set(selected)
    df2 = df.drop(selected)
    # ``range`` instead of the Python 2 only ``xrange``.
    df3 = df.drop([i for i in range(len(df)) if i not in selected_set])
    return (df2, df3)
def FileReader(file_list, param_list):
    """Read the given columns from several FITS tables into one array.

    For every path in ``file_list`` the table in HDU 1 is opened, each column
    named in ``param_list`` is taken as one output column, and the rows of
    all files are stacked in order.  Each path is printed as it is read.

    Compared with the previous version the file is closed deterministically
    (``with``), the builtin name ``file`` is no longer shadowed, the
    Python 2 print statement is gone and the arrays are joined once instead
    of being grown with repeated ``np.append`` calls.

    :param file_list: iterable of FITS file paths
    :param param_list: sequence of column names
    :return: 2-D array with one row per table row and one column per
        parameter; shape ``(0, len(param_list))`` if ``file_list`` is empty
    """
    blocks = []
    for path in file_list:
        with fits.open(path, memmap=True) as hdulist:
            data_in = hdulist[1].data
            print(path)
            # The empty float seed keeps the float result type of the
            # original implementation; concatenating also copies the data
            # out of the memory map before the file is closed.
            columns = [np.zeros(shape=(len(data_in), 0))]
            for param in param_list:
                column = data_in[param]
                columns.append(np.reshape(column, (len(column), 1)))
            blocks.append(np.concatenate(columns, axis=1))
    if not blocks:
        return np.zeros(shape=(0, len(param_list)))
    return np.concatenate(blocks, axis=0)
def create_vertex_groups(groups=['common', 'not_used'], weights=[0.0, 0.0], ob=None):
    '''Create vertex groups on an object and set their weights.

    "groups" lists the group names and "weights" the weight for the group at
    the same position. Every vertex of the object is added to every group so
    that no vertex is left without a weight. A group that already exists
    keeps its weights; delete the created groups to reset them. When "ob" is
    None the object from bpy.context is used.'''
    if ob is None:
        ob = bpy.context.object
    vg = ob.vertex_groups
    for position, name in enumerate(groups):
        if name in vg.keys():
            # Adding 0 leaves the weights of an existing group untouched.
            vg[name].add(range(0, len(ob.data.vertices)), 0, 'ADD')
            continue
        vg.new(name)
        vg[name].add(range(0, len(ob.data.vertices)), weights[position], 'REPLACE')
def linregress(self):
    """Linear regression of the mean values in this plot.

    Bad indices are removed before fitting.  Returns a ``self.LinRegress``
    holding the best-fit line evaluated on this plotter's ``t_axis``, the
    drift coefficient, and the result object of ``scipy.stats.linregress``.
    When no data is left after cleaning, the fit values are 0 and the
    regression result is ``None``."""
    cleandata = np.delete(self.plot_vars.means, self.bad_indices.means)
    cleantimes = np.delete(self.t_axis, self.bad_indices.means)
    if len(cleandata) == 0:
        return self.LinRegress(bestfit=0, driftcoeff=0, linregress=None)
    fit = scipy.stats.linregress(cleantimes, cleandata)
    line = fit.slope * self.t_axis + fit.intercept
    drift = fit.slope / SEC_PER[self.t_units]
    return self.LinRegress(bestfit=line, driftcoeff=drift,
                           linregress=fit)
def trend(self):
    """Return the trend selected by ``plot_properties['detrend']``.

    ``'mean'`` gives the mean of the plot's mean values (0 if nothing is
    left after cleaning), ``'linear'`` the least squares best fit taken from
    ``self.linregress``, ``'none'`` gives 0, and any other value is returned
    as the custom trend itself."""
    mode = self.plot_properties['detrend']
    if mode == 'mean':
        # Bad indices are removed first because they can skew the mean.
        cleandata = np.delete(self.plot_vars.means, self.bad_indices.means)
        return cleandata.mean() if len(cleandata) != 0 else 0
    if mode == 'none':
        return 0
    if mode == 'linear':
        best_fit, _driftcoeff, _fit = self.linregress
        return best_fit
    return mode
def plot_timeseries(self, ax, **kwargs):
    """Plot means (with error bars), minima and maxima on ``ax``.

    Values are detrended and divided by ``SEC_PER['ns']`` since the plots
    are in ns, not seconds.  Indices listed in ``self.bad_indices`` are
    removed from each series."""
    def cleaned(values, bad):
        # Drop the bad samples, then rescale.
        return np.delete(values, bad) / SEC_PER['ns']

    # define the variables for our plots
    y = cleaned(self.plot_vars.means - self.trend, self.bad_indices.means)
    t = np.delete(self.t_axis, self.bad_indices.means)
    yerr = cleaned(self.plot_vars.stds, self.bad_indices.means)
    mint = np.delete(self.t_axis, self.bad_indices.mins)
    miny = cleaned(self.plot_vars.mins - self.trend, self.bad_indices.mins)
    maxt = np.delete(self.t_axis, self.bad_indices.maxs)
    maxy = cleaned(self.plot_vars.maxs - self.trend, self.bad_indices.maxs)

    # Empty series are skipped: plotting them triggers an annoying
    # matplotlib bug when legends are added.
    if len(t) != 0:
        ax.errorbar(t, y, marker="o", color="green", linestyle='none',
                    yerr=yerr, label="Means +/- Std. Dev.")
    if len(mint) != 0:
        ax.scatter(mint, miny, marker="^", color="blue", label="Minima")
    if len(maxt) != 0:
        ax.scatter(maxt, maxy, marker="v", color="red", label="Maxima")
def plot_timeseries(self, ax, **kwargs):
    """Plot means (with error bars), absolute minima and absolute maxima.

    Values are detrended and divided by ``SEC_PER['ns']`` since the plots
    are in ns, not seconds.  Indices listed in ``self.bad_indices`` are
    removed from each series."""
    def cleaned(values, bad):
        # Drop the bad samples, then rescale.
        return np.delete(values, bad) / SEC_PER['ns']

    # define the variables for our plots
    t = np.delete(self.t_axis, self.bad_indices.means)
    y = cleaned(self.plot_vars.means - self.trend, self.bad_indices.means)
    yerr = cleaned(self.plot_vars.stds, self.bad_indices.means)
    mint = np.delete(self.t_axis, self.bad_indices.absmins)
    miny = cleaned(self.plot_vars.absmins - self.trend,
                   self.bad_indices.absmins)
    maxt = np.delete(self.t_axis, self.bad_indices.absmaxs)
    maxy = cleaned(self.plot_vars.absmaxs - self.trend,
                   self.bad_indices.absmaxs)

    # Empty series are skipped: plotting them triggers an annoying
    # matplotlib bug when legends are added.
    if len(t) != 0:
        ax.errorbar(t, y, marker="o", color="green", linestyle='none',
                    yerr=yerr, label="Means +/- Std. Dev.")
    if len(mint) != 0:
        ax.scatter(mint, miny, marker="^", color="blue", label="Abs. Minima")
    if len(maxt) != 0:
        ax.scatter(maxt, maxy, marker="v", color="red", label="Abs. Maxima")
def plot_timeseries(self, ax, **kwargs):
    """Plot the recorded signal and mark the segment boundaries.

    The detrended mean values (bad indices removed, divided by
    ``SEC_PER['ns']``) are drawn as a line.  The start and end of
    ``self.dq_segment`` are added as vertical markers when they fall inside
    ``self.t_lim``."""
    times = np.delete(self.t_axis, self.bad_indices.means)
    signal = np.delete(self.plot_vars.means - self.trend,
                       self.bad_indices.means) / SEC_PER['ns']
    ax.plot(times, signal,
            marker="o", color="green", label="Recorded Signal")

    # Segment boundaries relative to the plot start, in plot time units.
    unitfactor = SEC_PER[self.t_units]
    dq_start = (self.dq_segment.start.gpsSeconds - self.start) / unitfactor
    dq_end = (self.dq_segment.end.gpsSeconds - self.start) / unitfactor
    zorder = self.plot_properties['start_end_zorder']
    lower, upper = self.t_lim[0], self.t_lim[1]
    if lower <= dq_start:
        deep_pink = '#FF1493'
        plot_vertical_marker(ax, [dq_start], zorder=zorder,
                             label="Start of Segment", color=deep_pink)
    if dq_end <= upper:
        midnight_blue = '#191970'
        plot_vertical_marker(ax, [dq_end], zorder=zorder,
                             label="End of Segment", color=midnight_blue)
def append_neg_and_retrain(self, feat=None, force=False):
    """Add negative features to the cache and retrain when it is due.

    The rows of ``feat`` (if given) are stacked onto ``self.neg``.  Once more
    than ``self.retrain_limit`` rows have been added since the last
    retraining, or when ``force`` is true, ``self.train()`` is run and only
    the negatives scoring at least ``self.evict_thresh`` are kept (the cache
    is left as it is when none qualifies).

    Returns the first element of the ``self.train()`` result, or ``None``
    when no retraining took place.
    """
    if feat is not None:
        added = feat.shape[0]
        self.neg = np.vstack((self.neg, feat))
        self.num_neg_added += added

    if not (self.num_neg_added > self.retrain_limit or force):
        return None

    self.num_neg_added = 0
    new_w_b, pos_scores, neg_scores = self.train()
    # Row indices of the negatives that are *not* easy, i.e. that score at
    # or above the eviction threshold.
    hard_rows = np.where(neg_scores >= self.evict_thresh)[0]
    if len(hard_rows) > 0:
        self.neg = self.neg[hard_rows, :]
    print(' Pruning easy negatives')
    print(' Cache holds {} pos examples and {} neg examples'.
          format(self.pos.shape[0], self.neg.shape[0]))
    print(' {} pos support vectors'.format((pos_scores <= 1).sum()))
    print(' {} neg support vectors'.format((neg_scores >= -1).sum()))
    return new_w_b
def remove_indexes(self, rm_idx_list, rearranged_props):
    """
    Remove the listed k-point indexes from the given k-grid properties.

    The k-points with velocity < 1 cm/s (either in valence or conduction band) are taken out as those are
    troublesome later with extreme values (e.g. too high elastic scattering rates)

    For each band the index list is de-duplicated and sorted in descending
    order in place (inside ``rm_idx_list``) before it is applied.

    :param rm_idx_list ([int]): the kpoint indexes that need to be removed for each property
    :param rearranged_props ([str]): list of properties for which some indexes need to be removed
    :return:
    """
    for tp in ("n", "p"):
        n_bands = self.cbm_vbm[tp]["included"]
        for ib in range(n_bands):
            unique_desc = sorted(set(rm_idx_list[tp][ib]), reverse=True)
            rm_idx_list[tp][ib] = unique_desc
            logging.debug("# of {}-type kpoints indexes with low velocity or off-energy: {}".format(tp,len(unique_desc)))
        for prop in rearranged_props:
            per_band = [np.delete(self.kgrid[tp][prop][ib], rm_idx_list[tp][ib], axis=0)
                        for ib in range(n_bands)]
            self.kgrid[tp][prop] = np.array(per_band)
def transform(self, X):
    """Return a copy of ``X`` with its NaN entries replaced.

    After validation, ``X`` is run through ``self.initial_imputer``.  With
    several estimators, column ``i`` is predicted by ``self.estimators_[i]``
    from the other (imputed) columns for the rows where it was NaN.  With a
    single estimator, the missing entries are taken from that estimator's
    ``inverse_transform(transform(imputed))`` reconstruction.

    Raises ``ValueError`` when the number of columns differs from the one
    recorded in ``self.statistics_``.
    """
    check_is_fitted(self, ['statistics_', 'estimators_', 'gamma_'])
    X = check_array(X, copy=True, dtype=np.float64, force_all_finite=False)
    n_features = X.shape[1]
    n_expected = self.statistics_.shape[1]
    if n_features != n_expected:
        raise ValueError("X has %d features per sample, expected %d"
                         % (n_features, n_expected))
    missing = np.isnan(X)
    imputed = self.initial_imputer.fit_transform(X)
    if len(self.estimators_) <= 1:
        model = self.estimators_[0]
        reconstructed = model.inverse_transform(model.transform(imputed))
        X[missing] = reconstructed[missing]
        return X
    for col, model in enumerate(self.estimators_):
        other_columns = np.delete(imputed, col, 1)
        rows_missing = missing[:, col]
        unknown_rows = other_columns[rows_missing]
        if len(unknown_rows) > 0:
            X[rows_missing, col] = model.predict(unknown_rows)
    return X
def _run_TR_from_scan_onsets(self, n_T, scan_onsets=None):
    """Derive the length of every scan from the scan onsets.

    Returns ``(run_TRs, n_run)``: the number of time points per scan and the
    number of scans.  Without ``scan_onsets`` all ``n_T`` time points are
    treated as one scan.
    """
    if scan_onsets is None:
        # assume that all data are acquired within the same scan.
        return np.array([n_T], dtype=int), 1

    # Each value in the scan_onsets tells the index at which
    # a new scan starts. For example, if n_T = 500, and
    # scan_onsets = [0,100,200,400], this means that the time points
    # of 0-99 are from the first scan, 100-199 are from the second,
    # 200-399 are from the third and 400-499 are from the fourth
    boundaries = np.append(scan_onsets, n_T)
    run_TRs = np.int32(np.diff(boundaries))
    # drop run lengths of 0, which appear when scan_onsets has duplicates.
    run_TRs = run_TRs[run_TRs != 0]
    n_run = run_TRs.size
    logger.info('I infer that the number of volumes'
                ' in each scan are: {}'.format(run_TRs))
    return run_TRs, n_run
def chooseErrorData(self, game, lesson=None):
    '''
    Choose saved error function data by lesson and game name in
    history database.

    After selecting ``game`` on ``self.history`` and calling ``self.load()``,
    row 0 of ``self.data`` is used for the training error and row 1 for the
    test error.  Entries equal to -1 act as separators: with ``lesson`` given,
    each row is split at the separators and piece number ``lesson`` (without
    its leading separator) is stored; otherwise all separators are removed
    and the whole row is stored.  Results go to ``self.error_data_training``
    and ``self.error_data_test``.
    '''
    self.history.setGame(game)
    self.load()
    training_row = self.data[0, :]
    test_row = self.data[1, :]
    # np.flatnonzero yields a flat index array.  np.argwhere returns a
    # (k, 1) column whose one-element rows are not valid scalar split points
    # for np.split.
    training_marks = np.flatnonzero(training_row == -1)
    test_marks = np.flatnonzero(test_row == -1)
    if lesson is not None:
        self.error_data_training = np.split(training_row,
                                            training_marks)[lesson][1:]
        self.error_data_test = np.split(test_row, test_marks)[lesson][1:]
    else:
        self.error_data_training = np.delete(training_row, training_marks)
        self.error_data_test = np.delete(test_row, test_marks)
# ------------------- for test and show reasons only ----------------------
def append_neg_and_retrain(self, feat=None, force=False):
    """Add negative features to the cache and retrain when it is due.

    The rows of ``feat`` (if given) are stacked onto ``self.neg``.  Once more
    than ``self.retrain_limit`` rows have been added since the last
    retraining, or when ``force`` is true, ``self.train()`` is run and only
    the negatives scoring at least ``self.evict_thresh`` are kept (the cache
    is left as it is when none qualifies).

    Returns the first element of the ``self.train()`` result, or ``None``
    when no retraining took place.
    """
    if feat is not None:
        added = feat.shape[0]
        self.neg = np.vstack((self.neg, feat))
        self.num_neg_added += added

    if not (self.num_neg_added > self.retrain_limit or force):
        return None

    self.num_neg_added = 0
    new_w_b, pos_scores, neg_scores = self.train()
    # Row indices of the negatives that are *not* easy, i.e. that score at
    # or above the eviction threshold.
    hard_rows = np.where(neg_scores >= self.evict_thresh)[0]
    if len(hard_rows) > 0:
        self.neg = self.neg[hard_rows, :]
    print(' Pruning easy negatives')
    print(' Cache holds {} pos examples and {} neg examples'.
          format(self.pos.shape[0], self.neg.shape[0]))
    print(' {} pos support vectors'.format((pos_scores <= 1).sum()))
    print(' {} neg support vectors'.format((neg_scores >= -1).sum()))
    return new_w_b
def add_state(self, state):
    """Push ``state`` onto the queue of stacked observations.

    ``None`` resets the queue.  The first state after a reset fills the
    whole queue with ``self.stacked_num`` copies of itself; every later
    state replaces the oldest entry.  Observations are stacked along an
    extra trailing axis.
    """
    if state is None:
        self.queue = None
        return
    state = np.asarray(state)
    stack_axis = state.ndim  # the extra trailing dimension
    observation = state.reshape(state.shape + (1,))
    if self.queue is None:
        self.queue = np.repeat(observation, self.stacked_num, axis=stack_axis)
        return
    # Drop the oldest observation (front) and attach the latest one (back).
    without_oldest = np.delete(self.queue, 0, axis=stack_axis)
    self.queue = np.concatenate((without_oldest, observation),
                                axis=stack_axis)
def margins(doc_scores):
    """Per-column winning and losing margins of ``doc_scores``.

    Each column is compared with a reference computed row-wise from the
    other columns; the module-level ``FROM`` selects the reference
    (``'second'``, ``'other'`` or ``'median'``).  Margins are clipped at 0.
    For any other value of ``FROM`` both results stay zero.

    Returns ``(margin_win, margin_lose)``, both shaped like ``doc_scores``.
    """
    margin_win = np.zeros_like(doc_scores)
    margin_lose = np.zeros_like(doc_scores)
    for j in range(doc_scores.shape[1]):
        mine = doc_scores[:, j]
        rest = np.delete(doc_scores, j, axis=1)
        if FROM == 'second':
            win_ref = rest.max(axis=1)
            lose_ref = rest.min(axis=1)
        elif FROM == 'other':
            win_ref = rest.min(axis=1)
            lose_ref = rest.max(axis=1)
        elif FROM == 'median':
            win_ref = np.median(rest, axis=1)
            lose_ref = np.median(rest, axis=1)
        else:
            continue
        margin_win[:, j] = np.maximum(mine - win_ref, 0)
        margin_lose[:, j] = np.maximum(lose_ref - mine, 0)
    return margin_win, margin_lose
def filter_annotations(self, image_group, annotations_group, group):
    """Remove invalid boxes from every annotation array of the group.

    A box (columns x1, y1, x2, y2) is invalid when it has no positive width
    or height, starts at a negative coordinate, or extends beyond the image.
    A warning listing the removed boxes is issued per affected image.

    Returns ``(image_group, annotations_group)``; ``annotations_group`` is
    updated in place.
    """
    for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
        assert(isinstance(annotations, np.ndarray)), '\'load_annotations\' should return a list of numpy arrays, received: {}'.format(type(annotations))

        x1, y1 = annotations[:, 0], annotations[:, 1]
        x2, y2 = annotations[:, 2], annotations[:, 3]
        # invalid: x2 <= x1 | y2 <= y1 | x1 < 0 | y1 < 0 | x2 > width | y2 > height
        is_invalid = (
            (x2 <= x1) |
            (y2 <= y1) |
            (x1 < 0) |
            (y1 < 0) |
            (x2 > image.shape[1]) |
            (y2 > image.shape[0])
        )
        invalid_indices = np.where(is_invalid)[0]
        if not len(invalid_indices):
            continue

        warnings.warn('Image with id {} (shape {}) contains the following invalid boxes: {}.'.format(
            group[index],
            image.shape,
            [annotations[invalid_index, :] for invalid_index in invalid_indices]
        ))
        annotations_group[index] = np.delete(annotations, invalid_indices, axis=0)
    return image_group, annotations_group
def cellslice(UC, P_UC, slicing):
    """Slice the elements of a unit cell into ``slicing`` pieces each.

    ``UC`` holds one element per column (6 rows); row 0 is tested against
    the codes 2, 5 and 7 and row 1 is accumulated into ``s``.  Elements with
    one of those codes are copied once, every other element is repeated
    ``slicing`` times, and row 1 of the result is divided by ``slicing``.
    With ``slicing == 1`` the input is passed through unchanged.

    Returns ``(s, UCS, P_UCS)``: the cumulative sum of row 1 with a leading
    0, the sliced unit cell and its number of columns.
    """
    if slicing == 1:
        UCS, P_UCS = UC, P_UC
    else:
        P_UCS = 0  # points in sliced unit cell
        # An empty float column as seed keeps the float result type.
        pieces = [zeros([6, 0])]
        for i in range(P_UC):
            if UC[0, i] in (2, 5, 7):  # noslicing edges, rotators, diagnostics
                pieces.append(UC[:, i].reshape(6, 1))
                P_UCS += 1
            else:
                pieces.append(UC[:, i].reshape(6, 1).repeat(slicing, 1))
                P_UCS += slicing
        UCS = hstack(pieces)
        UCS[1, :] = UCS[1, :]/slicing
    s = hstack((0, cumsum(UCS[1, :])))
    return s, UCS, P_UCS
def append_neg_and_retrain(self, feat=None, force=False):
    """Add negative features to the cache and retrain when it is due.

    The rows of ``feat`` (if given) are stacked onto ``self.neg``.  Once more
    than ``self.retrain_limit`` rows have been added since the last
    retraining, or when ``force`` is true, ``self.train()`` is run and only
    the negatives scoring at least ``self.evict_thresh`` are kept (the cache
    is left as it is when none qualifies).

    Returns the first element of the ``self.train()`` result, or ``None``
    when no retraining took place.
    """
    if feat is not None:
        added = feat.shape[0]
        self.neg = np.vstack((self.neg, feat))
        self.num_neg_added += added

    if not (self.num_neg_added > self.retrain_limit or force):
        return None

    self.num_neg_added = 0
    new_w_b, pos_scores, neg_scores = self.train()
    # Row indices of the negatives that are *not* easy, i.e. that score at
    # or above the eviction threshold.
    hard_rows = np.where(neg_scores >= self.evict_thresh)[0]
    if len(hard_rows) > 0:
        self.neg = self.neg[hard_rows, :]
    print(' Pruning easy negatives')
    print(' Cache holds {} pos examples and {} neg examples'.
          format(self.pos.shape[0], self.neg.shape[0]))
    print(' {} pos support vectors'.format((pos_scores <= 1).sum()))
    print(' {} neg support vectors'.format((neg_scores >= -1).sum()))
    return new_w_b
def test_fit_to_less_width(self):
    """Fit a tensor to a smaller width (i.e. trimming).

    Given a 3D tensor of shape [batch, length, width], apply the
    `ops.fit()` operator to it with a smaller `width` as the target and
    check that the trailing entries of the last axis have been deleted.
    """
    batch, length, width = 2, 5, 4
    fit_width = 3
    n_trimmed = width - fit_width

    input_ = tf.placeholder(dtype=tf.float32, shape=[None, None, None])
    output = ops.fit(input_, fit_width)

    input_actual = np.random.rand(batch, length, width)  # pylint: disable=I0011,E1101
    # Indices of the last ``n_trimmed`` entries along the width axis.
    trailing = [width - (i + 1) for i in range(n_trimmed)]
    output_expected = np.delete(input_actual, trailing, axis=2)  # pylint: disable=I0011,E1101
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        output_actual = sess.run(output, {input_: input_actual})
        self.assertAllClose(output_expected, output_actual)
def prepare_data(img_folder):
    """Load the dataset, standardize it and split off a random test set.

    Every feature column is shifted by its mean and divided by its standard
    deviation (plus a small constant).  Up to 1000 randomly chosen samples
    form the test set and are removed from the training data.

    Returns ``(X, Y, test_X, test_Y)``.
    """
    X, Y, captcha_text = vecmp.load_dataset(folder=img_folder)
    # standardization: per-column mean and standard deviation
    X = (X - X.mean(axis=0)) / (X.std(axis=0) + 0.00001)

    n_samples = X.shape[0]
    test_size = min(1000, n_samples)
    random_idx = np.random.choice(n_samples, test_size, replace=False)
    test_X, test_Y = X[random_idx, :], Y[random_idx, :]
    train_X = np.delete(X, random_idx, axis=0)
    train_Y = np.delete(Y, random_idx, axis=0)
    return (train_X, train_Y, test_X, test_Y)
def append_neg_and_retrain(self, feat=None, force=False):
    """Add negative features to the cache and retrain when it is due.

    The rows of ``feat`` (if given) are stacked onto ``self.neg``.  Once more
    than ``self.retrain_limit`` rows have been added since the last
    retraining, or when ``force`` is true, ``self.train()`` is run and only
    the negatives scoring at least ``self.evict_thresh`` are kept (the cache
    is left as it is when none qualifies).

    Returns the first element of the ``self.train()`` result, or ``None``
    when no retraining took place.
    """
    if feat is not None:
        added = feat.shape[0]
        self.neg = np.vstack((self.neg, feat))
        self.num_neg_added += added

    if not (self.num_neg_added > self.retrain_limit or force):
        return None

    self.num_neg_added = 0
    new_w_b, pos_scores, neg_scores = self.train()
    # Row indices of the negatives that are *not* easy, i.e. that score at
    # or above the eviction threshold.
    hard_rows = np.where(neg_scores >= self.evict_thresh)[0]
    if len(hard_rows) > 0:
        self.neg = self.neg[hard_rows, :]
    print(' Pruning easy negatives')
    print(' Cache holds {} pos examples and {} neg examples'.
          format(self.pos.shape[0], self.neg.shape[0]))
    print(' {} pos support vectors'.format((pos_scores <= 1).sum()))
    print(' {} neg support vectors'.format((neg_scores >= -1).sum()))
    return new_w_b
def __init__(self, table,reg=False,lamda=0):
    """Set up the training data and the initial parameters.

    Parameters
    ----------
    table : ndarray(n-rows,m-features + 1)
        Numerical training data, last column as training values
    reg : Boolean
        Set True to enable regularization, false by default
    lamda : number
        Regularization parameter stored on the instance, 0 by default
    """
    n_rows = np.shape(table)[0]
    # Feature matrix: everything but the last column, with a leading column
    # of ones.
    features = np.delete(table, -1, 1)
    design = np.insert(features, 0, np.ones(n_rows), axis=1)
    n_cols = np.shape(design)[1]

    # regularization parameters
    self.reg = reg
    self.lamda = lamda
    self.num_training = n_rows
    self.X = design
    self.num_features = n_cols
    # training values: the last column of the table
    self.y = table[:, n_cols - 1]
    # one parameter per design column, initialized to 1
    self.theta = np.ones(n_cols)
def compute_cost(self):
    """Computes cost based on the current values of the parameters

    The cost is the mean cross-entropy between ``self.y`` and the sigmoid of
    ``X . theta``.  With ``self.reg`` set, the L2 penalty
    ``lamda / (2 * num_training) * sum(theta[1:] ** 2)`` is added; the
    intercept ``theta[0]`` is not penalized.

    The penalty factor used to be written ``lamda/2*num_training``, which
    evaluates to ``(lamda / 2) * num_training`` and therefore grew with the
    number of samples instead of shrinking.

    Returns
    -------
    cost : float
        Cost of the selection of current set of parameters
    """
    hypothesis = LogisticRegression.sigmoid(np.dot(self.X, self.theta))
    cost = -(np.sum(self.y * np.log(hypothesis) + (1 - self.y) * (np.log(1 - hypothesis)))) / self.num_training
    # if regularization is off, the plain cost is returned
    if not self.reg:
        return cost
    # a slice (not self.theta itself) so the intercept stays out of the sum
    penalized = self.theta[1:]
    reg = self.lamda / (2.0 * self.num_training) * np.sum(np.power(penalized, 2))
    return cost + reg
def unpad(matrix):
    '''
    Strip the trailing column of ones from an n x 4 matrix. Transform from:
    array([[1., 2., 3., 1.],
           [2., 3., 4., 1.],
           [5., 6., 7., 1.]])
    to:
    array([[1., 2., 3.],
           [2., 3., 4.],
           [5., 6., 7.]])

    Raises ValueError if the matrix is not n x 4 or if its last column
    contains anything other than ones.
    '''
    is_nx4 = matrix.ndim == 2 and matrix.shape[1] == 4
    if not is_nx4:
        raise ValueError("Invalid shape %s: unpad expects nx4" % (matrix.shape,))
    last_column_is_ones = (matrix[:, 3] == 1.).all()
    if not last_column_is_ones:
        raise ValueError('Expected a column of ones')
    return np.delete(matrix, 3, axis=1)
def BFS(self, start, fs=None):
    '''
    Returns the BFS tree for the graph starting from start

    ``fs`` maps a node to its successors and defaults to ``self.FSs``.  The
    tree is returned as a 1-D object array of ``(parent, child)`` tuples in
    the order in which the children were discovered.
    '''
    # ``int`` instead of the ``np.int`` alias, which current NumPy releases
    # no longer provide.
    to_be_processed = np.array([start], dtype=int)
    known = np.array([], dtype=int)
    tree = np.array([], dtype=object)
    if fs is None:
        fs = self.FSs
    while len(to_be_processed) > 0:
        # pop from the back; new nodes enter at the front (FIFO queue)
        current_node = to_be_processed[-1]
        to_be_processed = np.delete(to_be_processed, -1)
        for node in fs[current_node]:
            if node not in known:
                known = np.append(known, node)
                # Grow by one slot first, then store the tuple, so that the
                # tuple is kept as a single object and not unpacked.
                tree = np.append(tree, None)
                tree[-1] = (current_node, node)
                # push
                to_be_processed = np.insert(to_be_processed, 0, node)
    return tree
def DFS(self, start, fs=None):
    '''
    Returns the DFS tree for the graph starting from start

    ``fs`` maps a node to its successors and defaults to ``self.FSs``.  The
    tree is returned as a 1-D object array of ``(parent, child)`` tuples in
    the order in which the children were discovered.
    '''
    # ``int`` instead of the ``np.int`` alias, which current NumPy releases
    # no longer provide.
    to_be_processed = np.array([start], dtype=int)
    known = np.array([], dtype=int)
    tree = np.array([], dtype=object)
    if fs is None:
        fs = self.FSs
    while len(to_be_processed) > 0:
        # pop from the front, where new nodes are also pushed (LIFO stack)
        current_node = to_be_processed[0]
        to_be_processed = np.delete(to_be_processed, 0)
        for node in fs[current_node]:
            if node not in known:
                known = np.append(known, node)
                # Grow by one slot first, then store the tuple, so that the
                # tuple is kept as a single object and not unpacked.
                tree = np.append(tree, None)
                tree[-1] = (current_node, node)
                # push
                to_be_processed = np.insert(to_be_processed, 0, node)
    return tree
def topological_sort(self):
    '''
    Returns a list of topologically sorted nodes

    Starts from node 0 (treated as the only node without incoming edges) and
    repeatedly emits a node whose predecessors have all been emitted.
    ``self.FSs`` maps a node to its successors and ``self.BSs`` to its
    predecessors.  Prints a message and returns None if the graph is cyclic.
    '''
    if self.is_cyclic(self.FSs):
        # print() call: valid on Python 3, same output on Python 2
        print('cannot apply labels, graph contains cycles')
        return
    big_l = []  # Empty list that will contain the sorted elements
    # Set of all nodes with no incoming edges
    big_s = set([0])
    # Shallow copy: entries are replaced below, never modified in place, so
    # self.BSs stays intact.
    bs_copy = self.BSs.copy()
    while len(big_s) > 0:
        n = big_s.pop()
        big_l.append(n)
        for m in self.FSs[n]:
            # remove the edge n -> m from m's remaining predecessors
            bs_copy[m] = np.delete(bs_copy[m], np.where(bs_copy[m] == n))
            if len(bs_copy[m]) == 0:
                big_s.add(int(m))
    return big_l
def _mask_clip(self, row_or_col):
    '''
    Cuts out the rows of the matrix that hold fewer than k stored values.

    The indices of the surviving rows are stored in self.row_idx or
    self.col_idx, depending on row_or_col ('row' or 'col'); the clipped
    matrix (LIL format) replaces self.mat.  Returns self.
    '''
    threshold = self.k
    lil = self.mat.tolil()
    to_remove = [idx for idx, stored in enumerate(lil.rows)
                 if len(stored) < threshold]
    # Compact the per-row storage; the declared shape is not touched here,
    # the final slice below trims it to the surviving rows.
    lil.rows = np.delete(lil.rows, to_remove)
    lil.data = np.delete(lil.data, to_remove)
    if row_or_col in ('row', 'col'):
        survivors = np.delete(range(lil.shape[0]), to_remove)
        if row_or_col == 'row':
            self.row_idx = survivors
        else:
            self.col_idx = survivors
    remaining = lil.shape[0] - len(to_remove)
    self.mat = lil[:remaining]
    return self
def __call__(self, index_list, padded_value=-1):
    """Convert word indices to words, skipping the padding.

    Args:
        index_list (np.ndarray): list of word indices.
            Batch size 1 is expected.
        padded_value (int): the value used for padding
    Returns:
        word_list (list): list of words
    """
    assert isinstance(index_list, np.ndarray), 'index_list should be np.ndarray.'
    # Remove padded values.  ``padded_value`` is honoured here; it used to be
    # ignored in favour of a hard-coded -1.
    index_list = np.delete(index_list, np.where(index_list == padded_value), axis=0)
    # Convert from indices to the corresponding words
    word_list = [self.map_dict[index] for index in index_list]
    return word_list
def __call__(self, index_list, padded_value=-1):
    """Convert phone indices to a space-separated string, skipping the padding.

    Args:
        index_list (np.ndarray): phone indices
        padded_value (int): the value used for padding
    Returns:
        str_phone (string): a sequence of phones
    """
    assert isinstance(index_list, np.ndarray), 'index_list should be np.ndarray.'
    # Remove padded values.  ``padded_value`` is honoured here; it used to be
    # ignored in favour of a hard-coded -1.
    index_list = np.delete(index_list, np.where(index_list == padded_value), axis=0)
    # Convert from indices to the corresponding phones
    str_phone = ' '.join(self.map_dict[index] for index in index_list)
    return str_phone
def buildTree(self, data, features):
    """Recursively build a decision tree as nested dictionaries.

    The last column of ``data`` holds the class.  Recursion stops when all
    rows share one class (that class is returned) or when only the class
    column is left (the result of ``self.majorityCnt`` is returned).
    Otherwise the feature picked by ``InformationGain`` becomes the node:
    ``{feature_label: {value: subtree, ...}}``.
    """
    labels = data[:, -1]
    if len(set(labels)) == 1:
        return labels[0]
    if len(data[0]) == 1:
        return self.majorityCnt(labels)

    gain = InformationGain()
    best = gain.chooseBestFeatureToSplit(data)
    best_label = features[best]
    branches = {}
    decisionTree = {best_label: branches}
    values = set(data[:, best])
    # Labels handed down to the subtrees no longer contain the used feature.
    remaining = np.delete(features, best, axis=0)
    for value in values:
        subset = gain.splitData(data, best, value)
        branches[value] = self.buildTree(subset, remaining)
    return decisionTree
def append_neg_and_retrain(self, feat=None, force=False):
    """Add negative features to the cache and retrain when it is due.

    The rows of ``feat`` (if given) are stacked onto ``self.neg``.  Once more
    than ``self.retrain_limit`` rows have been added since the last
    retraining, or when ``force`` is true, ``self.train()`` is run and only
    the negatives scoring at least ``self.evict_thresh`` are kept (the cache
    is left as it is when none qualifies).

    Returns the first element of the ``self.train()`` result, or ``None``
    when no retraining took place.
    """
    if feat is not None:
        added = feat.shape[0]
        self.neg = np.vstack((self.neg, feat))
        self.num_neg_added += added

    if not (self.num_neg_added > self.retrain_limit or force):
        return None

    self.num_neg_added = 0
    new_w_b, pos_scores, neg_scores = self.train()
    # Row indices of the negatives that are *not* easy, i.e. that score at
    # or above the eviction threshold.
    hard_rows = np.where(neg_scores >= self.evict_thresh)[0]
    if len(hard_rows) > 0:
        self.neg = self.neg[hard_rows, :]
    print(' Pruning easy negatives')
    print(' Cache holds {} pos examples and {} neg examples'.
          format(self.pos.shape[0], self.neg.shape[0]))
    print(' {} pos support vectors'.format((pos_scores <= 1).sum()))
    print(' {} neg support vectors'.format((neg_scores >= -1).sum()))
    return new_w_b
def BFS(self, start, fs=None):
    '''
    Returns the BFS tree for the graph starting from start

    ``fs`` maps a node to its successors and defaults to ``self.FSs``.  The
    tree is returned as a 1-D object array of ``(parent, child)`` tuples in
    the order in which the children were discovered.
    '''
    # ``int`` instead of the ``np.int`` alias, which current NumPy releases
    # no longer provide.
    to_be_processed = np.array([start], dtype=int)
    known = np.array([], dtype=int)
    tree = np.array([], dtype=object)
    if fs is None:
        fs = self.FSs
    while len(to_be_processed) > 0:
        # pop from the back; new nodes enter at the front (FIFO queue)
        current_node = to_be_processed[-1]
        to_be_processed = np.delete(to_be_processed, -1)
        for node in fs[current_node]:
            if node not in known:
                known = np.append(known, node)
                # Grow by one slot first, then store the tuple, so that the
                # tuple is kept as a single object and not unpacked.
                tree = np.append(tree, None)
                tree[-1] = (current_node, node)
                # push
                to_be_processed = np.insert(to_be_processed, 0, node)
    return tree
def DFS(self, start, fs=None):
    '''
    Returns the DFS tree for the graph starting from start

    ``fs`` maps a node to its successors and defaults to ``self.FSs``.  The
    tree is returned as a 1-D object array of ``(parent, child)`` tuples in
    the order in which the children were discovered.
    '''
    # ``int`` instead of the ``np.int`` alias, which current NumPy releases
    # no longer provide.
    to_be_processed = np.array([start], dtype=int)
    known = np.array([], dtype=int)
    tree = np.array([], dtype=object)
    if fs is None:
        fs = self.FSs
    while len(to_be_processed) > 0:
        # pop from the front, where new nodes are also pushed (LIFO stack)
        current_node = to_be_processed[0]
        to_be_processed = np.delete(to_be_processed, 0)
        for node in fs[current_node]:
            if node not in known:
                known = np.append(known, node)
                # Grow by one slot first, then store the tuple, so that the
                # tuple is kept as a single object and not unpacked.
                tree = np.append(tree, None)
                tree[-1] = (current_node, node)
                # push
                to_be_processed = np.insert(to_be_processed, 0, node)
    return tree
def topological_sort(self):
    '''
    Returns a list of topologically sorted nodes

    Starts from node 0 (treated as the only node without incoming edges) and
    repeatedly emits a node whose predecessors have all been emitted.
    ``self.FSs`` maps a node to its successors and ``self.BSs`` to its
    predecessors.  Prints a message and returns None if the graph is cyclic.
    '''
    if self.is_cyclic(self.FSs):
        # print() call: valid on Python 3, same output on Python 2
        print('cannot apply labels, graph contains cycles')
        return
    big_l = []  # Empty list that will contain the sorted elements
    # Set of all nodes with no incoming edges
    big_s = set([0])
    # Shallow copy: entries are replaced below, never modified in place, so
    # self.BSs stays intact.
    bs_copy = self.BSs.copy()
    while len(big_s) > 0:
        n = big_s.pop()
        big_l.append(n)
        for m in self.FSs[n]:
            # remove the edge n -> m from m's remaining predecessors
            bs_copy[m] = np.delete(bs_copy[m], np.where(bs_copy[m] == n))
            if len(bs_copy[m]) == 0:
                big_s.add(int(m))
    return big_l