Python numpy module: isinf() example source code
The following code examples, extracted from open-source Python projects, illustrate how to use numpy.isinf().
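Before the project snippets, here is a minimal sketch of what numpy.isinf() does (the snippets below generally assume numpy is imported as np):

import numpy as np

a = np.array([1.0, np.inf, -np.inf, np.nan])
print(np.isinf(a))       # [False  True  True False] -- True only for +/-inf
print(np.isinf(np.nan))  # False: NaN is not infinite; use np.isnan for that
print(np.isfinite(a))    # [ True False False False] -- rejects both inf and NaN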
def cosine_similarity_self(A):
    similarity = np.dot(A, A.T)
    square_mag = np.diag(similarity)
    inv_square_mag = 1 / square_mag
    inv_square_mag[np.isinf(inv_square_mag)] = 0
    inv_mag = np.sqrt(inv_square_mag)
    cosine = similarity * inv_mag
    cosine = cosine.T * inv_mag
    return cosine
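A quick usage sketch for the function above (hypothetical input, assuming numpy is imported as np). The all-zero row would otherwise turn the result into NaN/inf via its zero magnitude; the isinf mask zeroes it instead:

A = np.array([[1.0, 0.0],
              [1.0, 1.0],
              [0.0, 0.0]])  # last row has zero magnitude
C = cosine_similarity_self(A)
# 1/0 for the zero row gives inf (with a RuntimeWarning); the isinf mask
# zeroes it, so row/column 2 of C is all zeros instead of NaN/inf
print(C[0, 1])  # 0.7071... -- cosine between the first two rows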
# document should be a list of sentences
# method = "word2vec", "lda", "tfidf"
# def extraction(document, method="rawText"):
#
#     graph = build_graph(document, method)  # document is a list of sentences
#
#     calculated_page_rank = networkx.pagerank(graph, weight="weight")
#
#     # most important sentences in descending order of importance
#     sentences = sorted(calculated_page_rank, key=calculated_page_rank.get, reverse=True)
#
#     return sentences[0:4]
def df_type_to_str(i):
    '''
    Convert pandas/numpy types into simple built-in datatypes.
    '''
    if isinstance(i, np.bool_):
        return bool(i)
    if isinstance(i, np.int_):
        return int(i)
    # np.float was an alias of the builtin float (and has been removed from
    # NumPy); (float, np.floating) covers both plain and numpy float scalars
    if isinstance(i, (float, np.floating)):
        if np.isnan(i):
            return 'NaN'
        elif np.isinf(i):
            return str(i)
        return float(i)
    if isinstance(i, np.uint):
        return int(i)
    if type(i) == bytes:
        return i.decode('UTF-8')
    if isinstance(i, (tuple, list)):
        return str(i)
    if i is pd.NaT:  # not identified as a float null
        return 'NaN'
    return str(i)
def map(self, data):
    data = data[self.fieldName]
    colors = np.empty((len(data), 4))
    default = np.array(fn.colorTuple(self['Default'])) / 255.
    colors[:] = default
    for v in self.param('Values'):
        mask = data == v.maskValue
        c = np.array(fn.colorTuple(v.value())) / 255.
        colors[mask] = c
    #scaled = np.clip((data-self['Min']) / (self['Max']-self['Min']), 0, 1)
    #cmap = self.value()
    #colors = cmap.map(scaled, mode='float')
    #mask = np.isnan(data) | np.isinf(data)
    #nanColor = self['NaN']
    #nanColor = (nanColor.red()/255., nanColor.green()/255., nanColor.blue()/255., nanColor.alpha()/255.)
    #colors[mask] = nanColor
    return colors
def _test_get_one_exchange_neighbourhood(self, hp):
    cs = ConfigurationSpace()
    num_neighbors = 0
    if not isinstance(hp, list):
        hp = [hp]
    for hp_ in hp:
        cs.add_hyperparameter(hp_)
        if np.isinf(hp_.get_num_neighbors()):
            num_neighbors += 4
        else:
            num_neighbors += hp_.get_num_neighbors()
    cs.seed(1)
    config = cs.get_default_configuration()
    all_neighbors = []
    for i in range(100):
        neighborhood = get_one_exchange_neighbourhood(config, i)
        for new_config in neighborhood:
            self.assertNotEqual(config, new_config)
            all_neighbors.append(new_config)
    return all_neighbors
def get_series_mean_std_peryear(word_time_series, i_year_words, one_minus=False,
                                start_year=1900, end_year=2000, year_inc=1,
                                exclude_partial_missing=False):
    """
    Return the mean and stderr arrays for the values of the words specified
    per year in i_year_words, for the specified years.
    """
    means = []
    stderrs = []
    r_word_time_series = {}
    if exclude_partial_missing:
        for word, time_series in word_time_series.items():
            if not np.isnan(np.sum(list(time_series.values()))):
                r_word_time_series[word] = time_series
    else:
        r_word_time_series = word_time_series
    for year in range(start_year, end_year + 1, year_inc):
        word_array = np.array([r_word_time_series[word][year] for word in i_year_words[year]
                               if word in r_word_time_series
                               and not np.isnan(r_word_time_series[word][year])
                               and not np.isinf(r_word_time_series[word][year])])
        if len(word_array) == 0:
            continue
        if one_minus:
            word_array = 1 - word_array
        means.append(word_array.mean())
        stderrs.append(word_array.std())
    return np.array(means), np.array(stderrs)
def PPMI_matrix(M):
    M = scale_sim_mat(M)
    nm_nodes = len(M)
    col_s = np.sum(M, axis=0).reshape(1, nm_nodes)
    row_s = np.sum(M, axis=1).reshape(nm_nodes, 1)
    D = np.sum(col_s)
    rowcol_s = np.dot(row_s, col_s)
    PPMI = np.log(np.divide(D * M, rowcol_s))
    PPMI[np.isnan(PPMI)] = 0.0
    PPMI[np.isinf(PPMI)] = 0.0
    PPMI[np.isneginf(PPMI)] = 0.0
    PPMI[PPMI < 0] = 0.0
    return PPMI
def test_zero_division(self):
    with np.errstate(all="ignore"):
        for t in [np.complex64, np.complex128]:
            a = t(0.0)
            b = t(1.0)
            assert_(np.isinf(b/a))
            b = t(complex(np.inf, np.inf))
            assert_(np.isinf(b/a))
            b = t(complex(np.inf, np.nan))
            assert_(np.isinf(b/a))
            b = t(complex(np.nan, np.inf))
            assert_(np.isinf(b/a))
            b = t(complex(np.nan, np.nan))
            assert_(np.isnan(b/a))
            b = t(0.)
            assert_(np.isnan(b/a))
def hypothesis(self, x, theta):
    l_theta = []
    for i in range(len(theta)):
        # print(theta[i])
        thetaX = x.dot(theta[i])          # wx
        thetaX_exp = np.exp(thetaX)       # exp(wx)
        l_theta.append(thetaX_exp)
    l_theta = np.array(l_theta)
    # print(np.shape(l_theta))
    thetaX_exp_sum = np.sum(l_theta)      # sum of exp(wx)
    # print(thetaX_exp_sum)
    p = l_theta.T / thetaX_exp_sum        # 5 x len(x) predicted results
    if np.isinf(p).any():                 # deal with overflow in results
        inf_idx = np.isinf(p)             # idx where overflow occurs
        val = np.sum(p, 0) / np.sum(inf_idx, 0) * inf_idx  # values to use as substitutes
        p[inf_idx] = val[inf_idx]         # substitute values
    return p.T

#### predict the labels for a set of observations ####
def hypothesis(self, x, theta):
    l_theta = []
    for i in range(len(theta)):
        thetaX = x.dot(theta[i])      ## wx ##
        thetaX_exp = np.exp(thetaX)   ## exp(wx) ##
        if np.isinf(thetaX_exp):
            print("overflow")
            # TODO: try using a matrix with all-equal probabilities in case of overflow
        l_theta.append(thetaX_exp)
    l_theta = np.array(l_theta)
    thetaX_exp_sum = np.sum(l_theta)  ## sum of exp(wx) ##
    p = l_theta.T / thetaX_exp_sum    ## 5 x len(x) predicted results ##
    # print(np.sum(p))
    '''if np.isinf(p).any():          ## deal with overflow in results ##
        inf_idx = np.isinf(p)         ## idx where overflow occurs ##
        val = np.sum(p, 0) / np.sum(inf_idx, 0) * inf_idx  ## values to use as substitutes ##
        p[inf_idx] = val[inf_idx]     ## substitute values ##'''
    return p.T

## compute the derivative of the cost function ##
def ln_posterior(mcmc_p, joker_params, data):
    if joker_params._fixed_jitter:
        mcmc_p = list(mcmc_p)
        mcmc_p.insert(5, -np.inf)  # HACK: whoa, major hackage!
    p = from_mcmc_params(mcmc_p).reshape(len(mcmc_p))
    lnp = ln_prior(p, joker_params)
    if np.isinf(lnp):
        return lnp
    lnl = ln_likelihood(p, joker_params, data)
    lnprob = lnp + lnl.sum()
    if np.isnan(lnprob):
        return -np.inf
    return lnprob
def _get_viewpoint_estimation_labels(viewpoint_data, clss, num_classes):
    """Bounding-box regression targets are stored in a compact form in the
    roidb.

    This function expands those targets into the 3-of-3*K representation
    used by the network (i.e. only one class has non-zero targets). The loss
    weights are similarly expanded.

    Returns:
        view_target_data (ndarray): N x 3K blob of regression targets
        view_loss_weights (ndarray): N x 3K blob of loss weights
    """
    view_targets = np.zeros((clss.size, 3 * num_classes), dtype=np.float32)
    view_loss_weights = np.zeros(view_targets.shape, dtype=np.float32)
    inds = np.where((clss > 0)
                    & np.isfinite(viewpoint_data[:, 0])
                    & np.isfinite(viewpoint_data[:, 1])
                    & np.isfinite(viewpoint_data[:, 2]))[0]
    for ind in inds:
        cls = clss[ind]
        start = 3 * cls
        end = start + 3
        view_targets[ind, start:end] = viewpoint_data[ind, :]
        view_loss_weights[ind, start:end] = [1., 1., 1.]
    assert not np.isinf(view_targets).any(), 'viewpoint undefined'
    return view_targets, view_loss_weights
def compute_document_similarity(X):
    '''
    From a matrix of unit-length document vectors, computes the cosine
    similarity, then converts it to the angular distance (for a proper metric).
    '''
    S = cdist(X, X, metric='cosine')
    S -= 1
    S *= -1
    S[S > 1] = 1.0
    S[S < 0] = 0.0
    # Set nan values to zero
    S[np.isnan(S)] = 0
    # Convert to angular distance (a proper metric)
    S = 1 - (np.arccos(S) / np.pi)
    assert not np.isnan(S).any()
    assert not np.isinf(S).any()
    return S
def do_test_delta_internal(self, net, nreps, ntasks, pct):
    for ri in range(nreps):
        arrv = net.sample(ntasks)
        obs = arrv.subset_by_task(pct)
        samples = net.slice_resample(obs, 0, 5)
        arrv_from = samples[len(samples)-1]
        print("Computing LIK0")
        lik0 = net.log_prob(arrv_from)
        for e in arrv_from:
            if not e.obs_d:
                # print("Testing evt ", e)
                dfn = qnet.GGkGibbs(net, arrv_from, e, lik0).dfn()
                d0 = e.d
                d_test = [d0 + delta for delta in [-0.5, -0.1, 0.1, 0.5, 1.0, 1.5, 3.0]]
                for d1 in d_test:
                    # print("Testing departure ", d1)
                    lik_incremental = dfn(d1)
                    if numpy.isinf(lik_incremental):
                        continue  # probably right
                    lik_true = self.compute_full_lik(net, arrv_from, e, d1)
                    print("%d %.4f %.4f %.4f %.4f" % (e.eid, d0, d1, lik_incremental, lik_true))
                    if numpy.isinf(lik_true):
                        self.assertTrue(numpy.isinf(lik_incremental))
                    else:
                        self.assertAlmostEqual(lik_true, lik_incremental, 5)
def huber_loss(y_true, y_pred, clip_value):
    # Huber loss, see https://en.wikipedia.org/wiki/Huber_loss and
    # https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b
    # for details.
    assert clip_value > 0.
    x = y_true - y_pred
    if np.isinf(clip_value):
        # Special case for infinity since TensorFlow has problems
        # if we compare `K.abs(x) < np.inf`.
        return .5 * tf.square(x)
    condition = tf.abs(x) < clip_value
    squared_loss = .5 * tf.square(x)
    linear_loss = clip_value * (tf.abs(x) - .5 * clip_value)
    return tf.where(condition, squared_loss, linear_loss)  # condition, true, false
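For reference, a NumPy-only sketch of the same piecewise rule (huber_loss_np is a hypothetical helper, not part of the project the snippet above came from) makes the two branches easy to verify:

def huber_loss_np(y_true, y_pred, clip_value=1.0):
    # quadratic inside [-clip_value, clip_value], linear outside
    x = np.abs(y_true - y_pred)
    return np.where(x < clip_value,
                    .5 * np.square(x),
                    clip_value * (x - .5 * clip_value))

print(huber_loss_np(np.array([0.0, 0.0]), np.array([0.5, 3.0])))
# [0.125 2.5 ] -- 0.5**2/2 = 0.125; 1.0*(3.0 - 0.5) = 2.5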
def zdivide(x, y):
    """
    Return x/y, with 0 instead of NaN where y is 0.

    Parameters
    ----------
    x : array_like
        Numerator
    y : array_like
        Denominator

    Returns
    -------
    z : ndarray
        Quotient `x`/`y`
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        div = x / y
        div[np.logical_or(np.isnan(div), np.isinf(div))] = 0
    return div
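A brief check of zdivide's behavior (assuming numpy is imported as np):

x = np.array([1.0, 2.0, 0.0])
y = np.array([2.0, 0.0, 0.0])
print(zdivide(x, y))  # [0.5 0.  0. ] -- 2/0 (inf) and 0/0 (NaN) both map to 0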
def sim_inv_mag(M):
    '''
    Compute the similarity matrix and the inverse of the magnitude
    on its diagonal for a set of vectors.
    The matrix 'M' contains the input vectors, one per row.
    '''
    # base similarity matrix (all dot products)
    # replace this with A.dot(A.T).todense() for a sparse representation
    similarity = np.dot(M, M.T)
    # squared magnitude of preference vectors (number of occurrences)
    square_mag = np.diag(similarity)
    # inverse squared magnitude
    inv_square_mag = 1 / square_mag
    # if it doesn't occur, set its inverse magnitude to zero (instead of inf)
    inv_square_mag[np.isinf(inv_square_mag)] = 0
    # inverse of the magnitude
    inv_mag = np.sqrt(inv_square_mag)
    return similarity, inv_mag
def test_zero_safe_divide(self):
    from blmath.numerics.operations import zero_safe_divide

    numerator = np.ones((5, 5))
    numerator[3, 3] = 0.
    denominator = np.ones((5, 5))
    denominator[2, 2] = 0.
    denominator[3, 3] = 0.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        true_divide = np.true_divide(numerator, denominator)
        safe_divide = zero_safe_divide(numerator, denominator)
    self.assertTrue(np.isinf(true_divide[2, 2]))
    self.assertEqual(safe_divide[2, 2], 0.)
    self.assertTrue(np.isnan(true_divide[3, 3]))
    self.assertEqual(safe_divide[3, 3], 0.)
def zero_safe_divide(a, b, default_error_value=0.):
    """Element-wise division that accounts for floating point errors.

    Both invalid floating-point (e.g. 0. / 0.) and divide-by-zero errors are
    suppressed. The resulting values (NaN and Inf respectively) are replaced
    with `default_error_value`.
    """
    import numpy as np

    with np.errstate(invalid='ignore', divide='ignore'):
        quotient = np.true_divide(a, b)
        bad_value_indices = np.logical_or(
            np.isnan(quotient), np.isinf(quotient))
        quotient[bad_value_indices] = default_error_value
    return quotient
def __call__(self, alpha):
    """
    Posterior distribution

    Returns
    ---------
    lnprob: float
        Natural log of posterior probability
    """
    lp = self.lnprior(alpha)
    if np.isinf(lp):
        return -np.inf
    else:
        return np.atleast_1d(lp + self.lnlike(alpha))[0]
def log_mat(x, n, g_coeff, c_1, const):
    with np.errstate(divide='ignore', invalid='ignore'):
        K = g_coeff.shape[0] - 1
        thres = 2 * c_1 * math.log(n) / n
        [T, X] = np.meshgrid(thres, x)
        ratio = np.clip(2*X/T - 1, 0, 1)
        # force MATLAB-esque behavior with NaN, inf
        ratio[T == 0] = 1.0
        ratio[X == 0] = 0.0
        q = np.reshape(np.arange(K), [1, 1, K])
        g = np.tile(np.reshape(g_coeff, [1, 1, K + 1]), [c_1.shape[1], 1])
        g[:, :, 0] = g[:, :, 0] + np.log(thres)
        MLE = np.log(X) + (1-X) / (2*X*n)
        MLE[X == 0] = -np.log(n) - const
        tmp = (n*X[:, :, np.newaxis] - q) / (T[:, :, np.newaxis] * (n - q))
        polyApp = np.sum(np.cumprod(np.dstack([np.ones(T.shape + (1,)), tmp]),
                                    axis=2) * g, axis=2)
        polyFail = np.logical_or(np.isnan(polyApp), np.isinf(polyApp))
        polyApp[polyFail] = MLE[polyFail]
        return ratio*MLE + (1-ratio)*polyApp
def compute_by_noise_pow(self, signal, n_pow):
    s_spec = np.fft.fftpack.fft(signal * self._window)
    s_amp = np.absolute(s_spec)
    s_phase = np.angle(s_spec)
    gamma = self._calc_aposteriori_snr(s_amp, n_pow)
    xi = self._calc_apriori_snr(gamma)
    self._prevGamma = gamma
    nu = gamma * xi / (1.0 + xi)
    self._G = (self._gamma15 * np.sqrt(nu) / gamma) * np.exp(-nu / 2.0) * \
        ((1.0 + nu) * spc.i0(nu / 2.0) + nu * spc.i1(nu / 2.0))
    idx = np.less(s_amp ** 2.0, n_pow)
    self._G[idx] = self._constant
    idx = np.isnan(self._G) + np.isinf(self._G)
    self._G[idx] = xi[idx] / (xi[idx] + 1.0)
    idx = np.isnan(self._G) + np.isinf(self._G)
    self._G[idx] = self._constant
    self._G = np.maximum(self._G, 0.0)
    amp = self._G * s_amp
    amp = np.maximum(amp, 0.0)
    amp2 = self._ratio * amp + (1.0 - self._ratio) * s_amp
    self._prevAmp = amp
    spec = amp2 * np.exp(s_phase * 1j)
    return np.real(np.fft.fftpack.ifft(spec))
def compute_by_noise_pow(self, signal, n_pow):
    s_spec = np.fft.fftpack.fft(signal * self._window)
    s_amp = np.absolute(s_spec)
    s_phase = np.angle(s_spec)
    gamma = self._calc_aposteriori_snr(s_amp, n_pow)
    xi = self._calc_apriori_snr(gamma)
    # xi = self._calc_apriori_snr2(gamma, n_pow)
    self._prevGamma = gamma
    nu = gamma * xi / (1.0 + xi)
    self._G = xi / (1.0 + xi) * np.exp(0.5 * spc.exp1(nu))
    idx = np.less(s_amp ** 2.0, n_pow)
    self._G[idx] = self._constant
    idx = np.isnan(self._G) + np.isinf(self._G)
    self._G[idx] = xi[idx] / (xi[idx] + 1.0)
    idx = np.isnan(self._G) + np.isinf(self._G)
    self._G[idx] = self._constant
    self._G = np.maximum(self._G, 0.0)
    amp = self._G * s_amp
    amp = np.maximum(amp, 0.0)
    amp2 = self._ratio * amp + (1.0 - self._ratio) * s_amp
    self._prevAmp = amp
    spec = amp2 * np.exp(s_phase * 1j)
    return np.real(np.fft.fftpack.ifft(spec))
def compute_by_noise_pow(self, signal, n_pow):
    s_spec = np.fft.fftpack.fft(signal * self._window)
    s_amp = np.absolute(s_spec)
    s_phase = np.angle(s_spec)
    gamma = self._calc_aposteriori_snr(s_amp, n_pow)
    # xi = self._calc_apriori_snr2(gamma, n_pow)
    xi = self._calc_apriori_snr(gamma)
    self._prevGamma = gamma
    u = 0.5 - self._mu / (4.0 * np.sqrt(gamma * xi))
    self._G = u + np.sqrt(u ** 2.0 + self._tau / (gamma * 2.0))
    idx = np.less(s_amp ** 2.0, n_pow)
    self._G[idx] = self._constant
    idx = np.isnan(self._G) + np.isinf(self._G)
    self._G[idx] = xi[idx] / (xi[idx] + 1.0)
    idx = np.isnan(self._G) + np.isinf(self._G)
    self._G[idx] = self._constant
    self._G = np.maximum(self._G, 0.0)
    amp = self._G * s_amp
    amp = np.maximum(amp, 0.0)
    amp2 = self._ratio * amp + (1.0 - self._ratio) * s_amp
    self._prevAmp = amp
    spec = amp2 * np.exp(s_phase * 1j)
    return np.real(np.fft.fftpack.ifft(spec))
def get_power_adjusted_price(books, n=10, power=2):
    '''
    Returns the percent change of an average of order prices weighted by
    inverse distance-weighted volume for each data point in a DataFrame of
    book data.
    '''
    def calc_adjusted_price(book):
        def calc(x):
            return 0 if x.price - book.mid == 0 \
                else x.amount * (.5 * book.width / (x.price - book.mid)) ** power
        bid_inv = 1 / book.bids.iloc[:n].apply(calc, axis=1)
        ask_inv = 1 / book.asks.iloc[:n].apply(calc, axis=1)
        bid_price = book.bids.price.iloc[:n]
        ask_price = book.asks.price.iloc[:n]
        sum_numerator = (bid_price*bid_inv + ask_price*ask_inv).sum()
        sum_denominator = (bid_inv + ask_inv).sum()
        # if np.isnan(sum_numerator) or np.isinf(sum_numerator) or sum_numerator == 0.0 \
        #         or np.isnan(sum_denominator) or np.isinf(sum_denominator) or sum_denominator == 0.0:
        #     return 0
        quotient = sum_numerator / sum_denominator
        # if quotient < 0.0:
        #     return 0
        return quotient
    adjusted = books.apply(calc_adjusted_price, axis=1)
    return (adjusted / books.mid).apply(log).fillna(0)
def get_reward(self, next_state):
    p, v, ID, a = next_state['p'], next_state['v'], int(next_state['ID']), next_state['a']
    p_f, v_f, l_f = next_state['p_l1'], next_state['v_l1'], next_state['l_l1']
    distance = (p_f - l_f) - p
    h = distance / v
    h = 10 if np.isinf(h) else h  # cap h when v == 0 so the reward stays finite
    # desired_headway = 1
    if h < 1.3 and h >= 1:
        reward = 4 * (1.3 - h)
    elif h > 0.7 and h < 1:
        reward = 4 * (h - 0.7)
    elif h >= 1.3:
        reward = -2 * (h - 1.3)
    else:
        # h <= 0.7
        reward = -1 * (0.7 - h)
    self.cars[ID].reward = reward
    return reward
def test_filter_on_column_with_inf():
    # Test that the function excludes rows whose value in the given column is inf
    data = pd.DataFrame({'id': np.arange(1, 5, dtype='int64'),
                         'feature_1': [1.5601, 0, 2.33, 11.32],
                         'feature_ok': np.arange(1, 5)})
    data['feature_with_inf'] = 1 / data['feature_1']
    bad_df = data[np.isinf(data['feature_with_inf'])].copy()
    good_df = data[~np.isinf(data['feature_with_inf'])].copy()
    bad_df.reset_index(drop=True, inplace=True)
    good_df.reset_index(drop=True, inplace=True)
    output_df, output_excluded_df = filter_on_column(data,
                                                     'feature_with_inf',
                                                     'id',
                                                     exclude_zeros=False,
                                                     exclude_zero_sd=True)
    print(output_df)
    assert_frame_equal(output_df, good_df)
    assert_frame_equal(output_excluded_df, bad_df)
def remove_outliers_by_classifier(X, y, dates, model, m=0.9):
    # xgboost = XGBoost(max_depth=2, num_round=6000)
    if np.isnan(X).any():
        print("X contains NaN")
    if np.isinf(X).any():
        print("X contains inf")
    if np.isnan(np.log(y)).any():
        print("y contains NaN")
    if np.isinf(np.log(y)).any():
        print("y contains inf")
    print("X=", X.shape)
    print("y=", y.shape)
    model.fit(X, y)
    y_pred = model.predict(X)
    diff_values = np.abs(y_pred - y)
    abs_diff_vals = np.abs(diff_values)
    sorted_indexes = sorted(range(len(abs_diff_vals)), key=lambda x: abs_diff_vals[x])
    sorted_indexes_lead = sorted_indexes[:int(len(abs_diff_vals) * m)]
    return X[sorted_indexes_lead], y[sorted_indexes_lead], dates[sorted_indexes_lead]
def reldist_linpol(tx_soa, beacon_soa):
    # Interpolate between the two nearest beacon samples
    beacon_rx0, beacon_rx1 = beacon_soa[:, 0], beacon_soa[:, 1]
    tx_rx0, tx_rx1 = tx_soa[:, 0], tx_soa[:, 1]
    high_idx = np.searchsorted(beacon_rx0, tx_rx0)
    low_idx = high_idx - 1
    length = len(beacon_soa[:, 0])
    if high_idx[-1] >= length:
        high_idx[-1] = length - 1
    if low_idx[0] < 0:
        high_idx[0] = 0
    weight = ((tx_rx0 - beacon_rx0[low_idx]) /
              (beacon_rx0[high_idx] - beacon_rx0[low_idx]))
    weight[np.isinf(weight)] = 1  # replace inf from a zero denominator
    # Reldist in samples
    reldist = (tx_rx1 - (beacon_rx1[low_idx] * (1 - weight) +
                         beacon_rx1[high_idx] * weight))  # / 2.0
    return reldist
def af_fit(self, params):
    # TODO: fix me for continuous prediction
    seasonal_errors = []
    self.pred_vs_true = []
    for s, t in self.fit_test_season_pairs:
        weights = np.exp(self.fitness(params, self.predictor_arrays[s][self.tree.root.season_tips[s], :]))
        pred_af = self.weighted_af(self.seqs[s], weights)
        # seasonal_errors.append(np.mean(np.sum((pred_af-self.af[t])**2, axis=0), axis=0))
        future_diameter = 0.5 * np.sum(np.sum(self.af[t] * (1 - self.af[t]), axis=0), axis=0)
        seasonal_errors.append(np.sum(np.sum(pred_af * (1 - self.af[t]), axis=0), axis=0) - future_diameter)
        good_ind = self.af[s] * (1 - self.af[s]) > 0.05
        self.pred_vs_true.append(np.array(list(zip(self.af[s][good_ind],
                                                   self.af[t][good_ind],
                                                   pred_af[good_ind]))))
    mean_error = np.mean(seasonal_errors)
    if any(np.isnan(seasonal_errors) + np.isinf(seasonal_errors)):
        mean_error = 1e10
    self.last_fit = mean_error
    if self.verbose > 2:
        print(params, self.last_fit)
    return mean_error + regularization * np.sum(params ** 2)
def test_sum_inf(self):
    import pandas.core.nanops as nanops

    s = Series(np.random.randn(10))
    s2 = s.copy()
    s[5:8] = np.inf
    s2[5:8] = np.nan
    self.assertTrue(np.isinf(s.sum()))
    arr = np.random.randn(100, 100).astype('f4')
    arr[:, 2] = np.inf
    with cf.option_context("mode.use_inf_as_null", True):
        assert_almost_equal(s.sum(), s2.sum())
    res = nanops.nansum(arr, axis=1)
    self.assertTrue(np.isinf(res).all())
def fit(self, Y):
    """
    Generates the RBF coefficients to fit a set of given data values Y for centers self.centers
    :param Y: A set of dependent data values corresponding to self.centers
    :return: Void, sets the self.coefs values
    """
    kernel_matrix = self.EvaluateCentersKernel()
    kernel_matrix[np.isinf(kernel_matrix)] = 0  # TODO: Is there a better way to avoid the diagonal?
    monomial_basis = poly.GetMonomialBasis(self.dimension, self.poly_degree)
    poly_matrix = poly.BuildPolynomialMatrix(monomial_basis, self.centers.transpose())  # TODO: Probably remove transpose requirement
    poly_shape = np.shape(poly_matrix)
    # Get the number of columns, as we need to make an np.zeros((num_cols, num_cols))
    num_cols = poly_shape[1]
    num_rbf_coefs = len(self.centers)
    zero_mat = np.zeros((num_cols, num_cols))
    upper_matrix = np.hstack((kernel_matrix, poly_matrix))
    lower_matrix = np.hstack((poly_matrix.transpose(), zero_mat))
    rbf_matrix = np.vstack((upper_matrix, lower_matrix))
    Y = np.concatenate((Y, np.zeros(num_cols)))  # Extend with zeros for the polynomial annihilation
    self.coefs = sl.solve(rbf_matrix, Y, sym_pos=False)
def test_beam_statistics(RE, resize, kernel, uint_mode, thresh_mode, min_area,
                         thresh_factor, filter_kernel, image_num, cent_num,
                         image_delay, ad_data, image_data,
                         lcls_two_bounce_system):
    _, _, _, y1, y2 = lcls_two_bounce_system
    array_str = "image1.array_data"
    size_str = "image1.array_size"

    def test_plan():
        stats = yield from beam_statistics(
            [y1, y2], array_field=array_str, size_field=size_str,
            cent_num=cent_num, image_num=image_num,
            kernel=kernel, resize=resize, uint_mode=uint_mode,
            thresh_factor=thresh_factor, filter_kernel=filter_kernel,
            thresh_mode=thresh_mode, md="all", image_delay=image_delay,
            ad_data=ad_data, image_data=image_data)
        for _, det in stats.items():
            for key, val in det.items():
                if key == "md":
                    continue
                # the original `assert(not np.isnan(val) or not np.isinf(val) or not None)`
                # was always true; this checks what it evidently intended
                assert not (np.isnan(val) or np.isinf(val) or val is None)
    RE(run_wrapper(test_plan()))
def equal(a, b, exact):
    if array_equal(a, b):
        return True
    if hasattr(a, 'dtype') and a.dtype in ['f4', 'f8']:
        nnans = isnan(a).sum()
        if nnans > 0:
            # For results containing NaNs, just check that the number
            # of NaNs is the same in both arrays. This check could be
            # made more exhaustive, but checking element by element in
            # python space is very expensive in general.
            return nnans == isnan(b).sum()
        ninfs = isinf(a).sum()
        if ninfs > 0:
            # Ditto for Infs
            return ninfs == isinf(b).sum()
    if exact:
        return (shape(a) == shape(b)) and alltrue(ravel(a) == ravel(b), axis=0)
    else:
        if hasattr(a, 'dtype') and a.dtype == 'f4':
            atol = 1e-5  # Relax precision for special opcodes, like fmod
        else:
            atol = 1e-8
        return (shape(a) == shape(b) and
                allclose(ravel(a), ravel(b), atol=atol))
def calc_specialist_weights(numsamps):
    """
    Calculates a vector of specialist weights.

    Args:
        numsamps: A nonnegative vector of ints, specifying the number of
            samples on which each specialist predicts.

    Returns:
        A vector of floats specifying each specialist's weight
        (1/(fraction of data supported)). If numsamps[i] == 0 for some
        specialist i, the corresponding weight will be 0.

    Note that the return value is invariant to the scaling of numsamps by a
    positive constant. Similarly, calculating numsamps using a uniform random
    subsample of a dataset will result in approximately the same return value
    as using the full dataset.
    """
    weights = 1.0 / numsamps
    weights[np.isinf(weights)] = 0.0
    return np.max(numsamps) * weights
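A short, hypothetical example of the weight calculation above (the divide-by-zero RuntimeWarning from 1.0/0.0 is expected; the isinf mask then zeroes that entry):

numsamps = np.array([100.0, 50.0, 0.0])
print(calc_specialist_weights(numsamps))
# [1. 2. 0.] -- specialist 0 sees all the data (weight 1), specialist 1 half
# of it (weight 2), and the unused specialist gets weight 0 rather than inf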
def get_cubic_root(self):
    # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2
    # where x = sqrt(mu).
    # We substitute x, which is sqrt(mu), with x = y + 1.
    # It gives y^3 + py = q
    # where p = (D^2 h_min^2)/(2*C) and q = -p.
    # We use Vieta's substitution to compute the root.
    # There is only one real solution y (which is in [0, 1]).
    # http://mathworld.wolfram.com/VietasSubstitution.html
    # eps in the numerator is to prevent momentum = 1 in case of zero gradient
    if np.isnan(self._dist_to_opt) or np.isnan(self._h_min) or np.isnan(self._grad_var) \
            or np.isinf(self._dist_to_opt) or np.isinf(self._h_min) or np.isinf(self._grad_var):
        logging.warning("Input to cubic solver has invalid nan/inf value!")
        raise Exception("Input to cubic solver has invalid nan/inf value!")
    p = (self._dist_to_opt + eps)**2 * (self._h_min + eps)**2 / 2 / (self._grad_var + eps)
    w3 = (-math.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
    w = math.copysign(1.0, w3) * math.pow(math.fabs(w3), 1.0/3.0)
    y = w - p / 3.0 / (w + eps)
    x = y + 1
    if self._verbose:
        logging.debug("p %f, denominator %f", p, self._grad_var + eps)
        logging.debug("w3 %f ", w3)
        logging.debug("y %f, denominator %f", y, w + eps)
    if np.isnan(x) or np.isinf(x):
        logging.warning("Output from cubic is invalid nan/inf value!")
        raise Exception("Output from cubic is invalid nan/inf value!")
    return x
def check_entry(key, value):
    if key != 'period_label':
        return np.isnan(value) or np.isinf(value)
    else:
        return False


############################
# Risk Metric Calculations #
############################
def check_data(X, X_names, Y):
    # type checks
    assert type(X) is np.ndarray, "type(X) should be numpy.ndarray"
    assert type(Y) is np.ndarray, "type(Y) should be numpy.ndarray"
    assert type(X_names) is list, "X_names should be a list"

    # sizes and uniqueness
    N, P = X.shape
    assert N > 0, 'X matrix must have at least 1 row'
    assert P > 0, 'X matrix must have at least 1 column'
    assert len(Y) == N, 'len(Y) should be same as # of rows in X'
    assert len(list(set(X_names))) == len(X_names), 'X_names is not unique'
    assert len(X_names) == P, 'len(X_names) should be same as # of cols in X'

    # X matrix values
    if '(Intercept)' in X_names:
        assert all(X[:, X_names.index('(Intercept)')] == 1.0), "'(Intercept)' column should only be composed of 1s"
    else:
        warnings.warn("there is no column named '(Intercept)' in X_names")
    assert np.all(~np.isnan(X)), 'X has nan entries'
    assert np.all(~np.isinf(X)), 'X has inf entries'

    # Y vector values
    assert all((Y == 1) | (Y == -1)), 'Y[i] should = [-1,1] for all i'
    if all(Y == 1):
        warnings.warn("all Y_i == 1 for all i")
    if all(Y == -1):
        warnings.warn("all Y_i == -1 for all i")
    # TODO (optional): collect warnings and return them?
def setRange(self, mn, mx):
    """Set the range of values displayed by the axis.
    Usually this is handled automatically by linking the axis to a ViewBox
    with :func:`linkToView <pyqtgraph.AxisItem.linkToView>`."""
    if any(np.isinf((mn, mx))) or any(np.isnan((mn, mx))):
        raise Exception("Not setting range to [%s, %s]" % (str(mn), str(mx)))
    self.range = [mn, mx]
    if self.autoSIPrefix:
        self.updateAutoSIPrefix()
    self.picture = None
    self.update()
def siScale(x, minVal=1e-25, allowUnicode=True):
    """
    Return the recommended scale factor and SI prefix string for x.

    Example::

        siScale(0.0001)  # returns (1e6, 'µ')
        # This indicates that the number 0.0001 is best represented
        # as 0.0001 * 1e6 = 100 µUnits
    """
    if isinstance(x, decimal.Decimal):
        x = float(x)
    try:
        if np.isnan(x) or np.isinf(x):
            return (1, '')
    except:
        print(x, type(x))
        raise
    if abs(x) < minVal:
        m = 0
        x = 0
    else:
        m = int(np.clip(np.floor(np.log(abs(x)) / np.log(1000)), -9.0, 9.0))
    if m == 0:
        pref = ''
    elif m < -8 or m > 8:
        pref = 'e%d' % (m*3)
    else:
        if allowUnicode:
            pref = SI_PREFIXES[m+8]
        else:
            pref = SI_PREFIXES_ASCII[m+8]
    p = .001**m
    return (p, pref)
def map(self, data):
    data = data[self.fieldName]
    scaled = np.clip((data - self['Min']) / (self['Max'] - self['Min']), 0, 1)
    cmap = self.value()
    colors = cmap.map(scaled, mode='float')
    mask = np.isnan(data) | np.isinf(data)
    nanColor = self['NaN']
    nanColor = (nanColor.red()/255., nanColor.green()/255., nanColor.blue()/255., nanColor.alpha()/255.)
    colors[mask] = nanColor
    return colors