Python math module: log() usage examples
The following code examples, extracted from open-source Python projects, illustrate how math.log() is used in practice.
def decode(self, input_vectors, output):
    tgt_toks = [self.tgt_vocab[tok] for tok in output]
    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)
    s = self.dec_lstm.initial_state()
    s = s.add_input(dynet.concatenate([
        input_vectors[-1],
        dynet.vecInput(self.args.hidden_dim)
    ]))
    loss = []
    for tok in tgt_toks:
        out_vector = dynet.affine_transform([b, w, s.output()])
        probs = dynet.softmax(out_vector)
        loss.append(-dynet.log(dynet.pick(probs, tok.i)))
        embed_vector = self.tgt_lookup[tok.i]
        attn_vector = self.attend(input_vectors, s)
        inp = dynet.concatenate([embed_vector, attn_vector])
        s = s.add_input(inp)
    loss = dynet.esum(loss)
    return loss
def _factor_target_indices(self, Y_inds, vocab_size=None, base=2):
    if vocab_size is None:
        vocab_size = len(self.dp.word_index)
    print >>sys.stderr, "Factoring targets of vocabulary size: %d"%(vocab_size)
    num_vecs = int(math.ceil(math.log(vocab_size)/math.log(base))) + 1
    base_inds = []
    div_Y_inds = Y_inds
    print >>sys.stderr, "Number of factors: %d"%num_vecs
    for i in range(num_vecs):
        new_inds = div_Y_inds % base
        if i == num_vecs - 1:
            if new_inds.sum() == 0:
                # Most significant "digit" is a zero. Omit it.
                break
        base_inds.append(new_inds)
        div_Y_inds = numpy.copy(div_Y_inds/base)
    base_vecs = [self._make_one_hot(base_inds_i, base) for base_inds_i in base_inds]
    return base_vecs
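The key line above is int(math.ceil(math.log(vocab_size)/math.log(base))) + 1, which works out how many base-`base` digits are needed to represent every word index, plus one extra factor. A minimal standalone sketch of that calculation, with a hypothetical vocabulary size:

import math

def num_factors(vocab_size, base=2):
    # digits needed to write any index below vocab_size in the given base, plus one,
    # mirroring the snippet above
    return int(math.ceil(math.log(vocab_size) / math.log(base))) + 1

print(num_factors(50000))            # 17, since 2**16 = 65536 >= 50000
print(num_factors(50000, base=10))   # 6, since 10**5 = 100000 >= 50000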
def normalvariate(self, mu, sigma):
    """Normal distribution.
    mu is the mean, and sigma is the standard deviation.
    """
    # mu = mean, sigma = standard deviation
    # Uses Kinderman and Monahan method. Reference: Kinderman,
    # A.J. and Monahan, J.F., "Computer generation of random
    # variables using the ratio of uniform deviates", ACM Trans
    # Math Software, 3, (1977), pp257-260.
    random = self.random
    while 1:
        u1 = random()
        u2 = 1.0 - random()
        z = NV_MAGICCONST*(u1-0.5)/u2
        zz = z*z/4.0
        if zz <= -_log(u2):
            break
    return mu + z*sigma

## -------------------- lognormal distribution --------------------
def expovariate(self, lambd):
    """Exponential distribution.
    lambd is 1.0 divided by the desired mean. It should be
    nonzero. (The parameter would be called "lambda", but that is
    a reserved word in Python.) Returned values range from 0 to
    positive infinity if lambd is positive, and from negative
    infinity to 0 if lambd is negative.
    """
    # lambd: rate lambd = 1/mean
    # ('lambda' is a Python reserved word)

    # we use 1-random() instead of random() to preclude the
    # possibility of taking the log of zero.
    return -_log(1.0 - self.random())/lambd

## -------------------- von Mises distribution --------------------
def test_simple_scaling():
    Quantity.set_prefs(spacer=None, show_label=None, label_fmt=None, label_fmt_full=None)

    q = Quantity('1kg')
    assert q.render() == '1 kg'
    assert q.render(scale=0.001, show_units=False) == '1'
    with pytest.raises(KeyError, message="Unable to convert between 'fuzz' and 'g'."):
        q.render(scale='fuzz')

    q = Quantity('1', units='g', scale=1000)
    assert q.render() == '1 kg'
    assert q.render(scale=(0.0022046, 'lbs')) == '2.2046 lbs'

    q = Quantity('1', scale=(1000, 'g'))
    assert q.render() == '1 kg'
    assert q.render(scale=lambda v, u: (0.0022046*v, 'lbs')) == '2.2046 lbs'

    def dB(v, u):
        return 20*math.log(v, 10), 'dB'+u

    def adB(v, u):
        return pow(10, v/20), u[2:] if u.startswith('dB') else u

    q = Quantity('-40 dBV', scale=adB)
    assert q.render() == '10 mV'
    assert q.render(scale=dB) == '-40 dBV'
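The dB/adB helpers above use math.log(v, 10) for the decibel conversion. A small self-contained sketch of the same round trip, with values chosen purely for illustration:

import math

def to_db(v):
    return 20 * math.log(v, 10)    # voltage ratio -> decibels

def from_db(db):
    return pow(10, db / 20.0)      # decibels -> voltage ratio

print(to_db(0.01))     # -40.0
print(from_db(-40))    # 0.01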
def score_samples(self, X):
    """Return the log-likelihood of each sample

    See. "Pattern Recognition and Machine Learning"
    by C. Bishop, 12.2.1 p. 574
    or http://www.miketipping.com/papers/met-mppca.pdf

    Parameters
    ----------
    X : array, shape(n_samples, n_features)
        The data.

    Returns
    -------
    ll : array, shape (n_samples,)
        Log-likelihood of each sample under the current model
    """
    check_is_fitted(self, 'mean_')

    X = check_array(X)
    Xr = X - self.mean_
    n_features = X.shape[1]
    log_like = np.zeros(X.shape[0])
    precision = self.get_precision()
    log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
    log_like -= .5 * (n_features * log(2. * np.pi)
                      - fast_logdet(precision))
    return log_like
def compute_possibility(self, start_position, seg_index, oov_pattern, oov_dct, oov_ctx):
    # Accumulate the log-probability weight of an OOV tag pattern starting at start_position.
    weight, j = 0, start_position
    test_word = []
    for tag in oov_pattern:
        word_content = self.words_graph.get_word(seg_index[j][0], seg_index[j][1]).content
        oov_freq = oov_dct.get_frequence(
            word_content,
            self.oov_tag_encode(tag)
        )
        #print('tag:{} word:{} freq:{} start_prob:{}'.format(
        #    tag, word_content, oov_freq, oov_ctx.prob_to_frequence(oov_ctx.start_prob[self.oov_tag_encode(tag)])))
        test_word.append(self.words_graph.get_word(seg_index[j][0], seg_index[j][1]).content)
        # Original formula: dPOSPoss=log((double)(m_context.GetFrequency(0,m_nBestTag[i])+1))-log((double)(nFreq+1));
        poss = math.log(float(oov_ctx.prob_to_frequence(oov_ctx.start_prob[self.oov_tag_encode(tag)]))) - math.log(float(oov_freq + 1))
        weight += poss
        j += 1
    #print('compute_possibility() {} {} = {}'.format(oov_pattern, ''.join(test_word), weight))
    return weight
def __init__( self, get_params_function, try_params_function ):
    self.get_params = get_params_function
    self.try_params = try_params_function

    self.max_iter = 81      # maximum iterations per configuration
    self.eta = 3            # defines configuration downsampling rate (default = 3)

    self.logeta = lambda x: log( x ) / log( self.eta )
    self.s_max = int( self.logeta( self.max_iter ))
    self.B = ( self.s_max + 1 ) * self.max_iter

    self.results = []       # list of dicts
    self.counter = 0
    self.best_loss = np.inf
    self.best_counter = -1

# can be called multiple times
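logeta above is a base-eta logarithm built from math.log, and s_max = int(logeta(max_iter)) fixes how many successive-halving brackets are run; mathematically log_3(81) = 4. A standalone sketch of the same arithmetic (int() truncates, so rounding first guards against floating-point error when max_iter is an exact power of eta):

from math import log

max_iter, eta = 81, 3
logeta = lambda x: log(x) / log(eta)    # logarithm base eta
s_max = int(round(logeta(max_iter)))    # rounding avoids truncating 3.999... to 3
B = (s_max + 1) * max_iter              # total resource budget across brackets
print(s_max, B)                         # 4 405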
def calc_mean_lp_scores(log_prob_scores: List[float],
                        lengths: List[int]) -> List[Union[None, float]]:
    r"""
    .. math:
        \frac{%
            \log P_\text{model}\left(\xi\right)
        }{%
            \text{length}\left(\xi\right)
        }

    >>> '{:.3f}'.format(calc_mean_lp_scores([-14.7579], [4])[0])
    '-3.689'
    """
    mean_lp_scores = []
    for score, length in zip(log_prob_scores, lengths):
        x = None \
            if score is None or length == 0 \
            else float(score) / float(length)
        mean_lp_scores.append(x)
    return mean_lp_scores
def calc_norm_lp_div_scores(
        log_prob_scores: List[float],
        unigram_scores: List[float]) -> List[Union[None, float]]:
    r"""
    .. math:
        \frac{%
            \log P_\text{model}\left(\xi\right)
        }{%
            \log P_\text{unigram}\left(\xi\right)
        }

    >>> '{:.3f}'.format(calc_norm_lp_div_scores([-14.7579], [-35.6325])[0])
    '-0.414'
    """
    results = []
    for log_prob, unigram_score in zip(log_prob_scores, unigram_scores):
        if log_prob is None or numpy.isclose(unigram_score, 0.0, rtol=1e-05):
            x = None
        else:
            x = (-1.0) * float(log_prob) / float(unigram_score)
        results.append(x)
    return results
def calc_norm_lp_sub_scores(
        log_prob_scores: List[float],
        unigram_scores: List[float]) -> List[Union[None, float]]:
    r"""
    .. math:
        \log P_\text{model}\left(\xi\right)
        - \log P_\text{unigram}\left(\xi\right)

    >>> '{:.3f}'.format(calc_norm_lp_sub_scores([-14.7579], [-35.6325])[0])
    '20.875'
    """
    results = []
    for log_prob, unigram_score in zip(log_prob_scores, unigram_scores):
        if log_prob is None or numpy.isclose(unigram_score, 0.0, rtol=1e-05):
            x = None
        else:
            x = float(log_prob) - float(unigram_score)
        results.append(x)
    return results
def expovariate(self, lambd):
    """Exponential distribution.
    lambd is 1.0 divided by the desired mean. It should be
    nonzero. (The parameter would be called "lambda", but that is
    a reserved word in Python.) Returned values range from 0 to
    positive infinity if lambd is positive, and from negative
    infinity to 0 if lambd is negative.
    """
    # lambd: rate lambd = 1/mean
    # ('lambda' is a Python reserved word)
    random = self.random
    u = random()
    while u <= 1e-7:
        u = random()
    return -_log(u)/lambd

## -------------------- von Mises distribution --------------------
def randint(minvalue, maxvalue):
    """Returns a random integer x with minvalue <= x <= maxvalue"""

    # Safety - get a lot of random data even if the range is fairly
    # small
    min_nbits = 32

    # The range of the random numbers we need to generate
    range = maxvalue - minvalue

    # Which is this number of bytes
    rangebytes = ceil(math.log(range, 2) / 8.)

    # Convert to bits, but make sure it's always at least min_nbits*2
    rangebits = max(rangebytes * 8, min_nbits * 2)

    # Take a random number of bits between min_nbits and rangebits
    nbits = random.randint(min_nbits, rangebits)

    return (read_random_int(nbits) % range) + minvalue
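The sizing logic is that math.log(range, 2) gives the number of bits needed to span the range, and dividing by 8 and rounding up converts that to bytes. A quick standalone check with an illustrative range:

import math
from math import ceil

value_range = 1000000
rangebytes = ceil(math.log(value_range, 2) / 8.)     # bits needed, converted to whole bytes
print(int(rangebytes))                               # 3, since 2**16 < 1000000 <= 2**24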
def randomized_primality_testing(n, k):
    """Calculates whether n is composite (which is always correct) or
    prime (which is incorrect with error probability 2**-k)

    Returns False if the number is composite, and True if it's
    probably prime.
    """
    q = 0.5     # Property of the jacobi_witness function

    # t = int(math.ceil(k / math.log(1/q, 2)))
    t = ceil(k / math.log(1/q, 2))
    for i in range(t+1):
        x = randint(1, n-1)
        if jacobi_witness(x, n):
            return False
    return True
def idf(tf_dic_list, global_idf_dic, silent=1):
    """
    Input:
        global_idf_dic = {} # word -> idf, which may be updated in place
    """
    if silent == 0:
        print("idf ...")
    doc_len = len(tf_dic_list)
    idf_dic_list = []   # [{word:idf} for each sample]
    for c, tf_dic in enumerate(tf_dic_list):
        idf_dic = {}
        for word in tf_dic:
            if word not in global_idf_dic:
                n_containing = sum([word in tf_dic for tf_dic in tf_dic_list])
                global_idf_dic[word] = log(doc_len/(1.0+n_containing))
            idf_dic[word] = global_idf_dic[word]
        idf_dic_list.append(idf_dic)
        if silent == 0 and c > 0 and c % 100 == 0:
            print("{} documents done, total {}, word {}, idf {}".format(c, len(tf_dic_list), word, global_idf_dic[word]))
    return idf_dic_list
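Each new word gets the classic IDF weight log(doc_len / (1 + n_containing)). A minimal standalone sketch with made-up documents:

from math import log

docs = [{"cat", "dog"}, {"dog"}, {"fish"}]
doc_len = len(docs)

def idf(word):
    n_containing = sum(word in doc for doc in docs)
    return log(doc_len / (1.0 + n_containing))

print(round(idf("dog"), 3))    # log(3/3) = 0.0
print(round(idf("fish"), 3))   # log(3/2) ~= 0.405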
def _ndcg_at(k, label_col):
    def ndcg_at_k(predicted, actual):
        # TODO: Taking in rn and then re-sorting might not be necessary, but i can't
        # find any real guarantee that they would come in order after a groupBy + collect_list,
        # since they were only ordered within the window function.
        predicted = [row[label_col] for row in sorted(predicted, key=lambda r: r.rn)]
        actual = [row[label_col] for row in sorted(actual, key=lambda r: r.rn)]
        dcg = 0.
        for i, label in enumerate(predicted):
            # This form is used to match EvalNDCG in xgboost
            dcg += ((1 << label) - 1) / math.log(i + 2.0, 2)
        idcg = 0.
        for i, label in enumerate(actual):
            idcg += ((1 << label) - 1) / math.log(i + 2.0, 2)
        if idcg == 0:
            return 0
        else:
            return dcg / idcg
    return F.udf(ndcg_at_k, pyspark.sql.types.DoubleType())
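Both loops use the xgboost-style gain (2**label - 1) discounted by log2(rank + 1), written here as math.log(i + 2.0, 2). A compact standalone version of the same DCG/IDCG computation, with toy relevance labels:

import math

def dcg(labels):
    # gain (2**label - 1) discounted by log2(i + 2), as in the snippet above
    return sum(((1 << label) - 1) / math.log(i + 2.0, 2) for i, label in enumerate(labels))

predicted = [3, 2, 0, 1]                 # labels in predicted order
ideal = sorted(predicted, reverse=True)  # labels in ideal order
ndcg = dcg(predicted) / dcg(ideal) if dcg(ideal) else 0.0
print(round(ndcg, 4))                    # close to 1 because this ranking is nearly ideal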
def launch():
    opts, h5_files, motifs_fn = __parseArgs()

    __initLog(opts)

    motifs = np.loadtxt(motifs_fn, dtype="str", ndmin=1)
    motifs, not_found = find_motifs_in_control(opts, motifs)
    if len(not_found) > 0:
        logging.warning("")
        logging.warning(" ******************** Important *********************")
        logging.warning(" Did not find %s motifs in %s:" % (len(not_found), opts.control_pkl_name))
        for nf in not_found:
            logging.warning("     %s" % nf)
        logging.warning(" These motif(s) will be removed from further analysis.")
        logging.warning(" These %s motifs will be kept:" % len(motifs))
        for m in motifs:
            logging.warning("     %s" % m)
        logging.warning(" ****************************************************")
        logging.warning("")
    else:
        logging.info("Found entries for all %s motifs in %s" % (len(motifs), opts.control_pkl_name))

    build_profiles(opts, h5_files, motifs, motifs_fn)

    print >> sys.stderr, "mBin methylation profiling has finished running. See log for details."
def get_differentially_private_std(sensitivity, epsilon, delta,
                                   tol=DEFAULT_SIGMA_TOLERANCE):
    '''
    Determine smallest standard deviation for a normal distribution such that
    the probability of a value violating epsilon-differential privacy is at
    most delta.
    '''
    # std upper bound determined by improving result in literature,
    # Hardt and Roth, "Beating Randomized Response on Incoherent Matrices"
    # Thm. 2.6 (and the Lemma in App. A) can be improved to provide the
    # following upper bound
    std_upper_bound = (float(sensitivity)/epsilon) * (4.0/3.0) *\
        (2 * math.log(1.0/delta))**(0.5)
    std_lower_bound = tol  # use small but non-zero value for std lower-bound

    if (satisfies_dp(sensitivity, epsilon, delta, std_lower_bound) is True):
        raise ValueError('Could not find lower bound for std interval.')

    std = interval_boolean_binary_search(
        lambda x: satisfies_dp(sensitivity, epsilon, delta, x), std_lower_bound,
        std_upper_bound, tol, return_true=True)
    return std
def _encode_ratio(inval, outval):
    '''
    Calculate the log ratio between inbound and outbound traffic.
    Positive when outval > inval, and negative when inval > outval.
    Returns a non-infinite floating point value:
    - zero when inval and outval are zero,
    - a large negative number (< -100) when outval is zero, and
    - a large positive number (> 100) when inval is zero, and
    - log(base 2)(outval/inval) otherwise.
    '''
    inval = float(inval)
    outval = float(outval)
    if inval == 0.0 and outval == 0.0:
        return 0.0
    elif inval == 0.0:
        return sys.float_info.max_exp
    elif outval == 0.0:
        return sys.float_info.min_exp
    else:
        return math.log(outval/inval, 2)
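The interesting branch is the last one: a base-2 log ratio that is positive when outbound traffic exceeds inbound. A toy check with invented counts:

import math

print(math.log(8.0 / 2.0, 2))    #  2.0, outbound is 4x inbound
print(math.log(2.0 / 8.0, 2))    # -2.0, inbound is 4x outbound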
def calculate_oobatake_dS(seq, temp):
    """Get dS using Oobatake method in units cal/mol.

    Args:
        seq (str, Seq, SeqRecord): Amino acid sequence
        temp (float): Temperature in degrees C

    Returns:
        float: dS in units cal/mol
    """
    seq = ssbio.protein.sequence.utils.cast_to_str(seq)

    dS = 0
    temp += 273.15
    T0 = 298.15
    dCp_sum = _sum_of_dCp(seq)

    for aa in seq:
        S0 = oobatake_dictionary[aa]['dS']
        dS += S0

    return dS + dCp_sum * math.log(temp / T0)
def calculate_dill_dG(seq_len, temp):
    """Get free energy of unfolding (dG) using Dill method in units J/mol.

    Args:
        seq_len (int): Length of amino acid sequence
        temp (float): Temperature in degrees C

    Returns:
        float: Free energy of unfolding dG (J/mol)
    """
    Th = 373.5  # This quantity affects the up-and-down of the dG vs temperature curve (dG values)
    Ts = 385    # This quantity affects the left-and-right shift of the curve

    temp += 273.15
    dH = (4.0 * seq_len + 143) * 1000
    dS = 13.27 * seq_len + 448
    dCp = (0.049 * seq_len + 0.85) * 1000

    dG = dH + dCp * (temp - Th) - temp * dS - temp * dCp * math.log(float(temp) / Ts)
    return dG
def Rstr(self):
    array2 = []
    prixe = math.log(0.03637 / float(252) + 1)
    ret = self.sharedf
    ret['change'] = ret['change'] - prixe
    rstr = []
    print 1
    if len(ret) > 525:
        for z in range(0, 504):
            array2.append(math.pow(math.pow(float(1) / 2, float(1 / float(126))), (503 - z)))
        for h in range(0, 525):
            rstr.append(numpy.NaN)
        for c in range(525, len(ret)):
            rett = 0
            # NOTE: 'duan' is expected to be a rolling window slice of ret,
            # defined elsewhere in the original project.
            for f in range(0, len(duan) - 21):
                rett = rett + duan.iloc[f, 16] * array2[f]
            rstr.append(rett)
        print rstr
    ret['rstr'] = rstr
    return ret[['date', 'rstr']]
def Cmra(self):
    df = self.sharedf
    cc = []
    cmra = []
    prixe = math.log(0.03637/float(12)+1)
    df = df.set_index('date')
    df1 = df['change']
    for x in range(20, len(df1.index)+1):
        cc.append(df1[x-20:x].sum()-prixe)
    dd = []
    for x in range(12, len(cc)+1):
        dd.append(sum(cc[x-12:x]))
    for x in range(252, len(dd)+1):
        cmra.append(max(cc[x-252:x])-min(cc[x-252:x]))
    df = df[281:]
    df['cmra'] = cmra
    df['date'] = df.index
    df = pandas.DataFrame(df.reset_index(drop=True))
    return df[['date', 'cmra']]
def blackcox_pd(equity, extasset, sigma):
    """Compute the probability of default for external assets following a
    Geometric Brownian Motion and the Black and Cox model.

    Parameters:
        equity (float): equity
        extasset (float): external assets
        sigma (float): volatility of the Geometric Brownian Motion

    Returns:
        probability of default
    """
    if equity <= 0.0:
        return 1.0
    if equity >= extasset:
        return 0.0
    else:
        #return 1 + (- 1/2 * (1 + math.erf((-math.log(1 - equity/extasset) - sigma**2/2) /
        #                                  (math.sqrt(2) * sigma)) )
        #            + (extasset/equity)/2 * (1 + math.erf((math.log(1 - equity/extasset) - sigma**2/2) /
        #                                                  (math.sqrt(2) * sigma)) ) )
        return (1/2 * (1 + math.erf((math.log(1 - equity/extasset) + sigma**2/2) /
                                    (math.sqrt(2) * sigma)) ) +
                (extasset/(extasset - equity))/2 * (1 + math.erf((math.log(1 - equity/extasset) - sigma**2/2) /
                                                                 (math.sqrt(2) * sigma)) ) )
def ndcg(self, y_true, y_pred, k=20):
    s = 0.
    c = self.zipped(y_true, y_pred)
    c_g = sorted(c, key=lambda x: x[0], reverse=True)
    c_p = sorted(c, key=lambda x: x[1], reverse=True)
    #idcg = [0. for i in range(k)]
    idcg = np.zeros([k], dtype=np.float32)
    dcg = np.zeros([k], dtype=np.float32)
    #dcg = [0. for i in range(k)]
    for i, (g, p) in enumerate(c_g):
        if g > self.rel_threshold:
            idcg[i:] += (math.pow(2., g) - 1.) / math.log(2. + i)
        if i >= k:
            break
    for i, (g, p) in enumerate(c_p):
        if g > self.rel_threshold:
            dcg[i:] += (math.pow(2., g) - 1.) / math.log(2. + i)
        if i >= k:
            break
    for idx, v in enumerate(idcg):
        if v == 0.:
            dcg[idx] = 0.
        else:
            dcg[idx] /= v
    return dcg
def _log_likelihood(Y, gamma, sigma):
    """
    Compute the log-likelihood for the Generalized Pareto Distribution (μ=0)

    Parameters
    ----------
    Y : numpy.array
        observations
    gamma : float
        GPD index parameter
    sigma : float
        GPD scale parameter (>0)

    Returns
    ----------
    float
        log-likelihood of the sample Y to be drawn from a GPD(γ, σ, μ=0)
    """
    n = Y.size
    if gamma != 0:
        tau = gamma/sigma
        L = -n * log(sigma) - (1 + (1/gamma)) * (np.log(1+tau*Y)).sum()
    else:
        L = n * (1 + log(Y.mean()))
    return L
def _quantile(self, gamma, sigma):
    """
    Compute the quantile at level 1-q

    Parameters
    ----------
    gamma : float
        GPD parameter
    sigma : float
        GPD parameter

    Returns
    ----------
    float
        quantile at level 1-q for the GPD(γ, σ, μ=0)
    """
    r = self.n * self.proba / self.Nt
    if gamma != 0:
        return self.init_threshold + (sigma/gamma)*(pow(r, -gamma)-1)
    else:
        return self.init_threshold - sigma*log(r)
def get_results(self):
    result = {}
    fn = self.rec_fn()
    txt = open(fn).read()
    params_txt = open(self.par_fn()).read().splitlines()
    columns = ['parameter', 'value', 'scale', 'offset']
    param_vals = [dict(zip(columns, line.strip().split())) for line in params_txt[1:]]
    params = pd.DataFrame(param_vals)
    params = params.set_index('parameter')
    for col in columns[1:]:
        params[col] = params[col].astype('f')
    result['results_file'] = fn
    result['text'] = txt
    result['parameters'] = params
    if self.detailed_log:
        result['log'] = self.read_logs()
    return result
def get_similarity(word_list1, word_list2):
    """Default function for computing the similarity between two sentences.

    Keyword arguments:
    word_list1, word_list2  --  the two sentences, each represented as a list of words
    """
    words = list(set(word_list1 + word_list2))
    vector1 = [float(word_list1.count(word)) for word in words]
    vector2 = [float(word_list2.count(word)) for word in words]

    vector3 = [vector1[x]*vector2[x] for x in xrange(len(vector1))]
    vector4 = [1 for num in vector3 if num > 0.]
    co_occur_num = sum(vector4)

    if abs(co_occur_num) <= 1e-12:
        return 0.

    denominator = math.log(float(len(word_list1))) + math.log(float(len(word_list2)))  # denominator

    if abs(denominator) < 1e-12:
        return 0.

    return co_occur_num / denominator
def lonlat_to_pixel(self, lonlat, zoom):
    "Converts a longitude, latitude coordinate pair for the given zoom level."
    # Setting up, unpacking the longitude, latitude values and getting the
    # number of pixels for the given zoom level.
    lon, lat = self.get_lon_lat(lonlat)
    npix = self._npix[zoom]

    # Calculating the pixel x coordinate by multiplying the longitude value
    # with the number of degrees/pixel at the given zoom level.
    px_x = round(npix + (lon * self._degpp[zoom]))

    # Creating the factor, and ensuring that 1 or -1 is not passed in as the
    # base to the logarithm.  Here's why:
    #  if fac = -1, we'll get log(0) which is undefined;
    #  if fac =  1, our logarithm base will be divided by 0, also undefined.
    fac = min(max(sin(DTOR * lat), -0.9999), 0.9999)

    # Calculating the pixel y coordinate.
    px_y = round(npix + (0.5 * log((1 + fac) / (1 - fac)) * (-1.0 * self._radpp[zoom])))

    # Returning the pixel x, y to the caller of the function.
    return (px_x, px_y)
def getPSD(df, dw=0.05, roverlap=0.5, window='hanning', detrend='constant'):
    """
    Compute the power spectral density
    """
    if type(df) == pd.Series:
        df = pd.DataFrame(df)

    nfft = int((2*pi / dw) / dx(df))
    nperseg = 2**int(log(nfft)/log(2))
    noverlap = nperseg * roverlap

    """ Return the PSD of a time signal """
    try:
        from scipy.signal import welch
    except:
        raise Exception("Welch function not found, please install scipy > 0.12")

    data = []
    for iSig in range(df.shape[1]):
        test = welch(df.values[:, iSig], fs=1. / dx(df), window=window, nperseg=nperseg, noverlap=noverlap, nfft=nfft, detrend=detrend, return_onesided=True, scaling='density')
        data.append(test[1] / (2*pi))

    xAxis = test[0][:] * 2*pi
    return pd.DataFrame(data=np.transpose(data), index=xAxis, columns=["psd(" + str(x) + ")" for x in df.columns])
def find_top_two_peaks(sdata):
    samples = len(sdata)
    fft_size = 2**int(floor(log(samples)/log(2.0)))
    freq = fft(sdata[0:fft_size])
    pdata = numpy.zeros(fft_size)
    for i in xrange(fft_size):
        pdata[i] = abs(freq[i])
    peak = 0
    peak1 = 0
    peak2 = 0
    peak1_index = 0
    peak2_index = 0
    for i in xrange(fft_size/2):
        if (pdata[i] > peak1):
            peak1 = pdata[i]
            peak1_index = i
    for i in xrange(fft_size/2):
        if (pdata[i] > peak2) and (abs(i - peak1_index) > 4):
            peak2 = pdata[i]
            peak2_index = i
    return (peak1, peak1_index, peak2, peak2_index)
# REMOVAL CASES
def save_fft(fil, audio_in):
    samples = len(audio_in)
    fft_size = 2**int(floor(log(samples)/log(2.0)))
    freq = fft(audio_in[0:fft_size])
    s_data = numpy.zeros(fft_size/2)
    x_data = numpy.zeros(fft_size/2)
    peak = 0
    for j in xrange(fft_size/2):
        if (abs(freq[j]) > peak):
            peak = abs(freq[j])
    for j in xrange(fft_size/2):
        x_data[j] = log(2.0*(j+1.0)/fft_size)
        if (x_data[j] < -10):
            x_data[j] = -10
        s_data[j] = 10.0*log(abs(freq[j])/peak)/log(10.0)

    plt.ylim([-50, 0])
    plt.plot(x_data, s_data)
    plt.title('fft log power')
    plt.grid()

    fields = fil.split('.')
    plt.savefig(fields[0] + '_fft.png', bbox_inches="tight")
    plt.clf()
    plt.close()
def get_test_probs(cmd_args, ngrams_test, corpus_files, model):
    """ Get sum of probabilities for ngrams of test data. """
    # Initialize probs
    sumprobs = {}
    for lang in corpus_files:
        sumprobs[lang] = 0.0

    for ngram in ngrams_test:
        for lang in corpus_files:
            sumprobs[lang] += ngrams_test[ngram] * probability.LaplaceProbDist.logprob(model.smoothed[lang], ngram)

    # The population prior is mostly useful for really small test snippets
    if not cmd_args.no_prior:
        for lang in corpus_files:
            # Strip trailing .txt, and check if it's in the population statistics dict
            lang_prefix = lang[:-4]
            if lang_prefix in model.stats:
                # Normalize population counts by approximate total number of people on earth
                sumprobs[lang] += math.log(model.stats[lang_prefix] / 8e9)
            else:
                # If language isn't in the language population statistics,
                # assume median value of all langs, which is about 500K
                sumprobs[lang] += math.log(500000 / 8e9)

    return sumprobs
def construct_pssm(cds, length=90, out_path="", prob=None):
    """
    Construct Position Specific Scoring Matrices with log-likelihood values
    length: size of analyzed region from start, in bp (sequences that are not this size are discarded)
    prob  : a dict of bases with a priori expected probabilities
    """
    cds = cds[0]
    if not prob:
        prob = {"a": 0.25, "t": 0.25, "g": 0.25, "c": 0.25}
    m = {"a": [0]*length, "t": [0]*length, "g": [0]*length, "c": [0]*length}
    tot_gene = 0.0
    for gene in cds:
        if len(cds[gene]) >= length:
            tot_gene += 1
            for i in range(length):
                m[cds[gene][i]][i] += 1
    for k in m:
        m[k] = [log((v/tot_gene)/prob[k]) for v in m[k]]
    if out_path:
        h = open(out_path, "w")
        h.write("," + ",".join([str(i) for i in range(1, length+1)]) + "\n")
        for b in ["a", "t", "g", "c"]:
            h.write(b + "," + ",".join(["%.2f" % v for v in m[b]]) + "\n")
        h.close()
    return m
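Each matrix cell ends up as a log-likelihood ratio, log(observed_frequency / expected_probability). A minimal standalone sketch of that scoring for a single position, using hypothetical counts:

from math import log

prob = {"a": 0.25, "t": 0.25, "g": 0.25, "c": 0.25}    # a priori expectation
counts = {"a": 30, "t": 10, "g": 5, "c": 5}            # observed bases at one position
tot = float(sum(counts.values()))

scores = {b: log((counts[b] / tot) / prob[b]) for b in prob}
print({b: round(s, 2) for b, s in scores.items()})
# "a" scores log(0.6/0.25) > 0 (enriched); "g" and "c" score log(0.1/0.25) < 0 (depleted)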
def shrink_bgest(r, rvar, theta):
    """Bernoulli-Gaussian MMSE estimator
    Perform MMSE estimation E[x|r]
    for x ~ BernoulliGaussian(lambda,xvar1)
        r|x ~ Normal(x,rvar)

    The parameters theta[0],theta[1] represent
        The variance of non-zero x[i]
            xvar1 = abs(theta[0])
        The probability of nonzero x[i]
            lambda = 1/(exp(theta[1])+1)
    """
    xvar1 = abs(theta[..., 0])
    loglam = theta[..., 1]  # log(1/lambda - 1)
    beta = 1/(1+rvar/xvar1)
    r2scale = r*r*beta/rvar
    rho = tf.exp(loglam - .5*r2scale) * tf.sqrt(1 + xvar1/rvar)
    rho1 = rho+1
    xhat = beta*r/rho1
    dxdr = beta*((1+rho*(1+r2scale)) / tf.square(rho1))
    dxdr = tf.reduce_mean(dxdr, 0)
    return (xhat, dxdr)
def load_trackball_action(self, action):
    cbTracballOutput = self.builder.get_object("cbTracballOutput")
    cbAxisOutput = self.builder.get_object("cbAxisOutput")
    sclFriction = self.builder.get_object("sclFriction")
    self._recursing = True
    if isinstance(action.action, MouseAction):
        self.set_cb(cbTracballOutput, "mouse", 1)
        self.set_cb(cbAxisOutput, "trackball", 2)
    elif isinstance(action.action, XYAction):
        if isinstance(action.action.x, AxisAction):
            if action.action.x.parameters[0] == Axes.ABS_X:
                self.set_cb(cbTracballOutput, "left", 1)
            else:
                self.set_cb(cbTracballOutput, "right", 1)
            self.set_cb(cbAxisOutput, "trackball", 2)
        elif isinstance(action.action.x, MouseAction):
            if self.editor.get_id() in STICKS:
                self.set_cb(cbAxisOutput, "wheel_stick", 2)
            else:
                self.set_cb(cbAxisOutput, "wheel_pad", 2)
    if action.friction <= 0:
        sclFriction.set_value(0)
    else:
        sclFriction.set_value(math.log(action.friction * 1000.0, 10))
    self._recursing = False
def decode(self, encoding, input, output):
    """
    Single training example decoding function
    :param encoding: last hidden state from encoder
    :param input: source sentence
    :param output: target sentence
    :return: loss value
    """
    src_toks = [self.src_vocab[tok] for tok in input]
    tgt_toks = [self.tgt_vocab[tok] for tok in output]
    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)
    s = self.dec_lstm.initial_state().add_input(encoding)
    loss = []
    sent = []
    for tok in tgt_toks:
        out_vector = dynet.affine_transform([b, w, s.output()])
        probs = dynet.softmax(out_vector)
        cross_ent_loss = - dynet.log(dynet.pick(probs, tok.i))
        loss.append(cross_ent_loss)
        embed_vector = self.tgt_lookup[tok.i]
        s = s.add_input(embed_vector)
    loss = dynet.esum(loss)
    return loss
def beam_search_generate(self, src_seq, beam_n=5):
    dynet.renew_cg()
    embedded = self.embed_seq(src_seq)
    input_vectors = self.encode_seq(embedded)

    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)

    s = self.dec_lstm.initial_state()
    s = s.add_input(input_vectors[-1])
    beams = [{"state": s,
              "out": [],
              "err": 0}]
    completed_beams = []
    while len(completed_beams) < beam_n:
        potential_beams = []
        for beam in beams:
            if len(beam["out"]) > 0:
                embed_vector = self.tgt_lookup[beam["out"][-1].i]
                s = beam["state"].add_input(embed_vector)

            out_vector = dynet.affine_transform([b, w, s.output()])
            probs = dynet.softmax(out_vector)
            probs = probs.vec_value()

            for potential_next_i in range(len(probs)):
                potential_beams.append({"state": s,
                                        "out": beam["out"]+[self.tgt_vocab[potential_next_i]],
                                        "err": beam["err"]-math.log(probs[potential_next_i])})

        potential_beams.sort(key=lambda x: x["err"])
        beams = potential_beams[:beam_n-len(completed_beams)]
        completed_beams = completed_beams+[beam for beam in beams if beam["out"][-1] == self.tgt_vocab.END_TOK
                                           or len(beam["out"]) > 5*len(src_seq)]
        beams = [beam for beam in beams if beam["out"][-1] != self.tgt_vocab.END_TOK
                 and len(beam["out"]) <= 5*len(src_seq)]
    completed_beams.sort(key=lambda x: x["err"])
    return [beam["out"] for beam in completed_beams]
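Each hypothesis accumulates err as a sum of -math.log(prob), so the smallest err corresponds to the highest joint probability. A tiny standalone illustration of that bookkeeping, with made-up per-token probabilities:

import math

beam = {"out": [], "err": 0.0}
for step_prob in (0.9, 0.5, 0.8):            # per-token probabilities of one hypothesis
    beam["err"] -= math.log(step_prob)       # accumulate negative log-probability
print(round(beam["err"], 4))                 # equals -log(0.9 * 0.5 * 0.8)
print(round(-math.log(0.9 * 0.5 * 0.8), 4))  # same value, ~1.0217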
def fEntropy(countByte, countTotal):
    x = float(countByte) / countTotal
    if x > 0:
        return - x * math.log(x, 2)
    else:
        return 0.0
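fEntropy above is one term of the Shannon entropy, -p * log2(p); summing it over a byte histogram gives the entropy in bits per byte. A small made-up example of that sum:

import math

counts = {0x00: 6, 0xFF: 2}                    # toy byte histogram, 8 bytes total
total = sum(counts.values())
entropy = sum(-(c / float(total)) * math.log(c / float(total), 2)
              for c in counts.values() if c > 0)
print(round(entropy, 4))                       # 0.8113 bits per byte for a 75/25 split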
def Print(lines, options):
    print(lines)
    filename = None
    if options.scan:
        filename = 'PDFiD.log'
    if options.output != '':
        filename = options.output
    if filename:
        logfile = open(filename, 'a')
        logfile.write(lines + '\n')
        logfile.close()
def Main():
    moredesc = '''
Arguments:
pdf-file and zip-file can be a single file, several files, and/or @file
@file: run PDFiD on each file listed in the text file specified
wildcards are supported

Source code put in the public domain by Didier Stevens, no Copyright
Use at your own risk
https://DidierStevens.com'''

    oParser = optparse.OptionParser(usage='usage: %prog [options] [pdf-file|zip-file|url|@file] ...\n' + __description__ + moredesc, version='%prog ' + __version__)
    oParser.add_option('-s', '--scan', action='store_true', default=False, help='scan the given directory')
    oParser.add_option('-a', '--all', action='store_true', default=False, help='display all the names')
    oParser.add_option('-e', '--extra', action='store_true', default=False, help='display extra data, like dates')
    oParser.add_option('-f', '--force', action='store_true', default=False, help='force the scan of the file, even without proper %PDF header')
    oParser.add_option('-d', '--disarm', action='store_true', default=False, help='disable JavaScript and auto launch')
    oParser.add_option('-p', '--plugins', type=str, default='', help='plugins to load (separate plugins with a comma , ; @file supported)')
    oParser.add_option('-c', '--csv', action='store_true', default=False, help='output csv data when using plugins')
    oParser.add_option('-m', '--minimumscore', type=float, default=0.0, help='minimum score for plugin results output')
    oParser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose (will also raise catched exceptions)')
    oParser.add_option('-S', '--select', type=str, default='', help='selection expression')
    oParser.add_option('-o', '--output', type=str, default='', help='output to log file')
    (options, args) = oParser.parse_args()

    if len(args) == 0:
        if options.disarm:
            print('Option disarm not supported with stdin')
            options.disarm = False
        if options.scan:
            print('Option scan not supported with stdin')
            options.scan = False
        filenames = ['']
    else:
        try:
            filenames = ExpandFilenameArguments(args)
        except Exception as e:
            print(e)
            return
    PDFiDMain(filenames, options)