The following 50 code examples, extracted from open-source Python projects, illustrate how to use scipy.special.digamma().

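Before the extracted examples, here is a minimal self-contained sketch of calling scipy.special.digamma directly; the array values are arbitrary and chosen only to illustrate the vectorized call, the recurrence psi(x + 1) = psi(x) + 1/x, and the value psi(1) = -gamma (Euler-Mascheroni constant).

import numpy as np
from scipy.special import digamma

# digamma is a vectorized ufunc: it accepts scalars or arrays of positive reals.
x = np.array([0.5, 1.0, 2.0, 10.0])
psi_x = digamma(x)

# The recurrence psi(x + 1) = psi(x) + 1/x holds elementwise.
assert np.allclose(digamma(x + 1), psi_x + 1.0 / x)

# digamma(1) equals minus the Euler-Mascheroni constant.
assert np.isclose(digamma(1.0), -np.euler_gamma)
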
def mi(x, y, k=3, base=2):
    """ Mutual information of x and y
        x, y should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
        if x is a one-dimensional scalar and we have four samples
    """
    assert len(x) == len(y), "Lists should have same length"
    assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
    intens = 1e-10  # small noise to break degeneracy, see doc.
    x = [list(p + intens * nr.rand(len(x[0]))) for p in x]
    y = [list(p + intens * nr.rand(len(y[0]))) for p in y]
    points = zip2(x, y)
    # Find nearest neighbors in joint space, p=inf means max-norm
    tree = ss.cKDTree(points)
    dvec = [tree.query(point, k + 1, p=float('inf'))[0][k] for point in points]
    a, b, c, d = avgdigamma(x, dvec), avgdigamma(y, dvec), digamma(k), digamma(len(x))
    return (-a - b + c + d) / log(base)

def cmi(x, y, z, k=3, base=2):
    """ Mutual information of x and y, conditioned on z
        x, y, z should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
        if x is a one-dimensional scalar and we have four samples
    """
    assert len(x) == len(y), "Lists should have same length"
    assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
    intens = 1e-10  # small noise to break degeneracy, see doc.
    x = [list(p + intens * nr.rand(len(x[0]))) for p in x]
    y = [list(p + intens * nr.rand(len(y[0]))) for p in y]
    z = [list(p + intens * nr.rand(len(z[0]))) for p in z]
    points = zip2(x, y, z)
    # Find nearest neighbors in joint space, p=inf means max-norm
    tree = ss.cKDTree(points)
    dvec = [tree.query(point, k + 1, p=float('inf'))[0][k] for point in points]
    a, b, c, d = avgdigamma(zip2(x, z), dvec), avgdigamma(zip2(y, z), dvec), avgdigamma(z, dvec), digamma(k)
    return (-a - b + c + d) / log(base)

def inv_psi(y, tol=1e-10, n_iter=100, psi=psi):
    """
    Inverse digamma function
    """
    from numpy import exp
    from scipy.special import digamma

    ## initial value
    if y < -5 / 3. - EULER_MASCHERONI:
        x = -1 / (EULER_MASCHERONI + y)
    else:
        x = 0.5 + exp(y)

    ## Newton root finding
    for dummy in range(n_iter):
        z = digamma(x) - y
        if abs(z) < tol:
            break
        x -= z / d_approx_psi(x)

    return x

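The inv_psi example above depends on module-level helpers (EULER_MASCHERONI, d_approx_psi) that are not shown. As a hedged, self-contained sketch of the same idea, the function below inverts digamma with Newton's method, using scipy.special.polygamma(1, x) as the exact derivative and the standard initialization from Minka's note on Dirichlet estimation; the name inverse_digamma is illustrative, not part of any project above.

import numpy as np
from scipy.special import digamma, polygamma

def inverse_digamma(y, tol=1e-10, n_iter=100):
    # Self-contained sketch (not the project's inv_psi): invert psi via
    # Newton's method.  Initialization follows Minka's recipe; note that
    # digamma(1) = -gamma, so -1/(y - digamma(1)) = -1/(y + gamma).
    x = np.exp(y) + 0.5 if y >= -2.22 else -1.0 / (y - digamma(1.0))
    for _ in range(n_iter):
        step = (digamma(x) - y) / polygamma(1, x)  # polygamma(1, x) is psi'(x)
        x -= step
        if abs(step) < tol:
            break
    return x

# Round trip: digamma(inverse_digamma(y)) should recover y.
y = digamma(3.7)
assert abs(digamma(inverse_digamma(y)) - y) < 1e-8
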
def mi(x, y, k=3, base=2):
    """Mutual information of x and y.

    x,y should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
    if x is a one-dimensional scalar and we have four samples.
    """
    assert len(x) == len(y), 'Lists should have same length.'
    assert k <= len(x) - 1, 'Set k smaller than num samples - 1.'
    intens = 1e-10  # Small noise to break degeneracy, see doc.
    x = [list(p + intens*nr.rand(len(x[0]))) for p in x]
    y = [list(p + intens*nr.rand(len(y[0]))) for p in y]
    points = zip2(x, y)
    # Find nearest neighbors in joint space, p=inf means max-norm.
    tree = ss.cKDTree(points)
    dvec = [tree.query(point, k+1, p=float('inf'))[0][k] for point in points]
    a = avgdigamma(x, dvec)
    b = avgdigamma(y, dvec)
    c = digamma(k)
    d = digamma(len(x))
    return (-a-b+c+d) / log(base)

def cmi(x, y, z, k=3, base=2):
    """Mutual information of x and y, conditioned on z

    x,y,z should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
    if x is a one-dimensional scalar and we have four samples
    """
    assert len(x) == len(y), 'Lists should have same length.'
    assert k <= len(x) - 1, 'Set k smaller than num samples - 1.'
    intens = 1e-10  # Small noise to break degeneracy, see doc.
    x = [list(p + intens*nr.rand(len(x[0]))) for p in x]
    y = [list(p + intens*nr.rand(len(y[0]))) for p in y]
    z = [list(p + intens*nr.rand(len(z[0]))) for p in z]
    points = zip2(x, y, z)
    # Find nearest neighbors in joint space, p=inf means max-norm.
    tree = ss.cKDTree(points)
    dvec = [tree.query(point, k+1, p=float('inf'))[0][k] for point in points]
    a = avgdigamma(zip2(x, z), dvec)
    b = avgdigamma(zip2(y, z), dvec)
    c = avgdigamma(z, dvec)
    d = digamma(k)
    return (-a-b+c+d) / log(base)

def _Eq5(_lambda, K, _x_u, m_pre_c, shot_comments_vector, user_comment, _comment_2_user_matrix, _n_t_c):
    m_pre_s = Eq._Eq2(_lambda, K)
    print _n_t_c  # V*C
    # calculate _term_2 for each comment in every shot
    total = []
    for i, users in enumerate(_comment_2_user_matrix):
        # sum all comments in one shot
        _rows = np.zeros(K)
        for j, user in enumerate(users):
            # x_u
            x_u = _x_u[user_comment.keys().index(user)]
            shared_term = x_u * _lambda[i] + m_pre_c[i][j]
            _rows += x_u * dlgt(shared_term) * \
                (digamma(np.sum(lgt(shared_term)))
                 - digamma(np.sum(lgt(shared_term)) + np.sum(shot_comments_vector[i][j]))
                 + digamma(lgt(shared_term) + _n_t_c[i][j])
                 - digamma(lgt(shared_term)))
        total.append(_rows)
    _term = -1 * _lambda - m_pre_s + np.array(total)
    return _term

def _Eq6(_lambda, K, _x_u, m_pre_c, user_comment, _comment_2_user_matrix, shot_comments_vector, _n_t_c):
    # traverse all users
    total = []
    for index, key in enumerate(user_comment):
        temp = np.zeros(K)
        for comment in user_comment[key]:
            i, j = comment[0], comment[1]
            shared_term = _x_u[index] * _lambda[i] + m_pre_c[i][j]
            temp += _lambda[i] * dlgt(shared_term) * \
                (digamma(np.sum(lgt(shared_term)))
                 - digamma(np.sum(lgt(shared_term)) + np.sum(shot_comments_vector[i][j]))
                 + digamma(lgt(shared_term) + _n_t_c[i][j])
                 - digamma(lgt(shared_term)))
        total.append(-1 * _x_u[index] + temp)
    return np.array(total)

def _calc_gamma_psi(log_d, alpha, log_beta, gamma, log_psi0):
    log_psi = log_psi0
    count = 0
    #print ((np.exp(log_psi1 - log_psi) ** 2).sum())
    while count == 0 \
            or ((np.exp(log_psi1) - np.exp(log_psi)) ** 2).sum() > 0.001 \
            or ((gamma1 - gamma) ** 2).sum() > 0.001:
        #print ('gamma psi:', count, ((np.exp(log_psi1) - np.exp(log_psi)) ** 2).sum())
        log_psi1 = log_psi
        gamma1 = gamma

        psi_offset = (digamma(gamma))[:, np.newaxis, np.newaxis, :]
        log_psi = log_beta[np.newaxis, :, :, :] + psi_offset
        log_psi = log_normalize(log_psi, axis=3)

        gamma = np.exp(logsumexp(logsumexp(log_d[:, :, :, np.newaxis] + log_psi, axis=1), axis=1)) + alpha[np.newaxis, :]
        count += 1
        #log_psi = np.average([log_psi0, log_psi], axis=0, weights=[0.9, 0.1])  # weak learning

    return (gamma, log_psi)

def _calc_alpha_beta(log_d, alpha0, log_beta0, gamma, log_psi):
    log_beta = logsumexp(log_psi + log_d[:, :, :, np.newaxis], axis=0)
    log_beta = log_normalize(log_beta, axis=1)

    log_smooth = np.log(10)
    alpha = alpha0
    N = gamma.shape[0]
    zero = 1e-30
    gamma_digamma_sum = digamma(gamma.sum(axis=1))[:, np.newaxis]
    g_offset = (digamma(gamma) - gamma_digamma_sum).sum(axis=0) / N

    # using log
    def next_alpha(alpha):
        das = digamma(alpha.sum())
        g = alpha * N * (das - digamma(alpha) + g_offset)
        h = alpha * N * (das + g_offset)
        z = N * das
        x = (alpha * g / h).sum()
        w = (alpha ** 2 / h).sum()
        return np.exp(np.log(alpha) - (g - x * alpha / (1/z + w)) / h)

    return (alpha, log_beta)

def p_overlap(self, s_overlap, motif_st, l_overlap, is_rc=False):
    """ p_overlap calculates the probability of a given sequence segment
    overlapping with a motif, given the alignment.

    Args:
        s_overlap: string representing overlapping portion of the motif
        motif_st: the begin idx of the motif
        l_overlap: the length of the alignment.
        is_rc: if the reverse complementary motif should be used.

    Returns:
        a float representation of the likelihood of observing overlapping
        sequence given this alignment.
    """
    motif_end = motif_st + l_overlap
    pwm = self.lmbda_rc if is_rc else self.lmbda
    pwm_overlap = np.array(pwm)[motif_st+1:motif_end+1]
    assert len(pwm_overlap) == len(s_overlap)
    prod = 1.0
    for base, rho in zip(s_overlap, pwm_overlap):
        prod *= np.exp(special.digamma(rho[tools.IDX[base]]) -
                       special.digamma(sum(rho)))
    return prod

def motif_aligns_and_ps(kmer, m, m_idx, phi, kmmm):
    """motif_aligns_and_ps finds all possible alignments of the motif and
    the kmer and returns likelihoods of each alignment."""
    alignments, align_ps = m.score_all_alignments(kmer, phi)
    if kmmm.variational:
        assert m.variational
        component_term = np.exp(special.digamma(float(kmmm.theta[m_idx])/len(align_ps)))
        align_ps = [p*component_term for p in align_ps]
    else:
        assert not m.variational
        align_ps = [p*kmmm.gamma[m_idx] for p in align_ps]

    if kmer != tools.rev_comp(kmer):
        # we do not need to do this for reverse palindromic
        # sequences because their counts have not been collapsed
        # with reverse complements.
        align_ps = [p*2.0 for p in align_ps]
    return align_ps, alignments

def calcELBOSingleDoc_Fast(wc_d, DocTopicCount_d, Prior_d, sumR_d, alphaEbeta):
    ''' Evaluate ELBO contributions for single doc, dropping terms constant wrt local step.

    Note: key to some progress was remembering that Prior_d is not just
    exp(ElogPi), but that it is usually altered by a multiplicative constant
    for safety.  We can find this constant offset (in logspace), and adjust
    sumR_d accordingly.
    '''
    theta_d = DocTopicCount_d + alphaEbeta[:-1]
    thetaRem = alphaEbeta[-1]

    digammaSum = digamma(theta_d.sum() + thetaRem)
    ElogPi_d = digamma(theta_d) - digammaSum
    ElogPiRem = digamma(thetaRem) - digammaSum

    cDir = -1 * c_Dir(theta_d[np.newaxis, :], thetaRem)
    slackOn = np.inner(DocTopicCount_d + alphaEbeta[:-1] - theta_d,
                       ElogPi_d.flatten())
    slackOff = (alphaEbeta[-1] - thetaRem) * ElogPiRem

    rest = np.inner(wc_d, np.log(sumR_d)) \
        - np.inner(DocTopicCount_d, np.log(Prior_d + 1e-100))
    return cDir + slackOn + slackOff + rest

def L_hdp(beta, omega, Tvec, alpha):
    ''' Compute top-tier of hdp bound.
    '''
    K = omega.size
    assert K == beta.size
    assert K == Tvec.size
    rho = OROB.beta2rho(beta, K)
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digamma_omega = digamma(omega)
    digamma_eta1 = digamma(eta1)
    digamma_eta0 = digamma(eta0)
    ElogU = digamma_eta1 - digamma_omega
    Elog1mU = digamma_eta0 - digamma_omega
    Lscore = \
        np.sum(gammaln(eta1) + gammaln(eta0) - gammaln(omega)) \
        + np.inner(nDoc + 1 - eta1, ElogU) \
        + np.inner(nDoc * OROB.kvec(K) + gamma - eta0, Elog1mU) \
        + alpha * np.inner(beta, Tvec)
    return Lscore

def _E_logdetL(self, k=None):
    dvec = np.arange(1, self.D + 1, dtype=np.float)
    if k == 'all':
        dvec = dvec[:, np.newaxis]
        retVec = self.D * LOGTWO * np.ones(self.K)
        for kk in xrange(self.K):
            retVec[kk] -= self.GetCached('logdetB', kk)
        nuT = self.Post.nu[np.newaxis, :]
        retVec += np.sum(digamma(0.5 * (nuT + 1 - dvec)), axis=0)
        return retVec
    elif k is None:
        nu = self.Prior.nu
    else:
        nu = self.Post.nu[k]
    return self.D * LOGTWO \
        - self.GetCached('logdetB', k) \
        + np.sum(digamma(0.5 * (nu + 1 - dvec)))

def _E_logphiT(self, k=None):
    ''' Calculate transpose of topic-word matrix

        Important to make a copy of the matrix so it is C-contiguous,
        which leads to much much faster matrix operations.

        Returns
        -------
        ElogphiT : 2D array, vocab_size x K
    '''
    if k is None or k == 'prior':
        lam = self.Prior.lam
        ElogphiT = digamma(lam) - digamma(np.sum(lam))
    elif k == 'all':
        ElogphiT = self.Post.lam.T.copy()
        digamma(ElogphiT, out=ElogphiT)
        digammaColSumVec = digamma(np.sum(self.Post.lam, axis=1))
        ElogphiT -= digammaColSumVec[np.newaxis, :]
    else:
        ElogphiT = digamma(self.Post.lam[k]) - \
            digamma(self.Post.lam[k].sum())
    assert ElogphiT.flags.c_contiguous
    return ElogphiT

def _E_logphiT(self, k=None):
    ''' Calculate transpose of expected phi matrix

        Important to make a copy of the matrix so it is C-contiguous,
        which leads to much much faster matrix operations.

        Returns
        -------
        ElogphiT : 2D array, vocab_size x K
    '''
    if k == 'all':
        dlam1T = self.Post.lam1.T.copy()
        dlambothT = self.Post.lam0.T.copy()
        dlambothT += dlam1T
        digamma(dlam1T, out=dlam1T)
        digamma(dlambothT, out=dlambothT)
        return dlam1T - dlambothT
    ElogphiT = self._E_logphi(k).T.copy()
    return ElogphiT

def _E_log1mphiT(self, k=None):
    ''' Calculate transpose of expected 1-minus-phi matrix

        Important to make a copy of the matrix so it is C-contiguous,
        which leads to much much faster matrix operations.

        Returns
        -------
        ElogphiT : 2D array, vocab_size x K
    '''
    if k == 'all':
        # Copy so lam1T/lam0T are C-contig and can be shared mem.
        lam1T = self.Post.lam1.T.copy()
        lam0T = self.Post.lam0.T.copy()
        return digamma(lam0T) - digamma(lam1T + lam0T)
    ElogphiT = self._E_log1mphi(k).T.copy()
    return ElogphiT

def _E_logdetL(self, k=None):
    dvec = np.arange(1, self.D + 1, dtype=np.float)
    if k == 'all':
        dvec = dvec[:, np.newaxis]
        retVec = self.D * LOGTWO * np.ones(self.K)
        for kk in xrange(self.K):
            retVec[kk] -= self.GetCached('logdetB', kk)
        nuT = self.Post.nu[np.newaxis, :]
        retVec += np.sum(digamma(0.5 * (nuT + 1 - dvec)), axis=0)
        return retVec
    elif k is None:
        nu = self.Prior.nu
    else:
        nu = self.Post.nu[k]
    return self.D * LOGTWO \
        - self.GetCached('logdetB', k) \
        + np.sum(digamma(0.5 * (nu + 1 - dvec)))

def _E_logL(self, k=None):
    '''
    Returns
    -------
    E_logL : 1D array, size D
    '''
    if k == 'all':
        # retVec : K x D
        retVec = LOGTWO - np.log(self.Post.beta.copy())  # no strided!
        retVec += digamma(0.5 * self.Post.nu)[:, np.newaxis]
        return retVec
    elif k is None:
        nu = self.Prior.nu
        beta = self.Prior.beta
    else:
        nu = self.Post.nu[k]
        beta = self.Post.beta[k]
    return LOGTWO - np.log(beta) + digamma(0.5 * nu)

def get_vlb(self):
    # E[ln p(mu) / q(mu)] part
    h_0, J_0, h_mf, J_mf = self.h_0, self.J_0, self.h_mf, self.J_mf
    Emu, Emu2 = self._E_mu
    p_mu_avgengy = -1./2*J_0*Emu2 + h_0*Emu \
        - 1./2*(h_0**2/J_0) + 1./2*np.log(J_0) - 1./2*np.log(2*np.pi)
    q_mu_entropy = 1./2*np.log(2*np.pi*np.e/J_mf)

    # E[ln p(sigmasq) / q(sigmasq)] part
    alpha_0, beta_0, alpha_mf, beta_mf = \
        self.alpha_0, self.beta_0, self.alpha_mf, self.beta_mf
    (Esigmasqinv, Elnsigmasq) = self._E_sigmasq
    p_sigmasq_avgengy = (-alpha_0-1)*Elnsigmasq + (-beta_0)*Esigmasqinv \
        - (special.gammaln(alpha_0) - alpha_0*np.log(beta_0))
    q_sigmasq_entropy = alpha_mf + np.log(beta_mf) + special.gammaln(alpha_mf) \
        - (1+alpha_mf)*special.digamma(alpha_mf)

    return p_mu_avgengy + q_mu_entropy + p_sigmasq_avgengy + q_sigmasq_entropy

def get_vlb(self):
    # E[ln p(mu) / q(mu)] part
    h_0, J_0, h_mf, J_mf = self.h_0, self.J_0, self.h_mf, self.J_mf
    Emu, Emu2 = self._E_mu
    p_mu_avgengy = -1./2*J_0*Emu2 + h_0*Emu \
        - 1./2*(h_0**2/J_0) + 1./2*np.log(J_0) - 1./2*np.log(2*np.pi)
    q_mu_entropy = 1./2*np.log(2*np.pi*np.e/J_mf)

    # E[ln p(sigmasq) / q(sigmasq)] part
    alpha_0, beta_0, alpha_mf, beta_mf = \
        self.alpha_0, self.beta_0, self.alpha_mf, self.beta_mf
    (Esigmasqinv, Elnsigmasq) = self._E_sigmasq
    p_sigmasq_avgengy = (-alpha_0-1)*Elnsigmasq + (-beta_0)*Esigmasqinv \
        - (special.gammaln(alpha_0) - alpha_0*np.log(beta_0))
    q_sigmasq_entropy = alpha_mf + np.log(beta_mf) + special.gammaln(alpha_mf) \
        - (1+alpha_mf)*special.digamma(alpha_mf)

    return np.sum(p_mu_avgengy + q_mu_entropy + p_sigmasq_avgengy + q_sigmasq_entropy)

def entropy(x, k=3, base=2):
    """ The classic K-L k-nearest neighbor continuous entropy estimator
        x should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
        if x is a one-dimensional scalar and we have four samples
    """
    assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
    d = len(x[0])
    N = len(x)
    intens = 1e-10  # small noise to break degeneracy, see doc.
    x = [list(p + intens * nr.rand(len(x[0]))) for p in x]
    tree = ss.cKDTree(x)
    nn = [tree.query(point, k + 1, p=float('inf'))[0][k] for point in x]
    const = digamma(N) - digamma(k) + d * log(2)
    return (const + d * np.mean(map(log, nn))) / log(base)

def avgdigamma(points, dvec):
    # This part finds number of neighbors in some radius in the marginal space
    # returns expectation value of <psi(nx)>
    N = len(points)
    tree = ss.cKDTree(points)
    avg = 0.
    for i in range(N):
        dist = dvec[i]
        # subtlety, we don't include the boundary point,
        # but we are implicitly adding 1 to kraskov def bc center point is included
        num_points = len(tree.query_ball_point(points[i], dist - 1e-15, p=float('inf')))
        avg += digamma(num_points) / N
    return avg

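The entropy, mi, and avgdigamma examples above rely on helpers from their own modules (nr, ss, zip2, log). Below is a hedged, self-contained sanity check of the same Kozachenko-Leonenko idea written directly against scipy; kl_entropy_nats is an illustrative name, and the estimate is reported in nats rather than bits.

import numpy as np
from scipy.spatial import cKDTree
from scipy.special import digamma

def kl_entropy_nats(x, k=3):
    # Self-contained sketch of the Kozachenko-Leonenko estimator
    # (max-norm balls, so the unit-ball volume term is d*log(2)).
    x = np.asarray(x, dtype=float)
    n, d = x.shape
    tree = cKDTree(x)
    # Distance to the k-th neighbor, excluding the point itself (index 0).
    eps = tree.query(x, k + 1, p=np.inf)[0][:, k]
    return digamma(n) - digamma(k) + d * np.log(2) + d * np.mean(np.log(eps))

rng = np.random.default_rng(0)
samples = rng.uniform(0.0, 2.0, size=(5000, 1))
# True differential entropy of Uniform(0, 2) is log(2) nats.
print(kl_entropy_nats(samples), np.log(2))
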
def summand(k, b):
    return digamma(k)*poisson.pmf(k, b)

def __fixed_point_iteration(self, imps, clks, alpha, beta):
    numerator_alpha = 0.0
    numerator_beta = 0.0
    denominator = 0.0

    for i in range(len(imps)):
        numerator_alpha += (special.digamma(clks[i]+alpha) - special.digamma(alpha))
        numerator_beta += (special.digamma(imps[i]-clks[i]+beta) - special.digamma(beta))
        denominator += (special.digamma(imps[i]+alpha+beta) - special.digamma(alpha+beta))

    return alpha*(numerator_alpha/denominator), beta*(numerator_beta/denominator)

def _calc_entropy(self, thetas_ss, n_acc, k):
    """Calculate the entropy as described in Nunes & Balding, 2010.

    E = log( pi^(q/2) / gamma(q/2+1) ) - digamma(k) + log(n)
        + q/n * sum_{i=1}^n( log(R_{i, k}) ),

    where R_{i, k} is the Euclidean distance from the parameter theta_i to
    its kth nearest neighbour;
    q is the dimensionality of the parameter; and
    n is the number of the accepted parameters n_acc in the rejection sampling.

    Parameters
    ----------
    thetas_ss : array_like
        Parameters accepted upon the rejection sampling using the
        summary-statistics combination ss.
    n_acc : int
        Number of the accepted parameters.
    k : int
        Nearest neighbour to be searched.

    Returns
    -------
    float
        Entropy.
    """
    q = thetas_ss.shape[1]

    # Calculate the distance to the kth nearest neighbour across all accepted parameters.
    searcher_knn = cKDTree(thetas_ss)
    sum_log_dist_knn = 0
    for theta_ss in thetas_ss:
        dist_knn = searcher_knn.query(theta_ss, k=k)[0][-1]
        sum_log_dist_knn += np.log(dist_knn)

    # Calculate the entropy.
    E = np.log(np.pi**(q / 2) / gamma((q / 2) + 1)) - digamma(k) \
        + np.log(n_acc) + (q / n_acc) * sum_log_dist_knn
    return E

def _KSG_mi(data, split, k=5):
    '''
    Estimate the mutual information I(X;Y) from samples {x_i,y_i}_{i=1}^N
    using the KSG mutual information estimator.

    Input: data: 2D list of size N*(d_x + d_y)
           split: should be d_x, splitting the data into two parts, X and Y
           k: k-nearest neighbor parameter

    Output: one number of I(X;Y)
    '''
    assert split >= 1, "x must have at least one dimension"
    assert split <= len(data[0]) - 1, "y must have at least one dimension"
    N = len(data)
    x = data[:, :split]
    y = data[:, split:]
    dx = len(x[0])
    dy = len(y[0])

    tree_xy = ss.cKDTree(data)
    tree_x = ss.cKDTree(x)
    tree_y = ss.cKDTree(y)
    knn_dis = [tree_xy.query(point, k+1, p=2)[0][k] for point in data]
    ans = digamma(k) + log(N) + vd(dx, 2) + vd(dy, 2) - vd(dx+dy, 2)
    for i in range(N):
        ans += -log(len(tree_y.query_ball_point(y[i], knn_dis[i], p=2))-1)/N \
               - log(len(tree_x.query_ball_point(x[i], knn_dis[i], p=2))-1)/N
    return ans

def f_hyperprior_log_prima(x, weight_prod, K, a=1, b=1):
    """ Derivative of Log distribution """
    res = (a-1) * 1. / x - b + np.log(weight_prod) + K * np.log(K) - K * special.digamma(x)
    for i in range(K):
        res += special.digamma(x + i*1./K)
    return res

def lnZ_Exponential_MSE(M):
    return (np.log(M) - digamma(M)) ** 2 + lnZ_Exponential_var(M)

# Estimating estimator MSEs by sampling

def entropy(x, k=3, base=2):
    """The classic K-L k-nearest neighbor continuous entropy estimator.

    x should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
    if x is a one-dimensional scalar and we have four samples
    """
    assert k <= len(x)-1, 'Set k smaller than num. samples - 1.'
    d = len(x[0])
    N = len(x)
    intens = 1e-10  # Small noise to break degeneracy, see doc.
    x = [list(p + intens*nr.rand(len(x[0]))) for p in x]
    tree = ss.cKDTree(x)
    nn = [tree.query(point, k+1, p=float('inf'))[0][k] for point in x]
    const = digamma(N) - digamma(k) + d*log(2)
    return (const + d*np.mean(map(log, nn))) / log(base)

def avgdigamma(points, dvec):
    # This part finds number of neighbors in some radius in the marginal space
    # returns expectation value of <psi(nx)>.
    N = len(points)
    tree = ss.cKDTree(points)
    avg = 0.
    for i in xrange(N):
        dist = dvec[i]
        # Subtlety, we don't include the boundary point,
        # but we are implicitly adding 1 to kraskov def bc center point is included.
        num_points = len(tree.query_ball_point(points[i], dist-1e-15, p=float('inf')))
        avg += digamma(num_points) / N
    return avg

def __fixed_point_iteration(self, tries, success, alpha, beta):
    '''fixed point iteration'''
    sumfenzialpha = 0.0
    sumfenzibeta = 0.0
    sumfenmu = 0.0
    for i in range(len(tries)):
        sumfenzialpha += (special.digamma(success[i]+alpha) - special.digamma(alpha))
        sumfenzibeta += (special.digamma(tries[i]-success[i]+beta) - special.digamma(beta))
        sumfenmu += (special.digamma(tries[i]+alpha+beta) - special.digamma(alpha+beta))

    return alpha*(sumfenzialpha/sumfenmu), beta*(sumfenzibeta/sumfenmu)

def __fixed_point_iteration(self, imps, clks, alpha, beta):
    numerator_alpha = 0.0
    numerator_beta = 0.0
    denominator = 0.0

    for i in range(len(imps)):
        numerator_alpha += (special.digamma(clks[i] + alpha) - special.digamma(alpha))
        numerator_beta += (special.digamma(imps[i] - clks[i] + beta) - special.digamma(beta))
        denominator += (special.digamma(imps[i] + alpha + beta) - special.digamma(alpha + beta))

    return alpha * (numerator_alpha / denominator), beta * (numerator_beta / denominator)

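The __fixed_point_iteration methods above each perform one step of the same digamma-based fixed-point update for fitting a Beta prior to impression/click (or tries/success) counts, the kind of update described in Minka's notes on estimating Dirichlet and Beta parameters. The sketch below simply iterates such a step to convergence on synthetic data; fit_beta_prior, the data sizes, and the seed are illustrative assumptions, not any project's API.

import numpy as np
from scipy.special import digamma

def fit_beta_prior(imps, clks, alpha=1.0, beta=1.0, n_iter=1000, tol=1e-10):
    # Self-contained sketch: iterate the digamma fixed-point step above
    # until alpha and beta stop changing.
    imps = np.asarray(imps, dtype=float)
    clks = np.asarray(clks, dtype=float)
    for _ in range(n_iter):
        num_alpha = np.sum(digamma(clks + alpha) - digamma(alpha))
        num_beta = np.sum(digamma(imps - clks + beta) - digamma(beta))
        denom = np.sum(digamma(imps + alpha + beta) - digamma(alpha + beta))
        new_alpha = alpha * num_alpha / denom
        new_beta = beta * num_beta / denom
        if abs(new_alpha - alpha) < tol and abs(new_beta - beta) < tol:
            break
        alpha, beta = new_alpha, new_beta
    return alpha, beta

# Synthetic clicks drawn from a Beta(5, 50)-Binomial model.
rng = np.random.default_rng(1)
imps = rng.integers(200, 2000, size=500)
ctr = rng.beta(5.0, 50.0, size=500)
clks = rng.binomial(imps, ctr)
print(fit_beta_prior(imps, clks))  # should roughly recover (5, 50)
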
def _dll(self, x):
    alpha = self.get_alpha(x)
    return -(np.sum(self._dll_common(x) \
        * (special.digamma(np.sum(alpha, axis=1))[:, np.newaxis, np.newaxis] \
           - special.digamma(np.sum(self.n_m_z+alpha, axis=1))[:, np.newaxis, np.newaxis] \
           + special.digamma(self.n_m_z+alpha)[:, :, np.newaxis] \
           - special.digamma(alpha)[:, :, np.newaxis]), axis=0) \
        - x / (self.sigma ** 2))

def _dll(self, x):
    alpha = self.get_alpha(x)
    result = np.sum(self.vecs[:, np.newaxis, :] * alpha[:, :, np.newaxis] \
        * (special.digamma(np.sum(alpha, axis=1))[:, np.newaxis, np.newaxis] \
           - special.digamma(np.sum(self.n_m_z+alpha, axis=1))[:, np.newaxis, np.newaxis] \
           + special.digamma(self.n_m_z+alpha)[:, :, np.newaxis] \
           - special.digamma(alpha)[:, :, np.newaxis]), axis=0) \
        - x / (self.sigma ** 2)
    result = -result
    return result

def update_phi(self, doc_number, time):
    """
    Update variational multinomial parameters, based on a document and a time-slice.
    This is done based on the original Blei-LDA paper, where:
    log_phi := beta * exp(Ψ(gamma)), over every topic for every word.

    TODO: incorporate lee-sueng trick used in
    **Lee, Seung: Algorithms for non-negative matrix factorization, NIPS 2001**.
    """
    num_topics = self.lda.num_topics
    # digamma values
    dig = np.zeros(num_topics)

    for k in range(0, num_topics):
        dig[k] = digamma(self.gamma[k])

    n = 0  # keep track of iterations for phi, log_phi
    for word_id, count in self.doc:
        for k in range(0, num_topics):
            self.log_phi[n][k] = dig[k] + self.lda.topics[word_id][k]

        log_phi_row = self.log_phi[n]
        phi_row = self.phi[n]

        # log normalize
        v = log_phi_row[0]
        for i in range(1, len(log_phi_row)):
            v = np.logaddexp(v, log_phi_row[i])

        # subtract every element by v
        log_phi_row = log_phi_row - v
        phi_row = np.exp(log_phi_row)
        self.log_phi[n] = log_phi_row
        self.phi[n] = phi_row
        n += 1  # increase iteration

    return self.phi, self.log_phi

def compute_lda_lhood(self):
    """
    compute the likelihood bound
    """
    num_topics = self.lda.num_topics
    gamma_sum = np.sum(self.gamma)

    # to be used in DIM
    # sigma_l = 0
    # sigma_d = 0

    lhood = gammaln(np.sum(self.lda.alpha)) - gammaln(gamma_sum)
    self.lhood[num_topics] = lhood

    # influence_term = 0
    digsum = digamma(gamma_sum)

    model = "DTM"
    for k in range(0, num_topics):
        # below code only to be used in DIM mode
        # if ldapost.doc_weight is not None and (model == "DIM" or model == "fixed"):
        #     influence_topic = ldapost.doc_weight[k]
        #     influence_term = - ((influence_topic * influence_topic + sigma_l * sigma_l) / 2.0 / (sigma_d * sigma_d))

        e_log_theta_k = digamma(self.gamma[k]) - digsum
        lhood_term = (self.lda.alpha[k] - self.gamma[k]) * e_log_theta_k + \
            gammaln(self.gamma[k]) - gammaln(self.lda.alpha[k])

        # TODO: check why there's an IF
        n = 0
        for word_id, count in self.doc:
            if self.phi[n][k] > 0:
                lhood_term += count * self.phi[n][k] * \
                    (e_log_theta_k + self.lda.topics[word_id][k] - self.log_phi[n][k])
            n += 1
        self.lhood[k] = lhood_term
        lhood += lhood_term

        # in case of DIM add influence term
        # lhood += influence_term

    return lhood

def limit_k(self, N, directed=True):
    alpha, gmma, delta = self.get_hyper()
    N = int(N)
    if directed is True:
        m = alpha * N * (digamma(N+alpha) - digamma(alpha))
    else:
        m = alpha * N * (digamma(2*N+alpha) - digamma(alpha))
    # Number of class in the CRF
    K = int(gmma * (digamma(m+gmma) - digamma(gmma)))
    return K

def calc_lower_ll(self):
    g = digamma(self.gamma) - digamma(self.gamma.sum(axis=1)[:, np.newaxis])
    N = self._data.nsamples
    psi_d = np.exp(self.log_psi + self._data.log_d[:, :, :, np.newaxis])

    ll = self.ll_offset
    ll += (psi_d.sum(axis=0) * self.log_beta).sum()
    ll += (psi_d.sum(axis=1).sum(axis=1) * g).sum()
    ll += N * (gammaln(self.alpha.sum()) - gammaln(self.alpha).sum()) + (self.alpha * g.sum(axis=0)).sum()
    ll -= (gammaln(self.gamma.sum(axis=1)) - gammaln(self.gamma).sum(axis=1) + (self.gamma * g).sum(axis=1)).sum()
    ll -= (psi_d * self.log_psi).sum()

    return ll

def checkFacts_pdf_Phi(nu=0, tau=0, phi_grid=None, **kwargs):
    cPrior = c_Prior(nu=nu, tau=tau)
    if phi_grid is None:
        phi_grid = make_phi_grid(**kwargs)
    logpdf_grid = tau * phi_grid - nu * c_Phi(phi_grid) - cPrior
    pdf_grid = np.exp(logpdf_grid)
    mu_grid = phi2mu(phi_grid)

    IntegralVal = np.trapz(pdf_grid, phi_grid)
    E_phi_numeric = np.trapz(pdf_grid * phi_grid, phi_grid)
    E_phi_formula = -(0.5 * nu + 1) / tau
    E_mu_numeric = np.trapz(pdf_grid * mu_grid, phi_grid)
    E_mu_formula = tau / nu
    mode_phi_numeric = phi_grid[np.argmax(pdf_grid)]
    mode_phi_formula = mu2phi(tau / nu)
    E_c_numeric = np.trapz(pdf_grid * c_Phi(phi_grid), phi_grid)
    E_c_formula = -0.5 * digamma(0.5 * nu + 1) + 0.5 * np.log(tau)

    print "nu=%7.3f tau=%7.3f" % (nu, tau)
    print " Integral=% 7.3f should be % 7.3f" % (IntegralVal, 1.0)
    print " E[mu]=% 7.3f should be % 7.3f" % (E_mu_numeric, E_mu_formula)
    print " E[phi]=% 7.3f should be % 7.3f" % (E_phi_numeric, E_phi_formula)
    print " E[c(phi)]=% 7.3f should be % 7.3f" % (E_c_numeric, E_c_formula)
    print " mode[phi]=% 7.3f should be % 7.3f" % (mode_phi_numeric, mode_phi_formula)

def E_log_d(w_E=None, P_EE=None, pnu=None, ptau=None, **kwargs):
    ''' Expected value of log of precision parameter delta

    Returns
    -------
    Elogdelta : scalar
    '''
    return digamma(0.5 * pnu) - np.log(0.5 * ptau)

def _E_logphi(self, k=None):
    if k is None or k == 'prior':
        lam = self.Prior.lam
        Elogphi = digamma(lam) - digamma(np.sum(lam))
    elif k == 'all':
        AMat = self.Post.lam
        Elogphi = digamma(AMat) \
            - digamma(np.sum(AMat, axis=1))[:, np.newaxis]
    else:
        Elogphi = digamma(self.Post.lam[k]) - \
            digamma(self.Post.lam[k].sum())
    return Elogphi

def _E_logphi(self, k=None):
    if k is None or k == 'prior':
        lam1 = self.Prior.lam1
        lam0 = self.Prior.lam0
    elif k == 'all':
        lam1 = self.Post.lam1
        lam0 = self.Post.lam0
    else:
        lam1 = self.Post.lam1[k]
        lam0 = self.Post.lam0[k]
    Elogphi = digamma(lam1) - digamma(lam1 + lam0)
    return Elogphi

def _E_logphiT_log1mphiT(self, k=None):
    if k == 'all':
        lam1T = self.Post.lam1.T.copy()
        lam0T = self.Post.lam0.T.copy()
        digammaBoth = digamma(lam1T + lam0T)
        ElogphiT = digamma(lam1T) - digammaBoth
        Elog1mphiT = digamma(lam0T) - digammaBoth
    else:
        ElogphiT = self._E_logphiT(k)
        Elog1mphiT = self._E_log1mphiT(k)
    return ElogphiT, Elog1mphiT

def calcELBOForSingleDocFromCountVec(
        DocTopicCount_K=None,
        cts_U=None,
        sumResp_U=None,
        alphaEbeta_K=None,
        logLik_UK=None,
        L_max=0.0):
    ''' Compute ELBO for single doc as function of doc-topic counts.

    Returns
    -------
    L : scalar float
        equals ELBO as function of local parameters of single document
        up to an additive constant independent of DocTopicCount
    '''
    theta_K = DocTopicCount_K + alphaEbeta_K
    logPrior_K = digamma(theta_K)
    L_theta = np.sum(gammaln(theta_K)) - np.inner(DocTopicCount_K, logPrior_K)

    explogPrior_K = np.exp(logPrior_K)
    if sumResp_U is None:
        maxlogLik_U = np.max(logLik_UK, axis=1)
        explogLik_UK = logLik_UK - maxlogLik_U[:, np.newaxis]
        np.exp(explogLik_UK, out=explogLik_UK)
        sumResp_U = np.dot(explogLik_UK, explogPrior_K)
        L_max = np.inner(cts_U, maxlogLik_U)
    L_resp = np.inner(cts_U, np.log(sumResp_U))
    return L_theta + L_resp + L_max

def updateLPGivenDocTopicCount(LP, DocTopicCount, alphaEbeta, alphaEbetaRem=None):
    ''' Update local parameters given doc-topic counts for many docs.

    Returns for FiniteTopicModel (alphaEbetaRem is None)
    --------
    LP : dict of local params, with updated fields
        * theta : 2D array, nDoc x K
        * ElogPi : 2D array, nDoc x K

    Returns for HDPTopicModel (alphaEbetaRem is not None)
    --------
        * theta : 2D array, nDoc x K
        * ElogPi : 2D array, nDoc x K
        * thetaRem : scalar
        * ElogPiRem : scalar
    '''
    theta = DocTopicCount + alphaEbeta

    if alphaEbetaRem is None:
        # FiniteTopicModel
        digammaSumTheta = digamma(theta.sum(axis=1))
    else:
        # HDPTopicModel
        digammaSumTheta = digamma(theta.sum(axis=1) + alphaEbetaRem)
        LP['thetaRem'] = alphaEbetaRem
        LP['ElogPiRem'] = digamma(alphaEbetaRem) - digammaSumTheta
        LP['digammaSumTheta'] = digammaSumTheta  # Used for merges

    ElogPi = digamma(theta) - digammaSumTheta[:, np.newaxis]
    LP['theta'] = theta
    LP['ElogPi'] = ElogPi
    return LP

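Many of the examples above (calcELBOSingleDoc_Fast, updateLPGivenDocTopicCount, _E_logphi, and others) rely on the same Dirichlet identity: if pi ~ Dirichlet(theta), then E[log pi_k] = digamma(theta_k) - digamma(sum(theta)). Here is a short self-contained check of that identity by Monte Carlo; the theta values and sample count are arbitrary.

import numpy as np
from scipy.special import digamma

theta = np.array([0.7, 2.0, 5.5])
# Analytic expectation of log pi under Dirichlet(theta).
analytic = digamma(theta) - digamma(theta.sum())

# Monte Carlo estimate of the same expectation.
rng = np.random.default_rng(0)
mc = np.log(rng.dirichlet(theta, size=200000)).mean(axis=0)

print(analytic)
print(mc)  # agrees with the analytic values to roughly two decimal places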