The following 22 code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.kstest().
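Before the extracted examples, here is a minimal sketch of a typical call: it draws samples from a standard normal and tests them against the 'norm' CDF. The sample size and the distribution parameters are illustrative assumptions, not taken from any of the projects below.

import numpy as np
from scipy import stats

# Draw 1000 samples from a standard normal (sample size chosen arbitrarily).
samples = np.random.normal(loc=0.0, scale=1.0, size=1000)

# One-sample Kolmogorov-Smirnov test against the N(0, 1) CDF.
# kstest returns (statistic, pvalue); a large p-value means the data are
# consistent with the hypothesised distribution.
statistic, pvalue = stats.kstest(samples, 'norm', args=(0.0, 1.0))
print(statistic, pvalue)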
def test_evidence(self):
    # 2 sigma tolerance
    tolerance = 2.0*np.sqrt(self.work.NS.state.info/self.work.NS.Nlive)
    print('2-sigma statistic error in logZ: {0:0.3f}'.format(tolerance))
    print('Analytic logZ {0}'.format(self.model.analytic_log_Z))
    print('Estimated logZ {0}'.format(self.work.NS.logZ))
    pos = self.work.posterior_samples['x']
    #t,pval = stats.kstest(pos, self.model.distr.cdf)
    stat, pval = stats.normaltest(pos.T)
    print('Normal test p-value {0}'.format(str(pval)))
    plt.figure()
    plt.hist(pos.ravel(), normed=True)
    x = np.linspace(self.model.bounds[0][0], self.model.bounds[0][1], 100)
    plt.plot(x, self.model.distr.pdf(x))
    plt.title('NormalTest pval = {0}'.format(pval))
    plt.savefig('posterior.png')
    plt.figure()
    plt.plot(pos.ravel(), ',')
    plt.title('chain')
    plt.savefig('chain.png')
    self.assertTrue(np.abs(self.work.NS.logZ - GaussianModel.analytic_log_Z) < tolerance,
                    'Incorrect evidence for normalised distribution: {0:.3f} instead of {1:.3f}'.format(
                        self.work.NS.logZ, GaussianModel.analytic_log_Z))
    self.assertTrue(pval > 0.01, 'Normaltest test failed: KS stat = {0}'.format(pval))
def kolmogorov_smirnov_normality_test(X, y):
    """
    Performs the one-sample Kolmogorov-Smirnov test, testing whether the
    feature values of each class are drawn from a normal distribution

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    """
    kolmogorov_smirnov = {}
    # print(kolmogorov_smirnov)
    for feature_col in range(len(X[0])):
        kolmogorov_smirnov[feature_col] = values = []
        for class_index in range(2):
            values.append(stats.kstest(X[y == class_index, feature_col], 'norm'))

    # debug
    for f in range(23):
        print(kolmogorov_smirnov[f])

    return kolmogorov_smirnov
def testDirichletSample(self):
    with self.test_session():
        alpha = [1., 2]
        dirichlet = dirichlet_lib.Dirichlet(alpha)
        n = constant_op.constant(100000)
        samples = dirichlet.sample(n)
        sample_values = samples.eval()
        self.assertEqual(sample_values.shape, (100000, 2))
        self.assertTrue(np.all(sample_values > 0.0))
        self.assertLess(
            stats.kstest(
                # Beta is a univariate distribution.
                sample_values[:, 0],
                stats.beta(a=1., b=2.).cdf)[0],
            0.01)
def testBetaSample(self):
    with self.test_session():
        a = 1.
        b = 2.
        beta = beta_lib.Beta(a, b)
        n = constant_op.constant(100000)
        samples = beta.sample(n)
        sample_values = samples.eval()
        self.assertEqual(sample_values.shape, (100000,))
        self.assertFalse(np.any(sample_values < 0.0))
        self.assertLess(
            stats.kstest(
                # Beta is a univariate distribution.
                sample_values,
                stats.beta(a=1., b=2.).cdf)[0],
            0.01)
        # The standard error of the sample mean is 1 / (sqrt(18 * n))
        self.assertAllClose(
            sample_values.mean(axis=0), stats.beta.mean(a, b), atol=1e-2)
        self.assertAllClose(
            np.cov(sample_values, rowvar=0), stats.beta.var(a, b), atol=1e-1)

    # Test that sampling with the same seed twice gives the same results.
def testExponentialSample(self):
    with self.test_session():
        lam = constant_op.constant([3.0, 4.0])
        lam_v = [3.0, 4.0]
        n = constant_op.constant(100000)
        exponential = exponential_lib.Exponential(lam=lam)
        samples = exponential.sample(n, seed=137)
        sample_values = samples.eval()
        self.assertEqual(sample_values.shape, (100000, 2))
        self.assertFalse(np.any(sample_values < 0.0))
        for i in range(2):
            self.assertLess(
                stats.kstest(
                    sample_values[:, i],
                    stats.expon(scale=1.0 / lam_v[i]).cdf)[0],
                0.01)
def subtest_normal_distrib(self, xs, mean, std):
    _, pvalue = stats.kstest(xs, 'norm', (mean, std))
    self.assertGreater(pvalue, 3e-3)
def test_mc_normal():
    """kstest returns:
    1. KS statistic
    2. pvalue

    If pvalue > 0.05 (5%) we accept the null hypothesis:
    H0: our random variable from the simulation follows the distribution
    with the parameters obtained from 'stats.dist.fit'.
    """
    x = montecarlo.normal(1000, 0, 1)
    ks = stats.kstest(x, 'norm', stats.norm.fit(x))
    assert ks[1] > .05
def test_mc_lognormal():
    x = montecarlo.lognormal(1000, 1, .1)
    ks = stats.kstest(x, 'lognorm', stats.lognorm.fit(x))
    assert ks[1] > .05
def test_mc_gumbelr():
    x = montecarlo.gumbel_r(1000, 1, .1)
    ks = stats.kstest(x, 'gumbel_r', stats.gumbel_r.fit(x))
    assert ks[1] > .05
def _is_normal(self, tensor, mean, std):
    if isinstance(tensor, Variable):
        tensor = tensor.data
    samples = list(tensor.view(-1))
    p_value = stats.kstest(samples, 'norm', args=(mean, std)).pvalue
    return p_value > 0.0001
def _is_uniform(self, tensor, a, b):
    if isinstance(tensor, Variable):
        tensor = tensor.data
    samples = list(tensor.view(-1))
    p_value = stats.kstest(samples, 'uniform', args=(a, (b - a))).pvalue
    return p_value > 0.0001
def _is_normal(self, tensor, mean, std):
    if isinstance(tensor, Variable):
        tensor = tensor.data
    samples = list(tensor.view(-1))
    p_value = stats.kstest(samples, 'norm', args=(mean, std))[1]
    return p_value > 0.0001
def _is_uniform(self, tensor, a, b):
    if isinstance(tensor, Variable):
        tensor = tensor.data
    samples = list(tensor.view(-1))
    p_value = stats.kstest(samples, 'uniform', args=(a, (b - a)))[1]
    return p_value > 0.0001
def norm_EDA(vectors, lg, embedding):
    """EDA on the norm of the vectors

    vectors = word embedding vectors
    lg = language
    embedding = gensim, polyglot, etc."""

    # L2 norm of vectors, then normalize distribution of L2 norms
    vectors_norm = np.linalg.norm(vectors, axis=1)
    vectors_norm_normalized = (vectors_norm - vectors_norm.mean()) \
        / vectors_norm.std()

    # Histogram compared to normal dist
    plt.figure(figsize=(10, 6))
    plt.xlim((-3, 5))
    plt.hist(vectors_norm_normalized, bins=100, normed=True)
    x = np.linspace(-3, 3, 100)
    plt.plot(x, norm.pdf(x, 0, 1), color='r', linewidth=3)
    plt.savefig('../images/' + lg + '_' + embedding + '_norm.png')
    plt.close('all')

    # Anderson-Darling
    # If test stat is greater than crit val, reject ho=normal
    # crit_val_1 is critical value for p-value of 1%
    ad = anderson(vectors_norm_normalized, 'norm')
    ad_test_stat = ad.statistic
    ad_crit_val_1 = ad.critical_values[-1]
    ad_result = 'Reject' if ad_test_stat > ad_crit_val_1 else 'Fail to Reject'

    # Kolmogorov-Smirnov
    ks_p_val = kstest(vectors_norm_normalized, 'norm')[1]
    ks_result = 'Reject' if ks_p_val < .01 else 'Fail to Reject'

    # Shapiro
    sh_p_val = shapiro(vectors_norm_normalized)[1]
    sh_result = 'Reject' if sh_p_val < .01 else 'Fail to Reject'

    result = (ad_test_stat, ad_crit_val_1, ad_result,
              ks_p_val, ks_result,
              sh_p_val, sh_result)
    return result
def _kstest(self, loc, scale, samples):
    # Uses the Kolmogorov-Smirnov test for goodness of fit.
    ks, _ = stats.kstest(samples, stats.laplace(loc, scale=scale).cdf)
    # Return True when the test passes.
    return ks < 0.02
def _kstest(self, alpha, beta, samples):
    # Uses the Kolmogorov-Smirnov test for goodness of fit.
    ks, _ = stats.kstest(samples, stats.gamma(alpha, scale=1 / beta).cdf)
    # Return True when the test passes.
    return ks < 0.02
def _kstest(self, alpha, beta, samples):
    # Uses the Kolmogorov-Smirnov test for goodness of fit.
    ks, _ = stats.kstest(samples, stats.invgamma(alpha, scale=beta).cdf)
    # Return True when the test passes.
    return ks < 0.02
def testExponentialSampleMultiDimensional(self):
    with self.test_session():
        batch_size = 2
        lam_v = [3.0, 22.0]
        lam = constant_op.constant([lam_v] * batch_size)
        exponential = exponential_lib.Exponential(lam=lam)
        n = 100000
        samples = exponential.sample(n, seed=138)
        self.assertEqual(samples.get_shape(), (n, batch_size, 2))
        sample_values = samples.eval()
        self.assertFalse(np.any(sample_values < 0.0))
        for i in range(2):
            self.assertLess(
                stats.kstest(
                    sample_values[:, 0, i],
                    stats.expon(scale=1.0 / lam_v[i]).cdf)[0],
                0.01)
            self.assertLess(
                stats.kstest(
                    sample_values[:, 1, i],
                    stats.expon(scale=1.0 / lam_v[i]).cdf)[0],
                0.01)