我们从Python开源项目中,提取了以下18个代码示例,用于说明如何使用scipy.stats.expon()。
def fit(self, noise_samples, track_samples): import scipy.stats as stats track_area = np.sum(track_samples > 0, axis=(1, 2)) area_distribution_params = stats.expon.fit(track_area) area_distribution = stats.expon(*area_distribution_params) return LowBrightnessGenerator( noise_samples, track_samples, signal_level=self._signal_level, track_rate=self._track_rate, white_noise_rate=self._white_noise_rate, white_noise_maximum=self._white_noise_maximum, pseudo_tracks_rate=self._pseudo_tracks_rate, pseudo_track_sparseness=self._pseudo_track_sparseness, pseudo_track_width=self._pseudo_track_width, area_distribution=area_distribution )
def testExponentialLogPDF(self): with session.Session(): batch_size = 6 lam = constant_op.constant([2.0] * batch_size) lam_v = 2.0 x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32) exponential = exponential_lib.Exponential(lam=lam) expected_log_pdf = stats.expon.logpdf(x, scale=1 / lam_v) log_pdf = exponential.log_prob(x) self.assertEqual(log_pdf.get_shape(), (6,)) self.assertAllClose(log_pdf.eval(), expected_log_pdf) pdf = exponential.prob(x) self.assertEqual(pdf.get_shape(), (6,)) self.assertAllClose(pdf.eval(), np.exp(expected_log_pdf))
def __init__(self, data, mleDiffCutoff=1.0): print [min(data), max(data)] distributions = [st.laplace, st.norm, st.expon, st.dweibull, st.invweibull, st.lognorm, st.uniform] mles = [] for distribution in distributions: pars = distribution.fit(data) mle = distribution.nnlf(pars, data) mles.append(mle) results = [(distribution.name, mle) for distribution, mle in zip(distributions, mles)] for dist in sorted(zip(distributions, mles), key=lambda d: d[1]): print dist best_fit = sorted(zip(distributions, mles), key=lambda d: d[1])[0] print 'Best fit reached using {}, MLE value: {}'.format(best_fit[0].name, best_fit[1]) self.modelSets = [] self.modelOptions = [mod[0].name for mod in sorted(zip(distributions, mles), key=lambda d: d[1])] ## list of scipy distribution ids sorted by their MLEs given the data ## [0] is best, [1], next best and so on for model in sorted(zip(distributions, mles), key=lambda d: d[1]): if(model[0].name in getAvailableDistributionsByScipyIds()): try: modelDist = getDistributionByScipyId(model[0].name, data) self.modelSets.append([modelDist, model[1]]) ## append the distribution object and the MLE value for this ## particular distribution & the data ## ah frig, I think in the bimodal case, it will be ## something like except RuntimeError: pass else: ## nothing that can be done here, if we dont have a object of ## the distribution needed available, we cant do much about it pass
def __init__(self, scenario_flag = "Freeway_Free"): """ Totally five scenarios are supported here: Freeway_Night, Freeway_Free, Freeway_Rush; Urban_Peak, Urban_Nonpeak. The PDFs of the vehicle speed and the inter-vehicle space are adapted from existing references. """ if scenario_flag == "Freeway_Night": self.headway_random = expon(0.0, 1.0/256.41) meanSpeed = 30.93 #m/s stdSpeed = 1.2 #m/s elif scenario_flag == "Freeway_Free": self.headway_random = lognorm(0.75, 0.0, np.exp(3.4)) meanSpeed = 29.15 #m/s stdSpeed = 1.5 #m/s elif scenario_flag == "Freeway_Rush": self.headway_random = lognorm(0.5, 0.0, np.exp(2.5)) meanSpeed = 10.73 #m/s stdSpeed = 2.0 #m/s elif scenario_flag == "Urban_Peak": scale = 1.096 c = 0.314 loc = 0.0 self.headway_random = fisk(c, loc, scale) meanSpeed = 6.083 #m/s stdSpeed = 1.2 #m/s elif scenario_flag == "Urban_Nonpeak": self.headway_random = lognorm(0.618, 0.0, np.exp(0.685)) meanSpeed = 12.86 #m/s stdSpeed = 1.5 #m/s else: raise self.speed_random = norm(meanSpeed, stdSpeed)
def test_randomized_search_grid_scores(): # Make a dataset with a lot of noise to get various kind of prediction # errors across CV folds and parameter settings X, y = make_classification(n_samples=200, n_features=100, n_informative=3, random_state=0) # XXX: as of today (scipy 0.12) it's not possible to set the random seed # of scipy.stats distributions: the assertions in this test should thus # not depend on the randomization params = dict(C=expon(scale=10), gamma=expon(scale=0.1)) n_cv_iter = 3 n_search_iter = 30 search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_cv_iter, param_distributions=params, iid=False) search.fit(X, y) assert_equal(len(search.grid_scores_), n_search_iter) # Check consistency of the structure of each cv_score item for cv_score in search.grid_scores_: assert_equal(len(cv_score.cv_validation_scores), n_cv_iter) # Because we set iid to False, the mean_validation score is the # mean of the fold mean scores instead of the aggregate sample-wise # mean score assert_almost_equal(np.mean(cv_score.cv_validation_scores), cv_score.mean_validation_score) assert_equal(list(sorted(cv_score.parameters.keys())), list(sorted(params.keys()))) # Check the consistency with the best_score_ and best_params_ attributes sorted_grid_scores = list(sorted(search.grid_scores_, key=lambda x: x.mean_validation_score)) best_score = sorted_grid_scores[-1].mean_validation_score assert_equal(search.best_score_, best_score) tied_best_params = [s.parameters for s in sorted_grid_scores if s.mean_validation_score == best_score] assert_true(search.best_params_ in tied_best_params, "best_params_={0} is not part of the" " tied best models: {1}".format( search.best_params_, tied_best_params))
def _impose_white_noise(self, data): import scipy.stats as stats original_shape = data.shape noise_area_distr = stats.expon(scale = self._white_noise_rate) data = data.reshape(data.shape[0], -1) s = data.shape[1] for i in xrange(data.shape[0]): n_white_noise = int(np.minimum(noise_area_distr.rvs(size=1), self._white_noise_maximum) * s) indx = np.random.choice(s, size=n_white_noise, replace=False) data[i, indx] = self._signal_level return data.reshape(original_shape)
def get_area_distribution(tracks, fit=False): area = np.sum(tracks > 0, axis=(1, 2)) if not fit: count = np.bincount(area) probability = count / float(np.sum(count)) return stats.rv_discrete( a=0, b=np.max(probability.shape[0]), name='signal distribution', values=(np.arange(count.shape[0]), probability) ) else: exp_params = stats.expon.fit(area) return stats.expon(*exp_params)
def testExponentialCDF(self): with session.Session(): batch_size = 6 lam = constant_op.constant([2.0] * batch_size) lam_v = 2.0 x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32) exponential = exponential_lib.Exponential(lam=lam) expected_cdf = stats.expon.cdf(x, scale=1 / lam_v) cdf = exponential.cdf(x) self.assertEqual(cdf.get_shape(), (6,)) self.assertAllClose(cdf.eval(), expected_cdf)
def testExponentialMean(self): with session.Session(): lam_v = np.array([1.0, 4.0, 2.5]) expected_mean = stats.expon.mean(scale=1 / lam_v) exponential = exponential_lib.Exponential(lam=lam_v) self.assertEqual(exponential.mean().get_shape(), (3,)) self.assertAllClose(exponential.mean().eval(), expected_mean)
def testExponentialVariance(self): with session.Session(): lam_v = np.array([1.0, 4.0, 2.5]) expected_variance = stats.expon.var(scale=1 / lam_v) exponential = exponential_lib.Exponential(lam=lam_v) self.assertEqual(exponential.variance().get_shape(), (3,)) self.assertAllClose(exponential.variance().eval(), expected_variance)
def testExponentialEntropy(self): with session.Session(): lam_v = np.array([1.0, 4.0, 2.5]) expected_entropy = stats.expon.entropy(scale=1 / lam_v) exponential = exponential_lib.Exponential(lam=lam_v) self.assertEqual(exponential.entropy().get_shape(), (3,)) self.assertAllClose(exponential.entropy().eval(), expected_entropy)
def testExponentialSampleMultiDimensional(self): with self.test_session(): batch_size = 2 lam_v = [3.0, 22.0] lam = constant_op.constant([lam_v] * batch_size) exponential = exponential_lib.Exponential(lam=lam) n = 100000 samples = exponential.sample(n, seed=138) self.assertEqual(samples.get_shape(), (n, batch_size, 2)) sample_values = samples.eval() self.assertFalse(np.any(sample_values < 0.0)) for i in range(2): self.assertLess( stats.kstest( sample_values[:, 0, i], stats.expon(scale=1.0 / lam_v[i]).cdf)[0], 0.01) self.assertLess( stats.kstest( sample_values[:, 1, i], stats.expon(scale=1.0 / lam_v[i]).cdf)[0], 0.01)
def __init__(self, ecc_prior=True, p_prior=True, inc_prior=True, m1_prior=True, m2_prior=True, para_prior=True, para_est=0, para_err=0, m1_est=0, m1_err=0, m2_est=0, m2_err=0, ecc_beta_a=0.867, ecc_beta_b=3.03, ecc_J08_sig=0.3, ecc_Rexp_lamda=5.12, ecc_Rexp_a=0.781, ecc_Rexp_sig=0.272, ecc_ST08_a=4.33, ecc_ST08_k=0.2431,p_gamma=-0.7, mins_ary=[],maxs_ary=[]): ## check min and max range arrays if (len(mins_ary)>1) and (len(maxs_ary)>1): self.mins_ary = mins_ary self.maxs_ary = maxs_ary else: raise IOError('\n\n No min/max ranges were provided to the priors object!!') ## push in two manual constants self.days_per_year = 365.2422 self.sec_per_year = 60*60*24*self.days_per_year ## choices ## choices:'2e', 'ST08','J08', 'RayExp', 'beta', 'uniform'. Default is 'beta'. self.e_prior = ecc_prior ## `best-fit' alpha and beta from Kipping+2013 self.ecc_beta = stats.beta(ecc_beta_a,ecc_beta_b) #https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.beta.html ## 'best-fit' for the basic Rayleigh from Juric+2008 self.ecc_J08_sig = ecc_J08_sig ### Put this into the advanced settings ??? ## `best-fit' alpha, lambda and sig from Kipping+2013 self.ecc_Rexp_lamda = ecc_Rexp_lamda ### Put this into the advanced settings ??? self.ecc_Rexp_a = ecc_Rexp_a ### Put this into the advanced settings ??? self.ecc_Rexp_sig = ecc_Rexp_sig ### Put this into the advanced settings ??? #https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rayleigh.html#scipy.stats.rayleigh self.ecc_R = stats.rayleigh() #https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.expon.html self.ecc_exp = stats.expon() ## `best-fit' for the Shen&Turner 2008 pdf self.ecc_ST08_a = ecc_ST08_a ### Put this into the advanced settings ??? self.ecc_ST08_k = ecc_ST08_k ### Put this into the advanced settings ??? #self.ecc_norm = stats.norm #self.ecc_norm_mean = ## #self.ecc_norm_sig = ## #self.ecc_norm.pdf(ecc,loc=self.ecc_norm_mean, scale=self.ecc_norm_sig) ## choices: power-law, Jeffrey's self.p_prior = p_prior self.p_gamma = p_gamma ## choices:sin, cos self.inc_prior = inc_prior ## choices:IMF, PDMF self.m1_prior = m1_prior ## choices:CMF, IMF, PDMF self.m2_prior = m2_prior ## choices:gauss self.para_prior = para_prior ## values necessary to calculate priors self.para_est = para_est self.para_err = para_err self.m1_est = m1_est self.m1_err = m1_err self.m2_est = m2_est self.m2_err = m2_err
def get_model(n_obs=50, true_params=None, seed_obs=None, stochastic=True): """Return a complete Ricker model in inference task. This is a simplified example that achieves reasonable predictions. For more extensive treatment and description using 13 summary statistics, see: Wood, S. N. (2010) Statistical inference for noisy nonlinear ecological dynamic systems, Nature 466, 1102–1107. Parameters ---------- n_obs : int, optional Number of observations. true_params : list, optional Parameters with which the observed data is generated. seed_obs : int, optional Seed for the observed data generation. stochastic : bool, optional Whether to use the stochastic or deterministic Ricker model. Returns ------- m : elfi.ElfiModel """ if stochastic: simulator = partial(stochastic_ricker, n_obs=n_obs) if true_params is None: true_params = [3.8, 0.3, 10.] else: simulator = partial(ricker, n_obs=n_obs) if true_params is None: true_params = [3.8] m = elfi.ElfiModel() y_obs = simulator(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed_obs)) sim_fn = partial(simulator, n_obs=n_obs) sumstats = [] if stochastic: elfi.Prior(ss.expon, np.e, 2, model=m, name='t1') elfi.Prior(ss.truncnorm, 0, 5, model=m, name='t2') elfi.Prior(ss.uniform, 0, 100, model=m, name='t3') elfi.Simulator(sim_fn, m['t1'], m['t2'], m['t3'], observed=y_obs, name='Ricker') sumstats.append(elfi.Summary(partial(np.mean, axis=1), m['Ricker'], name='Mean')) sumstats.append(elfi.Summary(partial(np.var, axis=1), m['Ricker'], name='Var')) sumstats.append(elfi.Summary(num_zeros, m['Ricker'], name='#0')) elfi.Discrepancy(chi_squared, *sumstats, name='d') else: # very simple deterministic case elfi.Prior(ss.expon, np.e, model=m, name='t1') elfi.Simulator(sim_fn, m['t1'], observed=y_obs, name='Ricker') sumstats.append(elfi.Summary(partial(np.mean, axis=1), m['Ricker'], name='Mean')) elfi.Distance('euclidean', *sumstats, name='d') return m
def test_random_search_cv_results(): # Make a dataset with a lot of noise to get various kind of prediction # errors across CV folds and parameter settings X, y = make_classification(n_samples=200, n_features=100, n_informative=3, random_state=0) # scipy.stats dists now supports `seed` but we still support scipy 0.12 # which doesn't support the seed. Hence the assertions in the test for # random_search alone should not depend on randomization. n_splits = 3 n_search_iter = 30 params = dict(C=expon(scale=10), gamma=expon(scale=0.1)) random_search = dcv.RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_splits, iid=False, param_distributions=params, return_train_score=True) random_search.fit(X, y) random_search_iid = dcv.RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_splits, iid=True, param_distributions=params, return_train_score=True) random_search_iid.fit(X, y) param_keys = ('param_C', 'param_gamma') score_keys = ('mean_test_score', 'mean_train_score', 'rank_test_score', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split0_train_score', 'split1_train_score', 'split2_train_score', 'std_test_score', 'std_train_score', 'mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time') n_cand = n_search_iter for search, iid in zip((random_search, random_search_iid), (False, True)): assert iid == search.iid cv_results = search.cv_results_ # Check results structure check_cv_results_array_types(cv_results, param_keys, score_keys) check_cv_results_keys(cv_results, param_keys, score_keys, n_cand) # For random_search, all the param array vals should be unmasked assert not (any(cv_results['param_C'].mask) or any(cv_results['param_gamma'].mask))
def generate(self, N = 1.0e+3): import scipy.stats as stats N = int(N) data = np.ndarray( shape=(N, ) + self._background_samples.shape[1:], dtype=self._background_samples.dtype ) mask = np.zeros( shape=data.shape, dtype='int8' ) data = self._get_background(data) data = self._impose_white_noise(data) n_tracks_distr = stats.expon(scale=self._track_rate) n_ptracks_distr = stats.expon(scale=self._pseudo_tracks_rate) track_area_distr = self._area_distribution track_stream = random_samples_stream(self._track_samples) track_max_x = self._background_samples.shape[1] - self._track_samples.shape[1] track_max_y = self._background_samples.shape[2] - self._track_samples.shape[2] for i in xrange(N): n_tracks = int(n_tracks_distr.rvs(size=1)) for _ in xrange(n_tracks): track = track_stream.next() x, y = np.random.randint(track_max_x), np.random.randint(track_max_y) impose(track, data[i], x, y) impose(track, mask[i], x, y, level=1) n_ptracks = int(n_ptracks_distr.rvs(size=1)) for _ in xrange(n_ptracks): ptrack = pseudo_track( area_distribution=track_area_distr, signal_distribution=self._signal_level, width = self._pseudo_track_width, sparseness=self._pseudo_track_sparseness, patch_size=self._track_samples.shape[1], dtype=self._track_samples.dtype ) x, y = np.random.randint(track_max_x), np.random.randint(track_max_y) impose(ptrack, data[i], x, y, level=self._signal_level) impose(ptrack, mask[i], x, y, level=-1) return data, mask