我们从Python开源项目中,提取了以下28个代码示例,用于说明如何使用scipy.stats.sem()。
def prepare_matplotlib_data(self, data_dict):
    """Summarise every series in ``data_dict`` for plotting.

    Parameters
    ----------
    data_dict : dict
        Maps a key (used as the x value) to a sequence of samples.

    Returns
    -------
    tuple
        ``(x, data_means, data_sds, data_sems)``: ``x`` is the sorted list of
        keys; the three other lists hold, per key in that order, ``np.mean``,
        ``np.std`` and ``ss.sem`` of the samples stored under the key.
    """
    # Nothing to summarise; the previous code raised IndexError while probing
    # the first key of an empty mapping.
    if not data_dict:
        return [], [], [], []

    # Probe one key without indexing ``keys()``: on Python 3 that is a view
    # object and cannot be subscripted.
    first_key = next(iter(data_dict))
    if isinstance(first_key, int):
        x = sorted(data_dict, key=int)
    else:
        # Strings (and any other orderable key type) use their natural order.
        x = sorted(data_dict)

    data_means = [np.mean(data_dict[p]) for p in x]
    # NOTE: np.std defaults to ddof=0 whereas ss.sem defaults to ddof=1.
    data_sds = [np.std(data_dict[p]) for p in x]
    data_sems = [ss.sem(data_dict[p]) for p in x]
    return x, data_means, data_sds, data_sems
def test_t_dist(self):
    """Compare bootstrap intervals of the mean with Student-t intervals.

    For each sample size, normal samples are drawn and both bootstrap bounds
    must lie within 1% of the matching t-interval bound.
    """
    alpha = 0.05
    loc, scale = 100, 100
    for n_samples in (250, 500, 1000, 2500, 3500, 5000, 8000, 10000):
        samples = np.random.normal(loc=loc, scale=scale, size=n_samples)

        bootstrap_result = bs.bootstrap(samples, stat_func=bs_stats.mean,
                                        alpha=alpha)

        t_low, t_high = st.t.interval(
            1 - alpha,
            len(samples) - 1,
            loc=np.mean(samples),
            scale=st.sem(samples),
        )

        self.assertAlmostEqual(bootstrap_result.lower_bound, t_low,
                               delta=t_low / 100.)
        self.assertAlmostEqual(bootstrap_result.upper_bound, t_high,
                               delta=t_high / 100.)
def tearDown(self):
    """Integrate further and check the Lyapunov exponents against controls.

    Samples ``self.DDE.integrate`` at times from ``self.DDE.t`` to
    ``self.DDE.t + 1000`` in steps of 10, drops the first 40 samples, and
    asserts that each weighted-average exponent is within three standard
    errors of the matching entry of ``lyap_controls``.
    """
    skip = 40
    sample_times = np.arange(self.DDE.t, self.DDE.t+1000, 10)

    exponent_rows = []
    weights = []
    for sample_time in sample_times:
        _, exponents_now, weight = self.DDE.integrate(sample_time)
        exponent_rows.append(exponents_now)
        weights.append(weight)
    exponents = np.vstack(exponent_rows)

    kept_weights = weights[skip:]
    for index, expected in enumerate(lyap_controls):
        column = exponents[skip:, index]
        estimate = np.average(column, weights=kept_weights)
        stderr = sem(column)
        print(estimate, stderr)
        self.assertAlmostEqual(expected, estimate, delta=3*stderr)
def evaluate_cross_validation(clf, X, y, K):
    """Cross-validate ``clf`` on ``(X, y)`` and print the scores.

    Prints the array of per-fold scores followed by their mean and standard
    error of the mean. Nothing is returned.

    Parameters
    ----------
    clf : estimator passed to ``cross_val_score``.
    X, y : data and targets passed to ``cross_val_score``.
    K : number of folds handed to ``KFold``.
    """
    # Create a shuffled K-fold cross-validation iterator with a fixed seed.
    # NOTE(review): this positional signature looks like the legacy
    # sklearn.cross_validation.KFold(n, n_folds, ...) API -- confirm against
    # the KFold actually imported by this module.
    cv = KFold(len(y), K, shuffle=True, random_state=0)
    # No ``scoring`` argument is given, so cross_val_score falls back to the
    # estimator's own ``score`` method.
    scores = cross_val_score(clf, X, y, cv=cv)
    # Function-call form prints the same text on Python 2 and is valid on
    # Python 3, where the old print statements were a syntax error.
    print(scores)
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(np.mean(scores), sem(scores)))
def sem(self) -> float:
    """Return the standard error of the mean of ``self.array``.

    Delegates to ``st.sem`` with its default arguments.
    """
    values = self.array
    return st.sem(values)
def std_error_mean(self) -> float:
    """Return the standard error of the mean of ``self.array``.

    Delegates to ``st.sem`` with its default arguments.
    """
    observations = self.array
    result = st.sem(observations)
    return result
def conf(x, width=0.95, axis=None):
    """Return the Student-t confidence interval of the mean of ``x``.

    With ``axis=None`` the array is flattened and a single ``(low, high)``
    pair is returned; otherwise the interval is computed for every 1-D slice
    along ``axis`` via ``np.apply_along_axis``.
    """
    def conf1(v):
        # Interval for a single 1-D sample, centred on its mean.
        dof = len(v) - 1
        return spstats.t.interval(width, dof, loc=np.mean(v),
                                  scale=spstats.sem(v))

    if axis is None:
        return conf1(x.ravel())
    return np.apply_along_axis(conf1, axis, x)
def plot_data(self, series):
    """Plot one latency statistic with error bars, save it as PNG and show it.

    ``series`` selects the statistic ("mean", "sd" or "sem") and with it the
    y-axis label; any other value leaves the label as ``None``. The figure is
    written to ``<series>_latency_evaluation_<evaluation_type>.png``.
    """
    _fig, ax1 = plt.subplots(1, 1, sharex=True, sharey=False,
                             figsize=(5.0, 4.0))

    # Tick labels come from the keys stored under "10"; ticks are their ints.
    tick_labels = self.data["10"].keys()
    tick_positions = [int(label) for label in tick_labels]

    ylabel_by_series = {
        "mean": "Mean Latency",
        "sd": "Standard Deviation of Latency",
        "sem": "Standard Error of Mean of Latency",
    }
    ylabel = ylabel_by_series.get(series)

    self.plot_lines_with_error_bars(series,
                                    ax1,
                                    "Per Link Latency",
                                    ylabel,
                                    "",
                                    y_scale='linear',
                                    x_min_factor=0.75,
                                    x_max_factor=1.1,
                                    y_min_factor=0.9,
                                    y_max_factor=1,
                                    xticks=tick_positions,
                                    xtick_labels=tick_labels)

    plt.setp(ax1.get_xticklabels(), rotation=0, fontsize=10)

    # Keep the top 70% of the axes height so the legend fits underneath.
    box = ax1.get_position()
    ax1.set_position([box.x0, box.y0 + box.height * 0.3,
                      box.width, box.height * 0.7])

    handles, labels = ax1.get_legend_handles_labels()
    ax1.legend(handles,
               labels,
               shadow=True,
               fontsize=10,
               loc='upper center',
               ncol=2,
               markerscale=1.0,
               frameon=True,
               fancybox=True,
               columnspacing=0.5,
               bbox_to_anchor=[0.5, -0.25])

    out_name = series + "_latency_evaluation_" + self.evaluation_type + ".png"
    plt.savefig(out_name, dpi=1000)
    plt.show()
def main():
    """Collect the replay-evaluation data and plot each latency statistic."""
    evaluation_type = "replay"

    # Link delays in milliseconds; a wider sweep would be [5, 10, 15, 20, 25].
    link_latencies = [0]
    # Amount of 'load' from background emulation threads; a wider sweep
    # would be [0, 10, 20, 30, 40].
    background_specs = [0]

    # Locate the repository root from this script's own path.
    here = os.path.dirname(os.path.realpath(__file__))
    root_end = here.index('NetPower_TestBed')
    base_dir = here[0:root_end] + "NetPower_TestBed"
    bro_dnp3_parser_dir = base_dir + "/dnp3_timing/dnp3_parser_bro/"

    # Alternative install locations: /usr/local/bro/share/bro/policy/tuning/json-logs.bro
    # and /usr/local/bro/bin/bro.
    bro_json_log_conf = "/home/rakesh/bro/scripts/policy/tuning/json-logs.bro"
    bro_cmd = "/usr/bin/bro"

    processor = PCAPPostProcessing(base_dir,
                                   bro_cmd,
                                   bro_json_log_conf,
                                   bro_dnp3_parser_dir,
                                   link_latencies,
                                   background_specs,
                                   evaluation_type)
    processor.collect_data()

    for series in ("mean", "sd", "sem"):
        processor.plot_data(series)
    # Histograms of processor.data['0'][...] and processor.process_plotly()
    # are currently disabled.
def tearDown(self):
    """Re-integrate and check the averaged Lyapunov exponents.

    After re-initialising the integrator, the second element returned by
    ``self.ODE.integrate`` is collected at times 10, 20, ..., 99990. The first
    1000 samples are discarded; the remaining ones are averaged and compared
    with the module-level ``lyaps`` reference values.
    """
    self.initialise_integrator()
    times = range(10, 100000, 10)
    # np.vstack requires a real sequence: passing a generator was deprecated
    # in NumPy 1.16 and is rejected by current releases.
    data = np.vstack([self.ODE.integrate(time)[1] for time in times])

    burn_in = 1000
    result = np.average(data[burn_in:], axis=0)
    margin = standard_error(data[burn_in:], axis=0)
    print(data, result, margin)

    self.assertLess(np.max(margin), 0.003)
    for i in range(self.n):
        # Two-sided check: without abs() an estimate far *below* the
        # reference value would pass silently.
        self.assertLess(abs(result[i] - lyaps[i]), 3*margin[i])
def test_aggregate_str_func(self):
    """Check that aggregating by function name matches the direct methods.

    Runs the same three checks on ``self.tsframe`` grouped by weekday and
    grouped by (month, weekday).
    """
    def _verify(groups):
        # A single column aggregated by name.
        by_name = groups['A'].agg('std')
        direct = groups['A'].std()
        assert_series_equal(by_name, direct)

        # The whole frame aggregated by name.
        by_name = groups.aggregate('var')
        direct = groups.var()
        assert_frame_equal(by_name, direct)

        # One function name per column.
        spec = OrderedDict([['A', 'var'],
                            ['B', 'std'],
                            ['C', 'mean'],
                            ['D', 'sem']])
        by_name = groups.agg(spec)
        direct = DataFrame(OrderedDict([['A', groups['A'].var()],
                                        ['B', groups['B'].std()],
                                        ['C', groups['C'].mean()],
                                        ['D', groups['D'].sem()]]))
        assert_frame_equal(by_name, direct)

    _verify(self.tsframe.groupby(lambda x: x.weekday()))
    _verify(self.tsframe.groupby([lambda x: x.month,
                                  lambda x: x.weekday()]))
def test_tab_completion(self):
    """Check the public attribute names of ``self.mframe`` grouped by 'second'.

    Every name from ``dir()`` that does not start with an underscore must
    match the expected set exactly.
    """
    grp = self.mframe.groupby(level='second')
    results = {attr for attr in dir(grp) if not attr.startswith('_')}
    expected = {
        'A', 'B', 'C',
        'agg', 'aggregate', 'apply', 'boxplot', 'filter', 'first',
        'get_group', 'groups', 'hist', 'indices', 'last', 'max', 'mean',
        'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', 'prod',
        'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', 'head',
        'irow', 'describe', 'cummax', 'quantile', 'rank', 'cumprod', 'tail',
        'resample', 'cummin', 'fillna', 'cumsum', 'cumcount', 'all', 'shift',
        'skew', 'bfill', 'ffill', 'take', 'tshift', 'pct_change', 'any',
        'mad', 'corr', 'corrwith', 'cov', 'dtypes', 'ndim', 'diff', 'idxmax',
        'idxmin', 'pad', 'backfill',
    }
    self.assertEqual(results, expected)
def test_ops_general(self):
    """Compare named groupby reductions with ``agg`` on a target function.

    ``sem`` is only included when ``scipy.stats`` can be imported. On a
    mismatch the failing operation name is appended to the exception args
    before it is re-raised.
    """
    ops = [
        ('mean', np.mean),
        ('median', np.median),
        ('std', np.std),
        ('var', np.var),
        ('sum', np.sum),
        ('prod', np.prod),
        ('min', np.min),
        ('max', np.max),
        ('first', lambda x: x.iloc[0]),
        ('last', lambda x: x.iloc[-1]),
        ('count', np.size),
    ]
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))

    frame = DataFrame(np.random.randn(1000))
    group_keys = np.random.randint(0, 50, size=1000).astype(float)

    for method_name, target in ops:
        by_method = getattr(frame.groupby(group_keys), method_name)()
        actual = by_method.astype(float)
        wanted = frame.groupby(group_keys).agg(target)
        try:
            tm.assert_frame_equal(actual, wanted)
        except BaseException as exc:
            # Tag the failure with the operation before propagating it.
            exc.args += ('operation: %s' % method_name, )
            raise
def test_nansem(self):
    """Check ``nanops.nansem`` against ``scipy.stats.sem`` via ``check_funs_ddof``.

    The two ``tm`` skip helpers are called first, before scipy is imported.
    """
    tm.skip_if_no_package('scipy.stats')
    tm._skip_if_scipy_0_17()
    from scipy.stats import sem

    checker_options = dict(
        allow_complex=False,
        allow_str=False,
        allow_date=False,
        allow_tdelta=True,
        allow_obj='convert',
    )
    self.check_funs_ddof(nanops.nansem, sem, **checker_options)
def mean_confidence_interval(data, confidence=0.95):
    """Return ``(mean, lower, upper)`` for a Student-t interval of the mean.

    The interval uses ``len(data) - 1`` degrees of freedom, is centred on the
    sample mean and scaled by the standard error of the mean.
    """
    center = np.mean(data)
    dof = len(data) - 1
    lower, upper = st.t.interval(confidence, dof, loc=center,
                                 scale=st.sem(data))
    return center, lower, upper
def mean_confidence_interval(data, confidence=0.95):
    """Return the half-width of the Student-t confidence interval of the mean.

    Only the half-width ``h`` is returned (not the mean or the bounds); the
    interval itself is ``mean - h`` to ``mean + h``.

    Parameters
    ----------
    data : sequence of numbers
        The sample.
    confidence : float, optional
        Two-sided confidence level, 0.95 by default.

    Returns
    -------
    float
        Standard error of the mean times the t quantile at
        ``(1 + confidence) / 2`` with ``len(data) - 1`` degrees of freedom.
    """
    a = 1.0 * np.array(data)
    n = len(a)
    se = st.sem(a)
    # Use the public ``ppf``: ``_ppf`` is a private hook of the distribution
    # class that skips argument validation and is not a stable API.
    h = se * st.t.ppf((1 + confidence) / 2., n - 1)
    return h
def plot_cluster_error(ax):
    """Draw the normalized error curve (mean with a SEM band) per experiment.

    For the "dots" and "sticks" experiments, each subject's ``real`` values
    are divided by the mean of that subject's ``null`` values; the mean across
    subjects is drawn with a shaded band of one standard error either side.
    """
    path_template = "spatial_analysis/{}_{}_ifs.pkz"

    for exp in ("dots", "sticks"):
        subjects = get_subject_order(exp)
        color = get_colormap(exp, as_cmap=False)[20]

        per_subject = []
        for subj in subjects:
            res = moss.load_pkl(path_template.format(subj, exp))
            # x is taken from whichever result was loaded last.
            x = res.steps
            baseline = res.null.mean()
            per_subject.append(res.real / baseline)
        stacked = np.vstack(per_subject)

        center = stacked.mean(axis=0)
        ax.plot(x, center, color=color, lw=2)

        spread = stats.sem(stacked, axis=0)
        ax.fill_between(x, center - spread, center + spread,
                        alpha=.2, color=color)

    # Dashed reference line at a normalized error of 1.
    ax.axhline(y=1, lw=1, dashes=[5, 2], color=".5", zorder=0,
               xmin=.02, xmax=.98)

    ax.set(xlim=(0, 42),
           ylim=(.55, 1.45),
           yticks=[.6, .8, 1, 1.2, 1.4],
           xticks=[0, 10, 20, 30, 40],
           xlabel="Neighborhood radius (mm)",
           ylabel="Normalized error")

    sns.despine(ax=ax, trim=True)
def ci(x):
    """Return the upper bound of a zero-centred 95% Student-t interval.

    The interval uses ``x.count() - 1`` degrees of freedom and the standard
    error of the mean of ``x`` as its scale.
    """
    dof = x.count() - 1
    bounds = st.t.interval(0.95, dof, loc=0, scale=st.sem(x))
    _lower, upper = bounds
    return upper
def plot_return(agent, returns, data=None):
    """Plot return over time, optionally with error bars.

    Parameters
    ----------
    agent : object providing ``colour`` (a matplotlib format string) and
        ``legend`` (the line label).
    returns : array of average returns per episode; ``.size`` must exist.
    data : 2-D array, optional
        When given, an error bar is drawn every 5000 episodes from the
        standard error of the matching column of ``data``.
    """
    plt.plot(returns, agent.colour, label=agent.legend)
    interval = 5000
    # ``is not None``: comparing an array with ``!= None`` is elementwise and
    # cannot be used as a single truth value.
    if data is not None:
        # Floor division keeps the count an int on Python 3, where ``/``
        # yields a float that ``range`` rejects.
        for i in range(returns.size // interval):
            # NOTE(review): ``t`` is expected to expose ``sem`` -- presumably an
            # alias for scipy.stats; confirm against the module imports.
            plt.errorbar(1+i*interval, returns[i*interval],
                         yerr=t.sem(data[:, i*interval]), fmt=agent.colour)
    plt.axis([0, returns.size, 0.0, 0.8])
    plt.xlabel('Episodes')
    plt.title('Average Return')
    plt.ylabel('Average Return')
def ax_plot_lines(ax, xs, ys, colors, shapes, linestyles, errorbar=False, linewidth=LINEWIDTH):
    """Draw one line per ``(x, y, color, marker, linestyle)`` tuple on ``ax``.

    With ``errorbar=True`` every ``y`` must be a list of sample lists: the
    per-point mean is drawn with the standard error of the mean as the error
    bar. Otherwise ``y`` is plotted directly with ``linewidth``.

    Returns the list of handles returned by ``ax.errorbar`` / ``ax.plot``.
    """
    handles = []
    series = zip(xs, ys, colors, shapes, linestyles)
    for x, y, color, marker, style in series:
        if errorbar:
            means = [np.mean(samples) for samples in y]
            errors = [ss.sem(samples) for samples in y]
            handle = ax.errorbar(x, means, yerr=errors, color=color,
                                 marker=marker, linestyle=style, ecolor=color)
        else:
            # ax.plot returns a one-element list; unpack the single line.
            handle, = ax.plot(x, y, color=color, marker=marker,
                              linestyle=style, linewidth=linewidth)
        handles.append(handle)
    return handles
def get_relation_strength(table_file, top=10, normalize=False, return_sem=False, return_all=False):
    """Summarise the absolute combined scores of the first ``top`` pairs per type.

    Return value, in order of precedence:

    * ``return_all``: ``(scores, mean, sem)`` dictionaries keyed by type;
    * ``return_sem``: ``(mean, sem)``;
    * ``normalize``: the means divided by the largest mean;
    * otherwise the plain means.
    """
    type_list = load_all_pairs(table_file)

    scores = {}
    for k in type_list:
        scores[k] = [abs(v.combined_score) for v in type_list[k][:top]]
    mean = {k: np.mean(values) for k, values in scores.items()}

    if return_all or return_sem:
        errors = {k: ss.sem(scores[k]) for k in type_list}
        if return_all:
            return scores, mean, errors
        return mean, errors

    if normalize:
        max_v = max(mean.values())
        return {k: mean[k] / max_v for k in mean}

    return mean
def make_probes_ba_traj_fig(models1, models2=None, palette=None):  # TODO ylim
    """
    Return a figure with the mean 'probes_ba' trajectory of each model group.

    One line is drawn per group (``models1`` and, if given, ``models2``),
    with a grey band of one standard error of the mean either side. Colors
    are taken from ``palette`` with ``next()``; without a palette, black.
    """
    start = time.time()
    sns.set_style('white')

    # load data
    groups = [models1] if models2 is None else [models1, models2]
    xys = []
    for models in groups:
        trajs = [model.get_traj('probes_ba') for model in models]
        x = models[0].get_data_step_axis()
        # every trajectory is cut to the length of the step axis
        traj_mat = np.asarray([traj[:len(x)] for traj in trajs])
        y = np.mean(traj_mat, axis=0)
        sem = [stats.sem(column) for column in traj_mat.T]
        xys.append((x, y, sem))

    # fig
    fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3))
    ax.set_ylim([50, 75])
    ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.set_ylabel('Probes Balanced Accuracy',
                  fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    ax.tick_params(axis='both', which='both', top='off', right='off')
    ax.xaxis.set_major_formatter(FuncFormatter(human_format))
    ax.yaxis.grid(True)

    # plot
    for x, y, sem in xys:
        color = 'black' if palette is None else next(palette)
        ax.plot(x, y, '-', linewidth=FigsConfigs.LINEWIDTH, color=color)
        upper = np.add(y, sem)
        lower = np.subtract(y, sem)
        ax.fill_between(x, upper, lower,
                        alpha=FigsConfigs.FILL_ALPHA, color='grey')
    plt.tight_layout()

    elapsed = time.time() - start
    print('{} completed in {:.1f} secs'.format(sys._getframe().f_code.co_name,
                                               elapsed))
    return fig
def display_scores(params, scores, append_star=False):
    """Return ``"<params>:\\t<mean> (+/-<std error>)"`` for one parameter set.

    ``params`` is rendered as comma-separated ``key=value`` pairs; ``" *"`` is
    appended when ``append_star`` is true.
    """
    described = ", ".join(
        "{0}={1}".format(name, value) for name, value in params.items())
    suffix = " *" if append_star else ""
    summary = "{0}:\t{1:.3f} (+/-{2:.3f})".format(
        described, np.mean(scores), sem(scores))
    return summary + suffix
def display_grid_scores(grid_scores, top=None):
    """Print a report line for each entry of a grid of scores, best first.

    Parameters
    ----------
    grid_scores : iterable of ``(params, mean_score, scores)`` tuples.
    top : int, optional
        Keep only the ``top`` best entries (by mean score).

    An entry is starred when its mean plus two standard errors exceeds the
    best mean minus two standard errors, i.e. the two ranges overlap.
    Nothing is printed when there are no entries to report.
    """
    grid_scores = sorted(grid_scores, key=lambda x: x[1], reverse=True)
    if top is not None:
        grid_scores = grid_scores[:top]

    # No entries (empty input or top=0): there is no best model to compare
    # against, and indexing [0] below would raise IndexError.
    if not grid_scores:
        return

    # Compute a threshold for starring models with overlapping stderr.
    _, best_mean, best_scores = grid_scores[0]
    threshold = best_mean - 2 * sem(best_scores)

    for params, mean_score, scores in grid_scores:
        append_star = mean_score + 2 * sem(scores) > threshold
        print(display_scores(params, scores, append_star=append_star))
def _signal_to_noise(values, axis=0, ddof=0):
    """Return mean / standard deviation along ``axis`` (0 where the deviation is 0)."""
    values = np.asanyarray(values)
    mean = values.mean(axis)
    std = values.std(axis=axis, ddof=ddof)
    return np.where(std == 0, 0, mean / std)


def create_scipy_features(base_features, sentinel):
    r"""Calculate the skew, kurtosis, and other statistical features for each row.

    Parameters
    ----------
    base_features : numpy array
        The feature dataframe.
    sentinel : float
        The number to be imputed for NaN values.

    Returns
    -------
    sp_features : numpy array
        The calculated SciPy features, one column per statistic (geometric
        mean, kurtosis, kurtosis test, normal test, skew, skew test,
        variation, signal-to-noise ratio, standard error of the mean), after
        imputation with ``impute_values`` and scaling with ``StandardScaler``.

    """

    logger.info("Creating SciPy Features")

    # Generate scipy features

    logger.info("SciPy Feature: geometric mean")
    row_gmean = sps.gmean(base_features, axis=1)
    logger.info("SciPy Feature: kurtosis")
    row_kurtosis = sps.kurtosis(base_features, axis=1)
    # The test functions return (statistic, p-value); only the statistic
    # is kept as a feature.
    logger.info("SciPy Feature: kurtosis test")
    row_ktest, _ = sps.kurtosistest(base_features, axis=1)
    logger.info("SciPy Feature: normal test")
    row_normal, _ = sps.normaltest(base_features, axis=1)
    logger.info("SciPy Feature: skew")
    row_skew = sps.skew(base_features, axis=1)
    logger.info("SciPy Feature: skew test")
    row_stest, _ = sps.skewtest(base_features, axis=1)
    logger.info("SciPy Feature: variation")
    row_var = sps.variation(base_features, axis=1)
    logger.info("SciPy Feature: signal-to-noise ratio")
    # scipy.stats.signaltonoise was removed in SciPy 1.0; the helper computes
    # the same mean/std ratio.
    row_stn = _signal_to_noise(base_features, axis=1)
    logger.info("SciPy Feature: standard error of mean")
    row_sem = sps.sem(base_features, axis=1)

    sp_features = np.column_stack((row_gmean, row_kurtosis, row_ktest,
                                   row_normal, row_skew, row_stest,
                                   row_var, row_stn, row_sem))
    sp_features = impute_values(sp_features, 'float64', sentinel)
    sp_features = StandardScaler().fit_transform(sp_features)

    # Return new SciPy features

    logger.info("SciPy Feature Count : %d", sp_features.shape[1])
    return sp_features


#
# Function create_clusters
#
def test_cythonized_aggers(self):
    """Check groupby reductions against per-group results computed by hand.

    Each named reduction is checked for a single-key and a two-key grouping
    of a small frame containing NaNs in both the key and the value column.
    """
    df = DataFrame({'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan],
                    'B': ['A', 'B'] * 6,
                    'C': np.random.randn(12)})
    df.loc[2:10:2, 'C'] = nan

    def _testit(name):
        def op(obj):
            # Call the reduction ``name`` on a Series or a groupby object.
            return getattr(obj, name)()

        # single column
        grouped = df.drop(['B'], axis=1).groupby('A')
        per_group = {cat: op(group['C']) for cat, group in grouped}
        exp = DataFrame({'C': per_group})
        exp.index.name = 'A'
        result = op(grouped)
        assert_frame_equal(result, exp)

        # multiple columns
        grouped = df.groupby(['A', 'B'])
        nested = {}
        for (cat1, cat2), group in grouped:
            nested.setdefault(cat1, {})[cat2] = op(group['C'])
        exp = DataFrame(nested).T.stack(dropna=False)
        exp.index.names = ['A', 'B']
        exp.name = 'C'

        result = op(grouped)['C']
        if not tm._incompat_bottleneck_version(name):
            assert_series_equal(result, exp)

    reductions = ('count', 'sum', 'std', 'var', 'sem',
                  'mean', 'median', 'prod', 'min', 'max')
    for reduction in reductions:
        _testit(reduction)
def make_test_and_train_pp_traj_fig(models1, models2=None, palette=None, ):
    """
    Return a figure with the mean test and train perplexity of each model group.

    Per group (``models1`` and, if given, ``models2``) a solid 'Test' line and
    a dashed 'Train' line are drawn, each with a grey band of one standard
    error of the mean either side. Colors are taken from ``palette`` with
    ``next()``; without a palette, black.
    """
    start = time.time()
    sns.set_style('white')

    # load data
    groups = [models1] if models2 is None else [models1, models2]
    xys = []
    for models in groups:
        test_trajs = []
        train_trajs = []
        for model in models:
            test_trajs.append(model.get_traj('test_pp'))
            train_trajs.append(model.get_traj('train_pp'))
        x = models[0].get_data_step_axis()
        # every trajectory is cut to the length of the step axis
        test_mat = np.asarray([traj[:len(x)] for traj in test_trajs])
        train_mat = np.asarray([traj[:len(x)] for traj in train_trajs])
        y1 = np.mean(test_mat, axis=0)
        y2 = np.mean(train_mat, axis=0)
        sem1 = [stats.sem(column) for column in test_mat.T]
        sem2 = [stats.sem(column) for column in train_mat.T]
        xys.append((x, y1, y2, sem1, sem2))

    # fig
    fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3))
    ax.set_ylim([0, models1[0].terms.num_set_])
    ax.set_ylabel('Perplexity', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    ax.tick_params(axis='both', which='both', top='off', right='off')
    ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.xaxis.set_major_formatter(FuncFormatter(human_format))
    ax.yaxis.grid(True)

    # plot
    for x, y1, y2, sem1, sem2 in xys:
        color = 'black' if palette is None else next(palette)
        ax.plot(x, y1, '-', linewidth=FigsConfigs.LINEWIDTH, color=color,
                linestyle='-', label='Test')
        ax.plot(x, y2, '-', linewidth=FigsConfigs.LINEWIDTH, color=color,
                linestyle='--', label='Train')
        ax.fill_between(x, np.add(y1, sem1), np.subtract(y1, sem1),
                        alpha=FigsConfigs.FILL_ALPHA, color='grey')
        ax.fill_between(x, np.add(y2, sem2), np.subtract(y2, sem2),
                        alpha=FigsConfigs.FILL_ALPHA, color='grey')
    plt.tight_layout()
    plt.legend(loc='best')

    elapsed = time.time() - start
    print('{} completed in {:.1f} secs'.format(sys._getframe().f_code.co_name,
                                               elapsed))
    return fig