我们从Python开源项目中,提取了以下7个代码示例,用于说明如何使用scipy.stats.shapiro()。
def samples_are_normal(*samples, **options):
    """Check every sample for normality using two independent tests.

    Each sample is passed to the Shapiro-Wilk test and to D'Agostino and
    Pearson's omnibus test. A sample is reported as non-normal only when
    *both* tests reject the null hypothesis of normality, i.e. both
    p-values are below ``alpha``.

    Args:
        *samples: One array-like of observations per sample.
        **options: Only ``alpha`` is read (significance level,
            defaults to 0.05).

    Returns:
        A list with one ``(is_normal, shapiro_result, normaltest_result)``
        tuple per sample, in input order. The last two items are the
        objects returned by ``shapiro`` and ``normaltest`` respectively.
    """
    alpha = options.get('alpha', 0.05)

    def _evaluate(observations):
        # Run Shapiro-Wilk first, then the omnibus test.
        sw_outcome = shapiro(observations)
        dp_outcome = normaltest(observations)
        _, sw_p = sw_outcome
        _, dp_p = dp_outcome
        rejected_by_both = dp_p < alpha and sw_p < alpha
        return (not rejected_by_both, sw_outcome, dp_outcome)

    return [_evaluate(observations) for observations in samples]
def assert_normality(data):
    """Report whether ``data`` passes the Shapiro-Wilk normality test.

    The Shapiro-Wilk test checks the null hypothesis that the data was
    drawn from a normal distribution. The data is treated as normal when
    the p-value is strictly greater than 0.05. The input, the test
    statistic, the p-value and the verdict are printed to stdout.

    Returns:
        ``True`` when the p-value exceeds 0.05, ``False`` otherwise.
    """
    print("Values " + str(data))
    w_stat, p_val = stats.shapiro(data)
    print("Shapiro Statistic " + str(w_stat) + " and p-value " + str(p_val))

    looks_normal = p_val > 0.05
    print("Normal" if looks_normal else "Not normal")
    return True if looks_normal else False
def shapiroWilkTest(nAlgorithms, hyperVolumeList):
    """Run the Shapiro-Wilk test on the first ``nAlgorithms`` entries.

    Each of ``hyperVolumeList[0] .. hyperVolumeList[nAlgorithms - 1]`` is
    converted to a NumPy array and handed to ``stats.shapiro``.

    Returns:
        A list of the ``stats.shapiro`` results, one per tested entry,
        in index order.
    """
    return [
        stats.shapiro(np.array(hyperVolumeList[idx]))
        for idx in range(nAlgorithms)
    ]
def __init__(self, ax=None, algorithm='shapiro', features=None,
             orient='h', show_feature_names=True, **kwargs):
    """
    Initialize the class with the options required to rank and order
    features as well as visualize the result.

    Parameters
    ----------
    ax : optional
        Forwarded to the base-class initializer as ``ax``.
    algorithm : default: 'shapiro'
        Forwarded to the base-class initializer as ``algorithm``.
    features : optional
        Forwarded to the base-class initializer as ``features``.
    orient : default: 'h'
        Stored on the instance as ``orientation_``.
    show_feature_names : default: True
        Forwarded to the base-class initializer.
    kwargs : dict
        Any remaining keyword arguments, passed through to the base class.
    """
    # Forward the caller's axes. The previous code passed a hard-coded
    # ``ax=None`` here, which silently discarded any axes the caller gave.
    super(Rank1D, self).__init__(
        ax=ax, algorithm=algorithm, features=features,
        show_feature_names=show_feature_names, **kwargs
    )
    self.orientation_ = orient
def norm_EDA(vectors, lg, embedding):
    """Run exploratory normality checks on the L2 norms of embedding vectors.

    The L2 norm is taken along axis 1 of ``vectors``, the resulting norms
    are standardized (mean subtracted, divided by the standard deviation),
    a histogram is saved next to a standard-normal PDF, and three
    normality tests are applied to the standardized norms.

    Parameters
    ----------
    vectors : word embedding vectors; must support ``np.linalg.norm`` with
        ``axis=1``.
    lg : language; used as a string in the output file name.
    embedding : embedding name (gensim, polyglot, etc.); used as a string
        in the output file name.

    Returns
    -------
    tuple
        ``(ad_test_stat, ad_crit_val_1, ad_result, ks_p_val, ks_result,
        sh_p_val, sh_result)`` where each ``*_result`` is either
        ``'Reject'`` or ``'Fail to Reject'``.

    Side effects
    ------------
    Writes ``../images/<lg>_<embedding>_norm.png`` and closes all open
    matplotlib figures.
    """
    # L2 norm of vectors, then standardize the distribution of L2 norms.
    vectors_norm = np.linalg.norm(vectors, axis=1)
    vectors_norm_normalized = (
        (vectors_norm - vectors_norm.mean()) / vectors_norm.std()
    )

    # Histogram compared to the standard normal density.
    plt.figure(figsize=(10, 6))
    plt.xlim((-3, 5))
    # ``density=True`` is the replacement for the removed ``normed=True``
    # keyword; it normalizes the histogram the same way.
    plt.hist(vectors_norm_normalized, bins=100, density=True)
    x = np.linspace(-3, 3, 100)
    plt.plot(x, norm.pdf(x, 0, 1), color='r', linewidth=3)
    plt.savefig('../images/' + lg + '_' + embedding + '_norm.png')
    plt.close('all')

    # Anderson-Darling: if the test statistic is greater than the critical
    # value, reject H0 (normality). The last critical value is used as the
    # one for the 1% significance level.
    ad = anderson(vectors_norm_normalized, 'norm')
    ad_test_stat = ad.statistic
    ad_crit_val_1 = ad.critical_values[-1]
    ad_result = 'Reject' if ad_test_stat > ad_crit_val_1 else 'Fail to Reject'

    # Kolmogorov-Smirnov against the standard normal, at the 1% level.
    ks_p_val = kstest(vectors_norm_normalized, 'norm')[1]
    ks_result = 'Reject' if ks_p_val < .01 else 'Fail to Reject'

    # Shapiro-Wilk, at the 1% level.
    sh_p_val = shapiro(vectors_norm_normalized)[1]
    sh_result = 'Reject' if sh_p_val < .01 else 'Fail to Reject'

    result = (ad_test_stat, ad_crit_val_1, ad_result,
              ks_p_val, ks_result,
              sh_p_val, sh_result)
    return result
def draw(path):
    """Plot per-service normality diagnostics and a p-value summary.

    ``metadata.load(path)`` supplies a mapping whose ``"services"`` entry
    is iterated; each service provides a ``"filename"`` and a ``"name"``.
    For every service, the time series is loaded with ``load_timeseries``
    (presumably returning a pandas DataFrame -- confirm against its
    definition), columns that are entirely null or have zero variance are
    dropped, and for each remaining column a box plot is drawn next to the
    results of D'Agostino-Pearson's ``normaltest`` and the Shapiro-Wilk
    test. The figure is saved as ``<name>_normtest.png`` under ``path``.

    Finally the collected p-values of both tests are plotted as two
    histograms and saved as ``normtest_distribution.png`` under ``path``.
    Each saved path is printed.
    """
    data = metadata.load(path)
    p_values_pearson = []
    p_values_shapiro = []

    norm_dist_path = os.path.join(path, "normtest_distribution.png")
    if os.path.exists(norm_dist_path):
        # NOTE(review): the message says "skip" but processing continues;
        # the early return was disabled in the original code. Behavior kept.
        print("path exists %s, skip" % norm_dist_path)

    # Loop-invariant text template for the per-column test summary.
    templ = (
        "Pearson's normtest: statistic: %f p-value: %E -> %s "
        "Shapiro-Wilk test for normality: statistic: %f p-value: %E -> %s "
    )

    for srv in data["services"]:
        filename = os.path.join(path, srv["filename"])
        df = load_timeseries(filename, srv)

        # Keep only columns that contain data and are not constant.
        columns = [
            c for c in df.columns
            if (not df[c].isnull().all()) and df[c].var() != 0
        ]
        df = df[columns]
        n = len(columns)
        if n == 0:
            continue

        # squeeze=False keeps ``axis`` two-dimensional even when n == 1;
        # otherwise ``axis[i, 0]`` raises IndexError for a single column.
        fig, axis = plt.subplots(n, 2, squeeze=False)
        fig.set_figheight(n * 4)
        fig.set_figwidth(30)

        for i, col in enumerate(df.columns):
            serie = df[col].dropna()
            sns.boxplot(x=serie, ax=axis[i, 0])

            statistic_1, p_value_1 = normaltest(serie)
            p_values_pearson.append(p_value_1)
            statistic_2, p_value_2 = shapiro(serie)
            p_values_shapiro.append(p_value_2)

            outcome_1 = "not normal distributed" if p_value_1 < 0.05 else "normal distributed"
            outcome_2 = "not normal distributed" if p_value_2 < 0.05 else "normal distributed"
            text = templ % (statistic_1, p_value_1, outcome_1,
                            statistic_2, p_value_2, outcome_2)

            # The right-hand cell only carries the textual test summary.
            axis[i, 1].axis('off')
            axis[i, 1].text(0.05, 0.05, text, fontsize=18)

        plot_path = os.path.join(path, "%s_normtest.png" % srv["name"])
        fig.savefig(plot_path)
        # Release the figure; one is created per service and would
        # otherwise stay open for the lifetime of the process.
        plt.close(fig)
        print(plot_path)

    fig, axis = plt.subplots(2)
    fig.set_figheight(8)
    # Joining with '' and taking dirname drops any trailing separator.
    measurement = os.path.dirname(os.path.join(path, ''))

    name = "Distribution of p-value for Pearson's normtest for %s" % measurement
    sns.distplot(pd.Series(p_values_pearson, name=name),
                 rug=True, kde=False, norm_hist=False, ax=axis[0])

    name = "Distribution of p-value for Shapiro-Wilk's normtest for %s" % measurement
    sns.distplot(pd.Series(p_values_shapiro, name=name),
                 rug=True, kde=False, norm_hist=False, ax=axis[1])

    fig.savefig(norm_dist_path)
    plt.close(fig)
    print(norm_dist_path)
def rank1d(X, y=None, ax=None, algorithm='shapiro', features=None,
           orient='h', show_feature_names=True, **kwargs):
    """One-off helper that ranks features with a Rank1D visualizer.

    Builds a ``Rank1D`` visualizer from the given options, fits it on
    ``X`` and ``y``, transforms ``X``, and hands back the visualizer's
    axes.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features.

    y : ndarray or Series of length n
        An array or series of target or class values.

    ax : matplotlib axes
        The axes to plot the figure on.

    algorithm : one of {'shapiro', }, default: 'shapiro'
        The ranking algorithm to use; the default is Shapiro-Wilk.

    features : list
        A list of feature names to use. If a DataFrame is passed to fit
        and features is None, feature names are selected as the columns
        of the DataFrame.

    orient : 'h' or 'v'
        Specifies a horizontal or vertical bar chart.

    show_feature_names : boolean, default: True
        If True, the feature names are used to label the axis ticks in
        the plot.

    kwargs : dict
        Extra keyword arguments; passed both to the ``Rank1D``
        constructor and to its ``fit`` call.

    Returns
    -------
    ax : matplotlib axes
        The ``ax`` attribute of the visualizer after fit and transform.
    """
    ranker = Rank1D(
        ax=ax,
        algorithm=algorithm,
        features=features,
        orient=orient,
        show_feature_names=show_feature_names,
        **kwargs
    )

    # Fit on the data, then transform it.
    ranker.fit(X, y, **kwargs)
    ranker.transform(X)

    return ranker.ax