我们从Python开源项目中,提取了以下6个代码示例,用于说明如何使用scipy.stats.kruskal()。
def kruskal(data): """ https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html """ if len(data) == 2: statistic, pvalue = stats.kruskal(data[0], data[1]) elif len(data) == 3: statistic, pvalue = stats.kruskal(data[0], data[1], data[2]) elif len(data) == 4: statistic, pvalue = stats.kruskal(data[0], data[1], data[2], data[3]) else: utils.print_error("TODO kruskal manage more values") print("Kruskal Statistic " + str(statistic) + " and p-value " + str(pvalue)) if pvalue > 0.05: # same return False else: # different return True
def main(stats_dir): utils.print_success("Statistical analysis") stats_file = listdir(utils.abs_path_dir(stats_dir)) for filen in stats_file: utils.print_info(filen) data, names = read_data_2(stats_dir, filen) if assert_homoscedasticity(data): if anova(data): tukey(data, names) else: print("All means are the same") else: if kruskal(data): print("cf R") # Dunn # Conover-Iman # Dwass-Steel-Citchlow-Fligner else: print("All means are the same")
def _p_test(self,v,grouped_data,is_continuous,is_categorical, is_normal,min_observed,catlevels, pval=np.nan,ptest='Not tested'): """ Compute p value """ # do not test if any sub-group has no observations if min_observed == 0: warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v)) return pval,ptest # continuous if is_continuous and is_normal: # normally distributed ptest = 'One-way ANOVA' test_stat, pval = stats.f_oneway(*grouped_data) elif is_continuous and not is_normal: # non-normally distributed ptest = 'Kruskal-Wallis' test_stat, pval = stats.kruskal(*grouped_data) # categorical elif is_categorical: # default to chi-squared ptest = 'Chi-squared' chi2, pval, dof, expected = stats.chi2_contingency(grouped_data) # if any expected cell counts are < 5, chi2 may not be valid # if this is a 2x2, switch to fisher exact if expected.min() < 5: if grouped_data.shape == (2,2): ptest = 'Fisher''s exact' oddsratio, pval = stats.fisher_exact(grouped_data) else: ptest = 'Chi-squared (warning: expected count < 5)' warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v)) return pval,ptest
def kruskalWallisTest(nAlgorithms,hyperVolumeList): #stats.kruskal(algoritmo1, algoritmo2) print 'entre a kruskalwallis' kruskal = [] for i in range(nAlgorithms): algorithm = np.array(hyperVolumeList[i]) j =i+1 while j < nAlgorithms: algorithmCompare = np.array(hyperVolumeList[j]) kruskalTest = stats.kruskal(algorithm, algorithmCompare) kruskal.append(kruskalTest) print kruskal j +=1 return kruskal
def _evalstat(x, bsl, meth, n_perm, metric, maxstat, tail): """Statistical evaluation of features [x] = [xn] = (nFce, npts, nTrials) [bsl] = (nFce, nTrials) """ # Get shape of xF : nf, npts, nt = x.shape pvalues = np.ones((nf, npts)) # Permutations : if meth == 'permutation': perm = perm_swaparray(a, b, n_perm=200, axis=-1, rndstate=0) from brainpipe.xPOO.stats import permutation # Pre-define permutations : pObj = permutation(n_perm) perm = np.zeros((n_perm, nf, npts)) # For each permutation : for p in range(n_perm): # Get 1D iterations : ite = product(range(nf), range(npts)) permT = np.random.permutation(2*nt) for f, pts in ite: bs, xs = bsl[f, :], x[f, pts, :] # Reshape data : subX = np.vstack((bsl[f, :], x[f, pts, :])).reshape(2*nt,) # Shuffle data : subX = subX[permT].reshape(nt, 2) # Normalize data : subX = normalize(subX[:, 0], subX[:, 1], norm=norm) # Get mean of data : perm[p, f, pts] = np.mean(subX) # Get final pvalues : pvalues = pObj.perm2p(np.mean(xn, 2), perm, tail=tail, maxstat=maxstat) # Wilcoxon test : elif meth == 'wilcoxon': from scipy.stats import wilcoxon # Get iterations : ite = product(range(nf), range(npts)) # Compute wilcoxon : for k, i in ite: _, pvalues[k, i] = wilcoxon(x[k, i, :], bsl[k, :]) # Kruskal-Wallis : elif meth == 'kruskal': from scipy.stats import kruskal # Get iterations : ite = product(range(nf), range(npts)) # Compute Kruskal-Wallis : for k, i in ite: _, pvalues[k, i] = kruskal(x[k, i, :], bsl[k, :]) return pvalues
def snr(M, list1, list2, threshold = None, significance = False): """ Performs a signal-to-noise ratio test on M, assuming samples are in rows and genes are in columns list1 - List of row indices for first group list2 - List of row indices for second group threshold - Minimum SNR ratio to report significance - Run kruskal ttest (requires scipy) Returns a reverse-ordered list of (ratio, index, mean1, mean2, pvalue) tuples, where index is the column index of the gene, and mean1 and mean2 correspond to the mean for that particular gene in list1 and list2, respectively. pvalue is blank if significance is False. If signifance is true (and scipy is installed) a pvalue will be assigned. Be ware this increases processing time significantly (ha). """ ratios = [] N1 = M.take(tuple(list1), 0) N2 = M.take(tuple(list2), 0) N1mean, N2mean = N1.mean(0), N2.mean(0) means = numpy.abs(N1mean - N2mean) stds = N1.std(0) + N2.std(0) if stds.all(): rats = means / stds else: rats = numpy.zeros((len(means),), dtype=numpy.float32) for i in xrange(len(stds)): if stds[i]: rats[i] = means[i] / stds[i] for i in xrange(M.shape[1]): rat = rats[i] mean1, mean2 = N1mean[i], N2mean[i] if threshold is None or rat >= threshold: if PVAL and significance: pval = st.kruskal(N1[:,i], N2[:,i])[1] else: pval = '' ratios.append( (rat, i, mean1, mean2, pval) ) ratios.sort(reverse=True) return ratios