Python scipy.stats 模块，kruskal() 实例源码

我们从Python开源项目中，提取了以下6个代码示例，用于说明如何使用scipy.stats.kruskal()。

项目：ISM2017 作者：ybayle | 项目源码 | 文件源码

def kruskal(data):
    """Run a Kruskal-Wallis H-test across every group in ``data``.

    Reference:
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html

    Args:
        data: sequence of groups, each itself a sequence of measurements.
            Any number of groups >= 2 is accepted.

    Returns:
        False when the p-value is above 0.05 (no detected difference),
        True otherwise (groups differ).

    Raises:
        ValueError: if fewer than two groups are supplied.
    """
    if len(data) < 2:
        # The test is undefined for a single group; fail explicitly rather
        # than falling through to an unbound ``pvalue`` below.
        raise ValueError(
            "kruskal needs at least two groups, got " + str(len(data)))
    # Unpacking handles any group count, not just the 2, 3 and 4 cases.
    statistic, pvalue = stats.kruskal(*data)
    print("Kruskal Statistic " + str(statistic) + " and p-value " + str(pvalue))
    # Written as a negation so the result is a plain bool and every value
    # that is not strictly above the threshold is reported as "different".
    return not (pvalue > 0.05)

项目：ISM2017 作者：ybayle | 项目源码 | 文件源码

def main(stats_dir):
    """Run the statistical analysis on every file found in ``stats_dir``.

    For each file the groups are loaded with ``read_data_2``. When
    ``assert_homoscedasticity`` accepts them, ``anova`` is run and followed
    by ``tukey`` if it reports a difference. Otherwise ``kruskal`` is run.

    Args:
        stats_dir: directory holding the statistics files to analyse.
    """
    utils.print_success("Statistical analysis")
    for entry in listdir(utils.abs_path_dir(stats_dir)):
        utils.print_info(entry)
        groups, labels = read_data_2(stats_dir, entry)
        homoscedastic = bool(assert_homoscedasticity(groups))
        if homoscedastic and anova(groups):
            tukey(groups, labels)
        elif not homoscedastic and kruskal(groups):
            # Candidate follow-up tests noted by the author, to be run in R:
            # Dunn, Conover-Iman, Dwass-Steel-Critchlow-Fligner.
            print("cf R")
        else:
            print("All means are the same")

项目：tableone 作者：tompollard | 项目源码 | 文件源码

def _p_test(self,v,grouped_data,is_continuous,is_categorical,
            is_normal,min_observed,catlevels,
            pval=np.nan,ptest='Not tested'):
        """
        Compute a p-value for variable ``v`` and name the test that was used.

        Args:
            v: variable name, used only in warning messages.
            grouped_data: per-group samples for continuous variables; for
                categorical variables a contingency table that exposes
                ``.shape`` (e.g. a numpy array).
            is_continuous: True if the variable is continuous.
            is_categorical: True if the variable is categorical.
            is_normal: True if a continuous variable is treated as normally
                distributed.
            min_observed: smallest number of observations in any sub-group.
            catlevels: not used by this method.
            pval: value returned when no test is run.
            ptest: test name returned when no test is run.

        Returns:
            Tuple ``(pval, ptest)``.
        """

        # do not test if any sub-group has no observations
        if min_observed == 0:
            warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))
            return pval,ptest

        if is_continuous and is_normal:
            # continuous, normally distributed
            ptest = 'One-way ANOVA'
            _, pval = stats.f_oneway(*grouped_data)
        elif is_continuous and not is_normal:
            # continuous, non-normally distributed
            ptest = 'Kruskal-Wallis'
            _, pval = stats.kruskal(*grouped_data)
        elif is_categorical:
            # default to chi-squared
            ptest = 'Chi-squared'
            _, pval, _, expected = stats.chi2_contingency(grouped_data)
            # if any expected cell counts are < 5, chi2 may not be valid
            # if this is a 2x2, switch to fisher exact
            if expected.min() < 5:
                if grouped_data.shape == (2,2):
                    # Double quotes keep the apostrophe; the former
                    # 'Fisher''s exact' concatenated to "Fishers exact".
                    ptest = "Fisher's exact"
                    _, pval = stats.fisher_exact(grouped_data)
                else:
                    ptest = 'Chi-squared (warning: expected count < 5)'
                    # A p-value IS returned here, so warn about its
                    # reliability instead of claiming none was computed.
                    warnings.warn('Chi-squared p-value for {} may be unreliable: at least one expected cell count is < 5.'.format(v))

        return pval,ptest

项目：TFG 作者：alu0100505078 | 项目源码 | 文件源码

def kruskalWallisTest(nAlgorithms,hyperVolumeList):
    """Run a Kruskal-Wallis test on every unordered pair of algorithms.

    Args:
        nAlgorithms: number of algorithms to compare; only the first
            ``nAlgorithms`` entries of ``hyperVolumeList`` are used.
        hyperVolumeList: sequence of per-algorithm sample sequences.

    Returns:
        List of ``stats.kruskal`` results, one per pair (i, j) with i < j,
        ordered (0, 1), (0, 2), ..., (1, 2), ...
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3;
    # the former print statements were a syntax error on Python 3.
    print('entre a kruskalwallis')

    # Convert each sample once instead of once per pair.
    samples = [np.array(hyperVolumeList[i]) for i in range(nAlgorithms)]

    results = []
    for i in range(nAlgorithms):
        for j in range(i + 1, nAlgorithms):
            results.append(stats.kruskal(samples[i], samples[j]))
            # Progress trace kept from the original: the accumulated list.
            print(results)

    return results

项目：brainpipe 作者：EtienneCmb | 项目源码 | 文件源码

def _evalstat(x, bsl, meth, n_perm, metric, maxstat, tail):
    """Statistical evaluation of features against a baseline.

    Produces one p-value per (feature, time point) pair using the method
    selected by ``meth``.

    Args:
        x: 3-D array whose shape is unpacked as (nf, npts, nt); described
            by the original note as (nFce, npts, nTrials).
        bsl: baseline indexed as ``bsl[f, :]``; described by the original
            note as (nFce, nTrials).
        meth: 'permutation', 'wilcoxon' or 'kruskal'. Any other value
            leaves every p-value at its initial value of 1.
        n_perm: number of permutations ('permutation' only).
        metric: not used in this function.
        maxstat: forwarded to ``perm2p`` ('permutation' only).
        tail: forwarded to ``perm2p`` ('permutation' only).

    Returns:
        For 'wilcoxon', 'kruskal' or an unrecognised ``meth``: an
        (nf, npts) array of p-values. For 'permutation': whatever
        ``pObj.perm2p`` returns.

    NOTE(review): the 'permutation' branch uses names that are not defined
    in this function (``a``, ``b``, ``perm_swaparray``, ``norm``,
    ``normalize``, ``xn``). Unless they exist at module level this branch
    raises NameError -- confirm before relying on it. ``product`` is
    presumably ``itertools.product`` imported at module level -- verify.
    """
    # Dimensions of x; p-values start at 1 for every (feature, point).
    nf, npts, nt = x.shape
    pvalues = np.ones((nf, npts))

    # Permutations :
    if meth == 'permutation':
        # NOTE(review): `a` and `b` are not defined here, and `perm` is
        # reassigned a few lines below, so this result is never used.
        perm = perm_swaparray(a, b, n_perm=200, axis=-1, rndstate=0)
        from brainpipe.xPOO.stats import permutation
        # Pre-define permutations :
        pObj = permutation(n_perm)
        perm = np.zeros((n_perm, nf, npts))
        # For each permutation :
        for p in range(n_perm):
            # Iterate over every (feature, point) pair :
            ite = product(range(nf), range(npts))
            # One shuffle order per permutation, shared by all pairs :
            permT = np.random.permutation(2*nt)
            for f, pts in ite:
                # NOTE(review): bs and xs are assigned but never read.
                bs, xs = bsl[f, :], x[f, pts, :]
                # Stack baseline and data into one vector of length 2*nt :
                subX = np.vstack((bsl[f, :], x[f, pts, :])).reshape(2*nt,)
                # Shuffle, then split into two columns of nt values :
                subX = subX[permT].reshape(nt, 2)
                # Normalize data :
                # NOTE(review): `norm` is not a parameter or local here.
                subX = normalize(subX[:, 0], subX[:, 1], norm=norm)
                # Get mean of data :
                perm[p, f, pts] = np.mean(subX)
        # Get final pvalues :
        # NOTE(review): `xn` is not defined in this function -- the
        # docstring note "[x] = [xn]" suggests it relates to x; confirm.
        pvalues = pObj.perm2p(np.mean(xn, 2), perm, tail=tail,
                              maxstat=maxstat)

    # Wilcoxon test :
    elif meth == 'wilcoxon':
        from scipy.stats import wilcoxon
        # Get iterations :
        ite = product(range(nf), range(npts))
        # Test each (feature, point) series against that feature's baseline :
        for k, i in ite:
            _, pvalues[k, i] = wilcoxon(x[k, i, :], bsl[k, :])

    # Kruskal-Wallis :
    elif meth == 'kruskal':
        from scipy.stats import kruskal
        # Get iterations :
        ite = product(range(nf), range(npts))
        # Test each (feature, point) series against that feature's baseline :
        for k, i in ite:
            _, pvalues[k, i] = kruskal(x[k, i, :], bsl[k, :])

    return pvalues

项目：ECoG-ClusterFlow 作者：sugeerth | 项目源码 | 文件源码

def snr(M, list1, list2, threshold = None, significance = False):
    """

    Performs a signal-to-noise ratio test on M, assuming samples are in rows and genes are in columns

        list1       - List of row indices for first group
        list2       - List of row indices for second group
        threshold   - Minimum SNR ratio to report
        significance - Run a Kruskal-Wallis test per gene (requires scipy)

    The ratio for a gene is |mean1 - mean2| / (std1 + std2); it is 0 when
    the summed standard deviation is 0.

    Returns a reverse-ordered list of (ratio, index, mean1, mean2, pvalue) tuples, where index is the column index of the gene,
    and mean1 and mean2 correspond to the mean for that particular gene in list1 and list2, respectively.  pvalue is an empty
    string if significance is False.

    If significance is true (and scipy is installed) a pvalue will be assigned. Beware this increases processing
    time significantly (ha).

    """

    ratios = []

    N1 = M.take(tuple(list1), 0)
    N2 = M.take(tuple(list2), 0)

    N1mean, N2mean = N1.mean(0), N2.mean(0)
    means = numpy.abs(N1mean - N2mean)
    stds  = N1.std(0) + N2.std(0)

    if stds.all():
        rats = means / stds
    else:
        # Some genes have zero spread: leave their ratio at 0 and divide
        # only where the denominator is non-zero.
        # NOTE(review): this path stores float32 while the branch above
        # keeps the input precision; kept as-is so results are unchanged.
        rats = numpy.zeros((len(means),), dtype=numpy.float32)
        nonzero = stds != 0
        rats[nonzero] = means[nonzero] / stds[nonzero]

    # range() instead of the Python 2-only xrange().
    for i in range(M.shape[1]):

        rat = rats[i]
        mean1, mean2 = N1mean[i], N2mean[i]

        if threshold is None or rat >= threshold:

            # Check the caller's flag first so the module-level PVAL flag
            # is only read when significance was requested.
            if significance and PVAL:
                pval = st.kruskal(N1[:,i], N2[:,i])[1]
            else:
                pval = ''

            ratios.append( (rat, i, mean1, mean2, pval) )

    ratios.sort(reverse=True)

    return ratios