我们从Python开源项目中，提取了以下25个代码示例，用于说明如何使用scipy.stats.fisher_exact()。
def _p_test(self,v,grouped_data,is_continuous,is_categorical, is_normal,min_observed,catlevels, pval=np.nan,ptest='Not tested'): """ Compute p value """ # do not test if any sub-group has no observations if min_observed == 0: warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v)) return pval,ptest # continuous if is_continuous and is_normal: # normally distributed ptest = 'One-way ANOVA' test_stat, pval = stats.f_oneway(*grouped_data) elif is_continuous and not is_normal: # non-normally distributed ptest = 'Kruskal-Wallis' test_stat, pval = stats.kruskal(*grouped_data) # categorical elif is_categorical: # default to chi-squared ptest = 'Chi-squared' chi2, pval, dof, expected = stats.chi2_contingency(grouped_data) # if any expected cell counts are < 5, chi2 may not be valid # if this is a 2x2, switch to fisher exact if expected.min() < 5: if grouped_data.shape == (2,2): ptest = 'Fisher''s exact' oddsratio, pval = stats.fisher_exact(grouped_data) else: ptest = 'Chi-squared (warning: expected count < 5)' warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v)) return pval,ptest
def test_bin_fisher(intv_bin_ip, intv_bin_con, with_control=True,
                    correction_method='fdr_bh'):
    """Score each bin with a one-sided ('greater') Fisher exact test.

    For bin ``i`` the 2x2 table is
    ``[[ip_i, ip_total - ip_i], [con_i, con_total - con_i]]``, built from
    the first row of each input.

    Args:
        intv_bin_ip: 2-D array-like with exactly one row (indexed as
            ``[0, i]``); the IP counts per bin.
        intv_bin_con: 2-D array-like with the same number of columns;
            only row 0 is read.
        with_control: when true the bin signal is
            ``ip_i / ip_rest / con_i * con_rest``; otherwise it is the
            raw ``ip_i``.
        correction_method: forwarded as ``method`` to ``multipletests``.

    Returns:
        ``(binsignal, binscore_adj)``: the per-bin signal array and
        element ``[1]`` of the ``multipletests`` result (presumably the
        corrected p-values; confirm against the statsmodels docs).

    Raises:
        Exception: if ``intv_bin_ip`` has more than one row.
    """
    if intv_bin_ip.shape[0] != 1:
        raise Exception('Fisher exact test does not deal with replicates.')
    n_bins = intv_bin_ip.shape[1]
    assert n_bins == intv_bin_con.shape[1]

    ip_total = np.sum(intv_bin_ip[0, ])
    con_total = np.sum(intv_bin_con[0, ])
    signals = np.empty(n_bins)
    pvals = np.empty(n_bins)

    for idx in range(n_bins):
        ip_here = intv_bin_ip[0, idx]
        ip_rest = ip_total - ip_here
        con_here = intv_bin_con[0, idx]
        con_rest = con_total - con_here

        # Bins without any IP signal are not tested: p-value fixed at 1.
        if ip_here == 0:
            signals[idx] = np.nan
            pvals[idx] = 1.0
            continue

        table = [[ip_here, ip_rest], [con_here, con_rest]]
        pvals[idx] = fisher_exact(table, alternative='greater')[1]
        signals[idx] = (ip_here / ip_rest / con_here * con_rest
                        if with_control else ip_here)

    corrected = multipletests(pvals, alpha=0.05, method=correction_method)
    return signals, corrected[1]
def fisher_exact(*_args, **_kwargs):
    """Placeholder: accepts any arguments and always raises NotImplementedError."""
    raise NotImplementedError()


### Indices to marginals arguments:
def fisher(cls, *marginals):
    """Scores bigrams using Fisher's Exact Test (Pedersen 1996).

    The marginals are converted to the four contingency cells by
    ``cls._contingency`` and the p-value of the one-sided
    (``alternative='less'``) test on that 2x2 table is returned.

    Less sensitive to small counts than PMI or Chi Sq, but also more
    expensive to compute. Requires scipy.
    """
    n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
    table = [[n_ii, n_io], [n_oi, n_oo]]
    # Index 1 of the result is the p-value; the odds ratio is discarded.
    return fisher_exact(table, alternative='less')[1]
def get_tf_associations(self, test):
    """Collect (position, term, annotation) triples that pass a Fisher exact test.

    Only rows whose ``self.ontological`` value is ``'body'`` or ``'thing'``
    are considered, and the annotation ``'UF'`` is skipped.  For each of
    the positions 0..29, each term occurring at that position and each
    remaining annotation, a 2x2 table is built:

        aa: rows with the annotation that contain the term
        ab: rows with the annotation that do not contain the term
        ba: rows without the annotation that contain the term
        bb: rows without the annotation that do not contain the term

    Parameters
    ----------
    test : {'not dissociated', 'associated'}
        'not dissociated' keeps a triple when the one-sided 'less' test has
        p > .05 (dissociation could not be shown); 'associated' keeps it
        when the one-sided 'greater' test has p < .05.  Any other value
        yields an empty set.

    Returns
    -------
    set of (int, term, annotation) tuples
    """
    if test == 'not dissociated':
        alternative = 'less'
    elif test == 'associated':
        alternative = 'greater'
    else:
        # No triple can ever be accepted for an unrecognised test, so skip
        # the (expensive) enumeration entirely.
        return set()

    tf_set = set()
    annotations = [a for a in set(self.annotation) if a != 'UF']

    for onto in set(self.ontological):
        if onto not in ['body', 'thing']:
            continue
        onto_mask = self.ontological == onto
        d_onto = self.data[onto_mask]
        n_onto = len(d_onto)
        # The per-annotation subsets depend only on (onto, annot); build
        # them once instead of once per term.
        by_annot = {annot: self.data[onto_mask & (self.annotation == annot)]
                    for annot in annotations}

        for li in range(30):
            terms = {w for dd in d_onto for w in dd[li]}
            for term in terms:
                # Rows (any annotation) containing the term at position li.
                n_term = sum(1 for t in d_onto if term in t[li])
                for annot, d_onto_annot in by_annot.items():
                    aa = sum(1 for t in d_onto_annot if term in t[li])  # + term + function
                    ab = len(d_onto_annot) - aa                         # - term + function
                    ba = n_term - aa                                    # + term - function
                    bb = n_onto - (aa + ab + ba)                        # - term - function
                    pvalue = fisher_exact([[aa, ab], [ba, bb]],
                                          alternative=alternative)[1]
                    if alternative == 'less':
                        keep = pvalue > .05
                    else:
                        keep = pvalue < .05
                    if keep:
                        tf_set.add((li, term, annot))
    return tf_set
def _get_fisher_scores_from_counts(self, cat_word_counts, not_cat_word_counts): cat_not_word_counts = cat_word_counts.sum() - cat_word_counts not_cat_not_word_counts = not_cat_word_counts.sum() - not_cat_word_counts def do_fisher_exact(x): return fisher_exact([[x[0], x[1]], [x[2], x[3]]], alternative='greater') odds_ratio, p_values = np.apply_along_axis( do_fisher_exact, 0, np.array([cat_word_counts, cat_not_word_counts, not_cat_word_counts, not_cat_not_word_counts])) return odds_ratio, p_values
def run_test(q1_pos, q2_pos, q1_neg, q2_neg):
    '''
    Run one significance test per 2x2 contingency table.

    The four arguments are parallel arrays; element ``i`` of each forms the
    table ``[[q1_pos[i], q2_pos[i]], [q1_neg[i], q2_neg[i]]]``.  The number
    of tests equals the length of the arrays.  For each table either a
    Fisher exact test or a chi-squared test is run, depending on whether
    ``useFisherExact(table)`` says the requirements for a reliable
    chi-squared test are not satisfied.

    Bonferroni correction is then applied to the p-values: each p-value is
    multiplied by the number of tests and capped at 1.

    Returns a list of two parallel lists: the adjusted p-values, and the
    test statistics (the chi-squared value or the Fisher odds ratio).

    Raises ValueError if the input arrays differ in length.
    '''
    columns = [q1_pos, q2_pos, q1_neg, q2_neg]
    n = len(columns[0])
    if not all(len(col) == n for col in columns):
        raise ValueError("length of input lists must be of same length")

    pvalues = []
    test_values = []
    for a, b, c, d in zip(*columns):
        obs = np.array([[a, b], [c, d]])
        # Run the chosen test once and read both outputs from its result.
        if useFisherExact(obs):
            result = fisher_exact(obs)
        else:
            result = chi2_contingency(obs)
        test_values.append(result[0])
        pvalues.append(result[1])

    # Bonferroni: adjusted p = min(1, p * n).  (Dividing p by n, as the
    # previous code did, makes every result look MORE significant.)
    adjusted_pvalues = [min(1.0, float(p) * n) for p in pvalues]
    return [adjusted_pvalues, test_values]
def concordance(series1, series2, method, nreps=1000):
    """
    Measure the concordance between two pandas Series.

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        One of 'fisher', 'spearman', 'kendalltau', 'empirical', 'cohen'.
    nreps : int
        Number of repetitions used to build the null. Only passed on when
        method is 'empirical'.

    Returns
    -------
    measure : float
        Some sort of measure of concordance (e.g. r for the correlation
        methods, the odds ratio for 'fisher', kappa for 'cohen').
    p : float
        p value of the observed concordance between series1 and series2
        (``np.nan`` for 'cohen').

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    def _fisher():
        # Note: crosstab automatically ignores any bugs which were not
        # present in both series.
        return fisher_exact(pd.crosstab(series1, series2))

    def _cohen():
        paired = pd.concat((series1, series2), axis=1).dropna()
        kappa = cohen_kappa_score(paired.iloc[:, 0], paired.iloc[:, 1])
        return kappa, np.nan

    handlers = {
        'fisher': _fisher,
        'spearman': lambda: spearmanr(series1, series2),
        'kendalltau': lambda: kendalltau(series1, series2,
                                         nan_policy='omit'),
        'empirical': lambda: empirical_pval(series1, series2, nreps),
        'cohen': _cohen,
    }

    try:
        handler = handlers.get(method)
    except TypeError:
        # Unhashable method values are simply unknown methods.
        handler = None
    if handler is None:
        raise ValueError('Unknown concordance method.')
    return handler()
def single_side_pathway_enrichment(pathway_definitions,
                                   gene_signature,
                                   n_genes):
    """Identify overrepresented pathways using the Fisher's exact test for
    significance on a given pathway definition and gene signature.
    (FDR correction for multiple testing is applied in
    `_significant_pathways_dataframe`).

    Parameters
    -----------
    pathway_definitions : dict(str -> set(str))
        Pathway definitions, *post*-overlap-correction if this function
        is called from `pathway_enrichment_with_overlap_correction`.
        A pathway (key) is defined by a set of genes (value).  A tuple of
        sets is also accepted and is replaced by the union of its members.
    gene_signature : set(str)
        The set of genes we consider to be enriched in a feature.
    n_genes : int
        The total number of genes for which we have assigned weights in
        the features of an unsupervised model.

    Returns
    -----------
    pandas.Series, for each pathway, the p-value from applying the Fisher's
      exact test (one-sided, ``alternative="greater"``).  The series is
      empty when `gene_signature` is empty.
    """
    if not gene_signature:
        # Explicit dtype: an empty Series without one warns on pandas 1.x
        # and silently becomes object-typed on pandas 2.x.
        return pd.Series(name="p-value", dtype=float)

    pvalues_list = []
    for definition in pathway_definitions.values():
        if isinstance(definition, tuple):
            definition = set.union(*definition)

        both_definition_and_signature = len(definition & gene_signature)
        in_definition_not_signature = (
            len(definition) - both_definition_and_signature)
        in_signature_not_definition = (
            len(gene_signature) - both_definition_and_signature)
        neither_definition_nor_signature = (
            n_genes
            - both_definition_and_signature
            - in_definition_not_signature
            - in_signature_not_definition)

        contingency_table = np.array(
            [[both_definition_and_signature, in_signature_not_definition],
             [in_definition_not_signature, neither_definition_nor_signature]])

        try:
            _, pvalue = stats.fisher_exact(
                contingency_table, alternative="greater")
        # FPE can occur when `neither_definition_nor_signature` is very
        # large and `both_definition_and_signature` is very small (near zero)
        except FloatingPointError:
            pvalue = 1.0
        pvalues_list.append(pvalue)

    return pd.Series(
        pvalues_list,
        index=list(pathway_definitions),
        name="p-value",
        dtype=float)
def fishers_exact_plot(data, condition1, condition2, ax=None,
                       condition1_value=None,
                       alternative="two-sided", **kwargs):
    """
    Perform a Fisher's exact test to compare two binary columns

    Parameters
    ----------
    data: Pandas dataframe
        Dataframe to retrieve information from

    condition1: str
        First binary column to compare (and used for test sidedness)

    condition2: str
        Second binary column to compare

    ax : Axes, default None
        Axes to plot on

    condition1_value:
        If `condition1` is not a binary column, split on =/!= to condition1_value

    alternative:
        Specify the sidedness of the test: "two-sided", "less"
        or "greater".  Only "two-sided" is currently accepted.

    **kwargs:
        Forwarded to the bar plot call.

    Returns
    -------
    FishersExactResults holding the odds ratio, p-value, sidedness string,
    the `condition2` values with / without condition1, and the plot.

    Raises
    ------
    ValueError
        If `alternative` is anything other than "two-sided".
    """
    # Fail fast: reject unsupported sidedness before drawing or printing
    # anything, instead of after all the work has been done.
    if alternative != "two-sided":
        raise ValueError("We need to better understand the one-sided Fisher's Exact test")
    sided_str = "two-sided"

    plot = sb.barplot(
        x=condition1,
        y=condition2,
        ax=ax,
        data=data,
        **kwargs
    )
    plot.set_ylabel("Percent %s" % condition2)

    condition1_mask = get_condition_mask(data, condition1, condition1_value)
    # NOTE(review): the contingency table is built from the raw condition1
    # column, not from condition1_mask - confirm this is intended when
    # condition1 is not binary.
    count_table = pd.crosstab(data[condition1], data[condition2])
    print(count_table)

    oddsratio, p_value = fisher_exact(count_table, alternative=alternative)
    add_significance_indicator(plot=plot, significant=p_value <= 0.05)
    only_percentage_ticks(plot)

    print("Fisher's Exact Test: OR: {}, p-value={} ({})".format(oddsratio, p_value, sided_str))
    return FishersExactResults(oddsratio=oddsratio,
                               p_value=p_value,
                               sided_str=sided_str,
                               with_condition1_series=data[condition1_mask][condition2],
                               without_condition1_series=data[~condition1_mask][condition2],
                               plot=plot)