We extracted the following 20 code examples from Python open-source projects to illustrate how to use scipy.stats.kendalltau().
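For quick reference, here is a minimal usage sketch: scipy.stats.kendalltau takes two equal-length sequences and returns the rank correlation together with its p-value. The arrays below are made-up toy data, used only for illustration.

from scipy.stats import kendalltau

# Two toy rankings of the same five items (illustrative data only).
x = [1, 2, 3, 4, 5]
y = [2, 1, 4, 3, 5]

tau, p_value = kendalltau(x, y)
print(tau, p_value)  # tau close to 1 means the two orderings largely agree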
import scipy.stats

def kendalltau(rankA, rankB):
    if len(rankA) != len(rankB):
        raise TypeError("The two rank lists must be of the same length.")
    N = len(rankA)
    if isinstance(rankA[0], tuple):
        rankA = [rankA[i][0] for i in range(N)]
    if isinstance(rankB[0], tuple):
        rankB = [rankB[i][0] for i in range(N)]
    # Convert both rankings to positional indices before computing tau.
    listA = [i for i in range(N)]
    listB = [rankB.index(rankA[i]) for i in range(N)]
    # Call SciPy's kendalltau explicitly; the local function name shadows the
    # import, so an unqualified call here would recurse forever.
    return scipy.stats.kendalltau(listA, listB)[0]
def get_corr_func(method):
    if method in ['kendall', 'spearman']:
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        if isinstance(rs, tuple):
            return rs[0]
        return rs

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    _cor_methods = {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman
    }
    return _cor_methods[method]
def _heuristic_element_order(samples):
    '''
    Finds an order of elements that heuristically facilitates vine
    modelling. For this purpose, Kendall's tau is calculated between
    samples of pairs of elements and elements are scored according to
    the sum of absolute Kendall's taus of pairs the elements appear in.

    Parameters
    ----------
    samples : array_like
        n-by-d matrix of samples where n is the number of samples and d
        is the number of marginals.

    Returns
    -------
    order : array_like
        Permutation of all element indices reflecting descending scores.
    '''
    dim = samples.shape[1]
    # Score elements according to total absolute Kendall's tau
    score = np.zeros(dim)
    for i in range(1, dim):
        for j in range(i):
            tau, _ = kendalltau(samples[:, i], samples[:, j])
            score[i] += np.abs(tau)
            score[j] += np.abs(tau)
    # Get order indices for descending score
    order = score.argsort()[::-1]
    return order
def kendall_tau(y_true, y_pred):
    """
    Calculate Kendall's tau between ``y_true`` and ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: Kendall's tau if well-defined, else 0
    """
    ret_score = kendalltau(y_true, y_pred)[0]
    return ret_score if not np.isnan(ret_score) else 0.0
def kendall_tau(y_true, y_score):
    from scipy.stats import kendalltau
    ret_score = kendalltau(y_true, y_score)[0]
    return ret_score if not np.isnan(ret_score) else 0.0
def calc_correl(self, dev_pred, test_pred):
    dev_prs, _ = pearsonr(dev_pred, self.dev_y_org)
    test_prs, _ = pearsonr(test_pred, self.test_y_org)
    dev_spr, _ = spearmanr(dev_pred, self.dev_y_org)
    test_spr, _ = spearmanr(test_pred, self.test_y_org)
    dev_tau, _ = kendalltau(dev_pred, self.dev_y_org)
    test_tau, _ = kendalltau(test_pred, self.test_y_org)
    return dev_prs, test_prs, dev_spr, test_spr, dev_tau, test_tau
def kendall(y, z, nb_sample=100000):
    """Compute Kendall's correlation coefficient."""
    if len(y) > nb_sample:
        # Subsample to keep the O(n log n) tau computation tractable.
        idx = np.arange(len(y))
        np.random.shuffle(idx)
        idx = idx[:nb_sample]
        y = y[idx]
        z = z[idx]
    return kendalltau(y, z)[0]
def test_corr_rank(self):
    tm._skip_if_no_scipy()

    import scipy
    import scipy.stats as stats

    # kendall and spearman
    A = tm.makeTimeSeries()
    B = tm.makeTimeSeries()
    A[-5:] = A[:5]
    result = A.corr(B, method='kendall')
    expected = stats.kendalltau(A, B)[0]
    self.assertAlmostEqual(result, expected)

    result = A.corr(B, method='spearman')
    expected = stats.spearmanr(A, B)[0]
    self.assertAlmostEqual(result, expected)

    # these methods got rewritten in 0.8
    if scipy.__version__ < LooseVersion('0.9'):
        raise nose.SkipTest("skipping corr rank because of scipy version "
                            "{0}".format(scipy.__version__))

    # results from R
    A = Series(
        [-0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
         -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606])
    B = Series(
        [-1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
         1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375])
    kexp = 0.4319297
    sexp = 0.5853767
    self.assertAlmostEqual(A.corr(B, method='kendall'), kexp)
    self.assertAlmostEqual(A.corr(B, method='spearman'), sexp)
def test_nancorr_kendall(self):
    tm.skip_if_no_package('scipy.stats')
    from scipy.stats import kendalltau

    targ0 = kendalltau(self.arr_float_2d, self.arr_float1_2d)[0]
    targ1 = kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
    self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1,
                                 method='kendall')
    targ0 = kendalltau(self.arr_float_1d, self.arr_float1_1d)[0]
    targ1 = kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
    self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1,
                                 method='kendall')
def compare_scores(byus, bydarpa):
    """byus = { team: score }, bydarpa = { team: score }"""
    assert frozenset(byus.keys()) == frozenset(bydarpa.keys())

    our_ranking = ordered_sets(byus)
    darpa_ranking = ordered_sets(bydarpa)
    our_picks = our_ranking.values()[0]
    darpa_picks = darpa_ranking.values()[0]

    from scipy import stats
    # scipy takes them as ordered lists
    teamorder = list(byus.keys())
    vals_us = [byus[t] for t in teamorder]
    vals_darpa = [bydarpa[t] for t in teamorder]
    tau, p_value = stats.kendalltau(vals_us, vals_darpa)

    def names(teams_set):
        return '[' + ' '.join(sorted(n.split()[0] for n in teams_set)) + ']'

    if our_picks == darpa_picks:
        print "[ ] All first choice(s)", names(our_picks), "match, excellent!"
    elif our_picks.isdisjoint(darpa_picks):
        print "[XX] Our first choice(s)", names(our_picks), " completely different from DARPA's", names(darpa_picks)
    else:
        print "[__] Partial match between our first choice(s) and DARPA's. Both have", names(darpa_picks & our_picks), "(we also have:", names(our_picks - darpa_picks), " -- darpa also has:", names(darpa_picks - our_picks), ")"
    print " FOR US:"
    for score, teams in our_ranking.iteritems():
        print " ", "%+.4f" % score, names(teams)
    print " DARPA:"
    for score, teams in darpa_ranking.iteritems():
        print " ", "%+.4f" % score, names(teams)
    print " %s Kendall tau: %.4f (p-value for being correlated: %.6f)" % (("<7" if tau < 0.7 else "<8") if tau < 0.8 else " ", tau, p_value)
def evaluate(self, data: List[ContextAndQuestion], true_len, **kwargs):
    best_spans = kwargs["span"]
    span_logits = kwargs["score"]
    if self.eval == "triviaqa":
        scores = trivia_span_scores(data, best_spans)
    elif self.eval == "squad":
        scores = squad_span_scores(data, best_spans)
    else:
        raise RuntimeError()

    has_answer = np.array([len(x.answer.answer_spans) > 0 for x in data])

    # Keep the highest-scoring paragraph per question (or per question/doc pair).
    selected_paragraphs = {}
    for i, point in enumerate(data):
        if self.per_doc:
            key = (point.question_id, point.doc_id)
        else:
            key = point.question_id
        if key not in selected_paragraphs:
            selected_paragraphs[key] = i
        elif span_logits[i] > span_logits[selected_paragraphs[key]]:
            selected_paragraphs[key] = i
    selected_paragraphs = list(selected_paragraphs.values())

    out = {
        "question-text-em": scores[selected_paragraphs, 2].mean(),
        "question-text-f1": scores[selected_paragraphs, 3].mean(),
    }

    if self.k_tau:
        out["text-em-k-tau"] = kendalltau(span_logits, scores[:, 2])[0]
        out["text-f1-k-tau"] = kendalltau(span_logits, scores[:, 3])[0]

    if self.paragraph_level:
        out["paragraph-text-em"] = scores[has_answer, 2].mean()
        out["paragraph-text-f1"] = scores[has_answer, 3].mean()

    prefix = "b%d/" % self.bound
    return Evaluation({prefix + k: v for k, v in out.items()})
def evaluate(self, data: List[ContextAndQuestion], true_len, **kargs):
    if self.text_eval == "triviaqa":
        scores = trivia_span_scores(data, kargs["spans"])
    elif self.text_eval == "squad":
        scores = squad_span_scores(data, kargs["spans"])
    else:
        raise RuntimeError()

    has_answer = [len(x.answer.answer_spans) > 0 for x in data]
    aggregated_scores = scores[has_answer].mean(axis=0)
    prefix = "b%d/" % self.bound
    scalars = {
        prefix + "accuracy": aggregated_scores[0],
        prefix + "f1": aggregated_scores[1],
        prefix + "text-accuracy": aggregated_scores[2],
        prefix + "text-f1": aggregated_scores[3]
    }

    if self.rank_metric == "spr":
        metric = spearmanr
    elif self.rank_metric == "k-tau":
        metric = kendalltau
    else:
        raise ValueError()

    if "none_prob" in kargs:
        none_conf = kargs["none_prob"]
        scalars[prefix + "none-text-f1-" + self.rank_metric] = metric(none_conf, scores[:, 3])[0]
        scalars[prefix + "none-span-accuracy-" + self.rank_metric] = metric(none_conf, scores[:, 0])[0]

    conf = kargs["conf"]
    scalars[prefix + "score-text-f1-" + self.rank_metric] = metric(conf, scores[:, 3])[0]
    scalars[prefix + "score-span-accuracy-" + self.rank_metric] = metric(conf, scores[:, 0])[0]
    return Evaluation(scalars)
def distance(a, b):
    #return 1-dot(norm(a),norm(b))  # cosine similarity
    #return sum(pow(a[i]-b[i],2) for i in range(len(b)))  # euclidean norm
    # pearson correlation is negative so lower is better
    #return 1- dot(norm(a),norm(b))
    # tanimoto distance
    #return 1 - dot(a,b)/(dot(a,a) + dot(b,b) - dot(a,b))
    return 1 - sci.kendalltau(a, b)[0]  # kendall tau

# Load the benchmark
def distance(a, b):
    return scipy.spatial.distance.cosine(a, b)  # already includes the 1-cos(ab)
    #return sum(pow(a[i]-b[i],2) for i in range(len(b)))  # euclidean norm
    # pearson correlation is negative so lower is better
    #return 1- dot(norm(a),norm(b))
    # tanimoto distance
    #return 1 - dot(a,b)/(dot(a,a) + dot(b,b) - dot(a,b))
    #return sci.kendalltau(a,b)  # kendall tau

# Load the benchmark
def distance(a, b):
    return 1 - dot(norm(a), norm(b))  # cosine similarity
    #return sum(pow(a[i]-b[i],2) for i in range(len(b)))  # euclidean norm
    # pearson correlation is negative so lower is better
    #return 1- dot(norm(a),norm(b))
    # tanimoto distance
    #return 1 - dot(a,b)/(dot(a,a) + dot(b,b) - dot(a,b))
    #return sci.kendalltau(a,b)  # kendall tau

# Load the benchmark
def select(self, X, Y, select_count=100):
    corr = []
    for i in range(X.shape[1]):
        kd = kendalltau(X[:, i], Y)
        corr.append((i, abs(kd.correlation)))
    # Sort features by |tau|; itemgetter(1) picks the correlation from each
    # (index, correlation) pair, and reverse=True (assumed here) keeps the
    # most strongly correlated features.
    corr = sorted(corr, key=operator.itemgetter(1), reverse=True)[0:select_count]
    indices = [x for x, y in corr]
    return X[:, indices], indices
def do_kendallt(list1, list2, alpha=0.05):
    c, p = kendalltau(list1, list2)
    if p < alpha:
        return c
    return 'n.s.'
def Conf_Measure(RegModel, Train_Data, True_Labels, ModelType):
    Predictions = RegModel.predict(Train_Data)
    tau, p_value = stats.kendalltau(True_Labels, Predictions)
    R2_Measure = r2_score(True_Labels, Predictions)
    print('The Kendall coefficient of ', ModelType, ' model is ', tau, ' with a p-value of ', p_value)
    print('The R Square of ', ModelType, ' model is ', R2_Measure)
    print('')
    return (tau, p_value, R2_Measure)
def concordance(series1, series2, method, nreps=1000):
    """
    Measures the concordance between two pandas Series and returns a
    pvalue and measure of concordance.

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        ['fisher', 'spearman', 'kendalltau', 'empirical', 'cohen']
    nreps : int
        number of repetitions to build the null. Only needed if method is
        'empirical'

    Returns
    -------
    measure : float
        some sort of measure of concordance (e.g. r for the correlation
        methods, n_observed - mean(n_expected) for empirical, etc)
    p : float
        p value of observed concordance between series1 and series2
    """
    if method == 'fisher':
        # Note: this automatically ignores any bugs which were not present
        # in both series.
        mat = pd.crosstab(series1, series2)
        return fisher_exact(mat)
    elif method == 'spearman':
        return spearmanr(series1, series2)
    elif method == 'kendalltau':
        return kendalltau(series1, series2, nan_policy='omit')
    elif method == 'empirical':
        return empirical_pval(series1, series2, nreps)
    elif method == 'cohen':
        tmp = pd.concat((series1, series2), axis=1).dropna()
        return cohen_kappa_score(tmp.iloc[:, 0], tmp.iloc[:, 1]), np.nan
    else:
        raise ValueError('Unknown concordance method.')