The following are 50 code examples, drawn from open-source Python projects, that illustrate how to use scipy.stats.pearsonr().
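As a primer, here is a minimal, self-contained sketch (with made-up data) of the call itself: pearsonr(x, y) takes two equal-length sequences and returns a (coefficient, p-value) pair, which is why most of the examples below index the result with [0].

import numpy as np
from scipy.stats import pearsonr

# Hypothetical data: y is a noisy linear function of x, so r should be close to 1
x = np.arange(50, dtype=float)
y = 2.0 * x + np.random.randn(50)

r, p = pearsonr(x, y)  # correlation coefficient and two-sided p-value
print("r = %.3f, p = %.3g" % (r, p))
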
def corr_fea(df, cols, de=None, bar=0.9):
    from scipy.stats import pearsonr
    xcols = []
    for c, i in enumerate(cols[:-1]):
        for j in cols[c+1:]:
            if i == j:
                continue
            #score = pearsonr(df[i], df[j])[0]
            score = df[i].corr(df[j])
            #print(i, j, score)
            if score > bar:
                df["%s-%s" % (i, j)] = df[i] - df[j]
                if de is not None:
                    de["%s-%s" % (i, j)] = de[i] - de[j]
                xcols.append(j)
            if score < -bar:
                df["%s+%s" % (i, j)] = df[i] + df[j]
                if de is not None:
                    de["%s+%s" % (i, j)] = de[i] + de[j]
                xcols.append(j)
    return xcols

def word_sim_test(filename, pos_vectors):
    delim = ','
    actual_sim_list, pred_sim_list = [], []
    missed = 0

    with open(filename, 'r') as pairs:
        for pair in pairs:
            w1, w2, actual_sim = pair.strip().split(delim)

            try:
                w1_vec = create_word_vector(w1, pos_vectors)
                w2_vec = create_word_vector(w2, pos_vectors)
                pred = float(np.inner(w1_vec, w2_vec))
                actual_sim_list.append(float(actual_sim))
                pred_sim_list.append(pred)
            except KeyError:
                missed += 1

    spearman, _ = st.spearmanr(actual_sim_list, pred_sim_list)
    pearson, _ = st.pearsonr(actual_sim_list, pred_sim_list)

    return spearman, pearson, missed

def sim_getCorrelation(We, words, f, weight4ind, scoring_function, params):
    f = open(f, 'r')
    lines = f.readlines()
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    scores = scoring_function(We, x1, x2, m1, m2, params)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]

def getCorrelation(model, words, f, params=[]):
    f = open(f, 'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    if params and params.weightfile:
        m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        m2 = data_io.seq2weight(x2, m2, params.weight4ind)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]

def get_pearson_coeff(similar_stroke):
    stroke1 = similar_stroke[0]
    stroke2 = similar_stroke[1]
    min_len = min(len(stroke1), len(stroke2))
    sx1 = [stroke1[i][0] for i in range(0, min_len)]
    sx2 = [stroke2[i][0] for i in range(0, min_len)]
    sy1 = [stroke1[i][1] for i in range(0, min_len)]
    sy2 = [stroke2[i][1] for i in range(0, min_len)]
    # Correlate the X coordinates of the two strokes, then the Y coordinates
    # (the original passed sx1 and sy1 here, mixing the two axes)
    x_pearson = pearsonr(sx1, sx2)[0]
    y_pearson = pearsonr(sy1, sy2)[0]
    if x_pearson > 0.5 or y_pearson > 0.5:
        print(similar_stroke[2], similar_stroke[3])
        print(x_pearson, y_pearson)
        plt.plot(sx1, label="Stroke 1 X Co-ordinate")
        plt.plot(sx2, label="Stroke 2 X Co-ordinate")
        plt.plot(sy1, label="Stroke 1 Y Co-ordinate")
        plt.plot(sy2, label="Stroke 2 Y Co-ordinate")
        plt.legend(ncol=2, fancybox=True)
        plt.show()

def eval_sts(ycat, y, name, quiet=False):
    """ Evaluate given STS regression-classification predictions and print results. """
    if ycat.ndim == 1:
        ypred = ycat
    else:
        ypred = loader.sts_categorical2labels(ycat)
    if y.ndim == 1:
        ygold = y
    else:
        ygold = loader.sts_categorical2labels(y)
    pr = pearsonr(ypred, ygold)[0]
    sr = spearmanr(ypred, ygold)[0]
    e = mse(ypred, ygold)
    if not quiet:
        print('%s Pearson: %f' % (name, pr))
        print('%s Spearman: %f' % (name, sr))
        print('%s MSE: %f' % (name, e))
    return STSRes(pr, sr, e)

def getCorrelation(model, words, f):
    f = open(f, 'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[1]; p2 = i[2]; score = float(i[0])
        if len(p1.split()[0].split('_')) == 2:
            X1, X2, SX1, SX2 = getSeqs2(p1, p2, words)
        else:
            X1, X2 = getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1, m1 = utils.prepare_data(seq1)
    x2, m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]

def getCorrelation2(model, words, f):
    f = open(f, 'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    sseq1 = []
    sseq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[1]; p2 = i[2]; score = float(i[0])
        X1, X2, SX1, SX2 = getSeqs2(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        sseq1.append(SX1)
        sseq2.append(SX2)
        golds.append(score)
    x1, m1, s1 = utils.prepare_data2(seq1, sseq1)
    x2, m2, s2 = utils.prepare_data2(seq2, sseq2)
    scores = model.scoring_function2(x1, x2, m1, m2, s1, s2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]

def test_compute_correlations_between_versions_default_columns():
    df_old = pd.DataFrame({'spkitemid': ['a', 'b', 'c'],
                           'feature1': [1.3, 1.5, 2.1],
                           'feature2': [1.1, 6.2, 2.1],
                           'sc1': [2, 3, 4]})
    df_new = pd.DataFrame({'spkitemid': ['a', 'b', 'c'],
                           'feature1': [-1.3, -1.5, -2.1],
                           'feature2': [1.1, 6.2, 2.1],
                           'sc1': [2, 3, 4]})
    df_cors = compute_correlations_between_versions(df_old, df_new)
    assert_equal(df_cors.get_value('feature1', 'old_new'), -1.0)
    assert_equal(df_cors.get_value('feature2', 'old_new'), 1.0)
    assert_equal(df_cors.get_value('feature1', 'human_old'),
                 pearsonr(df_old['feature1'], df_old['sc1'])[0])
    assert_equal(df_cors.get_value('feature1', 'human_new'),
                 pearsonr(df_new['feature1'], df_new['sc1'])[0])
    assert_equal(df_cors.get_value('feature1', "N"), 3)

def test_compute_correlations_between_versions_custom_columns():
    df_old = pd.DataFrame({'id': ['a', 'b', 'c'],
                           'feature1': [1.3, 1.5, 2.1],
                           'feature2': [1.1, 6.2, 2.1],
                           'r1': [2, 3, 4]})
    df_new = pd.DataFrame({'id': ['a', 'b', 'c'],
                           'feature1': [-1.3, -1.5, -2.1],
                           'feature2': [1.1, 6.2, 2.1],
                           'r1': [2, 3, 4]})
    df_cors = compute_correlations_between_versions(df_old, df_new,
                                                    human_score='r1',
                                                    id_column='id')
    assert_equal(df_cors.get_value('feature1', 'old_new'), -1.0)
    assert_equal(df_cors.get_value('feature2', 'old_new'), 1.0)
    assert_equal(df_cors.get_value('feature1', 'human_old'),
                 pearsonr(df_old['feature1'], df_old['r1'])[0])
    assert_equal(df_cors.get_value('feature1', 'human_new'),
                 pearsonr(df_new['feature1'], df_new['r1'])[0])
    assert_equal(df_cors.get_value('feature1', "N"), 3)

def plot_correlation(X, Y, title, corr=None):
    if corr is None:
        corr, _ = stats.pearsonr(X, Y)
    # keep only the points above the 99th percentile
    thresh = np.percentile(Y, 99)
    X90 = X[X > thresh]
    Y90 = Y[X > thresh]
    sample = np.random.choice(X90.shape[0], size=100, replace=False)
    Xsample = X90[sample]
    Ysample = Y90[sample]
    plt.scatter(Xsample, Ysample, color="red")
    plt.xlim([np.min(Xsample), np.max(Xsample)])
    plt.ylim([np.min(Ysample), np.max(Ysample)])
    plt.title("{:s} (corr: {:.3f})".format(title, corr))
    plt.xlabel("X")
    plt.ylabel("Y")

def _corrfunc(x, y, **kws):
    """ Annotate grid with correlation coefficient.
    Solution from http://stackoverflow.com/a/30942817 """
    if args.c == 'spearman':
        r, _ = stats.spearmanr(x, y)
        corr_type = 'Rho'
    elif args.c == 'pearson':
        r, _ = stats.pearsonr(x, y)
        corr_type = 'r'
    else:
        raise Exception('Invalid correlation statistic.')
    correlations.append(r)
    ax = plotter.plt.gca()
    ax.annotate("{} = {:.2f}".format(corr_type, r),
                xy=(.1, .9), xycoords=ax.transAxes)

def train_model(lrmodel, X, Y, devX, devY, devscores):
    """ Train model, using pearsonr on dev for early stopping """
    done = False
    best = -1.0
    r = np.arange(1, 6)

    while not done:
        # Every 100 epochs, check Pearson on development set
        lrmodel.fit(X, Y, verbose=2, shuffle=False, validation_data=(devX, devY))
        yhat = np.dot(lrmodel.predict_proba(devX, verbose=2), r)
        score = pearsonr(yhat, devscores)[0]
        if score > best:
            print(score)
            best = score
            bestlrmodel = copy.deepcopy(lrmodel)
        else:
            done = True

    yhat = np.dot(bestlrmodel.predict_proba(devX, verbose=2), r)
    score = pearsonr(yhat, devscores)[0]
    print('Dev Pearson: ' + str(score))
    return bestlrmodel

def test_partial_fit():
    data = load_diabetes()
    clf = MLPRegressor(n_epochs=1)
    X, y = data['data'], data['target']
    for _ in range(30):
        clf.partial_fit(X, y)
    y_pred = clf.predict(X)
    assert pearsonr(y_pred, y)[0] > 0.5

def joint_plot(x, y, xlabel=None, ylabel=None, xlim=None, ylim=None,
               loc="best", color='#0485d1', size=8, markersize=50,
               kind="kde", scatter_color="r"):
    with sns.axes_style("darkgrid"):
        if xlabel and ylabel:
            g = SubsampleJointGrid(xlabel, ylabel,
                                   data=DataFrame(data={xlabel: x, ylabel: y}),
                                   space=0.1, ratio=2, size=size, xlim=xlim, ylim=ylim)
        else:
            g = SubsampleJointGrid(x, y, size=size,
                                   space=0.1, ratio=2, xlim=xlim, ylim=ylim)
        g.plot_joint(sns.kdeplot, shade=True, cmap="Blues")
        g.plot_sub_joint(plt.scatter, 1000, s=20, c=scatter_color, alpha=0.3)
        g.plot_marginals(sns.distplot, kde=False, rug=False)
        g.annotate(ss.pearsonr, fontsize=25,
                   template="{stat} = {val:.2g}\np = {p:.2g}")
        g.ax_joint.set_yticklabels(g.ax_joint.get_yticks())
        g.ax_joint.set_xticklabels(g.ax_joint.get_xticks())
    return g

def plotCorrelation(stats):
    #columnsToDrop = ['sleep_interval_max_len', 'sleep_interval_min_len',
    #                 'sleep_interval_avg_len', 'sleep_inefficiency',
    #                 'sleep_hours', 'total_hours']
    #stats = stats.drop(columnsToDrop, axis=1)
    g = sns.PairGrid(stats)

    def corrfunc(x, y, **kws):
        r, p = scipystats.pearsonr(x, y)
        ax = plt.gca()
        ax.annotate("r = {:.2f}".format(r), xy=(.1, .9), xycoords=ax.transAxes)
        ax.annotate("p = {:.2f}".format(p), xy=(.2, .8), xycoords=ax.transAxes)
        if p > 0.04:
            ax.patch.set_alpha(0.1)

    g.map_upper(plt.scatter)
    g.map_diag(plt.hist)
    g.map_lower(sns.kdeplot, cmap="Blues_d")
    g.map_upper(corrfunc)
    sns.plt.show()

def pearson_correlation_matrix(X):
    """
    Computes the Pearson correlation matrix

    Keyword arguments:
    X -- The feature vectors
    """
    n_features = len(X[0])
    correlation_matrix = np.zeros(shape=(n_features, n_features))
    for i in range(n_features):
        for j in range(n_features):
            pearson_corr = stats.pearsonr(X[:, i], X[:, j])[0]
            correlation_matrix[i][j] = pearson_corr
    return correlation_matrix

def pearson_between_feature_class(X, y, threshold):
    """
    Computes the Pearson correlation between each feature and the target
    class and keeps the highly correlated feature-class pairs

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    threshold -- Threshold value used to decide which features to keep (above the threshold)
    """
    if verbose:
        print('\nPerforming Feature Selection based on the correlation between each feature and class ...')
    feature_indexes = []
    for i in range(len(X[0])):
        if abs(stats.pearsonr(X[:, i], y)[0]) > threshold:
            feature_indexes += [i]
    if len(feature_indexes) != 0:
        # return selected features and their original indexes
        return X[:, feature_indexes], feature_indexes
    else:
        return X, feature_indexes

def getCorrelation(model, words, f):
    f = open(f, 'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1, m1 = utils.prepare_data(seq1)
    x2, m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]

def pearson(X, y):
    r = []
    p = []
    for c in X.columns:
        r_, p_ = pearsonr(X[c], y)
        r.append(r_)
        p.append(p_)
    dfr = pd.DataFrame(index=range(1, 1 + len(X.columns)))
    dfr['pearson'] = r
    dfr['pearson_p'] = p
    return dfr

def _calculate(self, input):
    # drop rows containing NaNs, then correlate the two columns
    input = input[~np.isnan(input).any(axis=1)]
    return pearsonr(input[:, 0], input[:, 1])

def calcCorrelation(df, col1, col2):
    x, y, n = discardNans(df, col1, col2)
    return stats.pearsonr(x, y)

def run(self):
    self.nepoch = 0
    bestpr = -1
    early_stop_count = 0
    r = np.arange(1, 6)
    stop_train = False

    # Preparing data
    trainX, trainy, devX, devy, testX, testy = self.prepare_data(
        self.train['X'], self.train['y'],
        self.valid['X'], self.valid['y'],
        self.test['X'], self.test['y'])

    # Training
    while not stop_train and self.nepoch <= self.maxepoch:
        self.trainepoch(trainX, trainy, nepoches=50)
        yhat = np.dot(self.predict_proba(devX), r)
        pr = pearsonr(yhat, self.devscores)[0]
        # early stop on Pearson
        if pr > bestpr:
            bestpr = pr
            bestmodel = copy.deepcopy(self.model)
        elif self.early_stop:
            if early_stop_count >= 3:
                stop_train = True
            early_stop_count += 1
    self.model = bestmodel

    yhat = np.dot(self.predict_proba(testX), r)
    return bestpr, yhat

def report_metrics(yhat, y):
    # report metrics of training set
    r2 = r2_score(y, yhat)
    var_exp = explained_variance_score(y, yhat)
    r = stats.pearsonr(yhat, y)[0]
    logger.info('Model metrics for training set: r2={:.2f}, '
                'Variance explained={:.2f}, Pearson\'s r={:.2f}'.format(r2, var_exp, r))

def return_correlations(instances, labels):
    feature_correlation = {}
    nplabels = numpy.array(labels)
    for i in range(instances.shape[1]):
        feature_vals = instances[:, i].toarray()
        corr, p = stats.pearsonr(feature_vals, nplabels)
        feature_correlation[i] = [corr, p]
    # return the full mapping (the original indexed it with the loop
    # variable, which returned only the last feature's correlation)
    return feature_correlation

def calculate_ordinal_correlation_feature_labels(instances, labels):
    # calculate correlation by feature
    feature_correlation = []
    for i in range(instances.shape[1]):
        feature_vals = instances[:, i].transpose().toarray()[0]
        try:
            corr, p = stats.pearsonr(feature_vals, labels)
            if math.isnan(corr):
                corr = 0
        except:
            corr, p = 0, 1.0  # treat failures as "no correlation"
        feature_correlation.append([i, abs(corr), corr, p])
    sorted_feature_correlation = sorted(feature_correlation,
                                        key=lambda k: k[1], reverse=True)
    return sorted_feature_correlation

def calculate_feature_correlation(instances):
    # calculate pairwise correlation between features
    feature_correlation = []
    for i in range(instances.shape[1]):
        feature_vals_i = instances[:, i].transpose().toarray()[0]
        for j in range(i + 1, instances.shape[1]):
            feature_vals_j = instances[:, j].transpose().toarray()[0]
            try:
                corr, p = stats.pearsonr(feature_vals_i, feature_vals_j)
                if math.isnan(corr):
                    corr = 0
            except:
                corr, p = 0, 1.0  # treat failures as "no correlation"
            feature_correlation.append([i, j, abs(corr), corr, p])
    return feature_correlation

def get_scores(self):
    self.model.eval()
    num_classes = self.dataset_cls.NUM_CLASSES
    predict_classes = torch.arange(1, num_classes + 1).expand(self.batch_size, num_classes)
    test_kl_div_loss = 0
    predictions = []
    true_labels = []

    for batch in self.data_loader:
        output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
        test_kl_div_loss += F.kl_div(output, batch.label, size_average=False).data[0]
        # handle last batch which might have smaller size
        if len(predict_classes) != len(batch.sentence_1):
            predict_classes = torch.arange(1, num_classes + 1).expand(len(batch.sentence_1), num_classes)

        if self.data_loader.device != -1:
            with torch.cuda.device(self.device):
                predict_classes = predict_classes.cuda()

        true_labels.append((predict_classes * batch.label.data).sum(dim=1))
        predictions.append((predict_classes * output.data.exp()).sum(dim=1))

        del output

    predictions = torch.cat(predictions).cpu().numpy()
    true_labels = torch.cat(true_labels).cpu().numpy()
    test_kl_div_loss /= len(batch.dataset.examples)
    pearson_r = pearsonr(predictions, true_labels)[0]
    spearman_r = spearmanr(predictions, true_labels)[0]

    return [pearson_r, spearman_r, test_kl_div_loss], ['pearson_r', 'spearman_r', 'KL-divergence loss']

def get_scores(self):
    self.model.eval()
    num_classes = self.dataset_cls.NUM_CLASSES
    predict_classes = torch.arange(0, num_classes).expand(self.batch_size, num_classes)
    test_kl_div_loss = 0
    predictions = []
    true_labels = []

    for batch in self.data_loader:
        output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
        test_kl_div_loss += F.kl_div(output, batch.label, size_average=False).data[0]
        # handle last batch which might have smaller size
        if len(predict_classes) != len(batch.sentence_1):
            predict_classes = torch.arange(0, num_classes).expand(len(batch.sentence_1), num_classes)

        if self.data_loader.device != -1:
            with torch.cuda.device(self.device):
                predict_classes = predict_classes.cuda()

        true_labels.append((predict_classes * batch.label.data).sum(dim=1))
        predictions.append((predict_classes * output.data.exp()).sum(dim=1))

        del output

    predictions = torch.cat(predictions).cpu().numpy()
    true_labels = torch.cat(true_labels).cpu().numpy()
    test_kl_div_loss /= len(batch.dataset.examples)
    pearson_r = pearsonr(predictions, true_labels)[0]

    return [pearson_r, test_kl_div_loss], ['pearson_r', 'KL-divergence loss']

def pearson(y_true, y_pred):
    """
    Calculate Pearson product-moment correlation coefficient between ``y_true`` and
    ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: Pearson product-moment correlation coefficient if well-defined, else 0
    """
    ret_score = pearsonr(y_true, y_pred)[0]
    return ret_score if not np.isnan(ret_score) else 0.0

def pearson_correlation(a, b, topics):
    from scipy.stats import pearsonr
    a = fill_list_from_dict(a, topics)
    b = fill_list_from_dict(b, topics)
    return pearsonr(a, b)[0]

def test_phase_randomize():
    from brainiak.utils.utils import phase_randomize
    import numpy as np
    from scipy.fftpack import fft
    import math
    from scipy.stats import pearsonr

    # Generate auto-correlated signals
    nv = 2
    T = 100
    ns = 3
    D = np.zeros((nv, T, ns))
    for v in range(nv):
        for s in range(ns):
            D[v, :, s] = np.sin(np.linspace(0, math.pi * 5 * (v + 1), T)) + \
                         np.sin(np.linspace(0, math.pi * 6 * (s + 1), T))

    freq = fft(D, axis=1)
    D_pr = phase_randomize(D)
    freq_pr = fft(D_pr, axis=1)
    p_corr = pearsonr(np.angle(freq).flatten(), np.angle(freq_pr).flatten())[0]

    assert np.isclose(abs(freq), abs(freq_pr)).all(), \
        "Amplitude spectrum not preserved under phase randomization"
    assert abs(p_corr) < 0.03, \
        "Phases still correlated after randomization"

def plot_pearson(name):
    """Plot the Pearson coeff of the neurons for each layer"""
    data_array = utils.get_data(name)
    ws = data_array['weights']
    f = plt.figure(figsize=(12, 8))
    axes = f.add_subplot(111)
    # The number of neurons in each layer - TODO: need to change it to be auto
    sizes = [10, 7, 5, 4, 3, 2]
    # The mean of Pearson coeffs of all the layers
    pearson_mean = []
    # Go over all the layers
    for layer in range(len(sizes)):
        inner_pearson_mean = []
        # Go over all the weights in the layer
        for k in range(len(ws)):
            ws_current = np.squeeze(ws[k][0][0][-1])
            # Go over the neurons
            for neuron in range(len(ws_current[layer])):
                pearson_t = []
                # Go over the rest of the neurons
                for neuron_second in range(neuron + 1, len(ws_current[layer])):
                    pearson_c, p_val = sis.pearsonr(ws_current[layer][neuron],
                                                    ws_current[layer][neuron_second])
                    pearson_t.append(pearson_c)
                inner_pearson_mean.append(np.mean(pearson_t))
        pearson_mean.append(np.mean(inner_pearson_mean))
    # Plot the coeffs
    axes.bar(np.arange(1, 7), np.abs(np.array(pearson_mean)) * np.sqrt(sizes), align='center')
    axes.set_xlabel('Layer')
    axes.set_ylabel('Abs(Pearson)*sqrt(N_i)')
    rects = axes.patches
    # Now make some labels
    labels = ["L%d (%d neurons)" % (i, j) for i, j in zip(range(len(rects)), sizes)]
    plt.xticks(np.arange(1, 7), labels)

def circ_corrcc(alpha, x):
    """Correlation coefficient between one circular and one linear random variable.

    Args:
        alpha: vector
            Sample of angles in radians
        x: vector
            Sample of linear random variable

    Returns:
        rho: float
            Correlation coefficient
        pval: float
            p-value

    Code taken from the Circular Statistics Toolbox for Matlab
    By Philipp Berens, 2009
    Python adaptation by Etienne Combrisson
    """
    if len(alpha) != len(x):
        raise ValueError('The length of alpha and x must be the same')
    n = len(alpha)

    # Compute correlation coefficient for sin and cos independently
    rxs = pearsonr(x, np.sin(alpha))[0]
    rxc = pearsonr(x, np.cos(alpha))[0]
    rcs = pearsonr(np.sin(alpha), np.cos(alpha))[0]

    # Compute angular-linear correlation (equ. 27.47)
    rho = np.sqrt((rxc**2 + rxs**2 - 2 * rxc * rxs * rcs) / (1 - rcs**2))

    # Compute p-value
    pval = 1 - chi2.cdf(n * rho**2, 2)
    return rho, pval

def compare_distances(A, B, random_samples=[], s=200, pvalues=False):
    if len(random_samples) == 0:
        # pick up to s random columns to compare
        random_samples = np.zeros(A.shape[1], dtype=bool)
        random_samples[:min(s, A.shape[1])] = True
        np.random.shuffle(random_samples)
    dist_x = distance.pdist(A[:, random_samples].T, 'euclidean')
    dist_y = distance.pdist(B[:, random_samples].T, 'euclidean')
    pear = pearsonr(dist_x, dist_y)
    spear = spearmanr(dist_x, dist_y)
    if pvalues:
        return pear, spear
    else:
        return pear[0], spear[0]

def sum_corr(view1, view2, flag=''):
    print("test correlation")
    corr = 0
    for i, j in zip(view1, view2):
        corr += measures.pearsonr(i, j)[0]
    print('avg sum corr ::', flag, '::', corr / len(view1))

def cal_sim(model, ind1, ind2=1999):
    view1 = np.load("test_v1.npy")[0:ind1]
    view2 = np.load("test_v2.npy")[0:ind2]
    label1 = np.load('test_l.npy')
    x1 = project(model, [view1, np.zeros_like(view1)])
    x2 = project(model, [np.zeros_like(view2), view2])
    label2 = []
    count = 0
    MAP = 0
    for i, j in enumerate(x1):
        cor = []
        AP = 0
        for y in x2:
            temp1 = j.tolist()
            temp2 = y.tolist()
            cor.append(pearsonr(temp1, temp2))
        #if i == np.argmax(cor):
        #    count += 1
        #val = [(q, (i * ind1 + p)) for p, q in enumerate(cor)]
        val = [(q, p) for p, q in enumerate(cor)]
        val.sort()
        val.reverse()
        label2.append(val[0:4])
        t = [w[1] for w in val[0:7]]
        #print(t)
        for x, y in enumerate(t):
            if y in range(i, i + 5):
                AP += 1 / (x + 1)
        print(t)
        print(AP)
        MAP += AP
    #print('accuracy :- ', float(count) * 100 / ind1, '%')
    print('MAP is : ', MAP / ind1)

def pearson_scorer(estimator, X, y):
    logging.info('predicting ...')
    # predict from the features, not the targets (the original passed y here),
    # and return just the coefficient so the scorer yields a scalar
    predicted = estimator.predict(X)
    return pearsonr(list(predicted), y)[0]

def calc_correl(self, dev_pred, test_pred):
    dev_prs, _ = pearsonr(dev_pred, self.dev_y_org)
    test_prs, _ = pearsonr(test_pred, self.test_y_org)
    dev_spr, _ = spearmanr(dev_pred, self.dev_y_org)
    test_spr, _ = spearmanr(test_pred, self.test_y_org)
    dev_tau, _ = kendalltau(dev_pred, self.dev_y_org)
    test_tau, _ = kendalltau(test_pred, self.test_y_org)
    return dev_prs, test_prs, dev_spr, test_spr, dev_tau, test_tau

def check_similarity_match(X_embed, S):
    """
    Since SimEcs are supposed to project the data into an embedding space
    where the target similarities can be linearly approximated, check if
    X_embed*X_embed^T = S (check mean squared error and Spearman correlation coefficient)

    Inputs:
        - X_embed: Nxd matrix with coordinates in the embedding space
        - S: NxN matrix with target similarities (do whatever transformations
          were done before using this as input to the SimEc, e.g. centering, etc.)

    Returns:
        - msqe, rho, r: mean squared error, Spearman and Pearson correlation
          coefficient between linear kernel of embedding and target similarities
          (mean squared error is more exact, corrcoef a more relaxed error measure)
    """
    # compute linear kernel as approximated similarities
    S_approx = X_embed.dot(X_embed.T)
    # to get results that are comparable across similarity measures, we have to
    # normalize them somehow, in this case by dividing by the absolute max value
    # of the target similarity matrix
    n = np.max(np.abs(S))
    S_norm = S / n
    S_approx /= n
    # compute mean squared error
    msqe = np.mean((S_norm - S_approx) ** 2)
    # compute Spearman correlation coefficient
    rho = spearmanr(S_norm.flatten(), S_approx.flatten())[0]
    # compute Pearson correlation coefficient
    r = pearsonr(S_norm.flatten(), S_approx.flatten())[0]
    return msqe, rho, r

def compute_score(self, conf, hy):
    conf['_r2'] = r2_score(self.test_y, hy)
    conf['_spearmanr'] = spearmanr(self.test_y, hy)[0]
    conf['_pearsonr'] = pearsonr(self.test_y, hy)[0]
    conf['_score'] = conf['_' + self.score]
    # print(conf)

def generate_two_correlated_time_series(size, rho):
    num_samples = size
    num_variables = 2
    cov = [[1.0, rho], [rho, 1.0]]
    L = np.linalg.cholesky(cov)
    uncorrelated = np.random.standard_normal((num_variables, num_samples))
    correlated = np.dot(L, uncorrelated)
    x, y = correlated
    # empirical correlation of the generated series (shadows the target rho)
    rho, p_val = stats.pearsonr(x, y)
    return x, y, rho

def _plot_correlation_func(x, y):
    r, p = pearsonr(x, y)
    title = "Cor($X_1$, $X_2$) = %.3f" % r
    pylab.scatter(x, y)
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    f1 = scipy.poly1d(scipy.polyfit(x, y, 1))
    pylab.plot(x, f1(x), "r--", linewidth=2)
    # pylab.xticks([w * 7 * 24 for w in [0, 1, 2, 3, 4]],
    #              ['week %i' % (w + 1) for w in [0, 1, 2, 3, 4]])

def SubCorr_statistic(self, data_x=None, data_y=None):
    if data_x is None:
        data_x = self.data_x
    if data_y is None:
        data_y = self.data_y
    dx = shape(data_x)[1]
    stats_value = zeros(dx)
    for dd in range(dx):
        stats_value[dd] = pearsonr(data_x[:, [dd]], data_y)[0] ** 2
    SubCorr = sum(stats_value) / float(dx)
    return SubCorr

def test_corr(self):
    tm._skip_if_no_scipy()
    import scipy.stats as stats

    # full overlap
    self.assertAlmostEqual(self.ts.corr(self.ts), 1)

    # partial overlap
    self.assertAlmostEqual(self.ts[:15].corr(self.ts[5:]), 1)
    self.assertTrue(isnull(self.ts[:15].corr(self.ts[5:], min_periods=12)))

    ts1 = self.ts[:15].reindex(self.ts.index)
    ts2 = self.ts[5:].reindex(self.ts.index)
    self.assertTrue(isnull(ts1.corr(ts2, min_periods=12)))

    # No overlap
    self.assertTrue(np.isnan(self.ts[::2].corr(self.ts[1::2])))

    # all NA
    cp = self.ts[:10].copy()
    cp[:] = np.nan
    self.assertTrue(isnull(cp.corr(cp)))

    A = tm.makeTimeSeries()
    B = tm.makeTimeSeries()
    result = A.corr(B)
    expected, _ = stats.pearsonr(A, B)
    self.assertAlmostEqual(result, expected)

def de_ps(X, y):
    dim = X.shape[1]
    de = min(2000, dim)
    # score function: Pearson r (and p-value) of each column of X against y
    # (a list comprehension replaces the original Python 2 map() call)
    clf = SelectKBest(lambda X, Y: np.array([pearsonr(x, Y) for x in X.T]).T, k=de)
    clf.fit(X, y)

    def _func(X1, X2):
        return clf.transform(X1), clf.transform(X2)

    return _func

def pearson(mat1, mat2):
    """Pearson correlation between two matrices, ignoring zeroes"""
    assert mat1.shape == mat2.shape

    # convert to vectors
    vec1 = mat1.flatten()
    vec2 = mat2.flatten()

    # remove zeroes
    nonzero = [i for i in range(len(vec1)) if vec1[i] != 0 and vec2[i] != 0]
    vec1 = vec1[nonzero]
    vec2 = vec2[nonzero]

    r, p = st.pearsonr(vec1, vec2)
    return r

def RSA(m1, m2):
    '''RSA analysis will compare the similarity of two matrices'''
    from scipy.stats import pearsonr
    import numpy

    # Mask the upper triangle (including the diagonal) of each matrix,
    # turning it to nan, and flatten the remaining half to a vector
    vectorm1 = m1.mask(numpy.triu(numpy.ones(m1.shape)).astype(bool)).values.flatten()
    vectorm2 = m2.mask(numpy.triu(numpy.ones(m2.shape)).astype(bool)).values.flatten()

    # Now remove the nans
    m1defined = numpy.argwhere(~numpy.isnan(numpy.array(vectorm1, dtype=float)))
    m2defined = numpy.argwhere(~numpy.isnan(numpy.array(vectorm2, dtype=float)))
    idx = numpy.intersect1d(m1defined, m2defined)
    return pearsonr(vectorm1[idx], vectorm2[idx])[0]

def forward(self, bottom, top):
    """Compute the SROCC and LCC and output them to top."""
    #ipdb.set_trace()
    testPreds = bottom[0].data
    testPreds = np.reshape(testPreds, testPreds.shape[0])
    testLabels = bottom[1].data
    testLabels = np.reshape(testLabels, testLabels.shape[0])
    top[0].data[...] = stats.spearmanr(testPreds, testLabels)[0]
    top[1].data[...] = stats.pearsonr(testPreds, testLabels)[0]