The following 8 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.cohen_kappa_score().
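Before the extracted examples, here is a minimal sketch of the basic call; the two label lists below are invented for illustration:

from sklearn.metrics import cohen_kappa_score

rater_a = [0, 1, 1, 0, 2, 1]  # hypothetical labels from one annotator
rater_b = [0, 1, 0, 0, 2, 1]  # hypothetical labels from a second annotator

# 1.0 means perfect agreement, 0.0 chance-level agreement,
# and negative values worse-than-chance agreement.
print(cohen_kappa_score(rater_a, rater_b))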
import numpy as np
from numpy.testing import assert_almost_equal, assert_equal
from sklearn.metrics import cohen_kappa_score

def test_cohen_kappa():
    # These label vectors reproduce the contingency matrix from Artstein and
    # Poesio (2008), Table 1: np.array([[20, 20], [10, 50]]).
    y1 = np.array([0] * 40 + [1] * 60)
    y2 = np.array([0] * 20 + [1] * 20 + [0] * 10 + [1] * 50)
    kappa = cohen_kappa_score(y1, y2)
    assert_almost_equal(kappa, .348, decimal=3)
    assert_equal(kappa, cohen_kappa_score(y2, y1))

    # Add spurious labels and ignore them.
    y1 = np.append(y1, [2] * 4)
    y2 = np.append(y2, [2] * 4)
    assert_equal(cohen_kappa_score(y1, y2, labels=[0, 1]), kappa)

    assert_almost_equal(cohen_kappa_score(y1, y1), 1.)

    # Multiclass example: Artstein and Poesio, Table 4.
    y1 = np.array([0] * 46 + [1] * 44 + [2] * 10)
    y2 = np.array([0] * 52 + [1] * 32 + [2] * 16)
    assert_almost_equal(cohen_kappa_score(y1, y2), .8013, decimal=4)
import numpy as np
from sklearn import metrics

# get_bin_custom, CustomBins, and mean_absolute_percentage_error are
# helpers defined elsewhere in the project.
def print_metrics_regression(y_true, predictions, verbose=1):
    predictions = np.array(predictions)
    predictions = np.maximum(predictions, 0).flatten()
    y_true = np.array(y_true)

    # Discretize continuous targets into the project's custom bins.
    y_true_bins = [get_bin_custom(x, CustomBins.nbins) for x in y_true]
    prediction_bins = [get_bin_custom(x, CustomBins.nbins) for x in predictions]
    cf = metrics.confusion_matrix(y_true_bins, prediction_bins)
    if verbose:
        print("Custom bins confusion matrix:")
        print(cf)

    kappa = metrics.cohen_kappa_score(y_true_bins, prediction_bins,
                                      weights='linear')
    mad = metrics.mean_absolute_error(y_true, predictions)
    mse = metrics.mean_squared_error(y_true, predictions)
    mape = mean_absolute_percentage_error(y_true, predictions)

    if verbose:
        print("Mean absolute deviation (MAD) =", mad)
        print("Mean squared error (MSE) =", mse)
        print("Mean absolute percentage error (MAPE) =", mape)
        print("Cohen kappa score =", kappa)

    return {"mad": mad, "mse": mse, "mape": mape, "kappa": kappa}
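The weights='linear' argument above matters because the custom bins are ordinal: a prediction one bin off should cost less than a prediction three bins off. A minimal sketch with invented bin indices, comparing unweighted and linearly weighted kappa:

from sklearn.metrics import cohen_kappa_score

true_bins = [0, 1, 2, 3, 3]
pred_bins = [0, 2, 2, 3, 1]

# Unweighted kappa treats every disagreement equally; linear weights
# scale each penalty by the distance between the two bins.
print(cohen_kappa_score(true_bins, pred_bins))
print(cohen_kappa_score(true_bins, pred_bins, weights='linear'))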
import numpy as np
from sklearn.metrics import cohen_kappa_score

def _kappa_helper(y_true, y_pred, weights=None):
    # weights can be None, 'linear', or 'quadratic'
    def flatten(y):
        # Collapse one-hot or probability matrices to class indices.
        if len(y.shape) > 1 and y.shape[1] > 1:
            y = np.argmax(y, axis=1)
        y = y.reshape(-1)
        return y

    y_true = flatten(y_true)
    y_pred = flatten(y_pred)
    return cohen_kappa_score(y_true, y_pred, weights=weights)
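A hypothetical call, assuming _kappa_helper and its imports above are in scope; the arrays below are invented. flatten() argmaxes each row down to a class index, so one-hot targets and softmax outputs can be scored directly:

y_true = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])  # one-hot targets
y_pred = np.array([[0.8, 0.1, 0.1],                   # softmax-style outputs
                   [0.2, 0.7, 0.1],
                   [0.1, 0.2, 0.7]])

print(_kappa_helper(y_true, y_pred, weights='quadratic'))  # 1.0: argmax agrees on every row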
import numpy as np
import pandas as pd
from sklearn.metrics import cohen_kappa_score

def results2df(results, dataset, n_ctrl, n_case, n_features):
    """
    Converts the results dictionary into tidy dataframe format.

    Parameters
    ----------
    results : dict
        results from cv_and_roc function
    dataset : str
        dataset ID
    n_ctrl, n_case, n_features : int
        number of controls, cases, and features

    Returns
    -------
    resultsdf : pandas DataFrame
        dataframe with 'mean_fpr', 'mean_tpr', 'fisher_p', and 'roc_auc'
        columns from the results dict, 'kappa' from
        cohen_kappa_score(results['y_true'], results['y_preds']), and
        'dataset', 'H_smpls', 'dis_smpls', and 'num_features' from the
        input parameters
    """
    # Directly calling pd.DataFrame.from_dict doesn't work because
    # this dictionary contains arrays, matrices, etc.
    resultsdf = pd.DataFrame(
        data=np.array((results['mean_fpr'], results['mean_tpr'])).T,
        columns=['mean_fpr', 'mean_tpr'])
    resultsdf['roc_auc'] = results['roc_auc']
    resultsdf['fisher_p'] = results['fisher_p']
    resultsdf['dataset'] = dataset
    resultsdf['H_smpls'] = n_ctrl
    resultsdf['dis_smpls'] = n_case
    resultsdf['num_features'] = n_features
    resultsdf['kappa'] = cohen_kappa_score(
        results['y_true'], results['y_preds'])
    return resultsdf
import numpy as np
import pandas as pd
from scipy.stats import fisher_exact, kendalltau, spearmanr
from sklearn.metrics import cohen_kappa_score

# empirical_pval is a helper defined elsewhere in the project.
def concordance(series1, series2, method, nreps=1000):
    """
    Measures the concordance between two pandas Series and returns a
    pvalue and measure of concordance.

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        ['fisher', 'spearman', 'kendalltau', 'empirical', 'cohen']
    nreps : int
        number of repetitions to build the null. Only needed if method
        is 'empirical'

    Returns
    -------
    measure : float
        some sort of measure of concordance (e.g. r for the correlation
        methods, n_observed - mean(n_expected) for empirical, etc)
    p : float
        p value of observed concordance between series1 and series2
    """
    if method == 'fisher':
        # Note: this automatically ignores any bugs which were not present
        # in both series.
        mat = pd.crosstab(series1, series2)
        return fisher_exact(mat)
    elif method == 'spearman':
        return spearmanr(series1, series2)
    elif method == 'kendalltau':
        return kendalltau(series1, series2, nan_policy='omit')
    elif method == 'empirical':
        return empirical_pval(series1, series2, nreps)
    elif method == 'cohen':
        # Kappa has no analytical p value here, so return NaN in its place.
        tmp = pd.concat((series1, series2), axis=1).dropna()
        return cohen_kappa_score(tmp.iloc[:, 0], tmp.iloc[:, 1]), np.nan
    else:
        raise ValueError('Unknown concordance method.')
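To see just the 'cohen' branch in isolation, here is a sketch with two invented Series; the concat/dropna step keeps only the indexes labeled in both:

import numpy as np
import pandas as pd
from sklearn.metrics import cohen_kappa_score

s1 = pd.Series([1, 0, 1, np.nan, 1], index=list('abcde'))
s2 = pd.Series([1, 0, 0, 1, np.nan], index=list('abcde'))

tmp = pd.concat((s1, s2), axis=1).dropna()  # only rows a, b, c survive
print(cohen_kappa_score(tmp.iloc[:, 0], tmp.iloc[:, 1]))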
from glob import glob
from os.path import exists, join
import numpy as np
from sklearn.metrics import cohen_kappa_score

# imgs_by_class_to_th_array, get_subdirs, locate_config, RetinaRF, RetiNet,
# confusion, and CLASS_LABELS are defined elsewhere in the project.
def evaluate_ensemble(models_dir, test_images, out_dir, rf=False):
    # Get images and true classes
    img_arr, y_true = imgs_by_class_to_th_array(test_images, CLASS_LABELS)
    print(img_arr.shape)
    y_pred_all = []

    # Load each model
    for i, model_dir in enumerate(get_subdirs(models_dir)):
        # Load model
        if rf:
            print("Loading CNN+RF #{}".format(i))
            model_config, rf_pkl = locate_config(model_dir)
            model = RetinaRF(model_config, rf_pkl=rf_pkl)
        else:
            print("Loading CNN #{}".format(i))
            config_file = glob(join(model_dir, '*.yaml'))[0]
            model = RetiNet(config_file).model

        # Predicted probabilities, cached to disk so reruns skip inference
        print("Making predictions...")
        ypred_out = join(out_dir, 'ypred_{}.npy'.format(i))
        if not exists(ypred_out):
            y_preda = model.predict(img_arr)
            np.save(ypred_out, y_preda)
        else:
            y_preda = np.load(ypred_out)

        y_pred_all.append(y_preda)
        y_pred = np.argmax(y_preda, axis=1)

        kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
        confusion(y_true, y_pred, CLASS_LABELS,
                  join(out_dir, 'confusion_split{}_k={:.3f}.png'.format(i, kappa)))

    # Evaluate ensemble
    y_preda_ensemble = np.mean(np.dstack(y_pred_all), axis=2)
    y_pred_ensemble = np.argmax(y_preda_ensemble, axis=1)
    kappa = cohen_kappa_score(y_true, y_pred_ensemble)
    confusion(y_true, y_pred_ensemble, CLASS_LABELS,
              join(out_dir, 'confusion_ensemble_k={:.3f}.png'.format(kappa)))
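The ensemble step above averages each model's class-probability matrix before taking the argmax. A self-contained sketch of just that step, with two invented probability arrays:

import numpy as np

# Hypothetical (n_samples, n_classes) probability outputs from two models.
p1 = np.array([[0.6, 0.4], [0.3, 0.7]])
p2 = np.array([[0.8, 0.2], [0.4, 0.6]])

ensemble = np.mean(np.dstack([p1, p2]), axis=2)  # average probabilities per class
print(np.argmax(ensemble, axis=1))               # ensemble predictions: [0 1]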
import math
import numpy as np
import pandas
from sklearn.metrics import cohen_kappa_score

def cohens_kappa(results, workers):
    """
    Compute Cohen's Kappa on all workers that answered at least 5 HITs
    :param results:
    :return:
    """
    answers_per_worker = {worker_id: {key: results[key][worker_id]
                                      for key in results.keys()
                                      if worker_id in results[key]}
                          for worker_id in workers}
    answers_per_worker = {worker_id: answers
                          for worker_id, answers in answers_per_worker.items()
                          if len(answers) >= 5}
    curr_workers = list(answers_per_worker.keys())
    worker_pairs = [(worker1, worker2) for worker1 in curr_workers
                    for worker2 in curr_workers if worker1 != worker2]

    label_index = {True: 1, False: 0}
    pairwise_kappa = {worker_id: {} for worker_id in answers_per_worker.keys()}

    # Compute pairwise Kappa
    for (worker1, worker2) in worker_pairs:
        mutual_hits = set(answers_per_worker[worker1].keys()).intersection(
            set(answers_per_worker[worker2].keys()))
        mutual_hits = set([hit for hit in mutual_hits if not pandas.isnull(hit)])

        if len(mutual_hits) >= 5:
            worker1_labels = np.array([label_index[answers_per_worker[worker1][key][0]]
                                       for key in mutual_hits])
            worker2_labels = np.array([label_index[answers_per_worker[worker2][key][0]]
                                       for key in mutual_hits])
            curr_kappa = cohen_kappa_score(worker1_labels, worker2_labels)
            if not math.isnan(curr_kappa):
                pairwise_kappa[worker1][worker2] = curr_kappa
                pairwise_kappa[worker2][worker1] = curr_kappa

    # Remove worker answers with low agreement to others
    workers_to_remove = set()
    for worker, kappas in pairwise_kappa.items():
        if np.mean(list(kappas.values())) < 0.1:
            print('Removing %s' % worker)
            workers_to_remove.add(worker)

    # Return the average over all remaining worker pairs
    kappa = np.mean([k for worker1 in pairwise_kappa.keys()
                     for worker2, k in pairwise_kappa[worker1].items()
                     if worker1 not in workers_to_remove
                     and worker2 not in workers_to_remove])

    return kappa, workers_to_remove