The following 8 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.cohen_kappa_score().
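Before the extracted examples, here is a minimal sketch of the basic call; the two label lists below are invented for illustration:

from sklearn.metrics import cohen_kappa_score

rater_a = [0, 1, 1, 0, 2, 1]  # hypothetical labels from one annotator
rater_b = [0, 1, 0, 0, 2, 1]  # hypothetical labels from a second annotator

# 1.0 means perfect agreement, 0.0 chance-level agreement,
# and negative values worse-than-chance agreement.
print(cohen_kappa_score(rater_a, rater_b))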
import numpy as np
from numpy.testing import assert_almost_equal, assert_equal
from sklearn.metrics import cohen_kappa_score

def test_cohen_kappa():
    # These label vectors reproduce the contingency matrix from Artstein and
    # Poesio (2008), Table 1: np.array([[20, 20], [10, 50]]).
    y1 = np.array([0] * 40 + [1] * 60)
    y2 = np.array([0] * 20 + [1] * 20 + [0] * 10 + [1] * 50)
    kappa = cohen_kappa_score(y1, y2)
    assert_almost_equal(kappa, .348, decimal=3)
    assert_equal(kappa, cohen_kappa_score(y2, y1))

    # Add spurious labels and ignore them.
    y1 = np.append(y1, [2] * 4)
    y2 = np.append(y2, [2] * 4)
    assert_equal(cohen_kappa_score(y1, y2, labels=[0, 1]), kappa)

    assert_almost_equal(cohen_kappa_score(y1, y1), 1.)

    # Multiclass example: Artstein and Poesio, Table 4.
    y1 = np.array([0] * 46 + [1] * 44 + [2] * 10)
    y2 = np.array([0] * 52 + [1] * 32 + [2] * 16)
    assert_almost_equal(cohen_kappa_score(y1, y2), .8013, decimal=4)
import numpy as np
from sklearn import metrics

# get_bin_custom, CustomBins, and mean_absolute_percentage_error are
# helpers defined elsewhere in the project.
def print_metrics_regression(y_true, predictions, verbose=1):
    predictions = np.array(predictions)
    predictions = np.maximum(predictions, 0).flatten()
    y_true = np.array(y_true)

    # Discretize continuous targets into the project's custom bins.
    y_true_bins = [get_bin_custom(x, CustomBins.nbins) for x in y_true]
    prediction_bins = [get_bin_custom(x, CustomBins.nbins) for x in predictions]
    cf = metrics.confusion_matrix(y_true_bins, prediction_bins)
    if verbose:
        print("Custom bins confusion matrix:")
        print(cf)

    kappa = metrics.cohen_kappa_score(y_true_bins, prediction_bins,
                                      weights='linear')
    mad = metrics.mean_absolute_error(y_true, predictions)
    mse = metrics.mean_squared_error(y_true, predictions)
    mape = mean_absolute_percentage_error(y_true, predictions)

    if verbose:
        print("Mean absolute deviation (MAD) =", mad)
        print("Mean squared error (MSE) =", mse)
        print("Mean absolute percentage error (MAPE) =", mape)
        print("Cohen kappa score =", kappa)

    return {"mad": mad, "mse": mse, "mape": mape, "kappa": kappa}
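The weights='linear' argument above matters because the custom bins are ordinal: a prediction one bin off should cost less than a prediction three bins off. A minimal sketch with invented bin indices, comparing unweighted and linearly weighted kappa:

from sklearn.metrics import cohen_kappa_score

true_bins = [0, 1, 2, 3, 3]
pred_bins = [0, 2, 2, 3, 1]

# Unweighted kappa treats every disagreement equally; linear weights
# scale each penalty by the distance between the two bins.
print(cohen_kappa_score(true_bins, pred_bins))
print(cohen_kappa_score(true_bins, pred_bins, weights='linear'))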
import numpy as np
from sklearn.metrics import cohen_kappa_score

def _kappa_helper(y_true, y_pred, weights=None):
    # weights can be None, 'linear', or 'quadratic'
    def flatten(y):
        # Collapse one-hot or probability matrices to class indices.
        if len(y.shape) > 1 and y.shape[1] > 1:
            y = np.argmax(y, axis=1)
        y = y.reshape(-1)
        return y

    y_true = flatten(y_true)
    y_pred = flatten(y_pred)
    return cohen_kappa_score(y_true, y_pred, weights=weights)
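A hypothetical call, assuming _kappa_helper and its imports above are in scope; the arrays below are invented. flatten() argmaxes each row down to a class index, so one-hot targets and softmax outputs can be scored directly:

y_true = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])  # one-hot targets
y_pred = np.array([[0.8, 0.1, 0.1],                   # softmax-style outputs
                   [0.2, 0.7, 0.1],
                   [0.1, 0.2, 0.7]])

print(_kappa_helper(y_true, y_pred, weights='quadratic'))  # 1.0: argmax agrees on every row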
import numpy as np
import pandas as pd
from sklearn.metrics import cohen_kappa_score

def results2df(results, dataset, n_ctrl, n_case, n_features):
    """
    Converts the results dictionary into tidy dataframe format.

    Parameters
    ----------
    results : dict
        results from cv_and_roc function
    dataset : str
        dataset ID
    n_ctrl, n_case, n_features : int
        number of controls, cases, and features

    Returns
    -------
    resultsdf : pandas DataFrame
        dataframe with 'mean_fpr', 'mean_tpr', 'fisher_p', and 'roc_auc'
        columns from the results dict, 'kappa' from
        cohen_kappa_score(results['y_true'], results['y_preds']), and
        'dataset', 'H_smpls', 'dis_smpls', and 'num_features' from the
        input parameters
    """
    # Directly calling pd.DataFrame.from_dict doesn't work because
    # this dictionary contains arrays, matrices, etc.
    resultsdf = pd.DataFrame(
        data=np.array((results['mean_fpr'], results['mean_tpr'])).T,
        columns=['mean_fpr', 'mean_tpr'])
    resultsdf['roc_auc'] = results['roc_auc']
    resultsdf['fisher_p'] = results['fisher_p']
    resultsdf['dataset'] = dataset
    resultsdf['H_smpls'] = n_ctrl
    resultsdf['dis_smpls'] = n_case
    resultsdf['num_features'] = n_features
    resultsdf['kappa'] = cohen_kappa_score(
        results['y_true'], results['y_preds'])
    return resultsdf
import numpy as np
import pandas as pd
from scipy.stats import fisher_exact, kendalltau, spearmanr
from sklearn.metrics import cohen_kappa_score

# empirical_pval is a helper defined elsewhere in the project.
def concordance(series1, series2, method, nreps=1000):
    """
    Measures the concordance between two pandas Series and returns a
    pvalue and measure of concordance.

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        ['fisher', 'spearman', 'kendalltau', 'empirical', 'cohen']
    nreps : int
        number of repetitions to build the null. Only needed if method
        is 'empirical'

    Returns
    -------
    measure : float
        some sort of measure of concordance (e.g. r for the correlation
        methods, n_observed - mean(n_expected) for empirical, etc)
    p : float
        p value of observed concordance between series1 and series2
    """
    if method == 'fisher':
        # Note: this automatically ignores any bugs which were not present
        # in both series.
        mat = pd.crosstab(series1, series2)
        return fisher_exact(mat)
    elif method == 'spearman':
        return spearmanr(series1, series2)
    elif method == 'kendalltau':
        return kendalltau(series1, series2, nan_policy='omit')
    elif method == 'empirical':
        return empirical_pval(series1, series2, nreps)
    elif method == 'cohen':
        # Kappa has no analytical p value here, so return NaN in its place.
        tmp = pd.concat((series1, series2), axis=1).dropna()
        return cohen_kappa_score(tmp.iloc[:, 0], tmp.iloc[:, 1]), np.nan
    else:
        raise ValueError('Unknown concordance method.')
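To see just the 'cohen' branch in isolation, here is a sketch with two invented Series; the concat/dropna step keeps only the indexes labeled in both:

import numpy as np
import pandas as pd
from sklearn.metrics import cohen_kappa_score

s1 = pd.Series([1, 0, 1, np.nan, 1], index=list('abcde'))
s2 = pd.Series([1, 0, 0, 1, np.nan], index=list('abcde'))

tmp = pd.concat((s1, s2), axis=1).dropna()  # only rows a, b, c survive
print(cohen_kappa_score(tmp.iloc[:, 0], tmp.iloc[:, 1]))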
from glob import glob
from os.path import exists, join
import numpy as np
from sklearn.metrics import cohen_kappa_score

# imgs_by_class_to_th_array, get_subdirs, locate_config, RetinaRF, RetiNet,
# confusion, and CLASS_LABELS are defined elsewhere in the project.
def evaluate_ensemble(models_dir, test_images, out_dir, rf=False):
    # Get images and true classes
    img_arr, y_true = imgs_by_class_to_th_array(test_images, CLASS_LABELS)
    print(img_arr.shape)
    y_pred_all = []

    # Load each model
    for i, model_dir in enumerate(get_subdirs(models_dir)):
        # Load model
        if rf:
            print("Loading CNN+RF #{}".format(i))
            model_config, rf_pkl = locate_config(model_dir)
            model = RetinaRF(model_config, rf_pkl=rf_pkl)
        else:
            print("Loading CNN #{}".format(i))
            config_file = glob(join(model_dir, '*.yaml'))[0]
            model = RetiNet(config_file).model

        # Predicted probabilities, cached to disk so reruns skip inference
        print("Making predictions...")
        ypred_out = join(out_dir, 'ypred_{}.npy'.format(i))
        if not exists(ypred_out):
            y_preda = model.predict(img_arr)
            np.save(ypred_out, y_preda)
        else:
            y_preda = np.load(ypred_out)

        y_pred_all.append(y_preda)
        y_pred = np.argmax(y_preda, axis=1)

        kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
        confusion(y_true, y_pred, CLASS_LABELS,
                  join(out_dir, 'confusion_split{}_k={:.3f}.png'.format(i, kappa)))

    # Evaluate ensemble
    y_preda_ensemble = np.mean(np.dstack(y_pred_all), axis=2)
    y_pred_ensemble = np.argmax(y_preda_ensemble, axis=1)
    kappa = cohen_kappa_score(y_true, y_pred_ensemble)
    confusion(y_true, y_pred_ensemble, CLASS_LABELS,
              join(out_dir, 'confusion_ensemble_k={:.3f}.png'.format(kappa)))
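The ensemble step above averages each model's class-probability matrix before taking the argmax. A self-contained sketch of just that step, with two invented probability arrays:

import numpy as np

# Hypothetical (n_samples, n_classes) probability outputs from two models.
p1 = np.array([[0.6, 0.4], [0.3, 0.7]])
p2 = np.array([[0.8, 0.2], [0.4, 0.6]])

ensemble = np.mean(np.dstack([p1, p2]), axis=2)  # average probabilities per class
print(np.argmax(ensemble, axis=1))               # ensemble predictions: [0 1]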
import math
import numpy as np
import pandas
from sklearn.metrics import cohen_kappa_score

def cohens_kappa(results, workers):
    """
    Compute Cohen's Kappa on all workers that answered at least 5 HITs
    :param results:
    :return:
    """
    answers_per_worker = {worker_id: {key: results[key][worker_id]
                                      for key in results.keys()
                                      if worker_id in results[key]}
                          for worker_id in workers}
    answers_per_worker = {worker_id: answers
                          for worker_id, answers in answers_per_worker.items()
                          if len(answers) >= 5}
    curr_workers = list(answers_per_worker.keys())
    worker_pairs = [(worker1, worker2) for worker1 in curr_workers
                    for worker2 in curr_workers if worker1 != worker2]

    label_index = {True: 1, False: 0}
    pairwise_kappa = {worker_id: {} for worker_id in answers_per_worker.keys()}

    # Compute pairwise Kappa
    for (worker1, worker2) in worker_pairs:
        mutual_hits = set(answers_per_worker[worker1].keys()).intersection(
            set(answers_per_worker[worker2].keys()))
        mutual_hits = set([hit for hit in mutual_hits if not pandas.isnull(hit)])

        if len(mutual_hits) >= 5:
            worker1_labels = np.array([label_index[answers_per_worker[worker1][key][0]]
                                       for key in mutual_hits])
            worker2_labels = np.array([label_index[answers_per_worker[worker2][key][0]]
                                       for key in mutual_hits])
            curr_kappa = cohen_kappa_score(worker1_labels, worker2_labels)
            if not math.isnan(curr_kappa):
                pairwise_kappa[worker1][worker2] = curr_kappa
                pairwise_kappa[worker2][worker1] = curr_kappa

    # Remove worker answers with low agreement to others
    workers_to_remove = set()
    for worker, kappas in pairwise_kappa.items():
        if np.mean(list(kappas.values())) < 0.1:
            print('Removing %s' % worker)
            workers_to_remove.add(worker)

    # Return the average over all remaining worker pairs
    kappa = np.mean([k for worker1 in pairwise_kappa.keys()
                     for worker2, k in pairwise_kappa[worker1].items()
                     if worker1 not in workers_to_remove
                     and worker2 not in workers_to_remove])

    return kappa, workers_to_remove