Python sklearn.metrics 模块，roc_auc_score() 实例源码

我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用sklearn.metrics.roc_auc_score()。

项目：melanoma-transfer 作者：learningtitans | 项目源码 | 文件源码

def calc_auc(y_pred_proba, labels, exp_run_folder, classifier, fold):

    auc = roc_auc_score(labels, y_pred_proba)
    fpr, tpr, thresholds = roc_curve(labels, y_pred_proba)
    curve_roc = np.array([fpr, tpr])
    dataile_id = open(exp_run_folder+'/data/roc_{}_{}.txt'.format(classifier, fold), 'w+')
    np.savetxt(dataile_id, curve_roc)
    dataile_id.close()
    plt.plot(fpr, tpr, label='ROC curve: AUC={0:0.2f}'.format(auc))
    plt.xlabel('1-Specificity')
    plt.ylabel('Sensitivity')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.grid(True)
    plt.title('ROC Fold {}'.format(fold))
    plt.legend(loc="lower left")
    plt.savefig(exp_run_folder+'/data/roc_{}_{}.pdf'.format(classifier, fold), format='pdf')
    return auc

项目：human-rl 作者：gsastry | 项目源码 | 文件源码

def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics

项目：aed-by-cnn 作者：tweihaha | 项目源码 | 文件源码

def auc_score(res_list):
    gp_list = np.array([])
    anno_list = np.array([])
    for res in res_list:
        g_pred = res.g_pred
        anno = res.annotation
        if g_pred.shape[-1] < anno.shape[-1]:
            anno = np.delete(anno, range(g_pred.shape[-1], anno.shape[-1]), axis=-1)
        elif g_pred.shape[-1] > anno.shape[-1]:
            g_pred = np.delete(g_pred, range(anno.shape[-1], g_pred.shape[-1]), axis=-1)
        gp_list = g_pred.T if len(gp_list) == 0 else np.append(gp_list, g_pred.T, axis=0)
        anno_list = anno.T if len(anno_list) == 0 else np.append(anno_list, anno.T, axis=0)

    assert(gp_list.shape == anno_list.shape)

    from sklearn.metrics import roc_auc_score
    class_auc = roc_auc_score(anno_list, gp_list, average=None)
    print('AUC of Classes:')
    print(class_auc)
    all_micro_auc = roc_auc_score(anno_list, gp_list, average='micro')
    print('Total micro AUC: {}'.format(all_micro_auc))

    all_macro_auc = roc_auc_score(anno_list, gp_list, average='macro')
    print('Total macro AUC: {}'.format(all_macro_auc))

项目：human-rl 作者：gsastry | 项目源码 | 文件源码

def metrics(self, X, y):
        metrics = {}
        y_pred_pair, loss = self.predict_proba_with_loss(X, y)
        y_pred = y_pred_pair[:,1]  ## From softmax pair to prob of catastrophe

        metrics['loss'] = loss
        threshold = self.threshold_from_data(X, y)
        metrics['threshold'] = threshold
        metrics['np.std(y_pred)'] = np.std(y_pred)
        denom = np.count_nonzero(y == False)
        num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
        metrics['fpr'] = float(num) / float(denom)
        if any(y) and not all(y):
            metrics['auc'] = roc_auc_score(y, y_pred)
            y_pred_bool = y_pred >= threshold
            if (any(y_pred_bool) and not all(y_pred_bool)):
                metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
                metrics['recall'] = recall_score(y, y_pred_bool)

        return metrics

项目：human-rl 作者：gsastry | 项目源码 | 文件源码

def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics

项目：human-rl 作者：gsastry | 项目源码 | 文件源码

def metrics(self, X, y):
        metrics = {}
        y_pred_pair, loss = self.predict_proba_with_loss(X, y)
        y_pred = y_pred_pair[:,1]  ## From softmax pair to prob of catastrophe

        metrics['loss'] = loss
        threshold = self.threshold_from_data(X, y)
        metrics['threshold'] = threshold
        metrics['np.std(y_pred)'] = np.std(y_pred)
        denom = np.count_nonzero(y == False)
        num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
        metrics['fpr'] = float(num) / float(denom)
        if any(y) and not all(y):
            metrics['auc'] = roc_auc_score(y, y_pred)
            y_pred_bool = y_pred >= threshold
            if (any(y_pred_bool) and not all(y_pred_bool)):
                metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
                metrics['recall'] = recall_score(y, y_pred_bool)

        return metrics

项目：human-rl 作者：gsastry | 项目源码 | 文件源码

def metrics(self, X, y):
        metrics = {}
        y_pred_pair, loss = self.predict_proba_with_loss(X, y)
        y_pred = y_pred_pair[:,1]  ## From softmax pair to prob of catastrophe

        metrics['loss'] = loss
        threshold = self.threshold_from_data(X, y)
        metrics['threshold'] = threshold
        metrics['np.std(y_pred)'] = np.std(y_pred)
        denom = np.count_nonzero(y == False)
        num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
        metrics['fpr'] = float(num) / float(denom)
        if any(y) and not all(y):
            metrics['auc'] = roc_auc_score(y, y_pred)
            y_pred_bool = y_pred >= threshold
            if (any(y_pred_bool) and not all(y_pred_bool)):
                metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
                metrics['recall'] = recall_score(y, y_pred_bool)

        return metrics

项目：human-rl 作者：gsastry | 项目源码 | 文件源码

def metrics(self, X, y):
        metrics = {}
        y_pred_pair, loss = self.predict_proba_with_loss(X, y)
        y_pred = y_pred_pair[:,1]  ## From softmax pair to prob of catastrophe

        metrics['loss'] = loss
        threshold = self.threshold_from_data(X, y)
        metrics['threshold'] = threshold
        metrics['np.std(y_pred)'] = np.std(y_pred)
        denom = np.count_nonzero(y == False)
        num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
        metrics['fpr'] = float(num) / float(denom)
        if any(y) and not all(y):
            metrics['auc'] = roc_auc_score(y, y_pred)
            y_pred_bool = y_pred >= threshold
            if (any(y_pred_bool) and not all(y_pred_bool)):
                metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
                metrics['recall'] = recall_score(y, y_pred_bool)

        return metrics

项目：human-rl 作者：gsastry | 项目源码 | 文件源码

def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics

项目：human-rl 作者：gsastry | 项目源码 | 文件源码

def metrics(self, X, y):
        metrics = {}
        y_pred_pair, loss = self.predict_proba_with_loss(X, y)
        y_pred = y_pred_pair[:,1]  ## From softmax pair to prob of catastrophe

        metrics['loss'] = loss
        threshold = self.threshold_from_data(X, y)
        metrics['threshold'] = threshold
        metrics['np.std(y_pred)'] = np.std(y_pred)
        denom = np.count_nonzero(y == False)
        num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
        metrics['fpr'] = float(num) / float(denom)
        if any(y) and not all(y):
            metrics['auc'] = roc_auc_score(y, y_pred)
            y_pred_bool = y_pred >= threshold
            if (any(y_pred_bool) and not all(y_pred_bool)):
                metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
                metrics['recall'] = recall_score(y, y_pred_bool)

        return metrics

项目：human-rl 作者：gsastry | 项目源码 | 文件源码

def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics

项目：ProtScan 作者：gianlucacorrado | 项目源码 | 文件源码

def score(self, profiles, bin_sites):
        """Compute AUC ROC from predictions."""
        app_profiles = list()
        app_true_vals = list()
        for k, profile in profiles.iteritems():
            app_profiles.append(profile)
            true_vals = np.zeros(len(profile))
            bins = bin_sites.get(k, False)
            if bins is not False:
                for s, e, _ in bins:
                    true_vals[s:e] = 1
            app_true_vals.append(true_vals)
        vec_profiles = np.concatenate(app_profiles)
        vec_true_vals = np.concatenate(app_true_vals)
        roc_auc = roc_auc_score(vec_true_vals, vec_profiles)
        return roc_auc

项目：PersonalizedMultitaskLearning 作者：mitmedialab | 项目源码 | 文件源码

def getAccuracyAucOnAllTasks(self, task_list):
        all_task_Y = []
        all_preds = []
        for i in range(len(task_list)):
            preds, task_Y = self.getPredsTrueOnOneTask(task_list,i)
            if preds is None:
                # Skipping task because it does not have valid data
                continue
            if len(task_Y)>0:
                all_task_Y.extend(task_Y)
                all_preds.extend(preds)
        if not helper.containsEachLabelType(all_preds):
            print "for some bizarre reason, the preds for all tasks are the same class"
            print "preds", all_preds
            print "true_y", all_task_Y
            auc = np.nan
        else:
            auc=roc_auc_score(all_task_Y, all_preds)
        acc=hblr.getBinaryAccuracy(all_preds,all_task_Y)
        return acc,auc

项目：PersonalizedMultitaskLearning 作者：mitmedialab | 项目源码 | 文件源码

def getAccuracyAucOnOneTask(self, task_list, task, debug=False):
        X_t, y_t = self.extractTaskData(task_list,task)
        if len(X_t) == 0:
            return np.nan, np.nan

        preds = self.internal_predict(X_t, int(task))

        if debug:
            print "y_t:", y_t
            print "preds:", preds

        acc = helper.getBinaryAccuracy(preds,y_t)
        if len(y_t) > 1 and helper.containsEachSVMLabelType(y_t) and helper.containsEachSVMLabelType(preds):
            auc = roc_auc_score(y_t, preds)
        else:
            auc = np.nan

        return acc, auc

项目：deep-mil-for-whole-mammogram-classification 作者：wentaozhu | 项目源码 | 文件源码

def on_epoch_end(self, epoch, logs={}):
    if epoch % self.interval == 0:
      y_pred = self.model.predict(self.X_val, verbose=0)
      #print(np.sum(y_pred[:,1]))
      #y_true = np.argmax(self.y_val, axis=1)
      #y_pred = np.argmax(y_pred, axis=1)
      #print(y_true.shape, y_pred.shape)
      if self.mymil:
        score = roc_auc_score(self.y_val.max(axis=1), y_pred.max(axis=1))  
      else: score = roc_auc_score(self.y_val[:,1], y_pred[:,1])
      print("interval evaluation - epoch: {:d} - auc: {:.2f}".format(epoch, score))
      if score > self.auc:
        self.auc = score
        for f in os.listdir('./'):
          if f.startswith(self.filepath+'auc'):
            os.remove(f)
        self.model.save(self.filepath+'auc'+str(score)+'ep'+str(epoch)+'.hdf5')

项目：deep-mil-for-whole-mammogram-classification 作者：wentaozhu | 项目源码 | 文件源码

def perform(self, node, inputs, output_storage):
        """
        Calculate ROC AUC score.

        Parameters
        ----------
        node : Apply instance
            Symbolic inputs and outputs.
        inputs : list
            Sequence of inputs.
        output_storage : list
            List of mutable 1-element lists.
        """
        if roc_auc_score is None:
            raise RuntimeError("Could not import from sklearn.")
        y_true, y_score = inputs
        try:
            roc_auc = roc_auc_score(y_true, y_score)
        except ValueError:
            roc_auc = np.nan
        #rvalue = np.array((roc_auc, prec, reca, f1))
        #[0][0]
        output_storage[0][0] = theano._asarray(roc_auc, dtype=config.floatX)

项目：stacker 作者：bamine | 项目源码 | 文件源码

def setUp(self):
        os.putenv("KMP_DUPLICATE_LIB_OK", "TRUE")
        self.X_class, self.y_class = datasets.make_classification(random_state=42)
        self.X_reg, self.y_reg = datasets.make_regression(random_state=42)
        self.classification_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
        self.regression_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
        self.class_scorer = Scorer("auc_error", lambda y_pred, y_true: 1 - metrics.roc_auc_score(y_pred, y_true))
        self.reg_scorer = Scorer("mse", metrics.mean_squared_error)

        self.classification_task_split = \
            Task("class_split", self.X_class, self.y_class, "classification", test_size=0.1, random_state=42)
        self.regression_task_split = \
            Task("reg_split", self.X_class, self.y_class, "regression", test_size=0.1, random_state=42)

        self.classification_task_cv = \
            Task("class_cv", self.X_reg, self.y_reg, "classification", cv=5, random_state=42)
        self.regression_task_cv = \
            Task("reg_cv", self.X_reg, self.y_reg, "regression", cv=5, random_state=42)

项目：mitre 作者：gerberlab | 项目源码 | 文件源码

def classifier_accuracy_report(self, prediction_vector, threshold=0.5):
        """ Determine AUC and other metrics, write report.

        prediction_vector: vector of booleans (or outcome
        probabilities) of length n_subjects,
        e.g. self.point_predictions, self.ensemble_probabilities()...
        If this has dtype other than bool, prediction_vector > threshold
        is used for the confusion matrix.

        Returns: one string (multiple lines joined with \n, including
        trailing newline) containing a formatted report.

        """
        auc = roc_auc_score(self.model.data.y.astype(float), prediction_vector.astype(float))
        if not (prediction_vector.dtype == np.bool):
            prediction_vector = prediction_vector >= threshold
        conf = confusion_matrix(self.model.data.y, prediction_vector)

        lines = ['AUC: %.3f' % auc,
                 'Confusion matrix: \n\t%s' % str(conf).replace('\n','\n\t')]
        return '\n'.join(lines) + '\n'


    ######################################## 
    # BAYES-FACTOR-BASED METHODS

项目：score-zeroshot 作者：pedro-morgado | 项目源码 | 文件源码

def eval_semantics(scores, gt, args):
    from sklearn.metrics import roc_auc_score
    num_semantics = gt.shape[1]
    acc, auc = np.nan*np.zeros((num_semantics,)), np.nan*np.zeros((num_semantics,))
    if args.semantics == ATTRIBUTES:
        for s, (pred, lbl) in enumerate(zip(scores.T, gt.T)):
            acc[s] = (pred*(lbl-0.5) > 0).astype(float).mean()
            if sum(lbl == 0) > 0 and sum(lbl == 1) > 0:
                auc[s] = roc_auc_score(lbl, pred)

    else:
        for s, (pred, lbl) in enumerate(zip(scores, gt.T)):
            acc[s] = (pred.argmax(axis=1) == lbl).astype(float).mean()
            onehot = np.zeros(pred.shape)
            for i, l in enumerate(lbl):
                onehot[i, int(l)] = 1
            if (onehot.sum(axis=0) == 0).sum() == 0:
                auc[s] = roc_auc_score(onehot, pred)
    return acc, auc

项目：rtb-unbiased-learning 作者：wnzhang | 项目源码 | 文件源码

def test():
    y = []
    yp = []
    fi = open(sys.argv[1], 'r')
    for line in fi:
        data = ints(line.replace(":1", "").split())
        clk = data[1]
        mp = data[2]
        fsid = 3 # feature start id
        pred = 0.0
        for i in range(fsid, len(data)):
            feat = data[i]
            if feat in featWeight:
                pred += featWeight[feat]
        pred = sigmoid(pred)
        y.append(clk)
        yp.append(pred)
    fi.close()
    auc = roc_auc_score(y, yp)
    rmse = math.sqrt(mean_squared_error(y, yp))
    print str(round) + '\t' + str(auc) + '\t' + str(rmse)

项目：deep-action-proposals 作者：escorciav | 项目源码 | 文件源码

def report_metrics(y_dset, y_pred, batch_size, dset='Val'):
    # Print additional metrics involving predictions
    n_rows = (y_dset.shape[0] / batch_size) * batch_size
    y_true = y_dset[0:n_rows, :].flatten()
    y_pred = y_pred.flatten()

    val_ap = average_precision_score(y_true, y_pred)
    val_roc = roc_auc_score(y_true, y_pred)

    n = y_true.size
    n_pos = y_true.sum()
    idx_sorted = np.argsort(-y_pred)
    val_rec = []

    logging.info(dset + "-AP {:.6f}".format(val_ap))
    logging.info(dset + "-ROC {:.6f}".format(val_roc))
    for i, v in enumerate([10, 25, 50, 75, 100]):
        tp = y_true[idx_sorted[:int(v * n / 100)]].sum()
        val_rec.append(tp * 1.0 / n_pos)
        logging.info(dset + "-R{} {:.6f}".format(v, val_rec[i]))
    return val_ap, val_rec[2]


# ############################## Main program #################################

项目：postlearn 作者：TomAugspurger | 项目源码 | 文件源码

def plot_roc_curve(y_true, y_score, ax=None):
    '''
    Plot the Receiving Operator Characteristic curved, including the
    Area under the Curve (AUC) score.

    Parameters
    ----------
    y_true : array
    y_score : array
    ax : matplotlib.axes, defaults to new axes

    Returns
    -------
    ax : matplotlib.axes
    '''
    ax = ax or plt.axes()
    auc = metrics.roc_auc_score(y_true, y_score)
    fpr, tpr, _ = metrics.roc_curve(y_true, y_score)
    ax.plot(fpr, tpr)
    ax.annotate('AUC: {:.2f}'.format(auc), (.8, .2))
    ax.plot([0, 1], [0, 1], linestyle='--', color='k')
    return ax

项目：CCIT 作者：rajatsen91 | 项目源码 | 文件源码

def cross_validate(classifier, n_folds = 5):
    '''Custom cross-validation module I always use '''
    train_X = classifier['train_X']
    train_y = classifier['train_y']
    model = classifier['model']
    score = 0.0

    skf = KFold(n_splits = n_folds)
    for train_index, test_index in skf.split(train_X):
        X_train, X_test = train_X[train_index], train_X[test_index]
        y_train, y_test = train_y[train_index], train_y[test_index]
        clf = model.fit(X_train,y_train)
        pred = clf.predict_proba(X_test)[:,1]
        #print 'cross', roc_auc_score(y_test,pred)
        score = score + roc_auc_score(y_test,pred)

    return score/n_folds

项目：PEP 作者：ma-compbio | 项目源码 | 文件源码

def analyzeResult_temp(data,model,DataVecs):
    predict = model.predict(DataVecs)
    data['predict'] = predict
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"],result_auc[:,1]),
            average_precision_score(data["label"],result_auc[:,1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" %(precision_score(data["label"],data["predict"]),
            recall_score(data["label"],data["predict"]),
            f1_score(data["label"],data["predict"]),
            matthews_corrcoef(data["label"],data["predict"])))
    except:
        print "ROC unavailable"

# Performance evaluation and result analysis uing adjusted thresholds

项目：PEP 作者：ma-compbio | 项目源码 | 文件源码

def analyzeResult(data,model,DataVecs,threshold):
    predict = model.predict_proba(DataVecs)[:,1]
    True,False=1,0
    data['predict'] = (predict > threshold)
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"],result_auc[:,1]),
            average_precision_score(data["label"],result_auc[:,1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" %(precision_score(data["label"],data["predict"]),
            recall_score(data["label"],data["predict"]),
            f1_score(data["label"],data["predict"]),
            matthews_corrcoef(data["label"],data["predict"])))
    except:
        print "ROC unavailable"

# Performance evaluation

项目：pumil 作者：levelfour | 项目源码 | 文件源码

def print_evaluation_result(clf, bags_test, args):
  pred_score = np.array([clf(B.data()) for B in bags_test])
  pred_label = np.array([1 if score >= 0 else -1 for score in pred_score])
  true_label = np.array([B.y for B in bags_test])
  a = accuracy (pred_label, true_label)  # accuracy
  p = precision(pred_label, true_label)  # precision
  r = recall   (pred_label, true_label)  # recall
  f = f_score  (pred_label, true_label)  # F-score
  auc = metrics.roc_auc_score((true_label+1)/2, pred_score)

  if not args.aucplot:
    sys.stdout.write("""# accuracy,precision,recall,f-score,ROC-AUC
{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n""".format(a, p, r, f, auc))
    sys.stdout.flush()

  else:
    sys.stdout.write("""# accuracy,precision,recall,f-score,ROC-AUC
# {:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n""".format(a, p, r, f, auc))
    sys.stdout.flush()
    np.savetxt(sys.stdout.buffer, np.c_[pred_score, true_label])

项目：KAGGLE_AVITO_2016 作者：ZFTurbo | 项目源码 | 文件源码

def run_train_with_model(train, features, model_path):
    start_time = time.time()

    gbm = xgb.Booster()
    gbm.load_model(model_path)

    print("Validating...")
    check = gbm.predict(xgb.DMatrix(train[features]))
    score = roc_auc_score(train['isDuplicate'].values, check)
    validation_df = pd.DataFrame({'itemID_1': train['itemID_1'].values, 'itemID_2': train['itemID_2'].values,
                                  'isDuplicate': train['isDuplicate'].values, 'probability': check})
    print('AUC score value: {:.6f}'.format(score))

    imp = get_importance(gbm, features)
    print('Importance array: ', imp)

    print('Prediction time: {} minutes'.format(round((time.time() - start_time)/60, 2)))
    return validation_df, score

项目：MLAlgorithms 作者：rushter | 项目源码 | 文件源码

def classification():
    # Generate a random binary classification problem.
    X, y = make_classification(n_samples=350, n_features=15, n_informative=10,
                               random_state=1111, n_classes=2,
                               class_sep=1., n_redundant=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15,
                                                        random_state=1111)

    model = GradientBoostingClassifier(n_estimators=50, max_depth=4,
                                       max_features=8, learning_rate=0.1)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print(predictions)
    print(predictions.min())
    print(predictions.max())
    print('classification, roc auc score: %s'
          % roc_auc_score(y_test, predictions))

项目：MLAlgorithms 作者：rushter | 项目源码 | 文件源码

def test_mlp():
    y_train_onehot = one_hot(y_train)
    y_test_onehot = one_hot(y_test)

    model = NeuralNet(
        layers=[
            Dense(256, Parameters(init='uniform', regularizers={'W': L2(0.05)})),
            Activation('relu'),
            Dropout(0.5),
            Dense(128, Parameters(init='normal', constraints={'W': MaxNorm()})),
            Activation('relu'),
            Dense(2),
            Activation('softmax'),
        ],
        loss='categorical_crossentropy',
        optimizer=Adadelta(),
        metric='accuracy',
        batch_size=64,
        max_epochs=25,

    )
    model.fit(X_train, y_train_onehot)
    predictions = model.predict(X_test)
    assert roc_auc_score(y_test_onehot[:, 0], predictions[:, 0]) >= 0.95

项目：kaggle_art 作者：small-yellow-duck | 项目源码 | 文件源码

def train_classifier(x_train, y_train, x_cv, y_cv):
    clf = RandomForestClassifier(n_estimators=100)

    print 'starting fit'
    # excluding the patient_id column from the fit and prediction (patient_id?)
    clf.fit(x_train[::5], y_train[::5])
    print 'starting pred'

    y_pred = np.zeros(x_cv.shape[0])
    for i in xrange(4):
        y_pred[i::4] = clf.predict_proba(x_cv[i::4])[:, 1]

    if y_cv is not None:
        print roc_auc_score(y_cv, y_pred)

    return y_pred, clf

项目：drugADR 作者：cosylabiiit | 项目源码 | 文件源码

def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    return app

项目：EEG-Grasp-Kaggle 作者：esube | 项目源码 | 文件源码

def score(net, samples=4096):
    """Compute the area under the curve, ROC score from a trained net

    We take `samples` random samples and compute the ROC AUC
    score on those samples. 
    """
    source = net.batch_iterator_test.source
    test_indices = make_valid_indices(source, samples)
    predicted = net.predict_proba(test_indices)
    if predicted.shape[-1] != N_EVENTS:
        predicted = decode(predicted)
    actual = source.events[test_indices]
    try:
        return roc_auc_score(actual.reshape(-1), predicted.reshape(-1))
    except:
        return 0

项目：Uber-DS-Challenge 作者：bjherger | 项目源码 | 文件源码

def run_statsmodels_models(train, test, model_description):
    """
    Run logistic regression model to predict whether a signed up driver ever actually drove.
    :param input_df: Data frame prepared for statsmodels regression
    :type input_df: pd.DataFrame
    :return: AUC for model generated
    :rtype: float
    """
    # Run model on all observations
    # Use dmatrices to format data
    logging.info('Running model w/ description: %s' %model_description)
    logging.debug('Train df: \n%s' % train.describe())
    logging.debug('Test df: \n%s' % test.describe())
    y_train, X_train = dmatrices(model_description, data=train, return_type='dataframe', NA_action='drop')
    y_test, X_test = dmatrices(model_description, data=test, return_type='dataframe', NA_action='drop')

    # Create, fit model
    mod = sm.Logit(endog=y_train, exog=X_train)
    res = mod.fit(method='bfgs', maxiter=100)

    # Output model summary
    print train['city_name'].value_counts()
    print train['signup_channel'].value_counts()
    print res.summary()

    # Create, output AUC
    predicted = res.predict(X_test)
    auc = roc_auc_score(y_true=y_test, y_score=predicted)
    print 'AUC for 20%% holdout: %s' %auc

    # Return AUC for model generated
    return auc



# Main section

项目：jdata 作者：learn2Pro | 项目源码 | 文件源码

def modelfit(alg, dtrain, predictors, useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain['label'].values)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
                          metrics='auc', early_stopping_rounds=early_stopping_rounds, show_progress=False)
        alg.set_params(n_estimators=cvresult.shape[0])
    # Fit the algorithm on the data
    alg.fit(dtrain[predictors], dtrain['label'], eval_metric='auc')

    # Predict training set:
    dtrain_predictions = alg.predict(dtrain[predictors])
    dtrain_predprob = alg.predict_proba(dtrain[predictors])[:, 1]

    # Print model report:
    print "\nModel Report"
    print "Accuracy : %.4g" % metrics.accuracy_score(dtrain['Disbursed'].values, dtrain_predictions)
    print "AUC Score (Train): %f" % metrics.roc_auc_score(dtrain['Disbursed'], dtrain_predprob)

    feat_imp = pd.Series(alg.booster().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')

项目：retain 作者：mp2893 | 项目源码 | 文件源码

def calculate_auc(test_model, dataset, options):
    batchSize = options['batchSize']
    useTime = options['useTime']

    n_batches = int(np.ceil(float(len(dataset[0])) / float(batchSize)))
    scoreVec = []
    for index in xrange(n_batches):
        batchX = dataset[0][index*batchSize:(index+1)*batchSize]
        if useTime:
            batchT = dataset[2][index*batchSize:(index+1)*batchSize]
            x, t, lengths = padMatrixWithTime(batchX, batchT, options)
            scores = test_model(x, t, lengths)
        else:
            x, lengths = padMatrixWithoutTime(batchX, options)
            scores = test_model(x, lengths)
        scoreVec.extend(list(scores))
    labels = dataset[1]
    auc = roc_auc_score(list(labels), list(scoreVec))
    return auc

项目：muffnn 作者：civisanalytics | 项目源码 | 文件源码

def test_cross_val_predict():
    # Make sure it works in cross_val_predict for multiclass.

    X, y = load_iris(return_X_y=True)
    y = LabelBinarizer().fit_transform(y)
    X = StandardScaler().fit_transform(X)

    mlp = MLPClassifier(n_epochs=10,
                        solver_kwargs={'learning_rate': 0.05},
                        random_state=4567).fit(X, y)

    cv = KFold(n_splits=4, random_state=457, shuffle=True)
    y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
    auc = roc_auc_score(y, y_oos, average=None)

    assert np.all(auc >= 0.96)

项目：snape 作者：mbernico | 项目源码 | 文件源码

def score_binary_classification(y, y_hat, report=True):
    """
    Create binary classification output
    :param y: true value
    :param y_hat: class 1 probabilities
    :param report:
    :return:
    """
    y_hat_class = [1 if x >= 0.5 else 0 for x in y_hat]  # convert probability to class for classification report

    report_string = "---Binary Classification Score--- \n"
    report_string += classification_report(y, y_hat_class)
    score = roc_auc_score(y, y_hat)
    report_string += "\nAUC = " + str(score)

    if report:
        print(report_string)

    return score, report_string

项目：dm-challenge 作者：ping133 | 项目源码 | 文件源码

def find_optimal_C_for_AUC(xTrain, yTrain, xTest, yTest):
    C_2d_range = [10.0 ** i for i in range(-3, 3)]

    accuracy = np.array([])
    auc_score = np.array([])

    for Ctry in C_2d_range:
        clf = SVC(C=Ctry, kernel="linear", probability=True)
        clf.fit(xTrain, yTrain)
        pred = clf.predict(xTest)
        pred_proba = clf.predict_proba(xTest)
        accuracy = np.append(accuracy, np.average(yTest == pred))
        auc_score = np.append(auc_score,
                              roc_auc_score(yTest, pred_proba[:, 1]))
        print "C: {}" .format(Ctry)
        print "accuracy: {}" .format(accuracy[-1])
        print "AUC: {}" .format(auc_score[-1])

    # Extract the optimal parameters to train the final model
    best_auc_idx = np.where(auc_score == max(auc_score))[0]
    best_acc_idx = np.where(accuracy == max(accuracy[best_auc_idx]))[0]
    best_C = C_2d_range[best_acc_idx[0]]

    return best_C

项目：stacking 作者：ikki407 | 项目源码 | 文件源码

def eval_pred( y_true, y_pred, eval_type):
    if eval_type == 'logloss':#eval_type??????
        loss = ll( y_true, y_pred )
        print "logloss: ", loss
        return loss            

    elif eval_type == 'auc':
        loss = AUC( y_true, y_pred )
        print "AUC: ", loss
        return loss             

    elif eval_type == 'rmse':
        loss = np.sqrt(mean_squared_error(y_true, y_pred))
        print "rmse: ", loss
        return loss




######### BaseModel Class #########

项目：nf1_inactivation 作者：greenelab | 项目源码 | 文件源码

def train_test_error(clf, train_x, train_y, test_x, test_y):
    """
    Return training and testing errors according to input classifier

    Arguments:
    :param clf: classifier sklearn object
    :param train_x: gene expression matrix
    :param train_y: list of labels
    :param test_x: gene expression matrix
    :param test_y: list of labels

    Output:
    Returns training and testing auroc
    """
    model = clf.fit(train_x, train_y)
    pred_y = model.predict(train_x)
    train_err = roc_auc_score(train_y, pred_y, average='weighted')
    pred_y = model.predict(test_x)
    test_err = roc_auc_score(test_y, pred_y, average='weighted')
    return train_err, test_err

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_iforest_performance():
    """Test Isolation Forest performs well"""

    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = np.r_[X + 2, X - 2]
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = clf.predict(X_test)

    # check that there is at most 6 errors (false positive or false negative)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_score_scale_invariance():
    # Test that average_precision_score and roc_auc_score are invariant by
    # the scaling or shifting of probabilities
    y_true, _, probas_pred = make_prediction(binary=True)

    roc_auc = roc_auc_score(y_true, probas_pred)
    roc_auc_scaled = roc_auc_score(y_true, 100 * probas_pred)
    roc_auc_shifted = roc_auc_score(y_true, probas_pred - 10)
    assert_equal(roc_auc, roc_auc_scaled)
    assert_equal(roc_auc, roc_auc_shifted)

    pr_auc = average_precision_score(y_true, probas_pred)
    pr_auc_scaled = average_precision_score(y_true, 100 * probas_pred)
    pr_auc_shifted = average_precision_score(y_true, probas_pred - 10)
    assert_equal(pr_auc, pr_auc_scaled)
    assert_equal(pr_auc, pr_auc_shifted)

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_predict_proba_binary():
    # Test that predict_proba works as expected for binary class."""
    X = X_digits_binary[:50]
    y = y_digits_binary[:50]

    clf = MLPClassifier(hidden_layer_sizes=5)
    clf.fit(X, y)
    y_proba = clf.predict_proba(X)
    y_log_proba = clf.predict_log_proba(X)

    (n_samples, n_classes) = y.shape[0], 2

    proba_max = y_proba.argmax(axis=1)
    proba_log_max = y_log_proba.argmax(axis=1)

    assert_equal(y_proba.shape, (n_samples, n_classes))
    assert_array_equal(proba_max, proba_log_max)
    assert_array_equal(y_log_proba, np.log(y_proba))

    assert_equal(roc_auc_score(y, y_proba[:, 1]), 1.0)

项目：crayimage 作者：yandexdataschool | 项目源码 | 文件源码

def evaluate(self):
    t, result = timed(
      lambda: self.em.predict(self.hists).reshape(-1),
      repeat=1
    )

    auc = roc_auc_score(self.cats.reshape(-1), result)
    acc = accuracy_score(self.cats, result > 0.5)

    for i in xrange(result.shape[0]):
      print('%d: %.2e' % (self.cats.reshape(-1)[i], result[i]))

    print('Time %.2f millisec' % (t * 1000.0))
    print('AUC: %.3f' % auc)

    return acc, auc

项目：crayimage 作者：yandexdataschool | 项目源码 | 文件源码

def evaluate(self):
    for scores in self.em.fit(self.X, self.cats, iterations=100, learning_rate=1.0):
      print np.mean(scores)

    for p in self.em.kernel.params:
      print p, p.get_value()
    auc = roc_auc_score(self.cats.reshape(-1), result)
    acc = accuracy_score(self.cats, result > 0.5)

    for i in xrange(result.shape[0]):
      print('%d: %.2e' % (self.cats.reshape(-1)[i], result[i]))

    print('Time %.2f millisec' % (t * 1000.0))
    print('AUC: %.3f' % auc)

    return acc, auc

项目：crayimage 作者：yandexdataschool | 项目源码 | 文件源码

def b(self, digital_levels = 250):
    minX, maxX = np.min(self.X), np.max(self.X)

    digital = np.floor((self.X - minX) / (maxX - minX) * (digital_levels - 1)).astype('uint16')
    assert np.max(digital) < digital_levels
    counts = np.ndarray(shape=(self.X.shape[0], digital_levels), dtype='uint8')
    ndcount(digital.T, counts)
    print 'counts done'

    result = one_class_em(counts)

    auc = roc_auc_score(self.cats, result[:, 1])

    predictions = np.argmax(result, axis=1)
    acc = accuracy(predictions, self.cats)

    return acc, auc

项目：Sacred_Deep_Learning 作者：AAbercrombie0492 | 项目源码 | 文件源码

def on_epoch_end(self, epoch, logs={}):
        import numpy as np
        from sklearn.metrics import recall_score, precision_score, roc_auc_score, f1_score
        y_pred = self.model.predict(self.X_val)
        y_pred = np.argmax(y_pred, axis=1)

        recall = recall_score(self.y_val, y_pred, average=None).mean()
        self.recall.append(recall)
        logs['recall'] = recall

        precision = precision_score(self.y_val, y_pred, average=None).mean()
        self.precision.append(precision)
        logs['precision'] = precision

        auc = roc_auc_score(self.y_val, y_pred, average=None).mean()
        self.auc.append(auc)
        logs['auc'] = auc

        f1 = f1_score(self.y_val, y_pred, average=None).mean()
        self.f1.append(f1)
        logs['f1'] = f1

项目：AutoML5 作者：djajetic | 项目源码 | 文件源码

def auc_metric(solution, prediction, task='binary.classification'):
    ''' Normarlized Area under ROC curve (AUC).
    Return Gini index = 2*AUC-1 for  binary classification problems.
    Should work for a vector of binary 0/1 (or -1/1)"solution" and any discriminant values
    for the predictions. If solution and prediction are not vectors, the AUC
    of the columns of the matrices are computed and averaged (with no weight).
    The same for all classification problems (in fact it treats well only the
    binary and multilabel classification problems).'''
    #auc = metrics.roc_auc_score(solution, prediction, average=None)
    # There is a bug in metrics.roc_auc_score: auc([1,0,0],[1e-10,0,0]) incorrect
    label_num=solution.shape[1]
    auc=np.empty(label_num)
    for k in range(label_num):
        r_ = tiedrank(prediction[:,k])
        s_ = solution[:,k]
        if sum(s_)==0: print('WARNING: no positive class example in class {}'.format(k+1))
        npos = sum(s_==1)
        nneg = sum(s_<1)
        auc[k] = (sum(r_[s_==1]) - npos*(npos+1)/2) / (nneg*npos)
    return 2*mvmean(auc)-1


### END CLASSIFICATION METRICS 

# ======= Specialized scores ========
# We run all of them for all tasks even though they don't make sense for some tasks

项目：AutoML5 作者：djajetic | 项目源码 | 文件源码

def auc_score_(solution, prediction):
    auc = metrics.roc_auc_score(solution, prediction, average=None)
    return mvmean(auc)

### SOME I/O functions

项目：hyperband 作者：zygmuntz | 项目源码 | 文件源码

def train_and_eval_sklearn_classifier( clf, data ):

    x_train = data['x_train']
    y_train = data['y_train']

    x_test = data['x_test']
    y_test = data['y_test'] 

    clf.fit( x_train, y_train ) 

    try:
        p = clf.predict_proba( x_train )[:,1]   # sklearn convention
    except IndexError:
        p = clf.predict_proba( x_train )

    ll = log_loss( y_train, p )
    auc = AUC( y_train, p )
    acc = accuracy( y_train, np.round( p ))

    print "\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc )

    #

    try:
        p = clf.predict_proba( x_test )[:,1]    # sklearn convention
    except IndexError:
        p = clf.predict_proba( x_test )

    ll = log_loss( y_test, p )
    auc = AUC( y_test, p )
    acc = accuracy( y_test, np.round( p ))

    print "# testing  | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ) 

    #return { 'loss': 1 - auc, 'log_loss': ll, 'auc': auc }
    return { 'loss': ll, 'log_loss': ll, 'auc': auc }

###

# "clf", even though it's a regressor