我们从Python开源项目中,提取了以下8个代码示例,用于说明如何使用sklearn.model_selection.LeaveOneOut()。
def get_cv_method(method, **kwargs):
    """Build the cross-validation splitter registered under ``method``.

    Recognised names are ``'kfold'``, ``'skfold'``, ``'loo'``,
    ``'shuffle_split'``, ``'split'``, ``'s_shuffle_split'`` and
    ``'time_series'``.

    Args:
        method: Name of the splitter to build.
        **kwargs: Forwarded unchanged to the splitter's constructor. They are
            ignored for ``'loo'``, which is always built without arguments.

    Returns:
        A new instance of the selected splitter class.

    Raises:
        AttributeError: If ``method`` matches none of the recognised names.
    """
    # Every factory is a lambda so that a splitter class is only looked up
    # and instantiated when its name is the one actually requested.
    factories = (
        ('kfold', lambda: KFold(**kwargs)),
        ('skfold', lambda: StratifiedKFold(**kwargs)),
        ('loo', lambda: LeaveOneOut()),
        ('shuffle_split', lambda: ShuffleSplit(**kwargs)),
        ('split', lambda: TrainTestSplit(**kwargs)),
        ('s_shuffle_split', lambda: StratifiedShuffleSplit(**kwargs)),
        ('time_series', lambda: TimeSeriesSplit(**kwargs)),
    )
    for name, build in factories:
        if method == name:
            return build()
    raise AttributeError('Invalid CV method - %s!' % method)
def _leave_one_out(algr, X, y):
    """Estimate the leave-one-out mean squared error of ``algr`` and print it.

    For every sample, ``algr`` is fitted on all remaining samples and asked to
    predict the held-out one. The squared residuals are averaged over
    ``X.shape[0]`` and the result is printed between two separator lines.

    Args:
        algr: Estimator whose ``fit(X, y)`` returns the fitted model and whose
            model exposes ``predict(X)``.
        X: Samples; must support indexing with an integer index array and
            expose ``shape`` (presumably a NumPy array - confirm with caller).
        y: Targets aligned with ``X``; each training slice is flattened with
            ``ravel()`` before fitting.

    Returns:
        The mean squared error as a float. Earlier versions only printed the
        value and returned ``None``.
    """
    loo = LeaveOneOut()
    square_error_sum = 0.0
    for train_index, test_index in loo.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model = algr.fit(X_train, y_train.ravel())
        predicted_y = model.predict(X_test)
        # The residual is a one-element array; ``.item()`` extracts the scalar
        # explicitly because ``float()`` on a non-0-d array is deprecated in
        # recent NumPy releases.
        residual = y_test[0] - predicted_y
        square_error_sum += float(residual.item()) ** 2
    mse = square_error_sum / X.shape[0]
    # Single-argument print calls produce the same text on Python 2 and 3
    # (the old ``print a, b`` statement is a syntax error on Python 3).
    print('-----------------------')
    print('Leave One Out?mse  %s' % (mse,))
    print('-----------------------')
    return mse
def cv_LinearRegression_Bias(xM, yV):
    """Leave-one-out evaluation of a bias-only model ``y ~ x + bias``.

    Only the first column of ``xM`` and of ``yV`` is used. For each held-out
    sample the bias is the mean of ``y - x`` over the remaining samples, and
    the prediction is ``x + bias``.

    Args:
        xM: 2-D array-like; column 0 holds the single input feature.
        yV: 2-D array-like; column 0 holds the target values.

    Returns:
        dict with the keys
        ``'median_abs_err'``, ``'mean_abs_err'``, ``'std_abs_err'``:
            statistics of the absolute leave-one-out errors;
        ``'list'``: the absolute error of every sample;
        ``'ci'``: the placeholder string ``"t.b.d"``;
        ``'yVp'``: the leave-one-out predictions (earlier versions returned
            the raw inputs here, which did not match the reported errors).

    Raises:
        ValueError: If the first columns of ``xM`` and ``yV`` are not 1-D or
            do not have the same length.
    """
    X, y = np.array(xM)[:, 0], np.array(yV)[:, 0]

    # Only 1-dim is allowed for both X and y. The former assert mixed
    # ``and``/``or`` without parentheses and inspected ``yV`` instead of
    # ``y``, so it could never report a bad shape.
    if X.ndim != 1 or y.ndim != 1:
        raise ValueError(
            "xM and yV must be 2-D so that their first column is 1-D; "
            "got column shapes %s and %s" % (X.shape, y.shape))
    if X.shape != y.shape:
        raise ValueError(
            "xM and yV must have the same number of rows; "
            "got %d and %d" % (X.shape[0], y.shape[0]))

    # Float buffer: copying ``y`` would silently truncate the predictions
    # whenever ``y`` has an integer dtype.
    yP = np.empty(y.shape, dtype=float)
    loo = model_selection.LeaveOneOut().split(X)
    for train, test in loo:
        bias = np.mean(y[train] - X[train])
        yP[test] = X[test] + bias

    cv_score_le = np.abs(np.array(y - yP)).tolist()
    o_d = {'median_abs_err': np.median(cv_score_le),
           'mean_abs_err': np.mean(cv_score_le),
           'std_abs_err': np.std(cv_score_le),  # this can be std(err)
           'list': cv_score_le,
           'ci': "t.b.d",
           'yVp': yP.tolist()}
    return o_d
def basic_quality(self, target, feature_vector):
    """Score ``feature_vector`` against ``target`` using ``self.quality``.

    Supported values of ``self.quality`` and what is returned:

    * ``'NWP'``: the smaller of ``QualityMeasure.calc_nwp`` applied to the
      targets ordered by the feature and ordered by the negated feature.
    * ``'corrcoef'``: ``1 - |np.corrcoef(target, feature_vector)[0][1]|``.
    * ``'mutual_info'``: ``1.0 - MINE().mic()`` after ``compute_score``.
    * ``'chi2'``: ``1 -`` the first ``chi2`` statistic of the absolute feature.
    * ``'distcorr'``: ``1 - distcorr(target, feature_vector)``.
    * ``'distree'``: ``1.0 -`` the importance of the feature in a depth-5
      decision tree fitted on the feature plus ``self.random_feature``.
    * ``'knnscore'``: ``1.0 -`` the mean leave-one-out accuracy of a
      k-nearest-neighbours classifier trained on the single feature.

    Args:
        target: Target values, one per sample.
        feature_vector: Feature values, same length as ``target``.

    Returns:
        The score described above, or the string ``'WRONG QUALITY NAME'``
        when ``self.quality`` is none of the supported names.
    """
    assert len(feature_vector) == len(target)
    quality = self.quality

    if quality == 'NWP':
        def targets_ordered_by(keys):
            # Sort on the key alone so that ties keep their input order.
            ranked = sorted(zip(keys, target), key=lambda pair: pair[0])
            return np.array([value for (_, value) in ranked])

        by_feature = targets_ordered_by(feature_vector)
        by_negated_feature = targets_ordered_by(-1.0 * feature_vector)
        p_nwp = QualityMeasure.calc_nwp(by_feature)
        n_nwp = QualityMeasure.calc_nwp(by_negated_feature)
        return min(n_nwp, p_nwp)

    if quality == 'corrcoef':
        correlation = np.corrcoef(target, feature_vector)[0][1]
        return 1 - abs(correlation)

    if quality == 'mutual_info':
        miner = MINE()
        miner.compute_score(target, feature_vector)
        return 1.0 - miner.mic()

    if quality == 'chi2':
        as_column = feature_vector.reshape(len(feature_vector), 1)
        statistics = chi2(abs(as_column), target)
        return 1 - statistics[0][0]

    if quality == 'distcorr':
        return 1 - distcorr(target, feature_vector)

    if quality == 'distree':
        # The feature is column 0; self.random_feature supplies the rest.
        samples = np.column_stack((feature_vector, self.random_feature))
        tree = DecisionTreeClassifier(max_depth=5, random_state=0)
        tree.fit(samples, target)
        return 1.0 - tree.feature_importances_[0]

    if quality == 'knnscore':
        fold_scores = []
        # NOTE(review): this instance is never fitted - every fold builds its
        # own classifier below. Kept only so the call sequence is unchanged.
        knn = KNeighborsClassifier()
        samples = np.array([feature_vector]).transpose()
        splitter = LeaveOneOut()
        for train, test in splitter.split(samples):
            knn = KNeighborsClassifier()
            knn.fit(samples[train], target[train])
            predicted = knn.predict(samples[test])
            fold_scores.append(accuracy_score(target[test], predicted))
        return 1.0 - np.mean(fold_scores)

    return 'WRONG QUALITY NAME'
def test_nested_cv():
    """Smoke-test nested cross-validation over pairings of splitters.

    For every pair drawn with replacement from the splitter list, a
    ``GridSearchCV`` over ``Ridge`` uses the first splitter and is scored by
    ``cross_val_score`` with the second. Nothing is asserted: the test
    passes as long as no call raises.
    """
    # NOTE(review): LeaveOneLabelOut / LabelKFold / n_iter / labels= look like
    # pre-release scikit-learn names - confirm against the installed version.
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    labels = rng.randint(0, 5, 15)

    splitters = [
        LeaveOneLabelOut(),
        LeaveOneOut(),
        LabelKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_iter=3, random_state=0),
    ]

    for inner_cv, outer_cv in combinations_with_replacement(splitters, 2):
        search = GridSearchCV(
            Ridge(),
            param_grid={'alpha': [1, .1]},
            cv=inner_cv,
        )
        # The labels go to the outer splitter directly and to the inner
        # search through fit_params.
        cross_val_score(
            search,
            X=X,
            y=y,
            labels=labels,
            cv=outer_cv,
            fit_params={'labels': labels},
        )
def test_cross_val_predict():
    """Check ``cross_val_predict`` against a manual loop and on varied inputs.

    Covers: agreement with a hand-written fold loop, the default ``cv``,
    ``LeaveOneOut``, a sparse COO input, an estimator called without ``y``,
    and a ``ValueError`` for a splitter whose test folds are not a partition.
    """
    dataset = load_boston()
    X, y = dataset.data, dataset.target
    folds = KFold()
    ridge = Ridge()

    # Naive loop (should be same as cross_val_predict):
    manual_preds = np.zeros_like(y)
    for train_idx, test_idx in folds.split(X, y):
        ridge.fit(X[train_idx], y[train_idx])
        manual_preds[test_idx] = ridge.predict(X[test_idx])

    preds = cross_val_predict(ridge, X, y, cv=folds)
    assert_array_almost_equal(preds, manual_preds)

    # Default cv: one prediction per sample.
    preds = cross_val_predict(ridge, X, y)
    assert_equal(len(preds), len(y))

    # Leave-one-out: still one prediction per sample.
    folds = LeaveOneOut()
    preds = cross_val_predict(ridge, X, y, cv=folds)
    assert_equal(len(preds), len(y))

    # Sparse input: zero every entry not above the global median, then
    # convert the result to COO format.
    X_sparse = X.copy()
    X_sparse *= (X_sparse > np.median(X_sparse))
    X_sparse = coo_matrix(X_sparse)
    preds = cross_val_predict(ridge, X_sparse, y)
    assert_array_almost_equal(len(preds), len(y))

    # Estimator called without y.
    preds = cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    class BadCV():
        # Yields the same train/test pair four times, so the test folds only
        # ever contain samples 4-8.
        def split(self, X, y=None, labels=None):
            for _ in range(4):
                yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cross_val_predict, ridge, X, y, cv=BadCV())
def test_cross_validator_with_default_params():
    """Check every cross-validator built with (mostly) default parameters.

    For each splitter the test verifies that
    ``get_n_splits`` returns the expected count, that 1-D and 2-D inputs
    produce identical splits, that both the train and the test indices have
    an integer dtype, and that ``repr`` matches the expected string.
    """
    # NOTE(review): LeaveOneLabelOut / LeavePLabelOut / n_folds / n_iter look
    # like pre-release scikit-learn names - confirm against the installed
    # version before relying on the expected repr strings.
    n_samples = 4
    n_unique_labels = 4
    n_folds = 2
    p = 2
    n_iter = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_folds)
    skf = StratifiedKFold(n_folds)
    lolo = LeaveOneLabelOut()
    lopo = LeavePLabelOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = no. of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_folds=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_folds=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneLabelOut()"
    lopo_repr = "LeavePLabelOut(n_labels=2)"
    ss_repr = ("ShuffleSplit(n_iter=10, random_state=0, test_size=0.1, "
               "train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    n_splits = [n_samples, comb(n_samples, p), n_folds, n_folds,
                n_unique_labels, comb(n_unique_labels, p), n_iter, 2]

    for i, (cv, cv_repr) in enumerate(zip(
            [loo, lpo, kf, skf, lolo, lopo, ss, ps],
            [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr,
             ss_repr, ps_repr])):
        # Test if get_n_splits works correctly
        assert_equal(n_splits[i], cv.get_n_splits(X, y, labels))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, labels)),
                                list(cv.split(X_1d, y, labels)))

        # Test that train, test indices returned are integers.
        # (The second assertion used to re-check ``train``, leaving the
        # test indices unverified.)
        for train, test in cv.split(X, y, labels):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))