We have extracted the following 13 code examples from open-source Python projects to illustrate how to use sklearn.cross_validation.LeaveOneOut().
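Before the project examples, here is a minimal sketch of the pattern they all share: LeaveOneOut(n) yields one (train, test) index pair per sample and can either be iterated directly or passed as the cv argument of cross_val_score. The data and estimator below (X, y, LinearRegression) are placeholders chosen only for illustration; they do not come from any of the projects.

    # Minimal sketch; assumes the old sklearn.cross_validation API
    # (deprecated in scikit-learn 0.18, later removed in favor of
    # sklearn.model_selection, where LeaveOneOut() takes no argument).
    import numpy as np
    from sklearn import cross_validation
    from sklearn.linear_model import LinearRegression

    X = np.arange(10, dtype=float).reshape(-1, 1)   # placeholder features
    y = 2.0 * X.ravel() + 1.0                       # placeholder targets

    loo = cross_validation.LeaveOneOut(len(X))      # one fold per sample
    for train_idx, test_idx in loo:
        est = LinearRegression().fit(X[train_idx], y[train_idx])
        est.predict(X[test_idx])                    # held-out prediction for this fold

    # The same object can be passed directly as the cv argument:
    scores = cross_validation.cross_val_score(
        LinearRegression(), X, y, cv=cross_validation.LeaveOneOut(len(X)),
        scoring="mean_squared_error")               # negated MSE, one value per sample
    print(np.mean(scores))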
def Second_Model_KRR(Scaled_Input_Data, Output_Data):
    T0 = time.time()
    n = len(Scaled_Input_Data)
    Grid_Dict = {"alpha": [1e0, 1e-1, 1e-2], "gamma": np.logspace(-2, 1, 3)}
    krr_Tuned = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1), cv=5,
                             param_grid=Grid_Dict, scoring="mean_absolute_error")
    krr_Tuned.fit(Scaled_Input_Data, Output_Data)
    KRR_MSE = KernelRidge(kernel='rbf', alpha=krr_Tuned.best_params_['alpha'],
                          gamma=krr_Tuned.best_params_['gamma'])
    KRR_Time = time.time() - T0
    print('The computational time of Kernel Ridge Regression for ', n, ' examples is: ', KRR_Time)
    MSEs_KRR = cross_validation.cross_val_score(KRR_MSE, Scaled_Input_Data, Output_Data,
                                                cv=cross_validation.LeaveOneOut(n),
                                                scoring="mean_absolute_error")
    MeanMSE_KRR = np.mean(list(MSEs_KRR))
    print('The average MSE of Kernel Ridge Regression for ', n, ' examples is: ', (-1*MeanMSE_KRR))
    return(MeanMSE_KRR, krr_Tuned)
def _cross_val_score_loo_r0(lm, X, y):
    """
    mean_square_error metric is used from sklearn.metrics.

    Return
    --------
    The mean squared error values are returned.
    """
    if len(y.shape) == 1:
        y = np.array([y]).T

    kf = cross_validation.LeaveOneOut(y.shape[0])

    score_l = list()
    for tr, te in kf:
        lm.fit(X[tr, :], y[tr, :])
        yp = lm.predict(X[te, :])
        score_l.append(metrics.mean_squared_error(y[te, :], yp))

    return score_l
def cross_val_score_loo(lm, X, y):
    """
    mean_square_error metric is used from sklearn.metrics.

    Return
    --------
    The mean squared error values are returned.
    """
    # Transform to arrays if they are lists or np.mat
    X = np.array(X)
    y = np.array(y)
    # Later, assert can be used to define the size of X and y
    if len(y.shape) == 1:
        y = np.array([y]).T

    kf = cross_validation.LeaveOneOut(y.shape[0])
    # Flattened error vectors for each point are stored in this list.
    errors_l = list()
    for tr, te in kf:
        lm.fit(X[tr, :], y[tr, :])
        yp = lm.predict(X[te, :])
        errors_l.extend((y[te, :] - yp).flatten().tolist())

    return errors_l
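A minimal usage sketch for the helper above, in the spirit of its later callers such as cv_pilot_reg_only below; the toy data (X_toy, y_toy) is illustrative only, and the snippet assumes cross_val_score_loo and its imports (numpy as np, sklearn's cross_validation and metrics) are already in scope.

    # Hypothetical call of cross_val_score_loo with toy regression data.
    import numpy as np
    from sklearn import linear_model

    X_toy = np.random.randn(20, 3)                       # illustrative features
    y_toy = np.dot(X_toy, [1.0, -2.0, 0.5]) + 0.1 * np.random.randn(20)

    errors = cross_val_score_loo(linear_model.LinearRegression(), X_toy, y_toy)
    print(np.mean(np.power(errors, 2)))                  # LOO mean squared error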
def cv_pilot_reg_only(self, alpha=0):
    model = self.model
    yT_a = self.rx_p["yT_a"]
    x_a = self.rx_p["x_a"]

    # kf = KFold()
    # loo = cross_validation.LeaveOneOut( x_a.shape[0])
    if alpha == 0:
        lm = linear_model.LinearRegression()
    else:
        lm = getattr(linear_model, model)(alpha)

    scores = codes.cross_val_score_loo(lm, yT_a, x_a)

    # Output is stored with environmental variables.
    pdi = pd.DataFrame()
    pdi["model"] = [model]
    pdi["alpha"] = [alpha]
    pdi["metric"] = ["mean_squared_error"]
    pdi["E[scores]"] = [np.mean(np.power(scores, 2))]  # MSE
    pdi["std[scores]"] = ["t.b.d."]
    pdi["scores"] = [scores]

    return pdi
def test_cross_val_generator_with_indices():
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    # explicitly passing indices value is deprecated
    loo = cval.LeaveOneOut(4)
    lpo = cval.LeavePOut(4, 2)
    kf = cval.KFold(4, 2)
    skf = cval.StratifiedKFold(y, 2)
    lolo = cval.LeaveOneLabelOut(labels)
    lopo = cval.LeavePLabelOut(labels, 2)
    ps = cval.PredefinedSplit([1, 1, 2, 2])
    ss = cval.ShuffleSplit(2)
    for cv in [loo, lpo, kf, skf, lolo, lopo, ss, ps]:
        for train, test in cv:
            assert_not_equal(np.asarray(train).dtype.kind, 'b')
            assert_not_equal(np.asarray(test).dtype.kind, 'b')
            X[train], X[test]
            y[train], y[test]
def test_cross_val_generator_with_default_indices():
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = cval.LeaveOneOut(4)
    lpo = cval.LeavePOut(4, 2)
    kf = cval.KFold(4, 2)
    skf = cval.StratifiedKFold(y, 2)
    lolo = cval.LeaveOneLabelOut(labels)
    lopo = cval.LeavePLabelOut(labels, 2)
    ss = cval.ShuffleSplit(2)
    ps = cval.PredefinedSplit([1, 1, 2, 2])
    for cv in [loo, lpo, kf, skf, lolo, lopo, ss, ps]:
        for train, test in cv:
            assert_not_equal(np.asarray(train).dtype.kind, 'b')
            assert_not_equal(np.asarray(test).dtype.kind, 'b')
            X[train], X[test]
            y[train], y[test]
def accuracy(features, labels):
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn import cross_validation

    # We use logistic regression because it is very fast.
    # Feel free to experiment with other classifiers
    clf = Pipeline([('preproc', StandardScaler()),
                    ('classifier', LogisticRegression())])
    cv = cross_validation.LeaveOneOut(len(features))
    scores = cross_validation.cross_val_score(
        clf, features, labels, cv=cv)
    return scores.mean()
def First_Model_SVR(Scaled_Input_Data, Output_Data):
    T0 = time.time()
    n = len(Scaled_Input_Data)
    Grid_Dict = {"C": [1e-2, 1e-1, 1e0, 1e1, 1e2], "gamma": np.logspace(-4, 2, 6)}
    svr_Tuned = GridSearchCV(SVR(kernel='rbf', gamma=0.1, tol=0.005), cv=5,
                             param_grid=Grid_Dict, scoring="mean_absolute_error")
    svr_Tuned.fit(Scaled_Input_Data, Output_Data)
    SVR_MSE = SVR(kernel='rbf', C=svr_Tuned.best_params_['C'],
                  gamma=svr_Tuned.best_params_['gamma'], tol=0.01)
    SVR_Time = time.time() - T0
    print('The computational time of Radial based Support Vector Regression for ', n, ' examples is: ', SVR_Time)
    MSEs_SVR = cross_validation.cross_val_score(SVR_MSE, Scaled_Input_Data, Output_Data,
                                                cv=cross_validation.LeaveOneOut(n),
                                                scoring="mean_absolute_error")
    MeanMSE_SVR = np.mean(list(MSEs_SVR))
    print('The average MSE of Radial based Support Vector Regression for ', n, ' examples is: ', (-1*MeanMSE_SVR))
    return(MeanMSE_SVR, svr_Tuned)
def RF_Model(Scaled_Input_Data, Output_Data):
    T0 = time.time()
    n = len(Scaled_Input_Data)
    RFModel = RandomForestRegressor()
    RFModel.fit(Scaled_Input_Data, Output_Data)
    RF_Time = time.time() - T0
    print('The computational time of Random Forest Regression for ', n, ' examples is: ', RF_Time)
    MSEs_RF = cross_validation.cross_val_score(RFModel, Scaled_Input_Data, Output_Data,
                                               cv=cross_validation.LeaveOneOut(n),
                                               scoring="mean_absolute_error")
    MeanMSE_RF = np.mean(list(MSEs_RF))
    print('The average MSE of Random Forest Regression for ', n, ' examples is: ', (-1*MeanMSE_RF))
    return(MeanMSE_RF, RFModel)
def _cv_pilot_reg_only_r0(self, alpha=0):
    model = self.model
    yT_a = self.rx_p["yT_a"]
    x_a = self.rx_p["x_a"]

    # kf = KFold()
    # loo = cross_validation.LeaveOneOut( x_a.shape[0])
    if alpha == 0:
        lm = linear_model.LinearRegression()
    else:
        lm = getattr(linear_model, model)(alpha)

    scores = codes.cross_val_score_loo(lm, yT_a, x_a)

    return scores
def _cv_pilot_reg_r0(self, alpha=0):
    """
    Cross-validation scores are evaluated using LOO.
    SNRpilot is equal to SNR, which is SNRdata.
    """
    Npilot = self.Npilot
    SNRpilot = self.SNR
    model = self.model

    BPSK, s_a, x_flat_a, x_a = gen_BPSK(Npilot, self.Nt)
    # H_a = gen_H( self.Nr, self.Nt)
    # H_a = self.H_a
    y_a = gen_Rx(self.Nr, Npilot, SNRpilot, self.H_a, x_a)
    yT_a = y_a.T

    # kf = KFold()
    # loo = cross_validation.LeaveOneOut( x_a.shape[0])
    if alpha == 0:
        lm = linear_model.LinearRegression()
    else:
        lm = getattr(linear_model, model)(alpha)

    scores = codes.cross_val_score_loo(lm, yT_a, x_a)

    return scores
def test_cross_val_predict():
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = cval.KFold(len(boston.target))

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv:
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    preds = cval.cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    cv = cval.LeaveOneOut(len(y))
    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cval.cross_val_predict(est, Xsp, y)
    assert_array_almost_equal(len(preds), len(y))

    preds = cval.cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    def bad_cv():
        for i in range(4):
            yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])
    assert_raises(ValueError, cval.cross_val_predict, est, X, y, cv=bad_cv())
def loo_proba(x, y, clf_used='rf', use_pca=False, params=None):
    """Perform leave-one-out

    Parameters
    ----------
    x : np.array
        features
    y : np.array
        labels
    clf_used : str
        classifier
    use_pca : bool
        perform principal component analysis on features x in advance
    params : dict
        parameter for classifier

    Returns
    -------
    np.array, np.array
        class probability, hard classification
    """
    # print "Performing LOO with %s and %d features. Using PCA: %s" % \
    #     (clf_used, x.shape[1], str(use_pca))
    if use_pca:
        old_dim = x.shape[1]
        pca = PCA(n_components=0.999)
        x = pca.fit_transform(x)
        # print pca.explained_variance_ratio_
        # print "Reduced feature space dimension %d, instead of %d" % (x.shape[1], old_dim)

    nans_in_X = np.sum(np.isnan(x))
    if nans_in_X > 0:
        # print np.where(np.isnan(x))
        # print "Found %d nans in features, converting to number." % nans_in_X
        x = np.nan_to_num(x)

    loo = cross_validation.LeaveOneOut(len(x))
    shape = (len(x), len(list(set(y))))
    prob = np.zeros(shape, dtype=np.float)
    pred = np.zeros((len(x), 1), dtype=np.int)
    cnt = 0
    # print "rf params:", rf_params
    for train_ixs, test_ixs in loo:
        x_train = x[train_ixs]
        x_test = x[test_ixs]
        y_train = y[train_ixs]

        clf = init_clf(clf_used, params)
        clf.fit(x_train, y_train)

        prob[cnt] = clf.predict_proba(x_test)
        pred[cnt] = clf.predict(x_test)

        np.set_printoptions(formatter={'float': '{: 0.3f}'.format})
        # if pred[cnt] == y[test_ixs]:
        #     print test_ixs, "\t", prob[cnt], pred[cnt], y[test_ixs]
        # else:
        #     print test_ixs, "\t", prob[cnt], pred[cnt], y[test_ixs], "\t WRONG"
        cnt += 1

    return prob, pred