The following 12 code examples, extracted from open-source Python projects, illustrate how to use sklearn.linear_model.RidgeCV().
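Before the extracted examples, here is a minimal, self-contained sketch of the basic RidgeCV workflow. The synthetic data and the alpha grid are illustrative choices, not taken from any of the projects below:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import RidgeCV

# Synthetic regression problem, purely for illustration
X, y = make_regression(n_samples=200, n_features=10, noise=10.0,
                       random_state=0)

# RidgeCV fits a ridge regression for each candidate alpha and keeps the
# one with the best cross-validated score; with no cv argument it uses an
# efficient leave-one-out scheme.
reg = RidgeCV(alphas=[0.1, 1.0, 10.0, 100.0])
reg.fit(X, y)

print("selected alpha:", reg.alpha_)    # best alpha from the grid
print("training R^2:", reg.score(X, y))

The examples that follow show the same pattern in real projects: choose an alpha grid, fit RidgeCV, then read off reg.alpha_ (and often refit a plain Ridge with it).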
Example 1

def __remodel__(self, model_type, regr, __X_train, __Y_train):
    """Retrain certain models based on optimal alphas and/or ratios."""
    if model_type == "ridge":
        alpha = regr.alpha_
        regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
    elif model_type == "lasso":
        alpha = regr.alpha_
        regr = linear_model.LassoCV(alphas=self.__realpha__(alpha),
                                    max_iter=5000, cv=10)
    elif model_type == "elasticnet":
        alpha = regr.alpha_
        ratio = regr.l1_ratio_
        regr = linear_model.ElasticNetCV(l1_ratio=self.__reratio__(ratio),
                                         alphas=self.__elasticnet_init["alpha"],
                                         max_iter=1000, cv=3)
    regr.fit(__X_train, __Y_train)
    return regr
Example 2

def predicted_vs_actual_sale_price(self, x_train, y_train, title_name):
    # Split the training data into an extra set of test
    x_train_split, x_test_split, y_train_split, y_test_split = \
        train_test_split(x_train, y_train)
    print(np.shape(x_train_split), np.shape(x_test_split),
          np.shape(y_train_split), np.shape(y_test_split))
    lasso = LassoCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006,
                            0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1],
                    max_iter=50000, cv=10)
    # lasso = RidgeCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006,
    #                         0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1], cv=10)
    lasso.fit(x_train_split, y_train_split)
    y_predicted = lasso.predict(X=x_test_split)
    plt.figure(figsize=(10, 5))
    plt.scatter(y_test_split, y_predicted, s=20)
    rmse_pred_vs_actual = self.rmse(y_predicted, y_test_split)
    plt.title(''.join([title_name, ', Predicted vs. Actual.',
                       ' rmse = ', str(rmse_pred_vs_actual)]))
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    plt.plot([min(y_test_split), max(y_test_split)],
             [min(y_test_split), max(y_test_split)])
    plt.tight_layout()
Example 3

def train_ridge_linear_model(_train_x, train_y, _predict_x,
                             sample_weight=None):
    print_title("Ridge Regressor")
    train_x, predict_x = standarize_feature(_train_x, _predict_x)

    # Use the default (efficient leave-one-out) CV in RidgeCV to pick alpha
    alphas = [0.1, 1, 10, 100, 1e3, 1e4, 2e4, 5e4, 8e4, 1e5, 1e6, 1e7, 1e8]
    reg = linear_model.RidgeCV(alphas=alphas, store_cv_values=True)
    # reg.fit(train_x, train_y, sample_weight=sample_weight)
    reg.fit(train_x, train_y)
    cv_mse = np.mean(reg.cv_values_, axis=0)
    print("alphas: %s" % alphas)
    print("CV MSE: %s" % cv_mse)
    print("Best alpha using built-in RidgeCV: %f" % reg.alpha_)

    # Generate the prediction using the best model
    alpha = reg.alpha_
    reg = linear_model.Ridge(alpha=alpha)
    # reg.fit(train_x, train_y, sample_weight=sample_weight)
    reg.fit(train_x, train_y)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)
    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
Example 4

def test_get_errors_param(self):
    """
    Test known models we can get the cv errors for alpha selection
    """
    # Test original CV models
    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            model = AlphaSelection(model())
            X, y = make_regression()
            model.fit(X, y)
            errors = model._find_errors_param()
            self.assertTrue(len(errors) > 0)
        except YellowbrickValueError:
            self.fail("could not find errors on {}".format(model.name))
Example 5

def test_clusterer_enforcement(self):
    """
    Assert that only clustering estimators can be passed to cluster viz
    """
    nomodels = [
        SVC, SVR, Ridge, RidgeCV, LinearRegression, RandomForestClassifier
    ]

    for nomodel in nomodels:
        with self.assertRaises(YellowbrickTypeError):
            visualizer = ClusteringScoreVisualizer(nomodel())

    models = [
        KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift, DBSCAN, Birch
    ]

    for model in models:
        try:
            visualizer = ClusteringScoreVisualizer(model())
        except YellowbrickTypeError:
            self.fail("could not pass clustering estimator to visualizer")
Example 6

def build_signature_model(X, gidx, n_alphas=5):
    # Note: the n_alphas parameter is unused here; the alpha grid is
    # hard-coded below.
    model = RidgeCV(alphas=(.1, 1, 10, 100, 1000, 10000, 100000), cv=5)
    model.fit(X[gidx].T, X.T)
    return model
Example 7

def test_regressor_cv(self):
    """
    Ensure only "CV" regressors are allowed
    """
    for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
        with self.assertRaises(YellowbrickTypeError):
            alphas = AlphaSelection(model())

    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            alphas = AlphaSelection(model())
        except YellowbrickTypeError:
            self.fail("could not instantiate RegressorCV on alpha selection")
Example 8

def test_store_cv_values(self):
    """
    Assert that store_cv_values is true on RidgeCV
    """
    model = AlphaSelection(RidgeCV())
    self.assertTrue(model.estimator.store_cv_values)

    model = AlphaSelection(RidgeCV(store_cv_values=True))
    self.assertTrue(model.estimator.store_cv_values)

    model = AlphaSelection(RidgeCV(store_cv_values=False))
    self.assertTrue(model.estimator.store_cv_values)
Example 9

def test_get_alphas_param(self):
    """
    Assert that we can get the alphas from ridge, lasso, and elasticnet
    """
    alphas = np.logspace(-10, -2, 100)

    # Test original CV models
    for model in (RidgeCV, LassoCV, ElasticNetCV):
        try:
            model = AlphaSelection(model(alphas=alphas))
            malphas = model._find_alphas_param()
            self.assertTrue(np.array_equal(alphas, malphas))
        except YellowbrickValueError:
            self.fail("could not find alphas on {}".format(model.name))
Example 10

def fit_thresholds(self, data, alpha, batch_size=128, verbose=0,
                   validation_data=None, cv=None, top_k=None):
    inputs = np.hstack([data[k] for k in self._graph_inputs])
    probs = self.predict(data, batch_size=batch_size)
    targets = {k: data[k] for k in self._graph_outputs}

    if isinstance(alpha, list):
        if validation_data is None and cv is None:
            warnings.warn("Neither validation data, nor the number of "
                          "cross-validation folds is provided. "
                          "The alpha parameter for threshold model will "
                          "be selected based on the default "
                          "cross-validation procedure in RidgeCV.")
        elif validation_data is not None:
            val_inputs = np.hstack([validation_data[k]
                                    for k in self._graph_inputs])
            val_probs = self.predict(validation_data)
            val_targets = {k: validation_data[k]
                           for k in self._graph_outputs}

    if verbose:
        sys.stdout.write("Constructing thresholds.")
        sys.stdout.flush()

    self.t_models = {}
    for k in self._graph_outputs:
        if verbose:
            sys.stdout.write(".")
            sys.stdout.flush()
        T = self._construct_thresholds(probs[k], targets[k])
        if isinstance(alpha, list):
            if validation_data is not None:
                # Select alpha on the held-out validation set
                val_T = self._construct_thresholds(val_probs[k],
                                                   val_targets[k],
                                                   top_k=top_k)
                score_best, alpha_best = -np.inf, None
                for a in alpha:
                    model = lm.Ridge(alpha=a).fit(inputs, T)
                    score = model.score(val_inputs, val_T)
                    if score > score_best:
                        score_best, alpha_best = score, a
                alpha = alpha_best
            else:
                # Let RidgeCV select alpha via cross-validation
                model = lm.RidgeCV(alphas=alpha, cv=cv).fit(inputs, T)
                alpha = model.alpha_
        self.t_models[k] = lm.Ridge(alpha=alpha)
        self.t_models[k].fit(inputs, T)

    if verbose:
        sys.stdout.write("Done.\n")
        sys.stdout.flush()
Example 11

def ridge_regression():
    ridge = RidgeCV(alphas=[0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1, 3, 6,
                            10, 30, 60])
    ridge.fit(X_train, y_train)
    alpha = ridge.alpha_
    print("Best alpha :", alpha)

    print("Try again for more precision with alphas centered around "
          + str(alpha))
    ridge = RidgeCV(alphas=[alpha * .6, alpha * .65, alpha * .7, alpha * .75,
                            alpha * .8, alpha * .85, alpha * .9, alpha * .95,
                            alpha, alpha * 1.05, alpha * 1.1, alpha * 1.15,
                            alpha * 1.25, alpha * 1.3, alpha * 1.35,
                            alpha * 1.4],
                    cv=10)
    ridge.fit(X_train, y_train)
    alpha = ridge.alpha_
    print("Best alpha :", alpha)

    print("Ridge RMSE on Training set :",
          rmse_cv(ridge, X_train, y_train).mean())
    print("Ridge RMSE on Test set :",
          rmse_cv(ridge, X_test, y_test).mean())
    y_train_rdg = ridge.predict(X_train)
    y_test_rdg = ridge.predict(X_test)

    # Plot residuals
    plt.scatter(y_train_rdg, y_train_rdg - y_train, c="blue", marker="s",
                label="Training data")
    plt.scatter(y_test_rdg, y_test_rdg - y_test, c="lightgreen", marker="s",
                label="Validation data")
    plt.title("Linear regression with Ridge regularization")
    plt.xlabel("Predicted values")
    plt.ylabel("Residuals")
    plt.legend(loc="upper left")
    plt.hlines(y=0, xmin=10.5, xmax=13.5, color="red")
    plt.show()

    # Plot predictions
    plt.scatter(y_train_rdg, y_train, c="blue", marker="s",
                label="Training data")
    plt.scatter(y_test_rdg, y_test, c="lightgreen", marker="s",
                label="Validation data")
    plt.title("Linear regression with Ridge regularization")
    plt.xlabel("Predicted values")
    plt.ylabel("Real values")
    plt.legend(loc="upper left")
    plt.plot([10.5, 13.5], [10.5, 13.5], c="red")
    plt.show()

    # Plot important coefficients
    coefs = pd.Series(ridge.coef_, index=X_train.columns)
    print("Ridge picked " + str(sum(coefs != 0)) +
          " features and eliminated the other " +
          str(sum(coefs == 0)) + " features")
    imp_coefs = pd.concat([coefs.sort_values().head(10),
                           coefs.sort_values().tail(10)])
    imp_coefs.plot(kind="barh")
    plt.title("Coefficients in the Ridge Model")
    plt.show()
    return ridge
Example 12

# (Python 2 syntax, as in the original project.)
def online(X_org, y_org, test_x, test_uid):
    n_folds = 5
    verbose = True
    shuffle = False

    X = X_org
    y = y_org
    X_submission = test_x

    if shuffle:
        idx = np.random.permutation(y.size)
        X = X[idx]
        y = y[idx]

    skf = list(StratifiedKFold(y, n_folds))

    clfs = [
        RandomForestClassifier().set_params(**INITIAL_PARAMS.get("RFC:one", {})),
        ExtraTreesClassifier().set_params(**INITIAL_PARAMS.get("ETC:one", {})),
        GradientBoostingClassifier().set_params(**INITIAL_PARAMS.get("GBC:one", {})),
        LogisticRegression().set_params(**INITIAL_PARAMS.get("LR:one", {})),
        xgb.XGBClassifier().set_params(**INITIAL_PARAMS.get("XGBC:two", {})),
        xgb.XGBClassifier().set_params(**INITIAL_PARAMS.get("XGBC:one", {})),
    ]

    print "Creating train and test sets for blending."

    dataset_blend_train = np.zeros((X.shape[0], len(clfs)))
    dataset_blend_test = np.zeros((X_submission.shape[0], len(clfs)))

    for j, clf in enumerate(clfs):
        print j, clf
        dataset_blend_test_j = np.zeros((X_submission.shape[0], len(skf)))
        for i, (train, test) in enumerate(skf):
            print "Fold", i
            X_train = X[train]
            y_train = y[train]
            X_test = X[test]
            y_test = y[test]
            clf.fit(X_train, y_train)
            y_submission = clf.predict_proba(X_test)[:, 1]
            dataset_blend_train[test, j] = y_submission
            dataset_blend_test_j[:, i] = clf.predict_proba(X_submission)[:, 1]
        dataset_blend_test[:, j] = dataset_blend_test_j.mean(1)

    print "Blending."
    # clf = LogisticRegression(C=2, penalty='l2', class_weight='balanced', n_jobs=-1)
    clf = linear_model.RidgeCV(alphas=np.linspace(0, 200), cv=LM_CV_NUM)
    # clf = GradientBoostingClassifier(learning_rate=0.02, subsample=0.5, max_depth=6, n_estimators=100)
    clf.fit(dataset_blend_train, y)
    # y_submission = clf.predict_proba(dataset_blend_test)[:, 1]
    print clf.coef_, clf.intercept_
    y_submission = clf.predict(dataset_blend_test)  # for RidgeCV

    print "Linear stretch of predictions to [0,1]"
    y_submission = ((y_submission - y_submission.min()) /
                    (y_submission.max() - y_submission.min()))

    print "blend result"
    save_submission(os.path.join(consts.SUBMISSION_PATH,
                                 MODEL_NAME + '_' +
                                 strftime("%m_%d_%H_%M_%S", localtime()) +
                                 '.csv'),
                    test_uid, y_submission)