The following 11 code examples, extracted from open-source Python projects, illustrate how to use sklearn.linear_model.LassoCV().
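Before the project snippets, here is a minimal, self-contained sketch of the common pattern they all share (synthetic data from make_regression; the alpha grid is an arbitrary illustration, not taken from any of the projects below):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

# Synthetic regression problem, purely for illustration.
X, y = make_regression(n_samples=200, n_features=20, noise=0.5, random_state=0)

# LassoCV cross-validates over the alpha grid and refits on the best value.
reg = LassoCV(alphas=np.logspace(-4, 0, 20), cv=5, max_iter=10000)
reg.fit(X, y)

print("best alpha:", reg.alpha_)
print("non-zero coefficients:", int((reg.coef_ != 0).sum()))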
def __remodel__(self, model_type, regr, __X_train, __Y_train):
    """Retrain certain models based on optimal alphas and/or ratios."""
    if model_type == "ridge":
        alpha = regr.alpha_
        regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
    elif model_type == "lasso":
        alpha = regr.alpha_
        regr = linear_model.LassoCV(alphas=self.__realpha__(alpha),
                                    max_iter=5000, cv=10)
    elif model_type == "elasticnet":
        alpha = regr.alpha_
        ratio = regr.l1_ratio_
        regr = linear_model.ElasticNetCV(l1_ratio=self.__reratio__(ratio),
                                         alphas=self.__elasticnet_init["alpha"],
                                         max_iter=1000, cv=3)
    regr.fit(__X_train, __Y_train)
    return regr
def run_lasso(X, y, max_iter=3000, cv=5, n_threads=1):
    """Implement LassoCV in sklearn.

    Args:
        X (np.array): scaled X.
        y (pd.df): four-column response table.
        max_iter (int): max iterations.
        cv (int): number of CV folds.
        n_threads (int): number of threads to use for parallel computing.

    Returns:
        float: trained alpha value.
    """
    logger.info('Implementing LassoCV with {} iter. and {}-fold CV'.format(max_iter, cv))
    # generate logit response
    y_logit = logit((y.nMut + 0.5) / (y.length * y.N))
    # sub-sample X and y (300,000 rows)
    use_ix = np.random.choice(y_logit.shape[0], 300000, replace=False)
    Xsub = X[use_ix, :]
    ysub = y_logit[use_ix]
    reg = LassoCV(max_iter=max_iter, cv=cv, copy_X=False, n_jobs=n_threads)
    lassocv = reg.fit(Xsub, ysub)
    logger.info('LassoCV alpha = {}'.format(lassocv.alpha_))
    return lassocv.alpha_
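The logit response above is scipy.special.logit applied to a smoothed per-base mutation rate. A minimal sketch of just that transform, with invented numbers and the column names the snippet expects:

import pandas as pd
from scipy.special import logit

# Toy response table with the columns run_lasso expects (values invented).
y = pd.DataFrame({'nMut': [3, 0, 12],
                  'length': [1000, 500, 2000],
                  'N': [100, 100, 100]})

# The 0.5 pseudo-count keeps logit finite when nMut == 0; the result is an
# unbounded response suitable for a linear model.
y_logit = logit((y.nMut + 0.5) / (y.length * y.N))
print(y_logit)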
def predicted_vs_actual_sale_price(self, x_train, y_train, title_name):
    # Split the training data into an extra set of test
    x_train_split, x_test_split, y_train_split, y_test_split = train_test_split(x_train, y_train)
    print(np.shape(x_train_split), np.shape(x_test_split),
          np.shape(y_train_split), np.shape(y_test_split))
    lasso = LassoCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01,
                            0.03, 0.06, 0.1, 0.3, 0.6, 1],
                    max_iter=50000, cv=10)
    # lasso = RidgeCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01,
    #                         0.03, 0.06, 0.1, 0.3, 0.6, 1], cv=10)
    lasso.fit(x_train_split, y_train_split)
    y_predicted = lasso.predict(X=x_test_split)
    plt.figure(figsize=(10, 5))
    plt.scatter(y_test_split, y_predicted, s=20)
    rmse_pred_vs_actual = self.rmse(y_predicted, y_test_split)
    plt.title(''.join([title_name, ', Predicted vs. Actual.',
                       ' rmse = ', str(rmse_pred_vs_actual)]))
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    plt.plot([min(y_test_split), max(y_test_split)],
             [min(y_test_split), max(y_test_split)])
    plt.tight_layout()
def get_logistic_regression_coefs_l1(self, category,
                                     clf=LassoCV(alphas=[0.1, 0.001],
                                                 max_iter=10000,
                                                 n_jobs=-1)):
    '''Approximates an l1-penalized logistic regression score by fitting
    LassoCV to a continuous relaxation of the boolean category labels.

    Parameters
    ----------
    category : str
        category name to score

    Returns
    -------
    (coefficient array, accuracy, majority class baseline accuracy)
    '''
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # cross_val_predict now lives in sklearn.model_selection.
    from sklearn.model_selection import cross_val_predict
    y = self._get_mask_from_category(category)
    y_continuous = self._get_continuous_version_boolean_y(y)
    # X = TfidfTransformer().fit_transform(self._X)
    X = self._X
    # cross_val_predict clones clf, so a single fit here is sufficient.
    clf.fit(X, y_continuous)
    y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
    acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
    return clf.coef_, acc, baseline
def test_get_errors_param(self):
    """
    Test known models we can get the cv errors for alpha selection
    """
    # Test original CV models
    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            model = AlphaSelection(model())
            X, y = make_regression()
            model.fit(X, y)
            errors = model._find_errors_param()
            self.assertTrue(len(errors) > 0)
        except YellowbrickValueError:
            self.fail("could not find errors on {}".format(model.name))
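The tests above and below exercise Yellowbrick's AlphaSelection visualizer. A hedged usage sketch of the visualizer outside the test harness (assuming a Yellowbrick version where show() is available; the data and grid are illustrative):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV
from yellowbrick.regressor import AlphaSelection

X, y = make_regression(n_samples=200, n_features=20, noise=0.5, random_state=0)

# Wrap a *CV estimator; AlphaSelection plots CV error against the alpha grid.
viz = AlphaSelection(LassoCV(alphas=np.logspace(-4, 0, 50)))
viz.fit(X, y)
viz.show()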
def lasso_train(X, y):
    model_lasso = LassoCV(alphas=[1, 0.1, 0.001, 0.0005]).fit(X, y)
    print('lasso mean cv is', rmse_cv(model_lasso, X, y).mean())
    return model_lasso

#%%
def train_lasso_model(_train_x, train_y, _predict_x):
    print_title("Lasso Regressor")
    train_x, predict_x = standarize_feature(_train_x, _predict_x)
    reg = linear_model.LassoCV(precompute=True, cv=5, verbose=1, n_jobs=4)
    reg.fit(train_x, train_y)
    print("alphas: %s" % reg.alphas_)
    print("mse path: %s" % np.mean(reg.mse_path_, axis=1))
    itemindex = np.where(reg.alphas_ == reg.alpha_)
    print("itemindex: %s" % itemindex)
    _mse = np.mean(reg.mse_path_[itemindex[0], :])
    print("Best alpha using built-in LassoCV: %f (mse: %f)" % (reg.alpha_, _mse))
    # Refit a plain Lasso at the selected alpha.
    alpha = reg.alpha_
    reg = linear_model.Lasso(alpha=alpha)
    reg.fit(train_x, train_y)
    n_nonzeros = (reg.coef_ != 0).sum()
    print("Non-zero coefs: %d" % n_nonzeros)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)
    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
def test_real_model(self):
    """
    Test that model name works for sklearn estimators
    """
    model1 = LassoCV()
    model2 = LSHForest()
    model3 = KMeans()
    model4 = RandomForestClassifier()
    self.assertEqual(get_model_name(model1), 'LassoCV')
    self.assertEqual(get_model_name(model2), 'LSHForest')
    self.assertEqual(get_model_name(model3), 'KMeans')
    self.assertEqual(get_model_name(model4), 'RandomForestClassifier')
def test_regressor_cv(self):
    """
    Ensure only "CV" regressors are allowed
    """
    for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
        with self.assertRaises(YellowbrickTypeError):
            alphas = AlphaSelection(model())

    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            alphas = AlphaSelection(model())
        except YellowbrickTypeError:
            self.fail("could not instantiate RegressorCV on alpha selection")
def test_get_alphas_param(self):
    """
    Assert that we can get the alphas from ridge, lasso, and elasticnet
    """
    alphas = np.logspace(-10, -2, 100)

    # Test original CV models
    for model in (RidgeCV, LassoCV, ElasticNetCV):
        try:
            model = AlphaSelection(model(alphas=alphas))
            malphas = model._find_alphas_param()
            self.assertTrue(np.array_equal(alphas, malphas))
        except YellowbrickValueError:
            self.fail("could not find alphas on {}".format(model.name))
def Lasso_regression():
    lasso = LassoCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01,
                            0.03, 0.06, 0.1, 0.3, 0.6, 1],
                    max_iter=50000, cv=10)
    lasso.fit(train_split, y)
    alpha = lasso.alpha_
    print("Best alpha :", alpha)

    print("Try again for more precision with alphas centered around " + str(alpha))
    lasso = LassoCV(alphas=[alpha * .6, alpha * .65, alpha * .7, alpha * .75,
                            alpha * .8, alpha * .85, alpha * .9, alpha * .95,
                            alpha, alpha * 1.05, alpha * 1.1, alpha * 1.15,
                            alpha * 1.25, alpha * 1.3, alpha * 1.35, alpha * 1.4],
                    max_iter=50000, cv=10)
    lasso.fit(train_split, y)
    alpha = lasso.alpha_
    print("Best alpha :", alpha)

    print("Lasso RMSE on Training set :", rmse_cv(lasso, train_split, y).mean())
    y_train_las = lasso.predict(train_split)

    # Plot residuals
    plt.scatter(y_train_las, y_train_las - y, c="blue", marker="s", label="Training data")
    plt.title("Linear regression with Lasso regularization")
    plt.xlabel("Predicted values")
    plt.ylabel("Residuals")
    plt.legend(loc="upper left")
    plt.hlines(y=0, xmin=10.5, xmax=13.5, color="red")
    plt.show()

    # Plot predictions
    plt.scatter(y_train_las, y, c="blue", marker="s", label="Training data")
    plt.title("Linear regression with Lasso regularization")
    plt.xlabel("Predicted values")
    plt.ylabel("Real values")
    plt.legend(loc="upper left")
    plt.plot([10.5, 13.5], [10.5, 13.5], c="red")
    plt.show()

    # Plot important coefficients
    coefs = pd.DataFrame(lasso.coef_, index=X_train.columns, columns=['value'])
    # print("Lasso picked " + str(sum(coefs != 0)) + " features and eliminated the other " +
    #       str(sum(coefs == 0)) + " features")
    # imp_coefs = pd.concat([coefs.sort_values().head(10),
    #                        coefs.sort_values().tail(10)])
    # imp_coefs.plot(kind="barh")
    # plt.title("Coefficients in the Lasso Model")
    # plt.show()
    return coefs, lasso
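The two-pass search in Lasso_regression (a coarse grid, then a finer grid centered on the first winner) is a reusable idiom. A standalone sketch on synthetic data, with grids chosen only for illustration:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

X, y = make_regression(n_samples=300, n_features=30, noise=1.0, random_state=0)

# Pass 1: coarse logarithmic grid.
coarse = LassoCV(alphas=np.logspace(-4, 0, 13), max_iter=50000, cv=10).fit(X, y)

# Pass 2: finer linear grid centered on the coarse winner.
fine_grid = coarse.alpha_ * np.linspace(0.6, 1.4, 17)
fine = LassoCV(alphas=fine_grid, max_iter=50000, cv=10).fit(X, y)

print("coarse alpha:", coarse.alpha_)
print("refined alpha:", fine.alpha_)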