The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.linear_model.Ridge().
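Before the project excerpts, here is a minimal, self-contained sketch of the basic pattern; the synthetic data and the alpha value are illustrative only, not taken from any project below:

import numpy as np
from sklearn.linear_model import Ridge

# Illustrative data: 100 samples, 5 features, noisy linear target.
rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = X @ np.array([1.5, -2.0, 0.0, 3.0, 0.5]) + 0.1 * rng.randn(100)

# alpha is the L2 penalty strength; larger values shrink the coefficients more.
reg = Ridge(alpha=1.0)
reg.fit(X, y)
print(reg.coef_, reg.intercept_)
print(reg.predict(X[:3]))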
def refit_model(self):
    """Learns a new surrogate model using the data observed so far."""
    # only fit the model if there is data for it.
    if len(self.known_models) > 0:
        self._build_feature_maps(self.known_models, self.ngram_maxlen, self.thres)

        X = sp.vstack([self._compute_features(mdl) for mdl in self.known_models], "csr")
        y = np.array(self.known_scores, dtype='float64')

        #A = np.dot(X.T, X) + lamb * np.eye(X.shape[1])
        #b = np.dot(X.T, y)
        self.surr_model = lm.Ridge(self.lamb_ridge)
        self.surr_model.fit(X, y)

# NOTE: if the search space has holes, it breaks. needs try/except module.
def model_cross_valid(X, Y):
    seed = 7
    kfold = model_selection.KFold(n_splits=10, random_state=seed)

    def build_model(model_name):
        model = model_name()
        return model

    scoring = 'neg_mean_squared_error'
    # + random forest, boost, lstm, gbdt
    for model_name in [LinearRegression, ElasticNet]:
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name, results.mean())
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'alpha': np.logspace(-5, 5)}]

    reg = GridSearchCV(linear_model.Ridge(alpha=0.5), tuned_parameters, cv=5, scoring='mean_squared_error')
    reg.fit(self.X_train, self.y_train)

    print("Best parameters set found on development set:\n")
    print(reg.best_params_)

    print("Grid scores on development set:\n")
    for params, mean_score, scores in reg.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

    print(reg.scorer_)

    print("MSE for test data set:")
    y_true, y_pred = self.y_test, reg.predict(self.X_test)
    print(mean_squared_error(y_pred, y_true))
def solveSingle(self, inputDF, outputDict, rho, beta_target):
    I, J, V, Y = [], [], [], []
    fd = {}  # mapping feature names to consecutive integers, starting with 0
    for i, (id, x) in enumerate(inputDF.items()):
        l = outputDict.get(id)
        for k, v in x.items():
            I.append(i)
            J.append(k)
            V.append(v)
            upd(fd, k)
        Y.append(l)
    J = [fd[k] for k in J]
    X = sparse.coo_matrix((V, (I, J)), shape=(I[-1] + 1, len(fd)))
    fd_reverse = [k for k, v in sorted(fd.items(), key=lambda t: t[1])]
    # y_new = y - X . beta_target
    # converting a proximal least-squares problem to a ridge regression
    ZmUl = np.array([beta_target.get(k, 0) for k in fd_reverse])
    y_new = np.array(Y) - X * ZmUl

    ridge = Ridge(alpha=rho, fit_intercept=False)
    ret = ridge.fit(X, y_new)
    #ret = self.lr.fit(X, y_new)

    # ordered list of feature names according to their integer ids in fd
    #raise ValueError('fd_reverse = %s \n X = %s \n J = %s \n I = %s \n V = %s \n Y = %s \n y_new = %s \n ret.coef_ = %s \n ZmUl = %s \n'
    #                 % (str(fd_reverse), str(X), str(J), str(I), str(V), str(Y), str(y_new), str(ret.coef_), str(ZmUl)))
    return dict(zip(fd_reverse, (ret.coef_ + ZmUl).tolist()))
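A note on the substitution in solveSingle above (my reading of the code, not commentary from the original project): the proximal least-squares problem

    min over beta of  ||y - X*beta||^2 + rho*||beta - beta_target||^2

becomes, after substituting delta = beta - beta_target,

    min over delta of  ||(y - X*beta_target) - X*delta||^2 + rho*||delta||^2,

which is a plain ridge regression on the shifted targets y_new = y - X*beta_target. The returned value ret.coef_ + ZmUl recovers beta = delta_hat + beta_target.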
def test_classes__property():
    # Test that classes_ property matches best_estimator_.classes_
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    Cs = [.1, 1, 10]
    grid_search = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': Cs})
    grid_search.fit(X, y)
    assert_array_equal(grid_search.best_estimator_.classes_, grid_search.classes_)

    # Test that regressors do not have a classes_ attribute
    grid_search = dcv.GridSearchCV(Ridge(), {'alpha': [1.0, 2.0]})
    grid_search.fit(X, y)
    assert not hasattr(grid_search, 'classes_')

    # Test that the grid searcher has no classes_ attribute before it's fit
    grid_search = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': Cs})
    assert not hasattr(grid_search, 'classes_')

    # Test that the grid searcher has no classes_ attribute without a refit
    grid_search = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': Cs}, refit=False)
    grid_search.fit(X, y)
    assert not hasattr(grid_search, 'classes_')
def fit_regression(X, y, regression_class=LinearRegression, regularization_const=.001):
    '''
    Given a dataset (X, y), a regression class from scikit-learn, and a
    regularization constant (required when the regression class is Lasso or Ridge),
    fit the model and report a cross-validated RMSE.

    X (pandas DataFrame): The data.
    y (pandas DataFrame or Series): The answers.
    regression_class (class): One of sklearn.linear_model.[LinearRegression, Ridge, Lasso]
    regularization_const: the regularization parameter for Ridge or Lasso.
        Called alpha by scikit-learn for interface reasons.

    Return:
        tuple, (the_fitted_regressor, mean(cross_val_score)).
    '''
    if regression_class is LinearRegression:
        predictor = regression_class()
    else:
        predictor = regression_class(alpha=regularization_const, normalize=True)

    predictor.fit(X, y)

    cross_scores = cross_val_score(predictor, X, y=y, scoring='neg_mean_squared_error')
    cross_scores_corrected = np.sqrt(-1 * cross_scores)  # scikit-learn returns negative values, and we need the root

    return (predictor, np.mean(cross_scores_corrected))
def train_ridge_linear_model(_train_x, train_y, _predict_x, sample_weight=None):
    print_title("Ridge Regressor")

    train_x, predict_x = standarize_feature(_train_x, _predict_x)

    # using the default CV
    alphas = [0.1, 1, 10, 100, 1e3, 1e4, 2e4, 5e4, 8e4, 1e5, 1e6, 1e7, 1e8]
    reg = linear_model.RidgeCV(alphas=alphas, store_cv_values=True)
    #reg.fit(train_x, train_y, sample_weight=sample_weight)
    reg.fit(train_x, train_y)
    cv_mse = np.mean(reg.cv_values_, axis=0)
    print("alphas: %s" % alphas)
    print("CV MSE: %s" % cv_mse)
    print("Best alpha using built-in RidgeCV: %f" % reg.alpha_)

    # generate the prediction using the best model
    alpha = reg.alpha_
    reg = linear_model.Ridge(alpha=alpha)
    #reg.fit(train_x, train_y, sample_weight=sample_weight)
    reg.fit(train_x, train_y)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)

    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
def test_clusterer_enforcement(self):
    """
    Assert that only clustering estimators can be passed to cluster viz
    """
    nomodels = [
        SVC, SVR, Ridge, RidgeCV, LinearRegression, RandomForestClassifier
    ]

    for nomodel in nomodels:
        with self.assertRaises(YellowbrickTypeError):
            visualizer = ClusteringScoreVisualizer(nomodel())

    models = [
        KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift, DBSCAN, Birch
    ]

    for model in models:
        try:
            visualizer = ClusteringScoreVisualizer(model())
        except YellowbrickTypeError:
            self.fail("could not pass clustering estimator to visualizer")
def residual_smooth(trajectory, reg_alpha, back_horizon):
    # Alternative method to calculate the smooth coefficients: try to fit y-values directly to explain smoothness
    clf = linear_model.Ridge(alpha=reg_alpha)

    # initialize an empty array to hold the autoregressed position values
    residual_ar_seg = np.empty(shape=[trajectory.shape[0], back_horizon])
    residual = trajectory.copy()  # initialize position vector to simply be the output vector

    for item in inPlay:
        for i in range(back_horizon):
            temp = np.roll(residual[item[0]:(item[1] + 1)], i + 1)
            for j in range(i + 1):
                temp[j] = 0
            residual_ar_seg[item[0]:(item[1] + 1), i] = temp.copy()

    rows_to_delete = []
    for item in inPlay:
        for i in range(2 * back_horizon):
            rows_to_delete.append(item[0] + i)

    residual = np.delete(residual, rows_to_delete, 0)
    residual_ar_seg = np.delete(residual_ar_seg, rows_to_delete, 0)

    # Use least-squares regression to find the best-fit set of coefficients for the velocity vectors
    #position_smooth_interpolate = np.linalg.lstsq(position_ar_seg, position)[0]
    # Note that in practice, the outcomes of position_smooth_coeff and position_smooth_interpolate seem to be quite similar
    clf.fit(residual_ar_seg, residual)  # addition to switch from velocity to position
    residual_smooth_interpolate = clf.coef_  # addition to switch from velocity to position

    return residual_smooth_interpolate
def __init__(self, probabilistic_estimator, stepsize=0.01, verbose=0,
             fit_intercept=False, sparse_output=True, **ridge_params):
    """
    Arguments:
        probabilistic_estimator -- Estimator capable of predict_proba

    Keyword Arguments:
        stepsize -- stepsize for the exhaustive search of the optimal threshold
        fit_intercept -- fit intercept in Ridge regression
        sparse_output -- Predict returns csr in favor of ndarray
        **ridge_params -- Passed down to Ridge regression
    """
    self.model = probabilistic_estimator
    self.verbose = verbose
    self.ridge = Ridge(fit_intercept=fit_intercept, **ridge_params)
    self.stepsize = stepsize
    self.sparse_output = sparse_output
def cv_SVR(xM, yV, svr_params, n_splits=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Cross validation is performed so as to generate prediction output for all input molecules.
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR(**svr_params)
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def _cv_r0(method, xM, yV, alpha, n_splits=5, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def cvLOO(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge', 'Lasso'
    leave-one-out cross validation is performed so as to generate prediction output for all input molecules
    """
    n_splits = xM.shape[0]

    # print(xM.shape, yV.shape)
    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n = model_selection.KFold(n_splits=n_splits)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def mlr_val_vseq_ridge(RM, yE, v_seq, alpha=.5, disp=True, graph=True):
    """
    Validation is performed using vseq-indexed values.
    """
    org_seq = list(range(len(yE)))
    t_seq = [x for x in org_seq if x not in v_seq]

    RMt, yEt = RM[t_seq, :], yE[t_seq, 0]
    RMv, yEv = RM[v_seq, :], yE[v_seq, 0]

    clf = linear_model.Ridge(alpha=alpha)
    clf.fit(RMt, yEt)

    if disp:
        print('Training result')
    mlr_show(clf, RMt, yEt, disp=disp, graph=graph)

    if disp:
        print('Validation result')
    r_sqr, RMSE = mlr_show(clf, RMv, yEv, disp=disp, graph=graph)

    #if r_sqr < 0:
    #    print('v_seq:', v_seq, '--> r_sqr = ', r_sqr)

    return r_sqr, RMSE
def cv_SVR(xM, yV, svr_params, n_splits=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Cross validation is performed so as to generate prediction output for all input molecules.
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR(**svr_params)
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def _cv_r0(method, xM, yV, alpha, n_splits=5, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def cv(method, xM, yV, alpha, n_splits=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def predict(self, new_smiles, mode={'tool': 'sklearn', 'type': 'ridge', 'alpha': 0.5}):
    """
    Predict for new SMILES codes.
    """
    if mode['type'].lower() == 'ridge':
        clf = linear_model.Ridge(alpha=mode['alpha'])
    else:
        raise TypeError('The requested mode is not supported yet.')

    # Find a weight vector
    clf.fit(self.xM, self.yV)

    # Predict for new molecules
    new_xM = jchem.gfpM(new_smiles)
    new_yV_pred = clf.predict(new_xM)

    return new_yV_pred
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print('If scoring is not r2 but an error metric, the output score is sign-reversed!')
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    #params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace(*alphas_log)}
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)
    gs = model_selection.GridSearchCV(clf, params, scoring=scoring, cv=kf_n, n_jobs=n_jobs)
    gs.fit(xM, yV)

    return gs
def cv(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules

    Return
    --------
    yV_pred
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def _cv_LOO_r0(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    n_folds = xM.shape[0]

    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    # print("Note - shuffling is not applied because of LOO.")
    kf_n_c = model_selection.KFold(n_splits=n_folds)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def cv(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    #params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace(*alphas_log)}
    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)
    gs = grid_search.GridSearchCV(clf, params, scoring=scoring, cv=kf_n, n_jobs=n_jobs)
    gs.fit(xM, yV)

    return gs
def cv(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=shuffle)
    yV_pred = cross_validation.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def _cv_LOO_r0(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    n_folds = xM.shape[0]

    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds)
    yV_pred = cross_validation.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    mse_scores = cross_val_score(reg, X, y, cv=5, scoring="mean_squared_error")
    expected_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(mse_scores, expected_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cval.cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cval.cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    mse_scores = cval.cross_val_score(reg, X, y, cv=5, scoring="mean_squared_error")
    expected_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(mse_scores, expected_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cval.cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
def __init__(self, info, verbose=True, debug_mode=False):
    self.label_num = info['label_num']
    self.target_num = info['target_num']
    self.task = info['task']
    self.metric = info['metric']
    self.postprocessor = None
    #self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True)  # To calibrate proba
    self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)  # To calibrate proba
    if debug_mode >= 2:
        self.name = "RandomPredictor"
        self.model = RandomPredictor(self.target_num)
        self.predict_method = self.model.predict_proba
        return
    if info['task'] == 'regression':
        if info['is_sparse'] == True:
            self.name = "BaggingRidgeRegressor"
            self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingRegressor"
            self.model = GradientBoostingRegressor(n_estimators=1, max_depth=4, min_samples_split=14, verbose=verbose, warm_start=True)
        self.predict_method = self.model.predict  # Always predict probabilities
    else:
        if info['has_categorical']:  # Out of laziness, we do not convert categorical variables...
            self.name = "RandomForestClassifier"
            self.model = RandomForestClassifier(n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
        elif info['is_sparse']:
            self.name = "BaggingNBClassifier"
            self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingClassifier"
            self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", random_state=1, warm_start=True)")
        if info['task'] == 'multilabel.classification':
            self.model = MultiLabelEnsemble(self.model)
        self.predict_method = self.model.predict_proba
def train(self):
    """"""
    start = time.time()

    print('size before truncating outliers is %d ' % len(self.TrainData))
    TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]
    print('size after truncating outliers is %d ' % len(TrainData))

    X = TrainData.drop(self._l_drop_cols, axis=1)
    Y = TrainData['logerror']
    self._l_train_columns = X.columns
    X = X.values.astype(np.float32, copy=False)

    rr = Ridge(alpha=self._alpha, max_iter=self._iter, solver='svd')
    self._model = rr.fit(X, Y)

    end = time.time()
    print('time consumed %d ' % ((end - start)))

    self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                        datetime.now().strftime('%Y%m%d-%H:%M:%S'))
    with open(self._f_eval_train_model, 'wb') as o_file:
        pickle.dump(self._model, o_file, -1)
    o_file.close()

    # ignore_index will reset the index; otherwise the indices would overlap
    self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
                               ignore_index=True)

    return
def predict(self):
    """
    Train the regression model with predictions on the validation set.
    Save the learned weights to apply to test set predictions.
    """
    pred_array = np.stack(self.pred_list, -1)
    reg = linear_model.Ridge(alpha=.5)
    pred = np.reshape(pred_array, [-1, len(self.pred_list)])
    y = np.reshape(self.labels_val, [-1, 1])
    reg.fit(pred, y)
    self.weights = reg.coef_[0].tolist()
def ridge_regression_model(parameter_array):
    alpha_value = parameter_array[0]
    # ridge_solver = parameter_array[0]
    return linear_model.Ridge(alpha=alpha_value, fit_intercept=True, normalize=True,
                              copy_X=True, max_iter=None, tol=0.001, solver='auto',
                              random_state=None)
def setClf(self):
    # self.clf = Ridge(alpha=0.0000001, tol=0.0000001)
    clf = LinearRegression()
    min_max_scaler = preprocessing.MinMaxScaler()
    self.clf = Pipeline([('scaler', min_max_scaler), ('estimator', clf)])
    return
def update_sparse_predictions(Y, D, W, Psi, lda=0.0001):
    X = np.zeros((Psi.shape[0], W.shape[1]))
    for i in range(W.shape[1]):
        used = (W[:, i] != 0)
        if used.sum() > 0:
            d = np.copy(D)
            d = d[:, used]
            model = Ridge(alpha=lda)
            model.fit(d, Y[:, i])
            X[:, i] = model.predict(Psi[:, used])
    return X
def __init__(self, mu=.5, tau=1.0, lamda=1, use_gpu=False, threshold=1e-16,
             alpha=None, l1_ratio=None, fit_intercept=True, normalize=False,
             precompute=False, max_iter=10000, copy_X=True, tol=1e-4,
             warm_start=False, positive=False, random_state=None, selection='cyclic'):
    vs = L1L2(mu=mu, tau=tau, use_gpu=use_gpu, threshold=threshold, alpha=alpha,
              l1_ratio=l1_ratio, fit_intercept=fit_intercept, normalize=normalize,
              precompute=precompute, max_iter=max_iter, copy_X=copy_X, tol=tol,
              warm_start=warm_start, positive=positive, random_state=random_state,
              selection=selection)
    mdl = Ridge(alpha=lamda, fit_intercept=fit_intercept, normalize=normalize,
                copy_X=copy_X, max_iter=max_iter, tol=tol, random_state=random_state)
    super(L1L2TwoStep, self).__init__((('l1l2', vs), ('ridge', mdl)))

    self.mu = mu
    self.tau = tau
    self.lamda = lamda
    self.alpha = alpha
    self.l1_ratio = l1_ratio
    self.use_gpu = use_gpu
    self.threshold = threshold
    self.fit_intercept = fit_intercept
    self.normalize = normalize
    self.precompute = precompute
    self.max_iter = max_iter
    self.copy_X = copy_X
    self.tol = tol
    self.warm_start = warm_start
    self.positive = positive
    self.intercept_ = 0.0
    self.random_state = random_state
    self.selection = selection
def model_fit_and_test(TrainX, TrainY, TestX, TestY):
    def build_model(model_name):
        model = model_name()
        return model

    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX, TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print(resid)
        print("Residual sum of squares: %f" % np.mean(resid ** 2))
        #print(model.predict(TestX))
        #print(TestY)

        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()
        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1, X2, X3, X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = stats.diagnostic.het_breushpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)

        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        #print("Lagrange Multiplier Statistics:", ljung_box[0])
        print("Test Autocorrelation P-values:", ljung_box[1])
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
def lsClassifier(trainData, trainLabel, testData, testLabel, lambdaS):
    reg = linear_model.Ridge(alpha=lambdaS)
    reg.fit(trainData, trainLabel.tolist())
    W = reg.coef_

    testResult = np.array(testData.dot(W))
    testResult = np.where(testResult > 0, 1, -1).astype(np.int32)
    accu = np.sum(np.where(testResult == testLabel, 1, 0)) / float(testLabel.shape[0])

    return testResult, accu
def __init__(self, isTrain):
    super(RegressionRidgeReg, self).__init__(isTrain)
    # data preprocessing
    #self.dataPreprocessing()

    # Create linear regression object
    self.model = linear_model.Ridge(alpha=24420.530945486549)
def localupdate(b, A, z, u, rho, eps):
    ridge = Ridge(alpha=rho / 2.0, fit_intercept=False, tol=eps)
    #print "b", b
    #print "z", z
    #print "u", u
    #print A * (z - u / rho)
    b_new = b - A * (z - u / rho)
    #print "bnew", b_new
    ret = ridge.fit(A, b_new)
    #print ret
    #print ret.coef_
    return (ret.coef_ + (z - u / rho))
def get_next_by_EI(ni, alpha, lr, lr_time, X, y, ei_xi):
    '''
    Args:
        ni: number of units in each layer
        alpha: lambda for Ridge regression
        lr: fitted performance model in burning period
        lr_time: fitted time model in burning period
        X: all previous inputs x
        y: all previous observations corresponding to X
        ei_xi: parameter for EI exploitation-exploration trade-off

    Returns:
        x_next: a nested list [[0,1,0], [1,0,0,0], ...] as the next input x
            to run a specified pipeline
    '''
    var = np.var(lr.predict(X) - y)
    m = np.dot(X.T, X)
    inv = np.linalg.inv(m + alpha * np.eye(sum(ni)))
    maxEI = float('-inf')
    x_next = None
    for i in range(np.prod(ni)):
        x = [[0] * n for n in ni]
        x_flat = []
        pipeline = get_pipeline_by_flatten_index(ni, i)
        for layer in range(len(ni)):
            x[layer][pipeline[layer]] = 1
            x_flat += x[layer]
        x_flat = np.array(x_flat)
        mu_x = lr.predict([x_flat])
        var_x = var * (1 + np.dot(np.dot(x_flat, inv), x_flat.T))
        sigma_x = np.sqrt(var_x)
        u = (np.min(y) - ei_xi - mu_x) / sigma_x
        EI = sigma_x * (u * norm.cdf(u) + norm.pdf(u))
        estimated_time = lr_time.predict([x_flat])[0]
        EIPS = EI / estimated_time
        if EIPS > maxEI:
            maxEI = EIPS
            x_next = x
    return x_next
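For reference (my summary of the formula computed in the loop above, not text from the original project): with a Gaussian predictive distribution N(mu(x), sigma(x)^2) and a minimization objective, the expected improvement is

    EI(x) = sigma(x) * (u * Phi(u) + phi(u)),  where  u = (y_min - xi - mu(x)) / sigma(x),

with Phi and phi the standard normal CDF and PDF, and xi (ei_xi in the code) an exploration bonus. Dividing EI by the predicted runtime gives EI per second (EIPS), which is what the loop actually maximizes.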
def main(dataset_size, test_proportion):
    diabetes = load_diabetes()
    X = diabetes.data[:dataset_size]
    y = diabetes.target[:dataset_size]

    fig, ax_list = plt.subplots(3, 1, figsize=(8, 6))
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Ridge, ax=ax_list[0])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Lasso, ax=ax_list[1])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=LinearRegression, ax=ax_list[2])

    plt.tight_layout()
    plt.show()
def get_models4ensamble(conf):
    models = []
    #models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    #models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html

    # 0 was too big to run with depth set to 1, and 1 was overfitting a bit
    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 3, "eta": 0.1,
                      "min_child_weight": 5, "subsample": 0.5, "nthread": 4,
                      "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}
    else:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 10, "eta": 0.1,
                      "min_child_weight": 8, "subsample": 0.5, "nthread": 4,
                      "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}

    models = [
        #DLModel(conf),
        #LRModel(conf, model=linear_model.BayesianRidge()),
        #LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.1)),
        #LRModel(conf, model=linear_model.Ridge(alpha=.5)),
        #LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
        #                              ('linear', LinearRegression(fit_intercept=False))])),
        XGBoostModel(conf, xgb_params, use_cv=True),
        LRModel(conf, model=linear_model.Lasso(alpha=0.3)),
        RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.2)),
        ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
        #AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
    ]
    return models
    #return [XGBoostModel(conf, xgb_params, use_cv=True)]
def fc_kernel(X, Y, copy_X=True, W=None, B=None, ret_reg=False, fit_intercept=True):
    """
    return: n c
    """
    assert copy_X == True
    assert len(X.shape) == 2
    if dcfgs.ls == cfgs.solvers.gd:
        w = Worker()
        def wo():
            from .GDsolver import fc_GD
            a, b = fc_GD(X, Y, W, B, n_iters=1)
            return {'a': a, 'b': b}
        outputs = w.do(wo)
        return outputs['a'], outputs['b']
    elif dcfgs.ls == cfgs.solvers.tls:
        return tls(X, Y, debug=True)
    elif dcfgs.ls == cfgs.solvers.keras:
        _reg = keras_kernel()
        _reg.fit(X, Y, W, B)
        return _reg.coef_, _reg.intercept_
    elif dcfgs.ls == cfgs.solvers.lightning:
        #_reg = SGDRegressor(eta0=1e-8, intercept_decay=0, alpha=0, verbose=2)
        _reg = CDRegressor(n_jobs=-1, alpha=0, verbose=2)
        if 0:
            _reg.intercept_ = B
            _reg.coef_ = W
    elif dcfgs.fc_ridge > 0:
        _reg = Ridge(alpha=dcfgs.fc_ridge)
    else:
        _reg = LinearRegression(n_jobs=-1, copy_X=copy_X, fit_intercept=fit_intercept)
    _reg.fit(X, Y)
    if ret_reg:
        return _reg
    return _reg.coef_, _reg.intercept_
def ridge_train(X, y):
    alphas = [0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75]
    cv_ridge = [rmse_cv(Ridge(alpha=alpha), X, y).mean() for alpha in alphas]
    cv_ridge = pd.Series(cv_ridge, index=alphas)
    cv_ridge.plot(title="Validation - Just Do It")
    print('min cv is : ', cv_ridge.min())
    return alphas[cv_ridge.values.argmin()]

#%%
# ridge regression doesn't remove any property
def spot_check(X, y):
    if type == 'regression':
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),
            # (RandomForestRegressor(), 'Random Forest Regressor'),
            # (BaggingRegressor(), 'Bagging Regressor'),
            # (GradientBoostingRegressor(), 'Gradient Boosted Regression'),
            # (SVR(), 'Support Vector Regression')
        ]

    splits = 5
    scores = []

    for model, model_name in models:
        score = check_model(model, splits, X, y)
        # get average score
        scores.append(score)

    model_names = map(lambda x: x[1], models)
    for name, score in zip(model_names, scores):
        print('%s: %f' % (name, score))
def get_classifier(self, X, Y):
    """
    Fit the classifier (a Ridge model) on the training data.
    :param X: training features
    :param Y: training labels
    :return: fitted classifier
    """
    clf = Ridge()
    clf.fit(X, Y)
    return clf
def ridge_regression(data, a):
    features = data.columns.tolist()
    features.remove('label')
    response = ['label']

    # build the Ridge Regression model
    lr = Ridge(alpha=a)

    # response variable: label (as a DataFrame)
    y = data[response]
    # feature matrix (as a DataFrame)
    X = data[features]

    # _leave_one_out(lr, X.values, y.values)

    # fit regression model to the data
    model = lr.fit(X, y)
    # predict with the fitted model; predicted_y is a numpy array
    predicted_y = model.predict(X)
    # convert y from a DataFrame to a numpy array so the two can be compared
    y = np.array(y)

    # print evaluation results
    _print_y_and_predicted_y_and_corr(y, predicted_y)
    _print_r2_score(y, predicted_y)
    _print_coefficients(model, features, '~/Desktop/??_???_lt30.csv')
    _print_MSE(y, predicted_y)
    plot_true_and_pred_scatter(y, predicted_y)
    # std_error(y, predicted_y)
def _load_model(self, model_id):
    _, conn = get_engine()

    #todo
    models = {
        'QXgb': QXgb,
        'QXgb2': QXgb2,
        'Ridge': Ridge,
        'RidgeClassifier': RidgeClassifier,
        'KNeighborsClassifier': KNeighborsClassifier,
        'QAvg': QAvg,
        'QRankedAvg': QRankedAvg,
        'QRankedByLineAvg': QRankedByLineAvg,
        'QStackModel': QStackModel,
        'LogisticRegression': LogisticRegression,
        'DecisionTreeClassifier': DecisionTreeClassifier,
        'QPostProcessingModel': QPostProcessingModel,
        'RandomForestClassifier': RandomForestClassifier,
        'ExtraTreesClassifier': ExtraTreesClassifier,
        'QAvgOneModelData': QAvgOneModelData,
        'QNN1': QNN1,
        'QNN2': QNN2,
    }

    res = conn.execute(
        """
        select cls, params, descr, predict_fn
        from qml_models
        where model_id='{}'
        """.format(model_id)
    ).fetchone()

    if not res:
        raise Exception('Missing {} model'.format(model_id))

    model = models[res['cls']](**json.loads(res['params']))
    self.add(model_id, model, res['descr'], res['predict_fn'])
    return model