The following 26 code examples, extracted from open source Python projects, illustrate how to use sklearn.ensemble.AdaBoostRegressor().
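Before the project examples, here is a minimal, self-contained sketch of the basic fit/predict cycle. The synthetic dataset, tree depth, learning rate, and estimator count below are illustrative assumptions, not taken from any of the projects that follow.

    # Minimal sketch (illustrative only): fit an AdaBoostRegressor on synthetic data.
    from sklearn.datasets import make_regression
    from sklearn.ensemble import AdaBoostRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeRegressor

    X, y = make_regression(n_samples=200, n_features=10, noise=5.0, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # A shallow decision tree is the conventional base estimator; the parameter
    # values here are arbitrary choices for demonstration.
    regr = AdaBoostRegressor(DecisionTreeRegressor(max_depth=3),
                             n_estimators=100, learning_rate=0.1, random_state=0)
    regr.fit(X_train, y_train)
    print("R^2 on held-out data:", regr.score(X_test, y_test))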
def cross_validate_best_known():
    '''
    Import and clean the tractor data, then do a cross validation on each of the
    three models we are training here: a RandomForest, a GradientBoost, and an
    AdaBoost backed by a DecisionTree. Print the scores.

    The parameters we're using here are the "best" that we've found so far using
    a grid search.
    '''
    tractor_data = pd.read_csv('data/train.csv')
    tractor_data = cln.clean_all(tractor_data)
    X = tractor_data
    y = tractor_data.pop('SalePrice')

    rf = RandomForestRegressor(max_features=2, min_samples_split=4,
                               n_estimators=50, min_samples_leaf=2)
    gb = GradientBoostingRegressor(loss='quantile', learning_rate=0.0001,
                                   n_estimators=50, max_features='log2',
                                   min_samples_split=2, max_depth=1)
    ada_tree_backing = DecisionTreeRegressor(max_features='sqrt', splitter='random',
                                             min_samples_split=4, max_depth=3)
    ab = AdaBoostRegressor(ada_tree_backing, learning_rate=0.1,
                           loss='square', n_estimators=1000)

    validate.cross_v_scores([rf, gb, ab], X, y)
    # RandomForestRegressor     -- RMLSE: -0.596797712098, R2:  0.0272065373946
    # GradientBoostingRegressor -- RMLSE: -0.996134592541, R2: -2.37202164829
    # AdaBoostRegressor         -- RMLSE: -0.706385708459, R2: -0.103966980393
def model_cross_valid(X, Y):
    seed = 7
    kfold = model_selection.KFold(n_splits=10, random_state=seed)

    def build_model(model_name):
        model = model_name()
        return model

    scoring = 'neg_mean_squared_error'
    # TODO: also try random forest, boosting (GBDT) and LSTM models
    for model_name in [LinearRegression, ElasticNet]:
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor,
    #                   DecisionTreeRegressor, SVR, RandomForestRegressor,
    #                   AdaBoostRegressor, GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name, results.mean())
def test_AdaBoostRegressor(*data):
    '''
    Test the regression with different numbers of base regressors.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    regr = ensemble.AdaBoostRegressor()
    regr.fit(X_train, y_train)
    ## graph
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    estimators_num = len(regr.estimators_)
    X = range(1, estimators_num + 1)
    ax.plot(list(X), list(regr.staged_score(X_train, y_train)), label="Training score")
    ax.plot(list(X), list(regr.staged_score(X_test, y_test)), label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostRegressor")
    plt.show()
def test_AdaBoostRegressor_learning_rate(*data):
    '''
    Test the performance with different learning rates.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    learning_rates = np.linspace(0.01, 1)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    training_scores = []
    testing_scores = []
    for learning_rate in learning_rates:
        regr = ensemble.AdaBoostRegressor(learning_rate=learning_rate, n_estimators=500)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(learning_rates, training_scores, label="Training score")
    ax.plot(learning_rates, testing_scores, label="Testing score")
    ax.set_xlabel("learning rate")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostRegressor")
    plt.show()
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
def test_sample_weight_adaboost_regressor():
    """
    AdaBoostRegressor should work without sample_weights in the base estimator.
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    """
    class DummyEstimator(BaseEstimator):

        def fit(self, X, y):
            pass

        def predict(self, X):
            return np.zeros(X.shape[0])

    boost = AdaBoostRegressor(DummyEstimator(), n_estimators=3)
    boost.fit(X, y_regr)
    assert_equal(len(boost.estimator_weights_), len(boost.estimator_errors_))
def setClf(self):
    # min_samples_split = 3
    self.clf = AdaBoostRegressor()
    return
def model_fit_and_test(TrainX, TrainY, TestX, TestY):
    def build_model(model_name):
        model = model_name()
        return model

    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor,
    #                   DecisionTreeRegressor, SVR, RandomForestRegressor,
    #                   AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX, TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print(resid)
        print("Residual sum of squares: %f" % np.mean(resid ** 2))
        #print(model.predict(TestX))
        #print(TestY)
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()
        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1, X2, X3, X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = stats.diagnostic.het_breushpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)

        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        #print("Lagrange Multiplier Statistics:", ljung_box[0])
        print("Test Autocorrelation P-values:", ljung_box[1])
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
def __init__(self, isTrain):
    super(RegressionAdaBoost, self).__init__(isTrain)
    # data preprocessing
    #self.dataPreprocessing()

    # Create AdaBoost regression object
    decisionReg = DecisionTreeRegressor(max_depth=10)
    rng = np.random.RandomState(1)
    self.adaReg = AdaBoostRegressor(decisionReg,
                                    n_estimators=400,
                                    random_state=rng)
def ada_boost_tree_grid_search():
    ada_boost_tree_grid = {
        'base_estimator__max_features': ['sqrt'],
        'base_estimator__splitter': ['best', 'random'],
        'base_estimator__min_samples_split': [2, 4],
        'base_estimator__max_depth': [1, 3],
        'n_estimators': [50, 100, 1000],
        'learning_rate': [.001, .01, .1],
        'loss': ['linear', 'square', 'exponential']
    }
    abr = AdaBoostRegressor(DecisionTreeRegressor())
    return ada_boost_tree_grid, abr
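The grid and estimator returned above are presumably meant to be passed to GridSearchCV. A hedged usage sketch follows: the feature matrix X, target y, and the scoring choice are illustrative assumptions, and the 'base_estimator__*' keys assume a scikit-learn version in which AdaBoostRegressor still exposes its tree under the base_estimator parameter.

    # Hypothetical usage of the returned grid (X, y and the scoring metric are
    # assumptions; 'base_estimator__*' keys match older scikit-learn versions).
    from sklearn.model_selection import GridSearchCV

    param_grid, abr = ada_boost_tree_grid_search()
    search = GridSearchCV(abr, param_grid, scoring='neg_mean_squared_error', cv=3, n_jobs=-1)
    search.fit(X, y)
    print(search.best_params_)
    print(search.best_score_)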
def adbPredictor(df):
    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)
    # clf = linear_model.SGDRegressor()
    clf = ensemble.AdaBoostRegressor()
    clf.fit(dataTrainX, dataTrainY)
    predicted = clf.predict(dataTestX)

    fig, ax = plotter.subplots()
    ax.set_ylabel('Predicted KNN Weekly')
    ax.scatter(dataTestY, predicted)
    ax.set_xlabel('Measured')

    predicted = np.reshape(predicted, (predicted.size, 1))
    corrCoeff = pearsonr(dataTestY, predicted)
    print(corrCoeff[0])
    plotter.show()
    return predicted
def __init__(self, conf, model=None):
    self.conf = conf
    self.name = "AdaBoostR"
    if model is None:
        self.model = AdaBoostRegressor(loss='square')
    else:
        self.model = model
def get_models4ensamble(conf):
    models = []
    #models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    #models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html

    # 0 was too big to run with depth set to 1, and 1 was overfitting a bit
    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 3,
                      "eta": 0.1, "min_child_weight": 5, "subsample": 0.5,
                      "nthread": 4, "colsample_bytree": 0.5, "num_parallel_tree": 1,
                      'gamma': 0}
    else:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 10,
                      "eta": 0.1, "min_child_weight": 8, "subsample": 0.5,
                      "nthread": 4, "colsample_bytree": 0.5, "num_parallel_tree": 1,
                      'gamma': 0}
    #xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 10,
    #              "eta": 0.1, "min_child_weight": 8, "subsample": 0.5, "nthread": 4,
    #              "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}

    models = [
        #DLModel(conf),
        #LRModel(conf, model=linear_model.BayesianRidge()),
        #LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.1)),
        #LRModel(conf, model=linear_model.Ridge(alpha=.5)),
        #LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
        #                              ('linear', LinearRegression(fit_intercept=False))])),
        XGBoostModel(conf, xgb_params, use_cv=True),
        LRModel(conf, model=linear_model.Lasso(alpha=0.3)),
        RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.2)),
        ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
        #AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
    ]
    return models
    #return [XGBoostModel(conf, xgb_params, use_cv=True)]
def abr(X, y):
    X_train, X_validation, y_train, y_validation = train_test_split(X, y, random_state=0)
    abr_boost = AdaBoostRegressor(random_state=1)
    abr_boost.fit(X_train, y_train.ravel())
    print('training error:', 1.0 - abr_boost.score(X_train, y_train))
    print('validation error:', 1.0 - abr_boost.score(X_validation, y_validation))
    time_fit(abr_boost, X_train, y_train.ravel())
def get_classifier(self, X, Y):
    """
    Build and fit the regressor.

    :param X: training features
    :param Y: training targets
    :return: fitted model
    """
    # rng = np.random.RandomState(1)
    clf = AdaBoostRegressor(DecisionTreeRegressor())
    clf.fit(X, Y)
    return clf
def test_AdaBoostRegressor_base_regr(*data):
    '''
    Test the regression with different numbers of estimators and different base regressors.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    from sklearn.svm import LinearSVR
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    regrs = [ensemble.AdaBoostRegressor(),
             ensemble.AdaBoostRegressor(base_estimator=LinearSVR(epsilon=0.01, C=100))]
    labels = ["Decision Tree Regressor", "Linear SVM Regressor"]
    for i, regr in enumerate(regrs):
        ax = fig.add_subplot(2, 1, i + 1)
        regr.fit(X_train, y_train)
        ## graph
        estimators_num = len(regr.estimators_)
        X = range(1, estimators_num + 1)
        ax.plot(list(X), list(regr.staged_score(X_train, y_train)), label="Training score")
        ax.plot(list(X), list(regr.staged_score(X_test, y_test)), label="Testing score")
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_ylim(-1, 1)
        ax.set_title("Base_Estimator:%s" % labels[i])
    plt.suptitle("AdaBoostRegressor")
    plt.show()
def test_AdaBoostRegressor_loss(*data):
    '''
    Test the method with different loss functions.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    losses = ['linear', 'square', 'exponential']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for i, loss in enumerate(losses):
        regr = ensemble.AdaBoostRegressor(loss=loss, n_estimators=30)
        regr.fit(X_train, y_train)
        ## graph
        estimators_num = len(regr.estimators_)
        X = range(1, estimators_num + 1)
        ax.plot(list(X), list(regr.staged_score(X_train, y_train)),
                label="Training score: loss=%s" % loss)
        ax.plot(list(X), list(regr.staged_score(X_test, y_test)),
                label="Testing score: loss=%s" % loss)
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_ylim(-1, 1)
    plt.suptitle("AdaBoostRegressor")
    plt.show()
def test_regression_toy():
    # Check regression on a toy dataset.
    clf = AdaBoostRegressor(random_state=0)
    clf.fit(X, y_regr)
    assert_array_equal(clf.predict(T), y_t_regr)
def test_boston():
    # Check consistency on the Boston house prices dataset.
    clf = AdaBoostRegressor(random_state=0)
    clf.fit(boston.data, boston.target)
    score = clf.score(boston.data, boston.target)
    assert score > 0.85
def test_pickle():
    # Check picklability.
    import pickle

    # AdaBoost classifier
    for alg in ['SAMME', 'SAMME.R']:
        obj = AdaBoostClassifier(algorithm=alg)
        obj.fit(iris.data, iris.target)
        score = obj.score(iris.data, iris.target)
        s = pickle.dumps(obj)

        obj2 = pickle.loads(s)
        assert_equal(type(obj2), obj.__class__)
        score2 = obj2.score(iris.data, iris.target)
        assert_equal(score, score2)

    # AdaBoost regressor
    obj = AdaBoostRegressor(random_state=0)
    obj.fit(boston.data, boston.target)
    score = obj.score(boston.data, boston.target)
    s = pickle.dumps(obj)

    obj2 = pickle.loads(s)
    assert_equal(type(obj2), obj.__class__)
    score2 = obj2.score(boston.data, boston.target)
    assert_equal(score, score2)
def test_sample_weight_missing():
    from sklearn.linear_model import LogisticRegression
    from sklearn.cluster import KMeans

    clf = AdaBoostClassifier(KMeans(), algorithm="SAMME")
    assert_raises(ValueError, clf.fit, X, y_regr)

    clf = AdaBoostRegressor(KMeans())
    assert_raises(ValueError, clf.fit, X, y_regr)
def get_classifier(self, X, Y):
    """
    Build and fit the regressor.

    :param X: training features
    :param Y: training targets
    :return: fitted model
    """
    # rng = np.random.RandomState(1)
    clf = AdaBoostRegressor(DecisionTreeRegressor(criterion='mse'))
    clf.fit(X, Y)
    return clf
def parameterChoosing(self):
    # Candidate base estimators: decision trees of increasing depth.
    dts = [DecisionTreeRegressor(max_depth=depth, max_features='auto')
           for depth in [5, 7, 9, 11, 12, 14, 15, 17, 19, 21, 22, 24, 26,
                         27, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 50]]

    tuned_parameters = [{'base_estimator': dts,
                         'n_estimators': range(5, 700),
                         'learning_rate': [1, 2, 3]}]

    reg = GridSearchCV(AdaBoostRegressor(), tuned_parameters, cv=5,
                       scoring='mean_squared_error')
    reg.fit(self.X_train, self.y_train.ravel())

    print("Best parameters set found on development set:\n")
    print(reg.best_params_)

    print("Grid scores on development set:\n")
    for params, mean_score, scores in reg.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

    print("MSE for test data set:\n")
    y_true, y_pred = self.y_test, reg.predict(self.X_test)
    print(mean_squared_error(y_true, y_pred))
def __init__(self, isTrain):
    super(RegressionUniformBlending, self).__init__(isTrain)
    # data preprocessing
    #self.dataPreprocessing()

    self.net1 = NeuralNet(
        layers=[  # three layers: one hidden layer
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            #('hidden2', layers.DenseLayer),
            #('hidden3', layers.DenseLayer),
            ('output', layers.DenseLayer),
            ],
        # layer parameters:
        input_shape=(None, 13),     # input dimension is 13
        hidden_num_units=6,         # number of units in hidden layer
        #hidden2_num_units=8,       # number of units in hidden layer
        #hidden3_num_units=4,       # number of units in hidden layer
        output_nonlinearity=None,   # output layer uses the identity function
        output_num_units=1,         # output dimension is 1

        # objective function
        objective_loss_function=lasagne.objectives.squared_error,

        # optimization method:
        update=lasagne.updates.nesterov_momentum,
        update_learning_rate=0.002,
        update_momentum=0.4,

        # use 20% as validation
        train_split=TrainSplit(eval_size=0.2),

        regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=100,   # we want to train this many epochs
        verbose=0,
        )

    # Create linear regression object
    self.linRegr = linear_model.LinearRegression()

    # Create KNN regression object
    self.knn = neighbors.KNeighborsRegressor(86, weights='distance')

    # Create Decision Tree regression object
    self.decisionTree = DecisionTreeRegressor(max_depth=7, max_features=None)

    # Create AdaBoost regression object
    decisionReg = DecisionTreeRegressor(max_depth=10)
    rng = np.random.RandomState(1)
    self.adaReg = AdaBoostRegressor(decisionReg,
                                    n_estimators=400,
                                    random_state=rng)

    # Create Random Forest regression object
    self.model = RandomForestRegressor(max_features='sqrt', n_estimators=32, max_depth=39)
def test_staged_predict():
    # Check staged predictions.
    rng = np.random.RandomState(0)
    iris_weights = rng.randint(10, size=iris.target.shape)
    boston_weights = rng.randint(10, size=boston.target.shape)

    # AdaBoost classification
    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg, n_estimators=10)
        clf.fit(iris.data, iris.target, sample_weight=iris_weights)

        predictions = clf.predict(iris.data)
        staged_predictions = [p for p in clf.staged_predict(iris.data)]
        proba = clf.predict_proba(iris.data)
        staged_probas = [p for p in clf.staged_predict_proba(iris.data)]
        score = clf.score(iris.data, iris.target, sample_weight=iris_weights)
        staged_scores = [
            s for s in clf.staged_score(
                iris.data, iris.target, sample_weight=iris_weights)]

        assert_equal(len(staged_predictions), 10)
        assert_array_almost_equal(predictions, staged_predictions[-1])
        assert_equal(len(staged_probas), 10)
        assert_array_almost_equal(proba, staged_probas[-1])
        assert_equal(len(staged_scores), 10)
        assert_array_almost_equal(score, staged_scores[-1])

    # AdaBoost regression
    clf = AdaBoostRegressor(n_estimators=10, random_state=0)
    clf.fit(boston.data, boston.target, sample_weight=boston_weights)

    predictions = clf.predict(boston.data)
    staged_predictions = [p for p in clf.staged_predict(boston.data)]
    score = clf.score(boston.data, boston.target, sample_weight=boston_weights)
    staged_scores = [
        s for s in clf.staged_score(
            boston.data, boston.target, sample_weight=boston_weights)]

    assert_equal(len(staged_predictions), 10)
    assert_array_almost_equal(predictions, staged_predictions[-1])
    assert_equal(len(staged_scores), 10)
    assert_array_almost_equal(score, staged_scores[-1])
def test_sparse_regression():
    # Check regression with sparse input.

    class CustomSVR(SVR):
        """SVR variant that records the nature of the training set."""

        def fit(self, X, y, sample_weight=None):
            """Modification on fit that carries the data type for later verification."""
            super(CustomSVR, self).fit(X, y, sample_weight=sample_weight)
            self.data_type_ = type(X)
            return self

    X, y = datasets.make_regression(n_samples=15, n_features=50, n_targets=1,
                                    random_state=42)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix,
                          dok_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)

        # Trained on sparse format
        sparse_classifier = AdaBoostRegressor(
            base_estimator=CustomSVR(),
            random_state=1
        ).fit(X_train_sparse, y_train)

        # Trained on dense format
        dense_classifier = AdaBoostRegressor(
            base_estimator=CustomSVR(),
            random_state=1
        ).fit(X_train, y_train)

        # predict
        sparse_results = sparse_classifier.predict(X_test_sparse)
        dense_results = dense_classifier.predict(X_test)
        assert_array_equal(sparse_results, dense_results)

        # staged_predict
        sparse_results = sparse_classifier.staged_predict(X_test_sparse)
        dense_results = dense_classifier.staged_predict(X_test)
        for sparse_res, dense_res in zip(sparse_results, dense_results):
            assert_array_equal(sparse_res, dense_res)

        types = [i.data_type_ for i in sparse_classifier.estimators_]
        assert all([(t == csc_matrix or t == csr_matrix) for t in types])