We have extracted the following 22 code examples from open-source Python projects to illustrate how to use sklearn.ensemble.ExtraTreesRegressor().
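As a starting point, here is a minimal self-contained usage sketch. It is not one of the 22 extracted project examples; the toy data and variable names are invented purely for illustration.

import numpy as np
from sklearn.ensemble import ExtraTreesRegressor

# Toy regression data, invented for illustration
rng = np.random.RandomState(0)
X = rng.rand(200, 5)
y = 2.0 * X[:, 0] - X[:, 1] + 0.1 * rng.randn(200)

# Fit an extremely randomized trees regressor and inspect it
reg = ExtraTreesRegressor(n_estimators=100, random_state=0)
reg.fit(X, y)
print(reg.predict(X[:3]))          # predictions for the first three rows
print(reg.feature_importances_)    # one importance score per feature
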
def model_extra_trees_regression(Xtrain, Xtest, ytrain):
    X_train = Xtrain
    y_train = ytrain
    etr = ExtraTreesRegressor(n_jobs=1, random_state=0)
    param_grid = {}  # {'n_estimators': [500], 'max_features': [10, 15, 20]}
    model = GridSearchCV(estimator=etr, param_grid=param_grid, n_jobs=1, cv=10, scoring=RMSE)
    model.fit(X_train, y_train)
    print('Extra trees regression...')
    print('Best Params:')
    print(model.best_params_)
    print('Best CV Score:')
    print(-model.best_score_)
    y_pred = model.predict(Xtest)
    return y_pred, -model.best_score_

# read data, build model and do prediction
# read train data

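The example above passes a module-level RMSE scorer to GridSearchCV that is not shown in the snippet. A plausible definition (an assumption about the original code, not a verbatim extract) uses sklearn.metrics.make_scorer:

import numpy as np
from sklearn.metrics import make_scorer

def rmse(y_true, y_pred):
    """Root-mean-squared error, reported as a positive number."""
    return np.sqrt(np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2))

# greater_is_better=False makes GridSearchCV negate the score, which is why
# the example above prints -model.best_score_ to recover the positive RMSE.
RMSE = make_scorer(rmse, greater_is_better=False)
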
def fit(self, X, Y):
    from sklearn.ensemble import ExtraTreesRegressor
    from sklearn.feature_selection import SelectFromModel

    num_features = X.shape[1]
    max_features = int(
        float(self.max_features) * (np.log(num_features) + 1))
    # Use at most half of the features
    max_features = max(1, min(int(X.shape[1] / 2), max_features))
    preprocessor = ExtraTreesRegressor(
        n_estimators=self.n_estimators, criterion=self.criterion,
        max_depth=self.max_depth, min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf, bootstrap=self.bootstrap,
        max_features=max_features, max_leaf_nodes=self.max_leaf_nodes,
        oob_score=self.oob_score, n_jobs=self.n_jobs, verbose=self.verbose,
        random_state=self.random_state)
    preprocessor.fit(X, Y)
    self.preprocessor = SelectFromModel(preprocessor, prefit=True)
    return self

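Elsewhere in that class the fitted preprocessor is presumably applied with transform(). The standalone sketch below illustrates the same ExtraTreesRegressor + SelectFromModel(prefit=True) pattern on invented toy data.

import numpy as np
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel

# Toy data, invented for illustration
rng = np.random.RandomState(0)
X = rng.rand(100, 20)
y = 3.0 * X[:, 0] + 0.05 * rng.randn(100)

# Same pattern as above: fit the forest, wrap it with SelectFromModel(prefit=True),
# then keep only the columns whose importance exceeds the (default, mean) threshold.
forest = ExtraTreesRegressor(n_estimators=50, random_state=0).fit(X, y)
selector = SelectFromModel(forest, prefit=True)
X_reduced = selector.transform(X)
print(X.shape, "->", X_reduced.shape)
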
def fit(self, X, y):
    """
    Fit an Extra-Trees model to data `X` and targets `y`.

    Parameters
    ----------
    X : array-like
        Input values.
    y : array-like
        Target values.
    """
    self.X = X
    self.y = y
    self.n = self.X.shape[0]
    self.model = ExtraTreesRegressor(**self.params)
    self.model.fit(X, y)

def exrf(train_sample, validation_sample, features, seed):
    log_base = np.e
    exrf_est = ExtraTreesRegressor(n_estimators=1000,
                                   criterion='mse',
                                   max_features='auto',
                                   max_depth=None,
                                   bootstrap=True,
                                   min_samples_split=4,
                                   min_samples_leaf=1,
                                   min_weight_fraction_leaf=0,
                                   max_leaf_nodes=None,
                                   random_state=seed
                                   ).fit(train_sample[features],
                                         np.log1p(train_sample['volume']) / np.log(log_base))
    exrf_prob = np.power(log_base, exrf_est.predict(validation_sample[features])) - 1
    print_mape(validation_sample['volume'], exrf_prob, 'EXTRA-RF')
    return exrf_prob

def try_params(n_iterations, params):

    n_estimators = int(round(n_iterations * trees_per_iteration))
    print "n_estimators:", n_estimators
    pprint(params)

    clf = XT(n_estimators=n_estimators, verbose=0, n_jobs=-1, **params)

    return train_and_eval_sklearn_regressor(clf, data)

def train(self): """""" print('size before truncated outliers is %d ' % len(self.TrainData)) self.TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)] print('size after truncated outliers is %d ' % len(self.TrainData)) X = self.TrainData.drop(self._l_drop_cols, axis=1) Y = self.TrainData['logerror'] self._l_train_columns = X.columns FeatCols = list(self._l_train_columns) etr = ExtraTreesRegressor( n_estimators= self._iter, criterion= 'mse', max_features= int(math.sqrt(len(FeatCols))), max_depth = self._depth, n_jobs= 2, random_state= 2017, verbose= True ) self._model = etr.fit(X, Y) ## evaluate on valid data self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__, datetime.now().strftime('%Y%m%d-%H:%M:%S')) with open(self._f_eval_train_model, 'wb') as o_file: pickle.dump(self._model, o_file, -1) o_file.close() self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]], ignore_index=True) ## ignore_index will reset the index or index will be overlaped return
def iterative_fit(self, X, y, n_iter=1, refit=False):
    from sklearn.ensemble import ExtraTreesRegressor as ETR

    if refit:
        self.estimator = None

    if self.estimator is None:
        num_features = X.shape[1]
        max_features = int(
            float(self.max_features) * (np.log(num_features) + 1))
        # Use at most half of the features
        max_features = max(1, min(int(X.shape[1] / 2), max_features))
        self.estimator = ETR(
            n_estimators=0, criterion=self.criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            bootstrap=self.bootstrap,
            max_features=max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            random_state=self.random_state,
            warm_start=True
        )

    tmp = self.estimator  # TODO copy ?
    tmp.n_estimators += n_iter
    tmp.fit(X, y)
    self.estimator = tmp
    return self

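The iterative_fit method above relies on warm_start=True so that each fit call keeps the existing trees and only grows the additional ones implied by the larger n_estimators. A minimal sketch of that mechanism with plain scikit-learn (toy data invented for illustration):

import numpy as np
from sklearn.ensemble import ExtraTreesRegressor

# Toy data, invented for illustration
rng = np.random.RandomState(0)
X = rng.rand(200, 4)
y = X.sum(axis=1) + 0.1 * rng.randn(200)

# With warm_start=True, refitting after raising n_estimators adds trees
# instead of rebuilding the whole forest.
est = ExtraTreesRegressor(n_estimators=10, warm_start=True, random_state=0)
est.fit(X, y)                 # grows the first 10 trees
est.n_estimators += 10
est.fit(X, y)                 # grows 10 more, 20 in total
print(len(est.estimators_))   # -> 20
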
def __init__(self, **params):
    """
    Wrapper around sklearn's ExtraTreesRegressor implementation for pyGPGO.
    Random Forests can also be used as surrogate models in Bayesian Optimization.
    An estimate of 'posterior' variance can be obtained by using the `impurity`
    criterion value in each subtree.

    Parameters
    ----------
    params: tuple, optional
        Any parameters to pass to `RandomForestRegressor`. Defaults to sklearn's.
    """
    self.params = params

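The docstring mentions deriving a 'posterior' variance estimate from the ensemble. One common way to approximate this for a Bayesian-optimization surrogate is to use the spread of the individual trees' predictions; the sketch below illustrates that idea and is not necessarily how pyGPGO computes it (the helper name predict_with_std is invented).

import numpy as np
from sklearn.ensemble import ExtraTreesRegressor

def predict_with_std(model, X):
    """Hypothetical helper: mean and std of per-tree predictions.

    `model` is assumed to be a fitted ExtraTreesRegressor; the std across
    its `estimators_` serves as a rough stand-in for posterior variance.
    """
    per_tree = np.stack([tree.predict(X) for tree in model.estimators_])
    return per_tree.mean(axis=0), per_tree.std(axis=0)

# Usage sketch on toy data
rng = np.random.RandomState(0)
X_train = rng.rand(100, 3)
y_train = X_train.sum(axis=1) + 0.1 * rng.randn(100)
model = ExtraTreesRegressor(n_estimators=50, random_state=0).fit(X_train, y_train)
mean, std = predict_with_std(model, rng.rand(5, 3))
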
def fit(self, X, y):
    self.clf = ExtraTreesRegressor()
    #y = np.log(y)
    self.clf.fit(X, y)

def __init__(self, conf, model=None):
    self.conf = conf
    self.name = "ETR"
    if model is None:
        self.model = ExtraTreesRegressor(n_jobs=4)
    else:
        self.model = model

def get_models4ensamble(conf):
    models = []
    #models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    #models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html

    #0 was too big to run with depth set to 1, and 1 was overfitting a bit
    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 3, "eta": 0.1,
                      "min_child_weight": 5, "subsample": 0.5, "nthread": 4,
                      "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}
    else:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 10, "eta": 0.1,
                      "min_child_weight": 8, "subsample": 0.5, "nthread": 4,
                      "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}

    #xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 10, "eta": 0.1, "min_child_weight": 8,
    #              "subsample": 0.5, "nthread": 4, "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}

    models = [
        #DLModel(conf),
        #LRModel(conf, model=linear_model.BayesianRidge()),
        #LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.1)),
        #LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
        #LRModel(conf, model=linear_model.Ridge(alpha=.5))
        #                              ('linear', LinearRegression(fit_intercept=False))])),
        XGBoostModel(conf, xgb_params, use_cv=True),
        LRModel(conf, model=linear_model.Lasso(alpha=0.3)),
        RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.2)),
        ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
        #AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
    ]
    return models
    #return [XGBoostModel(conf, xgb_params, use_cv=True)]

def __init__(self, n_action, gamma=0.99):
    self.Q = map(lambda x: ExtraTreesRegressor(n_estimators=50), [None] * n_action)
    self.n_action = n_action
    self.gamma = gamma
    self.first_time = True

def featureImp(dataset1):
    import numpy as np
    from sklearn import datasets
    from sklearn import metrics
    from sklearn.ensemble import ExtraTreesRegressor
    import collections

    #f = open('F:\kaggle\Final Project\\Book.txt')
    # f.readline()  # skip the header
    #dataset = np.loadtxt(fname=f, delimiter=',')
    # dataset = datasets.load_iris()
    # fit an Extra Trees model to the data
    # print(dataset)
    mapElement = {}
    X = dataset1[:, 1:406]
    Y = dataset1[:, 0]
    num_trees = 10
    max_feature = 7
    model = ExtraTreesRegressor(n_estimators=num_trees, max_features=max_feature)
    model.fit(X, Y)
    z = model.feature_importances_
    #print("first", z.item(0))
    # map each importance score to its 1-based feature index
    for i in range(len(z)):
        mapElement[z.item(i)] = (i + 1)
    # od = collections.OrderedDict(sorted(mapElement.items()))
    p = sorted(mapElement)
    #print(p)
    # walk the sorted scores from largest to smallest to rank the features
    result = []
    for i in range(len(p)):
        result.append(mapElement.get(p[(len(p) - 1) - i]))
    return result
    #print(result)
    # print(type(od))
    #print(mapElement)
    # print(od)
    # model.fit(dataset.data, dataset.target)
    # display the relative importance of each attribute
    #print(model.feature_importances_)

def train(self): print "start ert" self.model = ExtraTreesRegressor(n_jobs=self.prms["n_jobs"], verbose=1, random_state=self.prms["random_state"], n_estimators=int(self.prms["n_estimators"]), max_features=self.prms["max_features"]) self.model.fit(self.data_tr.values, self.labels_tr)
def test(x_file, y_file, train_list, test_list, best_params):
    X1 = np.loadtxt(x_file, delimiter=",")
    Y1 = np.loadtxt(y_file, delimiter=",")
    train_X, train_Y, test_X, _ = split_train_val(X1, Y1, train_list, test_list)
    # print train_X.shape, test_X.shape
    EXT1 = ExtraTreesRegressor(n_jobs=-1, random_state=1, **best_params)
    EXT1.fit(train_X, train_Y)
    test_Y1 = EXT1.predict(test_X)
    # print EXT1.feature_importances_
    return test_Y1

def predict(x_file, y_file, test_x_file, best_params):
    X1 = np.loadtxt(x_file, delimiter=",")
    Y1 = np.loadtxt(y_file, delimiter=",")
    test_X1 = np.loadtxt(test_x_file, delimiter=",")
    EXT1 = ExtraTreesRegressor(n_jobs=-1, random_state=1, **best_params)
    EXT1.fit(X1, Y1)
    test_Y1 = EXT1.predict(test_X1)  # * NOR.scale_ + NOR.mean_
    # print test_Y1
    # print EXT1.feature_importances_
    return test_Y1

def get_model_list():
    model_list, name_list = [], []
    # model_list.append(linear_model.LinearRegression())
    # name_list.append('LR')
    # model_list.append(gaussian_process.GaussianProcessRegressor(alpha=1e-10))
    # name_list.append('GaussianProcess')
    # model_list.append(KNeighborsRegressor(weights='uniform', n_neighbors=28))
    # name_list.append('KNN_unif')
    #
    # model_list.append(KNeighborsRegressor(weights='distance', n_neighbors=28))
    # name_list.append('KNN_dist')
    #
    # model_list.append(SVR(kernel='poly', C=1, gamma='auto', coef0=0, degree=2))
    # name_list.append('SVR_poly')
    #
    #
    model_list.append(SVR(kernel='rbf', C=0.3, gamma='auto'))
    name_list.append('SVR_rbf')
    #
    #
    # model_list.append(DecisionTreeRegressor())
    # name_list.append('DT')
    #
    # model_list.append(RandomForestRegressor(n_estimators=150, max_depth=None, min_samples_split=2, random_state=0))
    # name_list.append('RF')
    #
    # model_list.append(ExtraTreesRegressor(n_estimators=150, max_depth=None, max_features='auto', min_samples_split=2, random_state=0))
    # name_list.append('ET')
    return model_list, name_list

#MAPE

def get_model_list():
    model_list, name_list = [], []
    # model_list.append(linear_model.LinearRegression())
    # name_list.append('LR')
    # model_list.append(gaussian_process.GaussianProcessRegressor(alpha=1e-10))
    # name_list.append('GaussianProcess')
    # model_list.append(KNeighborsRegressor(weights='uniform', n_neighbors=28))
    # name_list.append('KNN_unif')
    #
    # model_list.append(KNeighborsRegressor(weights='distance', n_neighbors=28))
    # name_list.append('KNN_dist')
    #
    # model_list.append(SVR(kernel='poly', C=1, gamma='auto', coef0=0, degree=2))
    # name_list.append('SVR_poly')
    #
    #
    model_list.append(SVR(kernel='rbf', C=0.3, gamma='auto'))
    name_list.append('SVR_rbf')
    #
    #
    # model_list.append(DecisionTreeRegressor())
    # name_list.append('DT')
    #
    # model_list.append(RandomForestRegressor(n_estimators=150, max_depth=None, min_samples_split=2, random_state=0))
    # name_list.append('RF')
    #
    # model_list.append(ExtraTreesRegressor(n_estimators=150, max_depth=None, max_features='auto', min_samples_split=2, random_state=0))
    # name_list.append('ET')
    return model_list, name_list

def models():
    extra_params_kaggle_cla = {'n_estimators': 1200, 'max_features': 30, 'criterion': 'entropy',
                               'min_samples_leaf': 2, 'min_samples_split': 2, 'max_depth': 30,
                               'n_jobs': nthread, 'random_state': seed}

    extra_params_kaggle_reg = {'n_estimators': 1200, 'max_features': 30, 'criterion': 'mse',
                               'min_samples_leaf': 2, 'min_samples_split': 2, 'max_depth': 30,
                               'n_jobs': nthread, 'random_state': seed}

    xgb_reg = {'objective': 'reg:linear', 'max_depth': 11, 'learning_rate': 0.01, 'subsample': .9,
               'n_estimators': 10000, 'colsample_bytree': 0.45, 'nthread': nthread, 'seed': seed}

    xgb_cla = {'objective': 'binary:logistic', 'max_depth': 11, 'learning_rate': 0.01, 'subsample': .9,
               'n_estimators': 10000, 'colsample_bytree': 0.45, 'nthread': nthread, 'seed': seed}

    #NN params
    nb_epoch = 3
    batch_size = 128
    esr = 402

    param1 = {
        'hidden_units': (256, 256),
        'activation': (advanced_activations.PReLU(), advanced_activations.PReLU(), core.activations.sigmoid),
        'dropout': (0., 0.),
        'optimizer': RMSprop(),
        'nb_epoch': nb_epoch,
    }
    param2 = {
        'hidden_units': (1024, 1024),
        'activation': (advanced_activations.PReLU(), advanced_activations.PReLU(), core.activations.sigmoid),
        'dropout': (0., 0.),
        'optimizer': RMSprop(),
        'nb_epoch': nb_epoch,
    }

    clfs = [
        (D2, XGBClassifier(**xgb_cla)),
        (D11, XGBClassifier(**xgb_cla)),

        (D2, XGBRegressor(**xgb_reg)),
        (D11, XGBRegressor(**xgb_reg)),

        (D2, ensemble.ExtraTreesClassifier(**extra_params_kaggle_cla)),
        (D11, ensemble.ExtraTreesClassifier(**extra_params_kaggle_cla)),

        (D2, ensemble.ExtraTreesRegressor(**extra_params_kaggle_reg)),
        (D11, ensemble.ExtraTreesRegressor(**extra_params_kaggle_reg)),

        # (D1, NN(input_dim=D1[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2, loss='binary_crossentropy', class_mode='binary', **param1)),
        # (D3, NN(input_dim=D3[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2, loss='binary_crossentropy', class_mode='binary', **param1)),
        # (D5, NN(input_dim=D5[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2, loss='binary_crossentropy', class_mode='binary', **param1)),
        #
        # (D1, NN(input_dim=D1[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2, loss='binary_crossentropy', class_mode='binary', **param2)),
        # (D3, NN(input_dim=D3[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2, loss='binary_crossentropy', class_mode='binary', **param2)),
        # (D5, NN(input_dim=D5[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2, loss='binary_crossentropy', class_mode='binary', **param2)),
    ]

    for clf in clfs:
        yield clf

def _get_learner(self):
    # xgboost
    if self.learner_name in ["reg_xgb_linear", "reg_xgb_tree", "reg_xgb_tree_best_single_model"]:
        return XGBRegressor(**self.param_dict)
    if self.learner_name in ["clf_xgb_linear", "clf_xgb_tree"]:
        return XGBClassifier(**self.param_dict)
    # sklearn
    if self.learner_name == "reg_skl_lasso":
        return Lasso(**self.param_dict)
    if self.learner_name == "reg_skl_ridge":
        return Ridge(**self.param_dict)
    if self.learner_name == "reg_skl_random_ridge":
        return RandomRidge(**self.param_dict)
    if self.learner_name == "reg_skl_bayesian_ridge":
        return BayesianRidge(**self.param_dict)
    if self.learner_name == "reg_skl_svr":
        return SVR(**self.param_dict)
    if self.learner_name == "reg_skl_lsvr":
        return LinearSVR(**self.param_dict)
    if self.learner_name == "reg_skl_knn":
        return KNNRegressor(**self.param_dict)
    if self.learner_name == "reg_skl_etr":
        return ExtraTreesRegressor(**self.param_dict)
    if self.learner_name == "reg_skl_rf":
        return RandomForestRegressor(**self.param_dict)
    if self.learner_name == "reg_skl_gbm":
        return GradientBoostingRegressor(**self.param_dict)
    if self.learner_name == "reg_skl_adaboost":
        return AdaBoostRegressor(**self.param_dict)
    # keras
    if self.learner_name == "reg_keras_dnn":
        try:
            return KerasDNNRegressor(**self.param_dict)
        except:
            return None
    # rgf
    if self.learner_name == "reg_rgf":
        return RGFRegressor(**self.param_dict)
    # ensemble
    if self.learner_name == "reg_ensemble":
        return EnsembleLearner(**self.param_dict)
    return None

def test_distribution():
    rng = check_random_state(12321)

    # Single variable with 4 values
    X = rng.randint(0, 4, size=(1000, 1))
    y = rng.rand(1000)
    n_trees = 500

    clf = ExtraTreesRegressor(n_estimators=n_trees, random_state=42).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))
        uniques[tree] += 1

    uniques = sorted([(1. * count / n_trees, tree)
                      for tree, count in uniques.items()])

    # On a single variable problem where X_0 has 4 equiprobable values, there
    # are 5 ways to build a random tree. The more compact (0,1/0,0/--0,2/--) of
    # them has probability 1/3 while the 4 others have probability 1/6.
    assert_equal(len(uniques), 5)
    assert_greater(0.20, uniques[0][0])  # Rough approximation of 1/6.
    assert_greater(0.20, uniques[1][0])
    assert_greater(0.20, uniques[2][0])
    assert_greater(0.20, uniques[3][0])
    assert_greater(uniques[4][0], 0.3)
    assert_equal(uniques[4][1], "0,1/0,0/--0,2/--")

    # Two variables, one with 2 values, one with 3 values
    X = np.empty((1000, 2))
    X[:, 0] = np.random.randint(0, 2, 1000)
    X[:, 1] = np.random.randint(0, 3, 1000)
    y = rng.rand(1000)

    clf = ExtraTreesRegressor(n_estimators=100, max_features=1,
                              random_state=1).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))
        uniques[tree] += 1

    uniques = [(count, tree) for tree, count in uniques.items()]
    assert_equal(len(uniques), 8)

def get_model_list(task_name):
    model_list, name_list = [], []
    model_list.append(linear_model.LinearRegression())
    name_list.append('LR')
    #
    model_list.append(linear_model.SGDRegressor())
    name_list.append('LR_SGD')
    model_list.append(linear_model.Lasso(alpha=1.0))
    name_list.append('Lasso')
    model_list.append(linear_model.Ridge(alpha=1.0))
    name_list.append('Ridge')
    model_list.append(linear_model.LassoLars(alpha=.1))
    name_list.append('LassoLars')
    model_list.append(linear_model.BayesianRidge())
    name_list.append('BayesianRidge')
    model_list.append(KernelRidge(alpha=1.0))
    name_list.append('KernelRidge')
    model_list.append(gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1))
    name_list.append('GaussianProcess')
    model_list.append(KNeighborsRegressor(weights='uniform', n_neighbors=3))
    name_list.append('KNN_unif')
    model_list.append(KNeighborsRegressor(weights='distance', n_neighbors=3))
    name_list.append('KNN_dist')
    model_list.append(SVR(kernel='linear', C=1, gamma='auto', coef0=0, degree=2))
    name_list.append('SVM_linear')
    model_list.append(SVR(kernel='poly', C=1, gamma='auto', coef0=0, degree=2))
    name_list.append('SVM_poly')
    model_list.append(SVR(kernel='rbf', C=1, gamma='auto', coef0=0, degree=2))
    name_list.append('SVM_rbf')
    model_list.append(DecisionTreeRegressor())
    name_list.append('DT')
    model_list.append(RandomForestRegressor(n_estimators=100, max_depth=None, min_samples_split=2, random_state=0))
    name_list.append('RF')
    model_list.append(ExtraTreesRegressor(n_estimators=100, max_depth=None, max_features='auto', min_samples_split=2, random_state=0))
    name_list.append('ET')
    return model_list, name_list