我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用sklearn.datasets.load_boston()。注意:load_boston 已在 scikit-learn 1.0 中被弃用,并于 1.2 版本中移除;以下示例仅适用于旧版 scikit-learn。
def rfr_feature_select():
    """Rank Boston housing features by single-feature random-forest R^2.

    For each feature column, cross-validates a RandomForestRegressor on
    that column alone and prints (mean R^2, feature name) pairs sorted
    from best to worst.
    """
    import numpy as np  # fix: np.mean is used below but was never imported
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestRegressor
    # NOTE(review): sklearn.cross_validation was removed in modern
    # scikit-learn; kept here for behavioral parity with the original.
    from sklearn.cross_validation import cross_val_score, ShuffleSplit
    boston = load_boston()
    X = boston["data"]
    Y = boston["target"]
    names = boston["feature_names"]
    rf = RandomForestRegressor(n_estimators=20, max_depth=4)
    scores = []
    for i in range(X.shape[1]):
        score = cross_val_score(rf, X[:, i:i + 1], Y, scoring="r2",
                                cv=ShuffleSplit(len(X), 3, .3))
        scores.append((round(np.mean(score), 3), names[i]))
    # fix: single-argument print() is valid in both Python 2 and 3,
    # unlike the original Python-2-only print statement.
    print(sorted(scores, reverse=True))
def test_boston(self):
    """Converted StandardScaler spec must reproduce sklearn's transform."""
    from sklearn.datasets import load_boston
    dataset = load_boston()
    model = StandardScaler().fit(dataset.data)
    spec = converter.convert(model, dataset.feature_names, 'out').get_spec()
    inputs = [dict(zip(dataset.feature_names, sample)) for sample in dataset.data]
    outputs = [{"out": scaled} for scaled in model.transform(dataset.data)]
    metrics = evaluate_transformer(spec, inputs, outputs)
    assert metrics["num_errors"] == 0
def setUpClass(self):
    """Load Boston data and derive a 3-class target via histogram binning."""
    from sklearn.datasets import load_boston
    import numpy as np
    # fix: removed an unused DecisionTreeClassifier import
    # Load data and train model
    scikit_data = load_boston()
    self.X = scikit_data.data.astype('f').astype('d')  ## scikit-learn downcasts data
    t = scikit_data.target
    num_classes = 3
    # Bin the continuous target into num_classes integer labels.
    target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1
    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
def setUpClass(self):
    """Train a RandomForestClassifier on a histogram-discretized Boston target."""
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestClassifier
    import numpy as np
    dataset = load_boston()
    # Turn the continuous target into integer class labels.
    labels = np.digitize(dataset.target, np.histogram(dataset.target)[1]) - 1
    model = RandomForestClassifier(random_state=1)
    model.fit(dataset.data, labels)
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.target = labels
    self.scikit_model = model
def setUpClass(self):
    """Train a libsvm NU_SVR model on Boston (skipped when deps are absent)."""
    if not HAS_SKLEARN:
        return
    if not HAS_LIBSVM:
        return
    dataset = load_boston()
    problem = svmutil.svm_problem(dataset['target'], dataset['data'].tolist())
    params = svmutil.svm_parameter()
    params.svm_type = svmutil.NU_SVR
    params.kernel_type = svmutil.LINEAR
    params.eps = 1
    self.libsvm_model = svmutil.svm_train(problem, params)
def setUpClass(self):
    """Fit a most-frequent Imputer on one Boston column containing a NaN."""
    from sklearn.datasets import load_boston
    dataset = load_boston()
    dataset['data'][1, 8] = np.NaN  # inject one missing value to impute
    column = dataset['data'][:, 8].reshape(-1, 1)
    model = Imputer(strategy='most_frequent', axis=0)
    model.fit(column, dataset['target'])
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.scikit_model = model
def setUpClass(self):
    """Train a LinearRegression model on Boston (skipped without sklearn)."""
    if not HAS_SKLEARN:
        return
    scikit_data = load_boston()
    # fix: removed the unused local `feature_names`
    scikit_model = LinearRegression()
    scikit_model.fit(scikit_data['data'], scikit_data['target'])
    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
def test_boston_OHE_plus_trees(self):
    """OneHotEncoder + GBT pipeline: converted spec must match sklearn predictions."""
    dataset = load_boston()
    pipeline = Pipeline([
        ("OHE", OneHotEncoder(categorical_features=[8], sparse=False)),
        ("Trees", GradientBoostingRegressor(random_state=1)),
    ])
    pipeline.fit(dataset.data, dataset.target)
    # Convert the fitted pipeline to a spec.
    spec = convert(pipeline, dataset.feature_names, 'target')
    # Collect reference predictions from sklearn.
    frame = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    frame['prediction'] = pipeline.predict(dataset.data)
    # Compare the spec against the reference.
    result = evaluate_regressor(spec, frame, 'target', verbose=False)
    assert result["max_error"] < 0.0001
def setUpClass(self):
    """Train a one-round xgboost booster on Boston (skipped when deps are absent)."""
    if not HAS_XGBOOST:
        return
    if not HAS_SKLEARN:
        return
    dataset = load_boston()
    train_matrix = xgboost.DMatrix(dataset.data, label=dataset.target,
                                   feature_names=dataset.feature_names)
    booster = xgboost.train({}, train_matrix, 1)
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.xgb_model = booster
    self.feature_names = self.scikit_data.feature_names
def setUpClass(self):
    """Train a libsvm EPSILON_SVR model on Boston (skipped when deps are absent)."""
    if not HAS_SKLEARN:
        return
    if not HAS_LIBSVM:
        return
    dataset = load_boston()
    problem = svmutil.svm_problem(dataset['target'], dataset['data'].tolist())
    params = svmutil.svm_parameter()
    params.svm_type = svmutil.EPSILON_SVR
    params.kernel_type = svmutil.LINEAR
    params.eps = 1
    self.libsvm_model = svmutil.svm_train(problem, params)
def test_boston_OHE(self):
    """OneHotEncoder conversion matches sklearn for several categorical splits."""
    dataset = load_boston()
    for categorical_features in [[3], [8], [3, 8], [8, 3]]:
        encoder = OneHotEncoder(categorical_features=categorical_features,
                                sparse=False)
        encoder.fit(dataset.data, dataset.target)
        # Convert and compare against sklearn's own transform.
        spec = sklearn.convert(encoder, dataset.feature_names, 'out').get_spec()
        inputs = [dict(zip(dataset.feature_names, sample))
                  for sample in dataset.data]
        outputs = [{"out": encoded}
                   for encoded in encoder.transform(dataset.data)]
        result = evaluate_transformer(spec, inputs, outputs)
        assert result["num_errors"] == 0
# This test still isn't working
def test_boston_OHE_pipeline(self):
    """OneHotEncoder inside a Pipeline: output-dimension handling must be correct."""
    dataset = load_boston()
    for categorical_features in [[3], [8], [3, 8], [8, 3]]:
        # Wrap the encoder in a pipeline so the converter must track how the
        # output dimension changes between stages.
        pipeline = Pipeline([
            ("OHE", OneHotEncoder(categorical_features=categorical_features)),
            ("Normalizer", Normalizer()),
        ])
        pipeline.fit(dataset.data.copy(), dataset.target)
        spec = sklearn.convert(pipeline, dataset.feature_names, 'out').get_spec()
        inputs = [dict(zip(dataset.feature_names, sample))
                  for sample in dataset.data]
        outputs = [{"out": transformed}
                   for transformed in pipeline.transform(dataset.data.copy())]
        result = evaluate_transformer(spec, inputs, outputs)
        assert result["num_errors"] == 0
def setUpClass(self):
    """Train a LinearRegression model on Boston and convert it to a spec."""
    if not HAS_SKLEARN:
        return
    dataset = load_boston()
    model = LinearRegression()
    model.fit(dataset['data'], dataset['target'])
    spec = converter.convert(model, dataset.feature_names, 'target').get_spec()
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.scikit_model = model
    self.scikit_spec = spec
def setUpClass(self):
    """Train a libsvm C_SVC classifier on a mean-binarized Boston target and convert it."""
    if not HAS_SKLEARN:
        return
    if not HAS_LIBSVM:
        return
    dataset = load_boston()
    binary_labels = dataset['target'] > dataset['target'].mean()
    problem = svmutil.svm_problem(binary_labels, dataset['data'].tolist())
    params = svmutil.svm_parameter()
    params.svm_type = svmutil.C_SVC
    params.kernel_type = svmutil.LINEAR
    params.eps = 1
    model = svmutil.svm_train(problem, params)
    spec = libsvm_converter.convert(model, dataset.feature_names,
                                    'target').get_spec()
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.libsvm_spec = spec
def setUpClass(self):
    """Load Boston and derive a 3-class target via histogram binning."""
    from sklearn.datasets import load_boston
    import numpy as np
    dataset = load_boston()
    num_classes = 3
    self.X = dataset.data.astype('f').astype('d')  # scikit-learn downcasts data
    bin_edges = np.histogram(dataset.target, bins=num_classes - 1)[1]
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.target = np.digitize(dataset.target, bin_edges) - 1
    self.feature_names = dataset.feature_names
    self.output_name = 'target'
def setUpClass(self):
    """Train a DecisionTreeClassifier on a histogram-discretized Boston target."""
    from sklearn.datasets import load_boston
    from sklearn.tree import DecisionTreeClassifier
    import numpy as np
    # fix: removed an unused MultiLabelBinarizer import
    scikit_data = load_boston()
    scikit_model = DecisionTreeClassifier(random_state=1)
    t = scikit_data.target
    # Bin the continuous target into integer class labels.
    target = np.digitize(t, np.histogram(t)[1]) - 1
    scikit_model.fit(scikit_data.data, target)
    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.scikit_model = scikit_model
def setUpClass(self):
    """Train a GradientBoostingClassifier on a histogram-discretized Boston target."""
    from sklearn.datasets import load_boston
    import numpy as np
    dataset = load_boston()
    labels = np.digitize(dataset.target, np.histogram(dataset.target)[1]) - 1
    model = GradientBoostingClassifier(random_state=1)
    model.fit(dataset.data, labels)
    self.target = labels
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.scikit_model = model
def setUpClass(self):
    """Load Boston and derive a 3-class target via histogram binning."""
    from sklearn.datasets import load_boston
    import numpy as np
    dataset = load_boston()
    num_classes = 3
    self.X = dataset.data.astype('f').astype('d')  # scikit-learn downcasts data
    edges = np.histogram(dataset.target, bins=num_classes - 1)[1]
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.target = np.digitize(dataset.target, edges) - 1
    self.feature_names = dataset.feature_names
    self.output_name = 'target'
def setUp(self):
    """Load Boston (X, y) and enumerate the regressor settings under test."""
    self.X, self.y = load_boston(return_X_y=True)
    self.regressor_settings = [
        'sklearn_random_forest_regressor',
        'sklearn_extra_trees_regressor',
        'sklearn_bagging_regressor',
        'sklearn_GP_regressor',
        'sklearn_ridge_regressor',
        'sklearn_lasso_regressor',
        'sklearn_kernel_ridge_regressor',
        'sklearn_knn_regressor',
        'sklearn_svr_regressor',
        'sklearn_decision_tree_regressor',
        'sklearn_linear_regression',
        'sklearn_adaboost_regressor',
        'xgboost_regressor',
    ]
def test_onehot():
    """onehot_features yields the expected column counts for full/dummy_na modes."""
    dataset = load_boston()
    X, y = dataset['data'], dataset['target']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.15, random_state=333)
    train_frame = pd.DataFrame(X_train)
    test_frame = pd.DataFrame(X_test)
    # Partial encoding with NaN dummies.
    t_train, t_test = onehot_features(train_frame.copy(deep=True),
                                      test_frame.copy(deep=True),
                                      [8, 1, 12], full=False, dummy_na=True)
    assert t_train.shape[1] == t_test.shape[1]
    assert t_train.shape[1] == 441
    # Full encoding without NaN dummies.
    t_train, t_test = onehot_features(train_frame.copy(deep=True),
                                      test_frame.copy(deep=True),
                                      [8, 1, 12], full=True, dummy_na=False)
    assert t_train.shape[1] == t_test.shape[1]
    assert t_train.shape[1] == 500
def test_few_fit_shapes():
    """test_few.py: fit and predict return correct shapes """
    np.random.seed(202)
    # load example data
    boston = load_boston()
    # fix: removed an unused local DataFrame copy of the data
    print("feature shape:", boston.data.shape)
    learner = FEW(generations=1, population_size=5,
                  mutation_rate=0.2, crossover_rate=0.8,
                  ml=LassoLarsCV(), min_depth=1, max_depth=3,
                  sel='epsilon_lexicase', tourn_size=2, random_state=0,
                  verbosity=0, disable_update_check=False, fit_choice='mse')
    score = learner.fit(boston.data[:300], boston.target[:300])
    print("learner:", learner._best_estimator)
    yhat_test = learner.predict(boston.data[300:])
    test_score = learner.score(boston.data[300:], boston.target[300:])
    print("train score:", score, "test score:", test_score,
          "test r2:", r2_score(boston.target[300:], yhat_test))
    # Predictions must align one-to-one with the held-out targets.
    assert yhat_test.shape == boston.target[300:].shape
def test_few_with_parents_weight():
    """test_few.py: few performs without error with parent pressure for selection"""
    np.random.seed(1006987)
    boston = load_boston()
    # Shuffle rows of (features | target) together.
    stacked = np.column_stack((boston.data, boston.target))
    np.random.shuffle(stacked)
    features = stacked[:, 0:-1]
    target = stacked[:, -1]
    print("feature shape:", boston.data.shape)
    learner = FEW(generations=1, population_size=5,
                  mutation_rate=1, crossover_rate=1,
                  ml=LassoLarsCV(), min_depth=1, max_depth=3,
                  sel='tournament', fit_choice='r2', tourn_size=2,
                  random_state=0, verbosity=0,
                  disable_update_check=False, weight_parents=True)
    learner.fit(features[:300], target[:300])
    few_score = learner.score(features[:300], target[:300])
    test_score = learner.score(features[300:], target[300:])
    print("few score:", few_score)
    print("few test score:", test_score)
def generate_data(case, sparse=False):
    """Generate regression/classification data.

    Parameters
    ----------
    case : str
        'regression' (Boston housing) or 'classification'
        (vectorized 20-newsgroups).
    sparse : bool, optional (default=False)
        When True, return the train/test matrices in CSR sparse format.

    Returns
    -------
    dict with keys 'X_train', 'X_test', 'y_train', 'y_test'.

    Raises
    ------
    ValueError
        For an unknown ``case`` (the original left ``bunch = None`` and
        crashed later with an AttributeError).
    """
    if case == 'regression':
        bunch = datasets.load_boston()
    elif case == 'classification':
        bunch = datasets.fetch_20newsgroups_vectorized(subset='all')
    else:
        raise ValueError("unknown case: %r" % (case,))
    X, y = shuffle(bunch.data, bunch.target)
    # 80/20 train/test split.
    offset = int(X.shape[0] * 0.8)
    X_train, y_train = X[:offset], y[:offset]
    X_test, y_test = X[offset:], y[offset:]
    if sparse:
        X_train = csr_matrix(X_train)
        X_test = csr_matrix(X_test)
    else:
        X_train = np.array(X_train)
        X_test = np.array(X_test)
    y_test = np.array(y_test)
    y_train = np.array(y_train)
    data = {'X_train': X_train, 'X_test': X_test,
            'y_train': y_train, 'y_test': y_test}
    return data
def test_score_sample_weight():
    """Weighted and unweighted scores must differ for tree estimators."""
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.tree import DecisionTreeRegressor
    from sklearn import datasets
    rng = np.random.RandomState(0)
    # Cover both ClassifierMixin and RegressorMixin score() paths.
    estimators = [DecisionTreeClassifier(max_depth=2),
                  DecisionTreeRegressor(max_depth=2)]
    sets = [datasets.load_iris(), datasets.load_boston()]
    for est, ds in zip(estimators, sets):
        est.fit(ds.data, ds.target)
        # Random integer sample weights in [1, 10).
        sample_weight = rng.randint(1, 10, size=len(ds.target))
        unweighted = est.score(ds.data, ds.target)
        weighted = est.score(ds.data, ds.target, sample_weight=sample_weight)
        assert_not_equal(unweighted, weighted,
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal")
def test_warm_start_convergence_with_regularizer_decrement():
    """Warm-starting from a more-regularized solution must converge faster."""
    boston = load_boston()
    X, y = boston.data, boston.target
    final_alpha = 1e-5
    # Baseline: converge directly on the lightly regularized problem.
    low_reg_model = ElasticNet(alpha=final_alpha).fit(X, y)
    # The same problem with 10x regularization is easier, so it should
    # generally converge in fewer iterations.
    high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y)
    assert_greater(low_reg_model.n_iter_, high_reg_model.n_iter_)
    # Re-solve the lightly regularized problem, warm-started from the
    # highly regularized solution; the better starting point should also
    # beat the cold start.
    warm_low_reg_model = deepcopy(high_reg_model)
    warm_low_reg_model.set_params(warm_start=True, alpha=final_alpha)
    warm_low_reg_model.fit(X, y)
    assert_greater(low_reg_model.n_iter_, warm_low_reg_model.n_iter_)
def make_rg_dataset_and_field_manager(self):
    """Build a Boston regression DataSet plus a FieldManager over its fields."""
    boston = datasets.load_boston()
    dataset = DataSet(boston.data, boston.target, boston.feature_names, "price")
    # One numeric field per feature, carrying its column mean/std.
    feature_fields = [
        Field(name, "NUMBER",
              value_mean=np.mean(dataset.data[:, idx]),
              value_std=np.std(dataset.data[:, idx]))
        for idx, name in enumerate(dataset.feature_names)
    ]
    target = Field("price", "NUMBER",
                   value_mean=np.mean(dataset.target),
                   value_std=np.std(dataset.target))
    field_manager = FieldManager(-1, feature_fields, target)
    return dataset, field_manager
def get_boston_regression_dataset():
    """Return (train, test) Boston DataFrames with the target stored as 'MEDV'."""
    bunch = load_boston()
    frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    frame['MEDV'] = bunch['target']
    train_frame, test_frame = train_test_split(frame, test_size=0.33,
                                               random_state=42)
    return train_frame, test_frame
def get_boston_dataset():
    """Return (train, test) Boston DataFrames with the target stored as 'MEDV'."""
    bunch = load_boston()
    frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    frame['MEDV'] = bunch['target']
    train_frame, test_frame = train_test_split(frame, test_size=0.2,
                                               random_state=42)
    return train_frame, test_frame
def load_boston_df(include_tgt=True, tgt_name="target", shuffle=False):
    """Load the Boston housing data as a pandas DataFrame.

    Parameters
    ----------
    include_tgt : bool, optional (default=True)
        Whether to append the target column.

    tgt_name : str, optional (default="target")
        Column name to use for the target feature.

    shuffle : bool, optional (default=False)
        Whether to shuffle the rows before returning.

    Returns
    -------
    X : Pandas ``DataFrame``, shape=(n_samples, n_features)
        The loaded dataset, with the target appended when requested.
    """
    bunch = load_boston()
    X = pd.DataFrame.from_records(data=bunch.data, columns=bunch.feature_names)
    if include_tgt:
        X[tgt_name] = bunch.target
    if shuffle:
        return shuffle_dataframe(X)
    return X
def lession_5():
    """Plot a synthetic single-feature regression dataset.

    fix: removed a block of commented-out Boston/LinearRegression demo
    code; the live behavior (make_regression + scatter plot) is unchanged.
    """
    X, y = datasets.make_regression(n_samples=100, n_features=1,
                                    n_targets=1, noise=10)
    plt.scatter(X, y)
    plt.show()
def lession_6():
    """Fit LinearRegression on Boston and print coefficients, intercept, score."""
    db = datasets.load_boston()
    data_X = db.data
    data_y = db.target
    model = LinearRegression()
    model.fit(data_X, data_y)
    # fix: single-argument print() works in both Python 2 and 3, unlike
    # the original Python-2-only print statements.
    print(model.coef_)
    print(model.intercept_)
    print(model.score(data_X, data_y))
def setUpClass(self):
    """Load Boston and binarize the target around its mean."""
    from sklearn.datasets import load_boston
    dataset = load_boston()
    self.X = dataset.data.astype('f').astype('d')  # scikit-learn downcasts data
    self.target = 1 * (dataset['target'] > dataset['target'].mean())
    self.feature_names = dataset.feature_names
    self.output_name = 'target'
    self.scikit_data = dataset
def setUpClass(self):
    """Train a RandomForestClassifier on a mean-binarized Boston target."""
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestClassifier
    dataset = load_boston()
    labels = 1 * (dataset['target'] > dataset['target'].mean())
    model = RandomForestClassifier(random_state=1)
    model.fit(dataset['data'], labels)
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.scikit_model = model
def scikit_data(self):
    """Return a freshly loaded Boston housing dataset bunch."""
    dataset = load_boston()
    return dataset
def setUpClass(self):
    """Train a DecisionTreeRegressor on the Boston dataset."""
    from sklearn.datasets import load_boston
    from sklearn.tree import DecisionTreeRegressor
    dataset = load_boston()
    model = DecisionTreeRegressor(random_state=1)
    model.fit(dataset['data'], dataset['target'])
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.scikit_model = model
def setUpClass(cls):
    """Train a GradientBoostingRegressor on Boston (skipped without sklearn)."""
    if not HAS_SKLEARN:
        return
    dataset = load_boston()
    model = GradientBoostingRegressor(random_state=1)
    model.fit(dataset['data'], dataset['target'])
    # Keep the fixtures for the tests.
    cls.scikit_data = dataset
    cls.scikit_model = model
def setUpClass(self):
    """Train a linear-kernel SVR on Boston (skipped without sklearn)."""
    if not HAS_SKLEARN:
        return
    dataset = load_boston()
    model = SVR(kernel='linear')
    model.fit(dataset['data'], dataset['target'])
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.scikit_model = model
def setUpClass(self):
    """Load Boston as a plain regression fixture."""
    from sklearn.datasets import load_boston
    dataset = load_boston()
    self.scikit_data = dataset
    self.X = dataset.data.astype('f').astype('d')  # scikit-learn downcasts data
    self.target = dataset.target
    self.feature_names = dataset.feature_names
    self.output_name = 'target'
def setUpClass(self):
    """Load Boston and binarize the target around its mean."""
    from sklearn.datasets import load_boston
    dataset = load_boston()
    self.scikit_data = dataset
    self.X = dataset.data.astype('f').astype('d')  # scikit-learn downcasts data
    self.target = 1 * (dataset['target'] > dataset['target'].mean())
    self.feature_names = dataset.feature_names
    self.output_name = 'target'
def test_boston(self):
    """Normalizer conversion: compare spec output against sklearn's transform."""
    from sklearn.datasets import load_boston
    dataset = load_boston()
    model = Normalizer(norm='l2').fit(dataset.data)
    spec = converter.convert(model, dataset.feature_names, 'out')
    inputs = [dict(zip(dataset.feature_names, sample)) for sample in dataset.data]
    outputs = [{"out": normalized} for normalized in model.transform(dataset.data)]
    evaluate_transformer(spec, inputs, outputs)
def setUpClass(self):
    """Train a LinearRegression model on the Boston dataset."""
    from sklearn.datasets import load_boston
    from sklearn.linear_model import LinearRegression
    dataset = load_boston()
    model = LinearRegression()
    model.fit(dataset['data'], dataset['target'])
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.scikit_model = model
def test_conversion_boston(self):
    """Imputer conversion matches sklearn across strategies and missing markers."""
    from sklearn.datasets import load_boston
    dataset = load_boston()
    n_rows, n_cols = dataset.data.shape
    rn.seed(0)
    # Pick random cells to overwrite with the missing-value marker.
    missing_value_indices = [(rn.randint(n_rows), rn.randint(n_cols))
                             for _ in range(n_rows)]
    for strategy in ["mean", "median", "most_frequent"]:
        for missing_value in [0, 'NaN', -999]:
            X = np.array(dataset.data).copy()
            for i, j in missing_value_indices:
                X[i, j] = missing_value
            model = Imputer(missing_values=missing_value, strategy=strategy)
            model = model.fit(X)
            tr_X = model.transform(X.copy())
            spec = converter.convert(model, dataset.feature_names, 'out')
            input_data = [dict(zip(dataset.feature_names, row)) for row in X]
            output_data = [{"out": row} for row in tr_X]
            result = evaluate_transformer(spec, input_data, output_data)
            assert result["num_errors"] == 0
def setUpClass(self):
    """Load the Boston dataset as a regression fixture."""
    from sklearn.datasets import load_boston
    # fix: removed an unused DecisionTreeRegressor import
    # Load data and train model
    scikit_data = load_boston()
    self.scikit_data = scikit_data
    self.X = scikit_data['data']
    self.target = scikit_data['target']
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
def setUpClass(self):
    """Train a GradientBoostingClassifier on a mean-binarized Boston target."""
    from sklearn.datasets import load_boston
    dataset = load_boston()
    labels = dataset['target'] > dataset['target'].mean()
    model = GradientBoostingClassifier(random_state=1)
    model.fit(dataset['data'], labels)
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.scikit_model = model
def setUpClass(self):
    """Load Boston as a regression fixture (data, target, names)."""
    dataset = load_boston()
    self.scikit_data = dataset
    self.X = dataset['data']
    self.target = dataset['target']
    self.feature_names = dataset.feature_names
    self.output_name = 'target'
def setUpClass(self):
    """Load Boston as a regression fixture (data, target, names).

    fix: the original assigned ``self.scikit_data = self.X`` (the raw
    data array) instead of the dataset bunch — unlike every sibling
    fixture in this file, which stores the bunch. Any test that reads
    ``self.scikit_data.feature_names`` would have failed.
    """
    scikit_data = load_boston()
    self.X = scikit_data.data
    self.scikit_data = scikit_data
    self.target = scikit_data.target
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
def setUpClass(self):
    """Train a RandomForestRegressor on the Boston dataset."""
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestRegressor
    dataset = load_boston()
    model = RandomForestRegressor(random_state=1)
    model.fit(dataset['data'], dataset['target'])
    # Keep the fixtures for the tests.
    self.scikit_data = dataset
    self.scikit_model = model
def setUpClass(self):
    """Load Boston and binarize the target around its mean."""
    from sklearn.datasets import load_boston
    # fix: removed an unused DecisionTreeClassifier import
    # Load data and train model
    scikit_data = load_boston()
    self.scikit_data = scikit_data
    self.X = scikit_data.data.astype('f').astype('d')  ## scikit-learn downcasts data
    self.target = 1 * (scikit_data['target'] > scikit_data['target'].mean())
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'