Python sklearn.datasets 模块,load_boston() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用sklearn.datasets.load_boston()

项目:python_utils    作者:Jayhello    | 项目源码 | 文件源码
def rfr_feature_select():
    """Rank Boston-housing features by univariate random-forest R^2.

    For each feature in isolation, cross-validates a small
    RandomForestRegressor on that single column and prints
    (mean R^2, feature name) pairs sorted from most to least predictive.
    """
    import numpy as np  # was missing: np.mean() below raised NameError
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.cross_validation import cross_val_score, ShuffleSplit

    boston = load_boston()
    X = boston["data"]
    Y = boston["target"]
    names = boston["feature_names"]

    rf = RandomForestRegressor(n_estimators=20, max_depth=4)
    scores = []
    for i in range(X.shape[1]):
        # Score the model on the i-th feature alone.
        score = cross_val_score(rf, X[:, i:i + 1],
                                Y, scoring="r2", cv=ShuffleSplit(len(X), 3, .3))
        scores.append((round(np.mean(score), 3), names[i]))

    # Parenthesized print works under both Python 2 and 3
    # (original used a Python-2-only print statement).
    print(sorted(scores, reverse=True))
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_boston(self):
        """Round-trip a fitted StandardScaler through the converter and
        verify the converted spec reproduces scikit-learn's outputs."""
        from sklearn.datasets import load_boston

        data = load_boston()
        names = data.feature_names

        scaler = StandardScaler().fit(data.data)
        spec = converter.convert(scaler, names, 'out').get_spec()

        # Build expected input/output pairs for the converted model.
        input_data = []
        output_data = []
        for row, scaled in zip(data.data, scaler.transform(data.data)):
            input_data.append(dict(zip(names, row)))
            output_data.append({"out" : scaled})

        metrics = evaluate_transformer(spec, input_data, output_data)
        assert metrics["num_errors"] == 0
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """Load the Boston data and bucket its continuous target into
        ``num_classes`` discrete labels for classifier tests."""
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeClassifier  # NOTE(review): imported but unused in this block

        # Load data and train model
        import numpy as np
        scikit_data = load_boston()
        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
        t = scikit_data.target
        num_classes = 3
        # Discretize the continuous target into integer class labels using
        # histogram bin edges; subtract 1 to make the labels 0-based.
        target = np.digitize(t, np.histogram(t, bins = num_classes - 1)[1]) - 1

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Trains a RandomForestClassifier on the Boston data with the
        continuous target discretized into integer class labels.
        """
        from sklearn.datasets import load_boston
        from sklearn.ensemble import RandomForestClassifier
        import numpy as np

        scikit_data = load_boston()
        scikit_model = RandomForestClassifier(random_state = 1)
        t = scikit_data.target
        # Discretize the continuous target using default (10-bin) histogram
        # edges; subtract 1 to make the labels 0-based.
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Trains a libsvm nu-SVR model (linear kernel) on the Boston housing
        data. Skips silently when sklearn or libsvm is unavailable.
        """
        if not HAS_SKLEARN:
            return
        if not HAS_LIBSVM:
            return

        scikit_data = load_boston()
        prob = svmutil.svm_problem(scikit_data['target'], scikit_data['data'].tolist())
        param = svmutil.svm_parameter()
        param.svm_type = svmutil.NU_SVR
        param.kernel_type = svmutil.LINEAR
        # eps=1 is a very loose stopping tolerance (fast, low-precision fit).
        param.eps = 1

        self.libsvm_model = svmutil.svm_train(prob, param)
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Injects one NaN into a single Boston feature column and fits a
        most-frequent Imputer on that column.
        """
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = Imputer(strategy='most_frequent', axis=0)
        # np.nan is the canonical spelling; the np.NaN alias was removed
        # in NumPy 2.0.
        scikit_data['data'][1,8] = np.nan

        # Fit on column 8 only -- the column that now contains the NaN.
        input_data = scikit_data['data'][:,8].reshape(-1, 1)
        scikit_model.fit(input_data, scikit_data['target'])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Fits an ordinary least-squares LinearRegression on the Boston data.
        Skips silently when sklearn is unavailable.
        """

        if not(HAS_SKLEARN):
            return

        scikit_data = load_boston()
        # NOTE(review): feature_names is assigned but not used below.
        feature_names = scikit_data.feature_names

        scikit_model = LinearRegression()
        scikit_model.fit(scikit_data['data'], scikit_data['target'])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_boston_OHE_plus_trees(self): 
        """One-hot-encode a Boston column, train boosted trees on top, and
        check the converted model's predictions match scikit-learn's."""

        data = load_boston()

        pl = Pipeline([
            ("OHE", OneHotEncoder(categorical_features = [8], sparse=False)), 
            ("Trees",GradientBoostingRegressor(random_state = 1))])

        pl.fit(data.data, data.target)

        # Convert the model
        spec = convert(pl, data.feature_names, 'target')

        # Get predictions
        df = pd.DataFrame(data.data, columns=data.feature_names)
        df['prediction'] = pl.predict(data.data)

        # Evaluate it
        result = evaluate_regressor(spec, df, 'target', verbose = False)

        # The converted model must agree with scikit-learn almost exactly.
        assert result["max_error"] < 0.0001
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Trains a one-round XGBoost booster with default parameters on the
        Boston data. Skips silently when xgboost or sklearn is missing.
        """
        if not HAS_XGBOOST:
            return
        if not HAS_SKLEARN:
            return

        scikit_data = load_boston()
        dtrain = xgboost.DMatrix(scikit_data.data, label = scikit_data.target,
                feature_names = scikit_data.feature_names)
        # {} = default booster params; 1 = a single boosting round.
        xgb_model = xgboost.train({}, dtrain, 1)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.xgb_model = xgb_model
        self.feature_names = self.scikit_data.feature_names
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Trains a libsvm epsilon-SVR model (linear kernel) on the Boston
        housing data. Skips silently when sklearn or libsvm is missing.
        """
        if not HAS_SKLEARN:
            return
        if not HAS_LIBSVM:
            return

        scikit_data = load_boston()
        prob = svmutil.svm_problem(scikit_data['target'], scikit_data['data'].tolist())
        param = svmutil.svm_parameter()
        param.svm_type = svmutil.EPSILON_SVR
        param.kernel_type = svmutil.LINEAR
        # eps=1 is a very loose stopping tolerance (fast, low-precision fit).
        param.eps = 1

        self.libsvm_model = svmutil.svm_train(prob, param)
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_boston_OHE(self): 
        """Check the converted OneHotEncoder matches scikit-learn's output
        for several choices (and orderings) of categorical columns."""
        data = load_boston()

        for categorical_features in [ [3], [8], [3, 8], [8,3] ]:

            model = OneHotEncoder(categorical_features = categorical_features, sparse=False)
            model.fit(data.data, data.target)

            # Convert the model
            spec = sklearn.convert(model, data.feature_names, 'out').get_spec()

            # Compare the converted spec against scikit-learn row by row.
            input_data = [dict(zip(data.feature_names, row)) for row in data.data]
            output_data = [{"out" : row} for row in model.transform(data.data)]

            result = evaluate_transformer(spec, input_data, output_data)

            assert result["num_errors"] == 0

    # This test still isn't working
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_boston_OHE_pipeline(self): 
        """Check a OneHotEncoder feeding a Normalizer converts correctly
        for several choices (and orderings) of categorical columns."""
        data = load_boston()

        for categorical_features in [ [3], [8], [3, 8], [8,3] ]:

            # Put it in a pipeline so that we can test whether the output dimension
            # handling is correct. 

            model = Pipeline([("OHE", OneHotEncoder(categorical_features = categorical_features)),
                 ("Normalizer", Normalizer())])

            model.fit(data.data.copy(), data.target)

            # Convert the model
            spec = sklearn.convert(model, data.feature_names, 'out').get_spec()

            # Compare the converted spec against scikit-learn row by row.
            input_data = [dict(zip(data.feature_names, row)) for row in data.data]
            output_data = [{"out" : row} for row in model.transform(data.data.copy())]

            result = evaluate_transformer(spec, input_data, output_data)

            assert result["num_errors"] == 0
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Also converts the fitted LinearRegression to a model spec via the
        converter and stores it for the tests.
        """

        if not(HAS_SKLEARN):
            return

        scikit_data = load_boston()
        feature_names = scikit_data.feature_names

        scikit_model = LinearRegression()
        scikit_model.fit(scikit_data['data'], scikit_data['target'])
        scikit_spec = converter.convert(scikit_model, feature_names, 'target').get_spec()

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
        self.scikit_spec = scikit_spec
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Trains a libsvm C-SVC classifier (linear kernel) on Boston data with
        the target binarized at its mean, then converts it to a spec.
        """
        if not HAS_SKLEARN:
            return
        if not HAS_LIBSVM:
            return

        scikit_data = load_boston()
        # Binary labels: price above the mean vs. not.
        prob = svmutil.svm_problem(scikit_data['target'] > scikit_data['target'].mean(),
                                   scikit_data['data'].tolist())
        param = svmutil.svm_parameter()
        param.svm_type = svmutil.C_SVC
        param.kernel_type = svmutil.LINEAR
        # eps=1 is a very loose stopping tolerance (fast, low-precision fit).
        param.eps = 1

        libsvm_model = svmutil.svm_train(prob, param)
        libsvm_spec = libsvm_converter.convert(libsvm_model, scikit_data.feature_names, 'target').get_spec()

        # Save the data and the model
        self.scikit_data =  scikit_data
        self.libsvm_spec = libsvm_spec
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """Load the Boston data and bucket its target into 3 classes."""
        import numpy as np
        from sklearn.datasets import load_boston

        data = load_boston()
        n_bins = 3

        # Round-trip through float32 because scikit-learn downcasts data.
        self.X = data.data.astype('f').astype('d')

        # Discretize the continuous target into 0-based integer labels.
        y = data.target
        edges = np.histogram(y, bins = n_bins - 1)[1]
        labels = np.digitize(y, edges) - 1

        self.scikit_data = data
        self.target = labels
        self.feature_names = data.feature_names
        self.output_name = 'target'
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeClassifier
        # NOTE(review): MultiLabelBinarizer is imported but never used here.
        from sklearn.preprocessing import MultiLabelBinarizer
        import numpy as np

        scikit_data = load_boston()
        scikit_model = DecisionTreeClassifier(random_state = 1)
        t = scikit_data.target
        # Discretize the continuous target using default (10-bin) histogram
        # edges; subtract 1 to make the labels 0-based.
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Trains a GradientBoostingClassifier on the Boston data with the
        continuous target discretized into integer class labels.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        scikit_data = load_boston()
        scikit_model = GradientBoostingClassifier(random_state = 1)
        t = scikit_data.target
        # Discretize the continuous target using default (10-bin) histogram
        # edges; subtract 1 to make the labels 0-based.
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)
        self.target = target

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """Load the Boston data and bucket its continuous target into
        ``num_classes`` discrete labels; no model is trained here."""
        from sklearn.datasets import load_boston
        import numpy as np

        # Load data and train model
        scikit_data = load_boston()
        num_classes = 3
        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
        t = scikit_data.target
        # Discretize the continuous target into integer class labels using
        # histogram bin edges; subtract 1 to make the labels 0-based.
        target = np.digitize(t, np.histogram(t, bins = num_classes - 1)[1]) - 1

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
项目:xcessiv    作者:reiinakano    | 项目源码 | 文件源码
def setUp(self):
        """Load the Boston arrays and list the regressor presets to test.

        The strings are preset identifiers resolved elsewhere in the
        project (one per supported regressor type).
        """
        self.X, self.y = load_boston(return_X_y=True)
        self.regressor_settings = [
            'sklearn_random_forest_regressor',
            'sklearn_extra_trees_regressor',
            'sklearn_bagging_regressor',
            'sklearn_GP_regressor',
            'sklearn_ridge_regressor',
            'sklearn_lasso_regressor',
            'sklearn_kernel_ridge_regressor',
            'sklearn_knn_regressor',
            'sklearn_svr_regressor',
            'sklearn_decision_tree_regressor',
            'sklearn_linear_regression',
            'sklearn_adaboost_regressor',
            'xgboost_regressor',
        ]
项目:heamy    作者:rushter    | 项目源码 | 文件源码
def test_onehot():
    """One-hot encoding of columns 8, 1, 12 must give train and test frames
    of equal width; the expected widths are empirical for this dataset and
    split, and differ between full=False and full=True."""
    data = load_boston()
    X, y = data['data'], data['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=333)
    train = pd.DataFrame(X_train)
    test = pd.DataFrame(X_test)

    # full=False: categories seen only in train/test handled separately.
    t_train, t_test = onehot_features(train.copy(deep=True), test.copy(deep=True), [8, 1, 12], full=False,
                                      dummy_na=True)
    assert t_train.shape[1] == t_test.shape[1]
    assert t_train.shape[1] == 441

    # full=True: encode over the union of categories from both frames.
    t_train, t_test = onehot_features(train.copy(deep=True), test.copy(deep=True), [8, 1, 12], full=True,
                                      dummy_na=False)
    assert t_train.shape[1] == t_test.shape[1]
    assert t_train.shape[1] == 500
项目:few    作者:lacava    | 项目源码 | 文件源码
def test_few_fit_shapes():
    """test_few.py: fit and predict return correct shapes """
    np.random.seed(202)
    # load example data
    boston = load_boston()
    d = pd.DataFrame(data=boston.data)  # NOTE(review): assigned but unused below
    print("feature shape:",boston.data.shape)

    # Tiny evolutionary run (1 generation, 5 individuals) to keep the test fast.
    learner = FEW(generations=1, population_size=5,
                mutation_rate=0.2, crossover_rate=0.8,
                ml = LassoLarsCV(), min_depth = 1, max_depth = 3,
                sel = 'epsilon_lexicase', tourn_size = 2,
                random_state=0, verbosity=0,
                disable_update_check=False, fit_choice = 'mse')

    # Train on the first 300 rows, hold out the rest.
    score = learner.fit(boston.data[:300], boston.target[:300])
    print("learner:",learner._best_estimator)
    yhat_test = learner.predict(boston.data[300:])
    test_score = learner.score(boston.data[300:],boston.target[300:])
    print("train score:",score,"test score:",test_score,
    "test r2:",r2_score(boston.target[300:],yhat_test))
    # Predictions must be one value per held-out sample.
    assert yhat_test.shape == boston.target[300:].shape
项目:few    作者:lacava    | 项目源码 | 文件源码
def test_few_with_parents_weight():
    """test_few.py: few performs without error with parent pressure for selection"""
    np.random.seed(1006987)
    boston = load_boston()
    # Shuffle features and target together so rows stay aligned.
    d = np.column_stack((boston.data,boston.target))
    np.random.shuffle(d)
    features = d[:,0:-1]
    target = d[:,-1]

    print("feature shape:",boston.data.shape)

    # Tiny run (1 generation, 5 individuals) with weight_parents enabled,
    # which is the behavior under test.
    learner = FEW(generations=1, population_size=5,
                mutation_rate=1, crossover_rate=1,
                ml = LassoLarsCV(), min_depth = 1, max_depth = 3,
                sel = 'tournament', fit_choice = 'r2',tourn_size = 2, random_state=0, verbosity=0,
                disable_update_check=False, weight_parents=True)

    # Smoke test: fit and score must complete without raising.
    learner.fit(features[:300], target[:300])
    few_score = learner.score(features[:300], target[:300])
    test_score = learner.score(features[300:],target[300:])

    print("few score:",few_score)
    print("few test score:",test_score)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def generate_data(case, sparse=False):
    """Generate regression/classification data.

    Parameters
    ----------
    case : str
        'regression' (Boston housing) or 'classification'
        (vectorized 20-newsgroups).
    sparse : bool
        If True, return the feature matrices as CSR sparse matrices.

    Returns
    -------
    dict
        Keys 'X_train', 'X_test', 'y_train', 'y_test' holding an 80/20
        shuffled train/test split.
    """
    bunch = None
    if case == 'regression':
        bunch = datasets.load_boston()
    elif case == 'classification':
        bunch = datasets.fetch_20newsgroups_vectorized(subset='all')

    X, y = shuffle(bunch.data, bunch.target)
    split = int(X.shape[0] * 0.8)
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    if sparse:
        X_train, X_test = csr_matrix(X_train), csr_matrix(X_test)
    else:
        X_train, X_test = np.array(X_train), np.array(X_test)
    y_train, y_test = np.array(y_train), np.array(y_test)

    return {'X_train': X_train, 'X_test': X_test,
            'y_train': y_train, 'y_test': y_test}
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_score_sample_weight():
    """Scores computed with random sample weights must differ from the
    unweighted scores, for both a classifier and a regressor."""
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.tree import DecisionTreeRegressor
    from sklearn import datasets

    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    estimators = [DecisionTreeClassifier(max_depth=2),
                  DecisionTreeRegressor(max_depth=2)]
    # iris pairs with the classifier, boston with the regressor (zip order).
    sets = [datasets.load_iris(),
            datasets.load_boston()]

    for est, ds in zip(estimators, sets):
        est.fit(ds.data, ds.target)
        # generate random sample weights
        sample_weight = rng.randint(1, 10, size=len(ds.target))
        # check that the score with and without sample weights are different
        assert_not_equal(est.score(ds.data, ds.target),
                         est.score(ds.data, ds.target,
                                   sample_weight=sample_weight),
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal")
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_warm_start_convergence_with_regularizer_decrement():
    """Warm-starting ElasticNet from a more-regularized solution must
    converge in fewer iterations than fitting from scratch."""
    boston = load_boston()
    X, y = boston.data, boston.target

    # Train a model to converge on a lightly regularized problem
    final_alpha = 1e-5
    low_reg_model = ElasticNet(alpha=final_alpha).fit(X, y)

    # Fitting a new model on a more regularized version of the same problem.
    # Fitting with high regularization is easier it should converge faster
    # in general.
    high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y)
    assert_greater(low_reg_model.n_iter_, high_reg_model.n_iter_)

    # Fit the solution to the original, less regularized version of the
    # problem but from the solution of the highly regularized variant of
    # the problem as a better starting point. This should also converge
    # faster than the original model that starts from zero.
    warm_low_reg_model = deepcopy(high_reg_model)
    warm_low_reg_model.set_params(warm_start=True, alpha=final_alpha)
    warm_low_reg_model.fit(X, y)
    assert_greater(low_reg_model.n_iter_, warm_low_reg_model.n_iter_)
项目:karura    作者:icoxfog417    | 项目源码 | 文件源码
def make_rg_dataset_and_field_manager(self):
        """Build a Boston-housing regression DataSet plus a FieldManager
        describing each numeric feature and the "price" target.

        Returns
        -------
        (DataSet, FieldManager)
        """
        boston = datasets.load_boston()
        dataset = DataSet(boston.data, boston.target, boston.feature_names, "price")

        # One NUMBER field per feature column, annotated with its mean/std.
        feature_fields = []
        for i, name in enumerate(dataset.feature_names):
            f = Field(name, "NUMBER", value_mean=np.mean(dataset.data[:, i]), value_std=np.std(dataset.data[:, i]))
            feature_fields.append(f)

        target = Field("price", "NUMBER", value_mean=np.mean(dataset.target), value_std=np.std(dataset.target))
        field_manager = FieldManager(-1, feature_fields, target)

        return dataset, field_manager
项目:auto_ml    作者:doordash    | 项目源码 | 文件源码
def get_boston_regression_dataset():
    """Return a 67/33 train/test split of Boston housing as DataFrames.

    The median home value target is stored in a 'MEDV' column.
    """
    bunch = load_boston()
    frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    frame['MEDV'] = bunch['target']
    train_frame, test_frame = train_test_split(frame, test_size=0.33, random_state=42)
    return train_frame, test_frame
项目:auto_ml    作者:doordash    | 项目源码 | 文件源码
def get_boston_dataset():
    """Return an 80/20 train/test split of Boston housing as DataFrames.

    The median home value target is stored in a 'MEDV' column.
    """
    bunch = load_boston()
    frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    frame['MEDV'] = bunch['target']
    train_frame, test_frame = train_test_split(frame, test_size=0.2, random_state=42)
    return train_frame, test_frame
项目:skutil    作者:tgsmith61591    | 项目源码 | 文件源码
def load_boston_df(include_tgt=True, tgt_name="target", shuffle=False):
    """Loads the boston housing dataset into a dataframe with the
    target set as the "target" feature or whatever name
    is specified in ``tgt_name``.

    Parameters
    ----------

    include_tgt : bool, optional (default=True)
        Whether to include the target

    tgt_name : str, optional (default="target")
        The name of the target feature

    shuffle : bool, optional (default=False)
        Whether to shuffle the rows


    Returns
    -------

    X : Pandas ``DataFrame``, shape=(n_samples, n_features [+ 1])
        The loaded dataset, with the target appended as an extra
        column when ``include_tgt`` is True
    """
    bo = load_boston()
    X = pd.DataFrame.from_records(data=bo.data, columns=bo.feature_names)

    if include_tgt:
        X[tgt_name] = bo.target

    return X if not shuffle else shuffle_dataframe(X)
项目:base_function    作者:Rockyzsu    | 项目源码 | 文件源码
def lession_5():
    """Plot a synthetic 1-D regression dataset.

    Generates 100 noisy (X, y) samples with a single feature and target
    and shows them as a scatter plot.
    """
    # A commented-out Boston/LinearRegression example previously lived
    # here; removed as dead code (lession_6 covers that example).
    X,y = datasets.make_regression(n_samples=100,n_features=1,n_targets=1,noise=10)

    plt.scatter(X,y)
    plt.show()
项目:base_function    作者:Rockyzsu    | 项目源码 | 文件源码
def lession_6():
    """Fit LinearRegression on Boston housing and report its parameters.

    Prints the learned coefficients, the intercept, and the in-sample
    R^2 score.
    """
    db = datasets.load_boston()
    data_X = db.data
    data_y = db.target
    model = LinearRegression()
    model.fit(data_X, data_y)
    # Parenthesized print works under both Python 2 and 3 (the original
    # used Python-2-only print statements).
    print(model.coef_)
    print(model.intercept_)
    print(model.score(data_X, data_y))
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Stores the Boston data with the target binarized at its mean;
        no model is trained here despite the docstring.
        """
        from sklearn.datasets import load_boston

        # Load data and train model
        scikit_data = load_boston()
        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
        # Binary labels: 1 where the price exceeds the mean, else 0.
        self.target = 1 * (scikit_data['target'] > scikit_data['target'].mean())
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
        self.scikit_data = scikit_data
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """Train a random-forest classifier on a binarized Boston target."""
        from sklearn.datasets import load_boston
        from sklearn.ensemble import RandomForestClassifier

        data = load_boston()
        # Binary labels: 1 where the price exceeds the mean, else 0.
        labels = 1 * (data['target'] > data['target'].mean())

        forest = RandomForestClassifier(random_state = 1)
        forest.fit(data['data'], labels)

        # Keep the fixtures on the test class.
        self.scikit_data = data
        self.scikit_model = forest
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def scikit_data(self):
        """Return a freshly loaded Boston housing dataset bunch."""
        return load_boston()
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """Fit a decision-tree regressor on the Boston housing data."""
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeRegressor

        data = load_boston()
        tree = DecisionTreeRegressor(random_state = 1)
        tree.fit(data['data'], data['target'])

        # Keep the fixtures on the test class.
        self.scikit_data = data
        self.scikit_model = tree
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(cls):
        """
        Set up the unit test by loading the dataset and training a model.

        Fits a GradientBoostingRegressor on the Boston housing data.
        Skips silently when sklearn is unavailable.
        """
        if not HAS_SKLEARN:
            return

        scikit_data = load_boston()
        scikit_model = GradientBoostingRegressor(random_state = 1)
        scikit_model.fit(scikit_data['data'], scikit_data['target'])

        # Save the data and the model
        cls.scikit_data = scikit_data
        cls.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Fits a linear-kernel support vector regressor on the Boston data.
        Skips silently when sklearn is unavailable.
        """
        if not HAS_SKLEARN:
            return

        scikit_data = load_boston()
        scikit_model = SVR(kernel='linear')
        scikit_model.fit(scikit_data['data'], scikit_data['target'])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """Load the Boston housing regression fixtures onto the class."""
        from sklearn.datasets import load_boston

        data = load_boston()

        self.scikit_data = data
        # Round-trip through float32 because scikit-learn downcasts data.
        self.X = data.data.astype('f').astype('d')
        self.target = data.target
        self.feature_names = data.feature_names
        self.output_name = 'target'
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Stores the Boston data with the target binarized at its mean;
        no model is trained here despite the docstring.
        """
        from sklearn.datasets import load_boston

        # Load data and train model
        scikit_data = load_boston()
        self.scikit_data = scikit_data
        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
        # Binary labels: 1 where the price exceeds the mean, else 0.
        self.target = 1 * (scikit_data['target'] > scikit_data['target'].mean())
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_boston(self):
        """Convert an L2 Normalizer and run it through the transformer
        evaluator on the Boston data.

        NOTE(review): the evaluator's result is not asserted on, so this
        only checks that conversion and evaluation run without raising.
        """
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = Normalizer(norm='l2').fit(scikit_data.data)

        spec = converter.convert(scikit_model, scikit_data.feature_names, 'out')

        input_data = [dict(zip(scikit_data.feature_names, row)) 
                for row in scikit_data.data]

        output_data = [{"out" : row} for row in scikit_model.transform(scikit_data.data)]

        evaluate_transformer(spec, input_data, output_data)
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """Fit an ordinary least-squares model on the Boston housing data."""
        from sklearn.datasets import load_boston
        from sklearn.linear_model import LinearRegression

        data = load_boston()
        regressor = LinearRegression()
        regressor.fit(data['data'], data['target'])

        # Keep the fixtures on the test class.
        self.scikit_data = data
        self.scikit_model = regressor
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_conversion_boston(self):
        """Converted Imputer output must match scikit-learn for every
        strategy/missing-value combination on randomly corrupted data."""

        from sklearn.datasets import load_boston

        scikit_data = load_boston()

        sh = scikit_data.data.shape 

        # Pick a reproducible set of (row, col) cells to corrupt.
        # (`rn` is presumably the random module imported at file level --
        # TODO confirm.)
        rn.seed(0)
        missing_value_indices = [(rn.randint(sh[0]), rn.randint(sh[1])) 
                                    for k in range(sh[0])]

        for strategy in ["mean", "median", "most_frequent"]: 
            for missing_value in [0, 'NaN', -999]:

                # Fresh copy per combination; corrupt the chosen cells with
                # the sentinel the Imputer is configured to look for.
                X = np.array(scikit_data.data).copy()

                for i, j in missing_value_indices:
                    X[i,j] = missing_value

                model = Imputer(missing_values = missing_value, strategy = strategy)
                model = model.fit(X)

                tr_X = model.transform(X.copy())

                spec = converter.convert(model, scikit_data.feature_names, 'out')

                # Compare the converted spec against scikit-learn row by row.
                input_data = [dict(zip(scikit_data.feature_names, row)) 
                                for row in X]

                output_data = [{"out" : row} for row in tr_X]

                result = evaluate_transformer(spec, input_data, output_data)

                assert result["num_errors"] == 0
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        NOTE(review): despite the docstring, no model is trained here --
        only the raw data and metadata are stored, and
        DecisionTreeRegressor is imported but unused.
        """
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeRegressor

        # Load data and train model
        scikit_data = load_boston()
        self.scikit_data = scikit_data
        self.X = scikit_data['data']
        self.target = scikit_data['target']
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Trains a GradientBoostingClassifier on Boston data with the target
        binarized (boolean) at its mean.
        """
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = GradientBoostingClassifier(random_state = 1)
        # Boolean labels: price above the mean vs. not.
        target = scikit_data['target'] > scikit_data['target'].mean()
        scikit_model.fit(scikit_data['data'], target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """Expose the raw Boston housing data as class-level fixtures."""
        data = load_boston()

        self.scikit_data = data
        self.X = data['data']
        self.target = data['target']
        self.feature_names = data.feature_names
        self.output_name = 'target'
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Stores the Boston dataset bunch, its feature matrix, target, and
        metadata as class-level fixtures.
        """

        # Load data and train model
        scikit_data = load_boston()

        self.X = scikit_data.data
        # Store the dataset bunch itself. The original assigned self.X
        # here, which is inconsistent with every sibling setUpClass and
        # would lose the bunch's metadata.
        self.scikit_data = scikit_data
        self.target = scikit_data.target
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Fits a RandomForestRegressor on the Boston housing data.
        """
        from sklearn.datasets import load_boston
        from sklearn.ensemble import RandomForestRegressor

        scikit_data = load_boston()
        scikit_model = RandomForestRegressor(random_state = 1)
        scikit_model.fit(scikit_data['data'], scikit_data['target'])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """Store the Boston data with the target binarized at its mean;
        no model is trained here."""
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeClassifier  # NOTE(review): imported but unused in this block

        # Load data and train model
        scikit_data = load_boston()
        self.scikit_data = scikit_data
        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
        # Binary labels: 1 where the price exceeds the mean, else 0.
        self.target = 1 * (scikit_data['target'] > scikit_data['target'].mean())
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
项目:auto_ml    作者:ClimbsRocks    | 项目源码 | 文件源码
def get_boston_regression_dataset():
    """Return (train, test) Boston housing DataFrames, split 67/33.

    The median home value target lives in a 'MEDV' column.
    """
    bunch = load_boston()
    frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    frame['MEDV'] = bunch['target']
    train_frame, test_frame = train_test_split(
        frame, test_size=0.33, random_state=42)
    return train_frame, test_frame
项目:auto_ml    作者:ClimbsRocks    | 项目源码 | 文件源码
def get_boston_regression_dataset():
    """Build a Boston housing DataFrame (target in 'MEDV') and return a
    reproducible 67/33 train/test split of it."""
    raw = load_boston()
    housing = pd.DataFrame(raw.data, columns=raw.feature_names)
    housing['MEDV'] = raw['target']
    split = train_test_split(housing, test_size=0.33, random_state=42)
    return split[0], split[1]