Python sklearn.model_selection module: ParameterGrid() usage examples

The following 19 code examples, extracted from open-source Python projects, illustrate how to use sklearn.model_selection.ParameterGrid(). A minimal usage sketch is given directly below, followed by the project excerpts.
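A minimal, self-contained sketch of the basic ParameterGrid pattern (the parameter names and values here are illustrative only and are not taken from any of the projects below):

from sklearn.model_selection import ParameterGrid

# Each key maps to a list of candidate values; ParameterGrid enumerates every combination.
param_grid = {"C": [0.1, 1.0, 10.0], "kernel": ["linear", "rbf"]}
grid = ParameterGrid(param_grid)

print(len(grid))      # 6 combinations (3 values of C x 2 kernels)
for params in grid:
    print(params)     # e.g. {'C': 0.1, 'kernel': 'linear'}
    # each params dict can be passed to an estimator via estimator.set_params(**params)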

Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR()]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
Project: triage    Author: dssg    | project source | file source
def run(self, grid_config):
        for classpath, parameter_config in grid_config.items():
            try:
                module_name, class_name = classpath.rsplit(".", 1)
                module = importlib.import_module(module_name)
                cls = getattr(module, class_name)
                for parameters in ParameterGrid(parameter_config):
                    try:
                        cls(**parameters)
                    except Exception as e:
                        raise ValueError(dedent('''Section: grid_config -
                        Unable to instantiate classifier {} with parameters {}, error thrown: {}
                        '''.format(classpath, parameters, e)))
            except Exception as e:
                raise ValueError(dedent('''Section: grid_config -
                Unable to import classifier {}, error thrown: {}
                '''.format(classpath, e)))
Project: trend_ml_toolkit_xgboost    Author: raymon-tian    | project source | file source
def val_tune_rf(estimator,x_train,y_train,x_val,y_val,params):
    params_list = list(ParameterGrid(params))
    print(params_list)
    print(y_val)
    results = []
    for param in params_list:
        print('=========  ', param)
        estimator.set_params(**param)
        estimator.fit(x_train,y_train)
        preds_prob = estimator.predict_proba(x_val)
        # print preds_prob[:,1]
        result = roc_auc_score(y_val,preds_prob[:,1])
        print('roc_auc_score : %f' % result)
        results.append((param,result))
    results.sort(key=lambda k: k[1])
    print(results)
    print(results[-1])
Project: treehopper    Author: tomekkorbak    | project source | file source
def grid_search():
    param_grid = {}
    param_grid["embeddings"] = [
        ("data/pol/orth", "w2v_allwiki_nkjp300_300"),
        ("data/pol/lemma", "w2v_allwiki_nkjp300_300"),
        ("resources/pol/fasttext", "wiki.pl")
    ]
    param_grid["optim"] = ["adam", "adagrad"]
    param_grid['reweight'] = [True, False]
    grid = ParameterGrid(param_grid)

    filename = "results/{date:%Y%m%d_%H%M}_results.csv".format(date=datetime.now())
    print('Starting a grid search through {n} parameter combinations'.format(
        n=len(grid)))
    for params in grid:
        print(params)
        with open(filename, "a") as results_file:
            results_file.write(str(params) + ", ")
            max_dev_epoch, max_dev, _ = train.main(params)
            results_file.write('Epoch {epoch}, accuracy {acc:.4f}\n'.format(
                epoch=max_dev_epoch,
                acc=max_dev
            ))
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC()]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Project: palladio    Author: slipguru    | project source | file source
def _create_batches(self):
        param_iter = ParameterGrid(self.param_grid)

        # divide work into batches equal to the communicator's size
        work_batches = [[] for _ in range(comm_size)]
        i = 0
        for fold_id, (train_index, test_index) in enumerate(self.cv_iter):
            for parameters in param_iter:
                work_batches[i % comm_size].append((fold_id + 1, train_index,
                                                    test_index, parameters))
                i += 1

        return work_batches
Project: triage    Author: dssg    | project source | file source
def _generate_model_configs(self, grid_config):
        """Flattens a model/parameter grid configuration into individually
        trainable model/parameter pairs

        Yields: (tuple) classpath and parameters
        """
        for class_path, parameter_config in grid_config.items():
            for parameters in ParameterGrid(parameter_config):
                yield class_path, parameters
Project: skutil    Author: tgsmith61591    | project source | file source
def fit(self, frame):
        """Fit the grid search.

        Parameters
        ----------

        frame : H2OFrame, shape=(n_samples, n_features)
            The training frame on which to fit.
        """
        return self._fit(frame, ParameterGrid(self.param_grid))
Project: skutil    Author: tgsmith61591    | project source | file source
def fit(self, X, y=None):
        """Run fit with all sets of parameters.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.
        """
        return self._fit(X, y, ParameterGrid(self.param_grid))
Project: Tizona    Author: emcastillo    | project source | file source
def __init__(self, experiment, args, job_module_config):
        super(self.__class__, self).__init__(experiment, args, job_module_config)
        # pre-format the experiment dict
        # Sklearn needs all the params to be in a list for the grid to work
        # properly
        for param in experiment['params']:
            if type(experiment['params'][param]) is not list:
                experiment['params'][param] = [experiment['params'][param]]

        self.searcher = ParameterGrid(experiment['params'])
Project: ottertune    Author: cmu-db    | project source | file source
def create_parameter_grid(param_dict):
        from sklearn.model_selection import ParameterGrid
        return ParameterGrid(param_dict)
Project: dask-searchcv    Author: dask    | project source | file source
def _get_param_iterator(self):
        """Return ParameterGrid instance for the given param_grid"""
        return model_selection.ParameterGrid(self.param_grid)


# ------------------ #
# RandomizedSearchCV #
# ------------------ #
Project: catwalk    Author: dssg    | project source | file source
def _generate_model_configs(self, grid_config):
        """Flattens a model/parameter grid configuration into individually
        trainable model/parameter pairs

        Yields: (tuple) classpath and parameters
        """
        for class_path, parameter_config in grid_config.items():
            for parameters in ParameterGrid(parameter_config):
                yield class_path, parameters
Project: ycml    Author: skylander86    | project source | file source
def fit_binarized(self, X_featurized, Y_binarized, validation_data=None, **kwargs):
        klass = get_class_from_module_path(self.classifier)

        if validation_data is None:  # use 0.2 for validation data
            X_train, X_validation, Y_train, Y_validation = train_test_split(X_featurized, Y_binarized, test_size=self.validation_size)
            logger.info('Using {} of training data ({} instances) for validation.'.format(self.validation_size, Y_validation.shape[0]))
        else:
            X_train, X_validation, Y_train, Y_validation = X_featurized, validation_data[0], Y_binarized, validation_data[1]
        #end if

        best_score, best_param = 0.0, None

        if self.n_jobs > 1: logger.info('Performing hyperparameter gridsearch in parallel using {} jobs.'.format(self.n_jobs))
        else: logger.debug('Performing hyperparameter gridsearch in parallel using {} jobs.'.format(self.n_jobs))

        param_scores = Parallel(n_jobs=self.n_jobs)(delayed(_fit_classifier)(klass, self.classifier_args, param, self.metric, X_train, Y_train, X_validation, Y_validation) for param in ParameterGrid(self.param_grid))

        best_param, best_score = max(param_scores, key=lambda x: x[1])
        logger.info('Best scoring param is {} with score {}.'.format(best_param, best_score))

        classifier_args = {}
        classifier_args.update(self.classifier_args)
        classifier_args.update(best_param)
        self.classifier_ = klass(**classifier_args)
        logger.info('Fitting final model <{}> on full data with param {}.'.format(self.classifier_, best_param))
        self.classifier_.fit(X_featurized, Y_binarized)

        return self
    #end def
#end class
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_parameter_grid():
    # Test basic properties of ParameterGrid.
    params1 = {"foo": [1, 2, 3]}
    grid1 = ParameterGrid(params1)
    assert_true(isinstance(grid1, Iterable))
    assert_true(isinstance(grid1, Sized))
    assert_equal(len(grid1), 3)
    assert_grid_iter_equals_getitem(grid1)

    params2 = {"foo": [4, 2],
               "bar": ["ham", "spam", "eggs"]}
    grid2 = ParameterGrid(params2)
    assert_equal(len(grid2), 6)

    # loop to assert we can iterate over the grid multiple times
    for i in range(2):
        # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2)
        points = set(tuple(chain(*(sorted(p.items())))) for p in grid2)
        assert_equal(points,
                     set(("bar", x, "foo", y)
                         for x, y in product(params2["bar"], params2["foo"])))
    assert_grid_iter_equals_getitem(grid2)

    # Special case: empty grid (useful to get default estimator settings)
    empty = ParameterGrid({})
    assert_equal(len(empty), 1)
    assert_equal(list(empty), [{}])
    assert_grid_iter_equals_getitem(empty)
    assert_raises(IndexError, lambda: empty[1])

    has_empty = ParameterGrid([{'C': [1, 10]}, {}, {'C': [.5]}])
    assert_equal(len(has_empty), 4)
    assert_equal(list(has_empty), [{'C': 1}, {'C': 10}, {}, {'C': .5}])
    assert_grid_iter_equals_getitem(has_empty)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_parameters_sampler_replacement():
    # raise error if n_iter too large
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    assert_raises(ValueError, list, sampler)
    # degenerates to GridSearchCV if n_iter the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert_true(values in samples)

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_spectral_coclustering():
    # Test Dhillon's Spectral CoClustering on a simple problem.
    param_grid = {'svd_method': ['randomized', 'arpack'],
                  'n_svd_vecs': [None, 20],
                  'mini_batch': [False, True],
                  'init': ['k-means++'],
                  'n_init': [10],
                  'n_jobs': [1]}
    random_state = 0
    S, rows, cols = make_biclusters((30, 30), 3, noise=0.5,
                                    random_state=random_state)
    S -= S.min()  # needs to be nonnegative before making it sparse
    S = np.where(S < 1, 0, S)  # threshold some values
    for mat in (S, csr_matrix(S)):
        for kwargs in ParameterGrid(param_grid):
            model = SpectralCoclustering(n_clusters=3,
                                         random_state=random_state,
                                         **kwargs)
            model.fit(mat)

            assert_equal(model.rows_.shape, (3, 30))
            assert_array_equal(model.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(model.columns_.sum(axis=0), np.ones(30))
            assert_equal(consensus_score(model.biclusters_,
                                         (rows, cols)), 1)

            _test_shape_indices(model)
Project: elm    Author: ContinuumIO    | project source | file source
def _to_param_meta(param_grid, control):
    '''Acquire parameter metadata such as bounds that are useful for sampling'''
    choice_params = {k: v for k, v in param_grid.items()
                     if not hasattr(v, 'rvs')}
    distributions = {k: v for k, v in param_grid.items()
                     if k not in choice_params}
    pg_list = list(ParameterGrid(choice_params))
    choices, low, high, param_order, is_int = [], [], [], [], []
    is_continuous = lambda v: isinstance(v, numbers.Real)
    while len(pg_list):
        pg2 = pg_list.pop(0)
        for k, v in pg2.items():
            if k in param_order:
                idx = param_order.index(k)
            else:
                idx = len(param_order)
                param_order.append(k)
                low.append(v)
                high.append(v)
                choices.append([v])
                is_int.append(not is_continuous(v))
                continue
            if v not in choices[idx]:
                choices[idx].append(v)
            if is_continuous(v):
                is_int[idx] = False
                if v < low[idx]:
                    low[idx] = v
                if v > high[idx]:
                    high[idx] = v
            else:
                is_int[idx] = True
                low[idx] = high[idx] = v

    for k, v in distributions.items():
        choices.append(v)
        low.append(None)
        high.append(None)
        is_int.append(False)
        param_order.append(k)
    param_meta = dict(control=control, high=high, low=low,
                      choices=choices, is_int=is_int,
                      param_order=param_order)
    return param_meta
Project: fake-news-detection    Author: aldengolab    | project source | file source
def clf_loop(self, X_train, X_test, y_train, y_test, individuals, setting):
        '''
        Runs through each model specified by models_to_run once with each possible
        setting in params.
        '''
        N = 0
        self.prepare_report()
        for index, clf in enumerate([self.clfs[x] for x in self.models_to_run]):
            iteration = 0
            print('Running {}.'.format(self.models_to_run[index]))
            parameter_values = self.params[self.models_to_run[index]]
            grid = ParameterGrid(parameter_values)
            while iteration < self.iterations_max and iteration < len(grid):
                print('    Running Iteration {} of {}...'.format(iteration + 1, self.iterations_max))
                if len(grid) > self.iterations_max:
                    p = random.choice(list(grid))
                else:
                    p = list(grid)[iteration]
                try:
                    m = Model(clf, X_train, y_train, X_test, y_test, p, N,
                                   self.models_to_run[index], iteration,
                                   self.output_dir, thresholds = self.thresholds,
                                   ks = self.ks, report = self.report, label='label',
                                   individuals=individuals, setting=setting)
                    m.run()
                    print('    Printing to file...')
                    if not self.roc:
                        m.performance_to_file()
                    else:
                        m.performance_to_file(roc='{}ROC_{}_{}-{}.png'.format(
                            self.output_dir, self.models_to_run[index], N,
                            iteration))
                except IndexError as e:
                    print(p)
                    print(N)
                    print('IndexError: {}'.format(e))
                    print(traceback.format_exc())
                    continue
                except RuntimeError as e:
                    print(p)
                    print(N)
                    print('RuntimeError: {}'.format(e))
                    print(traceback.format_exc())
                    continue
                except AttributeError as e:
                    print(p)
                    print(N)
                    print('AttributeError: {}'.format(e))
                    print(traceback.format_exc())
                    continue
                iteration += 1
            N += 1