The following 11 code examples, extracted from open-source Python projects, illustrate how to use sklearn.model_selection.ParameterSampler().
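Before the extracted examples, here is a minimal, self-contained sketch of the pattern they all share: describe the search space as a dict mapping parameter names to either lists of discrete choices or scipy.stats distributions, then iterate over the sampler to get one candidate setting per draw. The space and values below are illustrative, not taken from any of the projects.

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

# Illustrative search space: list entries are sampled uniformly at random,
# distribution entries are sampled through their rvs() method.
param_distributions = {
    'alpha': uniform(0, 1),          # continuous distribution
    'fit_intercept': [True, False],  # discrete choices
}

# Draw 5 candidate settings; random_state makes the draws reproducible.
sampler = ParameterSampler(param_distributions, n_iter=5, random_state=0)

for params in sampler:
    print(params)  # one dict per draw, e.g. {'alpha': 0.55, 'fit_intercept': True}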
def fit(self, X, y=None):
    """Run fit on the estimator with randomly drawn parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.
    """
    sampled_params = ParameterSampler(self.param_distributions,
                                      self.n_iter,
                                      random_state=self.random_state)

    # the super class will handle the X, y validation
    return self._fit(X, y, sampled_params)
def sample_hyperparameters(random_state, num):
    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space, n_iter=num, random_state=random_state)

    for params in sampler:
        yield params
def sample_cnn_hyperparameters(random_state, num):
    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
        'kernel_width': [3, 5, 7],
        'num_layers': list(range(1, 10)),
        'dilation_multiplier': [1, 2],
        'nonlinearity': ['tanh', 'relu'],
        'residual': [True, False],
    }

    sampler = ParameterSampler(space, n_iter=num, random_state=random_state)

    for params in sampler:
        params['dilation'] = [params['dilation_multiplier'] ** (i % 8)
                              for i in range(params['num_layers'])]
        yield params
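Note how this example derives a parameter after sampling: ParameterSampler only draws from the declared space, so a value that depends on other sampled values (here, a per-layer dilation schedule whose exponent wraps every 8 layers) is computed inside the loop before yielding. The snippet below just traces that formula for one hypothetical draw:

# Illustrative trace of the dilation formula for one sampled setting.
params = {'dilation_multiplier': 2, 'num_layers': 5}
dilation = [params['dilation_multiplier'] ** (i % 8)
            for i in range(params['num_layers'])]
print(dilation)  # [1, 2, 4, 8, 16]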
def sample_lstm_hyperparameters(random_state, num):
    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space, n_iter=num, random_state=random_state)

    for params in sampler:
        yield params
def sample_pooling_hyperparameters(random_state, num):
    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space, n_iter=num, random_state=random_state)

    for params in sampler:
        yield params
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler])
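As the test shows, anything exposing an rvs() method works as a distribution, and a fixed random_state makes repeated iteration reproducible (for continuous distributions this requires scipy >= 0.16, where rvs accepts a random_state, hence the sp_version guard). Below is a minimal sketch of a custom, hypothetical log-uniform distribution that plugs into ParameterSampler the same way:

import numpy as np
from sklearn.model_selection import ParameterSampler
from sklearn.utils import check_random_state

class LogUniform(object):
    """Hypothetical log-uniform distribution exposing the rvs()
    interface that ParameterSampler expects of non-list entries."""

    def __init__(self, low, high):
        self.low, self.high = np.log(low), np.log(high)

    def rvs(self, random_state=None):
        rng = check_random_state(random_state)
        return float(np.exp(rng.uniform(self.low, self.high)))

sampler = ParameterSampler({'C': LogUniform(1e-3, 1e3)},
                           n_iter=3, random_state=0)
print(list(sampler))  # three dicts with log-uniformly drawn C values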
def fit(self, frame):
    """Fit the grid search.

    Parameters
    ----------
    frame : H2OFrame, shape=(n_samples, n_features)
        The training frame on which to fit.
    """
    sampled_params = ParameterSampler(self.param_grid,
                                      self.n_iter,
                                      random_state=self.random_state)

    return self._fit(frame, sampled_params)
def fit(self, frame):
    """Fit the grid search.

    Parameters
    ----------
    frame : H2OFrame, shape=(n_samples, n_features)
        The training frame on which to fit.
    """
    sampled_params = ParameterSampler(self.param_grid,
                                      self.n_iter,
                                      random_state=self.random_state)

    # set our score class
    self.scoring_class_ = GainsStatisticalReport(**self.grsttngs_)

    # we can do this once to avoid many as_data_frame operations
    exp, loss, prem = _val_exp_loss_prem(self.exposure_feature,
                                         self.loss_feature,
                                         self.premium_feature)
    self.extra_args_ = {
        'expo': _as_numpy(frame[exp]),
        'loss': _as_numpy(frame[loss]),
        'prem': _as_numpy(frame[prem]) if prem is not None else None
    }

    # for validation set
    self.extra_names_ = {
        'expo': exp,
        'loss': loss,
        'prem': prem
    }

    # do fit
    the_fit = self._fit(frame, sampled_params)

    # clear extra_args_, because they might take lots of mem.
    # we can do this because a re-fit will re-assign them anyway.
    # don't delete the extra_names_, though, because they're used in
    # scoring the incoming frame.
    del self.extra_args_

    return the_fit
def _get_param_iterator(self):
    """Return ParameterSampler instance for the given distributions"""
    return model_selection.ParameterSampler(
        self.param_distributions, self.n_iter,
        random_state=self.random_state)
def test_parameters_sampler_replacement():
    # raise error if n_iter is too large
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    assert_raises(ValueError, list, sampler)

    # degenerates to GridSearchCV if n_iter is the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert_true(values in samples)

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7)
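What this test pins down is the sampler's two regimes. When every entry in the space is a finite list, the space is treated as a grid and sampled without replacement, so n_iter above the grid size (2 * 3 = 6 here) raises ValueError in the scikit-learn version these tests target; newer releases instead warn and return the full grid. When any entry is a distribution, sampling is with replacement, which is why the bernoulli case can satisfy n_iter=7 even though only six distinct combinations exist:

from scipy.stats import bernoulli
from sklearn.model_selection import ParameterSampler

# With a distribution in the space, duplicates are allowed, so n_iter may
# exceed the number of distinct combinations (2 outcomes * 3 letters = 6).
space = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
print(len(list(ParameterSampler(space, n_iter=7, random_state=0))))  # 7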
def maximize(self, score_optimum=None, realize=True):
    """
    Find the next best hyper-parameter setting to optimize.

    Parameters
    ----------
    score_optimum: float
        An optional score to use inside the EI formula instead of the
        optimizer's current_best_score

    realize: bool
        Whether or not to give a more realistic estimate of the EI
        (default=True)

    Returns
    -------
    best_setting: dict
        The setting with the highest expected improvement

    best_score: float
        The highest EI (per second)
    """
    start = time.time()

    # Select a sample of parameters
    sampled_params = ParameterSampler(self.param_distributions,
                                      self.draw_samples)

    # Set score optimum
    if score_optimum is None:
        score_optimum = self.current_best_score

    # Determine the best parameters
    best_setting, best_score = self._maximize_on_sample(sampled_params,
                                                        score_optimum)

    if self.local_search:
        best_setting, best_score = self._local_search(
            best_setting, best_score, score_optimum,
            max_steps=self.ls_max_steps)

    if realize:
        best_setting, best_score = self._realize(best_setting, best_score,
                                                 score_optimum)

    # Store running time
    running_time = (time.time() - start) / self.simulate_speedup
    self.maximize_times.append(running_time)

    return best_setting, best_score
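This last example uses ParameterSampler not to run the search itself but to approximately maximize an acquisition function: draw a batch of candidate settings, score each by expected improvement, keep the best, and optionally refine it with local search. Below is a minimal sketch of that pattern, with a hypothetical stand-in for the EI computation:

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

def expected_improvement(params):
    # Hypothetical stand-in: a real implementation would query a
    # surrogate model of the objective, not a hard-coded formula.
    return -abs(params['alpha'] - 0.3)

candidates = ParameterSampler({'alpha': uniform(0, 1)},
                              n_iter=100, random_state=0)
best_setting = max(candidates, key=expected_improvement)
print(best_setting)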