The following seven code examples, drawn from open-source Python projects, illustrate how to use sklearn.cross_validation.check_cv().
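A note on signatures before the examples: in scikit-learn 0.17 and earlier, check_cv lives in sklearn.cross_validation and takes the feature matrix X as its second argument; its 0.18+ replacement, sklearn.model_selection.check_cv, drops X. Several of the examples below exist purely to bridge that change. As a minimal sketch of the old-style call on toy data (assuming sklearn < 0.18 is installed):

import numpy as np
from sklearn.cross_validation import check_cv

X = np.ones((6, 2))               # toy feature matrix
y = np.array([0, 1, 0, 1, 0, 1])  # toy binary target

# With classifier=True and a discrete y, check_cv returns a
# StratifiedKFold; iterating yields (train_indices, test_indices).
cv = check_cv(3, X, y, classifier=True)
for train_idx, test_idx in cv:
    print(train_idx, test_idx)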
Example 1 — which cross-validator check_cv returns for each kind of target:

import numpy as np
from sklearn import cross_validation as cval
from sklearn.utils.testing import assert_true


def test_check_cv_return_types():
    X = np.ones((9, 2))
    cv = cval.check_cv(3, X, classifier=False)
    assert_true(isinstance(cv, cval.KFold))

    y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    cv = cval.check_cv(3, X, y_binary, classifier=True)
    assert_true(isinstance(cv, cval.StratifiedKFold))

    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    cv = cval.check_cv(3, X, y_multiclass, classifier=True)
    assert_true(isinstance(cv, cval.StratifiedKFold))

    # Multilabel and multioutput targets cannot be stratified,
    # so check_cv falls back to plain KFold.
    X = np.ones((5, 2))
    y_multilabel = [[1, 0, 1], [1, 1, 0], [0, 0, 0], [0, 1, 1], [1, 0, 0]]
    cv = cval.check_cv(3, X, y_multilabel, classifier=True)
    assert_true(isinstance(cv, cval.KFold))

    y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    cv = cval.check_cv(3, X, y_multioutput, classifier=True)
    assert_true(isinstance(cv, cval.KFold))
Example 2 — a version shim that routes to the right check_cv signature:

def _set_cv(cv, X, y, classifier):
    """This method returns either a
    `sklearn.cross_validation._PartitionIterator` or
    `sklearn.model_selection.BaseCrossValidator` depending on
    whether sklearn-0.17 or sklearn-0.18 is being used.

    Parameters
    ----------
    cv : int, `_PartitionIterator` or `BaseCrossValidator`
        The CV object or int to check. If an int, will be converted
        into the appropriate class of cross-validator.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The dataframe or np.ndarray being fit in the grid search.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    classifier : bool
        Whether the estimator being fit is a classifier.

    Returns
    -------
    `_PartitionIterator` or `BaseCrossValidator`
    """
    # sklearn >= 0.18 dropped X from the check_cv signature.
    return check_cv(cv, X, y, classifier) if not SK18 else check_cv(cv, y, classifier)
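The SK18 flag and the check_cv import are defined elsewhere in that project; a hypothetical reconstruction, shown only to make the example self-contained (the names mirror the snippet but are assumptions):

# Hypothetical reconstruction of the version flag used above.
from distutils.version import LooseVersion
import sklearn

SK18 = LooseVersion(sklearn.__version__) >= LooseVersion('0.18')

if SK18:
    from sklearn.model_selection import check_cv
else:
    from sklearn.cross_validation import check_cv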
Example 3 — the sklearn >= 0.18 half of a check_cv wrapper:

def our_check_cv(cv, X, y, classifier):
    # sklearn >= 0.18: check_cv takes (cv, y, classifier); the returned
    # splitter exposes n_splits and a split(X, y) method.
    ret = base_check_cv(cv, y, classifier)
    return ret.n_splits, list(ret.split(X, y=y))
Example 4 — the sklearn < 0.18 half of the same wrapper:

def our_check_cv(cv, X, y, classifier):
    # sklearn < 0.18: check_cv takes (cv, X, y, classifier); the returned
    # _PartitionIterator has a length and yields (train, test) directly.
    ret = base_check_cv(cv, X, y, classifier)
    return len(ret), list(iter(ret))
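Examples 3 and 4 are the two halves of one compatibility shim (base_check_cv is presumably the version-appropriate sklearn check_cv, imported under an alias): whichever definition is active, callers get back the same (n_folds, list_of_splits) pair. A hypothetical call:

import numpy as np

X = np.ones((6, 2))
y = np.array([0, 1, 0, 1, 0, 1])

# Identical call under either sklearn version; only the active
# definition of our_check_cv differs.
n_folds, splits = our_check_cv(3, X, y, classifier=True)
assert n_folds == 3
for train_idx, test_idx in splits:
    print(train_idx, test_idx)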
Example 5 — using check_cv to drive a holdout-or-CV evaluation loop:

def evaluate_estimator(datafile, estimator, task, metric=None, logger=None):
    if metric and metric not in METRIC:
        raise ValueError("Invalid metric")

    def scorer(estimator, X, y):
        if task in REGRESSION_TASKS:
            y_pr = estimator.predict(X)
        elif task in CLASSIFICATION_TASKS:
            y_pr = estimator.predict_proba(X, batch_size=1000)
        else:
            raise NotImplementedError()
        score = _calculate_score(y, y_pr, task, metric)
        return score

    eval_s = time.time()
    data_pkl = joblib.load(datafile, 'r')
    resampling = data_pkl['resampling']

    if resampling == 'holdout':
        X_tr = data_pkl["X"]
        y_tr = data_pkl["y"]
        X_val = data_pkl["valid_X"]
        y_val = data_pkl["valid_y"]
        estimator.fit(X_tr, y_tr)
        score = scorer(estimator, X_val, y_val)
    elif resampling == 'cv':
        X, y = data_pkl["X"], data_pkl["y"]
        # cv=None asks check_cv for the default 3-fold split
        # (stratified when the task is classification).
        cv = cross_validation.check_cv(
            None, X, y, classifier=(task in CLASSIFICATION_TASKS))
        score = defaultdict(list) if metric is None else []
        for train, test in cv:
            X_tr, X_val = X[train], X[test]
            y_tr, y_val = y[train], y[test]
            estimator.fit(X_tr, y_tr)
            score_ = scorer(estimator, X_val, y_val)
            if metric is None:
                for m in score_:
                    score[m].append(score_[m])
            else:
                score.append(score_)
        if metric is None:
            for m in score:
                score[m] = np.mean(score[m])
        else:
            score = np.mean(score)
        # refit on the full data after scoring the folds
        estimator.fit(X, y)
    else:
        raise NotImplementedError()
    eval_e = time.time()

    if logger:
        logger.debug("Evaluation done, score: %s | %s sec\n%s" %
                     (score, eval_e - eval_s, estimator))
    return score
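The dict keys the function expects can be read straight off the code; a hypothetical datafile for the 'holdout' path could be built like this (the values are made-up toy data, and the joblib import path varies by era):

import numpy as np
from sklearn.externals import joblib  # or: import joblib

data = {
    'resampling': 'holdout',
    'X': np.random.rand(80, 4),
    'y': np.random.randint(2, size=80),
    'valid_X': np.random.rand(20, 4),
    'valid_y': np.random.randint(2, size=20),
}
joblib.dump(data, 'task.pkl')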
Example 6 — check_cv inside a grid-search fit method:

def fit(self, X, y):
    """Actual fitting, performing the search over parameters."""
    parameter_iterable = ParameterGrid(self.param_grid)
    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)

    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                parameters, cv=cv)
      for parameters in parameter_iterable)

    # each result is a (score, parameters) tuple; keep the highest score
    best = sorted(out, key=lambda x: x[0])[-1]

    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
Example 7 — check_cv inside a randomized-search fit method:

def fit(self, X, y):
    """Actual fitting, performing the search over parameters."""
    parameter_iterable = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)
    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)

    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                parameters, cv=cv)
      for parameters in parameter_iterable)

    # out holds (score, parameters) tuples; sorting in reverse puts the
    # highest score first
    best = sorted(out, reverse=True)[0]

    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self