我们从 Python 开源项目中，提取了以下 17 个代码示例，用于说明如何使用 sklearn.model_selection.learning_curve()。
def test_learning_curve():
    """Check shapes, sizes and mean scores returned by ``learning_curve``.

    Runs ``learning_curve`` with ``MockImprovingEstimator(20)`` on a
    30-sample problem using 3-fold CV and ten relative training sizes, and
    fails if any warning is recorded during the call.
    """
    features, labels = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    mock = MockImprovingEstimator(20)
    fractions = np.linspace(0.1, 1.0, 10)

    with warnings.catch_warnings(record=True) as caught:
        sizes, scores_train, scores_test = learning_curve(
            mock, features, labels, cv=3, train_sizes=fractions)

    # Any warning recorded while computing the curve is treated as an error.
    if caught:
        raise RuntimeError("Unexpected warning: %r" % caught[0].message)

    # One row per training size, one column per CV fold.
    expected_shape = (10, 3)
    assert_equal(scores_train.shape, expected_shape)
    assert_equal(scores_test.shape, expected_shape)
    assert_array_equal(sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(scores_train.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(scores_test.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
def test_learning_curve_verbose():
    """Check that ``learning_curve(..., verbose=1)`` prints its tag to stdout.

    ``sys.stdout`` is temporarily replaced by an in-memory buffer; the
    captured text must contain ``"[learning_curve]"``.
    """
    features, labels = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    mock = MockImprovingEstimator(20)

    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _sizes, _scores_train, _scores_test = learning_curve(
            mock, features, labels, cv=3, verbose=1)
    finally:
        # Always grab the output and restore the real stdout, even when the
        # call above raised.
        captured = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = saved_stdout

    assert "[learning_curve]" in captured
def test_learning_curve_batch_and_incremental_learning_are_equal():
    """Check that batch and incremental learning curves agree.

    ``learning_curve`` is run twice with the same estimator, data, CV setting
    and training sizes: once with ``exploit_incremental_learning=True`` and
    once with ``False``. The returned training sizes must be identical and
    the per-size mean train/test scores must be almost equal.
    """
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    # ``n_iter`` was deprecated in scikit-learn 0.19 and removed in 0.21.
    # ``max_iter=1`` with ``tol=None`` (no early stopping) requests the same
    # single pass over the data that ``n_iter=1`` used to.
    estimator = PassiveAggressiveClassifier(max_iter=1, tol=None,
                                            shuffle=False)

    train_sizes_inc, train_scores_inc, test_scores_inc = learning_curve(
        estimator, X, y, train_sizes=train_sizes, cv=3,
        exploit_incremental_learning=True)
    train_sizes_batch, train_scores_batch, test_scores_batch = learning_curve(
        estimator, X, y, cv=3, train_sizes=train_sizes,
        exploit_incremental_learning=False)

    assert_array_equal(train_sizes_inc, train_sizes_batch)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1))
def plot_learning_curve(self):
    """Compute and show the learning curve of ``self.model``.

    The curve is computed on ``self.X_train`` / ``self.y_train`` with 3-fold
    cross-validation and the ``'neg_mean_squared_error'`` scorer, then drawn
    through ``self.plot_learning_curve_helper`` on a new 9x6 figure.
    """
    plt.figure(figsize=(9, 6))

    curve = learning_curve(
        self.model,
        X=self.X_train,
        y=self.y_train,
        cv=3,
        scoring='neg_mean_squared_error',
    )
    sizes, fit_scores, validation_scores = curve

    self.plot_learning_curve_helper(sizes, fit_scores, validation_scores,
                                    'Learning Curve')
    plt.show()
def plot_learning_curve(estimators, X, y, cv=10, scoring=None, n_jobs=1):
    """Plot one learning curve per estimator, side by side in a single row.

    Parameters
    ----------
    estimators : sequence of estimators
        Estimators to evaluate; each one gets its own subplot.
    X : array-like
        Training data passed to ``learning_curve``.
    y : array-like
        Target passed to ``learning_curve``.
    cv : int or cross-validation generator, default 10
        Cross-validation strategy passed to ``learning_curve``.
    scoring : str, callable or None, default None
        Scorer passed to ``learning_curve`` and to ``_plot_curve`` for
        labelling.
    n_jobs : int, default 1
        Number of parallel jobs passed to ``learning_curve``.

    Returns
    -------
    fig : matplotlib figure
        The figure holding all subplots.
    """
    n_estimators = len(estimators)
    figsize = (6.4 * n_estimators, 4.8)
    fig, axes = plt.subplots(nrows=1, ncols=n_estimators, figsize=figsize)
    # With a single subplot ``plt.subplots`` hands back one Axes rather than
    # a sequence; wrap it so the loop below can zip over it.
    if n_estimators == 1:
        axes = [axes]

    for ax, estimator in zip(axes, estimators):
        train_sizes, train_scores, test_scores = learning_curve(
            estimator=estimator,
            X=X,
            y=y,
            train_sizes=np.linspace(start=0.1, stop=1.0, num=10),
            cv=cv,
            # Forward the caller's scorer. This was previously hard-coded to
            # None, so the curve was always computed with the estimator's
            # default scorer while being labelled with ``scoring``.
            scoring=scoring,
            n_jobs=n_jobs,
            verbose=1
        )
        xlabel = 'Number of training samples'
        _plot_curve(
            axes=ax,
            train_sizes=train_sizes,
            train_scores=train_scores,
            test_scores=test_scores,
            xlabel=xlabel,
            scoring=scoring
        )
        ax.set_title(pipeline_name(estimator))

    return fig
def plot_learning_curve(est, x, y):
    """Draw mean training and test learning curves for ``est`` on the current axes.

    The curve is evaluated at 20 relative training sizes from 0.1 to 1 using
    a shuffled 20-fold ``KFold`` (``random_state=1``). The training curve is
    dashed, the test curve is solid, and both share the same colour.
    """
    from sklearn.model_selection import KFold, learning_curve

    fractions = np.linspace(.1, 1, 20)
    splitter = KFold(20, shuffle=True, random_state=1)
    sizes, fit_scores, held_out_scores = learning_curve(
        est, x, y, train_sizes=fractions, cv=splitter)

    model_name = est.__class__.__name__
    train_line = plt.plot(sizes, fit_scores.mean(axis=1), '--',
                          label="training " + model_name)[0]
    # Reuse the training line's colour so both curves of one model match.
    plt.plot(sizes, held_out_scores.mean(axis=1), '-',
             label="test " + model_name, c=train_line.get_color())

    plt.xlabel('Training set size')
    plt.ylabel('Score (R^2)')
    plt.ylim(0, 1.1)
def test_learning_curve_unsupervised():
    """Check ``learning_curve`` when no target is supplied (``y=None``).

    Uses ``MockImprovingEstimator(20)`` with 3-fold CV and ten relative
    training sizes, and verifies the absolute sizes and mean scores.
    """
    features, _ = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    mock = MockImprovingEstimator(20)
    fractions = np.linspace(0.1, 1.0, 10)

    sizes, scores_train, scores_test = learning_curve(
        mock, features, y=None, cv=3, train_sizes=fractions)

    assert_array_equal(sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(scores_train.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(scores_test.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
def test_learning_curve_incremental_learning_not_possible():
    """Check that incremental learning is rejected with ``ValueError``.

    ``learning_curve`` is called with ``exploit_incremental_learning=True``
    on ``MockImprovingEstimator(1)`` and must raise ``ValueError``.
    """
    features, labels = make_classification(
        n_samples=2, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)

    # The mockup does not have partial_fit()
    mock_without_partial_fit = MockImprovingEstimator(1)

    assert_raises(
        ValueError,
        learning_curve,
        mock_without_partial_fit,
        features,
        labels,
        exploit_incremental_learning=True,
    )
def test_learning_curve_incremental_learning():
    """Check ``learning_curve`` with ``exploit_incremental_learning=True``.

    Uses ``MockIncrementalImprovingEstimator(20)`` with 3-fold CV and ten
    relative training sizes, and verifies the absolute sizes and mean scores.
    """
    features, labels = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    incremental_mock = MockIncrementalImprovingEstimator(20)
    fractions = np.linspace(0.1, 1.0, 10)

    sizes, scores_train, scores_test = learning_curve(
        incremental_mock, features, labels, cv=3,
        exploit_incremental_learning=True, train_sizes=fractions)

    assert_array_equal(sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(scores_train.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(scores_test.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
def test_learning_curve_n_sample_range_out_of_bounds():
    """Check that out-of-range ``train_sizes`` raise ``ValueError``.

    Each ``train_sizes`` value listed below is passed to ``learning_curve``
    (3-fold CV, ``MockImprovingEstimator(20)``) and must be rejected.
    """
    features, labels = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    mock = MockImprovingEstimator(20)

    invalid_train_sizes = (
        [0, 1],
        [0.0, 1.0],
        [0.1, 1.1],
        [0, 20],
        [1, 21],
    )
    for bad_sizes in invalid_train_sizes:
        assert_raises(ValueError, learning_curve, mock, features, labels,
                      cv=3, train_sizes=bad_sizes)
def test_learning_curve_remove_duplicate_sample_sizes():
    """Check the warning and resulting sizes for ``linspace(0.33, 1.0, 3)``.

    On a 3-sample problem with 3-fold CV the call must emit a
    ``RuntimeWarning`` and return the training sizes ``[1, 2]``.
    """
    features, labels = make_classification(
        n_samples=3, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    mock = MockImprovingEstimator(2)
    fractions = np.linspace(0.33, 1.0, 3)

    # assert_warns hands back whatever learning_curve returned.
    sizes, _scores_train, _scores_test = assert_warns(
        RuntimeWarning, learning_curve, mock, features, labels,
        cv=3, train_sizes=fractions)

    assert_array_equal(sizes, [1, 2])
def test_learning_curve_with_boolean_indices():
    """Check ``learning_curve`` when ``cv`` is an explicit ``KFold`` splitter.

    Uses ``MockImprovingEstimator(20)`` with a 3-split ``KFold`` object and
    ten relative training sizes, and verifies the absolute sizes and the mean
    train/test scores.
    """
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    # ``sklearn.model_selection.KFold`` names this parameter ``n_splits``;
    # ``n_folds`` is not accepted and raises TypeError.
    cv = KFold(n_splits=3)

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10))

    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
def _set_description(self, dfe):
    """Build ``self.description`` with a feature-importance / learning-curve figure.

    The left subplot is a bar chart of ``self.model.feature_importances_``
    sorted in descending order; the right subplot shows mean +/- std training
    and cross-validation scores returned by ``learning_curve``. Plot labels
    are chosen by ``self.lang``. The resulting picture is attached to both
    the ``"ja"`` and ``"en"`` descriptions.

    Parameters
    ----------
    dfe : object
        Provides ``get_features()``, ``df`` and ``target``, as used below.
    """
    raw_importances = self.model.feature_importances_
    feature_names = dfe.get_features().columns
    importances = pd.Series(raw_importances, index=feature_names)
    importances = importances.sort_values(ascending=False)

    target = dfe.df[dfe.target]
    features = dfe.df.drop(dfe.target, axis=1)
    sizes, fit_scores, cv_scores = learning_curve(
        self.model, features, target, n_jobs=self.n_jobs)

    fit_mean = np.mean(fit_scores, axis=1)
    fit_std = np.std(fit_scores, axis=1)
    cv_mean = np.mean(cv_scores, axis=1)
    cv_std = np.std(cv_scores, axis=1)

    pic = ImageFile.create()
    with pic.plot() as plt_fig:
        pyplot, figure = plt_fig
        figure.set_figwidth(12)

        # Left panel: feature importances.
        pyplot.subplot(121)
        importances.plot(kind="bar")

        # Right panel: learning curve with +/- one std bands.
        curve_ax = pyplot.subplot(122)
        curve_ax.fill_between(sizes, fit_mean - fit_std, fit_mean + fit_std,
                              alpha=0.1, color="r")
        curve_ax.fill_between(sizes, cv_mean - cv_std, cv_mean + cv_std,
                              alpha=0.1, color="g")

        if self.lang == "ja":
            fit_label = "????"
            cv_label = "????"
            x_label = "??????(??)"
            y_label = "??"
        else:
            fit_label = "Training score"
            cv_label = "Cross-validation score"
            x_label = "data records"
            y_label = "accuracy"

        curve_ax.plot(sizes, fit_mean, "o-", color="r", label=fit_label)
        curve_ax.plot(sizes, cv_mean, 'o-', color="g", label=cv_label)
        curve_ax.set_xlabel(x_label)
        curve_ax.set_ylabel(y_label)
        curve_ax.set_ylim(0, 1)
        curve_ax.legend(loc="best")

    params = (self.score, self.model.__class__.__name__)
    ja_text = "???????{:.3f}??(?????:{})?????????????????????".format(*params)
    en_text = "The model accuracy is {:.3f}(model is {}). The contributions of each features are here.".format(*params)
    self.description = {
        "ja": Description(ja_text, pic),
        "en": Description(en_text, pic)
    }
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
    """Plot training and cross-validation learning curves on a new figure.

    Parameters
    ----------
    estimator : object
        Estimator passed to ``learning_curve``.
    title : string
        Title for the chart.
    X : array-like
        Training data passed to ``learning_curve``.
    y : array-like
        Target passed to ``learning_curve``.
    ylim : tuple (ymin, ymax), optional
        Limits of the y axis; left untouched when None.
    cv : integer or cross-validation generator, optional
        Cross-validation strategy passed to ``learning_curve``.
    n_jobs : integer, optional
        Number of parallel jobs passed to ``learning_curve`` (default 1).
    train_sizes : array-like, optional
        Training set sizes passed to ``learning_curve``.

    Returns
    -------
    plt : module
        The ``plt`` module holding the newly drawn figure.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    sizes, fit_scores, cv_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)

    # (mean, std, colour, legend label) for each curve, training first.
    curves = [
        (np.mean(fit_scores, axis=1), np.std(fit_scores, axis=1),
         "r", "Training score"),
        (np.mean(cv_scores, axis=1), np.std(cv_scores, axis=1),
         "g", "Cross-validation score"),
    ]

    plt.grid()
    # Shaded +/- one standard deviation bands first, then the mean lines.
    for mean, std, colour, _label in curves:
        plt.fill_between(sizes, mean - std, mean + std,
                         alpha=0.1, color=colour)
    for mean, _std, colour, label in curves:
        plt.plot(sizes, mean, 'o-', color=colour, label=label)

    plt.legend(loc="best")
    return plt
def generate_plots(model, partition):
    r"""Produce every plot that is switched on in the model specifications.

    Parameters
    ----------
    model : alphapy.Model
        Model whose ``specs`` mapping holds the plot switches read here:
        ``calibration_plot``, ``confusion_matrix``, ``importances``,
        ``learning_curve`` and ``roc_curve``.
    partition : alphapy.Partition
        Reference to the dataset partition to plot.

    Returns
    -------
    None : None

    Notes
    -----
    The learning-curve and importance plots are only generated when
    ``partition`` equals ``Partition.train``.

    """

    logger.info('='*80)
    logger.info("Generating Plots for partition: %s", datasets[partition])

    # Read all plot switches up front

    specs = model.specs
    want_calibration = specs['calibration_plot']
    want_confusion = specs['confusion_matrix']
    want_importances = specs['importances']
    want_learning_curve = specs['learning_curve']
    want_roc = specs['roc_curve']

    # Plots available for any partition

    if want_calibration:
        plot_calibration(model, partition)
    if want_confusion:
        plot_confusion_matrix(model, partition)
    if want_roc:
        plot_roc_curve(model, partition)

    # Plots produced for the training partition only

    if partition == Partition.train:
        if want_learning_curve:
            plot_learning_curve(model, partition)
        if want_importances:
            plot_importance(model, partition)


#
# Function get_plot_directory
#
def plot_learning_curve(estimator, X, y, train_sizes=np.linspace(.1, 1.0, 5),
                        cv=None, n_jobs=1, ax=None):
    """
    Plot the learning curve for `estimator`.

    Everything is drawn on `ax`, so a caller-supplied axes is honoured even
    when it is not matplotlib's current axes.

    Parameters
    ----------
    estimator : sklearn.Estimator
    X : array-like
    y : array-like
    train_sizes : array-like
        list of floats between 0 and 1
    cv : int
    n_jobs : int
    ax : matplotlib.axes
        Axes to draw on; a new figure and axes are created when None.

    Returns
    -------
    ax : matplotlib.axes
        The axes that was drawn on.
    """
    # http://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html
    if ax is None:
        _, ax = plt.subplots()
    ax.set_xlabel("Training examples")
    ax.set_ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes
    )
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    # Use the Axes methods rather than the pyplot functions: pyplot draws on
    # the *current* axes, which need not be the `ax` the caller passed in.
    ax.grid()
    ax.fill_between(train_sizes, train_scores_mean - train_scores_std,
                    train_scores_mean + train_scores_std, alpha=0.1,
                    color="r")
    ax.fill_between(train_sizes, test_scores_mean - test_scores_std,
                    test_scores_mean + test_scores_std, alpha=0.1,
                    color="g")
    ax.plot(train_sizes, train_scores_mean, 'o-', color="r",
            label="Training score")
    ax.plot(train_sizes, test_scores_mean, 'o-', color="g",
            label="Cross-validation score")
    ax.legend(loc="best")
    return ax
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        scoring=None, n_jobs=1,
                        train_sizes=np.linspace(0.1, 1.0, 5)):
    """Plot training and cross-validation learning curves on a new figure.

    Parameters
    ----------
    estimator : object
        Estimator passed to ``learning_curve``.
    title : string
        Title for the chart.
    X : array-like
        Training vector passed to ``learning_curve``.
    y : array-like
        Target passed to ``learning_curve``.
    ylim : tuple (ymin, ymax), optional
        Limits of the y axis; left untouched when None.
    cv : int, cross-validation generator or iterable, optional
        Cross-validation splitting strategy passed to ``learning_curve``.
    scoring : optional
        Scorer passed to ``learning_curve``.
    n_jobs : int, optional
        Number of parallel jobs passed to ``learning_curve``.
    train_sizes : array-like, optional
        Training set sizes passed to ``learning_curve``.

    Returns
    -------
    (plt, train_sizes) : tuple
        The ``plt`` module and the training sizes returned by
        ``learning_curve``.
    """
    from sklearn.model_selection import learning_curve

    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, fit_scores, cv_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes,
        scoring=scoring)

    # (mean, std, colour, legend label) for each curve, training first.
    curves = (
        (np.mean(fit_scores, axis=1), np.std(fit_scores, axis=1),
         'r', "Training score"),
        (np.mean(cv_scores, axis=1), np.std(cv_scores, axis=1),
         'g', "Cross-validation score"),
    )

    plt.grid()
    # Shaded +/- one standard deviation bands first, then the mean lines.
    for mean, std, colour, _label in curves:
        plt.fill_between(train_sizes, mean - std, mean + std,
                         alpha=0.1, color=colour)
    for mean, _std, colour, label in curves:
        plt.plot(train_sizes, mean, 'o-', color=colour, label=label)

    plt.legend(loc="best")
    return plt, train_sizes