The following 25 code examples, extracted from open source Python projects, illustrate how to use sklearn.datasets.load_breast_cancer().
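Before the project examples, here is a minimal sketch of the loader's basic usage; the variable names are our own and do not come from any of the projects below:

from sklearn.datasets import load_breast_cancer

# Load the Wisconsin breast cancer dataset as a Bunch object.
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target          # (569, 30) feature matrix and binary labels
print(dataset.feature_names[:3], dataset.target_names)

# Alternatively, request the arrays directly.
X, y = load_breast_cancer(return_X_y=True)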
def trained_models():
    dataset = datasets.load_breast_cancer()
    X = dataset.data
    y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12345)

    rf = RandomForestClassifier()
    rf.fit(X_train, y_train)

    lr = LogisticRegression()
    lr.fit(X_train, y_train)

    svc_w_linear_kernel = SVC(kernel='linear')
    svc_w_linear_kernel.fit(X_train, y_train)

    svc_wo_linear_kernel = SVC()
    svc_wo_linear_kernel.fit(X_train, y_train)

    dummy = DummyClassifier()
    dummy.fit(X_train, y_train)

    return {'RF': rf, 'LR': lr, 'SVC_w_linear_kernel': svc_w_linear_kernel,
            'Dummy': dummy, 'SVC_wo_linear_kernel': svc_wo_linear_kernel}
def test_score_grid_func():
    dataset = load_breast_cancer()
    X, y = dataset['data'], dataset['target_names'].take(dataset['target'])

    # Classifier to be used in the metaheuristic
    clf = SVC()

    for metaclass in METACLASSES:
        meta = metaclass(classifier=clf, random_state=0, verbose=True,
                         make_logbook=True, repeat=1, number_gen=3, size_pop=2)
        print("Checking Grid: ", meta._name)

        # Fit the classifier
        meta.fit(X, y, normalize=True)

        # See score
        meta.score_func_to_gridsearch(meta)
def data():
    dataset = datasets.load_breast_cancer()
    X = dataset.data
    y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12345)
    return {'X_train': X_train, 'X_test': X_test, 'y_train': y_train, 'y_test': y_test}
def load_breast_cancer_df(include_tgt=True, tgt_name="target", shuffle=False):
    """Loads the breast cancer dataset into a dataframe with the
    target set as the "target" feature or whatever name is specified
    in ``tgt_name``.

    Parameters
    ----------
    include_tgt : bool, optional (default=True)
        Whether to include the target

    tgt_name : str, optional (default="target")
        The name of the target feature

    shuffle : bool, optional (default=False)
        Whether to shuffle the rows

    Returns
    -------
    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded dataset
    """
    bc = load_breast_cancer()
    X = pd.DataFrame.from_records(data=bc.data, columns=bc.feature_names)

    if include_tgt:
        X[tgt_name] = bc.target

    return X if not shuffle else shuffle_dataframe(X)
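Going by the docstring above, a call to this helper might look like the following sketch; the column name "label" is our choice, and shuffle_dataframe is a project-specific utility assumed to be importable alongside the function:

# Hypothetical usage of the helper above.
df = load_breast_cancer_df(include_tgt=True, tgt_name="label", shuffle=False)
print(df.shape)                     # expected (569, 31): 30 features plus the label column
print(df["label"].value_counts())   # class balance of the binary target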
def setUp(self):
    np.random.seed(0)
    self.X, self.y = load_breast_cancer(return_X_y=True)
    p = np.random.permutation(len(self.X))
    self.X, self.y = self.X[p], self.y[p]
def setUp(self):
    np.random.seed(0)
    self.X, self.y = load_breast_cancer(return_X_y=True)
    p = np.random.permutation(len(self.X))
    self.X, self.y = self.X[p], self.y[p]

    self.lr = LogisticRegression()
    self.rf = RandomForestClassifier(random_state=8)
    self.svc = LinearSVC()
    self.lr_probas = self.lr.fit(self.X, self.y).predict_proba(self.X)
    self.rf_probas = self.rf.fit(self.X, self.y).predict_proba(self.X)
    self.svc_scores = self.svc.fit(self.X, self.y).decision_function(self.X)
def get_sample_dataset(dataset_properties):
    """Returns sample dataset

    Args:
        dataset_properties (dict): Dictionary corresponding to the properties of the dataset
            used to verify the estimator and metric generators.

    Returns:
        X (array-like): Features array

        y (array-like): Labels array

        splits (iterator): This is an iterator that returns train test splits for
            cross-validation purposes on ``X`` and ``y``.
    """
    kwargs = dataset_properties.copy()
    data_type = kwargs.pop('type')
    if data_type == 'multiclass':
        try:
            X, y = datasets.make_classification(random_state=8, **kwargs)
            splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y)
        except Exception as e:
            raise exceptions.UserError(repr(e))
    elif data_type == 'iris':
        X, y = datasets.load_iris(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y)
    elif data_type == 'mnist':
        X, y = datasets.load_digits(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y)
    elif data_type == 'breast_cancer':
        X, y = datasets.load_breast_cancer(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y)
    elif data_type == 'boston':
        X, y = datasets.load_boston(return_X_y=True)
        splits = model_selection.KFold(n_splits=2, random_state=8).split(X)
    elif data_type == 'diabetes':
        X, y = datasets.load_diabetes(return_X_y=True)
        splits = model_selection.KFold(n_splits=2, random_state=8).split(X)
    else:
        raise exceptions.UserError('Unknown dataset type {}'.format(dataset_properties['type']))

    return X, y, splits
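Based on the docstring, the caller passes a dict with at least a 'type' key. The call below is our illustration, not taken from the project, and assumes the scikit-learn version the project was written against (where load_boston and random_state on an unshuffled StratifiedKFold are still accepted):

# Hypothetical call exercising the 'breast_cancer' branch above.
X, y, splits = get_sample_dataset({'type': 'breast_cancer'})
for train_idx, test_idx in splits:
    print(len(train_idx), len(test_idx))   # two folds over the 569 samples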
def test_breast_cancer(self):
    dataset = load_breast_cancer()
    score = np.mean(cross_val_score(
        DecisionTreeClassifier(tree_type=self.tree_type),
        dataset.data, dataset.target, cv=10))
    print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
    self.assertTrue(score > 0.8)
def test_breast_cancer(self):
    dataset = load_breast_cancer()
    score = np.mean(cross_val_score(
        DecisionTreeClassifier(tree_type=self.tree_type),
        dataset.data, dataset.target, cv=10))
    self.assertTrue(score > 0.8)
    print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
def test_rocauc_quickmethod(self):
    """
    Test the ROCAUC quick method
    """
    data = load_breast_cancer()
    model = DecisionTreeClassifier()

    # TODO: image comparison of the quick method
    ax = roc_auc(model, data.data, data.target)
def test_plot():
    dataset = load_breast_cancer()
    X, y = dataset['data'], dataset['target_names'].take(dataset['target'])

    # Classifier to be used in the metaheuristic
    clf = SVC()

    for metaclass in METACLASSES:
        meta = metaclass(classifier=clf, random_state=0, verbose=False,
                         make_logbook=True, repeat=1, number_gen=2, size_pop=2)
        print("Checking plotting: ", meta._name)

        # Fit the classifier
        meta.fit(X, y, normalize=True)

        # Transformed dataset
        X_1 = meta.transform(X)

        meta = metaclass(classifier=clf, random_state=0, make_logbook=True,
                         repeat=1, number_gen=2, size_pop=2)

        # Fit and Transform
        X_2 = meta.fit_transform(X=X, y=y, normalize=True)
        assert_array_equal(X_1, X_2)

        # Plot the results of each test
        meta.plot_results()

    ga = GeneticAlgorithm(classifier=clf, random_state=1, make_logbook=False, repeat=1)

    # check for error in plot
    ga.fit(X, y, normalize=True)
    assert_raises(ValueError, ga.plot_results)
def test_parallel():
    dataset = load_breast_cancer()
    X, y = dataset['data'], dataset['target_names'].take(dataset['target'])

    # Classifier to be used in the metaheuristic
    clf = SVC()

    for metaclass in METACLASSES:
        meta = metaclass(classifier=clf, random_state=0, make_logbook=False,
                         repeat=2, number_gen=2, parallel=True, verbose=True, size_pop=2)
        print("Checking parallel ", meta._name)

        # Fit the classifier
        meta.fit(X, y, normalize=True)

        # Transformed dataset
        X_1 = meta.transform(X)

        meta = metaclass(classifier=clf, random_state=0, make_logbook=False,
                         repeat=2, number_gen=2, parallel=True, size_pop=2)

        # Fit and Transform
        X_2 = meta.fit_transform(X=X, y=y, normalize=True)

        # Check Function
        assert_array_equal(X_1, X_2)
def test_unusual_errors():
    dataset = load_breast_cancer()
    X, y = dataset['data'], dataset['target_names'].take(dataset['target'])

    # Classifier to be used in the metaheuristic
    clf = SVC()

    for metaclass in METACLASSES:
        meta = metaclass(classifier=clf, random_state=0, verbose=0,
                         make_logbook=True, repeat=1, number_gen=2, size_pop=2)
        print("Checking unusual error: ", meta._name)
        meta.fit(X, y, normalize=True)

        # Let's suppose you have an empty array
        meta.best_mask_ = np.array([])
        assert_warns(UserWarning, meta.transform, X)
        assert_raises(ValueError, meta.safe_mask, X, meta.best_mask_)

        meta = metaclass(classifier=clf, random_state=0, verbose=0,
                         make_logbook=True, repeat=1, number_gen=2, size_pop=2)
        assert_raises(ValueError, meta.score_func_to_gridsearch, meta)

    for metaclass in [BRKGA, BRKGA2]:
        assert_raises(ValueError, metaclass, classifier=clf, random_state=0,
                      verbose=0, make_logbook=True, repeat=1, number_gen=2,
                      size_pop=2, elite_size=5)
def test_predict():
    dataset = load_breast_cancer()
    X, y = dataset['data'], dataset['target_names'].take(dataset['target'])

    # Classifier to be used in the metaheuristic
    sa = SimulatedAnneling(size_pop=2, number_gen=2)
    sa.fit(X, y, normalize=True)
    sa.predict(X)
def test_numerical_split():
    bunch = load_breast_cancer()
    id3Estimator = Id3Estimator()
    id3Estimator.fit(bunch.data, bunch.target)
    splitter = id3Estimator.builder_.splitter
    record = splitter.calc(np.array(list(range(bunch.target.shape[0]))),
                           np.array(list(range(bunch.data.shape[1]))))
    less = np.sum(bunch.data[:, record.feature_idx] <= record.pivot)
    more = bunch.data[:, record.feature_idx].shape[0] - less
    split = splitter.split(np.array(list(range(bunch.target.shape[0]))), record)
    assert_almost_equal(len(split[0].bag), less)
    assert_almost_equal(len(split[1].bag), more)
def test_fit():
    bunch = load_breast_cancer()
    id3Estimator = Id3Estimator()
    id3Estimator.fit(bunch.data, bunch.target)
    assert_equal(id3Estimator.tree_.root.value, 22)

    id3Estimator = Id3Estimator(max_depth=2)
    id3Estimator.fit(bunch.data, bunch.target)
    assert_equal(id3Estimator.tree_.root.value, 22)

    id3Estimator = Id3Estimator(min_samples_split=20)
    id3Estimator.fit(bunch.data, bunch.target)
    assert_equal(id3Estimator.tree_.root.value, 22)
def test_gain_ratio():
    id3Estimator = Id3Estimator(gain_ratio=True)
    bunch = load_breast_cancer()
    id3Estimator.fit(bunch.data, bunch.target)
    assert_equal(id3Estimator.tree_.root.value, 23)
def test_prune():
    id3estimator = Id3Estimator(prune=True)
    bunch = load_breast_cancer()
    id3estimator.fit(bunch.data, bunch.target)
def test_predict():
    estimator = Id3Estimator()
    bunch = load_breast_cancer()
    estimator.fit(bunch.data, bunch.target)
    sample = np.array([20.57, 17.77, 132.9, 1326, 0.08474, 0.07864, 0.0869,
                       0.07017, 0.1812, 0.05667, 0.5435, 0.7339, 3.398, 74.08,
                       0.005225, 0.01308, 0.0186, 0.0134, 0.01389, 0.003532,
                       24.99, 23.41, 158.8, 1956, 0.1238, 0.1866, 0.2416,
                       0.186, 0.275, 0.08902]).reshape(1, -1)
    assert_almost_equal(estimator.predict(bunch.data), bunch.target)
    assert_almost_equal(estimator.predict(sample), 0)
def test_load_breast_cancer():
    res = load_breast_cancer()
    assert_equal(res.data.shape, (569, 30))
    assert_equal(res.target.size, 569)
    assert_equal(res.target_names.size, 2)
    assert_true(res.DESCR)