The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.datasets.load_iris().
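Before the project-specific examples, here is a minimal sketch of the basic call, assuming only scikit-learn itself; it shows the Bunch attributes (data, target, feature_names, target_names) and the return_X_y shortcut (available since scikit-learn 0.18) that most of the snippets below rely on.

from sklearn.datasets import load_iris

# Load the bundled iris dataset; the result is a Bunch object.
iris = load_iris()
print(iris.data.shape)       # (150, 4) feature matrix
print(iris.target.shape)     # (150,) class labels 0, 1, 2
print(iris.feature_names)    # names of the four measurements
print(iris.target_names)     # ['setosa' 'versicolor' 'virginica']

# Or request plain arrays directly (scikit-learn >= 0.18):
X, y = load_iris(return_X_y=True)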
def main():
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.5)

    clrTree = tree.DecisionTreeClassifier()
    clrTree = clrTree.fit(x_train, y_train)
    outTree = clrTree.predict(x_test)

    clrKN = KNeighborsClassifier()
    clrKN = clrKN.fit(x_train, y_train)
    outKN = clrKN.predict(x_test)

    # Prediction accuracy
    print("Accuracy for Decision Tree Classifier: " + str(accuracy_score(y_test, outTree) * 100) + "%")
    print("Accuracy for KNeighbors Classifier: " + str(accuracy_score(y_test, outKN) * 100) + "%")
def main():
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.5)

    clr = NewClassifier()
    clr.fit(x_train, y_train)
    prediction = clr.predict(x_test)

    # Prediction accuracy
    print("Accuracy: " + str(accuracy_score(y_test, prediction) * 100) + "%")

# Run main
def test_pipeline_transform():
    # Test whether pipeline works with a transformer at the end.
    # Also test pipeline.transform and pipeline.inverse_transform
    iris = load_iris()
    X = iris.data
    pca = PCA(n_components=2, svd_solver='full')
    pipeline = Pipeline([('pca', pca)])

    # test transform and fit_transform:
    X_trans = pipeline.fit(X).transform(X)
    X_trans2 = pipeline.fit_transform(X)
    X_trans3 = pca.fit_transform(X)
    assert_array_almost_equal(X_trans, X_trans2)
    assert_array_almost_equal(X_trans, X_trans3)

    X_back = pipeline.inverse_transform(X_trans)
    X_back2 = pca.inverse_transform(X_trans)
    assert_array_almost_equal(X_back, X_back2)
def main():
    iris = load_iris()
    test_idx = [0, 50, 100]

    # training data
    train_target = np.delete(iris.target, test_idx)
    train_data = np.delete(iris.data, test_idx, axis=0)

    # testing data
    test_target = iris.target[test_idx]
    test_data = iris.data[test_idx]

    # Train classifier
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(train_data, train_target)
    print(clf.predict(test_data))

# Run main
def test_bagged_imputer_classification():
    iris = load_iris()

    # make DF, add species col
    X = pd.DataFrame.from_records(data=iris.data, columns=iris.feature_names)
    X['species'] = iris.target

    # shuffle...
    X = shuffle_dataframe(X)

    # set random indices to be null.. 15% should be good
    rands = np.random.rand(X.shape[0])
    mask = rands > 0.85
    X['species'].iloc[mask] = np.nan

    # define imputer, assert no missing
    imputer = BaggedCategoricalImputer(cols=['species'])
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'

    # now test with a different estimator
    imputer = BaggedCategoricalImputer(cols=['species'], base_estimator=RandomForestClassifier())
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'
def test_make_grid_search():
    X, y = load_iris(return_X_y=True)
    lr = LogisticRegression()
    svc = set_grid(SVC(), kernel=['poly'], degree=[2, 3])
    gs1 = make_grid_search(lr, cv=5)  # empty grid
    gs2 = make_grid_search(svc, cv=5)
    gs3 = make_grid_search([lr, svc], cv=5)
    for gs, n_results in [(gs1, 1), (gs2, 2), (gs3, 3)]:
        gs.fit(X, y)
        assert gs.cv == 5
        assert len(gs.cv_results_['params']) == n_results

    svc_mask = gs3.cv_results_['param_root'] == svc
    assert svc_mask.sum() == 2
    assert gs3.cv_results_['param_root__degree'][svc_mask].tolist() == [2, 3]
    assert gs3.cv_results_['param_root'][~svc_mask].tolist() == [lr]
def main():
    print("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred,
                      title="Decision Tree",
                      accuracy=accuracy,
                      legend_labels=data.target_names)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
def main():
    print("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred,
                      title="Gradient Boosting",
                      accuracy=accuracy,
                      legend_labels=data.target_names)
def main():
    print("-- XGBoost --")

    data = datasets.load_iris()
    X = data.data
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = XGBoost()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred,
                      title="XGBoost",
                      accuracy=accuracy,
                      legend_labels=data.target_names)
def test():
    iris = load_iris()
    # print(iris)
    # print(iris['target'].shape)

    gbdt = GradientBoostingRegressor(n_estimators=1000, max_depth=4)
    gbdt.fit(iris.data[:120], iris.target[:120])

    # Save GBDT model
    joblib.dump(gbdt, 'GBDT.model')

    predict = gbdt.predict(iris.data[:120])
    total_err = 0
    for i in range(len(predict)):
        print(predict[i], iris.target[i])
        err = predict[i] - iris.target[i]
        total_err += err * err
    print('Training Error: %f' % (total_err / len(predict)))

    pred = gbdt.predict(iris.data[120:])
    error = 0
    for i in range(len(pred)):
        print(pred[i], iris.target[i + 120])
        err = pred[i] - iris.target[i + 120]
        error += err * err
    print('Test Error: %f' % (error / len(pred)))
def get_iris(rng=42, tst_size=0.3):
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    X = iris_normalisation(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=tst_size, random_state=rng)

    # one-hot encode the training targets (3 classes, one column per sample)
    trg_train = np.zeros((3, len(y_train)), dtype='uint8')
    for e in range(trg_train.shape[1]):
        v = y_train[e]
        trg_train[v, e] = 1

    # one-hot encode the test targets
    trg_test = np.zeros((3, len(y_test)), dtype='uint8')
    for e in range(trg_test.shape[1]):
        v = y_test[e]
        trg_test[v, e] = 1

    trn = Instance(X_train.T, trg_train)
    tst = Instance(X_test.T, trg_test)
    return trn, tst
def test_cross_val_predict():
    # Make sure it works in cross_val_predict for multiclass.
    X, y = load_iris(return_X_y=True)
    y = LabelBinarizer().fit_transform(y)
    X = StandardScaler().fit_transform(X)
    mlp = MLPClassifier(n_epochs=10,
                        solver_kwargs={'learning_rate': 0.05},
                        random_state=4567).fit(X, y)

    cv = KFold(n_splits=4, random_state=457, shuffle=True)
    y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
    auc = roc_auc_score(y, y_oos, average=None)

    assert np.all(auc >= 0.96)
def test_RFECV():
    '''
    test the method of RFECV

    :return: None
    '''
    iris = load_iris()
    X = iris.data
    y = iris.target
    estimator = LinearSVC()
    selector = RFECV(estimator=estimator, cv=3)
    selector.fit(X, y)
    print("N_features %s" % selector.n_features_)
    print("Support is %s" % selector.support_)
    print("Ranking %s" % selector.ranking_)
    print("Grid Scores %s" % selector.grid_scores_)
def test_few_classification():
    """test_few.py: tests default classification settings"""
    np.random.seed(42)
    X, y = load_iris(return_X_y=True)
    train, test = train_test_split(np.arange(X.shape[0]),
                                   train_size=0.75,
                                   test_size=0.25)

    few = FEW(classification=True, population_size='1x', generations=10)
    few.fit(X[train], y[train])

    print('train score:', few.score(X[train], y[train]))
    print('test score:', few.score(X[test], y[test]))

    # test boolean output
    few = FEW(classification=True, otype='b', population_size='2x',
              seed_with_ml=False, generations=10)
    np.random.seed(42)
    few.fit(X[train], y[train])

    print('train score:', few.score(X[train], y[train]))
    print('test score:', few.score(X[test], y[test]))
    few.print_model()
def test_base():
    # Check BaseEnsemble methods.
    ensemble = BaggingClassifier(base_estimator=Perceptron(), n_estimators=3)

    iris = load_iris()
    ensemble.fit(iris.data, iris.target)
    ensemble.estimators_ = []  # empty the list and create estimators manually

    ensemble._make_estimator()
    ensemble._make_estimator()
    ensemble._make_estimator()
    ensemble._make_estimator(append=False)

    assert_equal(3, len(ensemble))
    assert_equal(3, len(ensemble.estimators_))
    assert_true(isinstance(ensemble[0], Perceptron))
def test_cross_val_score_mask():
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target

    kfold = KFold(5)
    scores_indices = cross_val_score(svm, X, y, cv=kfold)

    kfold = KFold(5)
    cv_masks = []
    for train, test in kfold.split(X, y):
        mask_train = np.zeros(len(y), dtype=np.bool)
        mask_test = np.zeros(len(y), dtype=np.bool)
        mask_train[train] = 1
        mask_test[test] = 1
        cv_masks.append((mask_train, mask_test))
    scores_masks = cross_val_score(svm, X, y, cv=cv_masks)

    assert_array_equal(scores_indices, scores_masks)
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cross_val_score(svm, X, y)
    assert_array_equal(score_precomputed, score_linear)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cross_val_score, svm, linear_kernel.tolist(), y)
def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (class are balanced so f1_score should be equal to zero/one
    # score)
    f1_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)
def test_cross_val_score_mask():
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target

    cv_indices = cval.KFold(len(y), 5)
    scores_indices = cval.cross_val_score(svm, X, y, cv=cv_indices)

    cv_indices = cval.KFold(len(y), 5)
    cv_masks = []
    for train, test in cv_indices:
        mask_train = np.zeros(len(y), dtype=np.bool)
        mask_test = np.zeros(len(y), dtype=np.bool)
        mask_train[train] = 1
        mask_test[test] = 1
        cv_masks.append((mask_train, mask_test))
    scores_masks = cval.cross_val_score(svm, X, y, cv=cv_masks)

    assert_array_equal(scores_indices, scores_masks)
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cval.cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cval.cross_val_score(svm, X, y)
    assert_array_equal(score_precomputed, score_linear)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cval.cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cval.cross_val_score, svm,
                  linear_kernel.tolist(), y)
def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cval.cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                     scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (class are balanced so f1_score should be equal to zero/one
    # score)
    f1_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                     scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)
def test_safe_split_with_precomputed_kernel():
    clf = SVC()
    clfp = SVC(kernel="precomputed")

    iris = load_iris()
    X, y = iris.data, iris.target
    K = np.dot(X, X.T)

    cv = cval.ShuffleSplit(X.shape[0], test_size=0.25, random_state=0)
    tr, te = list(cv)[0]

    X_tr, y_tr = cval._safe_split(clf, X, y, tr)
    K_tr, y_tr2 = cval._safe_split(clfp, K, y, tr)
    assert_array_almost_equal(K_tr, np.dot(X_tr, X_tr.T))

    X_te, y_te = cval._safe_split(clf, X, y, te, tr)
    K_te, y_te2 = cval._safe_split(clfp, K, y, te, tr)
    assert_array_almost_equal(K_te, np.dot(X_te, X_tr.T))
def test_score_sample_weight():
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.tree import DecisionTreeRegressor
    from sklearn import datasets

    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    estimators = [DecisionTreeClassifier(max_depth=2),
                  DecisionTreeRegressor(max_depth=2)]
    sets = [datasets.load_iris(),
            datasets.load_boston()]

    for est, ds in zip(estimators, sets):
        est.fit(ds.data, ds.target)
        # generate random sample weights
        sample_weight = rng.randint(1, 10, size=len(ds.target))
        # check that the score with and without sample weights are different
        assert_not_equal(est.score(ds.data, ds.target),
                         est.score(ds.data, ds.target,
                                   sample_weight=sample_weight),
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal")
def test_fit_predict_on_pipeline():
    # test that the fit_predict method is implemented on a pipeline
    # test that the fit_predict on pipeline yields same results as applying
    # transform and clustering steps separately
    iris = load_iris()
    scaler = StandardScaler()
    km = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([('scaler', scaler), ('Kmeans', km)])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred)
def test_discretenb_provide_prior_with_partial_fit():
    # Test whether discrete NB classes use provided prior
    # when using partial_fit

    iris = load_iris()
    iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
        iris.data, iris.target, test_size=0.4, random_state=415)

    for cls in [BernoulliNB, MultinomialNB]:
        for prior in [None, [0.3, 0.3, 0.4]]:
            clf_full = cls(class_prior=prior)
            clf_full.fit(iris.data, iris.target)

            clf_partial = cls(class_prior=prior)
            clf_partial.partial_fit(iris_data1, iris_target1, classes=[0, 1, 2])
            clf_partial.partial_fit(iris_data2, iris_target2)

            assert_array_almost_equal(clf_full.class_log_prior_,
                                      clf_partial.class_log_prior_)
def test_randomized_logistic():
    # Check randomized sparse logistic regression
    iris = load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    X = X[y != 2]
    y = y[y != 2]

    F, _ = f_classif(X, y)

    scaling = 0.3
    clf = RandomizedLogisticRegression(verbose=False, C=1., random_state=42,
                                       scaling=scaling, n_resampling=50,
                                       tol=1e-3)
    X_orig = X.copy()
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(X, X_orig)   # fit does not modify X
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))

    clf = RandomizedLogisticRegression(verbose=False, C=[1., 0.5],
                                       random_state=42, scaling=scaling,
                                       n_resampling=50, tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))
def test_correct_labelsize():
    # Assert 1 < n_labels < n_samples
    dataset = datasets.load_iris()
    X = dataset.data

    # n_labels = n_samples
    y = np.arange(X.shape[0])
    assert_raises_regexp(ValueError,
                         'Number of labels is %d\. Valid values are 2 '
                         'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
                         silhouette_score, X, y)

    # n_labels = 1
    y = np.zeros(X.shape[0])
    assert_raises_regexp(ValueError,
                         'Number of labels is %d\. Valid values are 2 '
                         'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
                         silhouette_score, X, y)
def check_non_transformer_estimators_n_iter(name, estimator,
                                            multi_output=False):
    # Check if all iterative solvers, run for more than one iteration
    iris = load_iris()
    X, y_ = iris.data, iris.target

    if multi_output:
        y_ = np.reshape(y_, (-1, 1))

    set_random_state(estimator, 0)
    if name == 'AffinityPropagation':
        estimator.fit(X)
    else:
        estimator.fit(X, y_)

    # HuberRegressor depends on scipy.optimize.fmin_l_bfgs_b
    # which doesn't return a n_iter for old versions of SciPy.
    if not (name == 'HuberRegressor' and estimator.n_iter_ is None):
        assert_greater(estimator.n_iter_, 0)
def test_rfe_features_importance():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target

    clf = RandomForestClassifier(n_estimators=20,
                                 random_state=generator, max_depth=2)
    rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
    rfe.fit(X, y)
    assert_equal(len(rfe.ranking_), X.shape[1])

    clf_svc = SVC(kernel="linear")
    rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1)
    rfe_svc.fit(X, y)

    # Check if the supports are equal
    assert_array_equal(rfe.get_support(), rfe_svc.get_support())
def test_graph_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # The iris datasets in R and sklearn do not match in a few places, these
    # values are for the sklearn version
    cov_R = np.array([
        [0.68112222, 0.0, 0.2651911, 0.02467558],
        [0.00, 0.1867507, 0.0, 0.00],
        [0.26519111, 0.0, 3.0924249, 0.28774489],
        [0.02467558, 0.0, 0.2877449, 0.57853156]
    ])
    icov_R = np.array([
        [1.5188780, 0.0, -0.1302515, 0.0],
        [0.0, 5.354733, 0.0, 0.0],
        [-0.1302515, 0.0, 0.3502322, -0.1686399],
        [0.0, 0.0, -0.1686399, 1.8123908]
    ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=1.0, return_costs=False,
                                mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
def test_graph_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5]
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=0.01, return_costs=False,
                                mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)
def testIrisDNN(self):
    if HAS_SKLEARN:
        random.seed(42)
        iris = datasets.load_iris()
        feature_columns = learn.infer_real_valued_columns_from_input(iris.data)
        classifier = learn.DNNClassifier(
            feature_columns=feature_columns, hidden_units=[10, 20, 10],
            n_classes=3)
        grid_search = GridSearchCV(
            classifier, {'hidden_units': [[5, 5], [10, 10]]},
            scoring='accuracy', fit_params={'steps': [50]})
        grid_search.fit(iris.data, iris.target)
        score = accuracy_score(iris.target, grid_search.predict(iris.data))
        self.assertGreater(score, 0.5, 'Failed with score = {0}'.format(score))
def load_iris():
    try:
        # Load Iris dataset from the sklearn.datasets package
        from sklearn import datasets
        from sklearn import decomposition

        # Load Dataset
        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        labels = iris.target_names

        # Reduce components by Principal Component Analysis from sklearn
        X = decomposition.PCA(n_components=3).fit_transform(X)
    except ImportError:
        # Load Iris dataset manually
        path = os.path.join('data', 'iris', 'iris.data')
        iris_data = np.genfromtxt(path, dtype='str', delimiter=',')
        X = iris_data[:, :4].astype(dtype=float)
        y = np.ndarray((X.shape[0],), dtype=int)

        # Create target vector y and corresponding labels
        labels, idx = [], 0
        for i, label in enumerate(iris_data[:, 4]):
            label = label.split('-')[1]
            if label not in labels:
                labels.append(label)
                idx += 1
            y[i] = idx - 1

        # Reduce components by implemented Principal Component Analysis
        X = PCA(X, 3)[0]

    return X, y, labels
def datablock():
    X, y = load_iris(return_X_y=True)
    df = pd.DataFrame(X, columns=['var%d' % i for i in range(4)])
    df['target'] = y

    # make 1 variable categorical
    df['var3'] = df['var3'].apply(lambda x: int(x)).astype(object)

    # make outcome binary
    df['target'].loc[df['target'] == 2] = 1

    return DataBlock(df, df, df, 'target')
def make_cl_dataset_and_field_manager(self):
    iris = datasets.load_iris()
    dataset = DataSet(iris.data, iris.target, iris.feature_names, iris.target_names)

    feature_fields = []
    for i, name in enumerate(dataset.feature_names):
        f = Field(name, "NUMBER",
                  value_mean=np.mean(dataset.data[:, i]),
                  value_std=np.std(dataset.data[:, i]))
        feature_fields.append(f)

    target = Field("flower kind", "DROP_DOWN",
                   value_converter={"setosa": 0, "versicolor": 1, "virginica": 2})
    field_manager = FieldManager(-1, feature_fields, target)

    return dataset, field_manager
def make_dataset_and_field_manager(self):
    iris = datasets.load_iris()
    dataset = DataSet(iris.data, iris.target, iris.feature_names, iris.target_names)

    feature_fields = []
    for i, name in enumerate(dataset.feature_names):
        f = Field(name, "NUMBER",
                  value_mean=np.mean(dataset.data[:, i]),
                  value_std=np.std(dataset.data[:, i]))
        feature_fields.append(f)

    target = Field("flower kind", "DROP_DOWN",
                   value_converter={"setosa": 0, "versicolor": 1, "virginica": 2})
    field_manager = FieldManager(-1, feature_fields, target)

    return dataset, field_manager
def load_iris_df(include_tgt=True, tgt_name="Species", shuffle=False):
    """Loads the iris dataset into a dataframe with the
    target set as the "Species" feature or whatever name
    is specified in ``tgt_name``.

    Parameters
    ----------
    include_tgt : bool, optional (default=True)
        Whether to include the target

    tgt_name : str, optional (default="Species")
        The name of the target feature

    shuffle : bool, optional (default=False)
        Whether to shuffle the rows on return

    Returns
    -------
    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded dataset
    """
    iris = load_iris()
    X = pd.DataFrame.from_records(data=iris.data, columns=iris.feature_names)

    if include_tgt:
        X[tgt_name] = iris.target

    return X if not shuffle else shuffle_dataframe(X)
def backward(self, dz):
    dx, dw, db = layers.linear_backward(dz, self.cache1)
    return dx, dw, db

# iris = datasets.load_iris()
# X = iris.data
# Y = iris.target
# Y = to_categorical(iris.target, 3)
def iris_softmax():
    print("Initializing net for Iris dataset classification problem. . .")
    iris = load_iris()
    X = iris.data
    Y = iris.target

    dn = DenseNet(input_dim=4, optim_config={"type": "sgd", "learning_rate": 0.05}, loss_fn='softmax')
    dn.addlayer("ReLU", 4)
    dn.addlayer("ReLU", 6)
    dn.addlayer("ReLU", 3)

    for i in range(600):
        print("Iteration: ", i)
        dn.train(X, Y)
def iris_svm():
    print("Initializing net for Iris dataset classification problem. . .")
    iris = load_iris()
    X = iris.data
    Y = iris.target

    dn = DenseNet(input_dim=4, optim_config={"type": "sgd", "learning_rate": 0.01}, loss_fn='svm')
    dn.addlayer("ReLU", 4)
    dn.addlayer("ReLU", 6)
    dn.addlayer("ReLU", 3)

    for i in range(1000):
        print("Iteration: ", i)
        dn.train(X, Y)

# def iris_svm_momentum():
#     print("Initializing net for Iris dataset classification problem. . .")
#     iris = load_iris()
#     X = iris.data
#     Y = iris.target
#
#     dn = DenseNet(input_dim=4, optim_config={"type": "momentum", "learning_rate": 0.01, "momentum": 0.5}, loss_fn='svm')
#     dn.addlayer("ReLU", 4)
#     dn.addlayer("ReLU", 6)
#     dn.addlayer("ReLU", 3)
#
#     for i in range(1000):
#         print("Iteration: ", i)
#         dn.train(X, Y)

# two_bit_xor_sigmoid()
def _check_iris_imputation(_impute_fn):
    iris = load_iris()
    X = iris.data

    # some values missing only
    rng = np.random.RandomState(0)
    X_some_missing = X.copy()
    mask = np.abs(X[:, 2] - rng.normal(loc=5.5, scale=.7, size=X.shape[0])) < .6
    X_some_missing[mask, 3] = np.NaN

    X_imputed = _impute_fn(X_some_missing, np.isnan(X_some_missing), k=3)
    mean_abs_diff = np.mean(np.abs(X - X_imputed))
    print(mean_abs_diff)
    assert mean_abs_diff < 0.05, "Difference too big: %0.4f" % mean_abs_diff
def lession_4():
    iris = datasets.load_iris()
    iris_X = iris.data
    iris_y = iris.target
    # print(iris_X[:2])
    # print(iris_y)

    X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size=0.3)
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    print(knn.predict(X_test))
    print(y_test)

# dataset usage
def train_model(split=.25):
    """Train a model based on the iris dataset.

    This will split the iris dataset into a train and a test set, will
    train a Random Forest Classifier and fit the trained model to the
    test dataset. In addition the confusion matrix and features importance
    will be calculated.

    Args:
        split (float): Fraction of observations in the test dataset.

    Returns:
        RandomForestClassifier: Trained model.
        pandas.DataFrame: Confusion matrix.
        dictionary: Features importance
    """
    iris = load_iris()
    all_data = pd.DataFrame(iris.data, columns=iris.feature_names)
    features = all_data.columns.str.replace(r'\s+', '_').str.replace(r'\W+', '')
    all_data['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)

    train, test = train_test_split(all_data, test_size=split)

    clf = RandomForestClassifier(n_jobs=1)
    clf.fit(train.drop('species', axis=1), train.species)

    preds = clf.predict(test.drop('species', axis=1))
    conf_matrix = pd.crosstab(test['species'], preds,
                              rownames=['Actual Species'],
                              colnames=['Predicted Species'])
    f_importances = list(zip(train.drop('species', axis=1).columns,
                             clf.feature_importances_))

    return clf, conf_matrix, f_importances, features
def __init__(self):
    self.iris = datasets.load_iris()
    self.count = 3
    self.xaxis = 0
    self.yaxis = 1
def __init__(self):
    self.iris = datasets.load_iris()
def setUp(self):
    iris = datasets.load_iris()
    rng = check_random_state(0)
    perm = rng.permutation(iris.target.size)
    iris.data = iris.data[perm]
    iris.target = iris.target[perm]
    self.iris = iris
def setUp(self):
    iris = datasets.load_iris()
    rng = check_random_state(0)
    iris.data = iris.data
    iris.target = iris.target
    self.iris = iris
    for csv_file in glob.glob("*.csv"):
        os.remove(csv_file)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    # Project the data onto the 2 primary components
    multi_class_lda = MultiClassLDA()
    multi_class_lda.plot_in_2d(X, y, title="LDA")