The following code examples, extracted from open-source Python projects, illustrate how to use sklearn.datasets.load_digits().
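Before the project examples, here is a minimal, self-contained sketch of the loader itself; it uses only the documented scikit-learn API:

    # Minimal usage of sklearn.datasets.load_digits
    from sklearn.datasets import load_digits

    digits = load_digits()          # Bunch with .data, .images, .target
    print(digits.data.shape)        # (1797, 64): flattened 8x8 grayscale images
    print(digits.images.shape)      # (1797, 8, 8): the same images, unflattened
    print(digits.target.shape)      # (1797,): digit labels 0-9

    # Or unpack features and labels directly:
    X, y = load_digits(return_X_y=True)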
def main(max_iter):
    # prepare
    npdl.utils.random.set_seed(1234)

    # data
    digits = load_digits()
    X_train = digits.data
    X_train /= np.max(X_train)
    Y_train = digits.target
    n_classes = np.unique(Y_train).size

    # model
    model = npdl.model.Model()
    model.add(npdl.layers.Dense(n_out=500, n_in=64, activation=npdl.activations.ReLU()))
    model.add(npdl.layers.Dense(n_out=n_classes, activation=npdl.activations.Softmax()))
    model.compile(loss=npdl.objectives.SCCE(), optimizer=npdl.optimizers.SGD(lr=0.005))

    # train
    model.fit(X_train, npdl.utils.data.one_hot(Y_train),
              max_iter=max_iter, validation_split=0.1)
def test_model_training(self):
    # Smoke test: train an SVM on the digits dataset end to end
    digits = datasets.load_digits()
    images_and_labels = list(zip(digits.images, digits.target))
    n_samples = len(digits.images)
    # Flatten each 8x8 image into a 64-element feature vector
    data = digits.images.reshape((n_samples, -1))
    svm = SVM()
    pipe = Pipeline(models={'SVM': svm})
    pipe.train(data[:n_samples // 2], digits.target[:n_samples // 2])
    assert svm.classifier is not None
    expected = digits.target[n_samples // 2:]
    predicted = pipe.predict(data[n_samples // 2:])
    assert predicted['SVM'] is not None
def test_pca_score_with_different_solvers():
    digits = datasets.load_digits()
    X_digits = digits.data
    dX_digits = da.from_array(X_digits, chunks=X_digits.shape)

    pca_dict = {svd_solver: dd.PCA(n_components=30, svd_solver=svd_solver,
                                   random_state=0, iterated_power=3)
                for svd_solver in solver_list}

    for pca in pca_dict.values():
        pca.fit(dX_digits)
        # Sanity check for the noise_variance_. For more details see
        # https://github.com/scikit-learn/scikit-learn/issues/7568
        # https://github.com/scikit-learn/scikit-learn/issues/8541
        # https://github.com/scikit-learn/scikit-learn/issues/8544
        assert np.all((pca.explained_variance_ - pca.noise_variance_) >= 0)

    # Compare scores with different svd_solvers
    score_dict = {svd_solver: pca.score(dX_digits)
                  for svd_solver, pca in pca_dict.items()}
    assert_almost_equal(score_dict['full'], score_dict['randomized'], decimal=3)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=16,
                               n_iterations=1000,
                               learning_rate=0.01)
    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    # (use the original integer labels for the legend; y is one-hot here)
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron",
                      accuracy=accuracy, legend_labels=np.unique(data.target))
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes",
                      accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = RandomForest(n_estimators=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="Random Forest",
                      accuracy=accuracy, legend_labels=data.target_names)
def test_min_samples_split():
    X_c, y_c = load_digits(return_X_y=True)
    X_r, y_r = make_regression(n_samples=10000, random_state=0)

    for mss in [2, 4, 10, 20]:
        mtr = MondrianTreeRegressor(random_state=0, min_samples_split=mss)
        mtr.partial_fit(X_r[: X_r.shape[0] // 2], y_r[: X_r.shape[0] // 2])
        mtr.partial_fit(X_r[X_r.shape[0] // 2:], y_r[X_r.shape[0] // 2:])
        # Internal nodes (children_left != -1) must respect min_samples_split
        n_node_samples = mtr.tree_.n_node_samples[mtr.tree_.children_left != -1]
        assert_greater(np.min(n_node_samples) + 1, mss)

        mtc = MondrianTreeClassifier(random_state=0, min_samples_split=mss)
        mtc.partial_fit(X_c[: X_c.shape[0] // 2], y_c[: X_c.shape[0] // 2])
        mtc.partial_fit(X_c[X_c.shape[0] // 2:], y_c[X_c.shape[0] // 2:])
        n_node_samples = mtc.tree_.n_node_samples[mtc.tree_.children_left != -1]
        assert_greater(np.min(n_node_samples) + 1, mss)
def test_min_samples_split():
    X_c, y_c = load_digits(return_X_y=True)
    X_r, y_r = make_regression(n_samples=10000, random_state=0)

    for mss in [2, 4, 10, 20]:
        mfr = MondrianForestRegressor(random_state=0, min_samples_split=mss)
        mfr.partial_fit(X_r[: X_r.shape[0] // 2], y_r[: X_r.shape[0] // 2])
        mfr.partial_fit(X_r[X_r.shape[0] // 2:], y_r[X_r.shape[0] // 2:])
        for est in mfr.estimators_:
            n_node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1]
            assert_greater(np.min(n_node_samples) + 1, mss)

        mfc = MondrianForestClassifier(random_state=0, min_samples_split=mss)
        mfc.partial_fit(X_c[: X_c.shape[0] // 2], y_c[: X_c.shape[0] // 2])
        mfc.partial_fit(X_c[X_c.shape[0] // 2:], y_c[X_c.shape[0] // 2:])
        for est in mfc.estimators_:
            n_node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1]
            assert_greater(np.min(n_node_samples) + 1, mss)
def get_digits(classes=10, rng=42):
    X, y = datasets.load_digits(n_class=classes, return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        random_state=rng)

    # One-hot encode the targets, one column per sample
    trg_train = np.zeros((classes, len(y_train)), dtype='uint8')
    for e in range(trg_train.shape[1]):
        v = y_train[e]
        trg_train[v, e] = 1
    trg_test = np.zeros((classes, len(y_test)), dtype='uint8')
    for e in range(trg_test.shape[1]):
        v = y_test[e]
        trg_test[v, e] = 1

    trn = Instance(X_train.T, trg_train)
    tst = Instance(X_test.T, trg_test)
    return trn, tst
def load_data():
    '''
    Load and shuffle the digits dataset.

    :return: X (features), y (targets), and the indices of the unlabeled points
    '''
    digits = datasets.load_digits()

    # Shuffle the samples with a fixed seed for reproducibility
    rng = np.random.RandomState(0)
    indices = np.arange(len(digits.data))
    rng.shuffle(indices)
    X = digits.data[indices]
    y = digits.target[indices]

    # Treat the first 10% of points as labeled; the rest are unlabeled
    n_labeled_points = int(len(y) / 10)
    unlabeled_indices = np.arange(len(y))[n_labeled_points:]
    return X, y, unlabeled_indices
def test_RandomizedSearchCV():
    '''
    Use RandomizedSearchCV with LogisticRegression to tune C and multi_class.

    :return: None
    '''
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target,
                                                        test_size=0.25, random_state=0,
                                                        stratify=digits.target)
    tuned_parameters = {'C': scipy.stats.expon(scale=100),
                        'multi_class': ['ovr', 'multinomial']}
    clf = RandomizedSearchCV(LogisticRegression(penalty='l2', solver='lbfgs', tol=1e-6),
                             tuned_parameters, cv=10, scoring="accuracy", n_iter=100)
    clf.fit(X_train, y_train)
    print("Best parameters set found:", clf.best_params_)
    print("Randomized Grid scores:")
    # Note: grid_scores_ was removed in scikit-learn 0.20; use cv_results_ instead
    for params, mean_score, scores in clf.grid_scores_:
        print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, scores.std() * 2, params))
    print("Optimized Score:", clf.score(X_test, y_test))
    print("Detailed classification report:")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
def test_roc_nonrepeating_thresholds():
    # Test to ensure that we don't return spurious repeating thresholds.
    # Duplicated thresholds can arise due to machine precision issues.
    dataset = datasets.load_digits()
    X = dataset['data']
    y = dataset['target']

    # This random forest classifier can only return probabilities
    # significant to two decimal places
    clf = ensemble.RandomForestClassifier(n_estimators=100, random_state=0)

    # How well can the classifier predict whether a digit is less than 5?
    # This task contributes floating point roundoff errors to the probabilities
    train, test = slice(None, None, 2), slice(1, None, 2)
    probas_pred = clf.fit(X[train], y[train]).predict_proba(X[test])
    y_score = probas_pred[:, :5].sum(axis=1)  # roundoff errors begin here
    y_true = [yy < 5 for yy in y[test]]

    # Check for repeating values in the thresholds
    fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False)
    assert_equal(thresholds.size, np.unique(np.round(thresholds, 2)).size)
def main():
    digits = load_digits()
    # Note: sklearn.cross_validation and tf.initialize_all_variables are legacy
    # APIs (sklearn.model_selection / tf.global_variables_initializer in newer releases)
    x_train, x_test, y_train_, y_test_ = cross_validation.train_test_split(
        digits.data, digits.target, test_size=0.2, random_state=0)

    # One-hot encode the labels
    lb = preprocessing.LabelBinarizer()
    lb.fit(digits.target)
    y_train = lb.transform(y_train_)
    y_test = lb.transform(y_test_)

    sess = tf.InteractiveSession()

    x = tf.placeholder(tf.float32, shape=[None, 64])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])

    # Two-layer network: 64 -> 32 -> 10
    w_1 = weight_variable([64, 32])
    b_1 = bias_variable([32])
    h_1 = tf.nn.relu(tf.matmul(x, w_1) + b_1)

    w_2 = weight_variable([32, 10])
    b_2 = bias_variable([10])
    y = tf.nn.softmax(tf.matmul(h_1, w_2) + b_2)

    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    sess.run(tf.initialize_all_variables())
    for i in range(1000):
        train_step.run(feed_dict={x: x_train, y_: y_train})

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(accuracy.eval(feed_dict={x: x_test, y_: y_test}))
def main():
    digits = load_digits()
    x_train, x_test, y_train_, y_test_ = cross_validation.train_test_split(
        digits.data, digits.target, test_size=0.2, random_state=0)

    lb = preprocessing.LabelBinarizer()
    lb.fit(digits.target)
    y_train = lb.transform(y_train_)
    y_test = lb.transform(y_test_)

    sess = tf.InteractiveSession()

    x = tf.placeholder(tf.float32, shape=[None, 64])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])
    phase_train = tf.placeholder(tf.bool, name='phase_train')

    # First layer: affine transform, batch norm, then binarized activation
    w_1 = weight_variable([64, 32])
    b_1 = bias_variable([32])
    t_1 = tf.matmul(x, w_1) + b_1
    bn = batch_norm(t_1, 1, phase_train)
    h_1 = binarized_ops.binarized(bn)

    w_2 = weight_variable([32, 10])
    b_2 = bias_variable([10])
    y = tf.nn.softmax(tf.matmul(h_1, w_2) + b_2)

    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    sess.run(tf.initialize_all_variables())
    for i in range(1000):
        train_step.run(feed_dict={x: x_train, y_: y_train, phase_train: True})

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(accuracy.eval(feed_dict={x: x_test, y_: y_test, phase_train: False}))
def load_small_digits(train_prop, n_class):
    '''
    Load the data from the scikit-learn digits dataset.

    :param train_prop: proportion of samples placed in the training set
    :param n_class: number of different digits
    :return: x_train, x_test, z_train, z_test
    '''
    # Load the 8 by 8 digit dataset
    data = load_digits(n_class)
    N_images = data.target.size
    N_train = int(N_images * train_prop)
    N_test = N_images - N_train

    x_train = data.data[:N_train, :]
    x_test = data.data[N_train:, :]
    class_train = data.target[:N_train]
    class_test = data.target[N_train:]

    # One-hot encode the class labels
    z_train = np.zeros((N_train, n_class))
    z_train[np.arange(N_train), class_train] = 1
    z_test = np.zeros((N_test, n_class))
    z_test[np.arange(N_test), class_test] = 1

    return x_train, x_test, z_train, z_test