def dataset_generator():
    """
    Generate a standardized two-class dataset with labels in {-1, 1}.

    Three candidate datasets are constructed (moons, circles and a jittered
    ``make_classification`` sample), but only the first entry of the list --
    the moons dataset -- is actually returned.

    :return: tuple ``(X, y)`` where ``X`` is the output of
        ``StandardScaler().fit_transform`` and ``y`` has every 0 label
        replaced by -1.
    """
    # Jittered make_classification sample; kept as the third candidate.
    sep_features, sep_labels = make_classification(
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=1,
        n_clusters_per_class=1,
    )
    jitter_rng = np.random.RandomState(2)
    sep_features += 2 * jitter_rng.uniform(size=sep_features.shape)

    candidates = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        (sep_features, sep_labels),
    ]

    # Only the first candidate (moons) is used.
    features, labels = candidates[0]
    # Relabel class 0 as -1, in place.
    labels[labels == 0] = -1
    features = StandardScaler().fit_transform(features)
    return features, labels
def data():
    """
    Return the sample points of a small, seeded two-moons dataset.

    :return: first element of ``datasets.make_moons(...)`` for 60 samples
        with noise 0.05 and ``random_state=1``; the labels are discarded.
    """
    sample_count = 60
    moons = datasets.make_moons(n_samples=sample_count, noise=.05, random_state=1)
    return moons[0]
def main():
    """
    Cluster a two-moons sample with DBSCAN and plot the result.

    Two 2-D plots are produced through ``Plot.plot_in_2d``: one coloured by
    the labels returned from ``DBSCAN.predict`` and one coloured by the
    labels that ``make_moons`` generated.
    """
    # Load the dataset (unshuffled, 300 points, noise 0.08).
    points, true_labels = datasets.make_moons(n_samples=300, noise=0.08, shuffle=False)

    # Cluster the data using DBSCAN.
    # NOTE(review): this relies on DBSCAN exposing ``predict(X)``; presumably
    # a project-local implementation -- confirm against the import.
    clusterer = DBSCAN(eps=0.17, min_samples=5)
    cluster_labels = clusterer.predict(points)

    # Draw predicted clusters first, then the generated labels.
    plotter = Plot()
    plotter.plot_in_2d(points, cluster_labels, title="DBSCAN")
    plotter.plot_in_2d(points, true_labels, title="Actual Clustering")
def moons():
    """
    Build a seeded, unshuffled two-moons dataset of 200 points.

    :return: tuple ``(X, gt)`` as produced by ``sk_datasets.make_moons``
        with noise 0.05 and ``random_state=0``.
    """
    seed = 0
    points, ground_truth = sk_datasets.make_moons(
        n_samples=200,
        noise=.05,
        shuffle=False,
        random_state=seed,
    )
    return points, ground_truth
def _download():
    """
    Generate train, test and validation splits of the two-moons dataset.

    Each split holds 10000 shuffled samples with noise 0.2. Every split is
    shifted by the absolute value of its own overall minimum, as in the
    original code.

    The original used ``random_state=1234`` for all three calls, so the test
    and validation sets were exact copies of the training set. Each split now
    uses its own seed; the training seed is unchanged.

    :return: ``(train_set, test_set, valid_set)``, each an ``(x, t)`` tuple.
    """
    def _make_split(seed):
        # One independent draw of the moons data, shifted by |min| of its
        # own features (min taken over all entries, not per column).
        x, t = make_moons(n_samples=10000, shuffle=True, noise=0.2, random_state=seed)
        x += np.abs(x.min())
        return x, t

    train_set = _make_split(1234)
    test_set = _make_split(1235)
    valid_set = _make_split(1236)
    return train_set, test_set, valid_set
def generate_data():
    """
    Create a 200-point two-moons dataset with noise 0.20.

    The global NumPy random generator is seeded with 0 before the data is
    drawn; that seeding is a side effect visible to the caller.

    :return: tuple ``(X, y)`` as returned by ``datasets.make_moons``.
    """
    np.random.seed(0)
    features, labels = datasets.make_moons(200, noise=0.20)
    return features, labels
def makeSimpleDatasets(n_samples=1500):
    """
    Build four toy 2-D datasets (adapted from a scikit-learn example).

    The global NumPy random generator is seeded with 0 first. The original
    body reassigned ``n_samples = 1500`` and so ignored the caller's
    argument; the parameter is now honoured, and the default of 1500 keeps
    the previous output for callers that pass nothing.

    :param n_samples: number of points generated for every dataset.
    :return: list ``[noisy_circles, noisy_moons, blobs, no_structure]``.
        The first three are the values returned by ``make_circles``,
        ``make_moons`` and ``make_blobs``; ``no_structure`` is the tuple
        ``(np.random.rand(n_samples, 2), None)``.
    """
    np.random.seed(0)

    # Generation order is kept as in the original, because circles, moons
    # and the uniform sample are given no random_state of their own.
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
    no_structure = np.random.rand(n_samples, 2), None

    return [noisy_circles, noisy_moons, blobs, no_structure]
def classification(dataset=0):
    """
    Train a ``SupervisedNNModel`` on a synthetic 2-D dataset and plot it.

    The function prints the value of ``model.score`` on a 50-point test set
    and draws a filled contour of ``model.predict(...)[:, 1]`` over a grid,
    overlaid with the training and test points.

    :param dataset: 0 for a jittered ``make_classification`` sample, 1 for
        ``make_moons``, 2 for ``make_circles``. Any other value prints
        "dataset unknown" and returns without doing anything else.
    :return: None.
    """
    # ---- generate training and test data ----
    n_train = 1000
    if dataset == 0:
        shared_kwargs = dict(n_features=2, n_redundant=0, n_informative=2,
                             random_state=1, n_clusters_per_class=1)
        train_X, train_Y = make_classification(n_samples=n_train, **shared_kwargs)
        jitter_rng = np.random.RandomState(2)
        # Train jitter is drawn before test jitter from the same generator.
        train_X += 2 * jitter_rng.uniform(size=train_X.shape)
        test_X, test_Y = make_classification(n_samples=50, **shared_kwargs)
        test_X += 2 * jitter_rng.uniform(size=test_X.shape)
    elif dataset == 1:
        train_X, train_Y = make_moons(n_samples=n_train, noise=0.3, random_state=0)
        test_X, test_Y = make_moons(n_samples=50, noise=0.3, random_state=1)
    elif dataset == 2:
        train_X, train_Y = make_circles(n_samples=n_train, noise=0.2, factor=0.5,
                                        random_state=1)
        test_X, test_Y = make_circles(n_samples=50, noise=0.2, factor=0.5,
                                      random_state=1)
    else:
        print("dataset unknown")
        return

    # ---- build, train, and test the model ----
    model = SupervisedNNModel(
        train_X.shape[1],
        2,
        hunits=[100, 50],
        activations=[T.tanh, T.tanh, T.nnet.softmax],
        cost_fun='negative_log_likelihood',
        error_fun='zero_one_loss',
        learning_rate=0.01,
        L1_reg=0.,
        L2_reg=0.,
    )
    model.fit(train_X, train_Y)
    print("Test Error: %f" % model.score(test_X, test_Y))

    # ---- plot dataset + predictions ----
    plt.figure()

    # Grid covering the training data with a 0.5 margin, step 0.02.
    first_col, second_col = train_X[:, 0], train_X[:, 1]
    x_lo, x_hi = first_col.min() - .5, first_col.max() + .5
    y_lo, y_hi = second_col.min() - .5, second_col.max() + .5
    grid_x, grid_y = np.meshgrid(np.arange(x_lo, x_hi, 0.02),
                                 np.arange(y_lo, y_hi, 0.02))

    surface_cmap = plt.cm.RdBu
    point_cmap = ListedColormap(['#FF0000', '#0000FF'])

    # Second column of the prediction for every grid point, reshaped back
    # onto the grid and drawn as a filled contour.
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    surface = model.predict(grid_points)[:, 1]
    surface = surface.reshape(grid_x.shape)
    plt.contourf(grid_x, grid_y, surface, cmap=surface_cmap, alpha=.8)

    # Training points (semi-transparent) and then the testing points.
    plt.scatter(train_X[:, 0], train_X[:, 1], c=train_Y, cmap=point_cmap, alpha=0.6)
    plt.scatter(test_X[:, 0], test_X[:, 1], c=test_Y, cmap=point_cmap)

    plt.xlim(grid_x.min(), grid_x.max())
    plt.ylim(grid_y.min(), grid_y.max())
    plt.xticks(())
    plt.yticks(())
    plt.title('Classification Problem (%i)' % dataset)
def main():
    """
    Command-line entry point: train a model on a synthetic 2-D dataset.

    Parses the command line, seeds the NumPy and TensorFlow generators,
    recreates the output directory ``<save>/<model>.<dataset>`` (removing any
    existing one), builds the selected dataset and calls ``Model.train``
    inside a TensorFlow session with GPU memory growth enabled.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--nEpoch', type=int, default=100)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--model', type=str, default="picnn",
                        choices=['picnn', 'ficnn'])
    parser.add_argument('--dataset', type=str, default="moons",
                        choices=['moons', 'circles', 'linear'])
    parser.add_argument('--noncvx', action='store_true')
    args = parser.parse_args()

    # Seed both random sources from the same command-line value.
    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    setproctitle.setproctitle('bamos.icnn.synthetic.{}.{}'.format(args.model, args.dataset))

    # Start from an empty output directory for this model/dataset pair.
    out_root = os.path.expanduser(args.save)
    save = os.path.join(out_root, "{}.{}".format(args.model, args.dataset))
    if os.path.isdir(save):
        shutil.rmtree(save)
    os.makedirs(save, exist_ok=True)

    chosen = args.dataset
    if chosen == "moons":
        dataX, dataY = make_moons(noise=0.3, random_state=0)
    elif chosen == "circles":
        dataX, dataY = make_circles(noise=0.2, factor=0.5, random_state=0)
        # The circle labels are inverted (0 <-> 1).
        dataY = 1. - dataY
    elif chosen == "linear":
        dataX, dataY = make_classification(n_features=2, n_redundant=0,
                                           n_informative=2, random_state=1,
                                           n_clusters_per_class=1)
        jitter_rng = np.random.RandomState(2)
        dataX += 2 * jitter_rng.uniform(size=dataX.shape)
    else:
        # argparse ``choices`` should make this branch unreachable.
        assert False

    # Labels become a float32 column vector.
    dataY = dataY.reshape((-1, 1)).astype(np.float32)

    nData, nFeatures = dataX.shape[0], dataX.shape[1]  # nData is not used below
    nLabels = 1
    nXy = nFeatures + nLabels  # not used below

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    with tf.Session(config=session_config) as sess:
        model = Model(nFeatures, nLabels, sess, args.model, nGdIter=30)
        model.train(args, dataX, dataY)