The following 17 code examples, extracted from open-source Python projects, illustrate how to use sklearn.decomposition.KernelPCA().
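As a quick orientation before the extracted examples, here is a minimal sketch of the typical KernelPCA workflow. The toy data set and the gamma value are illustrative assumptions, not taken from any of the projects below:

import numpy as np
from sklearn.datasets import make_circles
from sklearn.decomposition import KernelPCA

# assumed toy data: two nested circles that are not linearly separable in 2D
X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0)

# project onto the first two kernel principal components with an RBF kernel;
# gamma=2.0 is an assumed, data-dependent choice
kpca = KernelPCA(n_components=2, kernel="rbf", gamma=2.0,
                 fit_inverse_transform=True)
X_kpca = kpca.fit_transform(X)

# map the projected points back to the original space (approximate pre-images)
X_back = kpca.inverse_transform(X_kpca)
print(X_kpca.shape, X_back.shape)  # (400, 2) and (400, 2)
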
def test_kernel_pca_sparse():
    rng = np.random.RandomState(0)
    X_fit = sp.csr_matrix(rng.random_sample((5, 4)))
    X_pred = sp.csr_matrix(rng.random_sample((2, 4)))

    for eigen_solver in ("auto", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=False)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            # X_pred2 = kpca.inverse_transform(X_pred_transformed)
            # assert_equal(X_pred2.shape, X_pred.shape)

def test_kernel_pca_precomputed():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("dense", "arpack"):
        X_kpca = KernelPCA(4, eigen_solver=eigen_solver).\
            fit(X_fit).transform(X_pred)
        X_kpca2 = KernelPCA(
            4, eigen_solver=eigen_solver, kernel='precomputed').fit(
                np.dot(X_fit, X_fit.T)).transform(np.dot(X_pred, X_fit.T))

        X_kpca_train = KernelPCA(
            4, eigen_solver=eigen_solver,
            kernel='precomputed').fit_transform(np.dot(X_fit, X_fit.T))
        X_kpca_train2 = KernelPCA(
            4, eigen_solver=eigen_solver, kernel='precomputed').fit(
                np.dot(X_fit, X_fit.T)).transform(np.dot(X_fit, X_fit.T))

        assert_array_almost_equal(np.abs(X_kpca),
                                  np.abs(X_kpca2))

        assert_array_almost_equal(np.abs(X_kpca_train),
                                  np.abs(X_kpca_train2))

def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron().fit(X, y).score(X, y)
    assert_less(train_score, 0.8)

    # Project the circles data into the first 2 components of a RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2,
                     fit_inverse_transform=True, gamma=2.)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron().fit(X_kpca, y).score(X_kpca, y)
    assert_equal(train_score, 1.0)

def cluster_texts(textdict, eps=0.45, min_samples=3):
    """
    cluster the given texts

    Input:
        textdict: dictionary with {docid: text}

    Returns:
        doccats: dictionary with {docid: cluster_id}
    """
    doc_ids = list(textdict.keys())
    # transform texts into length normalized kpca features
    ft = FeatureTransform(norm='max', weight=True, renorm='length', norm_num=False)
    docfeats = ft.texts2features(textdict)
    X, featurenames = features2mat(docfeats, doc_ids)
    e_lkpca = KernelPCA(n_components=250, kernel='linear')
    X = e_lkpca.fit_transform(X)
    xnorm = np.linalg.norm(X, axis=1)
    X = X / xnorm.reshape(X.shape[0], 1)
    # compute cosine similarity
    D = 1. - linear_kernel(X)
    # and cluster with dbscan
    clst = DBSCAN(eps=eps, metric='precomputed', min_samples=min_samples)
    y_pred = clst.fit_predict(D)
    return {did: y_pred[i] for i, did in enumerate(doc_ids)}

def test_KPCA(*data):
    '''
    test the KPCA method

    :param data: train_data, train_value
    :return: None
    '''
    X, y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    for kernel in kernels:
        kpca = decomposition.KernelPCA(n_components=None, kernel=kernel)  # try 4 different kernels
        kpca.fit(X)
        print('kernel={0} --> lambdas: {1}'.format(kernel, kpca.lambdas_))

def plot_KPCA(*data):
    '''
    graph after KPCA

    :param data: train_data, train_value
    :return: None
    '''
    X, y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2),)
    for i, kernel in enumerate(kernels):
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("kernel={0}".format(kernel))
    plt.suptitle("KPCA")
    plt.show()

def plot_KPCA_poly(*data):
    '''
    graph after KPCA with poly kernel

    :param data: train_data, train_value
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2),)
    # parameters of the poly kernel: (degree p, gamma, coef0 r)
    # p takes 3 or 10, gamma takes 1 or 10, r takes 1 or 10,
    # giving 8 combinations in total
    Params = [(3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10),
              (10, 1, 1), (10, 10, 1), (10, 1, 10), (10, 10, 10)]
    for i, (p, gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='poly',
                                       gamma=gamma, degree=p, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 4, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$({0} (x \cdot z+1)+{1})^{{{2}}}$".format(gamma, r, p))
    plt.suptitle("KPCA-Poly")
    plt.show()

def plot_KPCA_rbf(*data):
    '''
    graph with kernel of rbf

    :param data: train_data, train_value
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2),)
    Gammas = [0.5, 1, 4, 10]
    for i, gamma in enumerate(Gammas):
        kpca = decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= {0}".format(label), color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-{0}||x-z||^2)$".format(gamma))
    plt.suptitle("KPCA-rbf")
    plt.show()

def plot_KPCA_sigmoid(*data):
    '''
    graph with sigmoid kernel

    :param data: train_data, train_value
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2),)
    # parameters of the sigmoid kernel: (gamma, coef0)
    # gamma takes 0.01, 0.1 or 0.2; coef0 takes 0.1 or 0.2,
    # giving 6 combinations in total
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1),
              (0.1, 0.2), (0.2, 0.1), (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='sigmoid',
                                       gamma=gamma, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= {0}".format(label), color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh({0}(x\cdot z)+{1})$".format(gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()

def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})    # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)

def test_invalid_parameters():
    assert_raises(ValueError, KernelPCA, 10, fit_inverse_transform=True,
                  kernel='precomputed')

def test_kernel_pca_linear_kernel():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    # for a linear kernel, kernel PCA should find the same projection as PCA
    # modulo the sign (direction)
    # fit only the first four components: fifth is near zero eigenvalue, so
    # can be trimmed due to roundoff error
    assert_array_almost_equal(
        np.abs(KernelPCA(4).fit(X_fit).transform(X_pred)),
        np.abs(PCA(4).fit(X_fit).transform(X_pred)))

def test_kernel_pca_n_components():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("dense", "arpack"):
        for c in [1, 2, 4]:
            kpca = KernelPCA(n_components=c, eigen_solver=eigen_solver)
            shape = kpca.fit(X_fit).transform(X_pred).shape

            assert_equal(shape, (2, c))

def test_kernel_pca_invalid_kernel():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((2, 4))
    kpca = KernelPCA(kernel="tototiti")
    assert_raises(ValueError, kpca.fit, X_fit)

def test_gridsearch_pipeline():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    assert_equal(grid_search.best_score_, 1)

def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(Perceptron__n_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)

def __init__(self, data, input_dimensionality, output_dimensionality, kernel_type='rbf',
             degree=2, sigma=0, kernel_scaling=1, c=1, scaler=None):
    """
    Creates a Kernel SEF object
    :param data: the data to be used by the kernel
    :param input_dimensionality: dimensionality of the input space
    :param output_dimensionality: dimensionality of the target space
    :param learning_rate: learning rate to be used for the optimization
    :param kernel_type: supported kernel: 'rbf', 'poly', and 'linear'
    :param degree: degree of the polynomial kernel
    :param sigma: the sigma value for the RBF kernel
    :param kernel_scaling: scaling parameter for the kernel
    :param c: constant kernel param for linear and poly kernels
    :param regularizer_weight: weight of the regularizer
    :param scaler: the sklearn-compatible scaler (or None)
    """
    # Call base constructor
    SEF_Base.__init__(self, input_dimensionality, output_dimensionality, scaler=scaler)

    # Adjustable parameters
    self.kernel_type = kernel_type
    self.degree = degree
    self.sigma_kernel = np.float32(sigma)
    self.alpha = kernel_scaling
    self.c = c

    # If a scaler is used, fit it!
    if self.scaler is None:
        data = np.float32(data)
    else:
        data = np.float32(self.scaler.fit_transform(data))

    # If the rbf kernel is used and no sigma is supplied, estimate it!
    if sigma == 0 and self.kernel_type == 'rbf':
        sigma_kernel = np.float32(mean_data_distance(data))
        self.sigma_kernel = sigma_kernel
    else:
        self.sigma_kernel = 1

    # Use kPCA for initialization
    kpca = KernelPCA(kernel=self.kernel_type, n_components=self.output_dimensionality,
                     gamma=(1.0 / (self.sigma_kernel ** 2)), degree=self.degree,
                     eigen_solver='dense')
    kpca.fit(data)
    A = kpca.alphas_

    # Scale the coefficients to have unit norm (avoid rescaling)
    A = A / np.sqrt(np.diag(np.dot(A.T, np.dot(np.dot(data, data.T), A))))

    # Model parameters
    self.X_kernel = Variable(torch.from_numpy(np.float32(data)), requires_grad=False)
    self.A = Variable(torch.from_numpy(np.float32(A)), requires_grad=True)

    self.trainable_params = [self.A]
    self.non_trainable_params = [self.X_kernel]