Python sklearn.decomposition module: KernelPCA() example source code

We extracted the following 17 code examples from open-source Python projects to illustrate how to use sklearn.decomposition.KernelPCA().
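
Before the project examples, here is a minimal, self-contained sketch of the basic API (the data and parameter values are illustrative, not taken from any of the projects below):

import numpy as np
from sklearn.decomposition import KernelPCA

rng = np.random.RandomState(0)
X = rng.random_sample((100, 10))

# project onto the first two principal components in RBF feature space
kpca = KernelPCA(n_components=2, kernel="rbf", gamma=0.1)
X_kpca = kpca.fit_transform(X)
print(X_kpca.shape)  # (100, 2)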

Project: Parallel-SGD    Author: angadgill
def test_kernel_pca_sparse():
    rng = np.random.RandomState(0)
    X_fit = sp.csr_matrix(rng.random_sample((5, 4)))
    X_pred = sp.csr_matrix(rng.random_sample((2, 4)))

    for eigen_solver in ("auto", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=False)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            # X_pred2 = kpca.inverse_transform(X_pred_transformed)
            # assert_equal(X_pred2.shape, X_pred.shape)
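The Parallel-SGD snippets in this collection mirror scikit-learn's own kernel PCA tests and omit their module-level imports. Judging by the names used, a plausible header for an older scikit-learn release (module paths have since moved) would be:

import numpy as np
import scipy.sparse as sp

from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles
from sklearn.linear_model import Perceptron
from sklearn.pipeline import Pipeline
from sklearn.grid_search import GridSearchCV  # sklearn.model_selection in newer releases
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.utils.testing import (  # removed in scikit-learn 0.24
    assert_array_almost_equal, assert_equal, assert_not_equal,
    assert_less, assert_raises)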
Project: Parallel-SGD    Author: angadgill
def test_kernel_pca_precomputed():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("dense", "arpack"):
        X_kpca = KernelPCA(4, eigen_solver=eigen_solver).\
            fit(X_fit).transform(X_pred)
        X_kpca2 = KernelPCA(
            4, eigen_solver=eigen_solver, kernel='precomputed').fit(
                np.dot(X_fit, X_fit.T)).transform(np.dot(X_pred, X_fit.T))

        X_kpca_train = KernelPCA(
            4, eigen_solver=eigen_solver,
            kernel='precomputed').fit_transform(np.dot(X_fit, X_fit.T))
        X_kpca_train2 = KernelPCA(
            4, eigen_solver=eigen_solver, kernel='precomputed').fit(
                np.dot(X_fit, X_fit.T)).transform(np.dot(X_fit, X_fit.T))

        assert_array_almost_equal(np.abs(X_kpca),
                                  np.abs(X_kpca2))

        assert_array_almost_equal(np.abs(X_kpca_train),
                                  np.abs(X_kpca_train2))
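Outside of a test, the 'precomputed' kernel is typically used when the kernel matrix is built separately, for example with rbf_kernel. A minimal sketch (the gamma value is arbitrary): fit expects the train-vs-train kernel matrix, while transform expects test-vs-train.

import numpy as np
from sklearn.decomposition import KernelPCA
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X_train = rng.random_sample((50, 4))
X_test = rng.random_sample((10, 4))

kpca = KernelPCA(n_components=2, kernel="precomputed")
Z_train = kpca.fit_transform(rbf_kernel(X_train, gamma=0.5))     # (50, 2)
Z_test = kpca.transform(rbf_kernel(X_test, X_train, gamma=0.5))  # (10, 2)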
Project: Parallel-SGD    Author: angadgill
def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron().fit(X, y).score(X, y)
    assert_less(train_score, 0.8)

    # Project the circles data into the first 2 components of a RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2,
                     fit_inverse_transform=True, gamma=2.)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron().fit(X_kpca, y).score(X_kpca, y)
    assert_equal(train_score, 1.0)
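When fit_inverse_transform=True, as above, the model also learns an approximate pre-image map, which is commonly used for denoising. A minimal sketch on the same circles data (the reconstruction check is illustrative, not part of the original test):

import numpy as np
from sklearn.datasets import make_circles
from sklearn.decomposition import KernelPCA

X, _ = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0)
kpca = KernelPCA(kernel="rbf", n_components=2,
                 fit_inverse_transform=True, gamma=2.)
X_back = kpca.inverse_transform(kpca.fit_transform(X))
print(np.mean((X - X_back) ** 2))  # small mean reconstruction error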
Project: textcatvis    Author: cod3licious
def cluster_texts(textdict, eps=0.45, min_samples=3):
    """
    cluster the given texts

    Input:
        textdict: dictionary with {docid: text}
    Returns:
        doccats: dictionary with {docid: cluster_id}
    """
    doc_ids = list(textdict.keys())
    # transform texts into length normalized kpca features
    ft = FeatureTransform(norm='max', weight=True, renorm='length', norm_num=False)
    docfeats = ft.texts2features(textdict)
    X, featurenames = features2mat(docfeats, doc_ids)
    e_lkpca = KernelPCA(n_components=250, kernel='linear')
    X = e_lkpca.fit_transform(X)
    xnorm = np.linalg.norm(X, axis=1)
    X = X/xnorm.reshape(X.shape[0], 1)
    # compute cosine distances (rows are length-normalized, so the linear kernel gives cosine similarities)
    D = 1. - linear_kernel(X)
    # and cluster with DBSCAN
    clst = DBSCAN(eps=eps, metric='precomputed', min_samples=min_samples)
    y_pred = clst.fit_predict(D)
    return {did: y_pred[i] for i, did in enumerate(doc_ids)}
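FeatureTransform and features2mat are internal to the textcatvis project. A self-contained approximation that swaps them for scikit-learn's TfidfVectorizer (a substitution, not the original feature pipeline) could look like:

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.decomposition import KernelPCA
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

def cluster_texts_tfidf(textdict, eps=0.45, min_samples=3):
    # swap for FeatureTransform/features2mat: plain tf-idf features
    doc_ids = list(textdict.keys())
    X = TfidfVectorizer().fit_transform([textdict[d] for d in doc_ids])
    # n_components must not exceed the number of documents
    X = KernelPCA(n_components=250, kernel='linear').fit_transform(X)
    X /= np.linalg.norm(X, axis=1, keepdims=True)   # length-normalize rows
    D = 1. - linear_kernel(X)                       # cosine distances
    clst = DBSCAN(eps=eps, metric='precomputed', min_samples=min_samples)
    return {did: c for did, c in zip(doc_ids, clst.fit_predict(D))}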
Project: ML-note    Author: JasonK93
def test_KPCA(*data):
    '''
    Test KernelPCA with each supported kernel and print the eigenvalues.
    :param data: (train_data, train_labels)
    :return: None
    '''
    X,y=data
    kernels=['linear','poly','rbf','sigmoid']
    for kernel in kernels:
        kpca=decomposition.KernelPCA(n_components=None,kernel=kernel) # try each of the four kernels, keeping all components
        kpca.fit(X)
        print('kernel={0} --> lambdas: {1}'.format(kernel,kpca.lambdas_))
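
The ML-note snippets reference np, plt, and decomposition without showing their imports, and they predate scikit-learn 1.0 (where lambdas_ and alphas_ were renamed eigenvalues_ and eigenvectors_). A plausible header plus a driver on the iris dataset (the dataset choice is an assumption, not stated in the source; run it after the functions below are defined):

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, decomposition

iris = datasets.load_iris()
X, y = iris.data, iris.target
test_KPCA(X, y)
plot_KPCA(X, y)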
Project: ML-note    Author: JasonK93
def plot_KPCA(*data):
    '''
    Plot the data after KernelPCA projection, one subplot per kernel.
    :param data: (train_data, train_labels)
    :return: None
    '''
    X,y=data
    kernels=['linear','poly','rbf','sigmoid']
    fig=plt.figure()
    colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
        (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)

    for i,kernel in enumerate(kernels):
        kpca=decomposition.KernelPCA(n_components=2,kernel=kernel)
        kpca.fit(X)
        X_r=kpca.transform(X)
        ax=fig.add_subplot(2,2,i+1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("kernel={0}".format(kernel))
    plt.suptitle("KPCA")
    plt.show()
Project: ML-note    Author: JasonK93
def plot_KPCA_poly(*data):
    '''
    Plot the data after KernelPCA with the polynomial kernel under different parameter settings.
    :param data: (train_data, train_labels)
    :return: None
    '''
    X,y=data
    fig=plt.figure()
    colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
        (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)
    # polynomial-kernel parameters (p, gamma, r):
    # p (degree) takes 3 or 10, gamma takes 1 or 10, r (coef0) takes 1 or 10
    # -> 8 combinations
    Params=[(3,1,1),(3,10,1),(3,1,10),(3,10,10),(10,1,1),(10,10,1),(10,1,10),(10,10,10)]
    for i,(p,gamma,r) in enumerate(Params):
        kpca=decomposition.KernelPCA(n_components=2,kernel='poly'
        ,gamma=gamma,degree=p,coef0=r)
        kpca.fit(X)
        X_r=kpca.transform(X)
        ax=fig.add_subplot(2,4,i+1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$ ({0} (x \cdot z+1)+{1})^{{2}}$".format(gamma,r,p))
    plt.suptitle("KPCA-Poly")
    plt.show()
Project: ML-note    Author: JasonK93
def plot_KPCA_rbf(*data):
    '''
    Plot the data after KernelPCA with the RBF kernel under different gamma values.
    :param data: (train_data, train_labels)
    :return: None
    '''
    X,y=data
    fig=plt.figure()
    colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
        (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)
    Gammas=[0.5,1,4,10]
    for i,gamma in enumerate(Gammas):
        kpca=decomposition.KernelPCA(n_components=2,kernel='rbf',gamma=gamma)
        kpca.fit(X)
        X_r=kpca.transform(X)
        ax=fig.add_subplot(2,2,i+1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= {0}".format(label), color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-{0}||x-z||^2)$".format(gamma))
    plt.suptitle("KPCA-rbf")
    plt.show()
Project: ML-note    Author: JasonK93
def plot_KPCA_sigmoid(*data):
    '''
    Plot the data after KernelPCA with the sigmoid kernel under different (gamma, coef0) settings.
    :param data: (train_data, train_labels)
    :return: None
    '''
    X,y=data
    fig=plt.figure()
    colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
        (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)
    # sigmoid-kernel parameters (gamma, coef0):
    # gamma takes 0.01, 0.1, or 0.2; coef0 takes 0.1 or 0.2 -> 6 combinations
    Params=[(0.01,0.1),(0.01,0.2),(0.1,0.1),(0.1,0.2),(0.2,0.1),(0.2,0.2)]
    for i,(gamma,r) in enumerate(Params):
        kpca=decomposition.KernelPCA(n_components=2,kernel='sigmoid',gamma=gamma,coef0=r)
        kpca.fit(X)
        X_r=kpca.transform(X)
        ax=fig.add_subplot(3,2,i+1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= {0}".format(label), color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh({0}(x\cdot z)+{1})$".format(gamma,r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
Project: Parallel-SGD    Author: angadgill
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})    # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)
Project: Parallel-SGD    Author: angadgill
def test_invalid_parameters():
    assert_raises(ValueError, KernelPCA, 10, fit_inverse_transform=True,
                  kernel='precomputed')
Project: Parallel-SGD    Author: angadgill
def test_kernel_pca_linear_kernel():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    # for a linear kernel, kernel PCA should find the same projection as PCA
    # modulo the sign (direction)
    # fit only the first four components: fifth is near zero eigenvalue, so
    # can be trimmed due to roundoff error
    assert_array_almost_equal(
        np.abs(KernelPCA(4).fit(X_fit).transform(X_pred)),
        np.abs(PCA(4).fit(X_fit).transform(X_pred)))
Project: Parallel-SGD    Author: angadgill
def test_kernel_pca_n_components():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("dense", "arpack"):
        for c in [1, 2, 4]:
            kpca = KernelPCA(n_components=c, eigen_solver=eigen_solver)
            shape = kpca.fit(X_fit).transform(X_pred).shape

            assert_equal(shape, (2, c))
Project: Parallel-SGD    Author: angadgill
def test_kernel_pca_invalid_kernel():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((2, 4))
    kpca = KernelPCA(kernel="tototiti")
    assert_raises(ValueError, kpca.fit, X_fit)
Project: Parallel-SGD    Author: angadgill
def test_gridsearch_pipeline():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    assert_equal(grid_search.best_score_, 1)
Project: Parallel-SGD    Author: angadgill
def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(Perceptron__n_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)
Project: sef    Author: passalis
def __init__(self, data, input_dimensionality, output_dimensionality, kernel_type='rbf',
                 degree=2, sigma=0, kernel_scaling=1, c=1, scaler=None):
        """
        Creates a Kernel SEF object
        :param data: the data to be used by the kernel
        :param input_dimensionality: dimensionality of the input space
        :param output_dimensionality: dimensionality of the target space
        :param learning_rate: learning rate to be used for the optimization
        :param kernel_type: supported kernel: 'rbf', 'poly', and 'linear'
        :param degree: degree of the polynomial kernel
        :param sigma: the sigma value for the RBF kernel
        :param kernel_scaling: scaling parameter for the kernel
        :param c: constant kernel param for linear and poly kernels
        :param regularizer_weight: weight of the regularizer
        :param scaler: the sklearn-compatible scaler (or None)
        """
        # Call base constructor
        SEF_Base.__init__(self, input_dimensionality, output_dimensionality, scaler=scaler)

        # Adjustable parameters
        self.kernel_type = kernel_type
        self.degree = degree
        self.sigma_kernel = np.float32(sigma)
        self.alpha = kernel_scaling
        self.c = c

        # If a scaler is given, fit it; otherwise just cast the data
        if self.scaler is None:
            data = np.float32(data)
        else:
            data = np.float32(self.scaler.fit_transform(data))

        # If the rbf kernel is used and no sigma is supplied, estimate it;
        # otherwise fall back to sigma = 1 only when none was given
        if sigma == 0 and self.kernel_type == 'rbf':
            self.sigma_kernel = np.float32(mean_data_distance(data))
        elif sigma == 0:
            self.sigma_kernel = np.float32(1)

        # Use kPCA for initialization
        kpca = KernelPCA(kernel=self.kernel_type, n_components=self.output_dimensionality,
                         gamma=(1.0 / (self.sigma_kernel ** 2)), degree=self.degree, eigen_solver='dense')
        kpca.fit(data)
        A = kpca.alphas_
        # Scale the coefficients to have unit norm (avoid rescaling)
        A = A / np.sqrt(np.diag(np.dot(A.T, np.dot(np.dot(data, data.T), A))))

        # Model parameters
        self.X_kernel = Variable(torch.from_numpy(np.float32(data)), requires_grad=False)
        self.A = Variable(torch.from_numpy(np.float32(A)), requires_grad=True)

        self.trainable_params = [self.A]
        self.non_trainable_params = [self.X_kernel]
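
For context, this sef snippet presumably sits under imports along these lines; SEF_Base and mean_data_distance are defined elsewhere in the sef project, so only the external dependencies are sketched here:

import numpy as np
import torch
from torch.autograd import Variable  # the snippet predates PyTorch 0.4, where Variable was merged into Tensor
from sklearn.decomposition import KernelPCA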