我们从Python开源项目中,提取了以下17个代码示例,用于说明如何使用sklearn.neighbors.kneighbors_graph()。
def _build_graph(self): """Compute the graph Laplacian.""" # Graph sparsification if self.sparsify == 'epsilonNN': self.A_ = radius_neighbors_graph(self.X_, self.radius, include_self=False) else: Q = kneighbors_graph( self.X_, self.n_neighbors, include_self = False ).astype(np.bool) if self.sparsify == 'kNN': self.A_ = (Q + Q.T).astype(np.float64) elif self.sparsify == 'MkNN': self.A_ = (Q.multiply(Q.T)).astype(np.float64) # Edge re-weighting if self.reweight == 'rbf': W = rbf_kernel(self.X_, gamma=self.t) self.A_ = self.A_.multiply(W) return sp.csgraph.laplacian(self.A_, normed=self.normed)
def cluster_agglomerative(X_train, model_args=None, gridsearch=True, connectivity_graph=True, connectivity_graph_neighbors=10): from sklearn.cluster import AgglomerativeClustering from sklearn.neighbors import kneighbors_graph print('AgglomerativeClustering') if connectivity_graph: print('Creating k-neighbors graph for connectivity restraint') connectivity = kneighbors_graph(X_train, n_neighbors=connectivity_graph_neighbors) model_args['connectivity'] = connectivity if gridsearch is True: ## TODO: # add hyperparamter searching. No scoring method available for this model, # so we can't easily use gridsearching. raise NotImplementedError('No hyperparameter optimization available yet for this model. Set gridsearch to False') # prune(param_grid, model_args) else: if 'n_clusters' not in model_args: raise KeyError('Need to define n_clusters for AgglomerativeClustering') param_grid = None return ModelWrapper(AgglomerativeClustering, X=X_train, model_args=model_args, param_grid=param_grid, unsupervised=True)
def test_kneighbors_graph_sparse(seed=36): # Test kneighbors_graph to build the k-Nearest Neighbor graph # for sparse input. rng = np.random.RandomState(seed) X = rng.randn(10, 10) Xcsr = csr_matrix(X) for n_neighbors in [1, 2, 3]: for mode in ["connectivity", "distance"]: assert_array_almost_equal( neighbors.kneighbors_graph(X, n_neighbors, mode=mode).toarray(), neighbors.kneighbors_graph(Xcsr, n_neighbors, mode=mode).toarray())
def test_isomap_simple_grid(): # Isomap should preserve distances when all neighbors are used N_per_side = 5 Npts = N_per_side ** 2 n_neighbors = Npts - 1 # grid of equidistant points in 2D, n_components = n_dim X = np.array(list(product(range(N_per_side), repeat=2))) # distances from each point to all others G = neighbors.kneighbors_graph(X, n_neighbors, mode='distance').toarray() for eigen_solver in eigen_solvers: for path_method in path_methods: clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2, eigen_solver=eigen_solver, path_method=path_method) clf.fit(X) G_iso = neighbors.kneighbors_graph(clf.embedding_, n_neighbors, mode='distance').toarray() assert_array_almost_equal(G, G_iso)
def ward_hc(self, n_clusters, n_neighbors=10): """ Perform Ward hierarchical clustering Parameters ---------- n_clusters : int number of clusters lsi_components : int apply LSA before the clustering algorithm n_neighbors : int N nearest neighbors used for computing the connectivity matrix """ from sklearn.cluster import AgglomerativeClustering from sklearn.neighbors import kneighbors_graph pars = {'n_neighbors': n_neighbors, 'is_hierarchical': True, "metric": self.metric} if 'lsi' not in self.pipeline: raise ValueError("you must use lsi with birch clustering " "for scaling reasons.") # This is really not efficient as # it's done a second time in _cluster_func X = self.pipeline.data connectivity = kneighbors_graph(X, n_neighbors=n_neighbors, include_self=False) km = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward', connectivity=connectivity) return self._cluster_func(n_clusters, km, pars)
def test_kneighbors_graph(): # Test kneighbors_graph to build the k-Nearest Neighbor graph. X = np.array([[0, 1], [1.01, 1.], [2, 0]]) # n_neighbors = 1 A = neighbors.kneighbors_graph(X, 1, mode='connectivity', include_self=True) assert_array_equal(A.toarray(), np.eye(A.shape[0])) A = neighbors.kneighbors_graph(X, 1, mode='distance') assert_array_almost_equal( A.toarray(), [[0.00, 1.01, 0.], [1.01, 0., 0.], [0.00, 1.40716026, 0.]]) # n_neighbors = 2 A = neighbors.kneighbors_graph(X, 2, mode='connectivity', include_self=True) assert_array_equal( A.toarray(), [[1., 1., 0.], [1., 1., 0.], [0., 1., 1.]]) A = neighbors.kneighbors_graph(X, 2, mode='distance') assert_array_almost_equal( A.toarray(), [[0., 1.01, 2.23606798], [1.01, 0., 1.40716026], [2.23606798, 1.40716026, 0.]]) # n_neighbors = 3 A = neighbors.kneighbors_graph(X, 3, mode='connectivity', include_self=True) assert_array_almost_equal( A.toarray(), [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
def test_non_euclidean_kneighbors(): rng = np.random.RandomState(0) X = rng.rand(5, 5) # Find a reasonable radius. dist_array = pairwise_distances(X).flatten() np.sort(dist_array) radius = dist_array[15] # Test kneighbors_graph for metric in ['manhattan', 'chebyshev']: nbrs_graph = neighbors.kneighbors_graph( X, 3, metric=metric, mode='connectivity', include_self=True).toarray() nbrs1 = neighbors.NearestNeighbors(3, metric=metric).fit(X) assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray()) # Test radiusneighbors_graph for metric in ['manhattan', 'chebyshev']: nbrs_graph = neighbors.radius_neighbors_graph( X, radius, metric=metric, mode='connectivity', include_self=True).toarray() nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X) assert_array_equal(nbrs_graph, nbrs1.radius_neighbors_graph(X).A) # Raise error when wrong parameters are supplied, X_nbrs = neighbors.NearestNeighbors(3, metric='manhattan') X_nbrs.fit(X) assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3, metric='euclidean') X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan') X_nbrs.fit(X) assert_raises(ValueError, neighbors.radius_neighbors_graph, X_nbrs, radius, metric='euclidean')
def test_k_and_radius_neighbors_train_is_not_query(): # Test kneighbors et.al when query is not training data for algorithm in ALGORITHMS: nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm) X = [[0], [1]] nn.fit(X) test_data = [[2], [1]] # Test neighbors. dist, ind = nn.kneighbors(test_data) assert_array_equal(dist, [[1], [0]]) assert_array_equal(ind, [[1], [1]]) dist, ind = nn.radius_neighbors([[2], [1]], radius=1.5) check_object_arrays(dist, [[1], [1, 0]]) check_object_arrays(ind, [[1], [0, 1]]) # Test the graph variants. assert_array_equal( nn.kneighbors_graph(test_data).A, [[0., 1.], [0., 1.]]) assert_array_equal( nn.kneighbors_graph([[2], [1]], mode='distance').A, np.array([[0., 1.], [0., 0.]])) rng = nn.radius_neighbors_graph([[2], [1]], radius=1.5) assert_array_equal(rng.A, [[0, 1], [1, 1]])
def test_include_self_neighbors_graph(): # Test include_self parameter in neighbors_graph X = [[2, 3], [4, 5]] kng = neighbors.kneighbors_graph(X, 1, include_self=True).A kng_not_self = neighbors.kneighbors_graph(X, 1, include_self=False).A assert_array_equal(kng, [[1., 0.], [0., 1.]]) assert_array_equal(kng_not_self, [[0., 1.], [1., 0.]]) rng = neighbors.radius_neighbors_graph(X, 5.0, include_self=True).A rng_not_self = neighbors.radius_neighbors_graph( X, 5.0, include_self=False).A assert_array_equal(rng, [[1., 1.], [1., 1.]]) assert_array_equal(rng_not_self, [[0., 1.], [1., 0.]])
def test_same_knn_parallel(): X, y = datasets.make_classification(n_samples=30, n_features=5, n_redundant=0, random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y) def check_same_knn_parallel(algorithm): clf = neighbors.KNeighborsClassifier(n_neighbors=3, algorithm=algorithm) clf.fit(X_train, y_train) y = clf.predict(X_test) dist, ind = clf.kneighbors(X_test) graph = clf.kneighbors_graph(X_test, mode='distance').toarray() clf.set_params(n_jobs=3) clf.fit(X_train, y_train) y_parallel = clf.predict(X_test) dist_parallel, ind_parallel = clf.kneighbors(X_test) graph_parallel = \ clf.kneighbors_graph(X_test, mode='distance').toarray() assert_array_equal(y, y_parallel) assert_array_almost_equal(dist, dist_parallel) assert_array_equal(ind, ind_parallel) assert_array_almost_equal(graph, graph_parallel) for algorithm in ALGORITHMS: yield check_same_knn_parallel, algorithm
def test_isomap_reconstruction_error(): # Same setup as in test_isomap_simple_grid, with an added dimension N_per_side = 5 Npts = N_per_side ** 2 n_neighbors = Npts - 1 # grid of equidistant points in 2D, n_components = n_dim X = np.array(list(product(range(N_per_side), repeat=2))) # add noise in a third dimension rng = np.random.RandomState(0) noise = 0.1 * rng.randn(Npts, 1) X = np.concatenate((X, noise), 1) # compute input kernel G = neighbors.kneighbors_graph(X, n_neighbors, mode='distance').toarray() centerer = preprocessing.KernelCenterer() K = centerer.fit_transform(-0.5 * G ** 2) for eigen_solver in eigen_solvers: for path_method in path_methods: clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2, eigen_solver=eigen_solver, path_method=path_method) clf.fit(X) # compute output kernel G_iso = neighbors.kneighbors_graph(clf.embedding_, n_neighbors, mode='distance').toarray() K_iso = centerer.fit_transform(-0.5 * G_iso ** 2) # make sure error agrees reconstruction_error = np.linalg.norm(K - K_iso) / Npts assert_almost_equal(reconstruction_error, clf.reconstruction_error())
def makeWard(X, k=2): # connectivity matrix for structured Ward connectivity = kneighbors_graph(X, n_neighbors=10) # make connectivity symmetric connectivity = 0.5 * (connectivity + connectivity.T) return cluster.AgglomerativeClustering(n_clusters=k, linkage='ward', connectivity=connectivity)
def makeAvgLinkage(X=None, k=2): connectivity = kneighbors_graph(X, n_neighbors=10) # make connectivity symmetric connectivity = 0.5 * (connectivity + connectivity.T) return cluster.AgglomerativeClustering(linkage="average", affinity="cityblock", n_clusters=k, connectivity=connectivity)
def makeMaxLinkage(X=None, k=2): connectivity = kneighbors_graph(X, n_neighbors=10) # make connectivity symmetric connectivity = 0.5 * (connectivity + connectivity.T) return cluster.AgglomerativeClustering(linkage="complete", affinity="cityblock", n_clusters=k, connectivity=connectivity)
def fit(self, X, y=None): """Creates an affinity matrix for X using the selected affinity, then applies spectral clustering to this affinity matrix. Parameters ---------- X : array-like or sparse matrix, shape (n_samples, n_features) OR, if affinity==`precomputed`, a precomputed affinity matrix of shape (n_samples, n_samples) """ X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype=np.float64) if X.shape[0] == X.shape[1] and self.affinity != "precomputed": warnings.warn("The spectral clustering API has changed. ``fit``" "now constructs an affinity matrix from data. To use" " a custom affinity matrix, " "set ``affinity=precomputed``.") if self.affinity == 'nearest_neighbors': connectivity = kneighbors_graph(X, n_neighbors=self.n_neighbors, include_self=True) self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T) elif self.affinity == 'precomputed': self.affinity_matrix_ = X else: params = self.kernel_params if params is None: params = {} if not callable(self.affinity): params['gamma'] = self.gamma params['degree'] = self.degree params['coef0'] = self.coef0 self.affinity_matrix_ = pairwise_kernels(X, metric=self.affinity, filter_params=True, **params) random_state = check_random_state(self.random_state) self.labels_ = spectral_clustering(self.affinity_matrix_, n_clusters=self.n_clusters, eigen_solver=self.eigen_solver, random_state=random_state, n_init=self.n_init, eigen_tol=self.eigen_tol, assign_labels=self.assign_labels, norm_laplacian=self.norm_laplacian) return self
def test_neighbors_badargs(): # Test bad argument values: these should all raise ValueErrors assert_raises(ValueError, neighbors.NearestNeighbors, algorithm='blah') X = rng.random_sample((10, 2)) Xsparse = csr_matrix(X) y = np.ones(10) for cls in (neighbors.KNeighborsClassifier, neighbors.RadiusNeighborsClassifier, neighbors.KNeighborsRegressor, neighbors.RadiusNeighborsRegressor): assert_raises(ValueError, cls, weights='blah') assert_raises(ValueError, cls, p=-1) assert_raises(ValueError, cls, algorithm='blah') nbrs = cls(algorithm='ball_tree', metric='haversine') assert_raises(ValueError, nbrs.predict, X) assert_raises(ValueError, ignore_warnings(nbrs.fit), Xsparse, y) nbrs = cls() assert_raises(ValueError, nbrs.fit, np.ones((0, 2)), np.ones(0)) assert_raises(ValueError, nbrs.fit, X[:, :, None], y) nbrs.fit(X, y) assert_raises(ValueError, nbrs.predict, [[]]) if (isinstance(cls, neighbors.KNeighborsClassifier) or isinstance(cls, neighbors.KNeighborsRegressor)): nbrs = cls(n_neighbors=-1) assert_raises(ValueError, nbrs.fit, X, y) nbrs = neighbors.NearestNeighbors().fit(X) assert_raises(ValueError, nbrs.kneighbors_graph, X, mode='blah') assert_raises(ValueError, nbrs.radius_neighbors_graph, X, mode='blah')
def test_k_and_radius_neighbors_duplicates(): # Test behavior of kneighbors when duplicates are present in query for algorithm in ALGORITHMS: nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm) nn.fit([[0], [1]]) # Do not do anything special to duplicates. kng = nn.kneighbors_graph([[0], [1]], mode='distance') assert_array_equal( kng.A, np.array([[0., 0.], [0., 0.]])) assert_array_equal(kng.data, [0., 0.]) assert_array_equal(kng.indices, [0, 1]) dist, ind = nn.radius_neighbors([[0], [1]], radius=1.5) check_object_arrays(dist, [[0, 1], [1, 0]]) check_object_arrays(ind, [[0, 1], [0, 1]]) rng = nn.radius_neighbors_graph([[0], [1]], radius=1.5) assert_array_equal(rng.A, np.ones((2, 2))) rng = nn.radius_neighbors_graph([[0], [1]], radius=1.5, mode='distance') assert_array_equal(rng.A, [[0, 1], [1, 0]]) assert_array_equal(rng.indices, [0, 1, 0, 1]) assert_array_equal(rng.data, [0, 1, 1, 0]) # Mask the first duplicates when n_duplicates > n_neighbors. X = np.ones((3, 1)) nn = neighbors.NearestNeighbors(n_neighbors=1) nn.fit(X) dist, ind = nn.kneighbors() assert_array_equal(dist, np.zeros((3, 1))) assert_array_equal(ind, [[1], [0], [1]]) # Test that zeros are explicitly marked in kneighbors_graph. kng = nn.kneighbors_graph(mode='distance') assert_array_equal( kng.A, np.zeros((3, 3))) assert_array_equal(kng.data, np.zeros(3)) assert_array_equal(kng.indices, [1., 0., 1.]) assert_array_equal( nn.kneighbors_graph().A, np.array([[0., 1., 0.], [1., 0., 0.], [0., 1., 0.]]))