我们从Python开源项目中,提取了以下11个代码示例,用于说明如何使用sklearn.neighbors.KDTree()。
def get_nearby_words(main_words): main_inds = {} all_words = [] all_vecs = [] with open(OPTS.wordvec_file) as f: for i, line in tqdm(enumerate(f)): toks = line.rstrip().split(' ') word = unicode(toks[0], encoding='ISO-8859-1') vec = np.array([float(x) for x in toks[1:]]) all_words.append(word) all_vecs.append(vec) if word in main_words: main_inds[word] = i print >> sys.stderr, 'Found vectors for %d/%d words = %.2f%%' % ( len(main_inds), len(main_words), 100.0 * len(main_inds) / len(main_words)) tree = KDTree(all_vecs) nearby_words = {} for word in tqdm(main_inds): dists, inds = tree.query([all_vecs[main_inds[word]]], k=OPTS.num_neighbors + 1) nearby_words[word] = [ {'word': all_words[i], 'dist': d} for d, i in zip(dists[0], inds[0])] return nearby_words
def _nearest_neighbor(df1, df2): """ For a DataFrame of xy coordinates find the nearest xy coordinates in a subsequent DataFrame Parameters ---------- df1 : pandas.DataFrame DataFrame of records to return as the nearest record to records in df2 df2 : pandas.DataFrame DataFrame of records with xy coordinates for which to find the nearest record in df1 for Returns ------- df1.index.values[indexes] : pandas.Series index of records in df1 that are nearest to the coordinates in df2 """ kdt = KDTree(df1.as_matrix()) indexes = kdt.query(df2.as_matrix(), k=1, return_distance=False) return df1.index.values[indexes]
def _add(self, state, r): if self._current_capacity >= self._capacity: # find the LRU entry old_index = np.argmin(self._lru) # ?????????? self._states[old_index] = state self._q_values[old_index] = r self._lru[old_index] = self._time else: self._states[self._current_capacity] = state self._q_values[self._current_capacity] = r self._lru[self._current_capacity] = self._time self._current_capacity += 1 self._time += 0.01 # ???????? self._tree = KDTree(self._states[:self._current_capacity]) # TDOO: _time?????????????????
def test_unsupervised_inputs(): # test the types of valid input into NearestNeighbors X = rng.random_sample((10, 3)) nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1) nbrs_fid.fit(X) dist1, ind1 = nbrs_fid.kneighbors(X) nbrs = neighbors.NearestNeighbors(n_neighbors=1) for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)): nbrs.fit(input) dist2, ind2 = nbrs.kneighbors(X) assert_array_almost_equal(dist1, dist2) assert_array_almost_equal(ind1, ind2)
def retrieve_image(target_image, model_file, deploy_file, imagemean_file, threshold=1): model_dir = os.path.dirname(model_file) image_files = np.load(os.path.join(model_dir, 'image_files.npy')) fc7_feature_mat = np.load(os.path.join(model_dir, 'fc7_features.npy')) latent_feature_file = os.path.join(model_dir, 'latent_features.npy') latent_feature_mat = np.load(latent_feature_file) candidates = [] dist = 0 for layer, mat in layer_features(['latent', 'fc7'], model_file, deploy_file, imagemean_file, [target_image], show_pred=True): if layer == 'latent': # coarse-level search mat = binary_hash_codes(mat) mat = mat * np.ones((latent_feature_mat.shape[0], 1)) dis_mat = np.abs(mat - latent_feature_mat) hamming_dis = np.sum(dis_mat, axis=1) distance_file = os.path.join(model_dir, 'hamming_dis.npy') np.save(distance_file, hamming_dis) candidates = np.where(hamming_dis < threshold)[0] if layer == 'fc7': # fine-level search kdt = KDTree(fc7_feature_mat[candidates], metric='euclidean') k = 6 if not candidates.shape[0] > 6: k = candidates.shape[0] dist, idxs = kdt.query(mat, k=k) candidates = candidates[idxs] print(dist) return image_files[candidates][0], dist[0]
def _find_neighborhoods(self, targets, constraints): if not constraints: raise ValueError('No constraints in neighbor search.') if any(np.isnan(v) for v in constraints.values()): raise ValueError('Nan constraints in neighbor search.') # Extract the targets, constraints from the dataset. lookup = list(targets) + list(constraints) D = self._dataset(lookup) # Not enough neighbors: crash for now. Workarounds include: # (i) reduce K, (ii) randomly drop constraints, (iii) impute dataset. if len(D) < self.K: raise ValueError('Not enough neighbors: %s' % ((targets, constraints),)) # Code the dataset with Euclidean embedding. N = len(targets) D_qr_code = self._dummy_code(D[:,:N], lookup[:N]) D_ev_code = self._dummy_code(D[:,N:], lookup[N:]) D_code = np.column_stack((D_qr_code, D_ev_code)) # Run nearest neighbor search on the constraints only. constraints_code = self._dummy_code( [constraints.values()], constraints.keys()) dist, neighbors = KDTree(D_ev_code).query(constraints_code, k=len(D)) # Check for equidistant neighbors and possibly extend the search. valid = [i for i, d in enumerate(dist[0]) if d <= dist[0][self.K-1]] if self.K < len(valid): neighbors = self.rng.choice(neighbors[0][valid], replace=False, size=self.K) else: neighbors = neighbors[0][:self.K] # For each neighbor, find its nearest M on the full lookup set. _, ex = KDTree(D_code).query(D_code[neighbors], k=min(self.M, self.K)) # Return the dataset and the list of neighborhoods. return D[:,:len(targets)], ex
def add(self, keys, values): skip_indices = [] if self.curr_capacity >= 1: dist, ind = self.tree.query(keys, k=1) for i, d in enumerate(dist): if d[0] < 0.001: new_value = values[i] index = ind[i][0] self.q_values[index] = self.q_values[index]*0.9 + new_value*0.1 skip_indices.append(i) for i, _ in enumerate(keys): if i in skip_indices: continue if self.curr_capacity >= self.capacity: # find the LRU entry old_index = np.argmin(self.lru) self.states[old_index] = keys[i] self.q_values[old_index] = values[i] self.lru[old_index] = self.tm else: self.states[self.curr_capacity] = keys[i] self.q_values[self.curr_capacity] = values[i] self.lru[self.curr_capacity] = self.tm self.curr_capacity+=1 self.tm += 0.01 self.tree = KDTree(self.states[:self.curr_capacity])
def add(self, keys, values): if self.curr_capacity >= 5: dist, ind = self.tree.query(np.pad(keys, ((0,0),(0,1)), 'constant', constant_values=1.0), k=5) #dist, ind = self.tree.query(keys, k=50) for i, ind_ in enumerate(ind): stren = 1 - self.alpha self.weights[ind_] = self.weights[ind_] * stren for i, _ in enumerate(keys): low_w = 1.0 if self.curr_capacity >= self.capacity: # find the LRU entry old_index = np.argmin(self.weights) low_w = min(low_w, self.weights[old_index]) index = old_index else: index = self.curr_capacity self.curr_capacity+=1 self.states[index] = keys[i] self.q_values[index] = values[i] self.weights[index] = 1.0 self.tree = KDTree(np.concatenate((self.states[:self.curr_capacity], np.expand_dims(self.weights[:self.curr_capacity], axis=1)),axis=1)) #self.tree = KDTree(self.states[:self.curr_capacity])
def plot_decision_boundry(data, pipe, reducer=PCA): fig, ax = plt.subplots(figsize=(16, 12)) if callable(reducer): reducer = reducer(n_components=2) # else assume it's already been instantiated... if isinstance(pipe, Pipeline) and len(pipe.steps) > 1: prepipe = Pipeline(pipe.steps[:-1]) km = pipe.steps[-1][1] data_ = prepipe.transform(data) elif isinstance(pipe, Pipeline): prepipe = None km = pipe.steps[0][1] data_ = data else: prepipe = None km = pipe data_ = data X_reduced = reducer.fit_transform(data_) cluster_centers = getattr(km, 'cluster_centers_', compute_centers(km, data_)) mu_reduced = reducer.transform(cluster_centers) n_clusters = len(np.unique(km.labels_)) tree = KDTree(mu_reduced) cmap = rediscretize_cmap(n_clusters, 'Set1') ax.scatter(mu_reduced[:, 0], mu_reduced[:, 1], c=np.arange(n_clusters), cmap=cmap, s=300) colorbar_index(ncolors=n_clusters, cmap=cmap) ax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=km.labels_, cmap=cmap, alpha=.95) xmin, xmax = ax.get_xlim() ymin, ymax = ax.get_ylim() xx, yy = np.meshgrid(np.linspace(xmin, xmax, 100), np.linspace(ymin, ymax, 100)) T = np.c_[xx.ravel(), yy.ravel()] _, group = tree.query(T) Z = group.ravel().reshape(xx.shape) ax.pcolormesh(xx, yy, Z, alpha=.25, cmap=cmap) ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) for label, xy in enumerate(mu_reduced[:, :2]): ax.annotate(label, xy, fontsize=28, fontweight="bold") return ax
def estimatenormals(points, npoints = 40, method = 'pca'): """ estimate the normals of points :param points: an array of [x, y, z] :param method: 'pca' or 'ransac', theoretically ransac is more precise when there are more points :return: a list of normal vectors author: weiwei date: 20170714 """ pointsnormals = [] camerapos = np.array([0.0,0.0,0.0]) kdt = KDTree(points) if method == 'pca': regionpntidlist = kdt.query(points, k=npoints, return_distance=False) for i, pntidlist in enumerate(regionpntidlist): regionpnts = points[pntidlist] covmat = np.cov(regionpnts.T) eigvalues, eigmat = np.linalg.eig(covmat) idx = np.argmin(eigvalues) eigvec = eigmat[:, idx] if np.dot(eigvec, camerapos-points[i]) < 0: eigvec = -eigvec pointsnormals.append(eigvec) elif method == 'ransac': # NOTE: this part is not usable due to small npoints ransacer = linear_model.RANSACRegressor(linear_model.LinearRegression()) regionpntidlist = kdt.query(points, k=npoints, return_distance=False) for i, pntidlist in enumerate(regionpntidlist): XYZ = points[pntidlist] ransacer.fit(XYZ[:, 0:2], XYZ[:, 2]) inlier_mask = ransacer.inlier_mask_ regionpnts = XYZ[inlier_mask] covmat = np.cov(regionpnts.T) eigvalues, eigmat = np.linalg.eig(covmat) idx = np.argmin(eigvalues) eigvec = eigmat[:, idx] if np.dot(eigvec, camerapos-points[i]) < 0: eigvec = -eigvec pointsnormals.append(eigvec) return pointsnormals
def test_neighbors_metrics(n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5): # Test computing the neighbors for various metrics # create a symmetric matrix V = rng.rand(n_features, n_features) VI = np.dot(V, V.T) metrics = [('euclidean', {}), ('manhattan', {}), ('minkowski', dict(p=1)), ('minkowski', dict(p=2)), ('minkowski', dict(p=3)), ('minkowski', dict(p=np.inf)), ('chebyshev', {}), ('seuclidean', dict(V=rng.rand(n_features))), ('wminkowski', dict(p=3, w=rng.rand(n_features))), ('mahalanobis', dict(VI=VI))] algorithms = ['brute', 'ball_tree', 'kd_tree'] X = rng.rand(n_samples, n_features) test = rng.rand(n_query_pts, n_features) for metric, metric_params in metrics: results = [] p = metric_params.pop('p', 2) for algorithm in algorithms: # KD tree doesn't support all metrics if (algorithm == 'kd_tree' and metric not in neighbors.KDTree.valid_metrics): assert_raises(ValueError, neighbors.NearestNeighbors, algorithm=algorithm, metric=metric, metric_params=metric_params) continue neigh = neighbors.NearestNeighbors(n_neighbors=n_neighbors, algorithm=algorithm, metric=metric, p=p, metric_params=metric_params) neigh.fit(X) results.append(neigh.kneighbors(test, return_distance=True)) assert_array_almost_equal(results[0][0], results[1][0]) assert_array_almost_equal(results[0][1], results[1][1])