We extracted the following 50 code examples from open-source Python projects to illustrate how to use scipy.sparse().
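As an orientation before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the two patterns that recur throughout: building a matrix in COO format from (value, (row, col)) triplets, and converting it to CSR/CSC for arithmetic.

import numpy as np
import scipy.sparse

# Build a 3x4 matrix from (value, (row, col)) triplets in COO format.
data = np.array([1.0, 2.0, 3.0])
rows = np.array([0, 1, 2])
cols = np.array([1, 3, 0])
m_coo = scipy.sparse.coo_matrix((data, (rows, cols)), shape=(3, 4))

# COO is convenient for construction; convert to CSR/CSC for fast products.
m_csr = m_coo.tocsr()
m_csc = m_coo.tocsc()
print(m_csr.dot(np.ones(4)))  # [1. 2. 3.]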
def __init__(self, y_coo, num_factor, bias_scale, factor_scale, weight=None):
    if weight is None:
        weight = np.ones(y_coo.data.size)
    self.y_coo = y_coo
    self.y_csr = scipy.sparse.csr_matrix(y_coo)
    self.y_csc = scipy.sparse.csc_matrix(y_coo)
    self.num_factor = num_factor
    self.prior_param = {
        'col_bias_scale': bias_scale,
        'row_bias_scale': bias_scale,
        'factor_scale': np.tile(factor_scale, self.num_factor),
        'weight': weight,
        'obs_df': float('inf'),
        'param_df': float('inf'),
    }
def _linear_phase(self, n_shift):
    """
    Private: Select the center of FOV
    """
    om = self.st['om']
    M = self.st['M']
    final_shifts = tuple(
        numpy.array(n_shift) + numpy.array(self.st['Nd']) / 2)
    phase = numpy.exp(
        1.0j * numpy.sum(
            om * numpy.tile(final_shifts, (M, 1)), 1))
    # add up the linear phases in all axes
    self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
def precompute(self):
    # CSR_W = cuda_cffi.cusparse.CSR.to_CSR(self.st['W_gpu'], diag_type=True)
    # Dia_W_cpu = scipy.sparse.dia_matrix((self.st['M'], self.st['M']), dtype=dtype)
    # Dia_W_cpu = scipy.sparse.dia_matrix((self.st['W'], 0), shape=(self.st['M'], self.st['M']))
    # Dia_W_cpu = scipy.sparse.diags(self.st['W'], format="csr", dtype=dtype)
    # CSR_W = cuda_cffi.cusparse.CSR.to_CSR(Dia_W_cpu)

    self.st['pHp_gpu'] = self.CSRH.gemm(self.CSR)
    self.st['pHp'] = self.st['pHp_gpu'].get()
    print('untrimmed', self.st['pHp'].nnz)
    self.truncate_selfadjoint(1e-5)
    print('trimmed', self.st['pHp'].nnz)
    self.st['pHp_gpu'] = cuda_cffi.cusparse.CSR.to_CSR(self.st['pHp'])

    # self.st['pHWp_gpu'] = self.CSR.conj().gemm(CSR_W, transA=cuda_cffi.cusparse.CUSPARSE_OPERATION_TRANSPOSE)
    # self.st['pHWp_gpu'] = self.st['pHWp_gpu'].gemm(self.CSR, transA=cuda_cffi.cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE)
def linear_phase(self, n_shift):
    '''
    Select the center of FOV
    '''
    om = self.st['om']
    M = self.st['M']
    final_shifts = tuple(
        numpy.array(n_shift) + numpy.array(self.st['Nd']) / 2)
    phase = numpy.exp(
        1.0j * numpy.sum(
            om * numpy.tile(final_shifts, (M, 1)), 1))
    # add up the linear phases in all axes; multiply the diagonal,
    # linear phase before the gridding matrix
    self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
def finalization(self):
    '''
    Add sparse matrix multiplication on GPU
    Note: use the "python-cuda-cffi" generated interface to access cusparse
    '''
    self.gpu_flag = 0
    self.CSR = cuda_cffi.cusparse.CSR.to_CSR(self.st['p'].astype(dtype))
    self.CSRH = cuda_cffi.cusparse.CSR.to_CSR(
        self.st['p'].getH().tocsr().astype(dtype))
    self.scikit_plan = cu_fft.Plan(self.st['Kd'], dtype, dtype)
    # self.pHp = cuda_cffi.cusparse.CSR.to_CSR(
    #     self.st['pHp'].astype(dtype))
    self.gpu_flag = 1
    self.sn_gpu = pycuda.gpuarray.to_gpu(self.sn.astype(dtype))
    # tmp_array = skcuda.misc.ones((numpy.prod(self.st['Kd']), 1), dtype=dtype)
    # tmp = cuda_cffi.cusolver.csrlsvqr(self.CSR, tmp_array)
def _linear_phase(self, n_shift):
    """
    Private: Select the center of FOV
    """
    om = self.st['om']
    M = self.st['M']
    final_shifts = tuple(
        numpy.array(n_shift) + numpy.array(self.Nd) / 2)
    phase = numpy.exp(
        1.0j * numpy.sum(
            om * numpy.tile(final_shifts, (M, 1)), 1))
    # add up the linear phases in all axes
    self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
    return 0  # shifted sparse matrix
def prepare_matrix(val, row_var, col_var):
    # Takes a vector of observed values and two categorical variables
    # and returns a sparse matrix in COO format that can be used to
    # instantiate the class. Also returned are dictionaries that map the
    # row and column categories to indices of the matrix.
    #
    # Params:
    #   val: numpy array of observed values
    #   row_var, col_var: pandas Series of categories (the code relies
    #       on their .unique() method)

    row_id = row_var.unique()
    col_id = col_var.unique()
    nrow = row_id.size
    ncol = col_id.size

    # Associate each of the unique id names to a row and column index.
    row_id_map = {row_id[index]: index for index in range(len(row_id))}
    col_id_map = {col_id[index]: index for index in range(len(col_id))}
    row_indices = np.array([row_id_map[id] for id in row_var])
    col_indices = np.array([col_id_map[id] for id in col_var])

    y_coo = scipy.sparse.coo_matrix((val, (row_indices, col_indices)),
                                    shape=(nrow, ncol))
    return y_coo, row_id_map, col_id_map
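A minimal usage sketch for prepare_matrix (the input data here is invented for illustration; note that row_var and col_var must support .unique(), e.g. pandas Series):

import numpy as np
import pandas as pd

# Hypothetical ratings: two users, two items.
val = np.array([4.0, 2.0, 5.0])
row_var = pd.Series(['alice', 'bob', 'alice'])
col_var = pd.Series(['item1', 'item1', 'item2'])

y_coo, row_map, col_map = prepare_matrix(val, row_var, col_var)
print(y_coo.shape)  # (2, 2)
print(row_map)      # {'alice': 0, 'bob': 1}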
def sparse(size, *args):
    """
    Create a sparse vector, using either a dictionary, a list of
    (index, value) pairs, or two separate arrays of indices and
    values (sorted by index).

    :param size: Size of the vector.
    :param args: Non-zero entries, as a dictionary, list of tuples,
                 or two sorted lists containing indices and values.

    >>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
    SparseVector(4, {1: 1.0, 3: 5.5})
    >>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)])
    SparseVector(4, {1: 1.0, 3: 5.5})
    >>> Vectors.sparse(4, [1, 3], [1.0, 5.5])
    SparseVector(4, {1: 1.0, 3: 5.5})
    """
    return SparseVector(size, *args)
def _equals(v1_indices, v1_values, v2_indices, v2_values):
    """
    Check equality between sparse/dense vectors. v1_indices and
    v2_indices are assumed to be strictly increasing.
    """
    v1_size = len(v1_values)
    v2_size = len(v2_values)
    k1 = 0
    k2 = 0
    all_equal = True
    while all_equal:
        while k1 < v1_size and v1_values[k1] == 0:
            k1 += 1
        while k2 < v2_size and v2_values[k2] == 0:
            k2 += 1

        if k1 >= v1_size or k2 >= v2_size:
            return k1 >= v1_size and k2 >= v2_size

        all_equal = v1_indices[k1] == v2_indices[k2] and v1_values[k1] == v2_values[k2]
        k1 += 1
        k2 += 1
    return all_equal
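A quick check of _equals with hand-built index/value arrays (the values are chosen here for illustration):

# Same logical vector [0, 1.0, 0, 5.5] written two ways: explicit zeros
# are skipped by the inner while-loops, so these compare equal.
print(_equals([1, 3], [1.0, 5.5], [0, 1, 3], [0.0, 1.0, 5.5]))  # True
print(_equals([1, 3], [1.0, 5.5], [1, 3], [1.0, 2.0]))          # False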
def test_ml_mllib_vector_conversion(self):
    # to ml
    # dense
    mllibDV = Vectors.dense([1, 2, 3])
    mlDV1 = newlinalg.Vectors.dense([1, 2, 3])
    mlDV2 = mllibDV.asML()
    self.assertEqual(mlDV2, mlDV1)
    # sparse
    mllibSV = Vectors.sparse(4, {1: 1.0, 3: 5.5})
    mlSV1 = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
    mlSV2 = mllibSV.asML()
    self.assertEqual(mlSV2, mlSV1)
    # from ml
    # dense
    mllibDV1 = Vectors.dense([1, 2, 3])
    mlDV = newlinalg.Vectors.dense([1, 2, 3])
    mllibDV2 = Vectors.fromML(mlDV)
    self.assertEqual(mllibDV1, mllibDV2)
    # sparse
    mllibSV1 = Vectors.sparse(4, {1: 1.0, 3: 5.5})
    mlSV = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
    mllibSV2 = Vectors.fromML(mlSV)
    self.assertEqual(mllibSV1, mllibSV2)
def test_serialize(self):
    from scipy.sparse import lil_matrix

    lil = lil_matrix((4, 1))
    lil[1, 0] = 1
    lil[3, 0] = 2
    sv = SparseVector(4, {1: 1, 3: 2})
    self.assertEqual(sv, _convert_to_vector(lil))
    self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
    self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
    self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
    self.assertEqual(sv, _convert_to_vector(lil.todok()))

    def serialize(l):
        return ser.loads(ser.dumps(_convert_to_vector(l)))

    self.assertEqual(sv, serialize(lil))
    self.assertEqual(sv, serialize(lil.tocsc()))
    self.assertEqual(sv, serialize(lil.tocsr()))
    self.assertEqual(sv, serialize(lil.todok()))
def get(self, stream=None):
    """Returns a copy of the array on host memory.

    Args:
        stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
            copy runs asynchronously. Otherwise, the copy is synchronous.

    Returns:
        scipy.sparse.coo_matrix: Copy of the array on host memory.

    """
    if not _scipy_available:
        raise RuntimeError('scipy is not available')
    data = self.data.get(stream)
    row = self.row.get(stream)
    col = self.col.get(stream)
    return scipy.sparse.coo_matrix(
        (data, (row, col)), shape=self.shape)
def transpose(self, axes=None, copy=False):
    """Returns a transpose matrix.

    Args:
        axes: This option is not supported.
        copy (bool): If ``True``, a returned matrix shares no data.
            Otherwise, it shares data arrays as much as possible.

    Returns:
        cupy.sparse.spmatrix: Transpose matrix.

    """
    if axes is not None:
        raise ValueError(
            'Sparse matrices do not support an \'axes\' parameter because '
            'swapping dimensions is the only logical permutation.')
    shape = self.shape[1], self.shape[0]
    return coo_matrix(
        (self.data, (self.col, self.row)), shape=shape, copy=copy)
def get(self, stream=None):
    """Returns a copy of the array on host memory.

    .. warning::
       You need to install SciPy to use this method.

    Args:
        stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
            copy runs asynchronously. Otherwise, the copy is synchronous.

    Returns:
        scipy.sparse.csc_matrix: Copy of the array on host memory.

    """
    if not _scipy_available:
        raise RuntimeError('scipy is not available')
    data = self.data.get(stream)
    indices = self.indices.get(stream)
    indptr = self.indptr.get(stream)
    return scipy.sparse.csc_matrix(
        (data, indices, indptr), shape=self._shape)
def tocsr(self, copy=False):
    """Converts the matrix to Compressed Sparse Row format.

    Args:
        copy (bool): If ``False``, it shares data arrays as much as
            possible. This option is ignored here because no arrays
            can be shared in a CSR-to-CSC conversion.

    Returns:
        cupy.sparse.csr_matrix: Converted matrix.

    """
    return self.T.tocsc(copy=False).T

# TODO(unno): Implement todia
# TODO(unno): Implement todok
# TODO(unno): Implement tolil
def transpose(self, axes=None, copy=False):
    """Returns a transpose matrix.

    Args:
        axes: This option is not supported.
        copy (bool): If ``True``, a returned matrix shares no data.
            Otherwise, it shares data arrays as much as possible.

    Returns:
        cupy.sparse.spmatrix: Transpose matrix.

    """
    if axes is not None:
        raise ValueError(
            'Sparse matrices do not support an \'axes\' parameter because '
            'swapping dimensions is the only logical permutation.')
    shape = self.shape[1], self.shape[0]
    return cupy.sparse.csr_matrix(
        (self.data, self.indices, self.indptr), shape=shape, copy=copy)
def get(self, stream=None):
    """Returns a copy of the array on host memory.

    Args:
        stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
            copy runs asynchronously. Otherwise, the copy is synchronous.

    Returns:
        scipy.sparse.csr_matrix: Copy of the array on host memory.

    """
    if not _scipy_available:
        raise RuntimeError('scipy is not available')
    data = self.data.get(stream)
    indices = self.indices.get(stream)
    indptr = self.indptr.get(stream)
    return scipy.sparse.csr_matrix(
        (data, indices, indptr), shape=self._shape)
def tocoo(self, copy=False):
    """Converts the matrix to COOrdinate format.

    Args:
        copy (bool): If ``False``, it shares data arrays as much as
            possible.

    Returns:
        cupy.sparse.coo_matrix: Converted matrix.

    """
    if copy:
        data = self.data.copy()
        indices = self.indices.copy()
    else:
        data = self.data
        indices = self.indices
    return cusparse.csr2coo(self, data, indices)
def comp_ola_sdeconv(gx_gpu, gy_gpu, xx_gpu, xy_gpu, Ftpy_gpu,
                     f_gpu, L_gpu, alpha, beta, gamma=0):
    """
    Computes the division in Fourier space needed for sparse deconvolution
    """
    sfft = xx_gpu.shape
    block_size = (16, 16, 1)
    grid_size = (int(np.ceil(np.float32(sfft[0] * sfft[1]) / block_size[0])),
                 int(np.ceil(np.float32(sfft[2]) / block_size[1])))

    mod = cu.module_from_buffer(cubin)
    comp_ola_sdeconv_Kernel = mod.get_function("comp_ola_sdeconv_Kernel")

    z_gpu = cua.zeros(sfft, np.complex64)

    comp_ola_sdeconv_Kernel(z_gpu.gpudata,
                            np.int32(sfft[0]), np.int32(sfft[1]), np.int32(sfft[2]),
                            gx_gpu.gpudata, gy_gpu.gpudata,
                            xx_gpu.gpudata, xy_gpu.gpudata,
                            Ftpy_gpu.gpudata, f_gpu.gpudata, L_gpu.gpudata,
                            np.float32(alpha), np.float32(beta), np.float32(gamma),
                            block=block_size, grid=grid_size)

    return z_gpu
def iter_chunks(self, chunksize=None):
    """
    Iteratively yield the index as chunks of documents, each of size <= chunksize.

    The chunk is returned in its raw form (matrix or sparse matrix slice).
    The size of the chunk may be smaller than requested; it is up to the caller
    to check the result for real length, using `chunk.shape[0]`.
    """
    self.close_shard()

    if chunksize is None:
        # if not explicitly specified, use the chunksize from the constructor
        chunksize = self.chunksize

    for shard in self.shards:
        query = shard.get_index().index
        for chunk_start in xrange(0, query.shape[0], chunksize):
            # scipy.sparse doesn't allow slicing beyond real size of the matrix
            # (unlike numpy). so, clip the end of the chunk explicitly to make
            # scipy.sparse happy
            chunk_end = min(query.shape[0], chunk_start + chunksize)
            chunk = query[chunk_start: chunk_end]  # create a view
            yield chunk
def extend_2pol_npol(x, npol):
    if npol == 2:
        return x

    if scipy.sparse.isspmatrix_dia(x):
        y = scipy.sparse.diags(extend_2pol_npol(x.diagonal(), npol))
    elif len(x.shape) == 1:
        y = np.zeros(len(x) // 2 * npol)
        y[0::npol] = x[0::2]
        y[1::npol] = x[1::2]
    elif len(x.shape) == 2:
        y = np.zeros((x.shape[0] // 2 * npol, x.shape[1] // 2 * npol))
        y[0::npol, 0::npol] = x[0::2, 0::2]
        y[0::npol, 1::npol] = x[0::2, 1::2]
        y[1::npol, 0::npol] = x[1::2, 0::2]
        y[1::npol, 1::npol] = x[1::2, 1::2]
    else:
        raise SMRTError("should never be here")

    return y
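A hedged usage sketch for extend_2pol_npol (SMRTError comes from the SMRT package; the data below is invented). A 1-D two-polarization vector [a1, b1, a2, b2, ...] is spread into npol slots per element, leaving the new polarizations zero:

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])  # two elements, two polarizations each
y = extend_2pol_npol(x, 3)
print(y)  # [1. 2. 0. 3. 4. 0.]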
def csc_matvec(mat_csc, vec, dense_output=True, dtype=None):
    v_nnz = vec.indices
    v_val = vec.data

    m_val = mat_csc.data
    m_ind = mat_csc.indices
    m_ptr = mat_csc.indptr

    res_dtype = dtype or np.result_type(mat_csc.dtype, vec.dtype)
    if dense_output:
        res = np.zeros((mat_csc.shape[0],), dtype=res_dtype)
        matvec2dense(m_ptr, m_ind, m_val, v_nnz, v_val, res)
    else:
        sizes = m_ptr.take(v_nnz + 1) - m_ptr.take(v_nnz)
        sizes = np.concatenate(([0], np.cumsum(sizes)))
        n = sizes[-1]
        data = np.empty((n,), dtype=res_dtype)
        indices = np.empty((n,), dtype=np.intp)
        indptr = np.array([0, n], dtype=np.intp)
        matvec2sparse(m_ptr, m_ind, m_val, v_nnz, v_val, sizes, indices, data)
        res = sp.sparse.csr_matrix((data, indices, indptr),
                                   shape=(1, mat_csc.shape[0]),
                                   dtype=res_dtype)
        res.sum_duplicates()  # expensive operation
    return res
def _sparse_dot(self, tst_mat, i2i_mat):
    # scipy always returns a sparse result, even if the dot product is
    # actually dense; this function offers a solution to that problem.
    # It also takes care of the sparse result w.r.t. further processing.
    if self.dense_output:  # calculate dense result directly
        # TODO implement matmat multiplication instead of iteration with matvec
        res_type = np.result_type(i2i_mat.dtype, tst_mat.dtype)
        scores = np.empty((tst_mat.shape[0], i2i_mat.shape[1]), dtype=res_type)
        for i in xrange(tst_mat.shape[0]):
            v = tst_mat.getrow(i)
            scores[i, :] = csc_matvec(i2i_mat, v, dense_output=True, dtype=res_type)
    else:
        scores = tst_mat.dot(i2i_mat.T)
        # NOTE even though not necessary for a symmetric i2i matrix, the
        # transpose helps to avoid expensive conversion to CSR (performed by scipy)
        if scores.nnz > NNZ_MAX:
            # too many nnz lead to undesired memory overhead in downvote_seen_items
            scores = scores.toarray(order='C')
    return scores
def fit(self, X, y):
    import scipy.sparse
    import sklearn.feature_selection

    self.preprocessor = sklearn.feature_selection.SelectPercentile(
        score_func=self.score_func,
        percentile=self.percentile)

    # Because the pipeline guarantees that each feature is positive,
    # clip all values below zero to zero
    if self.score_func == sklearn.feature_selection.chi2:
        if scipy.sparse.issparse(X):
            X.data[X.data < 0] = 0.0
        else:
            X[X < 0] = 0.0

    self.preprocessor.fit(X, y)
    return self
def transform(self, X):
    import scipy.sparse
    import sklearn.feature_selection

    # Because the pipeline guarantees that each feature is positive,
    # clip all values below zero to zero
    if self.score_func == sklearn.feature_selection.chi2:
        if scipy.sparse.issparse(X):
            X.data[X.data < 0] = 0.0
        else:
            X[X < 0] = 0.0

    if self.preprocessor is None:
        raise NotImplementedError()
    Xt = self.preprocessor.transform(X)
    if Xt.shape[1] == 0:
        raise ValueError(
            "%s removed all features." % self.__class__.__name__)
    return Xt
def get_hyperparameter_search_space(dataset_properties=None):
    percentile = UniformFloatHyperparameter(
        name="percentile", lower=1, upper=99, default=50)

    score_func = CategoricalHyperparameter(
        name="score_func", choices=["chi2", "f_classif"], default="chi2")
    if dataset_properties is not None:
        # Chi2 can handle sparse data, so we respect this
        if 'is_sparse' in dataset_properties and dataset_properties['is_sparse']:
            score_func = Constant(name="score_func", value="chi2")

    cs = ConfigurationSpace()
    cs.add_hyperparameter(percentile)
    cs.add_hyperparameter(score_func)

    return cs
def fit(self, X, y):
    import scipy.sparse
    import sklearn.feature_selection

    self.preprocessor = sklearn.feature_selection.GenericUnivariateSelect(
        score_func=self.score_func, param=self.alpha, mode=self.mode)

    # Because the pipeline guarantees that each feature is positive,
    # clip all values below zero to zero
    if self.score_func == sklearn.feature_selection.chi2:
        if scipy.sparse.issparse(X):
            X.data[X.data < 0] = 0.0
        else:
            X[X < 0] = 0.0

    self.preprocessor.fit(X, y)
    return self
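The clipping in the fit/transform methods above exists because sklearn.feature_selection.chi2 rejects negative inputs. A minimal, standalone sketch of the same pattern (random toy data, not from the project):

import numpy as np
import scipy.sparse
import sklearn.feature_selection

rng = np.random.RandomState(0)
X = scipy.sparse.csr_matrix(rng.randn(20, 5))
y = rng.randint(0, 2, size=20)

# chi2 requires non-negative features: clip negatives in the sparse data array.
X.data[X.data < 0] = 0.0

selector = sklearn.feature_selection.SelectPercentile(
    score_func=sklearn.feature_selection.chi2, percentile=50)
Xt = selector.fit_transform(X, y)
print(Xt.shape)  # (20, k), with roughly half the columns kept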
def theta_matrix(coord, adj, preload=True, train=True):
    print("creating adjacent theta matrix ...")
    if preload is True:
        if train is True:
            theta_matrix = np.load('../data/theta_matrix_train_n_100.npy')
        else:
            theta_matrix = np.load('../data/theta_matrix_test_n_100.npy')
    else:
        theta_matrix = []
        for i in tqdm(range(coord.shape[0])):
            for j in range(coord.shape[1]):
                theta_row = angle(coord[i, adj[i][j].nonzero()[1], :] - coord[i, j, :])
                col_indice = adj[i][j].nonzero()[1]
                row_indice = np.zeros(col_indice.shape[0]).astype(np.int32)
                if j == 0:
                    theta_matrix_tmp = csc_matrix(
                        (theta_row, (row_indice, col_indice)),
                        shape=(1, coord.shape[1]))
                else:
                    theta_matrix_tmp = scipy.sparse.vstack(
                        (theta_matrix_tmp,
                         csc_matrix((theta_row, (row_indice, col_indice)),
                                    shape=(1, coord.shape[1]))))
            theta_matrix.append(theta_matrix_tmp)
        theta_matrix = np.array(theta_matrix)
    return theta_matrix
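The row-stacking idiom used by theta_matrix, shown in isolation with toy data (note that stacking inside a loop is quadratic; calling scipy.sparse.vstack once over a list of rows is usually faster):

import numpy as np
import scipy.sparse
from scipy.sparse import csc_matrix

rows = [csc_matrix((np.array([i + 1.0]), (np.array([0]), np.array([i]))),
                   shape=(1, 3)) for i in range(3)]
stacked = scipy.sparse.vstack(rows)
print(stacked.toarray())
# [[1. 0. 0.]
#  [0. 2. 0.]
#  [0. 0. 3.]]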
def close_shard(self):
    """
    Force the latest shard to close (be converted to a matrix and stored
    to disk). Do nothing if no new documents added since last call.

    **NOTE**: the shard is closed even if it is not full yet (its size is smaller
    than `self.shardsize`). If documents are added later via `add_documents()`,
    this incomplete shard will be loaded again and completed.
    """
    if not self.fresh_docs:
        return
    shardid = len(self.shards)
    # consider the shard sparse if its density is < 30%
    issparse = 0.3 > 1.0 * self.fresh_nnz / (len(self.fresh_docs) * self.num_features)
    if issparse:
        index = SparseMatrixSimilarity(self.fresh_docs, num_terms=self.num_features,
                                       num_docs=len(self.fresh_docs), num_nnz=self.fresh_nnz)
    else:
        index = MatrixSimilarity(self.fresh_docs, num_features=self.num_features)
    logger.info("creating %s shard #%s" % ('sparse' if issparse else 'dense', shardid))
    shard = Shard(self.shardid2filename(shardid), index)
    shard.num_best = self.num_best
    shard.num_nnz = self.fresh_nnz
    self.shards.append(shard)
    self.fresh_docs, self.fresh_nnz = [], 0
def get_contexts_rank(targets, cooc_mat, target_index):
    """
    A dictionary in which each key is a target word and the value is a sorted
    list of context columns in descending order
    :param targets: the words
    :return:
    """
    contexts_rank = {}

    for target in targets:
        index = target_index.get(target, -1)

        if index == -1:
            contexts_rank[target] = []
            continue  # out-of-vocabulary target: leave its rank list empty

        row = cooc_mat[index, :]
        # tuples of (row, col, value)
        contexts_rank[target] = sort_by_value_get_col(scipy.sparse.coo_matrix(row.mat))

    return contexts_rank
def APSyn(x_row, y_row, N):
    """
    APSyn(x, y) = \sum_{f \in N(f_x) \cap N(f_y)} \frac{1}{(rank(f_x) + rank(f_y)) / 2}
    :param x_row:
    :param y_row:
    :return:
    """
    # Sort y's contexts
    y_contexts_cols = sort_by_value_get_col(scipy.sparse.coo_matrix(y_row.mat))  # tuples of (row, col, value)
    y_contexts_cols = y_contexts_cols[:N]
    y_context_rank = {c: i + 1 for i, c in enumerate(y_contexts_cols)}

    # Sort x's contexts
    x_contexts_cols = sort_by_value_get_col(scipy.sparse.coo_matrix(x_row.mat))
    x_contexts_cols = x_contexts_cols[:N]
    x_context_rank = {c: i + 1 for i, c in enumerate(x_contexts_cols)}

    # Average of 1 / ((rank(w1) + rank(w2)) / 2) over every feature shared
    # among the top N contexts
    intersected_context = set(y_contexts_cols).intersection(set(x_contexts_cols))
    score = sum([1.0 / ((x_context_rank[c] + y_context_rank[c]) / 2.0) for c in intersected_context])
    # score *= (1.0 / N)

    return score
def make_G_matrix(T, g):
    '''
    Create a matrix of autoregression to enforce indicator dynamics

    Inputs:
    T: positive integer
        number of time-bins
    g: np.ndarray, vector p x 1
        discrete time constants

    Output:
    G: sparse diagonal matrix
        matrix of autoregression
    '''
    if type(g) is np.ndarray:
        if len(g) == 1 and g < 0:
            g = 0
        # gs = np.matrix(np.hstack((-np.flipud(g[:]).T, 1)))
        gs = np.matrix(np.hstack((1, -(g[:]).T)))
        ones_ = np.matrix(np.ones((T, 1)))
        G = spdiags((ones_ * gs).T, range(0, -len(g) - 1, -1), T, T)
        return G
    else:
        raise Exception('g must be an array')
#%%
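What make_G_matrix produces for a single AR(1) coefficient (toy value): a T x T bidiagonal matrix with ones on the main diagonal and -g on the first subdiagonal, so that (G c)_t = c_t - g c_{t-1}.

import numpy as np

G = make_G_matrix(4, np.array([0.5]))
print(G.toarray())
# [[ 1.   0.   0.   0. ]
#  [-0.5  1.   0.   0. ]
#  [ 0.  -0.5  1.   0. ]
#  [ 0.   0.  -0.5  1. ]]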
def process(self, df, x_name, y_name=None, ngrams=2, max_features=35000,
            method='counts', binary=True, sparse=False):
    # choosing the particular flavor of vectorizer
    if method == 'counts':
        vectorizer = CountVectorizer(max_features=max_features,
                                     ngram_range=(1, ngrams),
                                     decode_error='replace',
                                     binary=binary)
    elif method == 'tfidf':
        vectorizer = TfidfVectorizer(max_features=max_features,
                                     ngram_range=(1, ngrams),
                                     decode_error='replace')

    # fitting the vectorizer and converting the counts to an array
    full_fit = vectorizer.fit_transform(df[x_name])
    full_counts = full_fit.toarray()
    self.vocabulary_ = vectorizer.vocabulary_

    # passing the attributes up to the class instance
    self.data = df
    if sparse:
        full_counts = csr_matrix(full_counts)
    self.X = full_counts
    if y_name is not None:
        self.y = np.array(df[y_name])
    return

# splits the data into training and test sets; either called from process()
# or on its own when your text is already vectorized and divided into x and y
def _sparse_series_to_coo(ss, row_levels=(0, ), column_levels=(1, ),
                          sort_labels=False):
    """
    Convert a SparseSeries to a scipy.sparse.coo_matrix using index
    levels row_levels, column_levels as the row and column
    labels respectively. Returns the sparse_matrix, row and column labels.
    """

    import scipy.sparse

    if ss.index.nlevels < 2:
        raise ValueError('to_coo requires MultiIndex with nlevels >= 2')
    if not ss.index.is_unique:
        raise ValueError('Duplicate index entries are not allowed in to_coo '
                         'transformation.')

    # to keep things simple, only rely on integer indexing (not labels)
    row_levels = [ss.index._get_level_number(x) for x in row_levels]
    column_levels = [ss.index._get_level_number(x) for x in column_levels]

    v, i, j, rows, columns = _to_ijv(ss, row_levels=row_levels,
                                     column_levels=column_levels,
                                     sort_labels=sort_labels)
    sparse_matrix = scipy.sparse.coo_matrix(
        (v, (i, j)), shape=(len(rows), len(columns)))
    return sparse_matrix, rows, columns
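This is an internal pandas helper; the public entry point is SparseSeries.to_coo (or, in recent pandas, the Series.sparse.to_coo accessor method). A hedged sketch of the public call, assuming a MultiIndexed series and pandas >= 0.24:

import numpy as np
import pandas as pd

s = pd.Series([3.0, np.nan, 1.0, 2.0],
              index=pd.MultiIndex.from_tuples(
                  [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')]))
ss = s.astype(pd.SparseDtype(float))  # sparse-backed series; NaN is the fill value

A, rows, columns = ss.sparse.to_coo(row_levels=[0], column_levels=[1],
                                    sort_labels=True)
print(A.shape, rows, columns)  # (2, 2) [1, 2] ['a', 'b']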
def _create_missing_idx(nrows, ncols, density, random_state=None):
    if random_state is None:
        random_state = np.random
    else:
        random_state = np.random.RandomState(random_state)

    # below is cribbed from scipy.sparse
    size = int(np.round((1 - density) * nrows * ncols))
    # generate a few more to ensure unique values
    min_rows = 5
    fac = 1.02
    extra_size = min(size + min_rows, fac * size)

    def _gen_unique_rand(rng, _extra_size):
        ind = rng.rand(int(_extra_size))
        return np.unique(np.floor(ind * nrows * ncols))[:size]

    ind = _gen_unique_rand(random_state, extra_size)
    while ind.size < size:
        extra_size *= 1.05
        ind = _gen_unique_rand(random_state, extra_size)

    j = np.floor(ind * 1. / nrows).astype(int)
    i = (ind - j * nrows).astype(int)
    return i.tolist(), j.tolist()
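A quick demonstration of _create_missing_idx (toy sizes): asking for density 0.8 on a 10 x 5 frame yields 10 unique (i, j) positions to blank out.

i, j = _create_missing_idx(10, 5, density=0.8, random_state=42)
print(len(i), len(j))  # 10 10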
def jw_number_restrict_operator(operator, n_electrons, n_qubits=None):
    """Restrict a Jordan-Wigner encoded operator to a given particle number

    Args:
        operator(ndarray or sparse): Numpy operator acting on the space
            of n_qubits.
        n_electrons(int): Number of particles to restrict the operator to
        n_qubits(int): Number of qubits defining the total state

    Returns:
        new_operator(ndarray or sparse): Numpy operator restricted to
            acting on states with the same particle number.
    """
    if n_qubits is None:
        n_qubits = int(numpy.log2(operator.shape[0]))

    select_indices = jw_number_indices(n_electrons, n_qubits)
    return operator[numpy.ix_(select_indices, select_indices)]
def get_ground_state(sparse_operator):
    """Compute lowest eigenvalue and eigenstate.

    Returns:
        eigenvalue: The lowest eigenvalue, a float.
        eigenstate: The lowest eigenstate in scipy.sparse csc format.
    """
    if not is_hermitian(sparse_operator):
        raise ValueError('sparse_operator must be Hermitian.')

    values, vectors = scipy.sparse.linalg.eigsh(
        sparse_operator, 2, which='SA', maxiter=1e7)

    eigenstate = scipy.sparse.csc_matrix(vectors[:, 0])
    eigenvalue = values[0]
    return eigenvalue, eigenstate.getH()
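A standalone check of the eigsh call pattern above on a small Hermitian matrix (toy operator; which='SA' requests the smallest algebraic eigenvalues):

import scipy.sparse
import scipy.sparse.linalg

H = scipy.sparse.diags([1.0, -2.0, 3.0, 0.5]).tocsc()
values, vectors = scipy.sparse.linalg.eigsh(H, k=2, which='SA')
print(values.min())  # -2.0, the lowest eigenvalue of this toy operator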
def _getCscMatrix(self):  # compressed sparse column matrix
    if self._cscMatrix is not None:
        return self._cscMatrix

    # data and indices are parallel arrays: data stores the values
    # (i.e. tf*idf) and indices stores the corresponding row (term) indices
    num_nnz, data, indices, indptr = 0, [], [], [0]
    for termVector in self._termVectors:
        newIndices = [i for i in termVector[1].keys()]
        newValues = [v for v in termVector[1].values()]
        indices.extend(newIndices)
        data.extend(newValues)
        num_nnz += len(newValues)
        indptr.append(num_nnz)

    data = numpy.asarray(data)
    indices = numpy.asarray(indices)

    # compressed sparse column matrix
    # rows are terms, columns are docs
    #
    #       doc1 doc2 doc3
    # 'the'    1    1    1
    # 'cat'    1    0    2
    self._cscMatrix = scipy.sparse.csc_matrix((data, indices, indptr),
                                              shape=(self.numTerms, self.numDocs))
    return self._cscMatrix
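The (data, indices, indptr) construction used above, shown standalone with the tiny term-document example from the comment:

import numpy
import scipy.sparse

# Columns are documents; indptr[k]:indptr[k+1] slices out column k.
data = numpy.array([1, 1, 1, 1, 2])      # stored values
indices = numpy.array([0, 1, 0, 0, 1])   # row (term) indices
indptr = numpy.array([0, 2, 3, 5])       # column boundaries
m = scipy.sparse.csc_matrix((data, indices, indptr), shape=(2, 3))
print(m.toarray())
# [[1 1 1]
#  [1 0 2]]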
def svd_recommender(self):
    userid, itemid, contextid, values = self.fields
    test_idx = (self.test.testset[userid].values,
                self.test.testset[itemid].values)

    if contextid:
        # TODO: refactor it! need to think about dependence on
        # self.arrange_by and contextid; values are contextualized already
        test_val = self.test.testset[values].values
    else:
        test_val = self.test.testset[values].values

    v = self._items_factors
    test_shp = (self.test.testset[userid].max() + 1, v.shape[1])

    test_matrix = sp.sparse.coo_matrix((test_val, test_idx),
                                       shape=test_shp,
                                       dtype=np.float64).tocsr()

    svd_scores = (test_matrix.dot(v.T)).dot(v)
    return svd_scores
def _matvec(self, x_vec):
    '''
    dot operation provided for scipy.sparse.linalg
    wrapper of self.forward()
    '''
    x2 = numpy.reshape(x_vec, self.st['Nd'], order='F')
    return self.forward(x2)
def _matvec(self, x_vec):
    """
    (To be tested): dot operation provided for scipy.sparse.linalg
    wrapper of self.forward()
    """
    x2 = numpy.reshape(x_vec, self.Nd, order='F')
    return self.forward(x2)
def asmatrix(self):
    """
    Return the sparse matrix representation of the separable filter.
    """
    h_matrix = NP.array([1])
    for i in range(self.ndim):
        if self.mode == 'circ':
            h_i = Convmtx([self.k[i]], self.h_list[i], mode=self.mode)
        else:
            h_i = Convmtx([self.n[i]], self.h_list[i], mode=self.mode)
        h_matrix = scipy.sparse.kron(h_matrix, h_i)
    return h_matrix
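The Kronecker-product composition in asmatrix, shown with two small sparse factors (toy matrices): scipy.sparse.kron builds the matrix of a separable operator from its per-axis factors.

import numpy as np
import scipy.sparse

hx = scipy.sparse.csr_matrix(np.array([[1.0, 2.0], [0.0, 1.0]]))
hy = scipy.sparse.identity(2)
h = scipy.sparse.kron(hx, hy)
print(h.toarray())
# [[1. 0. 2. 0.]
#  [0. 1. 0. 2.]
#  [0. 0. 1. 0.]
#  [0. 0. 0. 1.]]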