Python scipy module: sparse() code examples

The following 50 code examples, extracted from open-source Python projects, illustrate how to use the scipy.sparse module.
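Before the project snippets, here is a minimal, self-contained sketch (not from any of the projects below) of the scipy.sparse basics most of the examples rely on: building a matrix from COO triplets and converting between formats.

import numpy as np
import scipy.sparse

# build a 3x3 matrix from (value, (row, col)) triplets in COO format
rows = np.array([0, 1, 2])
cols = np.array([1, 2, 0])
vals = np.array([10.0, 20.0, 30.0])
A = scipy.sparse.coo_matrix((vals, (rows, cols)), shape=(3, 3))

# convert to CSR (fast row slicing) and CSC (fast column slicing)
A_csr = A.tocsr()
A_csc = A.tocsc()
print(A_csr.nnz)  # 3 stored non-zeros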

Project: probabilistic-matrix-factorization    Author: aki-nishimura
def __init__(self, y_coo, num_factor, bias_scale, factor_scale, weight=None):

        if weight is None:
            weight = np.ones(y_coo.data.size)

        self.y_coo = y_coo
        self.y_csr = scipy.sparse.csr_matrix(y_coo)
        self.y_csc = scipy.sparse.csc_matrix(y_coo)
        self.num_factor = num_factor
        self.prior_param = {
            'col_bias_scale': bias_scale,
            'row_bias_scale': bias_scale,
            'factor_scale': np.tile(factor_scale, self.num_factor),
            'weight': weight,
            'obs_df': float('inf'),
            'param_df': float('inf'),
        }
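The constructor above keeps the same ratings in three formats at once: COO is cheap to build, CSR gives fast row access, and CSC gives fast column access. A small sketch with a made-up 2x3 rating matrix:

import numpy as np
import scipy.sparse

y_coo = scipy.sparse.coo_matrix(
    (np.array([5.0, 3.0, 4.0]), (np.array([0, 0, 1]), np.array([0, 2, 1]))),
    shape=(2, 3))
y_csr = scipy.sparse.csr_matrix(y_coo)  # efficient row slices, y_csr[i, :]
y_csc = scipy.sparse.csc_matrix(y_coo)  # efficient column slices, y_csc[:, j]
print(y_csr.getrow(0).toarray())  # [[5. 0. 3.]]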
Project: pynufft    Author: jyhmiinlin
def _linear_phase(self, n_shift):
        """
        Private: Select the center of FOV
        """
        om = self.st['om']
        M = self.st['M']
        final_shifts = tuple(
            numpy.array(n_shift) +
            numpy.array(self.st['Nd']) / 2)

        phase = numpy.exp(
            1.0j *
            numpy.sum(
                om * numpy.tile(
                    final_shifts,
                    (M,1)),
                1))
        # add up all the linear phases along all axes

        self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
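The scipy.sparse call doing the work here is diags: left-multiplying by a diagonal matrix scales each row of the gridding matrix by one phase. A toy sketch (scipy.sparse.random needs scipy >= 0.17; the phase values are arbitrary stand-ins):

import numpy
import scipy.sparse

p0 = scipy.sparse.random(4, 6, density=0.5, format='csr')  # stand-in gridding matrix
phase = numpy.exp(1.0j * numpy.linspace(0.0, 1.0, 4))      # one phase per row
p = scipy.sparse.diags(phase, 0).dot(p0)                   # row m of p0 scaled by phase[m]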
Project: pynufft    Author: jyhmiinlin
def precompute(self):

#         CSR_W = cuda_cffi.cusparse.CSR.to_CSR(self.st['W_gpu'],diag_type=True)

#         Dia_W_cpu = scipy.sparse.dia_matrix( (self.st['M'], self.st['M']),dtype=dtype)
#         Dia_W_cpu = scipy.sparse.dia_matrix( ( self.st['W'], 0 ), shape=(self.st['M'], self.st['M']) )
#         Dia_W_cpu = scipy.sparse.diags(self.st['W'], format="csr", dtype=dtype)
#         CSR_W = cuda_cffi.cusparse.CSR.to_CSR(Dia_W_cpu)


        self.st['pHp_gpu'] = self.CSRH.gemm(self.CSR)
        self.st['pHp'] = self.st['pHp_gpu'].get()
        print('untrimmed', self.st['pHp'].nnz)
        self.truncate_selfadjoint(1e-5)
        print('trimmed', self.st['pHp'].nnz)
        self.st['pHp_gpu'] = cuda_cffi.cusparse.CSR.to_CSR(self.st['pHp'])
#         self.st['pHWp_gpu'] = self.CSR.conj().gemm(CSR_W,transA=cuda_cffi.cusparse.CUSPARSE_OPERATION_TRANSPOSE)
#         self.st['pHWp_gpu'] = self.st['pHWp_gpu'].gemm(self.CSR, transA=cuda_cffi.cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE)
Project: pynufft    Author: jyhmiinlin
def linear_phase(self, n_shift):
        '''
        Select the center of FOV
        '''
        om = self.st['om']
        M = self.st['M']
        final_shifts = tuple(
            numpy.array(n_shift) +
            numpy.array(self.st['Nd']) / 2)

        phase = numpy.exp(
            1.0j *
            numpy.sum(
                om * numpy.tile(
                    final_shifts,
                    (M,1)),
                1))
        # add up all the linear phases along all axes

        self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
        # left-multiply the gridding matrix by the diagonal linear-phase matrix
Project: pynufft    Author: jyhmiinlin
def finalization(self):
        '''
        Add sparse matrix multiplication on GPU
        Note: use "python-cuda-cffi" generated interface to access cusparse

        '''
        self.gpu_flag = 0

        self.CSR = cuda_cffi.cusparse.CSR.to_CSR(self.st['p'].astype(dtype), )
        self.CSRH = cuda_cffi.cusparse.CSR.to_CSR(self.st['p'].getH().tocsr().astype(dtype), )

        self.scikit_plan = cu_fft.Plan(self.st['Kd'], dtype, dtype)
#         self.pHp = cuda_cffi.cusparse.CSR.to_CSR(
#             self.st['pHp'].astype(dtype))

        self.gpu_flag = 1
        self.sn_gpu = pycuda.gpuarray.to_gpu(self.sn.astype(dtype))
#         tmp_array = skcuda.misc.ones((numpy.prod(self.st['Kd']),1),dtype=dtype)
#         tmp = cuda_cffi.cusolver.csrlsvqr(self.CSR, tmp_array)
Project: pynufft    Author: jyhmiinlin
def _linear_phase(self, n_shift):
        """
        Private: Select the center of FOV
        """
        om = self.st['om']
        M = self.st['M']
        final_shifts = tuple(
            numpy.array(n_shift) +
            numpy.array(
                self.Nd) /
            2)
        phase = numpy.exp(
            1.0j *
            numpy.sum(
                om *
                numpy.tile(
                    final_shifts,
                    (M,
                     1)),
                1))
        # add up all the linear phases along all axes

        self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
        return 0  # shifted sparse matrix
Project: pynufft    Author: jyhmiinlin
def _linear_phase(self, n_shift):
        """
        Private: Select the center of FOV
        """
        om = self.st['om']
        M = self.st['M']
        final_shifts = tuple(
            numpy.array(n_shift) +
            numpy.array(self.st['Nd']) / 2)

        phase = numpy.exp(
            1.0j *
            numpy.sum(
                om * numpy.tile(
                    final_shifts,
                    (M,1)),
                1))
        # add up all the linear phases along all axes

        self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
Project: probabilistic-matrix-factorization    Author: aki-nishimura
def prepare_matrix(val, row_var, col_var):
        # Takes a vector of observed values and two categorical variables
        # and returns a sparse matrix in COO format that can be used to
        # instantiate the class. Also returned are dictionaries that map the
        # row and column categories to matrix indices.
        #
        # Params:
        # val: numpy array of values
        # row_var, col_var: categorical sequences supporting .unique()
        #     (e.g. pandas Series)

        row_id = row_var.unique()
        col_id = col_var.unique()
        nrow = row_id.size
        ncol = col_id.size

        # Associate each of the unique id names to a row and column index.
        row_id_map = {row_id[index]: index for index in range(len(row_id))}
        col_id_map = {col_id[index]: index for index in range(len(col_id))}

        row_indices = np.array([row_id_map[id] for id in row_var])
        col_indices = np.array([col_id_map[id] for id in col_var])
        y_coo = scipy.sparse.coo_matrix((val, (row_indices, col_indices)), shape=(nrow, ncol))

        return y_coo, row_id_map, col_id_map
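A hypothetical call to prepare_matrix, shown here as a plain function for illustration; the inputs are assumed to support .unique(), as pandas Series do:

import numpy as np
import pandas as pd

val = np.array([4.0, 5.0, 3.0])
row_var = pd.Series(['alice', 'alice', 'bob'])    # e.g. user ids
col_var = pd.Series(['item1', 'item2', 'item1'])  # e.g. item ids

y_coo, row_map, col_map = prepare_matrix(val, row_var, col_var)
print(y_coo.shape)  # (2, 2)
print(row_map)      # {'alice': 0, 'bob': 1}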
Project: MIT-Thesis    Author: alec-heif
def sparse(size, *args):
        """
        Create a sparse vector, using either a dictionary, a list of
        (index, value) pairs, or two separate arrays of indices and
        values (sorted by index).

        :param size: Size of the vector.
        :param args: Non-zero entries, as a dictionary, list of tuples,
                     or two sorted lists containing indices and values.

        >>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
        SparseVector(4, {1: 1.0, 3: 5.5})
        >>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)])
        SparseVector(4, {1: 1.0, 3: 5.5})
        >>> Vectors.sparse(4, [1, 3], [1.0, 5.5])
        SparseVector(4, {1: 1.0, 3: 5.5})
        """
        return SparseVector(size, *args)
Project: MIT-Thesis    Author: alec-heif
def _equals(v1_indices, v1_values, v2_indices, v2_values):
        """
        Check equality between sparse/dense vectors.
        v1_indices and v2_indices are assumed to be strictly increasing.
        """
        v1_size = len(v1_values)
        v2_size = len(v2_values)
        k1 = 0
        k2 = 0
        all_equal = True
        while all_equal:
            while k1 < v1_size and v1_values[k1] == 0:
                k1 += 1
            while k2 < v2_size and v2_values[k2] == 0:
                k2 += 1

            if k1 >= v1_size or k2 >= v2_size:
                return k1 >= v1_size and k2 >= v2_size

            all_equal = v1_indices[k1] == v2_indices[k2] and v1_values[k1] == v2_values[k2]
            k1 += 1
            k2 += 1
        return all_equal
Project: MIT-Thesis    Author: alec-heif
def test_ml_mllib_vector_conversion(self):
        # to ml
        # dense
        mllibDV = Vectors.dense([1, 2, 3])
        mlDV1 = newlinalg.Vectors.dense([1, 2, 3])
        mlDV2 = mllibDV.asML()
        self.assertEqual(mlDV2, mlDV1)
        # sparse
        mllibSV = Vectors.sparse(4, {1: 1.0, 3: 5.5})
        mlSV1 = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
        mlSV2 = mllibSV.asML()
        self.assertEqual(mlSV2, mlSV1)
        # from ml
        # dense
        mllibDV1 = Vectors.dense([1, 2, 3])
        mlDV = newlinalg.Vectors.dense([1, 2, 3])
        mllibDV2 = Vectors.fromML(mlDV)
        self.assertEqual(mllibDV1, mllibDV2)
        # sparse
        mllibSV1 = Vectors.sparse(4, {1: 1.0, 3: 5.5})
        mlSV = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
        mllibSV2 = Vectors.fromML(mlSV)
        self.assertEqual(mllibSV1, mllibSV2)
Project: MIT-Thesis    Author: alec-heif
def test_serialize(self):
        from scipy.sparse import lil_matrix
        lil = lil_matrix((4, 1))
        lil[1, 0] = 1
        lil[3, 0] = 2
        sv = SparseVector(4, {1: 1, 3: 2})
        self.assertEqual(sv, _convert_to_vector(lil))
        self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
        self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
        self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
        self.assertEqual(sv, _convert_to_vector(lil.todok()))

        def serialize(l):
            return ser.loads(ser.dumps(_convert_to_vector(l)))
        self.assertEqual(sv, serialize(lil))
        self.assertEqual(sv, serialize(lil.tocsc()))
        self.assertEqual(sv, serialize(lil.tocsr()))
        self.assertEqual(sv, serialize(lil.todok()))
Project: cupy    Author: cupy
def get(self, stream=None):
        """Returns a copy of the array on host memory.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.coo_matrix: Copy of the array on host memory.

        """
        if not _scipy_available:
            raise RuntimeError('scipy is not available')

        data = self.data.get(stream)
        row = self.row.get(stream)
        col = self.col.get(stream)
        return scipy.sparse.coo_matrix(
            (data, (row, col)), shape=self.shape)
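The .get() round-trip above can be sketched end to end. This is a hedged example assuming a recent CuPy, where the sparse module lives at cupyx.scipy.sparse (the snippets here use the older cupy.sparse path), and a CUDA device is available:

import scipy.sparse
import cupyx.scipy.sparse  # assumption: recent CuPy installed

A_cpu = scipy.sparse.random(100, 100, density=0.01, format='coo')
A_gpu = cupyx.scipy.sparse.coo_matrix(A_cpu)  # upload to the GPU
A_back = A_gpu.get()                          # back to host as scipy.sparse.coo_matrix
assert abs(A_back - A_cpu).nnz == 0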
Project: cupy    Author: cupy
def transpose(self, axes=None, copy=False):
        """Returns a transpose matrix.

        Args:
            axes: This option is not supported.
            copy (bool): If ``True``, the returned matrix shares no data.
                Otherwise, it shares data arrays as much as possible.

        Returns:
            cupy.sparse.spmatrix: Transpose matrix.

        """
        if axes is not None:
            raise ValueError(
                'Sparse matrices do not support an \'axes\' parameter because '
                'swapping dimensions is the only logical permutation.')
        shape = self.shape[1], self.shape[0]
        return coo_matrix(
            (self.data, (self.col, self.row)), shape=shape, copy=copy)
Project: cupy    Author: cupy
def get(self, stream=None):
        """Returns a copy of the array on host memory.

        .. warning::
           You need to install SciPy to use this method.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.csc_matrix: Copy of the array on host memory.

        """
        if not _scipy_available:
            raise RuntimeError('scipy is not available')
        data = self.data.get(stream)
        indices = self.indices.get(stream)
        indptr = self.indptr.get(stream)
        return scipy.sparse.csc_matrix(
            (data, indices, indptr), shape=self._shape)
Project: cupy    Author: cupy
def tocsr(self, copy=False):
        """Converts the matrix to Compressed Sparse Row format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. This option is currently ignored, because no
                arrays can be shared in the CSR-to-CSC conversion.

        Returns:
            cupy.sparse.csr_matrix: Converted matrix.

        """
        return self.T.tocsc(copy=False).T

    # TODO(unno): Implement todia
    # TODO(unno): Implement todok
    # TODO(unno): Implement tolil
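The same double-transpose trick works in plain scipy: transposing a CSC matrix yields a CSR matrix for free, so a CSC-to-CSR conversion can be phrased exactly as above. A quick sketch verifying the equivalence:

import scipy.sparse

A = scipy.sparse.random(5, 7, density=0.3, format='csc')
B = A.T.tocsc().T   # the tocsr() trick from the snippet above
C = A.tocsr()       # direct conversion
assert scipy.sparse.isspmatrix_csr(B) and abs(B - C).nnz == 0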
Project: cupy    Author: cupy
def transpose(self, axes=None, copy=False):
        """Returns a transpose matrix.

        Args:
            axes: This option is not supported.
            copy (bool): If ``True``, the returned matrix shares no data.
                Otherwise, it shares data arrays as much as possible.

        Returns:
            cupy.sparse.spmatrix: Transpose matrix.

        """
        if axes is not None:
            raise ValueError(
                'Sparse matrices do not support an \'axes\' parameter because '
                'swapping dimensions is the only logical permutation.')

        shape = self.shape[1], self.shape[0]
        return cupy.sparse.csr_matrix(
            (self.data, self.indices, self.indptr), shape=shape, copy=copy)
Project: cupy    Author: cupy
def get(self, stream=None):
        """Returns a copy of the array on host memory.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.csr_matrix: Copy of the array on host memory.

        """
        if not _scipy_available:
            raise RuntimeError('scipy is not available')
        data = self.data.get(stream)
        indices = self.indices.get(stream)
        indptr = self.indptr.get(stream)
        return scipy.sparse.csr_matrix(
            (data, indices, indptr), shape=self._shape)
Project: cupy    Author: cupy
def tocoo(self, copy=False):
        """Converts the matrix to COOdinate format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible.

        Returns:
            cupy.sparse.coo_matrix: Converted matrix.

        """
        if copy:
            data = self.data.copy()
            indices = self.indices.copy()
        else:
            data = self.data
            indices = self.indices

        return cusparse.csr2coo(self, data, indices)
Project: slitSpectrographBlind    Author: aasensio
def comp_ola_sdeconv(gx_gpu, gy_gpu, xx_gpu, xy_gpu, Ftpy_gpu, f_gpu, L_gpu, alpha, beta, gamma=0):
    """
    Computes the division in Fourier space needed for sparse deconvolution
    """

    sfft = xx_gpu.shape
    block_size = (16,16,1)   
    grid_size = (int(np.ceil(np.float32(sfft[0]*sfft[1])/block_size[0])),
                 int(np.ceil(np.float32(sfft[2])/block_size[1])))

    mod = cu.module_from_buffer(cubin)
    comp_ola_sdeconv_Kernel = mod.get_function("comp_ola_sdeconv_Kernel")

    z_gpu = cua.zeros(sfft, np.complex64)

    comp_ola_sdeconv_Kernel(z_gpu.gpudata,
                            np.int32(sfft[0]), np.int32(sfft[1]), np.int32(sfft[2]),
                            gx_gpu.gpudata, gy_gpu.gpudata,
                            xx_gpu.gpudata, xy_gpu.gpudata, 
                            Ftpy_gpu.gpudata, f_gpu.gpudata, L_gpu.gpudata,
                            np.float32(alpha), np.float32(beta),
                            np.float32(gamma),
                            block=block_size, grid=grid_size)

    return z_gpu
Project: paragraph2vec    Author: thunlp
def iter_chunks(self, chunksize=None):
        """
        Iteratively yield the index as chunks of documents, each of size <= chunksize.

        The chunk is returned in its raw form (matrix or sparse matrix slice).
        The size of the chunk may be smaller than requested; it is up to the caller
        to check the result for real length, using `chunk.shape[0]`.
        """
        self.close_shard()

        if chunksize is None:
            # if not explicitly specified, use the chunksize from the constructor
            chunksize = self.chunksize

        for shard in self.shards:
            query = shard.get_index().index
            for chunk_start in xrange(0, query.shape[0], chunksize):
                # scipy.sparse doesn't allow slicing beyond real size of the matrix
                # (unlike numpy). so, clip the end of the chunk explicitly to make
                # scipy.sparse happy
                chunk_end = min(query.shape[0], chunk_start + chunksize)
                chunk = query[chunk_start: chunk_end] # create a view
                yield chunk
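The explicit min() clip matters because scipy.sparse slicing has historically been stricter than numpy's about ranges that run past the real size of the matrix. A minimal sketch of the same chunking pattern on a plain csr_matrix:

import scipy.sparse

query = scipy.sparse.random(10, 4, density=0.5, format='csr')
chunksize = 3
for chunk_start in range(0, query.shape[0], chunksize):
    # clip the end explicitly, as iter_chunks does above
    chunk_end = min(query.shape[0], chunk_start + chunksize)
    chunk = query[chunk_start:chunk_end]
    print(chunk.shape[0])  # 3, 3, 3, 1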
Project: smrt    Author: smrt-model
def extend_2pol_npol(x, npol):

    if npol == 2:
        return x

    if scipy.sparse.isspmatrix_dia(x):
        y = scipy.sparse.diags(extend_2pol_npol(x.diagonal(), npol))
    elif len(x.shape) == 1:
        y = np.zeros(len(x)//2*npol)
        y[0::npol] = x[0::2]
        y[1::npol] = x[1::2]
    elif len(x.shape) == 2:
        y = np.zeros((x.shape[0]//2*npol, x.shape[1]//2*npol))
        y[0::npol, 0::npol] = x[0::2, 0::2]
        y[0::npol, 1::npol] = x[0::2, 1::2]
        y[1::npol, 0::npol] = x[1::2, 0::2]
        y[1::npol, 1::npol] = x[1::2, 1::2]
    else:
        raise SMRTError("should never be here")

    return y
Project: polara    Author: Evfro
def csc_matvec(mat_csc, vec, dense_output=True, dtype=None):
    v_nnz = vec.indices
    v_val = vec.data

    m_val = mat_csc.data
    m_ind = mat_csc.indices
    m_ptr = mat_csc.indptr

    res_dtype = dtype or np.result_type(mat_csc.dtype, vec.dtype)
    if dense_output:
        res = np.zeros((mat_csc.shape[0],), dtype=res_dtype)
        matvec2dense(m_ptr, m_ind, m_val, v_nnz, v_val, res)
    else:
        sizes = m_ptr.take(v_nnz+1) - m_ptr.take(v_nnz)
        sizes = np.concatenate(([0], np.cumsum(sizes)))
        n = sizes[-1]
        data = np.empty((n,), dtype=res_dtype)
        indices = np.empty((n,), dtype=np.intp)
        indptr = np.array([0, n], dtype=np.intp)
        matvec2sparse(m_ptr, m_ind, m_val, v_nnz, v_val, sizes, indices, data)
        res = sp.sparse.csr_matrix((data, indices, indptr), shape=(1, mat_csc.shape[0]), dtype=res_dtype)
        res.sum_duplicates() # expensive operation
    return res
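For orientation, the product csc_matvec computes (a CSC matrix times a sparse vector, with a dense or a 1-row sparse result) can also be written directly in scipy; the snippet's value is the specialized gathering done by the matvec2dense/matvec2sparse helpers, which are compiled elsewhere in polara. A plain-scipy sketch of the equivalent result:

import scipy.sparse as sp

mat_csc = sp.random(6, 4, density=0.4, format='csc')
vec = sp.random(1, 4, density=0.5, format='csr')   # sparse vector over the columns

res_dense = mat_csc.dot(vec.T).toarray().ravel()   # analogue of dense_output=True
res_sparse = mat_csc.dot(vec.T).T.tocsr()          # analogue of dense_output=False, shape (1, nrows)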
Project: polara    Author: Evfro
def _sparse_dot(self, tst_mat, i2i_mat):
        # scipy always returns a sparse result, even if the dot product is actually dense;
        # this function offers a solution to that problem
        # it also takes care of sparse results w.r.t. further processing
        if self.dense_output: # calculate dense result directly
            # TODO implement matmat multiplication instead of iteration with matvec
            res_type = np.result_type(i2i_mat.dtype, tst_mat.dtype)
            scores = np.empty((tst_mat.shape[0], i2i_mat.shape[1]), dtype=res_type)
            for i in xrange(tst_mat.shape[0]):
                v = tst_mat.getrow(i)
                scores[i, :] = csc_matvec(i2i_mat, v, dense_output=True, dtype=res_type)
        else:
            scores = tst_mat.dot(i2i_mat.T)
            # NOTE even though not necessary for a symmetric i2i matrix,
            # transpose helps to avoid expensive conversion to CSR (performed by scipy)
            if scores.nnz > NNZ_MAX:
                # too many nnz lead to undesired memory overhead in downvote_seen_items
                scores = scores.toarray(order='C')
        return scores
Project: AutoML-Challenge    Author: postech-mlg-exbrain
def fit(self, X, y):
        import scipy.sparse
        import sklearn.feature_selection

        self.preprocessor = sklearn.feature_selection.SelectPercentile(
            score_func=self.score_func,
            percentile=self.percentile)

        # Because the pipeline guarantees that each feature is positive,
        # clip all values below zero to zero
        if self.score_func == sklearn.feature_selection.chi2:
            if scipy.sparse.issparse(X):
                X.data[X.data < 0] = 0.0
            else:
                X[X < 0] = 0.0

        self.preprocessor.fit(X, y)
        return self
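A runnable sketch of the selector this class wraps, on toy sparse data; chi2 requires non-negative features, which is exactly why fit() clips negative entries above:

import numpy as np
import scipy.sparse
from sklearn.feature_selection import SelectPercentile, chi2

X = scipy.sparse.random(20, 10, density=0.3, format='csr')
X.data[X.data < 0] = 0.0  # chi2 needs non-negative input
y = np.random.randint(0, 2, size=20)

selector = SelectPercentile(score_func=chi2, percentile=50).fit(X, y)
X_reduced = selector.transform(X)  # keeps roughly half of the columns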
Project: AutoML-Challenge    Author: postech-mlg-exbrain
def transform(self, X):
        import scipy.sparse
        import sklearn.feature_selection

        # Because the pipeline guarantees that each feature is positive,
        # clip all values below zero to zero
        if self.score_func == sklearn.feature_selection.chi2:
            if scipy.sparse.issparse(X):
                X.data[X.data < 0] = 0.0
            else:
                X[X < 0] = 0.0

        if self.preprocessor is None:
            raise NotImplementedError()
        Xt = self.preprocessor.transform(X)
        if Xt.shape[1] == 0:
            raise ValueError(
                "%s removed all features." % self.__class__.__name__)
        return Xt
Project: AutoML-Challenge    Author: postech-mlg-exbrain
def get_hyperparameter_search_space(dataset_properties=None):
        percentile = UniformFloatHyperparameter(
            name="percentile", lower=1, upper=99, default=50)

        score_func = CategoricalHyperparameter(
            name="score_func", choices=["chi2", "f_classif"], default="chi2")
        if dataset_properties is not None:
            # Chi2 can handle sparse data, so we respect this
            if 'is_sparse' in dataset_properties and dataset_properties['is_sparse']:
                score_func = Constant(
                    name="score_func", value="chi2")

        cs = ConfigurationSpace()
        cs.add_hyperparameter(percentile)
        cs.add_hyperparameter(score_func)

        return cs
Project: AutoML-Challenge    Author: postech-mlg-exbrain
def fit(self, X, y):
        import scipy.sparse
        import sklearn.feature_selection

        self.preprocessor = sklearn.feature_selection.GenericUnivariateSelect(
            score_func=self.score_func, param=self.alpha, mode=self.mode)

        # Because the pipeline guarantees that each feature is positive,
        # clip all values below zero to zero
        if self.score_func == sklearn.feature_selection.chi2:
            if scipy.sparse.issparse(X):
                X.data[X.data < 0] = 0.0
            else:
                X[X < 0] = 0.0

        self.preprocessor.fit(X, y)
        return self
Project: GVIN    Author: sufengniu
def theta_matrix(coord, adj, preload=True, train=True):
    print "creating adjacent theta matrix ..."
    if preload is True:
        if train is True:
            theta_matrix = np.load('../data/theta_matrix_train_n_100.npy')
        else:
            theta_matrix = np.load('../data/theta_matrix_test_n_100.npy')
    else:
        theta_matrix = []
        for i in tqdm(range(coord.shape[0])):
            for j in range(coord.shape[1]):
                theta_row = angle(coord[i,adj[i][j].nonzero()[1],:] - coord[i,j,:])
                col_indice = adj[i][j].nonzero()[1]
                row_indice = (np.zeros(col_indice.shape[0])).astype(np.int32)
                if j == 0:
                    theta_matrix_tmp = csc_matrix((theta_row, (row_indice, col_indice)), shape=(1,coord.shape[1]))
                else:
                    theta_matrix_tmp = scipy.sparse.vstack((theta_matrix_tmp, csc_matrix((theta_row, (row_indice, col_indice)), shape=(1,coord.shape[1]))))
            theta_matrix.append(theta_matrix_tmp)
        theta_matrix = np.array(theta_matrix)
    return theta_matrix
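Stacking one sparse row at a time with scipy.sparse.vstack, as theta_matrix does, is simple but quadratic in the number of rows; passing the whole list to vstack once is the usual faster alternative. A small sketch of both (shapes are illustrative):

import numpy as np
import scipy.sparse
from scipy.sparse import csc_matrix

rows = [csc_matrix(np.random.rand(1, 5)) for _ in range(4)]

# incremental stacking, as in the loop above
stacked = rows[0]
for r in rows[1:]:
    stacked = scipy.sparse.vstack((stacked, r))

# one-shot alternative
stacked_fast = scipy.sparse.vstack(rows).tocsc()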
Project: topical_word_embeddings    Author: thunlp
def iter_chunks(self, chunksize=None):
        """
        Iteratively yield the index as chunks of documents, each of size <= chunksize.

        The chunk is returned in its raw form (matrix or sparse matrix slice).
        The size of the chunk may be smaller than requested; it is up to the caller
        to check the result for real length, using `chunk.shape[0]`.
        """
        self.close_shard()

        if chunksize is None:
            # if not explicitly specified, use the chunksize from the constructor
            chunksize = self.chunksize

        for shard in self.shards:
            query = shard.get_index().index
            for chunk_start in xrange(0, query.shape[0], chunksize):
                # scipy.sparse doesn't allow slicing beyond real size of the matrix
                # (unlike numpy). so, clip the end of the chunk explicitly to make
                # scipy.sparse happy
                chunk_end = min(query.shape[0], chunk_start + chunksize)
                chunk = query[chunk_start: chunk_end] # create a view
                yield chunk
Project: topical_word_embeddings    Author: thunlp
def close_shard(self):
        """
        Force the latest shard to close (be converted to a matrix and stored
        to disk). Do nothing if no new documents added since last call.

        **NOTE**: the shard is closed even if it is not full yet (its size is smaller
        than `self.shardsize`). If documents are added later via `add_documents()`,
        this incomplete shard will be loaded again and completed.
        """
        if not self.fresh_docs:
            return
        shardid = len(self.shards)
        # consider the shard sparse if its density is < 30%
        issparse = 0.3 > 1.0 * self.fresh_nnz / (len(self.fresh_docs) * self.num_features)
        if issparse:
            index = SparseMatrixSimilarity(self.fresh_docs, num_terms=self.num_features,
                                           num_docs=len(self.fresh_docs), num_nnz=self.fresh_nnz)
        else:
            index = MatrixSimilarity(self.fresh_docs, num_features=self.num_features)
        logger.info("creating %s shard #%s" % ('sparse' if issparse else 'dense', shardid))
        shard = Shard(self.shardid2filename(shardid), index)
        shard.num_best = self.num_best
        shard.num_nnz = self.fresh_nnz
        self.shards.append(shard)
        self.fresh_docs, self.fresh_nnz = [], 0
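The density heuristic is easy to reproduce for any scipy.sparse matrix; a minimal sketch using the same 30% threshold:

import scipy.sparse

A = scipy.sparse.random(100, 50, density=0.05, format='csr')
density = A.nnz / float(A.shape[0] * A.shape[1])
print(density, density < 0.3)  # ~0.05 True, so this shard would be kept sparse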
Project: UnsupervisedHypernymy    Author: vered1986
def get_contexts_rank(targets, cooc_mat, target_index):
    """
    Build a dictionary in which each key is a target word and the value is a sorted list of
    context columns, in descending order of association strength
    :param targets: the target words
    :param cooc_mat: the co-occurrence matrix
    :param target_index: a mapping from target words to row indices in cooc_mat
    :return: the contexts_rank dictionary
    """
    contexts_rank = {}

    for target in targets:
        index = target_index.get(target, -1)

        if index == -1:
            contexts_rank[target] = []
            continue

        row = cooc_mat[index, :]
        contexts_rank[target] = sort_by_value_get_col(scipy.sparse.coo_matrix(row.mat)) # tuples of (row, col, value)

    return contexts_rank
Project: UnsupervisedHypernymy    Author: vered1986
def APSyn(x_row, y_row, N):
    """
    APSyn(x, y) = \sum_{f \in N(f_x) \cap N(f_y)} \frac{1}{(rank_x(f) + rank_y(f)) / 2}
    :param x_row: sparse row vector of contexts for the first word
    :param y_row: sparse row vector of contexts for the second word
    :param N: the number of top contexts to consider
    :return: the APSyn score
    """

    # Sort y's contexts
    y_contexts_cols = sort_by_value_get_col(scipy.sparse.coo_matrix(y_row.mat)) # tuples of (row, col, value)
    y_contexts_cols = y_contexts_cols[:N]
    y_context_rank = { c : i + 1 for i, c in enumerate(y_contexts_cols) }

    # Sort x's contexts
    x_contexts_cols = sort_by_value_get_col(scipy.sparse.coo_matrix(x_row.mat))
    x_contexts_cols = x_contexts_cols[:N]
    x_context_rank = { c : i + 1 for i, c in enumerate(x_contexts_cols) }

    # Sum of 1 / ((rank(w1) + rank(w2)) / 2) over the features shared by the top N contexts
    intersected_context = set(y_contexts_cols).intersection(set(x_contexts_cols))
    score = sum([1.0 / ((x_context_rank[c] + y_context_rank[c]) / 2.0) for c in intersected_context])
    #score *= (1.0 / N)

    return score
Project: SCaIP    Author: simonsfoundation
def make_G_matrix(T,g):
    ''' create matrix of autoregression to enforce indicator dynamics
    Inputs: 
    T: positive integer
        number of time-bins
    g: np.ndarray, vector p x 1
        Discrete time constants

    Output:
    G: sparse diagonal matrix
        Matrix of autoregression
    '''    
    if type(g) is np.ndarray:    
        if len(g) == 1 and g < 0:
            g=0

#        gs=np.matrix(np.hstack((-np.flipud(g[:]).T,1)))
        gs=np.matrix(np.hstack((1,-(g[:]).T)))
        ones_=np.matrix(np.ones((T,1)))
        G = spdiags((ones_*gs).T,range(0,-len(g)-1,-1),T,T)    

        return G
    else:
        raise Exception('g must be an array')
#%%
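A worked sketch of the spdiags construction for T = 5 and a single time constant g = [0.9]: G comes out lower bidiagonal, with ones on the main diagonal and -0.9 just below it, so G.dot(c) computes the AR residual c[t] - 0.9*c[t-1]:

import numpy as np
from scipy.sparse import spdiags

# same construction as make_G_matrix with T=5, g=np.array([0.9])
T = 5
gs = np.matrix([[1.0, -0.9]])
ones_ = np.matrix(np.ones((T, 1)))
G = spdiags((ones_ * gs).T, range(0, -2, -1), T, T)
print(G.toarray())
# [[ 1.   0.   0.   0.   0. ]
#  [-0.9  1.   0.   0.   0. ]
#  [ 0.  -0.9  1.   0.   0. ]
#  [ 0.   0.  -0.9  1.   0. ]
#  [ 0.   0.   0.  -0.9  1. ]]
c = np.arange(T, dtype=float)
print(G.dot(c))  # entry t equals c[t] - 0.9 * c[t-1] for t >= 1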
Project: document_classification    Author: scotthlee
def process(self, df, x_name, y_name=None, ngrams=2, max_features=35000, method='counts', binary=True, sparse=False):
        #choosing the particular flavor of vectorizer
        if method == 'counts':
            vectorizer = CountVectorizer(max_features=max_features, ngram_range=(1, ngrams), decode_error='replace', binary=binary)
        elif method == 'tfidf':
            vectorizer = TfidfVectorizer(max_features=max_features, ngram_range=(1, ngrams), decode_error='replace')

        #fitting the vectorizer and converting the counts to an array
        full_fit = vectorizer.fit_transform(df[x_name])
        full_counts = full_fit.toarray()
        self.vocabulary_ = vectorizer.vocabulary_

        #passing the attributes up to the class instance
        self.data = df
        if sparse:
            full_counts = csr_matrix(full_counts)
        self.X = full_counts
        if y_name is not None:
            self.y = np.array(df[y_name])
        return

    #splits the data into training and test sets; either called from process()
    #or on its own when your text is already vectorized and divided into x and y
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def _sparse_series_to_coo(ss, row_levels=(0, ), column_levels=(1, ),
                          sort_labels=False):
    """ Convert a SparseSeries to a scipy.sparse.coo_matrix using index
    levels row_levels, column_levels as the row and column
    labels respectively. Returns the sparse_matrix, row and column labels.
    """

    import scipy.sparse

    if ss.index.nlevels < 2:
        raise ValueError('to_coo requires MultiIndex with nlevels >= 2')
    if not ss.index.is_unique:
        raise ValueError('Duplicate index entries are not allowed in to_coo '
                         'transformation.')

    # to keep things simple, only rely on integer indexing (not labels)
    row_levels = [ss.index._get_level_number(x) for x in row_levels]
    column_levels = [ss.index._get_level_number(x) for x in column_levels]

    v, i, j, rows, columns = _to_ijv(ss, row_levels=row_levels,
                                     column_levels=column_levels,
                                     sort_labels=sort_labels)
    sparse_matrix = scipy.sparse.coo_matrix(
        (v, (i, j)), shape=(len(rows), len(columns)))
    return sparse_matrix, rows, columns
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def _create_missing_idx(nrows, ncols, density, random_state=None):
    if random_state is None:
        random_state = np.random
    else:
        random_state = np.random.RandomState(random_state)

    # below is cribbed from scipy.sparse
    size = int(np.round((1 - density) * nrows * ncols))
    # generate a few more to ensure unique values
    min_rows = 5
    fac = 1.02
    extra_size = min(size + min_rows, fac * size)

    def _gen_unique_rand(rng, _extra_size):
        ind = rng.rand(int(_extra_size))
        return np.unique(np.floor(ind * nrows * ncols))[:size]

    ind = _gen_unique_rand(random_state, extra_size)
    while ind.size < size:
        extra_size *= 1.05
        ind = _gen_unique_rand(random_state, extra_size)

    j = np.floor(ind * 1. / nrows).astype(int)
    i = (ind - j * nrows).astype(int)
    return i.tolist(), j.tolist()
Project: FermiLib    Author: ProjectQ-Framework
def jw_number_restrict_operator(operator, n_electrons, n_qubits=None):
    """Restrict a Jordan-Wigner encoded operator to a given particle number

    Args:
        operator(ndarray or sparse): Numpy or scipy.sparse operator acting on
            the space of n_qubits.
        n_electrons(int): Number of particles to restrict the operator to
        n_qubits(int): Number of qubits defining the total state

    Returns:
        new_operator(ndarray or sparse): Numpy operator restricted to
            acting on states with the same particle number.
    """
    if n_qubits is None:
        n_qubits = int(numpy.log2(operator.shape[0]))

    select_indices = jw_number_indices(n_electrons, n_qubits)
    return operator[numpy.ix_(select_indices, select_indices)]
Project: FermiLib    Author: ProjectQ-Framework
def get_ground_state(sparse_operator):
    """Compute lowest eigenvalue and eigenstate.

    Returns:
        eigenvalue: The lowest eigenvalue, a float.
        eigenstate: The lowest eigenstate in scipy.sparse csc format.
    """
    if not is_hermitian(sparse_operator):
        raise ValueError('sparse_operator must be Hermitian.')

    values, vectors = scipy.sparse.linalg.eigsh(
        sparse_operator, 2, which='SA', maxiter=1e7)

    eigenstate = scipy.sparse.csc_matrix(vectors[:, 0])
    eigenvalue = values[0]
    return eigenvalue, eigenstate.getH()
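A self-contained sketch of the eigsh call, with a toy Hermitian operator (a 1-D Laplacian) standing in for the Jordan-Wigner operator:

import scipy.sparse
import scipy.sparse.linalg

n = 16
lap = scipy.sparse.diags([-1.0, 2.0, -1.0], [-1, 0, 1], shape=(n, n), format='csc')

# 'SA' requests the smallest algebraic eigenvalues, as in get_ground_state
values, vectors = scipy.sparse.linalg.eigsh(lap, k=2, which='SA')
ground_energy, ground_state = values[0], vectors[:, 0]
print(ground_energy)  # close to 2 - 2*cos(pi/(n+1))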
Project: nonce2vec    Author: minimalparts
def iter_chunks(self, chunksize=None):
        """
        Iteratively yield the index as chunks of documents, each of size <= chunksize.

        The chunk is returned in its raw form (matrix or sparse matrix slice).
        The size of the chunk may be smaller than requested; it is up to the caller
        to check the result for real length, using `chunk.shape[0]`.
        """
        self.close_shard()

        if chunksize is None:
            # if not explicitly specified, use the chunksize from the constructor
            chunksize = self.chunksize

        for shard in self.shards:
            query = shard.get_index().index
            for chunk_start in xrange(0, query.shape[0], chunksize):
                # scipy.sparse doesn't allow slicing beyond real size of the matrix
                # (unlike numpy). so, clip the end of the chunk explicitly to make
                # scipy.sparse happy
                chunk_end = min(query.shape[0], chunk_start + chunksize)
                chunk = query[chunk_start: chunk_end]  # create a view
                yield chunk
Project: lazy-semantic-indexing    Author: o19s
def _getCscMatrix(self):  # compressed sparse column matrix
        if self._cscMatrix is not None:
            return self._cscMatrix
        # data and indices are parallel arrays,
        # data storing values (i.e. tf*idf) and indices storing row indices
        num_nnz, data, indices, indptr = 0, [], [], [0]
        for termVector in self._termVectors:
            newIndices = [i for i in termVector[1].keys()]
            newValues = [v for v in termVector[1].values()]
            indices.extend(newIndices)
            data.extend(newValues)
            num_nnz += len(newValues)
            indptr.append(num_nnz)
        data = numpy.asarray(data)
        indices = numpy.asarray(indices)
        # compressed sparse column matrix
        # Rows terms, column docs
        #
        #        doc1   doc2   doc3
        # 'the'    1      1     1
        # 'cat'    1      0     2
        self._cscMatrix = scipy.sparse.csc_matrix((data, indices, indptr),
                shape=(self.numTerms, self.numDocs))
        return self._cscMatrix
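The indptr bookkeeping above matches scipy's CSC layout exactly. Here is the toy 'the'/'cat' matrix from the comment, built with the same (data, indices, indptr) triple:

import numpy
import scipy.sparse

#        doc1  doc2  doc3
# 'the'    1     1     1
# 'cat'    1     0     2
data    = numpy.array([1, 1, 1, 1, 2])  # non-zeros in column-major order
indices = numpy.array([0, 1, 0, 0, 1])  # row (term) index of each value
indptr  = numpy.array([0, 2, 3, 5])     # where each column (doc) starts in data
m = scipy.sparse.csc_matrix((data, indices, indptr), shape=(2, 3))
print(m.toarray())  # [[1 1 1], [1 0 2]]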
Project: TensorGlue    Author: Evfro
def svd_recommender(self):
        userid, itemid, contextid, values = self.fields
        test_idx = (self.test.testset[userid].values,
                    self.test.testset[itemid].values)
        if contextid:
            #TODO: refactor it! need to think about dependence on self.arrange_by and contextid
            #values are contextualized already
            test_val = self.test.testset[values].values
        else:
            test_val = self.test.testset[values].values

        v = self._items_factors
        test_shp = (self.test.testset[userid].max()+1,
                    v.shape[1])

        test_matrix = sp.sparse.coo_matrix((test_val, test_idx),
                                           shape=test_shp,
                                           dtype=np.float64).tocsr()

        svd_scores = (test_matrix.dot(v.T)).dot(v)
        return svd_scores
Project: pyspark    Author: v-v-vishnevskiy
def test_serialize(self):
        from scipy.sparse import lil_matrix
        lil = lil_matrix((4, 1))
        lil[1, 0] = 1
        lil[3, 0] = 2
        sv = SparseVector(4, {1: 1, 3: 2})
        self.assertEqual(sv, _convert_to_vector(lil))
        self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
        self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
        self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
        self.assertEqual(sv, _convert_to_vector(lil.todok()))

        def serialize(l):
            return ser.loads(ser.dumps(_convert_to_vector(l)))
        self.assertEqual(sv, serialize(lil))
        self.assertEqual(sv, serialize(lil.tocsc()))
        self.assertEqual(sv, serialize(lil.tocsr()))
        self.assertEqual(sv, serialize(lil.todok()))
Project: pynufft    Author: jyhmiinlin
def _matvec(self, x_vec):
        '''
        dot operation provided for scipy.sparse.linalg
        wrapper of self.forward()
        '''

        x2 = numpy.reshape(x_vec, self.st['Nd'], order='F')

        return self.forward(x2)
Project: pynufft    Author: jyhmiinlin
def _matvec(self, x_vec):
        """
        (To be tested): dot operation provided for scipy.sparse.linalg
        wrapper of self.forward()
        """

        x2 = numpy.reshape(x_vec, self.Nd, order='F')

        return self.forward(x2)
Project: pyrsss    Author: butala
def asmatrix(self):
        """
        Return the sparse matrix representation of the separable filter.
        """
        h_matrix = NP.array([1])
        for i in range(self.ndim):
            if self.mode == 'circ':
                h_i = Convmtx([self.k[i]], self.h_list[i], mode=self.mode)
            else:
                h_i = Convmtx([self.n[i]], self.h_list[i], mode=self.mode)
            h_matrix = scipy.sparse.kron(h_matrix, h_i)
        return h_matrix
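scipy.sparse.kron assembles the full operator of a separable filter from its per-axis factors, exactly as the loop above does; a toy sketch with two small diagonal stand-ins for the Convmtx factors:

import scipy.sparse

h_rows = scipy.sparse.diags([1.0, 2.0, 3.0])  # toy per-axis operator, axis 0
h_cols = scipy.sparse.diags([1.0, -1.0])      # toy per-axis operator, axis 1

# Kronecker product: acts on vectorized 2-D data of shape (3, 2)
h_matrix = scipy.sparse.kron(h_rows, h_cols)
print(h_matrix.shape)  # (6, 6)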