Python scipy.fftpack 模块,dct() 实例源码

我们从Python开源项目中,提取了以下36个代码示例,用于说明如何使用scipy.fftpack.dct()

项目:sporco    作者:bwohlberg    | 项目源码 | 文件源码
def dctii(x, axes=None):
    """
    Compute a multi-dimensional DCT-II over specified array axes. This
    function is implemented by calling the one-dimensional DCT-II
    :func:`scipy.fftpack.dct` with normalization mode 'ortho' for each
    of the specified axes.

    Parameters
    ----------
    a : array_like
      Input array
    axes : sequence of ints, optional (default None)
      Axes over which to compute the DCT-II.

    Returns
    -------
    y : ndarray
      DCT-II of input array
    """

    if axes is None:
        axes = list(range(x.ndim))
    for ax in axes:
        x = fftpack.dct(x, type=2, axis=ax, norm='ortho')
    return x
项目:tools    作者:kastnerkyle    | 项目源码 | 文件源码
def run_fft_dct_example():
    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    X = d[0]
    X_stft = stft(X, n_fft)
    X_rr = complex_to_real_view(X_stft)
    X_dct = fftpack.dct(X_rr, axis=-1, norm='ortho')
    X_dct_sub = X_dct[1:] - X_dct[:-1]
    std = X_dct_sub.std(axis=0, keepdims=True)
    X_dct_sub += .01 * std * random_state.randn(
        X_dct_sub.shape[0], X_dct_sub.shape[1])
    X_dct_unsub = np.cumsum(X_dct_sub, axis=0)
    X_idct = fftpack.idct(X_dct_unsub, axis=-1, norm='ortho')
    X_irr = real_to_complex_view(X_idct)
    X_r = istft(X_irr, n_fft)[:len(X)]

    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(X))
    wavfile.write("fftdct_rec.wav", fs, soundsc(X_r))
项目:spmpython    作者:mikaem    | 项目源码 | 文件源码
def chebfft(v):
    N = len(v) - 1
    if (N == 0): w = 0;
    x = cos(pi*arange(0,N+1)/N)
    K = fft.fftfreq(2*N, 0.5/N)
    K[N] = 0
    ii = arange(0,N)
    v = hstack([v])
    V = hstack([v, flipud(v[1:N])])
    U = real(fft.fft(V))
    uu = dct(v, 1)
    uv = hstack((uu, uu[1:-1][::-1]))
    W = real(fft.ifft(1j*K*U))
    ww = dst(K*uv, 1)
    from IPython import embed; embed()
    w = zeros(N+1)
    w[1:N] = -W[1:N]/sqrt(1-x[1:N]**2)
    w[0] = sum(ii.T**2*U[0:len(ii)])/float(N) + 0.5*N*U[N]
    w[N] = sum(((-1)**(ii+1))*(ii.T**2)*U[0:len(ii)])/float(N) + 0.5*(-1)**(N+1)*N*U[N]

    return w
项目:ASP    作者:TUIlmenauAMS    | 项目源码 | 文件源码
def DCT4(samples):
    """
        Method to create DCT4 transformation using DCT3

        Arguments   :

            samples : (1D Array) Input samples to be transformed

        Returns     :

            y       :  (1D Array) Transformed output samples

    """

    # Initialize
    samplesup=np.zeros(2*N, dtype = np.float32)
    # Upsample signal:
    samplesup[1::2]=samples

    y=spfft.dct(samplesup,type=3,norm='ortho')*np.sqrt(2)#/2

    return y[0:N]

#The DST4 transform:
项目:Artificial-Intelligence-with-Python    作者:PacktPublishing    | 项目源码 | 文件源码
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)    
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)    
    :param numcep: the number of cepstrum to return, default 13    
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22. 
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """            
    feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph)
    feat = numpy.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
    feat = lifter(feat,ceplifter)
    if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
    return feat
项目:tfkaldi    作者:vrenkens    | 项目源码 | 文件源码
def mfcc(signal, samplerate, conf):
    '''
    Compute MFCC features from an audio signal.

    Args:
        signal: the audio signal from which to compute features. Should be an
            N*1 array
        samplerate: the samplerate of the signal we are working with.
        conf: feature configuration

    Returns:
        A numpy array of size (NUMFRAMES by numcep) containing features. Each
        row holds 1 feature vector, a numpy vector containing the signal
        log-energy
    '''

    feat, energy = fbank(signal, samplerate, conf)
    feat = numpy.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:, :int(conf['numcep'])]
    feat = lifter(feat, float(conf['ceplifter']))
    return feat, numpy.log(energy)
项目:aenet    作者:znaoya    | 项目源码 | 文件源码
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)    
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)    
    :param numcep: the number of cepstrum to return, default 13    
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22. 
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """            
    feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph)
    feat = numpy.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
    feat = lifter(feat,ceplifter)
    if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
    return feat
项目:aenet    作者:znaoya    | 项目源码 | 文件源码
def mfccVTLP(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True, alpha=1.0):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    feat,energy = fbankVTLP(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,False,alpha)
    feat = numpy.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
    feat = lifter(feat,ceplifter)
    if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
    return feat
项目:shenfun    作者:spectralDNS    | 项目源码 | 文件源码
def clenshaw_curtis1D(u, quad="GC"):
    assert u.ndim == 1
    N = u.shape[0]
    if quad == 'GL':
        w = np.arange(0, N, 1, dtype=float)
        w[2:] = 2./(1-w[2:]**2)
        w[0] = 1
        w[1::2] = 0
        ak = dct(u, 1)
        ak /= (N-1)
        return np.sqrt(np.sum(ak*w))

    elif quad == 'GC':
        d = np.zeros(N)
        k = 2*(1 + np.arange((N-1)//2))
        d[::2] = (2./N)/np.hstack((1., 1.-k*k))
        w = dct(d, type=3)
        return np.sqrt(np.sum(u*w))
项目:mpiFFT4py    作者:spectralDNS    | 项目源码 | 文件源码
def dct(a, b, type=2, axis=0, **kw):
    if iscomplexobj(a):
        b.real[:] = dct1(a.real, type=type, axis=axis)
        b.imag[:] = dct1(a.imag, type=type, axis=axis)
        return b

    else:
        b[:] = dct1(a, type=type, axis=axis)
        return b

# Define functions taking both input array and output array
项目:Speaker_recognition    作者:Mgajurel    | 项目源码 | 文件源码
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True,
         winfunc=lambda x:numpy.ones((x,))):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """

    feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc)
    feat = numpy.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
    feat = lifter(feat,ceplifter)
    if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
    return feat
项目:tools    作者:kastnerkyle    | 项目源码 | 文件源码
def dct_compress(X, n_components, window_size=128):
    """
    Compress using the DCT

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        The input signal to compress. Should be 1-dimensional

    n_components : int
        The number of DCT components to keep. Setting n_components to about
        .5 * window_size can give compression with fairly good reconstruction.

    window_size : int
        The input X is broken into windows of window_size, each of which are
        then compressed with the DCT.

    Returns
    -------
    X_compressed : ndarray, shape=(num_windows, window_size)
       A 2D array of non-overlapping DCT coefficients. For use with uncompress

    Reference
    ---------
    http://nbviewer.ipython.org/github/craffel/crucialpython/blob/master/week3/stride_tricks.ipynb
    """
    if len(X) % window_size != 0:
        append = np.zeros((window_size - len(X) % window_size))
        X = np.hstack((X, append))
    num_frames = len(X) // window_size
    X_strided = X.reshape((num_frames, window_size))
    X_dct = fftpack.dct(X_strided, norm='ortho')
    if n_components is not None:
        X_dct = X_dct[:, :n_components]
    return X_dct
项目:tools    作者:kastnerkyle    | 项目源码 | 文件源码
def overlap_dct_compress(X, n_components, window_size):
    """
    Overlap (at 50% of window_size) and compress X.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to compress

    n_components : int
        number of DCT components to keep

    window_size : int
        Size of windows to take

    Returns
    -------
    X_dct : ndarray, shape=(n_windows, n_components)
        Windowed and compressed version of X
    """
    X_strided = halfoverlap(X, window_size)
    X_dct = fftpack.dct(X_strided, norm='ortho')
    if n_components is not None:
        X_dct = X_dct[:, :n_components]
    return X_dct


# Evil voice is caused by adding double the zeros before inverse DCT...
# Very cool bug but makes sense
项目:tools    作者:kastnerkyle    | 项目源码 | 文件源码
def dct_compress(X, n_components, window_size=128):
    """
    Compress using the DCT

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        The input signal to compress. Should be 1-dimensional

    n_components : int
        The number of DCT components to keep. Setting n_components to about
        .5 * window_size can give compression with fairly good reconstruction.

    window_size : int
        The input X is broken into windows of window_size, each of which are
        then compressed with the DCT.

    Returns
    -------
    X_compressed : ndarray, shape=(num_windows, window_size)
       A 2D array of non-overlapping DCT coefficients. For use with uncompress

    Reference
    ---------
    http://nbviewer.ipython.org/github/craffel/crucialpython/blob/master/week3/stride_tricks.ipynb
    """
    if len(X) % window_size != 0:
        append = np.zeros((window_size - len(X) % window_size))
        X = np.hstack((X, append))
    num_frames = len(X) // window_size
    X_strided = X.reshape((num_frames, window_size))
    X_dct = fftpack.dct(X_strided, norm='ortho')
    if n_components is not None:
        X_dct = X_dct[:, :n_components]
    return X_dct
项目:tools    作者:kastnerkyle    | 项目源码 | 文件源码
def overlap_dct_compress(X, n_components, window_size):
    """
    Overlap (at 50% of window_size) and compress X.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to compress

    n_components : int
        number of DCT components to keep

    window_size : int
        Size of windows to take

    Returns
    -------
    X_dct : ndarray, shape=(n_windows, n_components)
        Windowed and compressed version of X
    """
    X_strided = halfoverlap(X, window_size)
    X_dct = fftpack.dct(X_strided, norm='ortho')
    if n_components is not None:
        X_dct = X_dct[:, :n_components]
    return X_dct


# Evil voice is caused by adding double the zeros before inverse DCT...
# Very cool bug but makes sense
项目:tools    作者:kastnerkyle    | 项目源码 | 文件源码
def run_world_dct_example():
    # on chromebook
    # enc 114.229
    # synth 5.165
    fs, d = fetch_sample_speech_tapestry()
    d = d.astype("float32") / 2 ** 15

    def enc():
        temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
        temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
                temporal_positions_h, f0_h, vuv_h)
        temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
                temporal_positions_h, f0_h, vuv_h)

        return spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c

    start = time.time()
    spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
    dct_buf = fftpack.dct(spectrogram_ct)
    n_fft = 512
    n_dct = 20
    dct_buf = dct_buf[:, :n_dct]
    idct_buf = np.zeros((dct_buf.shape[0], n_fft + 1))
    idct_buf[:, :n_dct] = dct_buf
    ispectrogram_ct = fftpack.idct(idct_buf)
    enc_done = time.time()

    y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, spectrogram_ct, fs)
    synth_done = time.time()

    print("enc time: {}".format(enc_done - start))
    print("synth time: {}".format(synth_done - enc_done))
    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
    wavfile.write("out_dct.wav", fs, soundsc(y))


#run_world_mgc_example()
#run_world_base_example()
项目:SONGSHTR    作者:songeater    | 项目源码 | 文件源码
def get_config():
    config = {}
    config['sound_file'] = "harvestmoon-mono-hp500.wav"
    config['save_file'] = config['sound_file'] + "_modelsave_"
    config['blocksize']=13000
    config['compressed_blocksize'] = (config['blocksize']//2+1)
    config['seqlen'] = 80 # in number of blocks...
    config['win_edge'] = int(config['blocksize'] / 2)   
    config['out_step'] = 1 # in number of blocks...
    config['batchsize'] = 5 # in number of blocks...
    config['domain'] = "rfft" # either "rfft" or "dct"
    if config['domain'] == "dct": config['win_edge'] = int(config['blocksize'] / 2) # if dct, have to set this
    return config
项目:SONGSHTR    作者:songeater    | 项目源码 | 文件源码
def conv_to_dct(signal, blocksize, edge, out_blocksize):
    blocks1 = []
    blocks2 = []
    for i in range(0, signal.shape[0]-blocksize-edge, blocksize-edge):
        dct_block = dct(signal[i:i+blocksize], norm='ortho')
        blocks1.append(dct_block)
    if blocksize > out_blocksize:
        for opw in range(len(blocks1)): blocks2.append(blocks1[opw][0:out_blocksize])
    return blocks2
项目:PIE_ISAE_Essais_Vol    作者:YuanxiangFranck    | 项目源码 | 文件源码
def get_dct(x, n_dct):
    """
    Renvoie les n_dct plus grands coefficients de la transformée en 
    cosinus discrète 
    """
    # Compute FFT coefficients (complex)
    coeffs = dct(x.transpose())
    # Sort according to absolute value
    coeffs_sorted = coeffs.ravel()[np.argsort(-np.abs(coeffs)).ravel()] \
    .reshape(coeffs.shape)
    # n_fft largest coefficients
    n_coeffs = coeffs_sorted[:,:n_dct]
    # Return the coefficients
    return n_coeffs
项目:sbrt2017    作者:igormq    | 项目源码 | 文件源码
def _call(self, signal):
        """Compute MFCC features from an audio signal.

        Args:
            signal: the audio signal from which to compute features. Should be
            an N*1 array

        Returns:
            A numpy array of size (NUMFRAMES by numcep) containing features.
            Each row holds 1 feature vector.
        """
        feat, energy = super(MFCC, self)._call(signal)

        feat = np.log(feat)
        feat = dct(feat, type=2, axis=1, norm='ortho')[:, :self.num_cep]
        feat = self._lifter(feat, self.cep_lifter)

        if self.append_energy:
            # replace first cepstral coefficient with log of frame energy
            feat[:, 0] = np.log(energy + self.eps)

        if self.d:
            d = delta(feat, 2)
            feat = np.hstack([feat, d])

            if self.dd:
                feat = np.hstack([feat, delta(d, 2)])

        return feat
项目:DTW_physionet2016    作者:JJGO    | 项目源码 | 文件源码
def mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13,
         nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
         winfunc=lambda x: numpy.ones((x, ))):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return,  default 13
    :param nfilt: the number of filters in the filterbank,  default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz,  default is 0.
    :param highfreq: highest band edge of mel filters. In Hz,  default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true,  the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    feat, energy = fbank(signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph, winfunc)
    feat = numpy.log(feat)
    feat = dct(feat,  type=2,  axis=1,  norm='ortho')[:, :numcep]
    feat = lifter(feat, ceplifter)
    if appendEnergy: feat[:, 0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
    return feat
项目:DTW_physionet2016    作者:JJGO    | 项目源码 | 文件源码
def mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13,
         nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
         winfunc=lambda x: numpy.ones((x, ))):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return,  default 13
    :param nfilt: the number of filters in the filterbank,  default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz,  default is 0.
    :param highfreq: highest band edge of mel filters. In Hz,  default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true,  the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    feat, energy = fbank(signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph, winfunc)
    feat = numpy.log(feat)
    feat = dct(feat,  type=2,  axis=1,  norm='ortho')[:, :numcep]
    feat = lifter(feat, ceplifter)
    if appendEnergy: feat[:, 0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
    return feat
项目:website-4mb    作者:dettoman    | 项目源码 | 文件源码
def dct_gen(frame):
    # read Frame
    [r, c, d] = frame.shape
    framebw = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    ##########  DCT  ##########
    # Reshape in 8x8 type array
    framedct = np.reshape(framebw / 255.0, (-1, 8), order='C')
    # Apply DCT line and column wise
    X = sft.dct(framedct, axis=1, norm='ortho')

    X = np.reshape(X, (-1, c), order='C')
    X = np.reshape(X.T, (-1, 8), order='C')
    X = sft.dct(X, axis=1, norm='ortho')
    # shape back to original shape
    X = (np.reshape(X, (-1, r), order='C')).T

    # Zeroing DC-Coefficients
    X[::8,::8] = 0

    ##########  IDCT  ##########
    # Reshape in 8x8 type array
    X = np.reshape(X, (-1, 8), order='C')
    # Apply IDCT line and column wise
    X = sft.idct(X, axis=1, norm='ortho')
    X = np.reshape(X, (-1, c), order='C')
    X = np.reshape(X.T, (-1, 8), order='C')
    x = sft.idct(X, axis=1, norm='ortho')
    # shape back to original shape
    x = (np.reshape(x, (-1, r), order='C')).T
    # cast into output image
    x = x*255

    frameout = np.zeros_like(frame)
    frameout[:, :, 0] = x
    frameout[:, :, 1] = x
    frameout[:, :, 2] = x
    return frameout
项目:Automatic_Speech_Recognition    作者:zzw922cn    | 项目源码 | 文件源码
def calcMFCC(signal,samplerate=16000,win_length=0.025,win_step=0.01,feature_len=13,filters_num=26,NFFT=512,low_freq=0,high_freq=None,pre_emphasis_coeff=0.97,cep_lifter=22,appendEnergy=True,mode='mfcc'):
    """Caculate Features.
    Args:
        signal: 1-D numpy array.
        samplerate: Sampling rate. Defaulted to 16KHz.
        win_length: Window length. Defaulted to 0.025, which is 25ms/frame.
        win_step: Interval between the start points of adjacent frames.
            Defaulted to 0.01, which is 10ms.
        feature_len: Numbers of features. Defaulted to 13.
        filters_num: Numbers of filters. Defaulted to 26.
        NFFT: Size of FFT. Defaulted to 512.
        low_freq: Lowest frequency.
        high_freq: Highest frequency.
        pre_emphasis_coeff: Coefficient for pre-emphasis. Pre-emphasis increase
            the energy of signal at higher frequency. Defaulted to 0.97.
        cep_lifter: Numbers of lifter for cepstral. Defaulted to 22.
        appendEnergy: Wheter to append energy. Defaulted to True.
        mode: 'mfcc' or 'fbank'.
            'mfcc': Mel-Frequency Cepstral Coefficients(MFCC).
                    Complete process: Mel filtering -> log -> DCT.
            'fbank': Apply Mel filtering -> log.

    Returns:
        2-D numpy array with shape (NUMFRAMES, features). Each frame containing feature_len of features.
    """
    filters_num = 2*feature_len
    feat,energy=fbank(signal,samplerate,win_length,win_step,filters_num,NFFT,low_freq,high_freq,pre_emphasis_coeff)
    feat=numpy.log(feat)
    # Performing DCT and get first 13 coefficients
    if mode == 'mfcc':
        feat=dct(feat,type=2,axis=1,norm='ortho')[:,:feature_len]
        feat=lifter(feat,cep_lifter)
    elif mode == 'fbank':
        feat = feat[:,:feature_len]
    if appendEnergy:
        # Replace the first coefficient with logE and get 2-13 coefficients.
        feat[:,0]=numpy.log(energy)
    return feat
项目:ASP    作者:TUIlmenauAMS    | 项目源码 | 文件源码
def DCT4(samples):
   #use a DCT3 to implement a DCT4:
   samplesup=np.zeros(2*N)
   #upsample signal:
   samplesup[1::2]=samples
   y=spfft.dct(samplesup,type=3,norm='ortho')*np.sqrt(2)#/2
   return y[0:N]
项目:ASP    作者:TUIlmenauAMS    | 项目源码 | 文件源码
def DCT4(samples):
   #use a DCT3 to implement a DCT4:
   samplesup=np.zeros(2*N)
   #upsample signal:
   samplesup[1::2]=samples
   y=spfft.dct(samplesup,type=3,norm='ortho')*np.sqrt(2)#/2
   return y[0:N]
项目:ASP    作者:TUIlmenauAMS    | 项目源码 | 文件源码
def DCT4(samples):
   #use a DCT3 to implement a DCT4:
   samplesup=np.zeros(2*N)
   #upsample signal:
   samplesup[1::2]=samples
   y=spfft.dct(samplesup,type=3,norm='ortho')*np.sqrt(2)#/2
   return y[0:N]
项目:ASP    作者:TUIlmenauAMS    | 项目源码 | 文件源码
def DCT4(samples):
   #use a DCT3 to implement a DCT4:
   samplesup=np.zeros(2*N)
   #upsample signal:
   samplesup[1::2]=samples
   y=spfft.dct(samplesup,type=3)/2
   return y[0:N]
项目:ASRT_SpeechRecognition    作者:nl8590687    | 项目源码 | 文件源码
def mfcc(signal, samplerate, conf):
    '''
    Compute MFCC features from an audio signal.
    ??????????MFCC????
    Args:
    ???
        signal: the audio signal from which to compute features. Should be an
            N*1 array
            ?????????????????????N*1???
        samplerate: the samplerate of the signal we are working with.
            ??????????
        conf: feature configuration
            ?????

    Returns:
    ????
        A numpy array of size (NUMFRAMES by numcep) containing features. Each
        row holds 1 feature vector, a numpy vector containing the signal
        log-energy
        ???????????numpy?????????????????numpy???????????
    '''

    feat, energy = fbank(signal, samplerate, conf)
    feat = numpy.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:, :int(conf['numcep'])]
    feat = lifter(feat, float(conf['ceplifter']))
    return feat, numpy.log(energy)
项目:UWdepressionX    作者:jyfeather    | 项目源码 | 文件源码
def a_dct(l, m):
    tmp = dct(l, type=2, norm = 'ortho')
    tmp_idx = sorted(range(len(tmp)), key=lambda k: -abs(tmp[k]))
    tmp[tmp_idx[m:]] = 0
    return tmp
项目:UWdepressionX    作者:jyfeather    | 项目源码 | 文件源码
def a_dct(l, m):
    tmp = dct(l, type=2)
    tmp_idx = sorted(range(len(tmp)), key=lambda k: -abs(tmp[k]))
    return tmp[tmp_idx[:m]].tolist()
项目:Speaker-Recognition    作者:orchidas    | 项目源码 | 文件源码
def mfcc(s,fs, nfiltbank):

    #divide into segments of 25 ms with overlap of 10ms
    nSamples = np.int32(0.025*fs)
    overlap = np.int32(0.01*fs)
    nFrames = np.int32(np.ceil(len(s)/(nSamples-overlap)))
    #zero padding to make signal length long enough to have nFrames
    padding = ((nSamples-overlap)*nFrames) - len(s)
    if padding > 0:
        signal = np.append(s, np.zeros(padding))
    else:
        signal = s
    segment = np.empty((nSamples, nFrames))
    start = 0
    for i in range(nFrames):
        segment[:,i] = signal[start:start+nSamples]
        start = (nSamples-overlap)*i

    #compute periodogram
    nfft = 512
    periodogram = np.empty((nFrames,nfft/2 + 1))
    for i in range(nFrames):
        x = segment[:,i] * hamming(nSamples)
        spectrum = fftshift(fft(x,nfft))
        periodogram[i,:] = abs(spectrum[nfft/2-1:])/nSamples

    #calculating mfccs    
    fbank = mel_filterbank(nfft, nfiltbank, fs)
    #nfiltbank MFCCs for each frame
    mel_coeff = np.empty((nfiltbank,nFrames))
    for i in range(nfiltbank):
        for k in range(nFrames):
            mel_coeff[i,k] = np.sum(periodogram[k,:]*fbank[:,i])

    mel_coeff = np.log10(mel_coeff)
    mel_coeff = dct(mel_coeff)
    #exclude 0th order coefficient (much larger than others)
    mel_coeff[0,:]= np.zeros(nFrames)
    return mel_coeff
项目:csgm    作者:AshishBora    | 项目源码 | 文件源码
def dct2(image_channel):
    return fftpack.dct(fftpack.dct(image_channel.T, norm='ortho').T, norm='ortho')
项目:CNN_denoise    作者:weedwind    | 项目源码 | 文件源码
def gen_post(feat_list, stat_file, model, win_size_before = 15, win_size_after = 15, num_targets = 31):
   model.eval()             # Put the model in test mode (the opposite of model.train(), essentially)

   m, v = read_mv(stat_file)
   if m is None or v is None:
      raise Exception("mean or variance vector does not exist")

   with open(feat_list) as f:
      for line in f:
         line = line.strip()
         if len(line) < 1: continue
         print ("generating features for file", line)
         io = htk_io.fopen(line)
         utt_feat = io.getall()
         utt_feat -= m       # normalize mean
         utt_feat /= (np.sqrt(v) + eps)     # normalize var
         feat_numpy = org_data(utt_feat, win_size_before, win_size_after)
         out_feat = np.zeros((utt_feat.shape[0], num_targets))
         for i in range(feat_numpy.shape[0] // 100):     # chop the speech into shorter segments, to prevent gpu out of memory
             start_idx = i * 100
             end_idx = i * 100 + 100
             feat_chunk = feat_numpy[start_idx:end_idx]
             feat_tensor = torch.from_numpy(feat_chunk).type(gpu_dtype)
             x = Variable(feat_tensor.type(gpu_dtype), volatile = True)
             scores = model(x)
             out_feat[start_idx:end_idx] = scores.data.cpu().numpy()
         num_remain = feat_numpy.shape[0] % 100
         if num_remain > 0:
            feat_chunk = feat_numpy[-num_remain:]
            feat_tensor = torch.from_numpy(feat_chunk).type(gpu_dtype)
            x = Variable(feat_tensor.type(gpu_dtype), volatile = True)
            scores = model(x)
            out_feat[-num_remain:] = scores.data.cpu().numpy()

         out_feat = dct(out_feat, type=2, axis=1, norm='ortho')[:,1:numcep+1]
         out_feat_delta = delta(out_feat, 2)
         out_feat_ddelta = delta(out_feat_delta, 2)
         out_feat = np.concatenate((out_feat, out_feat_delta, out_feat_ddelta), axis = 1)   

         out_file = line.replace(".fea", ".mfc")
         io = htk_io.fopen(out_file, mode="wb", veclen = out_feat.shape[1])
         io.writeall(out_feat)
         print ("features saved in %s\n" %out_file)
项目:Speaker_recognition    作者:Mgajurel    | 项目源码 | 文件源码
def extract(self, signal, filename):
        if signal.ndim > 1:
            self.dprint("INFO: Input signal has more than 1 channel; the channels will be averaged.")
            signal = mean(signal, axis=1)
        assert len(signal) > 5 * self.FRAME_LEN, "Signal too short!"

        #Pre Emphasis
        #signal = signal[0] + signal[1]-a*signal[0] + signal[2]-a*signal[1] + ...
        signal = np.append(signal[0], signal[1:] - self.PRE_EMP * signal[:-1])

        #framming the signal
        signal_length = len(signal)
        if signal_length <= self.FRAME_LEN:
            num_frames = 1
        else:
            num_frames = 1 + int(math.ceil((1.0*signal_length-self.FRAME_LEN)/self.FRAME_STEP))

        pad_signal_length = int((num_frames-1)*self.FRAME_STEP + self.FRAME_LEN)
        z = np.zeros((pad_signal_length - signal_length,))
        pad_signal = np.concatenate((signal, z))
        indices = np.tile(np.arange(0, self.FRAME_LEN), (num_frames, 1)) + np.tile(np.arange(0, num_frames * self.FRAME_STEP, self.FRAME_STEP), (self.FRAME_LEN, 1)).T
        indices = np.array(indices,dtype=np.int32)
        frames = pad_signal[indices]

        #windowing the signal
        #passing the signal through hamming window
        win = np.hamming(self.FRAME_LEN)
        frames *= win

        #Magnitude spectrum
        if np.shape(frames)[1] > self.NFFT:
            self.dprint("Warning, frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid."%(np.shape(frames)[1], self.NFFT))

        mag_frames = np.absolute(np.fft.rfft(frames, self.NFFT))

        #Power Spectrum
        pspec = ((1.0 / self.NFFT) * ((mag_frames) ** 2))


        #Filter Bank
        pspec = np.where(pspec == 0,np.finfo(float).eps,pspec) # if things are all zeros we get problems

        energy = np.sum(pspec,1) #this stores the total energy in each frame
        energy = np.where(energy == 0,np.finfo(float).eps,energy) # if energy is zero, we get problems with log

        fbank = self.get_filterbanks()
        filter_banks = np.dot(pspec, fbank)
        filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)  # Numerical Stability

        # MFCC Calculation
        filter_banks = np.log(filter_banks)
        mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, : self.num_ceps] # Keep 2-13


        nframes, ncoeff = np.shape(mfcc)
        n = np.arange(ncoeff)
        lift = 1 + (self.cep_lifter / 2) * np.sin(np.pi * n / self.cep_lifter)
        mfcc *= lift
        if self.appendEnergy:
            mfcc[:,0] = np.log(energy) # replace first cepstral coefficient with log of frame energy
        np.savetxt(filename, mfcc, fmt='%.8f', delimiter=',')
项目:ip-avsr    作者:lzuwei    | 项目源码 | 文件源码
def compute_dct_features(X, image_shape, no_coeff=30, method='zigzag'):
    """
    compute 2D-dct features of a given image.
    Type 2 DCT and finds the DCT coefficents with the largest mean normalized variance
    :param X: 1 dimensional input image in 'c' format
    :param image_shape: image shape
    :param no_coeff: number of coefficients to extract
    :param method: method to extract coefficents, zigzag, variance
    :return: dct features
    """
    X_dct = fft.dct(X, norm='ortho')

    if method == 'zigzag':
        out = np.zeros((len(X_dct), no_coeff), dtype=X_dct.dtype)
        for i in xrange(len(X_dct)):
            image = X_dct[i].reshape(image_shape)
            out[i] = zigzag(image)[1:no_coeff + 1]
        return out
    elif method == 'rel_variance':
        X_dct = X_dct[:, 1:]
        # mean coefficient per frequency
        mean_dct = np.mean(X_dct, 0)
        # mean normalize
        mean_norm_dct = X_dct - mean_dct
        # find standard deviation for each frequency component
        std_dct = np.std(mean_norm_dct, 0)
        # sort by largest variance
        idxs = np.argsort(std_dct)[::-1][:no_coeff]
        # return DCT coefficients with the largest variance
        return X_dct[:, idxs]
    elif method == 'variance':
        X_dct = X_dct[:, 1:]
        # find standard deviation for each frequency component
        std_dct = np.std(X_dct, 0)
        # sort by largest variance
        idxs = np.argsort(std_dct)[::-1][:no_coeff]
        # return DCT coefficients with the largest variance
        return X_dct[:, idxs]
    elif method == 'energy':
        X_dct = X_dct[:, 1:]
        X_sum = np.abs(X_dct)
        X_sum = np.sum(X_sum, 0)
        idxs = np.argsort(X_sum)[::-1][:no_coeff]
        return X_dct[:, idxs]
    else:
        raise NotImplementedError("method not implemented, use only 'zigzag', 'variance', 'rel_variance")