Python numpy 模块,hanning() 实例源码
我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用numpy.hanning()。
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Args:
        sig: 1-D sequence of samples; it is converted to float64.
        frameSize: Number of samples in each analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frameSize // 2 + 1)`` holding the
        one-sided FFT of every windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')
    # np.ceil returns a float, but as_strided needs an integral shape.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    # The last frame may extend past the signal.  Zero-pad just enough so the
    # strided view never reads memory beyond the end of the buffer.
    needed = (cols - 1) * hopSize + frameSize
    if needed > len(samples):
        samples = np.append(samples, np.zeros(needed - len(samples)))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
# All definitions of the following variables can be found in
# train_net.py
def hanning(M):
    r"""Return the Hanning window with ``M`` points.

    The window is defined as

    .. math::
        w(n) = 0.5 - 0.5\cos\left(\frac{2\pi{n}}{M-1}\right)
        \qquad 0 \leq n \leq M-1

    Args:
        M (:class:`~int`):
            Number of points in the output window. If zero or less, an empty
            array is returned.

    Returns:
        ~cupy.ndarray: Output ndarray.

    .. seealso:: :func:`numpy.hanning`
    """
    if M >= 1:
        if M == 1:
            # A single-point window is just [1.0]; this also avoids a division
            # by zero in the general formula below.
            return basic.ones(1, float)
        indices = ranges.arange(0, M)
        phase = 2.0 * numpy.pi * indices / (M - 1)
        return 0.5 - 0.5 * trigonometric.cos(phase)
    return from_data.array([])
def ams_extractor(x, sr, win_len, shift_len, order):
    """Extract amplitude-modulation features from the envelope of ``x``.

    The Hilbert envelope of ``x`` is taken ``order`` times (at least once),
    scaled by 1/3, cut into Hanning-windowed frames, and each frame's FFT
    magnitude is projected onto 15 rows of weights returned by
    ``cal_triangle_window``.

    Args:
        x: 1-D signal.
        sr: Sampling rate; ``sr // 2`` is passed to ``cal_triangle_window``.
        win_len: Frame length in samples.
        shift_len: Hop between frames in samples.
        order: Number of envelope passes; values below 2 give a single pass.

    Returns:
        Array of shape ``(15, frames)`` with
        ``frames = (len(x) - win_len) // shift_len``.
    """
    from scipy.signal import hilbert
    envelope = np.abs(hilbert(x))
    for _ in range(order - 1):
        envelope = np.abs(hilbert(envelope))
    envelope = envelope * 1. / 3.
    frames = (len(envelope) - win_len) // shift_len
    hanning_window = np.hanning(win_len)
    ams_feature = np.zeros(shape=(15, frames))
    # presumably wts has shape (15, win_len) -- confirm against cal_triangle_window
    wts = cal_triangle_window(0, sr // 2, win_len, 15, 15.6, 400)
    for i in range(frames):
        start = i * shift_len
        # Frame the envelope computed above.  The previous code framed the raw
        # signal ``x`` here, which left the envelope unused.
        one_frame = envelope[start:start + win_len] * hanning_window
        frame_fft = np.abs(np.fft.fft(one_frame, win_len))
        ams_feature[:, i] = np.matmul(wts, frame_fft)
    return ams_feature
def cochleagram_extractor(xx, sr, win_len, shift_len, channel_number, win_type):
    """Compute a cochleagram from an ERB filter-bank decomposition of ``xx``.

    The signal is filtered with ``erb_frilter_bank``, squared, and the
    weighted mean of each frame is square-rooted per channel.  Exact zeros in
    the result are replaced by machine epsilon.

    Args:
        xx: Input signal handed to ``erb_frilter_bank``.
        sr: Sampling rate handed to ``make_erb_filters``.
        win_len: Frame length in samples.
        shift_len: Hop between frames in samples.
        channel_number: Number of filter-bank channels.
        win_type: ``'hanning'``, ``'hamming'`` or ``'triangle'``; anything
            else selects an all-ones weighting.

    Returns:
        Array of shape ``(channel_number, frames)``.
    """
    fcoefs, _ = make_erb_filters(sr, channel_number, 50)
    xf = erb_frilter_bank(xx, np.flipud(fcoefs))

    if win_type == 'hanning':
        weights = np.hanning(channel_number)
    elif win_type == 'hamming':
        weights = np.hamming(channel_number)
    elif win_type == 'triangle':
        weights = (1 - (np.abs(channel_number - 1 - 2 * np.arange(1, channel_number + 1, 1)) / (channel_number + 1)))
    else:
        weights = np.ones(channel_number)
    # NOTE(review): the weights have one entry per channel, not per time
    # sample, so they scale whole channels -- confirm this is intended.
    weight_block = np.repeat(weights.reshape((channel_number, 1)), win_len, 1)

    energy = np.power(xf, 2.0)
    frames = 1 + ((np.size(energy, 1) - win_len) // shift_len)
    cochleagram = np.zeros((channel_number, frames))
    for col in range(frames):
        start = col * shift_len
        weighted = np.multiply(energy[:, start:start + win_len], weight_block)
        cochleagram[:, col] = np.sqrt(np.mean(weighted, 1))
    return np.where(cochleagram == 0.0, np.finfo(float).eps, cochleagram)
def log_power_spectrum_extractor(x, win_len, shift_len, win_type, is_log=False):
    """Compute the (optionally log) power spectrum of every frame of ``x``.

    Args:
        x: 1-D signal array.
        win_len: Frame and FFT length in samples.
        shift_len: Hop between frames in samples.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.
        is_log: When true, return the natural log of the power.

    Returns:
        float64 array of shape ``(win_len // 2 + 1, frames)`` with
        ``frames = (len(x) - win_len) // shift_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    samples = x.shape[0]
    frames = (samples - win_len) // shift_len
    if win_type == 'hanning':
        window = np.hanning(win_len)
    elif win_type == 'hamming':
        window = np.hamming(win_len)
    elif win_type == 'rectangle':
        window = np.ones(win_len)
    else:
        # Previously an unknown name left ``window`` unbound and failed later
        # with an unrelated-looking error inside the loop.
        raise ValueError("unsupported win_type: %r" % (win_type,))
    n_bins = win_len // 2 + 1
    # The intermediate spectrum is kept in complex64, as before.
    stft = np.zeros((win_len, frames), dtype=np.complex64)
    spect = np.zeros((n_bins, frames), dtype=np.float64)
    for i in range(frames):
        start = i * shift_len
        windowed_frame = np.multiply(x[start:start + win_len], window)
        stft[:, i] = np.fft.fft(windowed_frame, win_len)
        power = np.power(np.abs(stft[0:n_bins, i]), 2.)
        spect[:, i] = np.log(power) if is_log else power
    return spect
def stft_extractor(x, win_len, shift_len, win_type):
    """Return the one-sided complex STFT of ``x``.

    Args:
        x: 1-D signal array.
        win_len: Frame and FFT length in samples.
        shift_len: Hop between frames in samples.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.

    Returns:
        complex64 array of shape ``(win_len // 2 + 1, frames)`` with
        ``frames = (len(x) - win_len) // shift_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    samples = x.shape[0]
    frames = (samples - win_len) // shift_len
    if win_type == 'hanning':
        window = np.hanning(win_len)
    elif win_type == 'hamming':
        window = np.hamming(win_len)
    elif win_type == 'rectangle':
        window = np.ones(win_len)
    else:
        # Previously an unknown name left ``window`` unbound.
        raise ValueError("unsupported win_type: %r" % (win_type,))
    n_bins = win_len // 2 + 1
    stft = np.zeros((win_len, frames), dtype=np.complex64)
    spect = np.zeros((n_bins, frames), dtype=np.complex64)
    for i in range(frames):
        start = i * shift_len
        windowed_frame = np.multiply(x[start:start + win_len], window)
        stft[:, i] = np.fft.fft(windowed_frame, win_len)
        spect[:, i] = stft[:n_bins, i]
    return spect
def ams_extractor(x, sr, win_len, shift_len, order=1, decimate_coef=1./4.):
    """Extract amplitude-modulation features from the envelope of ``x``.

    The Hilbert envelope of ``x`` is taken ``order`` times (at least once),
    multiplied by ``decimate_coef``, cut into Hanning-windowed frames, and
    each frame's FFT magnitude is projected onto 15 rows of weights returned
    by ``cal_triangle_window``.

    Args:
        x: 1-D signal.
        sr: Sampling rate; ``sr // 2`` is passed to ``cal_triangle_window``.
        win_len: Frame length in samples.
        shift_len: Hop between frames in samples.
        order: Number of envelope passes; values below 2 give a single pass.
        decimate_coef: Scalar multiplier applied to the envelope.

    Returns:
        Array of shape ``(15, frames)`` with
        ``frames = (len(x) - win_len) // shift_len``.
    """
    from scipy.signal import hilbert
    envelope = np.abs(hilbert(x))
    for _ in range(order - 1):
        envelope = np.abs(hilbert(envelope))
    envelope = envelope * decimate_coef
    frames = (len(envelope) - win_len) // shift_len
    hanning_window = np.hanning(win_len)
    ams_feature = np.zeros(shape=(15, frames))
    # presumably wts has shape (15, win_len) -- confirm against cal_triangle_window
    wts = cal_triangle_window(0, sr // 2, win_len, 15, 15.6, 400)
    for i in range(frames):
        start = i * shift_len
        # Frame the envelope computed above.  The previous code framed the raw
        # signal ``x`` here, which left the envelope unused.
        one_frame = envelope[start:start + win_len] * hanning_window
        frame_fft = np.abs(np.fft.fft(one_frame, win_len))
        ams_feature[:, i] = np.matmul(wts, frame_fft)
    return ams_feature
def unknown_feature_extractor(x, sr, win_len, shift_len, barks, inner_win, inner_shift, win_type, method_version):
    """Compute a second-level STFT feature over bark-band trajectories.

    The STFT of ``x`` is projected onto ``barks`` bands with the matrix from
    ``get_fft_bark_mat``; each band trajectory is then analysed again with
    ``stft_extractor`` using a Hanning window.

    Args:
        x: Input signal handed to ``stft_extractor``.
        sr: Sampling rate.
        win_len, shift_len, win_type: Parameters of the outer STFT.
        barks: Number of bark bands.
        inner_win, inner_shift: Frame length and hop of the inner STFT.
        method_version: ``'v1'`` stores ``20 * ln|S|``; ``'v2'`` stores
            ``|S| ** (1/3)`` times the phase wrapped into ``[0, 2*pi)``;
            anything else stores ``|S|``.

    Returns:
        Array of shape ``(barks, inner_win // 2 + 1, inner_frames)``.
    """
    x_spectrum = stft_extractor(x, win_len, shift_len, win_type)
    coef = get_fft_bark_mat(sr, win_len, barks, 20, sr//2)
    bark_spect = np.matmul(coef, x_spectrum)

    n_bins = inner_win // 2 + 1
    n_frames = (bark_spect.shape[1] - inner_win) // inner_shift
    two_pi = 2. * np.pi
    ams = np.zeros((barks, n_bins, n_frames))
    for band in range(barks):
        channel_stft = stft_extractor(bark_spect[band, :], inner_win, inner_shift, 'hanning')
        if method_version == 'v1':
            ams[band, :, :] = 20 * np.log(np.abs(channel_stft[:n_bins, :n_frames]))
        elif method_version == 'v2':
            cropped = channel_stft[:n_bins, :n_frames]
            magnitude = np.abs(cropped)
            phase = np.angle(cropped)
            # Wrap the phase into [0, 2*pi).
            phase = phase - (np.floor(phase / two_pi) * two_pi)
            ams[band, :, :] = np.power(magnitude, 1./3.) * phase
        else:
            ams[band, :, :] = np.abs(channel_stft)
    return ams
def spectrum_extractor(x, win_len, shift_len, win_type, is_log):
    """Compute the (optionally log) magnitude spectrum of every frame of ``x``.

    Args:
        x: 1-D signal array.
        win_len: Frame and FFT length in samples.
        shift_len: Hop between frames in samples.
        win_type: ``'hanning'``, ``'hamming'`` or ``'triangle'``; any other
            value selects a rectangular window.
        is_log: When true, return the natural log of the magnitude.

    Returns:
        float64 array of shape ``(win_len // 2 + 1, frames)`` with
        ``frames = (len(x) - win_len) // shift_len``.
    """
    n_bins = win_len // 2 + 1
    frames = (x.shape[0] - win_len) // shift_len
    # The full spectrum is staged in complex64 before the magnitude is taken.
    stft = np.zeros((win_len, frames), dtype=np.complex64)
    spectrum = np.zeros((n_bins, frames), dtype=np.float64)

    if win_type == 'hanning':
        taper = np.hanning(win_len)
    elif win_type == 'hamming':
        taper = np.hamming(win_len)
    elif win_type == 'triangle':
        taper = (1 - (np.abs(win_len - 1 - 2 * np.arange(1, win_len + 1, 1)) / (win_len + 1)))
    else:
        taper = np.ones(win_len)

    for col in range(frames):
        start = col * shift_len
        segment = np.multiply(x[start:start + win_len], taper)
        stft[:, col] = np.fft.fft(segment, win_len)
        magnitude = np.abs(stft[0:n_bins, col])
        if is_log:
            magnitude = np.log(magnitude)
        spectrum[:, col] = magnitude
    return spectrum
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Args:
        sig: 1-D sequence of samples; it is converted to float64.
        frameSize: Number of samples in each analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frameSize // 2 + 1)``, where
        ``cols = floor((len(sig) - frameSize) / hopSize)``.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')
    # np.floor returns a float, but as_strided needs an integral shape.
    # With floor and no "+ 1", every frame lies inside the signal, so no
    # zero-padding is needed.
    cols = int(np.floor((len(samples) - frameSize) / float(hopSize)))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Args:
        sig: 1-D sequence of samples; it is converted to float64.
        frameSize: Number of samples in each analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frameSize // 2 + 1)`` holding the
        one-sided FFT of every windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')
    # np.ceil returns a float, but as_strided needs an integral shape.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    # The last frame may extend past the signal.  Zero-pad just enough so the
    # strided view never reads memory beyond the end of the buffer.
    needed = (cols - 1) * hopSize + frameSize
    if needed > len(samples):
        samples = np.append(samples, np.zeros(needed - len(samples)))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Args:
        sig: 1-D sequence of samples; it is converted to float64.
        frameSize: Number of samples in each analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frameSize // 2 + 1)`` holding the
        one-sided FFT of every windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')
    # np.ceil returns a float, but as_strided needs an integral shape.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    # Zeros at the end so the samples can be fully covered by frames.
    samples = np.append(samples, np.zeros(frameSize))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def __init__(self, sampling_rate=16000, frame_width=0.032, frame_shift=0.01, num_mel_filters=40, window_func="hanning",
             using_delta=True, using_delta_delta=True):
    """Store the front-end configuration and build the mel filter bank.

    Args:
        sampling_rate: Sampling rate; also passed to ``fft.get_filterbanks``.
        frame_width: Frame width; ``num_fft`` is
            ``int(sampling_rate * frame_width)``.
        frame_shift: Frame shift, stored as given.
        num_mel_filters: Number of filters requested from
            ``fft.get_filterbanks``.
        window_func: ``"hanning"`` or ``"hamming"``.
        using_delta: Stored flag.
        using_delta_delta: Stored flag.
    """
    assert window_func in ["hanning", "hamming"]
    self.sampling_rate = sampling_rate
    self.frame_width = frame_width
    self.frame_shift = frame_shift
    self.num_fft = int(sampling_rate * frame_width)
    self.num_mel_filters = num_mel_filters
    if window_func == "hanning":
        self.window_func = lambda x: np.hanning(x)
    elif window_func == "hamming":
        # The original tested the undefined name ``winfunc`` here, which
        # raised NameError whenever "hamming" was requested.
        self.window_func = lambda x: np.hamming(x)
    self.using_delta = using_delta
    self.using_delta_delta = using_delta_delta
    self.fbank = fft.get_filterbanks(nfft=self.num_fft, nfilt=num_mel_filters, samplerate=sampling_rate)
def periodic_hann(window_length):
    """Calculate a "periodic" Hann window.

    np.hanning() returns the "symmetric" Hann window, a raised cosine that
    starts and ends on zero; that shape spans slightly more than one cycle of
    a period N-1 cosine.  The "periodic" variant computed here is exactly one
    cycle of a period-N cosine, i.e. it stops just before the final zero.

    Args:
        window_length: The number of points in the returned window.

    Returns:
        A 1D np.array containing the periodic hann window.
    """
    angular_step = 2 * np.pi / window_length
    raised_cosine = 0.5 * np.cos(angular_step * np.arange(window_length))
    return 0.5 - raised_cosine
def stft_magnitude(signal, fft_length,
                   hop_length=None,
                   window_length=None):
    """Calculate the short-time Fourier transform magnitude.

    Args:
        signal: 1D np.array of the input time-domain signal.
        fft_length: Size of the FFT to apply.
        hop_length: Advance (in samples) between each frame passed to FFT.
        window_length: Length of each block of samples to pass to FFT.

    Returns:
        2D np.array where each row contains the magnitudes of the
        fft_length/2+1 unique values of the FFT for the corresponding frame
        of input samples.
    """
    framed = frame(signal, window_length, hop_length)
    # A periodic Hann (cosine of period window_length) is used rather than
    # the symmetric Hann of np.hanning (period window_length-1).
    tapered = framed * periodic_hann(window_length)
    spectrum = np.fft.rfft(tapered, int(fft_length))
    return np.abs(spectrum)
# Mel spectrum constants and functions.
def compute_pairwise_shifts(imstack):
    """Estimate the shift between each pair of consecutive frames.

    Args:
        imstack: Image stack indexed as [frame, x, y].

    Returns:
        Array of shape ``(2, N - 1)`` for ``N`` frames; column ``k`` holds the
        first value returned by ``register_translation_hybrid`` for frames
        ``k`` and ``k + 1``.  Shifts are defined as [y, x], where y is the
        shift of the imaging location with respect to the positive y axis,
        and similarly for x.
    """
    n_frames, n_rows, n_cols = imstack.shape[0], imstack.shape[1], imstack.shape[2]
    num_pairs = n_frames - 1
    print('Correcting ' + str(num_pairs) + ' frames...')

    # 2-D Hann window applied to both images before registration.
    hann2d = np.outer(np.hanning(n_rows), np.hanning(n_cols))

    shift = np.zeros((2, num_pairs))
    for pair_idx in range(num_pairs):
        reference = imstack[pair_idx] * hann2d
        moved = imstack[pair_idx + 1] * hann2d
        shift[:, pair_idx], error, diffphase = register_translation_hybrid(
            reference, moved, exponent=0.3, upsample_factor=100)
    return shift
def __init__(self, window_size, segments_buf=None):
    """Initialise the detector state for segments of ``window_size`` samples.

    Args:
        window_size: Number of samples per segment.
        segments_buf: Length of the flux history; defaults to
            ``int(SAMPLE_RATE / window_size)``.
    """
    buffer_len = segments_buf
    if buffer_len is None:
        buffer_len = int(SAMPLE_RATE / window_size)

    self._window_size = window_size
    self._segments_buf = buffer_len
    self._thresholding_window_size = THRESHOLD_WINDOW_SIZE
    assert self._thresholding_window_size <= buffer_len

    self._last_spectrum = np.zeros(window_size, dtype=np.int16)
    # Bounded history of flux values, pre-filled with zeros.
    flux_history = np.zeros(buffer_len, dtype=np.int16)
    self._last_flux = deque(flux_history, buffer_len)
    self._last_prunned_flux = 0
    self._hanning_window = np.hanning(window_size)
    # The zeros which will be used to double each segment size
    self._inner_pad = np.zeros(window_size)
    # To ignore the first peak just after starting the application
    self._first_peak = True
def managed_window(self,axismanager, data, windowaxis):
    """Apply a Hanning window along one axis of ``data``.

    Args:
        axismanager: Object whose ``current_axis_number(windowaxis)`` gives
            the index of the axis to window.
        data: Array to be windowed.
        windowaxis: Axis identifier understood by ``axismanager``.

    Returns:
        Tuple ``(windoweddata, wss)`` where ``wss`` is the mean of the squared
        window ("window summed and squared", see Numerical Recipes).
    """
    def _scale(vector, taper):
        return vector * taper

    axis_index = axismanager.current_axis_number(windowaxis)
    n_points = data.shape[axis_index]
    taper = np.hanning(n_points)
    wss = np.sum(taper**2.0)/float(n_points)
    windoweddata = np.apply_along_axis(_scale, axis_index, data, taper)
    return windoweddata, wss
def measureLoop(self):
    """Measure 100 values, append them to the buffer and drop the 100 oldest.

    If a stop was requested, the flag is cleared, ``unlock()`` is called and
    nothing is measured.  Otherwise the buffer is smoothed per channel with a
    normalised Hanning window of length ``window_len`` and ``sigRepeat`` is
    emitted.
    """
    if self.stopRequest:
        self.stopRequest = False
        self.unlock()
        return
    n_new = 100
    data = np.zeros((n_new, self._data_logic.getChannels()))
    data[:, 0] = np.array([self._data_logic.getData() for _ in range(n_new)])
    # Shift the oldest rows to the end, then overwrite exactly those rows.
    # The original wrote to [-101:-1], which left the last row holding stale
    # wrapped-around data.
    self.buf = np.roll(self.buf, -n_new, axis=0)
    self.buf[-n_new:] = data
    w = np.hanning(self.window_len)
    # Mirror the buffer at both ends so the 'valid' convolution has context.
    s = np.r_[self.buf[self.window_len-1:0:-1], self.buf, self.buf[-1:-self.window_len:-1]]
    kernel = w / w.sum()
    for channel in range(self._data_logic.getChannels()):
        self.smooth[:, channel] = np.convolve(kernel, s[:, channel], mode='valid')
    self.sigRepeat.emit()
def undo_stft(spect, hop_size, frame_len=None, unwindow='auto'):
    """Invert an STFT by overlap-add and return the samples.

    Args:
        spect: One-sided spectra, one frame per row.
        hop_size: Advance in samples between consecutive frames.
        frame_len: Time-domain frame length handed to ``irfft``; ``None``
            lets ``irfft`` choose it.
        unwindow: If true, divide by the accumulated squared window (floored
            at 10% of its maximum).  ``'auto'`` enables this when
            ``hop_size <= frame_len // 2``.

    Returns:
        1-D array of ``(num_frames - 1) * hop_size + frame_len`` samples.
    """
    # Back to the time domain, one frame per row.
    time_frames = np.fft.irfft(spect, n=frame_len, axis=1)
    num_frames, frame_len = time_frames.shape
    win = np.hanning(frame_len)
    if unwindow == 'auto':
        unwindow = (hop_size <= frame_len//2)

    samples = np.zeros((num_frames - 1) * hop_size + frame_len)
    factors = np.zeros_like(samples) if unwindow else None
    for idx in range(num_frames):
        start = int(idx*hop_size)
        stop = start + frame_len
        samples[start:stop] += time_frames[idx] * win
        if unwindow:
            factors[start:stop] += win**2
    if unwindow:
        # Floor the normaliser so weakly covered edges are not blown up.
        floor = .1 * factors.max()
        np.maximum(factors, floor, factors)
        samples /= factors
    return samples
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Args:
        sig: 1-D sequence of samples; it is converted to float64.
        frameSize: Number of samples in each analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frameSize // 2 + 1)`` holding the
        one-sided FFT of every windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')
    # np.ceil returns a float, but as_strided needs an integral shape.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    # Zeros at the end so the samples can be fully covered by frames.
    samples = np.append(samples, np.zeros(frameSize))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Args:
        sig: 1-D sequence of samples; it is converted to float64.
        frameSize: Number of samples in each analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frameSize // 2 + 1)`` holding the
        one-sided FFT of every windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')
    # np.ceil returns a float, but as_strided needs an integral shape.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    # Zeros at the end so the samples can be fully covered by frames.
    samples = np.append(samples, np.zeros(frameSize))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Args:
        sig: 1-D sequence of samples; it is converted to float64.
        frameSize: Number of samples in each analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frameSize // 2 + 1)``, where
        ``cols = ceil((len(sig) - frameSize) / hopSize)``.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')
    # np.ceil returns a float, but as_strided needs an integral shape.
    # Without the "+ 1" every frame lies inside the signal, so no zero-padding
    # is needed.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize)))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def test_high_frequency_completion(self):
    """Check that completion raises the mean level of the upper spectrum.

    The same recording is transformed with and without ``completion``; for
    several start offsets the mean dB magnitude from bin ``N // 4`` upwards
    must be lower without completion than with it.
    """
    path = dirpath + '/data/test16000.wav'
    fs, x = wavfile.read(path)
    f0rate = 0.5
    shifter = Shifter(fs, f0rate=f0rate)
    mod_x = shifter.f0transform(x, completion=False)
    mod_xc = shifter.f0transform(x, completion=True)
    assert len(mod_x) == len(mod_xc)
    N = 512
    fl = int(fs * 25 / 1000)
    win = np.hanning(fl)
    sts = [1000, 5000, 10000, 20000]
    for st in sts:
        # confirm w/o completion
        f_mod_x = fft(mod_x[st: st + fl] / 2**16 * win)
        amp_mod_x = 20.0 * np.log10(np.abs(f_mod_x))
        # confirm w/ completion
        f_mod_xc = fft(mod_xc[st: st + fl] / 2**16 * win)
        amp_mod_xc = 20.0 * np.log10(np.abs(f_mod_xc))
        # Compare the two means.  The original parenthesisation took the mean
        # of an element-wise comparison, which passes as soon as a single
        # element satisfies it.
        assert np.mean(amp_mod_x[N // 4:]) < np.mean(amp_mod_xc[N // 4:])
def analysis_with_del_comp_from_est_file(v_in_sig, est_file, fs, nFFT=None, win_func=np.hanning, b_ph_unv_zero=False, nwin_per_pitch_period=0.5):
    """Analyse ``v_in_sig`` using pitch marks read from a REAPER est file.

    Args:
        v_in_sig: Input signal.
        est_file: Path handed to ``la.read_reaper_est_file``.
        fs: Sampling rate.
        nFFT: FFT length.  When ``None`` it becomes 4096 for ``fs == 48000``
            and 2048 for ``fs == 16000``; for any other rate it stays
            ``None``.
        win_func: Window function forwarded to the analysis routine.
        b_ph_unv_zero: If true, the phase matrix is multiplied row-wise by
            the voicing vector.
        nwin_per_pitch_period: Forwarded to the analysis routine.

    Returns:
        Tuple ``(m_sp, m_ph, v_shift, v_voi, m_frms, m_fft)``.
    """
    if nFFT is None:
        # No FFT length given: fall back to the standard value for the rate.
        nFFT = 4096 if fs == 48000 else (2048 if fs == 16000 else None)

    # Pitch marks, converted from seconds to samples.
    v_pm_sec, v_voi = la.read_reaper_est_file(est_file, check_len_smpls=len(v_in_sig), fs=fs)
    v_pm_smpls = v_pm_sec * fs

    analysis = analysis_with_del_comp_from_pm(v_in_sig, v_pm_smpls, nFFT, win_func=win_func, nwin_per_pitch_period=nwin_per_pitch_period)
    m_sp, m_ph, v_shift, m_frms, m_fft = analysis

    if b_ph_unv_zero:
        m_ph = m_ph * v_voi[:,None]
    return m_sp, m_ph, v_shift, v_voi, m_frms, m_fft
#==============================================================================
# From (after) 'analysis_with_del_comp':
# new: returns voi/unv decision.
def stft(self, samples, window_size, overlap_factor=0.5, window_function=np.hanning):
    """
    Perform Short-time Fourier transform to get the spectrogram for the given samples

    :param samples: Complex samples
    :param window_size: Size of DFT window
    :param overlap_factor: Value between 0 (= No Overlapping) and 1 (= Full overlapping) of windows
    :param window_function: Function for DFT window
    :return: short-time Fourier transform of the given signal, one row per frame
    """
    window = window_function(window_size)
    # hop size determines by how many samples the window is advanced
    hop_size = window_size - int(overlap_factor * window_size)
    # Pad with zeros so the last window ends exactly at the end of the data.
    # The original padded by the remainder instead of by what is missing to
    # the next hop, which could drop the trailing samples.
    n_samples = len(samples)
    if n_samples < window_size:
        # Shorter than one window: pad up to a single full frame.
        pad = window_size - n_samples
    else:
        pad = -(n_samples - window_size) % hop_size
    padded_samples = np.append(samples, np.zeros(pad))
    num_frames = ((len(padded_samples) - window_size) // hop_size) + 1
    frames = [padded_samples[i*hop_size:i*hop_size+window_size] * window for i in range(num_frames)]
    return np.fft.fft(frames)
def stft(sig, frame_size, overlap_fac=0.5, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    The signal is prefixed with ``frame_size // 2`` zeros so that the first
    window is centred on sample 0, and followed by ``frame_size`` zeros so
    that the samples can be fully covered by frames.

    Args:
        sig: 1-D sequence of samples.
        frame_size: Number of samples in each analysis frame.
        overlap_fac: Fraction of ``frame_size`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frame_size // 2 + 1)``.
    """
    win = window(frame_size)
    hop_size = int(frame_size - np.floor(overlap_fac * frame_size))
    # np.zeros and as_strided need integer sizes; np.floor/np.ceil give floats.
    samples = np.append(np.zeros(int(np.floor(frame_size / 2.0))), sig)
    cols = int(np.ceil((len(samples) - frame_size) / float(hop_size))) + 1
    samples = np.append(samples, np.zeros(frame_size))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frame_size),
        strides=(
            samples.strides[0] * hop_size,
            samples.strides[0]
        )
    ).copy()
    frames *= win
    return np.fft.rfft(frames)
def __init__(self, configs, chunks, chunksize, channels, rate):
    """Set up the FFT state for ``chunks`` chunks of ``chunksize`` samples.

    Args:
        configs: Configuration object; ``configs.settings`` is read for the
            ``fft`` section (``endpoint_notes`` and ``sensitivity``).
        chunks: Number of chunks making up one channel buffer.
        chunksize: Samples per chunk.
        channels: Stored as ``self.channels``.
        rate: Sampling rate used for the FFT bin frequencies.
    """
    self.configs = configs
    self.channels = channels
    self.chunksize = chunksize

    total_len = chunks*chunksize
    self.channel_len = total_len
    self.fft_len = total_len//2 + 1  # See numpy.fft.rfft
    self.fft_freqs_in_hertz = np.fft.rfftfreq(total_len, d=1.0/rate)

    settings = configs.settings
    endpoint_notes = settings.getmultistr('fft', 'endpoint_notes')
    self.notespace = process.generic.notespace(
        endpoint_notes[0], endpoint_notes[1],
        step=1.0/6)  # XXX
    self.window = np.hanning(total_len)
    self.sensitivity = settings.getfloat('fft', 'sensitivity')
    self.compute_weights(self.sensitivity)
    logger.debug("FFT length: {}".format(self.fft_len))

    # Create a pyfftw.FFTW object
    aligned_input = pyfftw.empty_aligned(
        total_len, dtype='int16', n=pyfftw.simd_alignment)
    self.fft = pyfftw.builders.rfft(
        aligned_input, overwrite_input=True, threads=multiprocessing.cpu_count())
def hann(args):
    """Return a Hann window computed by NumPy's ``hanning``.

    ``args`` is forwarded unchanged to ``np.hanning``.
    """
    window = np.hanning(args)
    return window
# FUNCTIONS FOR TIME-FREQUENCY REPRESENTATION
def iFFT(Y, output_length=None, window=False):
    """ Inverse real-valued Fourier Transform

    Parameters
    ----------
    Y : array_like
       Frequency domain data [Nsignals x Nbins]
    output_length : int, optional
       Length of returned time-domain signal (Default: 2 x (Nbins - 1), the
       default of ``numpy.fft.irfft``)
    window : bool or str, optional
       Weights the resulting time-domain signal with a window.  ``True``
       selects a Hann window; a string selects one of ``'hann'``,
       ``'hamming'``, ``'blackman'`` or ``'kaiser'`` (beta = 3).  A false
       value (the default) applies no window.

    Returns
    -------
    y : array_like
       Reconstructed time-domain signal [Nsignals x output_length]

    Raises
    ------
    ValueError
       If `window` is truthy but not a supported window name.
    """
    Y = _np.atleast_2d(Y)
    y = _np.fft.irfft(Y, n=output_length)

    if window:
        # The parameter used to be documented as a boolean that applies a
        # Hann window, but ``True`` was rejected; accept it as 'hann'.
        if isinstance(window, (bool, _np.bool_)):
            window = 'hann'
        if window not in {'hann', 'hamming', 'blackman', 'kaiser'}:
            raise ValueError('Selected window must be one of hann, hamming, blackman or kaiser')
        no_of_samples = y.shape[-1]
        if window == 'hann':
            window_array = _np.hanning(no_of_samples)
        elif window == 'hamming':
            window_array = _np.hamming(no_of_samples)
        elif window == 'blackman':
            window_array = _np.blackman(no_of_samples)
        else:
            window_array = _np.kaiser(no_of_samples, 3)
        y = window_array * y
    return y
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    The signal is prefixed with ``frameSize // 2`` zeros so that the first
    window is centred on sample 0, and followed by ``frameSize`` zeros so
    that the samples can be fully covered by frames.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples in each analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frameSize // 2 + 1)``.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    # np.zeros and as_strided need integer sizes; np.floor/np.ceil give floats.
    samples = np.append(np.zeros(int(np.floor(frameSize / 2.0))), sig)
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    samples = np.append(samples, np.zeros(frameSize))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    The signal is prefixed with ``frameSize // 2`` zeros so that the first
    window is centred on sample 0, and followed by ``frameSize`` zeros so
    that the samples can be fully covered by frames.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples in each analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(cols, frameSize // 2 + 1)``.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    # np.zeros and as_strided need integer sizes; np.floor/np.ceil give floats.
    samples = np.append(np.zeros(int(np.floor(frameSize / 2.0))), sig)
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    samples = np.append(samples, np.zeros(frameSize))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def make_filterbank(self):
    """Build one Hanning-windowed complex exponential filter per bin.

    Centre frequencies are spaced evenly on the ERB scale between 0 and
    ``hz2erb(self.sr / 2)`` and converted back to Hz with ``erb2hz``.  The
    results are stored in ``self.hz_freqs``, ``self.widths`` and
    ``self.filters``; each filter has ``2 * width + 1`` taps.
    """
    erb_max = hz2erb(self.sr/2.0)
    erb_freqs = np.arange(0, self.n_bins) * erb_max / float(self.n_bins - 1)
    self.hz_freqs = erb2hz(erb_freqs)
    self.widths = np.round(0.5 * (self.n_bins - 1) / erb_max *
                           9.26 * 0.00437 * self.sr * np.exp(-erb_freqs / 9.26) - 0.5)
    self.filters = []
    for b in range(self.n_bins):
        # np.round leaves a float; use an integer half-width for the tap count.
        w = int(self.widths[b])
        f = self.hz_freqs[b]
        # ``1j`` replaces ``np.complex(0, 1)``: the ``np.complex`` alias was
        # deprecated in NumPy 1.20 and later removed.
        exponential = np.exp(
            1j * 2 * np.pi * f / self.sr *
            np.arange(-w, w + 1))
        self.filters.append(np.hanning(2 * w + 1) * exponential)
def __init__(self, window = None, fft_size = 1024, hop_size = 512):
    """Store the analysis window and the FFT/hop sizes.

    Args:
        window: Window array; a Hanning window of ``fft_size`` points is
            used when ``None``.
        fft_size: FFT length.
        hop_size: Hop between frames.
    """
    self.window = np.hanning(fft_size) if window is None else window
    self.fft_size = fft_size
    self.hop_size = hop_size
    n_taps = len(self.window)
    self.window_size = n_taps
    self.half_window = int(np.floor(n_taps / 2.0))
def __init__(self, window = None, fft_size = 1024, hop_size = 512, sample_rate = 44100):
    """Store the analysis window, the FFT/hop sizes and the sample rate.

    Args:
        window: Window array; a Hanning window of ``fft_size`` points is
            used when ``None``.
        fft_size: FFT length.
        hop_size: Hop between frames.
        sample_rate: Sample rate, stored as given.
    """
    self.window = np.hanning(fft_size) if window is None else window
    self.fft_size = fft_size
    self.hop_size = hop_size
    self.sample_rate = sample_rate
    n_taps = len(self.window)
    self.window_size = n_taps
    self.half_window = int(np.floor(n_taps / 2.0))
def __init__(self, img, start_pos, HOG_flag=0, dataformat=1, resize=1):
    """Initialise the tracker on the first frame.

    Args:
        img: First image; ``img.shape[0]`` and ``img.shape[1]`` are stored as
            the image size.
        start_pos: Initial box.  With ``dataformat`` true it is read as two
            corner points and converted to ``(x, y, w, h)``; otherwise it is
            used as given.
        HOG_flag: Selects the HOG parameter set and a fixed 32x32 target.
        dataformat: See ``start_pos``.
        resize: When true, the box and the image are halved.
    """
    self.HOG_flag = HOG_flag
    self.padding = 2
    self.dataformat = dataformat
    self.resize = resize
    self.img_size = img.shape[0], img.shape[1]
    if self.dataformat:
        w, h = start_pos[2] - start_pos[0], start_pos[3] - start_pos[1]
        self.pos = start_pos[0], start_pos[1], w, h
    else:
        self.pos = start_pos
    if self.resize:
        # NOTE(review): under Python 3 this yields floats for integer boxes;
        # confirm whether integer coordinates are required downstream.
        self.pos = tuple([ele/2 for ele in self.pos])
        # cv2.resize needs an integer size; ``/`` gives floats on Python 3.
        self.img_size = img.shape[0] // 2, img.shape[1] // 2
        img = cv2.resize(img, self.img_size[::-1])
    object_size = self.pos[2:]
    if self.HOG_flag:
        self.target_size = 32, 32
        self.l = 0.0001
        self.sigma = 0.6
        self.f = 0.012
    else:
        self.target_size = object_size[0]*self.padding, object_size[1]*self.padding
        self.l = 0.0001
        self.sigma = 0.2
        self.f = 0.02
    output_sigma_factor = 1/float(8)
    output_sigma = np.sqrt(np.prod(self.target_size)) * output_sigma_factor
    self.cos_window = np.outer(np.hanning(self.target_size[0]), np.hanning(self.target_size[1]))
    self.y = tracker.generate_gaussian(self.target_size, output_sigma)
    x = tracker.get_window(img, self.pos, self.padding)
    x = tracker.getFeature(x, self.cos_window, self.HOG_flag)
    self.alpha = tracker.train(x, self.y, self.sigma, self.l)
    self.z = x
def ideal_binary_mask(noisy_speech, clean_speech, snr, sr):
    """Compute the ideal binary mask from noisy and clean speech.

    Both signals are analysed with ``stft_extractor`` using a Hanning window
    of 20 ms and a hop of 10 ms.  A bin is 1 where the ratio of clean power
    to the power of (noisy - clean) exceeds ``10 ** (0.1 * snr)``, else 0.

    Args:
        noisy_speech: Noisy signal.
        clean_speech: Clean signal.
        snr: Threshold in dB.
        sr: Sampling rate.

    Returns:
        Integer array of zeros and ones with the shape of the spectra.
    """
    # Frame sizes are used as array shapes and loop bounds downstream, so
    # they must be integers; ``sr * 0.02`` is a float.
    win_len = int(round(sr * 0.02))
    shift_len = int(round(sr * 0.01))
    noisy_spect = stft_extractor(noisy_speech, win_len, shift_len, 'hanning')
    clean_spect = stft_extractor(clean_speech, win_len, shift_len, 'hanning')
    spect_snr = np.power(np.abs(clean_spect), 2.0) / np.power(np.abs(noisy_spect - clean_spect), 2.0)
    ibm = np.where(spect_snr > 10**(0.1*snr), 1, 0)
    return ibm
def ideal_ratio_mask(noisy_speech, clean_speech, sr):
    """Compute the ratio of clean to noisy STFT magnitudes.

    Both signals are analysed with ``stft_extractor`` using a Hanning window
    of 20 ms and a hop of 10 ms.

    Args:
        noisy_speech: Noisy signal.
        clean_speech: Clean signal.
        sr: Sampling rate.

    Returns:
        Array ``|clean| / |noisy|`` with the shape of the spectra.
    """
    # Frame sizes are used as array shapes and loop bounds downstream, so
    # they must be integers; ``sr * 0.02`` is a float.
    win_len = int(round(sr * 0.02))
    shift_len = int(round(sr * 0.01))
    noisy_spect = stft_extractor(noisy_speech, win_len, shift_len, 'hanning')
    clean_spect = stft_extractor(clean_speech, win_len, shift_len, 'hanning')
    irm = np.abs(clean_spect) / np.abs(noisy_spect)
    return irm
def synthesis_speech(noisy_speech, ideal_mask, win_type, win_len, shift_len, syn_method='A&R'):
    """Resynthesise speech from a masked STFT by overlap-add.

    Each frame of ``noisy_speech`` is windowed and transformed, its one-sided
    magnitude is multiplied by the matching column of ``ideal_mask`` while
    the phase is kept, and the inverse transform is overlap-added.

    Args:
        noisy_speech: 1-D signal array.
        ideal_mask: Array of shape ``(win_len // 2 + 1, frames)``.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.
        win_len: Frame and FFT length in samples.
        shift_len: Hop between frames in samples.
        syn_method: ``'A&R'`` / ``'ALLEN & RABINER'`` normalises by the summed
            window; ``'G&L'`` / ``'GRIFFIN & LIM'`` re-windows each segment
            and normalises by the summed squared window.  With any other
            value nothing is accumulated and the output is all zeros.

    Returns:
        float32 array of length ``(frames - 1) * shift_len + win_len`` with
        ``frames = (len(noisy_speech) - win_len) // shift_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    samples = noisy_speech.shape[0]
    frames = (samples - win_len) // shift_len
    if win_type == 'hanning':
        window = np.hanning(win_len)
    elif win_type == 'hamming':
        window = np.hamming(win_len)
    elif win_type == 'rectangle':
        window = np.ones(win_len)
    else:
        # Previously an unknown name left ``window`` unbound.
        raise ValueError("unsupported win_type: %r" % (win_type,))
    half = win_len // 2 + 1
    out_len = (frames - 1) * shift_len + win_len
    to_ifft = np.zeros(win_len, dtype=np.complex64)
    clean_speech = np.zeros(out_len, dtype=np.float32)
    window_sum = np.zeros(out_len, dtype=np.float32)
    for i in range(frames):
        start = i * shift_len
        stop = start + win_len
        windowed_frame = np.multiply(noisy_speech[start:stop], window)
        stft = np.fft.fft(windowed_frame, win_len)
        masked_abs = np.abs(stft[:half]) * ideal_mask[:, i]
        to_ifft[:half] = masked_abs * np.exp(1j * np.angle(stft[:half]))
        # Rebuild the negative-frequency half by conjugate symmetry.  The
        # start index (win_len + 1) // 2 - 1 equals win_len // 2 - 1 for even
        # lengths and also gives the right count for odd lengths.
        to_ifft[half:] = np.conj(to_ifft[(win_len + 1) // 2 - 1:0:-1])
        speech_seg = np.real(np.fft.ifft(to_ifft, win_len))
        if syn_method == 'A&R' or syn_method == 'ALLEN & RABINER':
            clean_speech[start:stop] += speech_seg
            window_sum[start:stop] += window
        elif syn_method == 'G&L' or syn_method == 'GRIFFIN & LIM':
            speech_seg = np.multiply(speech_seg, window)
            clean_speech[start:stop] += speech_seg
            window_sum[start:stop] += np.power(window, 2.)
    # Floor the normaliser to avoid dividing by (near) zero at the edges.
    window_sum = np.where(window_sum < 1e-2, 1e-2, window_sum)
    return clean_speech / window_sum
def cochleagram_extractor(xx, sr, win_len, shift_len, channel_number, win_type):
    """Compute a cube-root-compressed cochleagram of ``xx``.

    The signal is filtered with ``erb_frilter_bank``, squared, and the
    weighted mean of each frame is square-rooted per channel.  Exact zeros
    are replaced by machine epsilon before the cube root is taken.

    Args:
        xx: Input signal handed to ``erb_frilter_bank``.
        sr: Sampling rate handed to ``make_erb_filters``.
        win_len: Frame length in samples.
        shift_len: Hop between frames in samples.
        channel_number: Number of filter-bank channels.
        win_type: ``'hanning'``, ``'hamming'`` or ``'triangle'``; anything
            else selects an all-ones weighting.

    Returns:
        Array of shape ``(channel_number, frames)``.
    """
    fcoefs, _ = make_erb_filters(sr, channel_number, 50)
    xf = erb_frilter_bank(xx, np.flipud(fcoefs))

    if win_type == 'hanning':
        weights = np.hanning(channel_number)
    elif win_type == 'hamming':
        weights = np.hamming(channel_number)
    elif win_type == 'triangle':
        weights = (1 - (np.abs(channel_number - 1 - 2 * np.arange(1, channel_number + 1, 1)) / (channel_number + 1)))
    else:
        weights = np.ones(channel_number)
    # NOTE(review): the weights have one entry per channel, not per time
    # sample, so they scale whole channels -- confirm this is intended.
    weight_block = np.repeat(weights.reshape((channel_number, 1)), win_len, 1)

    energy = np.power(xf, 2.0)
    frames = 1 + ((np.size(energy, 1) - win_len) // shift_len)
    cochleagram = np.zeros((channel_number, frames))
    for col in range(frames):
        start = col * shift_len
        weighted = np.multiply(energy[:, start:start + win_len], weight_block)
        cochleagram[:, col] = np.sqrt(np.mean(weighted, 1))
    nonzero = np.where(cochleagram == 0.0, np.finfo(float).eps, cochleagram)
    return np.power(nonzero, 1./3)
def ideal_binary_mask(noisy_speech, clean_speech, snr):
    """Compute the ideal binary mask from noisy and clean speech.

    Both signals are analysed with ``stft_extractor`` (frame 320, hop 160,
    Hanning window).  A bin is 1 where the ratio of clean power to the power
    of (noisy - clean) exceeds ``10 ** (0.1 * snr)``, else 0.
    """
    frame_len, hop = 320, 160
    noisy_spect = stft_extractor(noisy_speech, frame_len, hop, 'hanning')
    clean_spect = stft_extractor(clean_speech, frame_len, hop, 'hanning')
    clean_power = np.power(np.abs(clean_spect), 2.0)
    noise_power = np.power(np.abs(noisy_spect - clean_spect), 2.0)
    threshold = 10**(0.1*snr)
    return np.where(clean_power / noise_power > threshold, 1, 0)
def synthesis_speech(ns, mk, win_type, win_len, shift_len, syn_method='A&R'):
    """Resynthesise speech from a masked STFT by overlap-add.

    Each frame of ``ns`` is windowed and transformed, its one-sided magnitude
    is multiplied by the matching column of ``mk`` while the phase is kept,
    and the inverse transform is overlap-added.

    Args:
        ns: 1-D (noisy) signal array.
        mk: Mask array of shape ``(win_len // 2 + 1, frames)``.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.
        win_len: Frame and FFT length in samples.
        shift_len: Hop between frames in samples.
        syn_method: ``'A&R'`` / ``'ALLEN & RABINER'`` normalises by the summed
            window; ``'G&L'`` / ``'GRIFFIN & LIM'`` re-windows each segment
            and normalises by the summed squared window.  With any other
            value nothing is accumulated and the output is all zeros.

    Returns:
        float32 array of length ``(frames - 1) * shift_len + win_len`` with
        ``frames = (len(ns) - win_len) // shift_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    samples = ns.shape[0]
    frames = (samples - win_len) // shift_len
    if win_type == 'hanning':
        window = np.hanning(win_len)
    elif win_type == 'hamming':
        window = np.hamming(win_len)
    elif win_type == 'rectangle':
        window = np.ones(win_len)
    else:
        # Previously an unknown name left ``window`` unbound.
        raise ValueError("unsupported win_type: %r" % (win_type,))
    half = win_len // 2 + 1
    out_len = (frames - 1) * shift_len + win_len
    to_ifft = np.zeros(win_len, dtype=np.complex64)
    clean_speech = np.zeros(out_len, dtype=np.float32)
    window_sum = np.zeros(out_len, dtype=np.float32)
    for i in range(frames):
        start = i * shift_len
        stop = start + win_len
        windowed_frame = np.multiply(ns[start:stop], window)
        stft = np.fft.fft(windowed_frame, win_len)
        masked_abs = np.abs(stft[:half]) * mk[:, i]
        to_ifft[:half] = masked_abs * np.exp(1j * np.angle(stft[:half]))
        # Rebuild the negative-frequency half by conjugate symmetry.  The
        # start index (win_len + 1) // 2 - 1 equals win_len // 2 - 1 for even
        # lengths and also gives the right count for odd lengths.
        to_ifft[half:] = np.conj(to_ifft[(win_len + 1) // 2 - 1:0:-1])
        # The inverse FFT length must match the frame length; it used to be
        # hard-coded to 320, which only worked for win_len == 320.
        speech_seg = np.real(np.fft.ifft(to_ifft, win_len))
        if syn_method == 'A&R' or syn_method == 'ALLEN & RABINER':
            clean_speech[start:stop] += speech_seg
            window_sum[start:stop] += window
        elif syn_method == 'G&L' or syn_method == 'GRIFFIN & LIM':
            speech_seg = np.multiply(speech_seg, window)
            clean_speech[start:stop] += speech_seg
            window_sum[start:stop] += np.power(window, 2.)
    # Floor the normaliser to avoid dividing by (near) zero at the edges.
    window_sum = np.where(window_sum < 1e-2, 1e-2, window_sum)
    return clean_speech / window_sum
def getCutlerDavisFrequencyScore(I, doPlot = False):
    """
    Compute a frequency score in the style of Cutler and Davis from the
    average power spectrum of the columns of the self-similarity matrix.

    :param I: An Nxd matrix representing a video with N frames at a resolution of
        d pixels
    :param doPlot: If true, show the SSM and average power spectrum across all columns
    :returns: (max(F) - mean(F)) / std(F) of the normalized averaged power
        spectrum F.  The kurtosis of F is also computed, but it is only shown
        in the plot title.
    """
    N = I.shape[0]
    (D, _) = getSSM(I, N)
    F = np.zeros(N)
    #For linearly detrending
    A = np.ones((N, 2))
    A[:, 1] = np.arange(N)
    #The Hann window does not depend on the column, so build it once
    hann = np.hanning(N)
    #Compute the power spectrum column by column
    for i in range(N):
        x = D[:, i]
        #Linearly detrend
        mb = np.linalg.lstsq(A, x)[0]
        y = x - A.dot(mb)
        #Apply Hann Window
        y = y*hann
        #Add on power spectrum
        F += np.abs(np.fft.fft(y))**2
    #Normalize the averaged power spectrum
    F = F/np.sum(F)
    F[0:2] = 0 #Ignore DC component
    F[-1] = 0
    kurt = scipy.stats.kurtosis(F, fisher = False)
    M = np.mean(F)
    S = np.std(F)
    if doPlot:
        plt.subplot(121)
        plt.imshow(D, cmap='afmhot', interpolation = 'none')
        plt.subplot(122)
        #plt.hold was removed from Matplotlib; successive plot calls already
        #draw onto the same axes
        plt.plot(F)
        plt.plot([0, N], [M, M], 'b')
        plt.plot([0, N], [M+2*S, M+2*S])
        plt.title("Kurtosis = %.3g"%kurt)
    return (np.max(F) - M)/S
def hanningWindow(nPix, percentage):
    """
    Return a 2D apodization mask with Hanning-tapered edges.

    A 1D profile of length ``nPix`` is built that equals one in the middle and
    follows the first and last halves of a Hanning window at its two ends;
    the 2D mask is the outer product of that profile with itself.

    Args:
        nPix (int): side length of the square output mask
        percentage (float): percentage of the side length spanned by the
            Hanning window (half of it is applied at each edge)

    Returns:
        numpy.ndarray: (nPix, nPix) apodization mask
    """
    # np.ceil returns a float; slice bounds must be integers
    M = int(np.ceil(nPix * percentage / 100.0))
    half = M // 2
    winOut = np.ones(nPix)
    # With half == 0 the slice ``-half:`` would select the whole array rather
    # than nothing, so skip the taper entirely in that case
    if half > 0:
        win = np.hanning(M)
        winOut[:half] = win[:half]
        winOut[-half:] = win[-half:]
    return np.outer(winOut, winOut)
# @jit
# def conv(spec, psf, nPixBorder):
# nx, ny, nlambda = spec.shape
# nxPSF, nyPSF, nPSF = psf.shape
# out = np.zeros_like(spec)
# for i in range(nx-2*nPixBorder):
# for j in range(ny-2*nPixBorder):
# for k in range(nxPSF):
# for l in range(nyPSF):
# out[i,j,0] += spec[i+k-nxPSF/2+nPixBorder,j+l-nyPSF/2+nPixBorder,0] * psf[k,l,i]
# return out
def fft(data, fs):
    """
    Return the one-sided, Hann-windowed amplitude spectrum of ``data``.

    The window and the FFT are applied along the last axis.

    :param data: array whose last axis holds the samples
    :param fs: sample rate, used to scale the frequency axis
    :return: tuple ``(freq_half, amplitude_half)`` holding the first
        ``ceil(n / 2)`` frequency bins and the magnitudes of the matching
        spectrum bins scaled by ``2 / n``
    """
    n = data.shape[-1]
    window = np.hanning(n)
    windowed = data * window
    spectrum = np.fft.fft(windowed)
    # Float literals keep the divisions exact even under integer division
    freq = np.fft.fftfreq(n, 1.0 / fs)
    # Integer ceil(n / 2); a float here cannot be used as a slice bound
    half_n = (n + 1) // 2
    spectrum_half = (2.0 / n) * spectrum[..., :half_n]
    freq_half = freq[:half_n]
    return freq_half, np.abs(spectrum_half)
def smooth1d(x, window_len):
    """
    Smooth a 1D array by convolving it with a normalized Hanning window.

    The input is extended at both ends by ``window_len - 1`` samples that are
    point-reflected about the first and last value, convolved with the
    window, and then cropped back to the original length.

    :param x: 1D numpy array to smooth (assumed longer than ``window_len``)
    :param window_len: number of points of the Hanning window
    :return: smoothed array with the same length as ``x``
    """
    s = np.r_[2*x[0] - x[window_len:1:-1], x, 2*x[-1] - x[-1:-window_len:-1]]
    w = np.hanning(window_len)
    y = np.convolve(w/w.sum(), s, mode='same')
    # Use an explicit end index: with window_len == 1 the original negative
    # bound became ``-0`` and produced an empty result
    return y[window_len - 1:len(y) - window_len + 1]
def _init_stretching(self):
    """
    Reset the internal state used for stretching.

    Sets the chunk/phase indices to zero, derives the chunk length and a hop
    of a quarter chunk from ``self.chunk_size``, builds the Hanning window and
    zeroed phase/output buffers, assigns a stretch factor of 1.0 when no
    ``_sf`` attribute exists yet, and finally calls ``self._zero_padding()``.
    """
    # Index of the current audio chunk and of the computed phase, respectively
    self._i1 = 0
    self._i2 = 0

    chunk_len = self.chunk_size
    hop_len = int(self.chunk_size / 4)
    self._N = chunk_len
    self._H = hop_len

    self._win = numpy.hanning(self._N)
    # Working buffers share the dtype of the input signal
    self._phi = numpy.zeros(self._N, dtype=self.y.dtype)
    self._sy = numpy.zeros(len(self.y), dtype=self.y.dtype)

    # NOTE(review): presumably assigning stretch_factor populates _sf - confirm
    factor_missing = not hasattr(self, '_sf')
    if factor_missing:
        self.stretch_factor = 1.0

    self._zero_padding()
def get_spectral_features(audio, fs, lf_limit=20):
    """
    This function calculates the spectral centroid and spectral spread of an audio array.

    The signal is Hann-windowed, zero-padded to the next power of two and
    transformed; the magnitude spectrum from ``lf_limit`` upwards is used as
    the weighting for both statistics.

    :param audio: Audio array (assumed 1D - confirm against callers)
    :param fs: Sample rate of audio file
    :param lf_limit: Low frequency limit, in Hz, to be analysed. Defaults to 20Hz.
    :return: Returns the spectral centroid and spectral spread
    :raises IndexError: if no analysed frequency is >= ``lf_limit``
    """
    # use a hanning window
    window = np.hanning(len(audio))
    next_pow_2 = int(pow(2, np.ceil(np.log2(len(window)))))
    # get frequency domain representation (non-negative frequencies only)
    spectrum = np.fft.fft((window * audio), next_pow_2)
    spectrum = np.absolute(spectrum[0:len(spectrum) // 2 + 1])
    freq = np.arange(0, len(spectrum), 1) * (fs / (2.0 * (len(spectrum) - 1)))
    # find lowest frequency index, zeros used to unpack result
    lf_limit_idx = np.where(freq >= lf_limit)[0][0]
    spectrum = spectrum[lf_limit_idx:]
    freq = freq[lf_limit_idx:]
    # calculate centroid and spread; np.sum avoids the element-by-element
    # Python loop of the builtin sum, and the total weight is computed once
    total = float(np.sum(spectrum))
    centroid = np.sum(spectrum * freq) / total
    spread = np.sqrt(np.sum(((freq - centroid) ** 2) * spectrum) / total)
    return centroid, spread
def spectrogram(samples, sample_rate, frame_len, fps, batch=50):
    """
    Computes a magnitude spectrogram for a given vector of samples at a given
    sample rate (in Hz), frame length (in samples) and frame rate (in Hz).
    Allows to transform multiple frames at once for improved performance (with
    a default value of 50, more is not always better). Returns a numpy array.

    Frames are Hanning-windowed and spaced ``sample_rate // fps`` samples
    apart. If there are fewer samples than one frame, an empty array with
    ``frame_len // 2 + 1`` columns is returned.
    """
    if len(samples) < frame_len:
        return np.empty((0, frame_len // 2 + 1), dtype=samples.dtype)
    win = np.hanning(frame_len)
    # Force an integer hop: it is used for strides, shapes and slicing, none
    # of which accept a float (e.g. when fps is given as a float)
    hopsize = int(sample_rate // fps)
    num_frames = max(0, (len(samples) - frame_len) // hopsize + 1)
    batch = min(batch, num_frames)
    if batch <= 1 or not samples.flags.c_contiguous:
        # Frame-by-frame path
        rfft = rfft_builder(samples[:frame_len], n=frame_len)
        # np.vstack needs a real sequence; generators are rejected by
        # current NumPy releases
        spect = np.vstack([np.abs(rfft(samples[pos:pos + frame_len] * win))
                           for pos in range(0, len(samples) - frame_len + 1,
                                            hopsize)])
    else:
        # Batched path: view the signal as overlapping frames without copying
        rfft = rfft_builder(np.empty((batch, frame_len), samples.dtype),
                            n=frame_len, threads=1)
        frames = np.lib.stride_tricks.as_strided(
            samples, shape=(num_frames, frame_len),
            strides=(samples.strides[0] * hopsize, samples.strides[0]))
        spect = [np.abs(rfft(frames[pos:pos + batch] * win))
                 for pos in range(0, num_frames - batch + 1, batch)]
        # Frames left over after the last full batch are handled one by one
        if num_frames % batch:
            spect.extend(spectrogram(
                samples[(num_frames // batch * batch) * hopsize:],
                sample_rate, frame_len, fps, batch=1))
        spect = np.vstack(spect)
    return spect
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """
    Short-time Fourier transform of a 1D signal.

    The signal is padded with ``frameSize // 2`` zeros in front and
    ``frameSize`` zeros at the end, cut into overlapping frames, multiplied by
    the window and transformed with a real FFT.

    :param sig: 1D sequence of samples
    :param frameSize: number of samples per frame
    :param overlapFac: fraction of a frame shared with the next frame
    :param window: callable returning a window of the requested length
    :return: complex array of shape (number of frames, frameSize // 2 + 1)
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    # zeros at beginning (thus center of 1st window should be for sample nr. 0);
    # floor division because an array size must be an integer
    samples = np.append(np.zeros(frameSize // 2), sig)
    # cols for windowing
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize)) + 1)
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))
    # Copy the strided view so the in-place windowing does not write through
    # overlapping memory
    frames = np.lib.stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)