Python numpy module, fromfile() example source code
We collected the following code examples from open-source Python projects to illustrate how to use numpy.fromfile().
def read_gnt_in_directory(gnt_dirpath):
def samples(f):
header_size = 10
# read samples from f until no bytes remaining
while True:
header = np.fromfile(f, dtype='uint8', count=header_size)
if not header.size: break
sample_size = header[0] + (header[1]<<8) + (header[2]<<16) + (header[3]<<24)
tagcode = header[5] + (header[4]<<8)
width = header[6] + (header[7]<<8)
height = header[8] + (header[9]<<8)
assert header_size + width*height == sample_size
bitmap = np.fromfile(f, dtype='uint8', count=width*height).reshape((height, width))
yield bitmap, tagcode
for file_name in os.listdir(gnt_dirpath):
if file_name.endswith('.gnt'):
file_path = os.path.join(gnt_dirpath, file_name)
with open(file_path, 'rb') as f:
for bitmap, tagcode in samples(f):
yield bitmap, tagcode
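A minimal usage sketch for the reader above; the directory path is a placeholder, and decoding the 2-byte tag code as GB2312 is a common convention for CASIA GNT data, assumed here rather than taken from the snippet itself.
import struct

def collect_gnt_labels(gnt_dirpath='CASIA_gnt_dir'):
    # Iterate every (bitmap, tagcode) pair yielded by read_gnt_in_directory
    # and decode the big-endian tag code bytes as a GB2312 character.
    labels = []
    for bitmap, tagcode in read_gnt_in_directory(gnt_dirpath):
        labels.append(struct.pack('>H', tagcode).decode('gb2312', errors='replace'))
    return labels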
def read_flow(path, filename):
flowdata = None
    with open(path + filename + '.flo', 'rb') as f:
        # Valid .flo file checker
        magic = np.fromfile(f, np.float32, count=1)
        if 202021.25 != magic:
            print('Magic number incorrect. Invalid .flo file')
else:
# Reshape data into 3D array (columns, rows, bands)
w = int(np.fromfile(f, np.int32, count=1))
h = int(np.fromfile(f, np.int32, count=1))
#print 'Reading {}.flo with shape: ({}, {}, 2)'.format(filename, h, w)
flowdata = np.fromfile(f, np.float32, count=2*w*h)
# NOTE: numpy shape(h, w, ch) is opposite to image shape(w, h, ch)
flowdata = np.reshape(flowdata, (h, w, 2))
return flowdata
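For reference, a sketch of the complementary writer for the Middlebury .flo layout read above (a float32 magic number 202021.25, then int32 width and height, then float32 flow values); the name write_flow is my own, not part of the project the snippet came from.
import numpy as np

def write_flow(flowdata, filepath):
    # flowdata: float32 array of shape (height, width, 2)
    h, w = flowdata.shape[:2]
    with open(filepath, 'wb') as f:
        np.array([202021.25], dtype=np.float32).tofile(f)   # magic number
        np.array([w, h], dtype=np.int32).tofile(f)          # width, height
        flowdata.astype(np.float32).tofile(f)               # row-major (h, w, 2) values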
def _read(self, stream, text, byte_order):
'''
Read the actual data from a PLY file.
'''
if text:
self._read_txt(stream)
else:
if self._have_list:
# There are list properties, so a simple load is
# impossible.
self._read_bin(stream, byte_order)
else:
# There are no list properties, so loading the data is
# much more straightforward.
self._data = _np.fromfile(stream,
self.dtype(byte_order),
self.count)
if len(self._data) < self.count:
k = len(self._data)
del self._data
raise PlyParseError("early end-of-file", self, k)
self._check_sanity()
def _read(self, stream, text, byte_order):
'''
Read the actual data from a PLY file.
'''
if self._have_list:
# There are list properties, so a simple load is
# impossible.
if text:
self._read_txt(stream)
else:
self._read_bin(stream, byte_order)
else:
# There are no list properties, so loading the data is
# much more straightforward.
if text:
self.data = _np.loadtxt(
_islice(iter(stream.readline, ''), self.count),
self.dtype())
else:
self.data = _np.fromfile(
stream, self.dtype(byte_order), self.count)
def load_raw(filename, volsize):
""" inspired by mhd_utils from github"""
dim = 3
element_channels = 1
np_type = np.ubyte
arr = list(volsize)
volume = np.prod(arr[0:dim - 1])
shape = (arr[dim - 1], volume, element_channels)
with open(filename,'rb') as fid:
data = np.fromfile(fid, count=np.prod(shape),dtype = np_type)
data.shape = shape
arr.reverse()
data = data.reshape(arr)
return data
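A usage sketch with made-up arguments: 'volume.raw' and the (x, y, z) size are placeholders. Note that load_raw reverses the axis order, so a (256, 256, 128) volsize comes back as a (128, 256, 256) uint8 array.
vol = load_raw('volume.raw', (256, 256, 128))
print(vol.shape, vol.dtype)   # (128, 256, 256) uint8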
def __read_annotations_old(self):
"""
Read the stimulus grid properties.
Returns a dictionary containing the parameter names as keys and the
parameter values as values.
------------------------------------------------
The returned objects must be added to the Block.
    This reads an old version of the format that does not store parameter
names, so placeholder names are created instead.
ID: 29099
"""
# int16 * 14 -- an array of parameter values
values = np.fromfile(self._fsrc, dtype=np.int16, count=14)
# create dummy names and combine them with the values in a dict
# the dict will be added to the annotations
params = ['param%s' % i for i in range(len(values))]
annotations = dict(zip(params, values))
return annotations
def __read_spike_fixed(self, numpts=40):
"""
Read a spike with a fixed waveform length (40 time bins)
-------------------------------------------
Returns the time, waveform and trig2 value.
The returned objects must be converted to a SpikeTrain then
added to the Block.
ID: 29079
"""
# float32 -- spike time stamp in ms since start of SpikeTrain
time = np.fromfile(self._fsrc, dtype=np.float32, count=1)
# int8 * 40 -- spike shape -- use numpts for spike_var
waveform = np.fromfile(self._fsrc, dtype=np.int8,
count=numpts).reshape(1, 1, numpts)
# uint8 -- point of return to noise
trig2 = np.fromfile(self._fsrc, dtype=np.uint8, count=1)
return time, waveform, trig2
def __read_id(self):
'''
Read the next ID number and do the appropriate task with it.
Returns nothing.
'''
try:
# float32 -- ID of the first data sequence
objid = np.fromfile(self._fsrc, dtype=np.float32, count=1)[0]
except IndexError:
# if we have a previous segment, save it
self.__save_segment()
# if there are no more Segments, return
return False
if objid == -2:
self.__read_condition()
elif objid == -1:
self.__read_segment()
else:
self.__spiketimes.append(objid)
return True
def __read_condition(self):
'''
Read the parameter values for a single stimulus condition.
Returns nothing.
'''
# float32 -- SpikeTrain length in ms
self.__t_stop = np.fromfile(self._fsrc, dtype=np.float32, count=1)[0]
# float32 -- number of stimulus parameters
numelements = int(np.fromfile(self._fsrc, dtype=np.float32,
count=1)[0])
# [float32] * numelements -- stimulus parameter values
paramvals = np.fromfile(self._fsrc, dtype=np.float32,
count=numelements).tolist()
    # organize the parameters into a dictionary with arbitrary names
paramnames = ['Param%s' % i for i in range(len(paramvals))]
self.__params = dict(zip(paramnames, paramvals))
def __extract_nsx_file_spec(self, nsx_nb):
"""
Extract file specification from an .nsx file.
"""
filename = '.'.join([self._filenames['nsx'], 'ns%i' % nsx_nb])
# Header structure of files specification 2.2 and higher. For files 2.1
# and lower, the entries ver_major and ver_minor are not supported.
dt0 = [
('file_id', 'S8'),
('ver_major', 'uint8'),
('ver_minor', 'uint8')]
nsx_file_id = np.fromfile(filename, count=1, dtype=dt0)[0]
if nsx_file_id['file_id'].decode() == 'NEURALSG':
spec = '2.1'
elif nsx_file_id['file_id'].decode() == 'NEURALCD':
spec = '{0}.{1}'.format(
nsx_file_id['ver_major'], nsx_file_id['ver_minor'])
else:
raise IOError('Unsupported NSX file type.')
return spec
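A self-contained sketch of the same header-reading technique with a structured dtype; the file name and field values below are invented for illustration.
import numpy as np

dt0 = np.dtype([('file_id', 'S8'), ('ver_major', 'uint8'), ('ver_minor', 'uint8')])

# Write a fake 10-byte header, then read it back as a single structured record.
header = np.zeros(1, dtype=dt0)
header['file_id'] = b'NEURALCD'
header['ver_major'], header['ver_minor'] = 2, 3
header.tofile('fake_header.bin')

rec = np.fromfile('fake_header.bin', dtype=dt0, count=1)[0]
print(rec['file_id'].decode(), rec['ver_major'], rec['ver_minor'])   # NEURALCD 2 3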
def __extract_nev_file_spec(self):
"""
    Extract file specification from a .nev file.
"""
filename = '.'.join([self._filenames['nsx'], 'nev'])
# Header structure of files specification 2.2 and higher. For files 2.1
# and lower, the entries ver_major and ver_minor are not supported.
dt0 = [
('file_id', 'S8'),
('ver_major', 'uint8'),
('ver_minor', 'uint8')]
nev_file_id = np.fromfile(filename, count=1, dtype=dt0)[0]
if nev_file_id['file_id'].decode() == 'NEURALEV':
spec = '{0}.{1}'.format(
nev_file_id['ver_major'], nev_file_id['ver_minor'])
else:
raise IOError('NEV file type {0} is not supported'.format(
nev_file_id['file_id']))
return spec
def __read_spike_var(self):
"""
Read a spike with a variable waveform length
-------------------------------------------
Returns the time, waveform and trig2 value.
The returned objects must be converted to a SpikeTrain then
added to the Block.
ID: 29115
"""
# uint8 -- number of points in spike shape
numpts = np.fromfile(self._fsrc, dtype=np.uint8, count=1)[0]
# spike_fixed is the same as spike_var if you don't read the numpts
# byte and set numpts = 40
return self.__read_spike_fixed(numpts)
def readheader(self):
self.fh.seek(0,0)
spam = self.fh.read(12)
self.nSamples, self.sampPeriod, self.sampSize, self.parmKind = \
unpack(">IIHH", spam)
# Get coefficients for compressed data
if self.parmKind & _C:
self.dtype = 'h'
        self.veclen = self.sampSize // 2
if self.parmKind & 0x3f == IREFC:
self.A = 32767
self.B = 0
else:
self.A = numpy.fromfile(self.fh, 'f', self.veclen)
self.B = numpy.fromfile(self.fh, 'f', self.veclen)
if self.swap:
self.A = self.A.byteswap()
self.B = self.B.byteswap()
else:
self.dtype = 'f'
        self.veclen = self.sampSize // 4
self.hdrlen = self.fh.tell()
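A sketch, not part of the snippet above, of reading the frame matrix that normally follows this HTK header, assuming big-endian storage and the veclen computed above; pass compressed=True when the _C bit was set.
import numpy as np

def read_htk_frames(fh, n_samples, veclen, compressed=False):
    # HTK parameter data is big-endian: int16 when compressed, float32 otherwise.
    dtype = '>i2' if compressed else '>f4'
    data = np.fromfile(fh, dtype=dtype, count=n_samples * veclen)
    return data.reshape(n_samples, veclen)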
def test_silence_frame_removal_given_hts_labels():
qs_file_name = join(DATA_DIR, "questions-radio_dnn_416.hed")
binary_dict, continuous_dict = hts.load_question_set(qs_file_name)
input_state_label = join(DATA_DIR, "label_state_align", "arctic_a0001.lab")
labels = hts.load(input_state_label)
features = fe.linguistic_features(labels,
binary_dict,
continuous_dict,
add_frame_features=True,
subphone_features="full"
)
# Remove silence frames
indices = labels.silence_frame_indices()
features = np.delete(features, indices, axis=0)
y = np.fromfile(join(DATA_DIR, "nn_no_silence_lab_425", "arctic_a0001.lab"),
dtype=np.float32).reshape(-1, features.shape[-1])
assert features.shape == y.shape
assert np.allclose(features, y)
# Make sure we can get same results with Merlin
def test_linguistic_features_for_acoustic_model():
qs_file_name = join(DATA_DIR, "questions-radio_dnn_416.hed")
binary_dict, continuous_dict = hts.load_question_set(qs_file_name)
# Linguistic features
# To train acoustic model paired with linguistic features,
# we need frame-level linguistic feature representation.
input_state_label = join(DATA_DIR, "label_state_align", "arctic_a0001.lab")
labels = hts.load(input_state_label)
assert labels.is_state_alignment_label()
x = fe.linguistic_features(labels,
binary_dict,
continuous_dict,
add_frame_features=True,
subphone_features="full"
)
y = np.fromfile(join(DATA_DIR, "binary_label_425",
"arctic_a0001.lab"), dtype=np.float32).reshape(-1, x.shape[-1])
assert np.allclose(x, y)
def read_array(self, dtype, count=-1, sep=""):
"""Return numpy array from file.
Work around numpy issue #2230, "numpy.fromfile does not accept
StringIO object" https://github.com/numpy/numpy/issues/2230.
"""
try:
return numpy.fromfile(self._fh, dtype, count, sep)
except IOError:
if count < 0:
size = self._size
else:
size = count * numpy.dtype(dtype).itemsize
data = self._fh.read(size)
return numpy.fromstring(data, dtype, count, sep)
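The issue worked around above is that numpy.fromfile needs a real file descriptor. For purely in-memory streams, numpy.frombuffer (the non-deprecated replacement for numpy.fromstring) is the usual alternative; a small sketch:
import io
import numpy as np

buf = io.BytesIO(np.arange(5, dtype=np.int32).tobytes())
# np.fromfile(buf, np.int32) fails because BytesIO has no file descriptor;
# read the raw bytes and parse them instead.
arr = np.frombuffer(buf.read(), dtype=np.int32)
print(arr)   # [0 1 2 3 4]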
def test_file_position_after_fromfile(self):
# gh-4118
sizes = [io.DEFAULT_BUFFER_SIZE//8,
io.DEFAULT_BUFFER_SIZE,
io.DEFAULT_BUFFER_SIZE*8]
for size in sizes:
f = open(self.filename, 'wb')
f.seek(size-1)
f.write(b'\0')
f.close()
for mode in ['rb', 'r+b']:
err_msg = "%d %s" % (size, mode)
f = open(self.filename, mode)
f.read(2)
np.fromfile(f, dtype=np.float64, count=1)
pos = f.tell()
f.close()
assert_equal(pos, 10, err_msg=err_msg)
def test_big_binary(self):
"""Test workarounds for 32-bit limited fwrite, fseek, and ftell
calls in windows. These normally would hang doing something like this.
See http://projects.scipy.org/numpy/ticket/1660"""
if sys.platform != 'win32':
return
try:
# before workarounds, only up to 2**32-1 worked
fourgbplus = 2**32 + 2**16
testbytes = np.arange(8, dtype=np.int8)
n = len(testbytes)
flike = tempfile.NamedTemporaryFile()
f = flike.file
np.tile(testbytes, fourgbplus // testbytes.nbytes).tofile(f)
flike.seek(0)
a = np.fromfile(f, dtype=np.int8)
flike.close()
assert_(len(a) == fourgbplus)
# check only start and end for speed:
assert_((a[:n] == testbytes).all())
assert_((a[-n:] == testbytes).all())
except (MemoryError, ValueError):
pass
def load_mnist(data_dir):
    fd = open(os.path.join(data_dir, 'train-images-idx3-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    trX = loaded[16:].reshape((60000, np.prod(input_shape))).astype(float) / 255
    fd = open(os.path.join(data_dir, 'train-labels-idx1-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    trY = loaded[8:].reshape((60000))
    fd = open(os.path.join(data_dir, 't10k-images-idx3-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    teX = loaded[16:].reshape((10000, np.prod(input_shape))).astype(float) / 255
    fd = open(os.path.join(data_dir, 't10k-labels-idx1-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
teY = loaded[8:].reshape((10000))
trY = np.asarray(trY)
teY = np.asarray(teY)
return trX, teX, trY, teY
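The offsets 16 and 8 skipped above are the IDX headers of the MNIST files: a big-endian int32 magic number followed by big-endian int32 dimensions. A sketch that parses the image header explicitly instead of slicing it away:
import numpy as np

def load_idx_images(path):
    # IDX image file: magic, count, rows, cols as big-endian int32, then uint8 pixels.
    with open(path, 'rb') as fd:
        magic, count, rows, cols = np.fromfile(fd, dtype='>i4', count=4)
        pixels = np.fromfile(fd, dtype=np.uint8)
    return pixels.reshape(count, rows, cols)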
def load_pfm(filepath, reverse = 1):
file = open(filepath, 'rb')
color = None
width = None
height = None
scale = None
endian = None
header = file.readline().rstrip()
    color = (header == b'PF')
    width, height = map(int, file.readline().strip().split())
scale = float(file.readline().rstrip())
endian = '<' if(scale < 0) else '>'
scale = abs(scale)
rawdata = np.fromfile(file, endian + 'f')
shape = (height, width, 3) if color else (height, width)
file.close()
if(color):
return rawdata.reshape(shape).astype(np.float32)[:,:,::-1]
else:
return rawdata.reshape(shape).astype(np.float32)
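A brief usage sketch with a placeholder path. PFM files conventionally store scanlines bottom-to-top, so callers often flip the result vertically; whether that is needed depends on how the file was produced.
import numpy as np

img = load_pfm('disparity.pfm')
img = np.flipud(img)           # undo bottom-to-top scanline order if present
print(img.shape, img.dtype)    # (H, W) or (H, W, 3), float32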
def mnist():
    fd = open(os.path.join(data_dir, 'train-images-idx3-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    trX = loaded[16:].reshape((60000, 28 * 28)).astype(float)
    fd = open(os.path.join(data_dir, 'train-labels-idx1-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    trY = loaded[8:].reshape((60000))
    fd = open(os.path.join(data_dir, 't10k-images-idx3-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    teX = loaded[16:].reshape((10000, 28 * 28)).astype(float)
    fd = open(os.path.join(data_dir, 't10k-labels-idx1-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
teY = loaded[8:].reshape((10000))
trY = np.asarray(trY)
teY = np.asarray(teY)
return trX, teX, trY, teY
def _holes_of_line(self, line=0):
try:
amount_of_holes, hole_id = self.hole_registry[line]
for i in range(amount_of_holes):
self.hole_coord_amount.seek(2 * hole_id)
nr_of_values = unpack(b'<H', self.hole_coord_amount.read(2))[0]
self.hole_adr2data.seek(4 * hole_id)
self.hole_data.seek(unpack(b'<I', self.hole_adr2data.read(4))[0])
yield array([fromfile(self.hole_data, dtype='<i4', count=nr_of_values),
fromfile(self.hole_data, dtype='<i4', count=nr_of_values)])
hole_id += 1
except KeyError:
return
def readFile(self, filename):
'''
readFile(filename)
Reads a GDSII file and populate the library object
Parameters
----------
filename : string
Name of GDSII file
'''
if not isinstance(filename,str):
raise TypeError('GDSII_Library.readFile() : The filename must be a string')
if filename[-4:].lower() == '.gds':
filename = filename[:-4]
f = open(filename + '.gds','rb')
record = np.fromfile(f, dtype=np.uint8)
f.close()
self.readRecord(record)
def create_bfcr(filename):
"""Creates a BFCR instance for a given file.
    This helper function loads a label file and its corresponding mgc file and
creates a bfcr file from them. The paths of both files are determined
automatically.
    :param filename: filename from which the BFCR instance is created
:returns: an instance of the BFCR class
"""
filename = os.path.splitext(os.path.basename(filename))[0]
label_file = LABEL_DIR + filename + '.lab'
mgc_file = MGC_DIR + filename + '.mgc'
mgc_matrix = np.fromfile(mgc_file, dtype=np.float32).reshape(-1, MGCORD+1)
bfcr = BFCR(label_file)
bfcr.encode_feature(mgc_matrix, 'mgc', NUM_BASES)
return bfcr
def _load_original_matrix(self):
"""Loades the original mgc matrix for the test file.
This helper method loads the original matrix from the *.mgc file and
matrix for the test file and also computes the staring times of the
different phones.
:returns: the original mgc matrix
:returns: starting times of the phones
"""
mgc_matrix = np.fromfile(MGC_DIR + self._filename + '.mgc', dtype=np.float32).reshape(-1, MGCORD+1)
label = Label(LABEL_DIR + self._filename + '.lab')
step_size = mgc_matrix.shape[0]/label.last_phone_end
phone_starts = [int(round(p[1]*step_size)) for p in label.cur_phones_additions()]
return mgc_matrix, phone_starts
def parse(self):
fname = self.fname.text()
if os.path.isfile(fname):
f = open(fname, "r")
else:
sys.stderr.write("Unable to open %s\n"%fname)
return
self.vol = np.fromfile(f, dtype='f8')
self.size = int(np.ceil(np.power(len(self.vol), 1./3.)))
self.vol = self.vol.reshape(self.size, self.size, self.size)
    self.center = self.size // 2
if not self.image_exists:
self.layer_slider.setRange(0, self.size-1)
self.layernum.setMaximum(self.size-1)
self.layer_slider.setValue(self.center)
self.layerslider_moved(self.center)
self.old_fname = fname
def _parse_headers(self):
self.num_data_list = []
self.ones_accum_list = []
self.multi_accum_list = []
self.num_pix = []
for i, photons_file in enumerate(self.photons_list):
with open(photons_file, 'rb') as f:
num_data = np.fromfile(f, dtype='i4', count=1)[0]
self.num_pix.append(np.fromfile(f, dtype='i4', count=1)[0])
if self.num_pix[i] != len(self.geom_list[i].x):
sys.stderr.write('Warning: num_pix for %s is different (%d vs %d)\n' % (photons_file, self.num_pix[i], len(self.geom_list[i].x)))
f.seek(1024, 0)
ones = np.fromfile(f, dtype='i4', count=num_data)
multi = np.fromfile(f, dtype='i4', count=num_data)
self.num_data_list.append(num_data)
self.ones_accum_list.append(np.cumsum(ones))
self.multi_accum_list.append(np.cumsum(multi))
self.num_data_list = np.cumsum(self.num_data_list)
self.num_frames = self.num_data_list[-1]
def read_data(self, start=None, end=None):
"""read data from file and store it locally"""
nframe = self._find_nframe_from_file()
seek_to_data(self.file_object)
read_start = 0
end_read = nframe * self.nifs * self.nchans
if start is not None:
if start < 0:
read_start = (nframe + start) * self.nifs * self.nchans
elif start >= 0:
read_start = start * self.nifs * self.nchans
if end is not None:
if end < 0:
end_read = (nframe + end) * self.nifs * self.nchans
elif end >= 0:
end_read = end * self.nifs * self.nchans
self.file_object.seek(read_start, os.SEEK_CUR)
nbytes_to_read = end_read - read_start
data = np.fromfile(self.file_object, count=nbytes_to_read, dtype=self.dtype)
nframe = data.size // self.nifs // self.nchans
data = data.reshape((nframe, self.nifs, self.nchans))
if self.nbits < 8:
data = unpack(data, self.nbits)
self.data = data
return self.data
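For very large raw files, numpy.memmap is an alternative to numpy.fromfile that maps the data instead of copying it into memory; the file name, header size, and channel count below are assumptions, not values taken from the reader above.
import numpy as np

header_bytes = 256                     # assumed size of the file header
nchans = 1024                          # assumed number of channels per frame
mm = np.memmap('data.fil', dtype=np.uint8, mode='r', offset=header_bytes)
nframe = mm.size // nchans
frames = mm[:nframe * nchans].reshape(nframe, nchans)   # still lazily mapped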
def readheader(self):
self.fh.seek(0,0)
spam = self.fh.read(12)
self.nSamples, self.sampPeriod, self.sampSize, self.parmKind = \
unpack(">IIHH", spam)
# Get coefficients for compressed data
if self.parmKind & _C:
self.dtype = 'h'
        self.veclen = self.sampSize // 2
if self.parmKind & 0x3f == IREFC:
self.A = 32767
self.B = 0
else:
self.A = numpy.fromfile(self.fh, 'f', self.veclen)
self.B = numpy.fromfile(self.fh, 'f', self.veclen)
if self.swap:
self.A = self.A.byteswap()
self.B = self.B.byteswap()
else:
self.dtype = 'f'
        self.veclen = self.sampSize // 4
self.hdrlen = self.fh.tell()
self.veclen = int(self.veclen)
def read_from_gnt_dir(gnt_dir=train_data_dir):
def one_file(f):
header_size = 10
while True:
header = np.fromfile(f, dtype='uint8', count=header_size)
if not header.size: break
sample_size = header[0] + (header[1]<<8) + (header[2]<<16) + (header[3]<<24)
tagcode = header[5] + (header[4]<<8)
width = header[6] + (header[7]<<8)
height = header[8] + (header[9]<<8)
if header_size + width*height != sample_size:
break
image = np.fromfile(f, dtype='uint8', count=width*height).reshape((height, width))
yield image, tagcode
for file_name in os.listdir(gnt_dir):
if file_name.endswith('.gnt'):
file_path = os.path.join(gnt_dir, file_name)
with open(file_path, 'rb') as f:
for image, tagcode in one_file(f):
yield image, tagcode
def get_embedding():
embedding_path = os.path.join(FLAGS.datasets_dir, "wordVectors.txt")
if not tf.gfile.Exists(embedding_path):
raise ValueError("embedding file not exists")
# embedding = np.fromfile(embedding_path, sep=' ')
# print("embedding size:", embedding.shape)
# print("embedding size:", embedding.dtype)
# embedding.reshape(100232, 50)
# print("embedding size:", embedding.shape)
data = np.fromfile(embedding_path, dtype=np.float32, sep=' ')
print("shape:", data.shape)
print("ndim:", data.ndim)
print("dtype:", data.dtype)
print(data)
print("reshape vocabulary")
d = data.reshape((-1, 50))
print("shape:", d.shape)
print("ndim:", d.ndim)
print("dtype:", d.dtype)
print(d)
return d
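The sep=' ' argument used above switches numpy.fromfile from binary reading to whitespace-separated text parsing; a throwaway round trip:
import numpy as np

np.savetxt('vectors.txt', np.arange(12, dtype=np.float32).reshape(4, 3))
flat = np.fromfile('vectors.txt', dtype=np.float32, sep=' ')
print(flat.reshape(-1, 3).shape)   # (4, 3)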
def plot_conf_mat(densmap_name):
fig = plt.figure(figsize = (20,20))
plt.clf()
ax = fig.add_subplot(111)
#ax.set_aspect(1)
densmap = np.fromfile(densmap_name, np.float32)
densmap = densmap.reshape(227, 227)
densmap *= 100
densmap[densmap > 1] = 1
res = ax.imshow(densmap, cmap = plt.cm.jet,
interpolation = 'nearest')
plt.savefig('density.jpg')
img = cv2.imread("density.jpg")
img = cv2.resize(img, (227,227))
cv2.imshow("i", img)#
cv2.waitKey(0)
#plt.show()
def load_mnist(self):
data_dir = os.path.join("./data", "mnist")
    fd = open(os.path.join(data_dir, 'train-images-idx3-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    trX = loaded[16:].reshape((60000, 28, 28, 1)).astype(float)
    fd = open(os.path.join(data_dir, 'train-labels-idx1-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    trY = loaded[8:].reshape((60000)).astype(float)
    fd = open(os.path.join(data_dir, 't10k-images-idx3-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    teX = loaded[16:].reshape((10000, 28, 28, 1)).astype(float)
    fd = open(os.path.join(data_dir, 't10k-labels-idx1-ubyte'), 'rb')
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    teY = loaded[8:].reshape((10000)).astype(float)
trY = np.asarray(trY)
teY = np.asarray(teY)
X = np.concatenate((trX, teX), axis=0)
y = np.concatenate((trY, teY), axis=0)
seed = 547
np.random.seed(seed)
np.random.shuffle(X)
np.random.seed(seed)
np.random.shuffle(y)
#convert label to one-hot
    y_vec = np.zeros((len(y), 10), dtype=float)
for i, label in enumerate(y):
y_vec[i, int(y[i])] = 1.0
return X / 255. , y_vec
def unpack(stream):
base = stream.tell()
header = Header.unpack(stream)
influence_groups = [None]*header.influence_group_count
inverse_bind_matrices = None
stream.seek(base + header.influence_count_offset)
for i in range(header.influence_group_count):
influence_count = uint8.unpack(stream)
influence_groups[i] = [Influence(None,None) for _ in range(influence_count)]
stream.seek(base + header.index_offset)
for influence_group in influence_groups:
for influence in influence_group:
influence.index = uint16.unpack(stream)
stream.seek(base + header.weight_offset)
for influence_group in influence_groups:
for influence in influence_group:
influence.weight = float32.unpack(stream)
if header.inverse_bind_matrix_offset != 0:
stream.seek(base + header.inverse_bind_matrix_offset)
element_type = numpy.dtype((numpy.float32,(3,4))).newbyteorder('>')
element_count = (header.section_size - header.inverse_bind_matrix_offset)//element_type.itemsize
inverse_bind_matrices = numpy.fromfile(stream,element_type,element_count)
stream.seek(base + header.section_size)
return influence_groups,inverse_bind_matrices
def unpack_array(stream,attribute_format,size):
if attribute_format.attribute == gx.VA_POS:
component_type = gx.ComponentType(attribute_format.component_type)
component_count = gx.PositionComponentCount(attribute_format.component_count)
array_type = Array
elif attribute_format.attribute == gx.VA_NRM:
component_type = gx.ComponentType(attribute_format.component_type)
component_count = gx.NormalComponentCount(attribute_format.component_count)
array_type = Array
elif attribute_format.attribute in gx.VA_CLR:
component_type = gx.ColorComponentType(attribute_format.component_type)
component_count = gx.ColorComponentCount(attribute_format.component_count)
array_type = ColorArray
elif attribute_format.attribute in gx.VA_TEX:
component_type = gx.ComponentType(attribute_format.component_type)
component_count = gx.TexCoordComponentCount(attribute_format.component_count)
array_type = Array
else:
raise FormatError('invalid vertex attribute')
element_type = array_type.create_element_type(component_type,component_count)
element_count = size//element_type.itemsize
array = numpy.fromfile(stream,element_type,element_count).view(array_type)
array.attribute = attribute_format.attribute
array.component_type = component_type
array.component_count = component_count
array.scale_exponent = attribute_format.scale_exponent
return array
def _read_bin(self, stream, byte_order):
'''
Read data from a binary stream. Raise StopIteration if the
property could not be read.
'''
try:
return _np.fromfile(stream, self.dtype(byte_order), 1)[0]
except IndexError:
raise StopIteration
def _read_bin(self, stream, byte_order):
(len_t, val_t) = self.list_dtype(byte_order)
try:
n = _np.fromfile(stream, len_t, 1)[0]
except IndexError:
raise StopIteration
data = _np.fromfile(stream, val_t, n)
if len(data) < n:
raise StopIteration
return data
def convert_f0(f0, src, trg):
mu_s, std_s = np.fromfile(os.path.join('./etc', '{}.npf'.format(src)), np.float32)
mu_t, std_t = np.fromfile(os.path.join('./etc', '{}.npf'.format(trg)), np.float32)
lf0 = tf.where(f0 > 1., tf.log(f0), f0)
lf0 = tf.where(lf0 > 1., (lf0 - mu_s)/std_s * std_t + mu_t, lf0)
lf0 = tf.where(lf0 > 1., tf.exp(lf0), lf0)
return lf0
def test():
# ==== Test: batch mixer (conclusion: capacity should be larger to make sure good mixing) ====
x, y = read('./dataset/vcc2016/bin/*/*/1*001.bin', 32, min_after_dequeue=1024, capacity=2048)
sv = tf.train.Supervisor()
with sv.managed_session() as sess:
for _ in range(200):
x_, y_ = sess.run([x, y])
print(y_)
# ===== Read binary ====
features = read_whole_features('./dataset/vcc2016/bin/Training Set/SF1/*001.bin')
sv = tf.train.Supervisor()
with sv.managed_session() as sess:
features = sess.run(features)
y = pw2wav(features)
sf.write('test1.wav', y, 16000) # TODO fs should be specified externally.
# ==== Direct read =====
f = './dataset/vcc2016/bin/Training Set/SF1/100001.bin'
features = np.fromfile(f, np.float32)
features = np.reshape(features, [-1, 513*2 + 1 + 1 + 1]) # f0, en, spk
y = pw2wav(features)
sf.write('test2.wav', y, 16000)
def read_float64_as_float32(filename):
x = np.fromfile(filename, np.float64)
return x.astype(np.float32)
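The complementary write is simply ndarray.tofile; a round-trip sketch with a file name of my own choosing:
import numpy as np

x64 = np.linspace(0, 1, 5)                       # float64 samples
x64.tofile('x_float64.bin')                      # raw float64 bytes, no header
x32 = read_float64_as_float32('x_float64.bin')   # read as float64, return float32
print(x32.dtype, np.allclose(x32, x64))          # float32 True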
def main():
''' NOTE: The input is rescaled to [-1, 1] '''
dirs = validate_log_dirs(args)
tf.gfile.MakeDirs(dirs['logdir'])
with open(args.architecture) as f:
arch = json.load(f)
with open(os.path.join(dirs['logdir'], args.architecture), 'w') as f:
json.dump(arch, f, indent=4)
normalizer = Tanhize(
xmax=np.fromfile('./etc/xmax.npf'),
xmin=np.fromfile('./etc/xmin.npf'),
)
image, label = read(
file_pattern=arch['training']['datadir'],
batch_size=arch['training']['batch_size'],
capacity=2048,
min_after_dequeue=1024,
normalizer=normalizer,
)
machine = MODEL(arch)
loss = machine.loss(image, label)
trainer = TRAINER(loss, arch, args, dirs)
trainer.train(nIter=arch['training']['max_iter'], machine=machine)
def _read_bin(self, stream, byte_order):
'''
Read data from a binary stream.
'''
return _np.fromfile(stream, self.dtype(byte_order), 1)[0]
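A closing caveat that applies to every snippet above: numpy.fromfile reads and writes raw values only, with no dtype, shape, or byte-order metadata, so both sides of the exchange must agree on those out of band (or switch to the self-describing .npy format via numpy.save and numpy.load). A minimal sketch that makes the agreement explicit:
import numpy as np

a = np.arange(12, dtype='<f4').reshape(3, 4)
a.tofile('a.raw')                                     # raw little-endian float32 bytes
b = np.fromfile('a.raw', dtype='<f4').reshape(3, 4)   # reader supplies dtype and shape
assert np.array_equal(a, b)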