Python numpy module: load() example source code
We collected the following code examples from open-source Python projects to illustrate how to use numpy.load().
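Before the project code, here is a minimal, self-contained sketch of the numpy.load() patterns that recur below: plain .npy arrays, .npz archives used as context managers, and pickled Python objects that need allow_pickle=True. File names here are illustrative only.

import numpy as np

# Round-trip a single array through a .npy file.
a = np.arange(6).reshape(2, 3)
np.save('example.npy', a)
b = np.load('example.npy')                      # ndarray with the same dtype and shape

# Several named arrays in one .npz archive; np.load returns a lazy NpzFile,
# best used as a context manager so the file handle gets closed.
np.savez('example.npz', images=a, labels=np.array([0, 1]))
with np.load('example.npz') as data:
    images, labels = data['images'], data['labels']

# Objects such as dicts are stored via pickle; since NumPy 1.16.3 they must be
# read back with allow_pickle=True and unwrapped with .item(), as several of
# the examples below do.
np.save('split.npy', {'train': [1, 2], 'val': [3]})
split = np.load('split.npy', allow_pickle=True).item()

The project examples follow.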
def get_named_set(lang_codes, feature_set):
if feature_set == 'id':
return get_id_set(lang_codes)
if feature_set not in FEATURE_SETS:
print("ERROR: Invalid feature set " + feature_set, file=sys.stderr)
sys.exit()
filename, source, prefix = FEATURE_SETS[feature_set]
feature_database = np.load(filename)
lang_codes = [ get_language_code(l, feature_database) for l in lang_codes ]
lang_indices = [ get_language_index(l, feature_database) for l in lang_codes ]
feature_names = get_feature_names(prefix, feature_database)
feature_indices = [ get_feature_index(f, feature_database) for f in feature_names ]
source_index = get_source_index(source, feature_database)
feature_values = feature_database["data"][lang_indices,:,:][:,feature_indices,:][:,:,source_index]
feature_values = feature_values.squeeze(axis=2)
return feature_names, feature_values
def gen_pruned_features(name):
print name
feature_dir = 'data/feature_' + args.domain + \
'_' + str(args.n_boxes) + 'boxes/' + name + '/'
n_clips = len(glob.glob(feature_dir + BOX_FEATURE + '*.npy'))
for clip in xrange(1, n_clips+1):
pruned_boxes = np.load(feature_dir + BOX_FEATURE + '{:04d}.npy'.format(clip)) # (50, args.n_boxes, 4)
roisavg = np.load(feature_dir + 'roisavg{:04d}.npy'.format(clip)) # (50, args.n_boxes, 512)
pruned_roisavg = np.zeros((50, args.n_boxes, 512))
for frame in xrange(50):
for box_id in xrange(args.n_boxes):
if not np.array_equal(pruned_boxes[frame][box_id], np.zeros((4))):
pruned_roisavg[frame][box_id] = roisavg[frame][box_id]
np.save('{}pruned_roisavg{:04d}'.format(feature_dir, clip), pruned_roisavg)
def __init__(self):
if not self.code_table:
with open(CATEGORY_CODES) as codes:
self.code_table = {int(k): v for k, v in json.loads(codes.read()).items()}
caffe_models = os.path.expanduser(CAFFE_MODELS)
model = 'squeezenet', 'init_net.pb', 'predict_net.pb', 'ilsvrc_2012_mean.npy', 227
self.model = model
mean_file = os.path.join(caffe_models, model[0], model[3])
if not os.path.exists(mean_file):
self.mean = 128
else:
mean = np.load(mean_file).mean(1).mean(1)
self.mean = mean[:, np.newaxis, np.newaxis]
init_net = os.path.join(caffe_models, model[0], model[1])
predict_net = os.path.join(caffe_models, model[0], model[2])
with open(init_net) as f:
self.init_net = f.read()
with open(predict_net) as f:
self.predict_net = f.read()
def test_xyz2lab(self):
assert_array_almost_equal(xyz2lab(self.xyz_array),
self.lab_array, decimal=3)
# Test the conversion with the rest of the illuminants.
for I in ["d50", "d55", "d65", "d75"]:
for obs in ["2", "10"]:
fname = "lab_array_{0}_{1}.npy".format(I, obs)
lab_array_I_obs = np.load(
os.path.join(os.path.dirname(__file__), 'data', fname))
assert_array_almost_equal(lab_array_I_obs,
xyz2lab(self.xyz_array, I, obs),
decimal=2)
for I in ["a", "e"]:
fname = "lab_array_{0}_2.npy".format(I)
lab_array_I_obs = np.load(
os.path.join(os.path.dirname(__file__), 'data', fname))
assert_array_almost_equal(lab_array_I_obs,
xyz2lab(self.xyz_array, I, "2"),
decimal=2)
def test_xyz2luv(self):
assert_array_almost_equal(xyz2luv(self.xyz_array),
self.luv_array, decimal=3)
# Test the conversion with the rest of the illuminants.
for I in ["d50", "d55", "d65", "d75"]:
for obs in ["2", "10"]:
fname = "luv_array_{0}_{1}.npy".format(I, obs)
luv_array_I_obs = np.load(
os.path.join(os.path.dirname(__file__), 'data', fname))
assert_array_almost_equal(luv_array_I_obs,
xyz2luv(self.xyz_array, I, obs),
decimal=2)
for I in ["a", "e"]:
fname = "luv_array_{0}_2.npy".format(I)
luv_array_I_obs = np.load(
os.path.join(os.path.dirname(__file__), 'data', fname))
assert_array_almost_equal(luv_array_I_obs,
xyz2luv(self.xyz_array, I, "2"),
decimal=2)
def test_luv2xyz(self):
assert_array_almost_equal(luv2xyz(self.luv_array),
self.xyz_array, decimal=3)
# Test the conversion with the rest of the illuminants.
for I in ["d50", "d55", "d65", "d75"]:
for obs in ["2", "10"]:
fname = "luv_array_{0}_{1}.npy".format(I, obs)
luv_array_I_obs = np.load(
os.path.join(os.path.dirname(__file__), 'data', fname))
assert_array_almost_equal(luv2xyz(luv_array_I_obs, I, obs),
self.xyz_array, decimal=3)
for I in ["a", "e"]:
fname = "luv_array_{0}_2.npy".format(I, obs)
luv_array_I_obs = np.load(
os.path.join(os.path.dirname(__file__), 'data', fname))
assert_array_almost_equal(luv2xyz(luv_array_I_obs, I, "2"),
self.xyz_array, decimal=3)
def dataset_from_file(filename):
"""Load a dataset from file.
Args:
filename (string): the name of the file from which to extract the dataset
Returns:
tuple: the dataset (np.ndarray) and the ngrams (list of strings)
"""
loader = np.load(filename)
num_entries = loader['num_entries'][0]
sp_dataset = sparse.csr_matrix((loader['data'], loader['indices'], loader['indptr']),
shape = loader['shape'])
dataset = sp_dataset.toarray()
samp_entries, num_features = dataset.shape
return dataset.reshape(int(samp_entries / num_entries), num_entries, num_features), loader['ngrams']
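For context on dataset_from_file above, a hedged sketch of how the companion .npz archive might be written, assuming the dataset is held as a SciPy CSR matrix. The key names (data, indices, indptr, shape, num_entries, ngrams) mirror the loader; the helper name dataset_to_file and everything else is illustrative.

import numpy as np
from scipy import sparse

def dataset_to_file(filename, dataset_2d, num_entries, ngrams):
    # dataset_2d: dense array of shape (samples * num_entries, num_features)
    # filename should end in .npz (np.savez appends the extension otherwise)
    sp_dataset = sparse.csr_matrix(dataset_2d)
    np.savez(filename,
             data=sp_dataset.data,
             indices=sp_dataset.indices,
             indptr=sp_dataset.indptr,
             shape=sp_dataset.shape,
             num_entries=np.array([num_entries]),
             ngrams=np.array(ngrams))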
def _load_accumulators(self, main_loop):
"""Nasty method, use carefully"""
for cg_name, model in main_loop.models.iteritems():
source = numpy.load(self.path_to_accumulators.format(cg_name))
accums_dict = {name.replace("-", "/"): value
for name, value in source.items()}
source.close()
algo = main_loop.algorithm.algorithms[cg_name]
model_params = model.get_params()
steps = algo.steps.items()
for pidx in xrange(len(steps)):
# Get parameter name and its accumulators
p = steps[pidx][0]
name = [k for k, v in model_params.iteritems() if v == p][0]
accums = accums_dict[name]
# This is num_accums_per_param
col = len(accums)
for aidx in xrange(col):
algo.step_rule_updates[pidx*col+aidx][0].set_value(
accums[aidx])
def _load_accumulators(self, main_loop):
"""Load accumulators with some checks."""
for cg_name, model in main_loop.models.iteritems():
# Load accumulators
accum_filename = self.path_to_accumulators.format(cg_name)
if not os.path.isfile(accum_filename):
logger.error(" Accumulators file does not exist [{}]"
.format(accum_filename))
continue
source = numpy.load(accum_filename)
accums_to_load = {k: v for k, v in source.items()}
source.close()
algo = main_loop.algorithm.algorithms[cg_name]
accums = algo.step_rule_updates
# Set accumulators
for acc in accums:
try:
acc.set_value(accums_to_load[acc.name])
except:
logger.error(" Could not load {}".format(acc.name))
def load_params(self, saveto):
try:
logger.info(" ...loading model parameters")
params_all = numpy.load(saveto)
params_this = self.get_params()
missing = set(params_this) - set(params_all)
for pname in params_this.keys():
if pname in params_all:
val = params_all[pname]
self._set_param_value(params_this[pname], val, pname)
elif self.num_decs > 1 and self.decoder.share_att and \
pname in self.decoder.shared_params_map:
val = params_all[self.decoder.shared_params_map[pname]]
self._set_param_value(params_this[pname], val, pname)
else:
logger.warning(
" Parameter does not exist: {}".format(pname))
logger.info(
" Number of params loaded: {}"
.format(len(params_this) - len(missing)))
except Exception as e:
logger.error(" Error {0}".format(str(e)))
def load_data():
"""Draw the Mott lobes."""
res = np.load(r'data_%d.npy' % GRID_SIZE)
x = res[:, 0]
y = res[:, 1]
z = []
for i, entry in enumerate(res):
z.append(kinetic_energy(entry[2:], -1.))
plt.pcolor(
np.reshape(x, (GRID_SIZE, GRID_SIZE)),
np.reshape(y, (GRID_SIZE, GRID_SIZE)),
np.reshape(z, (GRID_SIZE, GRID_SIZE))
)
plt.xlabel('$dt/U$')
plt.ylabel(r'$\mu/U$')
plt.show()
def _get_batch_normalization_weights(self,layer_name):
beta = '%s/batch_normalization/beta:0'%(layer_name)
gamma = '%s/batch_normalization/gamma:0'%(layer_name)
mean = '%s/batch_normalization/moving_mean:0'%(layer_name)
variance = '%s/batch_normalization/moving_variance:0'%(layer_name)
if self.weights is None or beta not in self.weights:
print('{:>23} {:>23}'.format(beta, 'using default initializer'))
return None, None, None, None
else:
betax = self.weights[beta]
gammax = self.weights[gamma]
meanx = self.weights[mean]
variancex = self.weights[variance]
self.loaded_weights[beta]=1
self.loaded_weights[gamma]=1
self.loaded_weights[mean]=1
self.loaded_weights[variance]=1
#print('{:>23} {:>23}'.format(beta, 'load from %s'%self.flags.load_path))
return betax,gammax,meanx,variancex
def post_sub_one(inx):
w,h = 1918,1280
path,out,threshold = inx
data = np.load(path).item()
imgs,pred = data['name'], data['pred']
#print(pred.shape)
fo = open(out,'w')
#masks = pred>threshold
for name,mask in zip(imgs,np.squeeze(pred)):
mask = imresize(mask,[h,w])
mask = mask>threshold
code = rle_encode(mask)
code = [str(i) for i in code]
code = " ".join(code)
fo.write("%s,%s\n"%(name,code))
fo.close()
return 0
def show_one_img_mask(data):
w,h = 1918,1280
a = randint(0,31)
path = "../input/test"
data = np.load(data).item()
name,masks = data['name'][a],data['pred']
img = Image.open("%s/%s"%(path,name))
#img.show()
plt.imshow(img)
plt.show()
mask = np.squeeze(masks[a])
mask = imresize(mask,[h,w]).astype(np.float32)
print(mask.shape,mask[0])
img = Image.fromarray(mask*256)#.resize([w,h])
plt.imshow(img)
plt.show()
def split(flags):
if os.path.exists(flags.split_path):
return np.load(flags.split_path).item()
folds = flags.folds
path = flags.input_path
random.seed(6)
img_list = ["%s/%s"%(path,img) for img in os.listdir(path)]
random.shuffle(img_list)
dic = {}
n = len(img_list)
num = (n+folds-1)//folds
for i in range(folds):
s,e = i*num,min(i*num+num,n)
dic[i] = img_list[s:e]
np.save(flags.split_path,dic)
return dic
def make_benchmark_figure():
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(1, 1, 1, xscale='linear', yscale='log')
d1 = np.load('./data/random_data_benchmark.npy')
d2 = np.load('./data/real_data_benchmark.npy')
d3 = np.load('./data/real_data_orange3_benchmark.npy')
ax.scatter(d1[:24, 0], d1[:24, 2], c='r', edgecolor='none', label='Random Data (Polo)')
ax.scatter(d2[:24, 0], d2[:24, 2], c='green', edgecolor='none', label='Gene expression data (Polo)')
ax.scatter(d3[:24, 0], d3[:24, 2], c='blue', edgecolor='none', label='Gene expression data (Orange3)')
ax.legend(loc=2)
ax.grid('on')
ax.set_xlabel('log2(Number of leaves)')
ax.set_ylabel('Run time, seconds')
fig.tight_layout()
fig.savefig('data/bench.png', dpi=75)
def read_data():
with open(PICKLE_FILENAME, 'rb') as f:
save = pickle.load(f)
train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save
print('Training set', train_dataset.shape, train_labels.shape)
print('Valid set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
return [train_dataset, valid_dataset,
test_dataset], [train_labels, valid_labels, test_labels]
def lrelu(x, leak=0.2, name="lrelu"):
"""Leaky rectifier.
"""
with tf.variable_scope(name):
f1 = 0.5 * (1 + leak)
f2 = 0.5 * (1 - leak)
return f1 * x + f2 * abs(x)
# load CIFAR-10
# trainx, trainy = cifar10_data.load(args.data_dir, subset='train')
# trainx = trainx.transpose(0, 2, 3, 1)
# trainx_unl = trainx.copy()
# trainx_unl2 = trainx.copy()
# testx, testy = cifar10_data.load(args.data_dir, subset='test')
# testx = testx.transpose(0, 2, 3, 1)
# nr_batches_train = int(trainx.shape[0]/args.batch_size)
# nr_batches_test = int(testx.shape[0]/args.batch_size)
# load MNIST data
def open(filename, frame='unspecified'):
"""Create a Point from data saved in a file.
Parameters
----------
filename : :obj:`str`
The file to load data from.
frame : :obj:`str`
The frame to apply to the created point.
Returns
-------
:obj:`Point`
A point created from the data in the file.
"""
data = BagOfPoints.load_data(filename)
return Point(data, frame)
def open(filename, frame='unspecified'):
"""Create a Direction from data saved in a file.
Parameters
----------
filename : :obj:`str`
The file to load data from.
frame : :obj:`str`
The frame to apply to the created Direction.
Returns
-------
:obj:`Direction`
A Direction created from the data in the file.
"""
data = BagOfPoints.load_data(filename)
return Direction(data, frame)
def open(filename, frame='unspecified'):
"""Create a PointCloud from data saved in a file.
Parameters
----------
filename : :obj:`str`
The file to load data from.
frame : :obj:`str`
The frame to apply to the created PointCloud.
Returns
-------
:obj:`PointCloud`
A PointCloud created from the data in the file.
"""
data = BagOfPoints.load_data(filename)
return PointCloud(data, frame)
def open(filename, frame='unspecified'):
"""Create a NormalCloud from data saved in a file.
Parameters
----------
filename : :obj:`str`
The file to load data from.
frame : :obj:`str`
The frame to apply to the created NormalCloud.
Returns
-------
:obj:`NormalCloud`
A NormalCloud created from the data in the file.
"""
data = BagOfPoints.load_data(filename)
return NormalCloud(data, frame)
def open(filename, frame='unspecified'):
"""Create a RgbCloud from data saved in a file.
Parameters
----------
filename : :obj:`str`
The file to load data from.
frame : :obj:`str`
The frame to apply to the created RgbCloud.
Returns
-------
:obj:`RgbCloud`
A RgbCloud created from the data in the file.
"""
data = BagOfPoints.load_data(filename)
return RgbCloud(data, frame)
def __init__(self,
audio_file: Path,
id: Optional[str] = None,
sample_rate_to_convert_to: int = 16000,
label: Optional[str] = "nolabel",
fourier_window_length: int = 512,
hop_length: int = 128,
mel_frequency_count: int = 128,
label_with_tags: str = None,
positional_label: Optional[PositionalLabel] = None):
# The default values for hop_length and fourier_window_length are powers of 2 near the values specified in the wave2letter paper.
if id is None:
id = name_without_extension(audio_file)
self.audio_file = audio_file
super().__init__(
id=id, get_raw_audio=lambda: librosa.load(str(self.audio_file), sr=self.sample_rate)[0],
label=label, sample_rate=sample_rate_to_convert_to,
fourier_window_length=fourier_window_length, hop_length=hop_length, mel_frequency_count=mel_frequency_count,
label_with_tags=label_with_tags, positional_label=positional_label)
def load_word2vec_matrix(vec_file, word_index, config):
if os.path.isfile(DirConfig.W2V_CACHE):
print('---- Load word vectors from cache.')
embedding_matrix = np.load(open(DirConfig.W2V_CACHE, 'rb'))
return embedding_matrix
print('---- loading word2vec ...')
word2vec = KeyedVectors.load_word2vec_format(
vec_file, binary=True)
print('Found %s word vectors of word2vec' % len(word2vec.vocab))
nb_words = min(config.MAX_NB_WORDS, len(word_index)) + 1
embedding_matrix = np.zeros((nb_words, config.WORD_EMBEDDING_DIM))
for word, i in word_index.items():
if word in word2vec.vocab:
embedding_matrix[i] = word2vec.word_vec(word)
print('Null word embeddings: %d' % \
np.sum(np.sum(embedding_matrix, axis=1) == 0))
# check the words which not in embedding vectors
not_found_words = []
for word, i in word_index.items():
if word not in word2vec.vocab:
not_found_words.append(word)
np.save(open(DirConfig.W2V_CACHE, 'wb'), embedding_matrix)
return embedding_matrix
def get_sample_item_file(wav_file_names_sample, item_file, output):
"""
From a sampled dataset, get an item file for running an ABX task
Parameters
----------
wav_file_names_sample : string,
path to a .npy file listing the wav file names of the sampled dataset
item_file : string,
path to the item file of the whole dataset (a text file containing at least
the columns #filename, onset, offset, #phoneme and context, plus side
information such as image ID)
output : string,
path where the sample item file will be stored
"""
wav_names=[]
temp=np.load(wav_file_names_sample)
for s in temp:
wav_names.append(s.split(".")[0])
df=pd.read_csv(item_file, sep="\t", index_col="#filename")
df_sample=df.loc[wav_names]
df_sample.to_csv(output, sep="\t", header=True, index=False)
return(df_sample)
def __init__(self, batchsize=64, max_length=15, mode='train'):
self.batchsize = batchsize
self.d_vocabulary = None
self.batch_index = None
self.batch_len = None
self.rev_adict = None
self.max_length = max_length
self.mode = mode
self.qdic, self.adic = VQADataProvider.load_data(mode)
with open('./result/vdict.json','r') as f:
self.vdict = json.load(f)
with open('./result/adict.json','r') as f:
self.adict = json.load(f)
self.n_ans_vocabulary = len(self.adict)
self.nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
self.glove_dict = {} # word -> glove vector
def load_vqa_json(data_split):
"""
Parses the question and answer json files for the given data split.
Returns the question dictionary and the answer dictionary.
"""
qdic, adic = {}, {}
with open(config.DATA_PATHS[data_split]['ques_file'], 'r') as f:
qdata = json.load(f)['questions']
for q in qdata:
qdic[data_split + QID_KEY_SEPARATOR + str(q['question_id'])] = \
{'qstr': q['question'], 'iid': q['image_id']}
if 'test' not in data_split:
with open(config.DATA_PATHS[data_split]['ans_file'], 'r') as f:
adata = json.load(f)['annotations']
for a in adata:
adic[data_split + QID_KEY_SEPARATOR + str(a['question_id'])] = \
a['answers']
print 'parsed', len(qdic), 'questions for', data_split
return qdic, adic
def load_genome_json():
"""
Parses the genome json file. Returns the question dictionary and the
answer dictionary.
"""
qdic, adic = {}, {}
with open(config.DATA_PATHS['genome']['genome_file'], 'r') as f:
qdata = json.load(f)
for q in qdata:
key = 'genome' + QID_KEY_SEPARATOR + str(q['id'])
qdic[key] = {'qstr': q['question'], 'iid': q['image']}
adic[key] = [{'answer': q['answer']}]
print 'parsed', len(qdic), 'questions for genome'
return qdic, adic
def __init__(self, batchsize=64, max_length=15, mode='train'):
self.batchsize = batchsize
self.d_vocabulary = None
self.batch_index = None
self.batch_len = None
self.rev_adict = None
self.max_length = max_length
self.mode = mode
self.qdic, self.adic = VQADataProvider.load_data(mode)
with open('./result/vdict.json','r') as f:
self.vdict = json.load(f)
with open('./result/adict.json','r') as f:
self.adict = json.load(f)
self.n_ans_vocabulary = len(self.adict)
def test_individual_stability_matrix():
"""
Tests individual_stability_matrix method on three gaussian blobs.
"""
import utils
import numpy as np
import scipy as sp
desired = np.load(home + '/git_repo/PyBASC/tests/ism_test.npy')
blobs = generate_blobs()
ism = utils.individual_stability_matrix(blobs, 20, 3)
#how to use test here?
# np.corrcoef(ism.flatten(),desired.flatten())
# np.testing.assert_equal(ism,desired)
#
# corr=np.array(sp.spatial.distance.cdist(ism, desired, metric = 'correlation'))
#
assert False
def test_ndarray_to_vol():
import basc
import nibabel as nb
subject_file = home + '/git_repo/PyBASC/sample_data/sub1/Func_Quarter_Res.nii.gz'
subject_file = home + '/git_repo/PyBASC/sample_data/test.nii.gz'
data = nb.load(subject_file).get_data().astype('float32')
roi_mask_file= home + '/git_repo/PyBASC/masks/LC_Quarter_Res.nii.gz'
print( 'Data Loaded')
roi_mask_file_nb = nb.load(roi_mask_file)
roi_mask_nparray = nb.load(roi_mask_file).get_data().astype('float32').astype('bool')
roi1data = data[roi_mask_nparray]
data_array=roi1data
sample_file=subject_file
filename=home + '/git_repo/PyBASC/sample_data/ndarray_to_vol_test.nii.gz'
basc.ndarray_to_vol(data_array, roi_mask_file, roi_mask_file, filename)
def get_dataset(dataset_path='Data/Train_Data'):
# Getting all data from data path:
try:
X = np.load('Data/npy_train_data/X.npy')
Y = np.load('Data/npy_train_data/Y.npy')
except:
inputs_path = dataset_path+'/input'
images = listdir(inputs_path) # Geting images
X = []
Y = []
for img in images:
img_path = inputs_path+'/'+img
x_img = get_img(img_path).astype('float32').reshape(64, 64, 3)
x_img /= 255.
y_img = get_img(img_path.replace('input/', 'mask/mask_')).astype('float32').reshape(64, 64, 1)
y_img /= 255.
X.append(x_img)
Y.append(y_img)
X = np.array(X)
Y = np.array(Y)
# Create dateset:
if not os.path.exists('Data/npy_train_data/'):
os.makedirs('Data/npy_train_data/')
np.save('Data/npy_train_data/X.npy', X)
np.save('Data/npy_train_data/Y.npy', Y)
X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
return X, X_test, Y, Y_test
def __init__(self,
saved_model=None,
train_folder=None,
feature=_feature.__func__):
"""
:param saved_model: optional saved train set and labels as .npz
:param train_folder: optional custom train data to process
:param feature: feature function - compatible with saved_model
"""
self.feature = feature
if train_folder is not None:
self.train_set, self.train_labels, self.model = \
self.create_model(train_folder)
else:
if cv2.__version__[0] == '2':
self.model = cv2.KNearest()
else:
self.model = cv2.ml.KNearest_create()
if saved_model is None:
saved_model = TRAIN_DATA+'raw_pixel_data.npz'
with np.load(saved_model) as data:
self.train_set = data['train_set']
self.train_labels = data['train_labels']
if cv2.__version__[0] == '2':
self.model.train(self.train_set, self.train_labels)
else:
self.model.train(self.train_set, cv2.ml.ROW_SAMPLE,
self.train_labels)
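As a complement to the classifier above, a hedged sketch of how a compatible training archive could be produced. Only the key names train_set and train_labels and the file name raw_pixel_data.npz come from the code above; shapes, dtypes and the data itself are illustrative (OpenCV's k-NN expects float32 rows).

import numpy as np

train_set = np.random.rand(500, 400).astype(np.float32)              # 500 flattened patches
train_labels = np.random.randint(0, 10, (500, 1)).astype(np.float32)
np.savez('raw_pixel_data.npz', train_set=train_set, train_labels=train_labels)

with np.load('raw_pixel_data.npz') as data:
    assert data['train_set'].shape == (500, 400)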
def load(self, model_filename):
self.__model = load_model("%s.model" % model_filename)
self.__chars = np.load("%s.cvocab.npy" % model_filename).tolist()
self.__trigrams = np.load("%s.tvocab.npy" % model_filename).tolist()
self.__classes = np.load("%s.classes.npy" % model_filename).tolist()
self.__char_indexes = dict((c, i) for i, c in enumerate(self.__chars))
self.__indexes_char = dict((i, c) for i, c in enumerate(self.__chars))
self.__trigrams_indexes = dict((t, i) for i, t in enumerate(self.__trigrams))
self.__indices_trigrams = dict((i, t) for i, t in enumerate(self.__trigrams))
self.__classes_indexes = dict((c, i) for i, c in enumerate(self.__classes))
self.__indexes_classes = dict((i, c) for i, c in enumerate(self.__classes))
def get_id_set(lang_codes):
feature_database = np.load("family_features.npz")
lang_codes = [ get_language_code(l, feature_database) for l in lang_codes ]
all_languages = list(feature_database["langs"])
feature_names = [ "ID_" + l.upper() for l in all_languages ]
values = np.zeros((len(lang_codes), len(feature_names)))
for i, lang_code in enumerate(lang_codes):
feature_index = get_language_index(lang_code, feature_database)
values[i, feature_index] = 1.0
return feature_names, values
def unpickle(file):
import pickle
fo = open(file, 'rb')
dict = pickle.load(fo, encoding='latin1')
fo.close()
return dict
def load_pkl(path):
with open(path) as f:
obj = cPickle.load(f)
print(" [*] load %s" % path)
return obj
def load_npy(path):
obj = np.load(path)
print(" [*] load %s" % path)
return obj
def load(self, local_dir_=None):
'''
load dataset from local disk
Args:
local_dir_: string or None
if None, will use default Dataset.DEFAULT_DIR
'''
def load(self, local_dir_=None):
if local_dir_ is None:
local_dir = self.DEFAULT_DIR
else:
local_dir = Path(local_dir_)
data_di = np.load(str(local_dir/'cifar10.npz'))
self.datum[:] = data_di['images']
self.labels[:] = data_di['labels']
def install(
self, local_dst_dir_=None, local_src_dir_=None, clean_install_=False):
'''
Install the dataset into directly usable format,
requires downloading for public dataset.
Args:
local_dst_dir_: string or None
where to install the dataset, None -> "%(default_dir)s"
local_src_dir_: string or None
where to find the raw downloaded files, None -> "%(default_dir)s"
'''
local_dst_dir = self.DEFAULT_DIR if local_dst_dir_ is None else Path(local_dst_dir_)
local_src_dir = self.DEFAULT_DIR if local_src_dir_ is None else Path(local_src_dir_)
local_dst_dir.mkdir(parents=True, exist_ok=True)
assert local_src_dir.exists()
images = np.empty((60000,3,32,32), dtype=np.uint8)
labels = np.empty((60000,), dtype=np.uint8)
tarfile_name = str(local_src_dir / 'cifar-10-python.tar.gz')
with tarfile.open(tarfile_name, 'r:gz') as tf:
for i in range(5):
with tf.extractfile('cifar-10-batches-py/data_batch_%d'%(i+1)) as f:
data_di = pickle.load(f, encoding='bytes')
images[(10000*i):(10000*(i+1))] = data_di[b'data'].reshape((10000,3,32,32))
labels[(10000*i):(10000*(i+1))] = np.asarray(data_di[b'labels'], dtype=np.uint8)
with tf.extractfile('cifar-10-batches-py/test_batch') as f:
data_di = pickle.load(f, encoding='bytes')
images[50000:60000] = data_di[b'data'].reshape((10000,3,32,32))
labels[50000:60000] = data_di[b'labels']
np.savez_compressed(str(local_dst_dir / 'cifar10.npz'), images=images, labels=labels)
if clean_install_:
os.remove(tarfile_name)
def load(self, local_dir_=None):
if local_dir_ is None:
local_dir = self.DEFAULT_DIR
else:
local_dir = Path(local_dir_)
data = np.load(str(local_dir / 'mnist.npz'))
self.labels = data['labels']
self.datum = data['images']
self.label_map = np.arange(10)
self.imsize = (1,28,28)
def load(self, local_dir_=None):
# TODO
raise NotImplementedError()
def load_aggregate_masks_scans (masks_mnames, grids, upgrid_multis):
scans = []
masks = []
igrid = 0
for masks_names in masks_mnames:
if (len(masks_names) > 0):
grid = grids[igrid]
upgrid_multi = upgrid_multis[igrid]
upgcount = upgrid_multi * upgrid_multi
scans1 = []
masks1 = []
for masks_name in masks_names:
print ("Loading: ", masks_name)
masks0 = np.load(''.join((masks_name, ".npz")))['arr_0']
scans0 = np.load(''.join((masks_name.replace("masks_", "scans_", 1), ".npz")))['arr_0']
masks1.append(masks0)
scans1.append(scans0)
scans1 = np.vstack(scans1)
masks1 = np.vstack(masks1)
if len(masks) > 0:
scans1 = np.vstack([scans1, scans])
masks1 = np.vstack([masks1, masks])
lm = len(masks1) // upgcount * upgcount
scans1 = scans1[0:lm] # cut to multiples of upgcount
masks1 = masks1[0:lm]
index_shuf = np.arange(lm)
np.random.shuffle(index_shuf)
scans1 = scans1[index_shuf]
masks1 = masks1[index_shuf]
scans = data_from_grid_by_proximity(scans1, upgrid_multi, upgrid_multi, grid=grid)
masks = data_from_grid_by_proximity(masks1, upgrid_multi, upgrid_multi, grid=grid)
igrid += 1
return masks, scans
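A side note on the ['arr_0'] indexing used above: np.savez (and np.savez_compressed) stores arrays passed positionally, without a keyword name, under the keys arr_0, arr_1, ..., which is presumably how these masks_*.npz / scans_*.npz files were written. A minimal sketch:

import numpy as np

masks = np.zeros((4, 64, 64), dtype=np.uint8)
np.savez_compressed('masks_example.npz', masks)       # positional arg -> key 'arr_0'
loaded = np.load('masks_example.npz')['arr_0']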