def maybe_pickle(data_dirs, force=False):
    """Pickle the dataset of every directory, skipping ones already done.

    For each entry of ``data_dirs`` the target file is ``<entry>.pickle``.
    An existing target is kept unless ``force`` is true; otherwise the
    value returned by ``load_logo(entry)`` is dumped with the highest
    pickle protocol.  Write failures are printed, not raised.

    Returns:
        list: the ``.pickle`` file names, one per entry, in input order.
    """
    pickled_paths = []
    for data_dir in data_dirs:
        target = data_dir + '.pickle'
        pickled_paths.append(target)
        if os.path.exists(target) and not force:
            # Pass force=True to overwrite an existing pickle.
            print('%s already present - Skipping pickling. ' % target)
            continue
        print('Pickling %s.' % target)
        dataset = load_logo(data_dir)
        try:
            with open(target, 'wb') as out:
                pickle.dump(dataset, out, pickle.HIGHEST_PROTOCOL)
        except Exception as err:
            print('Unable to save data to', target, ':', err)
    return pickled_paths
def save_pickle(train_dataset, train_labels, valid_dataset, valid_labels,
                test_dataset, test_labels):
    """Bundle the six arrays into one dict and pickle it to PICKLE_FILENAME.

    The dict keys are ``train_dataset``, ``train_labels``, ``valid_dataset``,
    ``valid_labels``, ``test_dataset`` and ``test_labels``.

    Raises:
        Exception: any error while opening or writing the file is printed
            and then re-raised.
    """
    save = {
        'train_dataset': train_dataset,
        'train_labels': train_labels,
        'valid_dataset': valid_dataset,
        'valid_labels': valid_labels,
        'test_dataset': test_dataset,
        'test_labels': test_labels,
    }
    try:
        # The context manager closes the handle even when the dump fails.
        with open(PICKLE_FILENAME, 'wb') as f:
            pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
    except Exception as e:
        print('Unable to save data to', PICKLE_FILENAME, ':', e)
        raise
def save_params(self, f_, format_=Default, filter_=None):
    '''
    Save the parameters of the current group using the pickle protocol.

    A dict mapping string -> shared variable is dumped to f_.

    Args:
        f_: writable binary file object, or a filename string. A file opened
            here from a filename is closed before returning; a file object
            passed in by the caller is left open.
        format_: string, file format. Accepted for interface compatibility;
            this function does not inspect it and always writes a pickle.
        filter_: string or None. When given, only keys that fully match this
            regex pattern are saved.
    '''
    if filter_ is None:
        params = self._current_group_di
    else:
        pat = re.compile(filter_)
        params = {k: v for k, v in self._current_group_di.items()
                  if pat.fullmatch(k)}

    if isinstance(f_, str):
        # We own this handle, so we are responsible for closing it.
        with open(f_, 'wb') as fh:
            pickle.dump(params, fh)
    else:
        pickle.dump(params, f_)
def save_snapshot(self, filename=None):
    """
    Save a snapshot of current process to file
    Warning: this is not thread safe, do not use with multithread program

    Args:
        - filename: target file to save snapshot; when empty, the name is
          taken from self.get_config_filename("snapshot")

    Returns:
        - Bool: False when take_snapshot() returned nothing, True once the
          snapshot has been written
    """
    if not filename:
        filename = self.get_config_filename("snapshot")

    snapshot = self.take_snapshot()
    if not snapshot:
        return False

    # dump to file; the context manager closes it even if pickling fails
    with open(filename, "wb") as fd:
        pickle.dump(snapshot, fd, pickle.HIGHEST_PROTOCOL)

    return True
def dumpmem(self, start, end):
    """
    Dump process memory from start to end

    Args:
        - start: start address (Int)
        - end: end address (Int)

    Returns:
        - memory content (raw bytes), or None when the dump command
          returned None
    """
    logfd = tmpfile(is_binary_file=True)
    try:
        out = self.execute_redirect(
            "dump memory %s 0x%x 0x%x" % (logfd.name, start, end))
        if out is None:
            return None
        logfd.flush()
        return logfd.read()
    finally:
        # Always release the temporary file, including on the failure path.
        logfd.close()
def readmem(self, address, size):
    """
    Read content of memory at an address

    Args:
        - address: start address to read (Int)
        - size: bytes to read (Int)

    Returns:
        - whatever dumpmem() returned when it is not None; otherwise a
          string with one chr() character per byte value printed by the
          "x/<size>bx" command (empty when that command gives no output)
    """
    # fast path: take the dump if it worked
    dumped = self.dumpmem(address, address+size)
    if dumped is not None:
        return dumped

    # slow path: parse the examine-memory output
    listing = self.execute_redirect("x/%dbx 0x%x" % (size, address))
    pieces = []
    if listing:
        for row in listing.splitlines():
            # the byte values follow the last ":<TAB>" on the row
            tokens = row.split(":\t")[-1].split()
            pieces.append("".join(chr(int(tok, 0)) for tok in tokens))
    return "".join(pieces)
def main():
    """Build an embedding matrix for the IMDB vocabulary and pickle it.

    The matrix has ``len(imdb_dict) + 2`` rows of 300 float32 values. Row
    ``i`` is filled from ``glove_dict`` when ``i`` is a key of ``imdb_dict``
    and its word is present in ``glove_dict``; all other rows stay zero.
    """
    glove_dict = LoadGlove()
    imdb_dict = LoadImdb()
    out_path = '../../data/imdb.glove.emb.pkl'
    emb = np.zeros([len(imdb_dict) + 2, 300], dtype=np.float32)
    for i in range(len(imdb_dict) + 2):
        if i in imdb_dict:  # not 0 or 1
            word = imdb_dict[i]
            if word in glove_dict:
                # If in glove dict, use the embedding
                emb[i, :] = glove_dict[word]
    # Close the output file deterministically instead of leaking the handle.
    with open(out_path, 'wb') as f:
        pickle.dump(emb, f)
def main():
    """Convert the text embedding dump into a pickled float32 matrix.

    Reads the word -> index dictionary from ``imdb.dict.pkl``, then reads
    ``output.txt`` as alternating lines (a word, then its whitespace
    separated embedding values) and stores each known word's vector in the
    row given by the dictionary. The matrix is pickled to ``imdb.emb.pkl``.
    """
    embedding_size = 300
    dict_path = '../../data/imdb.dict.pkl'
    # Pickles are binary data: open in 'rb' and close the handle when done.
    with open(dict_path, 'rb') as f:
        dictionary = pickle.load(f)
    out_path = '../../data/imdb.emb.pkl'
    path = '../../data/output.txt'
    # Account for missing index 0 and 1.
    emb = np.zeros([len(dictionary) + 2, embedding_size], dtype=np.float32)
    with open(path, 'r') as f:
        while True:
            word = f.readline()
            if not word:
                break
            word = word.rstrip()
            embeddings = [float(e) for e in f.readline().rstrip().split()]
            if word in dictionary:
                emb[int(dictionary[word]), :] = np.array(
                    embeddings, dtype=np.float32)
    with open(out_path, 'wb') as f:
        pickle.dump(emb, f)
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
    """Pickle the dataset of every sub-directory of ``data_folders``.

    Entries of ``data_folders`` that are not directories are ignored. For
    each directory the target is ``<directory>.pickle``; an existing target
    is kept unless ``force`` is true, otherwise the result of
    ``load_letter(directory, min_num_images_per_class)`` is dumped. Write
    failures are printed, not raised.

    Returns:
        list: the ``.pickle`` file names for the directories visited.
    """
    pickled_paths = []
    for entry in os.listdir(data_folders):
        folder_path = os.path.join(data_folders, entry)
        if not os.path.isdir(folder_path):
            continue
        target = folder_path + '.pickle'
        pickled_paths.append(target)
        if os.path.exists(target) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % target)
            continue
        print('Pickling %s.' % target)
        # load and normalize the data
        dataset = load_letter(folder_path, min_num_images_per_class)
        try:
            with open(target, 'wb') as out:
                pickle.dump(dataset, out, pickle.HIGHEST_PROTOCOL)
        except Exception as err:
            print('Unable to save data to', target, ':', err)
    return pickled_paths
def read_dataset(data_dir):
    """Return the (training, validation) records cached in PascalVoc.pickle.

    When ``<data_dir>/PascalVoc.pickle`` does not exist yet, the archive at
    DATA_URL is downloaded and extracted, image lists are created from the
    ``VOCdevkit`` folder and pickled. The pickle is then read back and its
    ``'training'`` and ``'validation'`` entries are returned.
    """
    pickle_filepath = os.path.join(data_dir, "PascalVoc.pickle")
    if os.path.exists(pickle_filepath):
        print ("Found pickle file!")
    else:
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_tarfile=True)
        devkit_dir = os.path.join(data_dir, "VOCdevkit")
        image_lists = create_image_lists(devkit_dir)
        print ("Pickling ...")
        with open(pickle_filepath, 'wb') as out:
            pickle.dump(image_lists, out, pickle.HIGHEST_PROTOCOL)

    with open(pickle_filepath, 'rb') as src:
        records = pickle.load(src)
    return records['training'], records['validation']
def read_dataset(data_dir):
    """Return the (training, validation) records cached in MITSceneParsing.pickle.

    When ``<data_dir>/MITSceneParsing.pickle`` is missing, the zip at
    DATA_URL is downloaded and extracted, image lists are created from the
    folder named after the archive (file name without extension) and
    pickled. The pickle is then read back and its ``'training'`` and
    ``'validation'`` entries are returned.
    """
    pickle_filepath = os.path.join(data_dir, "MITSceneParsing.pickle")
    if os.path.exists(pickle_filepath):
        print ("Found pickle file!")
    else:
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
        archive_name = DATA_URL.split("/")[-1]
        extracted_folder = os.path.splitext(archive_name)[0]
        image_lists = create_image_lists(
            os.path.join(data_dir, extracted_folder))
        print ("Pickling ...")
        with open(pickle_filepath, 'wb') as out:
            pickle.dump(image_lists, out, pickle.HIGHEST_PROTOCOL)

    with open(pickle_filepath, 'rb') as src:
        records = pickle.load(src)
    return records['training'], records['validation']
def preprocess(self, input_file, vocab_file, tensor_file):
    """Build a character vocabulary from ``input_file`` and save the data.

    Sets ``self.chars`` (characters ordered by descending frequency),
    ``self.vocab_size``, ``self.vocab`` (character -> index) and
    ``self.tensor`` (the text as an array of indices). ``self.chars`` is
    pickled to ``vocab_file`` and the tensor is stored with ``np.save`` at
    ``tensor_file``.
    """
    # Context manager so the input handle is closed even if read() fails.
    with open(input_file, "r") as f:
        data = f.read()

    counter = collections.Counter(data)
    count_pairs = sorted(counter.items(), key=lambda x: -x[1])
    self.chars, _ = zip(*count_pairs)
    self.vocab_size = len(self.chars)
    self.vocab = dict(zip(self.chars, range(len(self.chars))))
    print(self.vocab)
    with open(vocab_file, 'wb') as f:
        cPickle.dump(self.chars, f)
    self.tensor = np.array(list(map(self.vocab.get, data)))
    np.save(tensor_file, self.tensor)
def read_dataset(data_dir):
    """Return the (train, test, validation) image lists cached in celebA.pickle.

    When ``<data_dir>/celebA.pickle`` is missing, the zip at DATA_URL is
    downloaded and extracted, image lists are created from the folder named
    after the archive, their sizes are printed and the lists are pickled.
    The pickle is then read back and its ``'train'``, ``'test'`` and
    ``'validation'`` entries are returned.
    """
    pickle_filepath = os.path.join(data_dir, "celebA.pickle")
    if os.path.exists(pickle_filepath):
        print ("Found pickle file!")
    else:
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
        archive_name = DATA_URL.split("/")[-1]
        extracted_folder = os.path.splitext(archive_name)[0]
        image_lists = create_image_lists(
            os.path.join(data_dir, extracted_folder))
        print ("Training set: %d" % len(image_lists['train']))
        print ("Test set: %d" % len(image_lists['test']))
        print ("Validation set: %d" % len(image_lists['validation']))
        print ("Pickling ...")
        with open(pickle_filepath, 'wb') as out:
            pickle.dump(image_lists, out, pickle.HIGHEST_PROTOCOL)

    with open(pickle_filepath, 'rb') as src:
        cached = pickle.load(src)
    return cached['train'], cached['test'], cached['validation']
def load(fname):
    """Load an embedding dump generated by `save`.

    The dump unpickles to a ``(voc, vec)`` pair. A ``voc`` of length 2 is
    treated as ``(words, counts)`` and becomes a CountedVocabulary; any
    other ``voc`` becomes an OrderedVocabulary.

    Returns:
        Embedding built from the vocabulary and ``vec``.
    """
    fin = _open(fname)
    try:
        content = fin.read()
    finally:
        # Release the handle; previously it was left for the GC to collect.
        fin.close()
    # NOTE(review): unpickling runs arbitrary code - only load trusted dumps.
    if PY2:
        state = pickle.loads(content)
    else:
        state = pickle.loads(content, encoding='latin1')
    voc, vec = state
    if len(voc) == 2:
        words, counts = voc
        word_count = dict(zip(words, counts))
        vocab = CountedVocabulary(word_count=word_count)
    else:
        vocab = OrderedVocabulary(voc)
    return Embedding(vocabulary=vocab, vectors=vec)
def update_default_setting(self, key_tree, value):
    """
    Update a default value in the local settings file.

    :param key_tree: A tuple containing a tree of dictionary keys.
        Intermediate dictionaries are created when missing.
    :param value: The value for the setting.
    :returns: True once the file has been rewritten.
    """
    # Open the defaults. safe_load avoids constructing arbitrary Python
    # objects and does not need an explicit Loader argument; an empty file
    # loads as None, so fall back to an empty mapping.
    with open(self._default_settings_path, "rb") as fp:
        defaults = yaml.safe_load(fp) or {}

    branch = defaults
    for key in key_tree[:-1]:
        branch = branch.setdefault(key, {})
    branch[key_tree[-1]] = value

    with open(self._default_settings_path, "w") as fp:
        fp.write(yaml.dump(defaults))

    return True
def maybe_pickle(self, data_folders, min_num_images_per_class, force=False):
    """Pickle the dataset of every folder, skipping ones already done.

    For each folder the target is ``<folder>.pickle``. An existing target
    is kept unless ``force`` is true; otherwise the result of
    ``self.load_letter(folder, min_num_images_per_class, self.image_size,
    self.pixel_depth)`` is dumped. Write failures are printed, not raised.

    Returns:
        list: the ``.pickle`` file names, one per folder, in input order.
    """
    pickled_paths = []
    for folder in data_folders:
        target = folder + '.pickle'
        pickled_paths.append(target)
        if os.path.exists(target) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % target)
            continue
        print('Pickling %s.' % target)
        dataset = self.load_letter(folder, min_num_images_per_class,
                                   self.image_size, self.pixel_depth)
        try:
            with open(target, 'wb') as out:
                pickle.dump(dataset, out, pickle.HIGHEST_PROTOCOL)
        except Exception as err:
            print('Unable to save data to', target, ':', err)
    return pickled_paths
def empty_network(network):
    """Temporarily move the network's ``*_t`` attributes to a temp file.

    This is a generator with a single ``yield``. Before yielding, every
    ``<list_name>_t`` attribute of ``network`` is collected, replaced with
    None and pickled to a temporary file. After the yield, including when
    an exception is thrown into the generator at the yield, the attributes
    are reloaded from that file and the file is removed.
    """
    logger.debug("Storing pypsa timeseries to disk")

    from .components import all_components

    panels = {}
    for c in all_components:
        attr = network.components[c]["list_name"] + "_t"
        panels[attr] = getattr(network, attr)
        setattr(network, attr, None)

    fd, fn = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as f:
        pickle.dump(panels, f, -1)

    # Drop the in-memory copy so it can be reclaimed while suspended.
    del panels
    gc.collect()

    try:
        yield
    finally:
        # Restore unconditionally so a failure in the caller's block does
        # not leave the network emptied or the temp file behind.
        logger.debug("Reloading pypsa timeseries from disk")
        with open(fn, 'rb') as f:
            panels = pickle.load(f)
        os.remove(fn)
        for attr, pnl in iteritems(panels):
            setattr(network, attr, pnl)
def save_train_and_test_set(dataset, labels, ratio, pickle_file):
    """Split dataset/labels at ``ratio`` and pickle both halves.

    The first ``int(len(dataset) * ratio)`` items form the training part,
    the rest the test part. The saved dict has the keys ``train_dataset``,
    ``train_labels``, ``test_dataset`` and ``test_labels``. The size of the
    written file is printed afterwards.

    Raises:
        Exception: any error while writing is printed and re-raised.
    """
    split = int(len(dataset) * ratio)
    save = {
        'train_dataset': dataset[:split],
        'train_labels': labels[:split],
        'test_dataset': dataset[split:],
        'test_labels': labels[split:],
    }
    try:
        # The context manager closes the handle even when the dump fails.
        with open(pickle_file, 'wb') as f:
            pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
    except Exception as e:
        print('Unable to save data to', pickle_file, ':', e)
        raise
    statinfo = os.stat(pickle_file)
    print('Compressed pickle size:', statinfo.st_size)


# Main
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
    """Pickle the dataset of every folder, skipping ones already done.

    For each folder the target is ``<folder>.pickle``. An existing target
    is kept unless ``force`` is true; otherwise the result of
    ``load_letter(folder, min_num_images_per_class)`` is dumped with the
    highest pickle protocol. Write failures are printed, not raised.

    Returns:
        list: the ``.pickle`` file names, one per folder, in input order.
    """
    pickled_paths = []
    for folder in data_folders:
        target = folder + '.pickle'
        pickled_paths.append(target)
        if os.path.exists(target) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % target)
            continue
        print('Pickling %s.' % target)
        dataset = load_letter(folder, min_num_images_per_class)
        try:
            with open(target, 'wb') as out:
                pickle.dump(dataset, out, pickle.HIGHEST_PROTOCOL)
        except Exception as err:
            print('Unable to save data to', target, ':', err)
    return pickled_paths
def preprocess(self, input_file, vocab_file, tensor_file):
    """Tokenise ``input_file`` line by line and build padded index lists.

    Sets ``self.vocab`` and ``self.words`` from ``self.build_vocab``,
    ``self.vocab_size`` and ``self.raw_data``. Each entry of ``raw_data``
    is ``seq_length`` zeros, the word indices of one line (1 for words
    missing from the vocabulary), then ``seq_length`` twos. ``self.words``
    is pickled to ``vocab_file``.

    ``tensor_file`` is currently unused: saving ``raw_data`` is disabled.
    """
    with codecs.open(input_file, 'r', 'utf-8') as f:
        lines = f.readlines()
    # After decoding, a UTF-8 byte order mark is the single character
    # U+FEFF. Comparing against the byte string codecs.BOM_UTF8 never
    # matched. Guard against an empty file as well.
    if lines and lines[0].startswith(u'\ufeff'):
        lines[0] = lines[0][1:]
    lines = [line.strip().split() for line in lines]

    self.vocab, self.words = self.build_vocab(lines)
    self.vocab_size = len(self.words)

    with open(vocab_file, 'wb') as f:
        cPickle.dump(self.words, f)

    padding_front = [0] * self.seq_length
    padding_back = [2] * self.seq_length
    self.raw_data = [
        padding_front + [self.vocab.get(w, 1) for w in line] + padding_back
        for line in lines
    ]
    # np.save(tensor_file, self.raw_data)
def save(self, key, data):
    """Store ``data`` under ``key`` and write the whole cache to disk.

    The entry is keyed by ``pickle.dumps(key)``. All failures are logged as
    warnings rather than raised, so saving stays best-effort.
    """
    # Make sure the cache directory exists before writing. Creating it only
    # after a failed write meant the first save was always lost.
    directory = os.path.dirname(self.cache_path)
    if directory and not os.path.exists(directory):
        log.info('Generating Cache directory: %s.' % directory)
        try:
            os.makedirs(directory)
        except Exception as e:
            log.warning("Could not create directory: %s err: %s" % (
                directory, e))

    try:
        with open(self.cache_path, 'wb') as fh:
            self.data[pickle.dumps(key)] = data
            pickle.dump(self.data, fh, protocol=2)
    except Exception as e:
        log.warning("Could not save cache %s err: %s" % (
            self.cache_path, e))
def create():
    """Build a small network, pickle it to 'dummy.ai' and run it once.

    Two functions are compiled from the network: the forward output ``y``
    and ``f.T(y)``. After the network is pickled, both are evaluated on
    ``x`` and the shape and sum of each result are printed.
    """
    f = N.Sequence([
        N.Conv(8, (3, 3), strides=1, pad='same'),
        N.Dimshuffle(pattern=(0, 3, 1, 2)),
        N.FlattenLeft(outdim=2),
        N.Noise(level=0.3, noise_dims=None, noise_type='gaussian'),
        N.Dense(128, activation=K.relu),
        N.Dropout(level=0.3, noise_dims=None),
        N.Dense(10, activation=K.softmax)
    ], debug=True)
    y = f(X)
    yT = f.T(y)
    f1 = K.function(X, y)
    f2 = K.function(X, yT)
    # Pickle data is binary: write through a 'wb' handle and close it.
    with open(U.get_modelpath('dummy.ai', override=True), 'wb') as out:
        cPickle.dump(f, out)

    _ = f1(x)
    print(_.shape, _.sum())
    _ = f2(x)
    print(_.shape, _.sum())
def preprocess(self, input_file, vocab_file, tensor_file):
    """Turn the whitespace-separated words of ``input_file`` into indices.

    Sets ``self.vocab`` and ``self.words`` from ``self.build_vocab``,
    ``self.vocab_size`` and ``self.tensor`` (the ``self.vocab.get`` result
    for every word, as an array). ``self.words`` is pickled to
    ``vocab_file`` and the tensor is stored with ``np.save``.
    """
    with open(input_file, "r") as src:
        raw_text = src.read()

    # Optional cleaning (e.g. self.clean_str) would go here.
    tokens = raw_text.split()

    self.vocab, self.words = self.build_vocab(tokens)
    self.vocab_size = len(self.words)

    with open(vocab_file, 'wb') as out:
        cPickle.dump(self.words, out)

    # One vocabulary index per word of the text.
    lookup = self.vocab.get
    self.tensor = np.array([lookup(token) for token in tokens])
    # Save the data to data.npy
    np.save(tensor_file, self.tensor)
def preprocess(self, input_file, vocab_file, tensor_file, encoding):
    """Turn the whitespace-separated words of ``input_file`` into indices.

    The file is decoded with ``encoding``. Sets ``self.vocab`` and
    ``self.words`` from ``self.build_vocab``, ``self.vocab_size`` and
    ``self.tensor`` (the ``self.vocab.get`` result for every word, as an
    array). ``self.words`` is pickled to ``vocab_file`` and the tensor is
    stored with ``np.save``.
    """
    with codecs.open(input_file, "r", encoding=encoding) as src:
        raw_text = src.read()

    # Optional cleaning (e.g. self.clean_str) would go here.
    tokens = raw_text.split()

    self.vocab, self.words = self.build_vocab(tokens)
    self.vocab_size = len(self.words)

    with open(vocab_file, 'wb') as out:
        cPickle.dump(self.words, out)

    # One vocabulary index per word of the text.
    lookup = self.vocab.get
    self.tensor = np.array([lookup(token) for token in tokens])
    # Save the data to data.npy
    np.save(tensor_file, self.tensor)
def dumpmem(self, start, end):
    """
    Dump process memory from start to end

    Args:
        - start: start address (Int)
        - end: end address (Int)

    Returns:
        - memory content (raw bytes) read back from the temporary dump
          file, or None when the dump command returned None
    """
    logfd = tmpfile(is_binary_file=True)
    command = "dump memory %s 0x%x 0x%x" % (logfd.name, start, end)
    if self.execute_redirect(command) is None:
        content = None
    else:
        logfd.flush()
        content = logfd.read()
    logfd.close()
    return content
def readmem(self, address, size):
    """
    Read content of memory at an address

    Args:
        - address: start address to read (Int)
        - size: bytes to read (Int)

    Returns:
        - whatever dumpmem() returned when it is not None; otherwise a
          string with one chr() character per byte value printed by the
          "x/<size>bx" command (empty when that command gives no output)
    """
    # fast path: take the dump if it worked
    fast_result = self.dumpmem(address, address + size)
    if fast_result is not None:
        return fast_result

    # slow path: parse the examine-memory output
    text = self.execute_redirect("x/%dbx 0x%x" % (size, address))
    if not text:
        return ""
    chunks = []
    for row in text.splitlines():
        # the byte values follow the last ":<TAB>" on the row
        values = row.split(":\t")[-1].split()
        chunks.append("".join(chr(int(v, 0)) for v in values))
    return "".join(chunks)
def dump(self, main_loop):
    """Save parameters, iteration state and log of ``main_loop``.

    Creates ``self.path_to_folder`` when it is missing, runs the three
    ``dump_*`` steps, logs the elapsed time and records
    ``(iterations_done, self.path_to_parameters)`` under
    ``'last_saved_model'`` in the main loop's log status.
    """
    if not os.path.exists(self.path_to_folder):
        os.mkdir(self.path_to_folder)
    print("")
    logger.info(" Saving model")
    start = time.time()
    logger.info(" ...saving parameters")
    self.dump_parameters(main_loop)
    logger.info(" ...saving iteration state")
    self.dump_iteration_state(main_loop)
    logger.info(" ...saving log")
    self.dump_log(main_loop)
    logger.info(" Model saved, took {} seconds.".format(time.time()-start))
    # Record the iteration count and the path to the saved parameters.
    # Read and write through the same `main_loop` argument rather than
    # mixing it with self.main_loop.
    status = main_loop.log.status
    status['last_saved_model'] = (status['iterations_done'],
                                  self.path_to_parameters)
def read_dataset(data_dir):
    """Return the image records cached in lamem.pickle.

    When ``<data_dir>/lamem.pickle`` is missing, the tar file at DATA_URL is
    downloaded and extracted, image lists are created from the folder named
    by the archive file name up to its first extension separator, and
    stored under the ``'images'`` key of the pickle. The pickle is then
    read back and its ``'images'`` entry is returned.
    """
    pickle_filepath = os.path.join(data_dir, "lamem.pickle")
    if os.path.exists(pickle_filepath):
        print ("Found pickle file!")
    else:
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_tarfile=True)
        archive_name = DATA_URL.split("/")[-1]
        extracted_folder = archive_name.split(os.path.extsep)[0]
        image_lists = create_image_lists(
            os.path.join(data_dir, extracted_folder))
        payload = {'images': image_lists}
        print ("Pickling ...")
        with open(pickle_filepath, 'wb') as out:
            pickle.dump(payload, out, pickle.HIGHEST_PROTOCOL)

    with open(pickle_filepath, 'rb') as src:
        cached = pickle.load(src)
    return cached['images']
def save_pkl(self):
    """
    Pickle this object into the file named by its `key_pkl` attribute.

    When pickling raises `pickle.PicklingError`, a warning mentioning
    `self.keys` is logged, the partially written file is removed and the
    error is re-raised.
    """
    target_path = self.key_pkl
    try:
        # Binary mode is important under Windows.
        with open(target_path, 'wb') as handle:
            pickle.dump(self, handle, protocol=pickle.HIGHEST_PROTOCOL)
    except pickle.PicklingError:
        _logger.warning("Cache leak due to unpickle-able key data %s",
                        self.keys)
        os.remove(self.key_pkl)
        raise
def preprocess(self, input_file, vocab_file, tensor_file):
    """Turn the cleaned words of ``input_file`` into vocabulary indices.

    The text is passed through ``self.clean_str`` and split on whitespace.
    Sets ``self.vocab`` and ``self.words`` from ``self.build_vocab`` and
    ``self.tensor`` (index of every word, with ``self.vocab['UNK']`` for
    words missing from the vocabulary). ``self.words`` is pickled to
    ``vocab_file`` and the tensor is stored with ``np.save``.
    """
    with open(input_file, "r") as f:
        data = f.read()

    # Optional text cleaning or make them lower case, etc.
    data = self.clean_str(data)
    x_text = data.split()

    self.vocab, self.words = self.build_vocab(x_text)

    with open(vocab_file, 'wb') as f:
        cPickle.dump(self.words, f)

    # `in` replaces dict.has_key, which was removed in Python 3. 'UNK' is
    # looked up only when a word is actually missing, as before.
    vocab = self.vocab
    self.tensor = np.asarray(
        [vocab[word] if word in vocab else vocab['UNK'] for word in x_text])
    # Save the data to data.npy
    np.save(tensor_file, self.tensor)
def save(self, dataset_filename="CXR_png.pickle", overwrite=False):
    """Pickle the loaded dataset and its settings to ``dataset_filename``.

    Nothing is written (a message is printed) when ``self._dataset`` is
    None. Write failures are printed, not raised.

    NOTE(review): with ``overwrite=False`` an existing file is still
    replaced, because it is opened in 'wb' mode either way - confirm
    whether that is intended.
    """
    if self._dataset is None:
        print("Dataset is empty. Run load_images before saving.")
        return

    payload = {
        "dataset": self._dataset,
        "labels": self._labels,
        "valid_images_count": self._valid_images_count,
        "width": self._image_width,
        "height": self._image_height,
        "convert_to_gray": self._convert_to_gray,
        "folder": self._folder,
        "test_dataset": self._test_dataset,
        "test_labels": self._test_labels,
        "test_data_size": self._test_data_size,
    }

    if overwrite is True and os.path.isfile(dataset_filename):
        os.remove(dataset_filename)

    try:
        with open(dataset_filename, 'wb') as out:
            pickle.dump(payload, out, pickle.HIGHEST_PROTOCOL)
    except Exception as err:
        print('Unable to save data to', dataset_filename, ':', err)
def __rrshift__(self, iterable):
    """
    Yield the elements of the iterable, caching them on the way.

    When ``self.path`` is set, or a cache path exists and the cache is not
    to be cleared, the elements come from ``self.__iter__()`` and
    ``iterable`` is not consumed. Otherwise the cache is created and every
    element is pickled to ``self._fpath(index)`` before it is yielded.

    :param iterable iterable: Any iterable
    :return: Generator over same elements as input iterable.
    :rtype: Generator
    """
    use_existing = self.path or (self._cachepath and not self._clearcache)
    if use_existing:
        for cached in self.__iter__():
            yield cached
        return

    self._create_cache()
    index = 0
    for element in iterable:
        with open(self._fpath(index), 'wb') as out:
            pickle.dump(element, out, pickle.HIGHEST_PROTOCOL)
        yield element
        index += 1
def save_params(self, f_, filter_=None):
    """Pickle the current values of the variables to the file object ``f_``.

    The dumped dict maps each name in ``self._vars_di`` to the result of
    its ``get_value()``. When ``filter_`` is given, only names that fully
    match that regex are included.
    """
    #FIXME: this does not save shared_variable properties like "strict" or "allow_downcast"
    if filter_ is None:
        selected = self._vars_di.items()
    else:
        is_wanted = re.compile(filter_).fullmatch
        selected = ((name, var) for name, var in self._vars_di.items()
                    if is_wanted(name))
    values = {name: var.get_value() for name, var in selected}
    pickle.dump(values, f_)
def pickle_dump(data, filename):
    """Serialize data to a gzip-compressed file chosen by extension.

    ``.pkz`` files receive a protocol-2 pickle, ``.jz`` files receive the
    text produced by ``json_dumps``.

    Raises:
        ValueError: for any other file name ending.
    """
    if filename.endswith('.pkz'):
        with gzip.open(filename, 'wb') as out:
            # Protocol 2: try to support python 2.
            pickle.dump(data, out, protocol=2)
        return

    if filename.endswith('.jz'):
        with gzip.open(filename, 'wt') as out:
            out.write(json_dumps(data))
        return

    short_name = os.path.basename(filename)
    raise ValueError('Cannot determine format: {}'.format(short_name))
def to_pickle(self, filename):
    """Write this object to ``filename`` as a pickle.

    Parameters
    ----------
    filename : str
        Path of the file that receives the pickled object. The file is
        opened in binary write mode.

    Returns
    -------
    None
    """
    handle = open(filename, mode="wb")
    with handle:
        pickle.dump(self, handle)
def serialize_models(files, dir="."):
    """Convert several models to Python pickles.

    Each file is loaded with ``load_model`` and dumped to
    ``<dir>/<name>.pickle``, where ``<name>`` is the file's base name up to
    its first dot.
    """
    for f in files:
        fname = path.basename(f).split(".")[0]
        model = load_model(f)
        logger.info("serializing {}".format(f))
        # Close each output file as soon as it is written.
        with open(path.join(dir, fname + ".pickle"), "wb") as out:
            pickle.dump(model, out, protocol=2)  # required for Python 2 compat
def preprocess(self, input_file, vocab_file, tensor_file):
    """Build a character vocabulary from ``input_file`` and save the data.

    The file is decoded with ``self.encoding``. Sets ``self.chars``
    (characters ordered by descending frequency), ``self.vocab_size``,
    ``self.vocab`` (character -> index) and ``self.tensor`` (the text as an
    array of indices). ``self.chars`` is pickled to ``vocab_file`` and the
    tensor is stored with ``np.save``.
    """
    with codecs.open(input_file, "r", encoding=self.encoding) as src:
        text = src.read()

    frequencies = collections.Counter(text)
    ranked = sorted(frequencies.items(), key=lambda pair: pair[1],
                    reverse=True)
    chars, _counts = zip(*ranked)
    self.chars = chars
    self.vocab_size = len(chars)
    self.vocab = {ch: index for index, ch in enumerate(chars)}

    with open(vocab_file, 'wb') as out:
        cPickle.dump(self.chars, out)

    lookup = self.vocab.get
    self.tensor = np.array([lookup(ch) for ch in text])
    np.save(tensor_file, self.tensor)
def main():
    """Extract and save network skeleton with the corresponding weights.

    Writes two pickles into ``args.output_dir``: ``net_skeleton.ckpt``
    (parameter names with their shapes) and ``net_weights.ckpt`` (parameter
    names with their arrays).

    Raises:
      ImportError: PyCaffe module is not found."""
    args = get_arguments()
    sys.path.append(args.pycaffe_path)
    # An ImportError simply propagates to the caller.
    import caffe
    # Load net definition.
    net = caffe.Net('./util/deploy.prototxt', args.caffemodel, caffe.TEST)

    # Check the existence of output_dir.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Net skeleton with parameters names and shapes.
    # In TF, the filter shape is as follows: [ks, ks, input_channels, output_channels],
    # while in Caffe it looks like this: [output_channels, input_channels, ks, ks].
    net_skeleton = list()
    # .items() works on both Python 2 and 3, unlike .iteritems().
    for name, item in net.params.items():
        # Reversed shape: see the explanation on filter formats above.
        net_skeleton.append([name + '/w', item[0].data.shape[::-1]])
        net_skeleton.append([name + '/b', item[1].data.shape])

    with open(os.path.join(args.output_dir, 'net_skeleton.ckpt'), 'wb') as f:
        cPickle.dump(net_skeleton, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # Net weights.
    net_weights = dict()
    for name, item in net.params.items():
        # Transposed: see the explanation on filter formats above.
        net_weights[name + '/w'] = item[0].data.transpose(2, 3, 1, 0)
        net_weights[name + '/b'] = item[1].data
    with open(os.path.join(args.output_dir, 'net_weights.ckpt'), 'wb') as f:
        cPickle.dump(net_weights, f, protocol=cPickle.HIGHEST_PROTOCOL)
    del net, net_skeleton, net_weights
def write_data(self, result_dict):
    """Pickle every value of ``result_dict`` to ``<pickle_dir>/<key>.pkl``.

    Each write happens inside a ``SimpleTimer`` context labelled with the
    key.
    """
    for name, payload in six.viewitems(result_dict):
        target = os.path.join(self.pickle_dir, name + ".pkl")
        timer = SimpleTimer(
            "Writing generated data %s to pickle file" % name,
            end_in_new_line=False)
        with timer:
            with open(target, "wb") as out:
                cPickle.dump(payload, out, protocol=cPickle.HIGHEST_PROTOCOL)
def read_dataset(data_dir):
    """Return a CelebA_Dataset built from the lists cached in celebA.pickle.

    When ``<data_dir>/celebA.pickle`` is missing, the image lists are
    created from the folder named after the DATA_URL archive and pickled.
    Automatic download is disabled, so that folder must already exist.

    Raises:
        ValueError: the pickle and the extracted dataset folder are both
            missing.
    """
    pickle_filepath = os.path.join(data_dir, "celebA.pickle")
    if os.path.exists(pickle_filepath):
        print ("Found pickle file!")
    else:
        # utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
        archive_name = DATA_URL.split("/")[-1]
        dir_path = os.path.join(data_dir, os.path.splitext(archive_name)[0])
        if not os.path.exists(dir_path):
            print ("CelebA dataset needs to be downloaded and unzipped manually")
            print ("Download from: %s" % DATA_URL)
            raise ValueError("Dataset not found")

        image_lists = create_image_lists(dir_path)
        print ("Training set: %d" % len(image_lists['train']))
        print ("Test set: %d" % len(image_lists['test']))
        print ("Validation set: %d" % len(image_lists['validation']))
        print ("Pickling ...")
        with open(pickle_filepath, 'wb') as out:
            pickle.dump(image_lists, out, pickle.HIGHEST_PROTOCOL)

    with open(pickle_filepath, 'rb') as src:
        cached = pickle.load(src)
    return CelebA_Dataset(cached)
def save_variables(pickle_file_name, var, info, overwrite=False):
    """Pickle the values in ``var`` under the names given in ``info``.

    ``var`` and ``info`` must be lists; ``info[i]`` names ``var[i]``. The
    resulting dict is dumped to ``pickle_file_name``.

    Raises:
        Exception: the file already exists and ``overwrite`` is False.
        AssertionError: ``var`` or ``info`` is not a list.
        IndexError: ``info`` is shorter than ``var``.
    """
    # `== False` is kept on purpose so that other falsy values behave
    # exactly as they did before.
    if os.path.exists(pickle_file_name) and overwrite == False:
        raise Exception('{:s} exists and over write is false.'.format(pickle_file_name))
    # Construct the dictionary
    assert isinstance(var, list)
    assert isinstance(info, list)
    d = {}
    # enumerate replaces the Python-2-only xrange index loop.
    for i, value in enumerate(var):
        d[info[i]] = value
    with open(pickle_file_name, 'wb') as f:
        cPickle.dump(d, f, cPickle.HIGHEST_PROTOCOL)