The following 49 code examples, extracted from open-source Python projects, illustrate how to use joblib.load().
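For orientation, here is a minimal, self-contained sketch of the dump/load round trip that the project examples below build on (the file names and the toy object are placeholders chosen for illustration, not taken from any of the projects):

import os
import tempfile

import numpy as np
import joblib

# Persist an arbitrary Python object (here a dict holding a NumPy array).
tmp_dir = tempfile.mkdtemp()
model_path = os.path.join(tmp_dir, "model.pkl")
obj = {"weights": np.arange(10, dtype=np.float64), "name": "demo"}
joblib.dump(obj, model_path, compress=3)   # compress is optional

# Read it back; joblib.load returns the same kind of object that was dumped.
restored = joblib.load(model_path)
assert np.array_equal(restored["weights"], obj["weights"])

# Uncompressed dumps of large arrays can be memory-mapped instead of copied into RAM.
array_path = os.path.join(tmp_dir, "weights.pkl")
joblib.dump(obj["weights"], array_path)
weights_view = joblib.load(array_path, mmap_mode="r")

Several of the examples below pass a second positional argument such as "r" to joblib.load; that argument is the mmap_mode shown above.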
def test_models(models_dir, test_dir, out_dir):
    models_dir = utils.abs_path_dir(models_dir) + "/"
    test_dir = utils.abs_path_dir(test_dir) + "/"
    utils.create_dir(out_dir)
    test_files = os.listdir(test_dir)
    models = os.listdir(models_dir)
    for model in models:
        utils.print_success(model)
        pred_dir = out_dir + model + "/"
        utils.create_dir(pred_dir)
        clf = joblib.load(models_dir + model + "/" + model + ".pkl")
        for index, test_file in enumerate(test_files):
            print(str(index) + "\t" + test_file)
            sys.stdout.write("\033[F")
            sys.stdout.write("\033[K")
            test_features = read_test_file(test_dir + test_file)
            predictions = clf.predict_proba(test_features)
            with open(pred_dir + test_file, "w") as filep:
                for pred in predictions:
                    filep.write(str(pred[0]) + "\n")
        sys.stdout.write("\033[K")
def _caches_to_file(cache_path, start, end, name, cb, concat):
    start_time = time()
    if concat:
        all_data = []
        for i in range(start, end):
            data = load(os.path.join(cache_path, "{0}.jb".format(i)))
            all_data.extend(data)
        dump(all_data, name, 3)
    else:
        target_path = os.path.join(cache_path, name[:-3])
        if not os.path.exists(target_path):
            os.makedirs(target_path)
        for i in range(start, end):
            src_file_path = os.path.join(cache_path, "{0}.jb".format(i))
            basename = os.path.basename(src_file_path)
            target_file_path = os.path.join(target_path, basename)
            shutil.move(src_file_path, target_file_path)
        finished_flag = os.path.join(target_path, '.finished')
        with open(finished_flag, 'a'):
            os.utime(finished_flag, None)
    logging.debug("Finished saving data to {0}. Took {1}s".format(name, time() - start_time))
    cb()
def create_ds_from_splits(mat_file_prefix, db, splits):
    """Create datasets from splits of data.

    This function is created because of splits from larger datasets
    """
    for batch_idx in xrange(1, splits + 1):
        # load data
        temp_data = load_mat_data(mat_file_prefix + str(batch_idx) + ".mat")
        print("[MESSAGE] Loaded %d-th split" % batch_idx)

        group_name = "grid_data_split_" + str(batch_idx)
        # save data within splits
        for key in data_dict:
            add_h5_group(group_name, db)
            add_h5_ds(temp_data[key], key, db, group_name)
            print("[MESSAGE] %s: Saved %s" % (group_name, key))
def plot_all_policy_at0(path_experiment, color, num_iter=100, fig_dir=None):
    mean_at_0 = []
    var_at_0 = []
    for itr in range(num_iter):
        data_bimodal_1d = joblib.load(os.path.join(path_experiment, 'itr_{}.pkl'.format(itr)))
        poli = data_bimodal_1d['policy']
        action_at_0 = poli.get_action(np.array((0,)))
        mean_at_0.append(action_at_0[1]['mean'])
        var_at_0.append(action_at_0[1]['log_std'])
        # print "sampled action in iter {}: {}. Reward should be: {}".format(itr, action_at_0[0], reward(action_at_0[0]))
    itr = list(range(num_iter))
    plt.plot(itr, mean_at_0, color=color, label='mean at 0')
    plt.plot(itr, var_at_0, color=color * 0.7, label='logstd at 0')
    plt.title('How the policy varies across iterations')
    plt.xlabel('iteration')
    plt.ylabel('mean and variance at 0')
    plt.legend(loc=3)
    if fig_dir:
        plt.savefig(os.path.join(fig_dir, 'policy_progress'))
    else:
        print("No directory for saving plots")


## estimate by MC the policy at 0!
def plot_all_policy_at0(path_experiment, color, num_iter=100, fig_dir=None):
    mean_at_0 = []
    var_at_0 = []
    for itr in range(num_iter):
        data_bimodal_1d = joblib.load(os.path.join(path_experiment, 'itr_{}.pkl'.format(itr)))
        poli = data_bimodal_1d['policy']
        action_at_0 = poli.get_action(np.array((0,)))
        mean_at_0.append(action_at_0[1]['mean'])
        var_at_0.append(action_at_0[1]['log_std'])
    itr = list(range(num_iter))
    plt.plot(itr, mean_at_0, color=color, label='mean at 0')
    plt.plot(itr, var_at_0, color=color * 0.7, label='logstd at 0')
    plt.title('How the policy varies across iterations')
    plt.xlabel('iteration')
    plt.ylabel('mean and variance at 0')
    plt.legend(loc=3)
    if fig_dir:
        plt.savefig(os.path.join(fig_dir, 'policy_at_0'))
    else:
        print("No directory for saving plots")


## plot for all the experiments
def __init__(self):
    et = EntityTracker()
    self.bow_enc = BoW_encoder()
    self.emb = UtteranceEmbed()
    at = ActionTracker(et)

    self.dataset, dialog_indices = Data(et, at).trainset
    train_indices = joblib.load('data/train_test_list/train_indices_759')
    test_indices = joblib.load('data/train_test_list/test_indices_759_949')
    self.dialog_indices_tr = train_indices
    self.dialog_indices_dev = test_indices

    obs_size = self.emb.dim + self.bow_enc.vocab_size + et.num_features
    self.action_templates = at.get_action_templates()
    action_size = at.action_size
    nb_hidden = 128

    self.net = LSTM_net(obs_size=obs_size,
                        action_size=action_size,
                        nb_hidden=nb_hidden)
def train(filePath):
    try:
        if not filePath.lower().endswith('json'):
            return {'success': False, 'message': 'Training file should be in json format'}
        with open(filePath) as file:
            ent_data = json.load(file)
        dataset = [jsonToCrf(q, nlp) for q in ent_data['entity_examples']]
        X_train = [sent2features(s) for s in dataset]
        y_train = [sent2labels(s) for s in dataset]
        crf = sklearn_crfsuite.CRF(
            algorithm='lbfgs',
            c1=0.1,
            c2=0.1,
            max_iterations=100,
            all_possible_transitions=True
        )
        crf.fit(X_train, y_train)
        if not os.path.exists("crfModel"):
            os.mkdir("crfModel")
        if os.path.isfile("crfModel/classifier.pkl"):
            os.remove("crfModel/classifier.pkl")
        joblib.dump(crf, "crfModel/classifier.pkl")
        return {'success': True, 'message': 'Model Trained Successfully'}
    except Exception as ex:
        return {'success': False, 'message': 'Error while Training the model - ' + str(ex)}
def predict(utterance):
    try:
        tagged = []
        finallist = []
        parsed = nlp(utterance)
        for i in range(len(parsed)):
            tagged.append((str(parsed[i]), parsed[i].tag_))
        finallist.append(tagged)
        test = [sent2features(s) for s in finallist]
        if os.path.isfile("crfModel/classifier.pkl"):
            crf = joblib.load("crfModel/classifier.pkl")
        else:
            return {'success': False, 'message': 'Please Train the model first'}
        predicted = crf.predict(test)
        entityList = extractEntities(predicted[0], tagged)
        return {'success': True, 'entitiesPredicted': entityList}
    except Exception as ex:
        return {'success': False, 'message': 'Error while prediction - ' + str(ex)}
def setNetwork(self, fname, nEpoch=-1):
    """ Set values into the network

    Parameters
    -----------
    fname : string
        Name of the file where the values are
    nEpoch : int
        Epoch number (Optional)
    """
    basename = "nnets/" + fname
    if nEpoch >= 0:
        all_params = joblib.load(basename + ".epoch={}".format(nEpoch))
    else:
        all_params = joblib.load(basename)
    self._network.setAllParams(all_params)
def __init__(self, *args, **kwargs):
    super(score_simple, self).__init__(*args, **kwargs)

    f_db = os.path.join(
        kwargs['output_data_directory'],
        kwargs['term_frequency']['f_db']
    )
    if not os.path.exists(f_db):
        msg = "{} not computed yet, needed for TF methods!"
        raise ValueError(msg.format(f_db))

    score_config = simple_config.load()["score"]
    f_csv = os.path.join(
        score_config["output_data_directory"],
        score_config["term_document_frequency"]["f_db"],
    )
    IDF = pd.read_csv(f_csv)
    IDF = dict(zip(IDF["word"].values, IDF["count"].values))
    self.corpus_N = IDF.pop("__pipeline_document_counter")

    # Compute the IDF
    for key in IDF:
        IDF[key] = np.log(float(self.corpus_N) / (IDF[key] + 1))
    self.IDF = IDF
def restore(self, checkpoint_dir=None):
    if checkpoint_dir is None:
        checkpoint_dir = logger.get_snapshot_dir()
    checkpoint_file = os.path.join(checkpoint_dir, 'params.chk')
    if os.path.isfile(checkpoint_file + '.meta'):
        sess = tf.get_default_session()
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_file)

        tabular_chk_file = os.path.join(checkpoint_dir, 'progress.csv.chk')
        if os.path.isfile(tabular_chk_file):
            tabular_file = os.path.join(checkpoint_dir, 'progress.csv')
            logger.remove_tabular_output(tabular_file)
            shutil.copy(tabular_chk_file, tabular_file)
            logger.add_tabular_output(tabular_file)

        pool_file = os.path.join(checkpoint_dir, 'pool.chk')
        if self.save_format == 'pickle':
            pickle_load(pool_file)
        elif self.save_format == 'joblib':
            self.pool = joblib.load(pool_file)
        else:
            raise NotImplementedError

        logger.log('Restored from checkpoint %s' % checkpoint_file)
    else:
        logger.log('No checkpoint %s' % checkpoint_file)
def read_data_from_pkl(datafile):
    """
    Read a pkl file written with joblib.dump.
    :param datafile: filename of pkl
    :return:
    """
    datas = joblib.load(datafile)
    for i in range(10):
        datas = np.random.permutation(datas)
    inputs, labels = [], []
    for data in datas:
        inputs.append(data["input"])
        labels.append(data["label"])
    inputs = np.array(inputs).reshape(-1, 15, 101, 101, 3).astype(np.float32)
    inputs -= np.mean(inputs, axis=(2, 3), keepdims=True)
    inputs /= np.std(inputs, axis=(2, 3), keepdims=True)
    labels = np.array(labels).reshape(-1, 1).astype(np.float32)
    return inputs, labels
def _fit_embedding_word(self, embedding_type, construct_docs, tokenize_, d=None):
    if embedding_type == 'google':
        embeddings_ = joblib.load('data/google/GoogleNews-vectors-negative300.pickle')
        embeddings_ = SimpleNamespace(
            X=embeddings_.syn0,
            vocab={w: v.index for w, v in embeddings_.vocab.items()}
        )
    elif embedding_type == 'twitter':
        estimator = Pipeline([
            ('tokenize', MapCorporas(tokenize_)),
            ('word2vec', MergeSliceCorporas(CachedFitTransform(Word2Vec(
                sg=1, size=d, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16
            ), self.memory))),
        ]).fit([self.train_docs, self.unsup_docs[:10**6], self.val_docs, self.test_docs])
        embeddings_ = estimator.named_steps['word2vec'].estimator
        embeddings_ = SimpleNamespace(
            X=embeddings_.syn0,
            vocab={w: v.index for w, v in embeddings_.vocab.items()}
        )
    else:
        embeddings_ = SimpleNamespace(X=np.empty((0, d)), vocab={})

    estimator = Pipeline([
        ('tokenize', MapCorporas(tokenize_)),
        # 0.25 is chosen so the unknown vectors have approximately the same variance as google pre-trained ones
        ('embeddings', MapCorporas(Embeddings(
            embeddings_,
            rand=lambda shape: get_rng().uniform(-0.25, 0.25, shape).astype('float32'),
            include_zero=True
        ))),
    ])
    estimator.fit(construct_docs)
    return estimator.named_steps['embeddings'].estimator
def __init__(self, num_rnn_hidden, num_classes, utter_embed, sequence_length, filter_sizes, num_filters,
             config, l2_reg_lambda=0.0):
    self.num_hidden = num_rnn_hidden
    self.num_classes = num_classes
    self.utter_embed = utter_embed
    self.logger = config.logger

    self.sequence_length = sequence_length
    self.embedding_size = utter_embed.get_vector_size()
    self.filter_sizes = filter_sizes
    self.num_filters = num_filters
    self.l2_reg_lambda = l2_reg_lambda

    self.config = config
    self.cate_mapping_dict = joblib.load('./dbdc3/data/cate_mapping_dict')
def read(self, unpickler):
    """Reconstruct the array."""
    filename = os.path.join(unpickler._dirname, self.filename)

    # Load the array from the disk
    # use getattr instead of self.allow_mmap to ensure backward compat
    # with NDArrayWrapper instances pickled with joblib < 0.9.0
    allow_mmap = getattr(self, 'allow_mmap', True)
    memmap_kwargs = ({} if not allow_mmap
                     else {'mmap_mode': unpickler.mmap_mode})
    array = unpickler.np.load(filename, **memmap_kwargs)

    # Reconstruct subclasses. This does not work with old
    # versions of numpy
    if (hasattr(array, '__array_prepare__') and
            self.subclass not in (unpickler.np.ndarray,
                                  unpickler.np.memmap)):
        # We need to reconstruct another subclass
        new_array = unpickler.np.core.multiarray._reconstruct(
            self.subclass, (0,), 'b')
        return new_array.__array_prepare__(array)
    else:
        return array
def get_vocab_index_embedding_weights(self, embedding_dim, embedding_weights_masking,
                                      load_embeddings_pickled=False, load_vocab_pickled=False):
    embedding_weights = []
    if embedding_weights_masking == True:
        masking_value = "masked"      # For masked embedding weights leave it blank "", else for masked use "_non_masked"
    else:
        masking_value = "non_masked"  # For masked embedding weights leave it blank "", else for masked use "_non_masked"

    # Load data from files
    if load_vocab_pickled:
        vocab_index_dict = joblib.load(self.conf.vocab_index_file.format(masking_value))
        vocab_set = joblib.load(self.conf.vocab_set_file.format(masking_value))
    else:
        vocab_set, vocab_index_dict = self.generate_vocabulary_set(masking=embedding_weights_masking)

    if self.conf.feature_level == "word":
        embedding_weights = self.load_word_embeddings_compact(
            embedding_dim, vocab_set,
            masking=embedding_weights_masking,
            use_pickled=load_embeddings_pickled)

    return embedding_weights, vocab_index_dict
def add_file(prediction, create, value, *args, **kwargs):
    train_featureset = prediction.model.featureset
    fset_data, data = featurize.load_featureset(train_featureset.file_uri)
    if 'class' in prediction.dataset.name or 'regr' in prediction.dataset.name:
        labels = data['labels']
    else:
        labels = []
    model_data = joblib.load(prediction.model.file_uri)
    if hasattr(model_data, 'best_estimator_'):
        model_data = model_data.best_estimator_
    preds = model_data.predict(fset_data)
    pred_probs = (pd.DataFrame(model_data.predict_proba(fset_data),
                               index=fset_data.index.astype(str),
                               columns=model_data.classes_)
                  if hasattr(model_data, 'predict_proba') else [])
    all_classes = model_data.classes_ if hasattr(model_data, 'classes_') else []
    pred_path = pjoin(TMP_DIR, '{}.npz'.format(str(uuid.uuid4())))
    featurize.save_featureset(fset_data, pred_path, labels=labels,
                              preds=preds, pred_probs=pred_probs)
    prediction.file_uri = pred_path
    DBSession().commit()
def train_model_group(model_info, function='logistic'):
    model = joblib.load(getTheFile('models/{}.m').format(function))
    base_info = model_config.model_dim
    user_id = model_info['user_id']
    del model_info['user_id']
    for key, value in base_info.items():
        if key not in model_info.columns:
            model_info[key] = value
    # model_info = transform_data.transform_with_woe(model_info)
    model_info = model_info[sorted(model_info.columns)]
    x = np.matrix(model_info)
    yt = model.predict(x)
    ytp = model.predict_proba(x)
    return user_id, yt, ytp
def process_map_data(path):
    data = joblib.load(path)

    im_data = data['im']
    value_data = data['value']
    state_data = data['state']
    label_data = np.array([np.eye(1, 8, l)[0] for l in data['label']])

    num = im_data.shape[0]
    num_train = num - num // 5

    im_train = np.concatenate(
        (np.expand_dims(im_data[:num_train], 1),
         np.expand_dims(value_data[:num_train], 1)),
        axis=1).astype(dtype=np.float32)
    state_train = state_data[:num_train]
    label_train = label_data[:num_train]

    im_test = np.concatenate(
        (np.expand_dims(im_data[num_train:], 1),
         np.expand_dims(value_data[num_train:], 1)),
        axis=1).astype(dtype=np.float32)
    state_test = state_data[num_train:]
    label_test = label_data[num_train:]

    return (im_train, state_train, label_train), (im_test, state_test, label_test)
def load_previous_runs(self):
    if not os.path.isfile(self.prev_runs_file_):
        self.found_matching_run = False
        return
    with open(self.prev_runs_file_, 'r') as file:
        self.all_prev_runs_ = yaml.load(file)
    for run in self.all_prev_runs_:
        self.max_data_version_ = max(self.max_data_version_, run['input_data_version'])
        if run['input_data_hash'] == self.data_.df_hash:
            self.max_config_version_ = max(self.max_config_version_, run['config_version'])
            if run['config_hash'] == self.config_hash_:
                if self.found_matching_run:
                    raise Exception('Duplicate previous run entries found.')
                self.found_matching_run = True
                self.run_ = run
                self.data_folder_ = self.get_versioned_folder(run['input_data_version'],
                                                              run['config_version'])
    if self.found_matching_run is None:
        self.found_matching_run = False
def load(self, obj):
    """Load model from file."""
    self.__dict__.update(joblib.load(obj).__dict__)
def load_grid_selection(path):
    """Load a selected grid from pickle."""
    if not os.path.isfile(path):
        raise ValueError("The file does not exist.")

    with open(path, "rb") as f:
        data = pickle.load(f)
        f.close()

    return data['environment'], data['gt'], data['po'], data['goal']
def process_map_data(path, return_full=False):
    data = joblib.load(path)

    im_data = data['im']
    value_data = data['value']
    state_data = data['state']

    if return_full:
        im_full = np.concatenate(
            (np.expand_dims(im_data, 1),
             np.expand_dims(value_data, 1)),
            axis=1).astype(dtype=np.uint8)
        return im_full, state_data, data['label'], data['sample_idx']

    label_data = np.array([np.eye(1, 8, l)[0] for l in data['label']])

    num = im_data.shape[0]
    num_train = num - num // 5

    im_train = np.concatenate(
        (np.expand_dims(im_data[:num_train], 1),
         np.expand_dims(value_data[:num_train], 1)),
        axis=1).astype(dtype=np.float32)
    state_train = state_data[:num_train]
    label_train = label_data[:num_train]

    im_test = np.concatenate(
        (np.expand_dims(im_data[num_train:], 1),
         np.expand_dims(value_data[num_train:], 1)),
        axis=1).astype(dtype=np.float32)
    state_test = state_data[num_train:]
    label_test = label_data[num_train:]

    return (im_train, state_train, label_train), \
           (im_test, state_test, label_test), data['sample_idx']
def create_grid_8_dataset(mat_file_path, db_name, save_dir):
    """Convert grid 8x8 dataset from mat file to hdf5 format.

    Parameters
    ----------
    mat_file_path : str
        the path to the mat file
    db_name : str
        the name of the dataset
    save_dir : str
        the directory of the output path (must exist)
    """
    # load matlab data
    mat_data = load_mat_data(mat_file_path)
    print("[MESSAGE] The Matlab data is loaded.")

    # init HDF5 database
    db = init_h5_db(db_name, save_dir)

    # save dataset
    for key in data_dict:
        add_h5_ds(mat_data[key], key, db)
        print("[MESSAGE] Saved %s" % (key))

    db.flush()
    db.close()
    print("[MESSAGE] The grid 8x8 dataset is saved at %s" % (save_dir))
def load(file_name):
    if file_name.endswith('.json'):
        with open(file_name, 'r') as f:
            return jsonpickle.loads(f.read())
    if file_name.endswith('.npy'):
        return np.load(file_name)
    return joblib.load(file_name)
def _load_experiments(self, data_folder, name_or_patterns):
    if not isinstance(name_or_patterns, (list, tuple)):
        name_or_patterns = [name_or_patterns]
    files = []
    for name_or_pattern in name_or_patterns:
        matched_files = glob(osp.join(data_folder, name_or_pattern))  # glob gives a list of all files satisfying the pattern
        files += matched_files  # this will include the same file twice if it satisfies 2 patterns
    experiments = []
    progress_f = None
    params_f = None
    pkl_data = None
    for f in files:
        if os.path.isdir(f):
            try:
                progress = self._read_data(osp.join(f, "progress.csv"))
                params = self._read_params(osp.join(f, "params.json"))
                params["exp_name"] = osp.basename(f)
                if os.path.isfile(osp.join(f, "params.pkl")):
                    pkl_data = joblib.load(osp.join(f, "params.pkl"))
                    experiments.append(Experiment(progress, params, pkl_data))
                else:
                    experiments.append(Experiment(progress, params))
            except Exception as e:
                print(e)
        elif 'progress.csv' in f:  # in case the data folder given is the dir that contains the files
            progress_f = self._read_data(f)
        elif 'params.json' in f:
            params_f = self._read_params(f)
        elif 'params.pkl' in f:
            print('about to load', f)
            pkl_data = joblib.load(f)
    if params_f and progress_f:
        if pkl_data:
            experiments.append(Experiment(progress_f, params_f, pkl_data))
        else:
            experiments.append(Experiment(progress_f, params_f))
    self._experiments = experiments
def symbolize_signal(self, signal, parallel=None, n_jobs=-1):
    """
    Symbolize whole time-series signal to a sentence (vector of words),
    parallel can be {None, "ipython"}
    """
    window_index = self.sliding_window_index(len(signal))
    if parallel is None:
        return map(lambda wi: self.symbolize_window(signal[wi]), window_index)
    elif parallel == "ipython":
        ## too slow
        raise NotImplementedError("parallel parameter %s not supported" % parallel)
        # return self.iparallel_symbolize_signal(signal)
    elif parallel == "joblib":
        with tempfile.NamedTemporaryFile(delete=False) as f:
            tf = f.name
        print("save temp file at %s" % tf)
        tfiles = joblib.dump(signal, tf)
        xs = joblib.load(tf, "r")
        n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs
        window_index = list(window_index)
        batch_size = len(window_index) // n_jobs
        batches = chunk(window_index, batch_size)
        symbols = Parallel(n_jobs)(delayed(joblib_symbolize_window)(self, xs, batch) for batch in batches)
        for f in tfiles:
            os.unlink(f)
        return sum(symbols, [])
    else:
        raise NotImplementedError("parallel parameter %s not supported" % parallel)
def signal_to_paa_vector(self, signal, n_jobs=-1):
    window_index = self.sliding_window_index(len(signal))
    with tempfile.NamedTemporaryFile(delete=False) as f:
        tf = f.name
    print("save temp file at %s" % tf)
    tfiles = joblib.dump(signal, tf)
    xs = joblib.load(tf, "r")
    n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs
    window_index = list(window_index)
    batch_size = len(window_index) // n_jobs
    batches = chunk(window_index, batch_size)
    vecs = Parallel(n_jobs)(delayed(joblib_paa_window)(self, xs, batch) for batch in batches)
    for f in tfiles:
        os.unlink(f)
    return np.vstack(vecs)
def save_single(self):
    assert self.V is not None
    assert self._ref is not None

    # Set the size explicitly as a sanity check
    size_n, dim_V = self.V.shape

    config_score = simple_config.load()["score"]
    f_db = os.path.join(
        config_score["output_data_directory"],
        config_score["document_scores"]["f_db"]
    )
    h5 = touch_h5(f_db)
    g = h5.require_group(self.method)
    gx = g.require_group(self.current_filename)

    # Save the data array
    msg = "Saving {} {} ({})"
    print(msg.format(self.method, self.current_filename, size_n))

    for col in ["V", "_ref", "VX",
                "VX_explained_variance_ratio_",
                "VX_components_"]:
        if col in gx:
            # print " Clearing", self.method, self.current_filename, col
            del gx[col]

    gx.create_dataset("V", data=self.V, **self.h5py_args)
    gx.create_dataset("_ref", data=self._ref, **self.h5py_args)
def compute_reduced_representation(self):
    if not self.compute_reduced:
        return None

    config_score = simple_config.load()["score"]
    f_db = os.path.join(
        config_score["output_data_directory"],
        config_score["document_scores"]["f_db"]
    )
    h5 = touch_h5(f_db)
    g = h5[self.method]

    keys = g.keys()
    V = np.vstack([g[x]["V"][:] for x in keys])
    sizes = [g[x]["_ref"].shape[0] for x in keys]

    nc = self.reduced_n_components
    clf = IncrementalPCA(n_components=nc)

    msg = "Performing PCA on {}, ({})->({})"
    print(msg.format(self.method, V.shape[1], nc))

    VX = clf.fit_transform(V)
    EVR = clf.explained_variance_ratio_
    COMPONENTS = clf.components_

    for key, size in zip(keys, sizes):
        # Take slices equal to the size
        vx, VX = VX[:size, :], VX[size:, :]
        evr, EVR = EVR[:size], EVR[size:]
        com, COMPONENTS = COMPONENTS[:size, :], COMPONENTS[size:, :]

        g[key].create_dataset("VX", data=vx, **self.h5py_args)
        g[key].create_dataset("VX_explained_variance_ratio_", data=evr)
        g[key].create_dataset("VX_components_", data=com)

    h5.close()
def restore(self, checkpoint_dir=None):
    if checkpoint_dir is None:
        checkpoint_dir = logger.get_snapshot_dir()
    checkpoint_file = os.path.join(checkpoint_dir, 'params.chk')
    if os.path.isfile(checkpoint_file + '.meta'):
        sess = tf.get_default_session()
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_file)

        tabular_chk_file = os.path.join(checkpoint_dir, 'progress.csv.chk')
        if os.path.isfile(tabular_chk_file):
            tabular_file = os.path.join(checkpoint_dir, 'progress.csv')
            logger.remove_tabular_output(tabular_file)
            shutil.copy(tabular_chk_file, tabular_file)
            logger.add_tabular_output(tabular_file)

        if self.qf is not None:
            pool_file = os.path.join(checkpoint_dir, 'pool.chk')
            if self.save_format == 'pickle':
                pickle_load(pool_file)
            elif self.save_format == 'joblib':
                self.pool = joblib.load(pool_file)
            else:
                raise NotImplementedError

        logger.log('Restored from checkpoint %s' % checkpoint_file)
    else:
        logger.log('No checkpoint %s' % checkpoint_file)
def load_object(string):
    # converts string to whatever was dumps'ed in it
    return joblib.load(BytesIO(string))
def process_study(study_id, annotations, out_dir, nstack):
    volumes_metadata = isotropic_volumes_metadata[study_id]
    isometric_volume = np.load('../data_proc/stage1/isotropic_volumes_1mm/{}.npy'.format(study_id))
    mean = np.mean(isometric_volume).astype(np.float32)
    std = np.std(isometric_volume).astype(np.float32)
    resize_factor = np.divide(volumes_metadata['volume_resampled_shape'],
                              volumes_metadata['volume_shape'])

    coords_list = []
    for a in annotations:
        d = a['data']
        z = int(round(resize_factor[0] * a['sliceNum']))
        y0 = resize_factor[1] * d['y']
        y1 = resize_factor[1] * (d['y'] + d['height'])
        x0 = resize_factor[2] * d['x']
        x1 = resize_factor[2] * (d['x'] + d['width'])
        coords_list.append((z, y0, y1, x0, x1))

    samples = []
    for coords in coords_list:
        z, y0, y1, x0, x1 = coords
        for i in range(40):
            sample_id = uuid4()
            rand_y0 = max(0, int(round(y0 - random.randint(0, 32))))
            rand_y1 = min(isometric_volume.shape[1], int(round(y1 + random.randint(0, 32))))
            rand_x0 = max(0, int(round(x0 - random.randint(0, 32))))
            rand_x1 = min(isometric_volume.shape[2], int(round(x1 + random.randint(0, 32))))
            patch = []
            for zi in range(nstack):
                patch.append(resize(isometric_volume[z + zi, rand_y0:rand_y1, rand_x0:rand_x1],
                                    [32, 32], mode='edge', clip=True, preserve_range=True))
            patch = np.array(patch, dtype=np.float32)
            patch = (patch - mean) / (std + 1e-7)
            patch = np.moveaxis(patch, 0, 2)
            bb_x = (x0 - rand_x0) / (rand_x1 - rand_x0)
            bb_y = (y0 - rand_y0) / (rand_y1 - rand_y0)
            bb_w = (x1 - x0) / (rand_x1 - rand_x0)
            bb_h = (y1 - y0) / (rand_y1 - rand_y0)
            samples.append((patch, bb_x, bb_y, bb_w, bb_h))

    joblib.dump(samples, os.path.join(out_dir, 'samples', '{}.pkl'.format(study_id)))
    return len(samples)
def main():
    # Get the data.
    data_set = joblib.load("/mnt/guankai/CIKM/data/CIKM2017_train/train_Imp_3x3_resampled.pkl")
    for i in range(10):
        data_set = np.random.permutation(data_set)
    valid_data_num = int(len(data_set) / 10)  # get 10% of the data for validation
    valid_set = data_set[0:valid_data_num]
    train_set = data_set[valid_data_num:]
    convert_to(train_set, "train_Imp_3x3_resampled")
    convert_to(valid_set, "valid_Imp_3x3_resampled")
    return
def cvt(filename):
    output = joblib.load(filename)
    pred = output["output"]
    outputdir = filename[:filename.rfind("/") + 1] + "csvfile/"
    if not os.path.isdir(outputdir):
        os.mkdir(outputdir)
    csvfile = outputdir + filename[filename.rfind("/"):filename.rfind(".") + 1] + "csv"
    out = np.array(pred).reshape(2000)
    np.savetxt(fname=csvfile, X=out, fmt="%.3f", delimiter="")
def agg(file_name, store_file):
    datas = joblib.load(file_name)
    new_datas = []
    for data in datas:
        new_datas.append(data)
        new_datas.append({"input": np.flip(data["input"], axis=2), "label": data["label"]})
        new_datas.append({"input": np.flip(data["input"], axis=3), "label": data["label"]})
        # new_datas.append({"input": np.rot90(m=data["input"], k=1, axes=(2, 3)), "label": data["label"]})
        # new_datas.append({"input": np.rot90(m=data["input"], k=2, axes=(2, 3)), "label": data["label"]})
        # new_datas.append({"input": np.rot90(m=data["input"], k=3, axes=(2, 3)), "label": data["label"]})
    joblib.dump(value=new_datas, filename=store_file, compress=3)
def slice_data(filename):
    data = joblib.load(filename=filename)
    for idx, i in enumerate(data):
        data[idx]["input"] = np.delete(data[idx]["input"], [3], axis=1)
        data[idx]["input"] = data[idx]["input"][:, :, 46:55, 46:55]
    name, suf = os.path.splitext(filename)
    outputfilename = name + "del_height_no.4_slice_7x7.pkl"
    joblib.dump(value=data, filename=outputfilename)
def binary_trans(trainy, testy, i):
    y1 = trainy['image_path'].apply(lambda x: 1 if x == int(i) else 0).tolist()
    y2 = testy['image_path'].apply(lambda x: 1 if x == int(i) else 0).tolist()
    return (y1, y2)


# <=======================================================================================================>
# load the data
def load_data(fname):
    obj = joblib.load(os.path.join('data', fname))
    return obj
def test_pickle(self):
    joblib.dump(CachedIterable(self.iterator(), 3), 'output')
    self.assertListEqual(list(joblib.load('output')), list(range(20)))
def normalize_image(image):
    meanstd = joblib.load(FLAGS.mean_std_path)
    mean, std = meanstd['mean'], meanstd['std']
    normed_image = (image - mean) / std
    return normed_image
def __init__(self, input_size, num_hidden, num_classes, utter_embed, bow_utter_embed, config):
    self.input_size = input_size
    self.num_hidden = num_hidden
    self.num_classes = num_classes
    self.utter_embed = utter_embed
    self.bow_utter_embed = bow_utter_embed
    self.logger = config.logger
    self.config = config
    self.cate_mapping_dict = joblib.load('./dbdc3/data/cate_mapping_dict')
def load(filename, mmap_mode=None):
    """Reconstruct a Python object from a file persisted with joblib.dump."""
    f = open(filename, 'rb')
    fobj = _read_fileobject(f, filename, mmap_mode)
    if isinstance(fobj, (str, unicode)):
        return load_compatibility(fobj)

    obj = _unpickle(fobj, filename, mmap_mode)
    return obj
def load_compatibility(filename):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    This function ensures compatibility with joblib's old persistence
    format (<= 0.9.3).
    """
    file_handle = open(filename, 'rb')
    unpickler = ZipNumpyUnpickler(filename, file_handle=file_handle)
    try:
        obj = unpickler.load()
    finally:
        if hasattr(unpickler, 'file_handle'):
            unpickler.file_handle.close()
    return obj
def load_from_disk(filename):
    return load(filename)
def __init__(self, data_dir):
    self.data_dir = data_dir

    metadata_filename = os.path.join(self.data_dir, "metadata.joblib")
    if os.path.exists(metadata_filename):
        # self.tasks, self.metadata_df = joblib.load(metadata_filename)
        self.tasks, self.metadata_df = load_from_disk(metadata_filename)
    else:
        raise ValueError("No metadata found on disk")