Python joblib module, load() code examples

We extracted the following 49 code examples from open-source Python projects to illustrate how to use joblib.load().
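Before the project snippets, here is a minimal round-trip sketch of our own (not taken from any project below) showing the joblib.dump()/joblib.load() pairing that every example relies on; the file name "model.pkl" and the small scikit-learn classifier are placeholders chosen for illustration.

import joblib
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# Fit a small model so there is something worth persisting.
X, y = make_classification(n_samples=100, n_features=5, random_state=0)
clf = LogisticRegression().fit(X, y)

# Persist the fitted estimator (any picklable Python object works).
joblib.dump(clf, "model.pkl")

# Later, or in another process, restore the object exactly as saved.
restored = joblib.load("model.pkl")
print(restored.predict(X[:5]))

Because joblib stores large NumPy arrays efficiently, it is commonly preferred over plain pickle for persisting models and datasets, which is the pattern most of the snippets below follow.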

Project: ISM2017    Author: ybayle    | Project source | File source
def test_models(models_dir, test_dir, out_dir):
    models_dir = utils.abs_path_dir(models_dir) + "/"
    test_dir = utils.abs_path_dir(test_dir) + "/"
    utils.create_dir(out_dir)
    test_files = os.listdir(test_dir)
    models = os.listdir(models_dir)
    for model in models:
        utils.print_success(model)
        pred_dir = out_dir + model + "/"
        utils.create_dir(pred_dir)
        clf = joblib.load(models_dir + model + "/" + model + ".pkl")
        for index, test_file in enumerate(test_files):
            print(str(index) + "\t" + test_file)
            sys.stdout.write("\033[F")
            sys.stdout.write("\033[K")
            test_features = read_test_file(test_dir + test_file)
            predictions = clf.predict_proba(test_features)
            with open(pred_dir + test_file, "w") as filep:
                for pred in predictions:
                    filep.write(str(pred[0]) + "\n")
        sys.stdout.write("\033[K")
Project: autolab_core    Author: BerkeleyAutomation    | Project source | File source
def _caches_to_file(cache_path, start, end, name, cb, concat):
    start_time = time()
    if concat:
        all_data = []
        for i in range(start, end):
            data = load(os.path.join(cache_path, "{0}.jb".format(i)))
            all_data.extend(data)
        dump(all_data, name, 3)
    else:
        target_path = os.path.join(cache_path, name[:-3])
        if not os.path.exists(target_path):
            os.makedirs(target_path)
        for i in range(start, end):
            src_file_path = os.path.join(cache_path, "{0}.jb".format(i))

            basename = os.path.basename(src_file_path)
            target_file_path = os.path.join(target_path, basename)
            shutil.move(src_file_path, target_file_path)

        finished_flag = os.path.join(target_path, '.finished')
        with open(finished_flag, 'a'):
            os.utime(finished_flag, None)

    logging.debug("Finished saving data to {0}. Took {1}s".format(name, time()-start_time))
    cb()
Project: Learning-to-navigate-without-a-map    Author: ToniRV    | Project source | File source
def create_ds_from_splits(mat_file_prefix, db, splits):
    """Create datasets from splits of data.

    This function exists because larger datasets are provided as multiple splits.
    """
    for batch_idx in xrange(1, splits+1):
        # load data
        temp_data = load_mat_data(mat_file_prefix+str(batch_idx)+".mat")
        print ("[MESSAGE] Loaded %d-th split" % batch_idx)

        group_name = "grid_data_split_"+str(batch_idx)

        # save data within splits
        for key in data_dict:
            add_h5_group(group_name, db)
            add_h5_ds(temp_data[key], key, db, group_name)
            print ("[MESSAGE] %s: Saved %s" % (group_name, key))
Project: snn4hrl    Author: florensacc    | Project source | File source
def plot_all_policy_at0(path_experiment,color,num_iter=100,fig_dir=None):
    mean_at_0 = []
    var_at_0 = []
    for itr in range(num_iter):
        data_bimodal_1d = joblib.load(os.path.join(path_experiment,'itr_{}.pkl'.format(itr)))
        poli = data_bimodal_1d['policy']
        action_at_0 = poli.get_action(np.array((0,)))
        mean_at_0.append(action_at_0[1]['mean'])
        var_at_0.append(action_at_0[1]['log_std'])
        # print "sampled action in iter {}: {}. Reward should be: {}".format(itr, action_at_0[0], reward(action_at_0[0]))
    itr = list(range(num_iter))
    plt.plot(itr,mean_at_0, color=color, label = 'mean at 0')
    plt.plot(itr, var_at_0, color=color * 0.7, label = 'logstd at 0')
    plt.title('How the policy varies across iterations')
    plt.xlabel('iteration')
    plt.ylabel('mean and variance at 0')
    plt.legend(loc=3)
    if fig_dir:
        plt.savefig(os.path.join(fig_dir,'policy_progress'))
    else:
        print("No directory for saving plots")

## estimate by MC the policy at 0!
Project: snn4hrl    Author: florensacc    | Project source | File source
def plot_all_policy_at0(path_experiment, color, num_iter=100, fig_dir=None):
    mean_at_0 = []
    var_at_0 = []
    for itr in range(num_iter):
        data_bimodal_1d = joblib.load(os.path.join(path_experiment, 'itr_{}.pkl'.format(itr)))
        poli = data_bimodal_1d['policy']
        action_at_0 = poli.get_action(np.array((0,)))
        mean_at_0.append(action_at_0[1]['mean'])
        var_at_0.append(action_at_0[1]['log_std'])
    itr = list(range(num_iter))
    plt.plot(itr, mean_at_0, color=color, label='mean at 0')
    plt.plot(itr, var_at_0, color=color * 0.7, label='logstd at 0')
    plt.title('How the policy varies across iterations')
    plt.xlabel('iteration')
    plt.ylabel('mean and variance at 0')
    plt.legend(loc=3)
    if fig_dir:
        plt.savefig(os.path.join(fig_dir, 'policy_at_0'))
    else:
        print("No directory for saving plots")


## plot for all the experiments
Project: korean_restaurant_reservation    Author: JudeLee19    | Project source | File source
def __init__(self):

        et = EntityTracker()
        self.bow_enc = BoW_encoder()
        self.emb = UtteranceEmbed()
        at = ActionTracker(et)

        self.dataset, dialog_indices = Data(et, at).trainset

        train_indices = joblib.load('data/train_test_list/train_indices_759')
        test_indices = joblib.load('data/train_test_list/test_indices_759_949')

        self.dialog_indices_tr = train_indices
        self.dialog_indices_dev = test_indices

        obs_size = self.emb.dim + self.bow_enc.vocab_size + et.num_features
        self.action_templates = at.get_action_templates()
        action_size = at.action_size
        nb_hidden = 128

        self.net = LSTM_net(obs_size=obs_size,
                       action_size=action_size,
                       nb_hidden=nb_hidden)
Project: scikitcrf_NER    Author: ManikandanThangavelu    | Project source | File source
def train(filePath):
    try:
        if not filePath.lower().endswith('json'):
            return {'success':False,'message':'Training file should be in json format'}
        with open(filePath) as file:
            ent_data = json.load(file)
        dataset = [jsonToCrf(q, nlp) for q in ent_data['entity_examples']]
        X_train = [sent2features(s) for s in dataset]
        y_train = [sent2labels(s) for s in dataset]
        crf = sklearn_crfsuite.CRF(
            algorithm='lbfgs',
            c1=0.1,
            c2=0.1,
            max_iterations=100,
            all_possible_transitions=True
        )
        crf.fit(X_train, y_train)
        if(not os.path.exists("crfModel")):
            os.mkdir("crfModel")
        if(os.path.isfile("crfModel/classifier.pkl")):
            os.remove("crfModel/classifier.pkl")
        joblib.dump(crf,"crfModel/classifier.pkl")
        return {'success':True,'message':'Model Trained Successfully'}
    except Exception as ex:
        return {'success':False,'message':'Error while Training the model - '+str(ex)}
Project: scikitcrf_NER    Author: ManikandanThangavelu    | Project source | File source
def predict(utterance):
    try:
        tagged = []
        finallist = []
        parsed = nlp(utterance)
        for i in range(len(parsed)):
            tagged.append((str(parsed[i]),parsed[i].tag_))
        finallist.append(tagged)
        test = [sent2features(s) for s in finallist]
        if(os.path.isfile("crfModel/classifier.pkl")):
            crf = joblib.load("crfModel/classifier.pkl")
        else:
            return {'success':False,'message':'Please Train the model first'}
        predicted = crf.predict(test)
        entityList = extractEntities(predicted[0],tagged)
        return {'success':True,'entitiesPredicted':entityList}
    except Exception as ex:
        return {'success':False,'message':'Error while predicting - '+str(ex)}
Project: deer    Author: VinF    | Project source | File source
def setNetwork(self, fname, nEpoch=-1):
        """ Set values into the network

        Parameters
        -----------
        fname : string
            Name of the file where the values are
        nEpoch : int
            Epoch number (Optional)
        """

        basename = "nnets/" + fname

        if (nEpoch>=0):
            all_params = joblib.load(basename + ".epoch={}".format(nEpoch))
        else:
            all_params = joblib.load(basename)

        self._network.setAllParams(all_params)
Project: word2vec_pipeline    Author: NIHOPA    | Project source | File source
def __init__(self, *args, **kwargs):
        super(score_simple, self).__init__(*args, **kwargs)

        f_db = os.path.join(
            kwargs['output_data_directory'],
            kwargs['term_frequency']['f_db']
        )
        if not os.path.exists(f_db):
            msg = "{} not computed yet, needed for TF methods!"
            raise ValueError(msg.format(f_db))

        score_config = simple_config.load()["score"]
        f_csv = os.path.join(
            score_config["output_data_directory"],
            score_config["term_document_frequency"]["f_db"],
        )
        IDF = pd.read_csv(f_csv)
        IDF = dict(zip(IDF["word"].values, IDF["count"].values))
        self.corpus_N = IDF.pop("__pipeline_document_counter")

        # Compute the IDF
        for key in IDF:
            IDF[key] = np.log(float(self.corpus_N) / (IDF[key] + 1))
        self.IDF = IDF
Project: rllabplusplus    Author: shaneshixiang    | Project source | File source
def restore(self, checkpoint_dir=None):
        if checkpoint_dir is None: checkpoint_dir = logger.get_snapshot_dir()
        checkpoint_file = os.path.join(checkpoint_dir, 'params.chk')
        if os.path.isfile(checkpoint_file + '.meta'):
            sess = tf.get_default_session()
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_file)

            tabular_chk_file = os.path.join(checkpoint_dir, 'progress.csv.chk')
            if os.path.isfile(tabular_chk_file):
                tabular_file = os.path.join(checkpoint_dir, 'progress.csv')
                logger.remove_tabular_output(tabular_file)
                shutil.copy(tabular_chk_file, tabular_file)
                logger.add_tabular_output(tabular_file)

            pool_file = os.path.join(checkpoint_dir, 'pool.chk')
            if self.save_format == 'pickle':
                pickle_load(pool_file)
            elif self.save_format == 'joblib':
                self.pool = joblib.load(pool_file)
            else: raise NotImplementedError

            logger.log('Restored from checkpoint %s'%checkpoint_file)
        else:
            logger.log('No checkpoint %s'%checkpoint_file)
Project: CIKM2017    Author: heliarmk    | Project source | File source
def read_data_from_pkl(datafile):
    """
    read a data file saved with joblib.dump
    :param datafile: filename of the pkl file
    :return: (inputs, labels) as numpy float32 arrays
    """
    datas = joblib.load(datafile)
    for i in range(10):
        datas = np.random.permutation(datas)
    inputs, labels = [], []
    for data in datas:
        inputs.append(data["input"])
        labels.append(data["label"])

    inputs = np.array(inputs).reshape(-1, 15, 101, 101, 3).astype(np.float32)
    inputs -= np.mean(inputs, axis=(2, 3), keepdims=True)
    inputs /= np.std(inputs, axis=(2, 3), keepdims=True)
    labels = np.array(labels).reshape(-1, 1).astype(np.float32)

    return inputs, labels
Project: senti    Author: stevenxxiu    | Project source | File source
def _fit_embedding_word(self, embedding_type, construct_docs, tokenize_, d=None):
        if embedding_type == 'google':
            embeddings_ = joblib.load('data/google/GoogleNews-vectors-negative300.pickle')
            embeddings_ = SimpleNamespace(X=embeddings_.syn0, vocab={w: v.index for w, v in embeddings_.vocab.items()})
        elif embedding_type == 'twitter':
            estimator = Pipeline([
                ('tokenize', MapCorporas(tokenize_)),
                ('word2vec', MergeSliceCorporas(CachedFitTransform(Word2Vec(
                    sg=1, size=d, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16
                ), self.memory))),
            ]).fit([self.train_docs, self.unsup_docs[:10**6], self.val_docs, self.test_docs])
            embeddings_ = estimator.named_steps['word2vec'].estimator
            embeddings_ = SimpleNamespace(X=embeddings_.syn0, vocab={w: v.index for w, v in embeddings_.vocab.items()})
        else:
            embeddings_ = SimpleNamespace(X=np.empty((0, d)), vocab={})
        estimator = Pipeline([
            ('tokenize', MapCorporas(tokenize_)),
            # 0.25 is chosen so the unknown vectors have approximately the same variance as google pre-trained ones
            ('embeddings', MapCorporas(Embeddings(
                embeddings_, rand=lambda shape: get_rng().uniform(-0.25, 0.25, shape).astype('float32'),
                include_zero=True
            ))),
        ])
        estimator.fit(construct_docs)
        return estimator.named_steps['embeddings'].estimator
Project: dstc6_dialogue_breakdown_task    Author: JudeLee19    | Project source | File source
def __init__(self, num_rnn_hidden, num_classes, utter_embed,
                 sequence_length, filter_sizes, num_filters, config, l2_reg_lambda=0.0):

        self.num_hidden = num_rnn_hidden
        self.num_classes = num_classes
        self.utter_embed = utter_embed
        self.logger = config.logger

        self.sequence_length = sequence_length
        self.embedding_size = utter_embed.get_vector_size()
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.l2_reg_lambda = l2_reg_lambda

        self.config = config

        self.cate_mapping_dict = joblib.load('./dbdc3/data/cate_mapping_dict')
Project: open-database    Author: mitaffinity    | Project source | File source
def read(self, unpickler):
        """Reconstruct the array."""
        filename = os.path.join(unpickler._dirname, self.filename)
        # Load the array from the disk
        # use getattr instead of self.allow_mmap to ensure backward compat
        # with NDArrayWrapper instances pickled with joblib < 0.9.0
        allow_mmap = getattr(self, 'allow_mmap', True)
        memmap_kwargs = ({} if not allow_mmap
                         else {'mmap_mode': unpickler.mmap_mode})
        array = unpickler.np.load(filename, **memmap_kwargs)
        # Reconstruct subclasses. This does not work with old
        # versions of numpy
        if (hasattr(array, '__array_prepare__') and
            self.subclass not in (unpickler.np.ndarray,
                                  unpickler.np.memmap)):
            # We need to reconstruct another subclass
            new_array = unpickler.np.core.multiarray._reconstruct(
                self.subclass, (0,), 'b')
            return new_array.__array_prepare__(array)
        else:
            return array
Project: dnn_page_vectors    Author: ankit-cliqz    | Project source | File source
def get_vocab_index_embedding_weights(self, embedding_dim, embedding_weights_masking, load_embeddings_pickled=False, load_vocab_pickled=False):
        embedding_weights = []
        if embedding_weights_masking == True:
            masking_value = "masked"  # selects the masked variant of the vocab/embedding files
        else:
            masking_value = "non_masked"  # selects the non-masked variant

        # Load data from files
        if load_vocab_pickled:
            vocab_index_dict = joblib.load(self.conf.vocab_index_file.format(masking_value))
            vocab_set = joblib.load(self.conf.vocab_set_file.format(masking_value))

        else:
            vocab_set, vocab_index_dict = self.generate_vocabulary_set(masking=embedding_weights_masking)


        if self.conf.feature_level == "word":
            embedding_weights = self.load_word_embeddings_compact(embedding_dim, vocab_set,
                                                                           masking=embedding_weights_masking,
                                                                           use_pickled=load_embeddings_pickled)
        return embedding_weights, vocab_index_dict
Project: cesium_web    Author: cesium-ml    | Project source | File source
def add_file(prediction, create, value, *args, **kwargs):
        train_featureset = prediction.model.featureset
        fset_data, data = featurize.load_featureset(train_featureset.file_uri)
        if 'class' in prediction.dataset.name or 'regr' in prediction.dataset.name:
            labels = data['labels']
        else:
            labels = []
        model_data = joblib.load(prediction.model.file_uri)
        if hasattr(model_data, 'best_estimator_'):
            model_data = model_data.best_estimator_
        preds = model_data.predict(fset_data)
        pred_probs = (pd.DataFrame(model_data.predict_proba(fset_data),
                                   index=fset_data.index.astype(str),
                                   columns=model_data.classes_)
                      if hasattr(model_data, 'predict_proba') else [])
        all_classes = model_data.classes_ if hasattr(model_data, 'classes_') else []
        pred_path = pjoin(TMP_DIR, '{}.npz'.format(str(uuid.uuid4())))
        featurize.save_featureset(fset_data, pred_path, labels=labels,
                                  preds=preds, pred_probs=pred_probs)
        prediction.file_uri = pred_path
        DBSession().commit()
Project: Mmodel    Author: gxrtbf    | Project source | File source
def train_model_group(model_info,function='logistic'):

    model = joblib.load(getTheFile('models/{}.m').format(function))
    base_info = model_config.model_dim
    user_id = model_info['user_id']
    del model_info['user_id']

    for key,value in base_info.items():
        if key not in model_info.columns:
            model_info[key] = value

    #model_info = transform_data.transform_with_woe(model_info)
    model_info = model_info[sorted(model_info.columns)]
    x = np.matrix(model_info)
    yt = model.predict(x)
    ytp = model.predict_proba(x)
    return user_id,yt,ytp
Project: vin-keras    Author: neka-nat    | Project source | File source
def process_map_data(path):
    data = joblib.load(path)

    im_data = data['im']
    value_data = data['value']
    state_data = data['state']
    label_data = np.array([np.eye(1, 8, l)[0] for l in data['label']])

    num = im_data.shape[0]
    num_train = num - num // 5

    im_train = np.concatenate((np.expand_dims(im_data[:num_train], 1),
                               np.expand_dims(value_data[:num_train], 1)),axis=1).astype(dtype=np.float32)
    state_train = state_data[:num_train]
    label_train = label_data[:num_train]

    im_test = np.concatenate((np.expand_dims(im_data[num_train:], 1),
                              np.expand_dims(value_data[num_train:], 1)),axis=1).astype(dtype=np.float32)
    state_test = state_data[num_train:]
    label_test = label_data[num_train:]

    return (im_train, state_train, label_train), (im_test, state_test, label_test)
Project: guacml    Author: guacml    | Project source | File source
def load_previous_runs(self):
        if not os.path.isfile(self.prev_runs_file_):
            self.found_matching_run = False
            return

        with open(self.prev_runs_file_, 'r') as file:
            self.all_prev_runs_ = yaml.load(file)
            for run in self.all_prev_runs_:
                self.max_data_version_ = max(self.max_data_version_, run['input_data_version'])
                if run['input_data_hash'] == self.data_.df_hash:
                    self.max_config_version_ = max(self.max_config_version_, run['config_version'])
                    if run['config_hash'] == self.config_hash_:
                        if self.found_matching_run:
                            raise Exception('Duplicate previous run entries found.')
                        self.found_matching_run = True
                        self.run_ = run
                        self.data_folder_ = self.get_versioned_folder(run['input_data_version'],
                                                                      run['config_version'])

        if self.found_matching_run is None:
            self.found_matching_run = False
Project: ProtScan    Author: gianlucacorrado    | Project source | File source
def load(self, obj):
        """Load model from file."""
        self.__dict__.update(joblib.load(obj).__dict__)
Project: Learning-to-navigate-without-a-map    Author: ToniRV    | Project source | File source
def load_grid_selection(path):
    """Load a selected grid from pickle."""
    if not os.path.isfile(path):
        raise ValueError("The file does not exist.")

    with open(path, "rb") as f:
        data = pickle.load(f)

    return data['environment'], data['gt'], data['po'], data['goal']
Project: Learning-to-navigate-without-a-map    Author: ToniRV    | Project source | File source
def process_map_data(path, return_full=False):
    data = joblib.load(path)

    im_data = data['im']
    value_data = data['value']
    state_data = data['state']

    if return_full:
        im_full = np.concatenate((np.expand_dims(im_data, 1),
                                  np.expand_dims(value_data, 1)),
                                 axis=1).astype(dtype=np.uint8)
        return im_full, state_data, data['label'], data['sample_idx']

    label_data = np.array([np.eye(1, 8, l)[0] for l in data['label']])
    num = im_data.shape[0]
    num_train = num - num // 5

    im_train = np.concatenate((np.expand_dims(im_data[:num_train], 1),
                               np.expand_dims(value_data[:num_train], 1)),
                              axis=1).astype(dtype=np.float32)
    state_train = state_data[:num_train]
    label_train = label_data[:num_train]

    im_test = np.concatenate((np.expand_dims(im_data[num_train:], 1),
                              np.expand_dims(value_data[num_train:], 1)),
                             axis=1).astype(dtype=np.float32)
    state_test = state_data[num_train:]
    label_test = label_data[num_train:]

    return (im_train, state_train, label_train), \
           (im_test, state_test, label_test), data['sample_idx']
Project: Learning-to-navigate-without-a-map    Author: ToniRV    | Project source | File source
def create_grid_8_dataset(mat_file_path, db_name, save_dir):
    """Convert grid 8x8 dataset from mat file to hdf5 format.

    Parameters
    ----------
    mat_file_path : str
        the path to the mat file
    db_name : str
        the name of the dataset
    save_dir : str
        the directory of the output path (must exist)
    """
    # load matlab data
    mat_data = load_mat_data(mat_file_path)
    print ("[MESSAGE] The Matlab data is loaded.")

    # init HDF5 database
    db = init_h5_db(db_name, save_dir)

    # save dataset
    for key in data_dict:
        add_h5_ds(mat_data[key], key, db)
        print ("[MESSAGE] Saved %s" % (key))

    db.flush()
    db.close()
    print ("[MESSAGE] The grid 8x8 dataset is saved at %s" % (save_dir))
Project: keras-text    Author: raghakot    | Project source | File source
def load(file_name):
    if file_name.endswith('.json'):
        with open(file_name, 'r') as f:
            return jsonpickle.loads(f.read())

    if file_name.endswith('.npy'):
        return np.load(file_name)

    return joblib.load(file_name)
Project: third_person_im    Author: bstadie    | Project source | File source
def _load_experiments(self, data_folder, name_or_patterns):
        if not isinstance(name_or_patterns, (list, tuple)):
            name_or_patterns = [name_or_patterns]
        files = []
        for name_or_pattern in name_or_patterns:
            matched_files = glob(
                osp.join(data_folder, name_or_pattern))  # glob gives a list of all files satisfying the pattern
            files += matched_files  # this may include the same file twice if it satisfies two patterns
        experiments = []
        progress_f = None
        params_f = None
        pkl_data = None
        for f in files:
            if os.path.isdir(f):
                try:
                    progress = self._read_data(osp.join(f, "progress.csv"))
                    params = self._read_params(osp.join(f, "params.json"))
                    params["exp_name"] = osp.basename(f)
                    if os.path.isfile(osp.join(f, "params.pkl")):
                        pkl_data = joblib.load(osp.join(f, "params.pkl"))
                        experiments.append(Experiment(progress, params, pkl_data))
                    else:
                        experiments.append(Experiment(progress, params))
                except Exception as e:
                    print(e)
            elif 'progress.csv' in f:  # in case you're giving as datafolder the dir that contains the files!
                progress_f = self._read_data(f)
            elif 'params.json' in f:
                params_f = self._read_params(f)
            elif 'params.pkl' in f:
                print('about to load', f)
                pkl_data = joblib.load(f)
        if params_f and progress_f:
            if pkl_data:
                experiments.append(Experiment(progress_f, params_f, pkl_data))
            else:
                experiments.append(Experiment(progress_f, params_f))

        self._experiments = experiments
Project: motif-classify    Author: macks22    | Project source | File source
def symbolize_signal(self, signal, parallel = None, n_jobs = -1):
        """
        Symbolize a whole time-series signal into a sentence (vector of words).
        parallel can be {None, "ipython", "joblib"}
        """
        window_index = self.sliding_window_index(len(signal))
        if parallel == None:
            return map(lambda wi: self.symbolize_window(signal[wi]), window_index)
        elif parallel == "ipython":
            ## too slow
            raise NotImplementedError("parallel parameter %s not supported" % parallel)
            #return self.iparallel_symbolize_signal(signal)
        elif parallel == "joblib":
            with tempfile.NamedTemporaryFile(delete=False) as f:
                tf = f.name
            print "save temp file at %s" % tf 
            tfiles = joblib.dump(signal, tf)
            xs = joblib.load(tf, "r")
            n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs 
            window_index = list(window_index)
            batch_size = len(window_index) / n_jobs
            batches = chunk(window_index, batch_size)
            symbols = Parallel(n_jobs)(delayed(joblib_symbolize_window)(self, xs, batch) for batch in batches)
            for f in tfiles: os.unlink(f)
            return sum(symbols, [])
        else:
            raise NotImplementedError("parallel parameter %s not supported" % parallel)
Project: motif-classify    Author: macks22    | Project source | File source
def signal_to_paa_vector(self, signal, n_jobs = -1):
        window_index = self.sliding_window_index(len(signal))
        with tempfile.NamedTemporaryFile(delete=False) as f:
                tf = f.name
        print "save temp file at %s" % tf 
        tfiles = joblib.dump(signal, tf)
        xs = joblib.load(tf, "r")
        n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs 
        window_index = list(window_index)
        batch_size = len(window_index) / n_jobs
        batches = chunk(window_index, batch_size)
        vecs = Parallel(n_jobs)(delayed(joblib_paa_window)(self, xs, batch) for batch in batches)
        for f in tfiles: os.unlink(f)
        return np.vstack(vecs)
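The two motif-classify snippets above dump the signal to a temporary file and re-open it with joblib.load(tf, "r"), i.e. with a read-only mmap_mode, so the Parallel workers share one memory-mapped array instead of each receiving a pickled copy. A minimal sketch of that pattern, with a made-up array size and a hypothetical batch_sum() worker, assuming joblib's mmap_mode support for uncompressed NumPy dumps:

import os
import tempfile
import numpy as np
from joblib import Parallel, delayed, dump, load

def batch_sum(xs, idx):
    # Each worker reads its slice from the shared memory-mapped array.
    return float(np.sum(xs[idx]))

signal = np.random.rand(1000000)

# Dump once, then load with mmap_mode="r" so workers read the data without copying it.
tmp = os.path.join(tempfile.mkdtemp(), "signal.jb")
dump(signal, tmp)
xs = load(tmp, mmap_mode="r")

batches = np.array_split(np.arange(len(signal)), 4)
totals = Parallel(n_jobs=4)(delayed(batch_sum)(xs, b) for b in batches)
print(sum(totals))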
Project: word2vec_pipeline    Author: NIHOPA    | Project source | File source
def save_single(self):

        assert(self.V is not None)
        assert(self._ref is not None)

        # Set the size explictly as a sanity check
        size_n, dim_V = self.V.shape

        config_score = simple_config.load()["score"]
        f_db = os.path.join(
            config_score["output_data_directory"],
            config_score["document_scores"]["f_db"]
        )

        h5 = touch_h5(f_db)
        g  = h5.require_group(self.method)
        gx = g.require_group(self.current_filename)

        # Save the data array
        msg = "Saving {} {} ({})"
        print(msg.format(self.method, self.current_filename, size_n))

        for col in ["V", "_ref", "VX",
                    "VX_explained_variance_ratio_",
                    "VX_components_"]:
            if col in gx:
                #print "  Clearing", self.method, self.current_filename, col
                del gx[col]

        gx.create_dataset("V", data=self.V, **self.h5py_args)
        gx.create_dataset("_ref", data=self._ref, **self.h5py_args)
Project: word2vec_pipeline    Author: NIHOPA    | Project source | File source
def compute_reduced_representation(self):

        if not self.compute_reduced:
            return None

        config_score = simple_config.load()["score"]
        f_db = os.path.join(
            config_score["output_data_directory"],
            config_score["document_scores"]["f_db"]
        )

        h5 = touch_h5(f_db)
        g = h5[self.method]

        keys = g.keys()
        V     = np.vstack([g[x]["V"][:] for x in keys])
        sizes = [g[x]["_ref"].shape[0] for x in keys]

        nc = self.reduced_n_components
        clf = IncrementalPCA(n_components=nc)

        msg = "Performing PCA on {}, ({})->({})"
        print(msg.format(self.method, V.shape[1], nc))

        VX = clf.fit_transform(V)
        EVR = clf.explained_variance_ratio_
        COMPONENTS = clf.components_

        for key, size in zip(keys, sizes):

            # Take slices equal to the size
            vx, VX = VX[:size,:], VX[size:, :]
            evr, EVR = EVR[:size], EVR[size:]
            com, COMPONENTS = COMPONENTS[:size,:], COMPONENTS[size:, :]

            g[key].create_dataset("VX", data=vx, **self.h5py_args)
            g[key].create_dataset("VX_explained_variance_ratio_", data=evr)
            g[key].create_dataset("VX_components_", data=com)

        h5.close()
Project: rllabplusplus    Author: shaneshixiang    | Project source | File source
def _load_experiments(self, data_folder, name_or_patterns):
        if not isinstance(name_or_patterns, (list, tuple)):
            name_or_patterns = [name_or_patterns]
        files = []
        for name_or_pattern in name_or_patterns:
            matched_files = glob(
                osp.join(data_folder, name_or_pattern))  # glob gives a list of all files satisfying the pattern
            files += matched_files  # this may include the same file twice if it satisfies two patterns
        experiments = []
        progress_f = None
        params_f = None
        pkl_data = None
        for f in files:
            if os.path.isdir(f):
                try:
                    progress = self._read_data(osp.join(f, "progress.csv"))
                    params = self._read_params(osp.join(f, "params.json"))
                    params["exp_name"] = osp.basename(f)
                    if os.path.isfile(osp.join(f, "params.pkl")):
                        pkl_data = joblib.load(osp.join(f, "params.pkl"))
                        experiments.append(Experiment(progress, params, pkl_data))
                    else:
                        experiments.append(Experiment(progress, params))
                except Exception as e:
                    print(e)
            elif 'progress.csv' in f:  # in case you're giving as datafolder the dir that contains the files!
                progress_f = self._read_data(f)
            elif 'params.json' in f:
                params_f = self._read_params(f)
            elif 'params.pkl' in f:
                print('about to load', f)
                pkl_data = joblib.load(f)
        if params_f and progress_f:
            if pkl_data:
                experiments.append(Experiment(progress_f, params_f, pkl_data))
            else:
                experiments.append(Experiment(progress_f, params_f))

        self._experiments = experiments
Project: rllabplusplus    Author: shaneshixiang    | Project source | File source
def restore(self, checkpoint_dir=None):
        if checkpoint_dir is None: checkpoint_dir = logger.get_snapshot_dir()
        checkpoint_file = os.path.join(checkpoint_dir, 'params.chk')
        if os.path.isfile(checkpoint_file + '.meta'):
            sess = tf.get_default_session()
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_file)

            tabular_chk_file = os.path.join(checkpoint_dir, 'progress.csv.chk')
            if os.path.isfile(tabular_chk_file):
                tabular_file = os.path.join(checkpoint_dir, 'progress.csv')
                logger.remove_tabular_output(tabular_file)
                shutil.copy(tabular_chk_file, tabular_file)
                logger.add_tabular_output(tabular_file)

            if self.qf is not None:
                pool_file = os.path.join(checkpoint_dir, 'pool.chk')
                if self.save_format == 'pickle':
                    pickle_load(pool_file)
                elif self.save_format == 'joblib':
                    self.pool = joblib.load(pool_file)
                else: raise NotImplementedError

            logger.log('Restored from checkpoint %s'%checkpoint_file)
        else:
            logger.log('No checkpoint %s'%checkpoint_file)
Project: SRLF    Author: Fritz449    | Project source | File source
def load_object(string):
    # converts string to whatever was dumps'ed in it
    return joblib.load(BytesIO(string))
Project: kaggle-lung-cancer    Author: mdai    | Project source | File source
def process_study(study_id, annotations, out_dir, nstack):
    volumes_metadata = isotropic_volumes_metadata[study_id]
    isometric_volume = np.load('../data_proc/stage1/isotropic_volumes_1mm/{}.npy'.format(study_id))
    mean = np.mean(isometric_volume).astype(np.float32)
    std = np.std(isometric_volume).astype(np.float32)
    resize_factor = np.divide(volumes_metadata['volume_resampled_shape'], volumes_metadata['volume_shape'])

    coords_list = []
    for a in annotations:
        d = a['data']
        z = int(round(resize_factor[0] * a['sliceNum']))
        y0 = resize_factor[1] * d['y']
        y1 = resize_factor[1] * (d['y'] + d['height'])
        x0 = resize_factor[2] * d['x']
        x1 = resize_factor[2] * (d['x'] + d['width'])
        coords_list.append((z, y0, y1, x0, x1))

    samples = []
    for coords in coords_list:
        z, y0, y1, x0, x1 = coords
        for i in range(40):
            sample_id = uuid4()
            rand_y0 = max(0, int(round(y0 - random.randint(0, 32))))
            rand_y1 = min(isometric_volume.shape[1], int(round(y1 + random.randint(0, 32))))
            rand_x0 = max(0, int(round(x0 - random.randint(0, 32))))
            rand_x1 = min(isometric_volume.shape[2], int(round(x1 + random.randint(0, 32))))
            patch = []
            for zi in range(nstack):
                patch.append(resize(isometric_volume[z+zi, rand_y0:rand_y1, rand_x0:rand_x1], [32, 32],
                                    mode='edge', clip=True, preserve_range=True))
            patch = np.array(patch, dtype=np.float32)
            patch = (patch - mean) / (std + 1e-7)
            patch = np.moveaxis(patch, 0, 2)
            bb_x = (x0 - rand_x0) / (rand_x1 - rand_x0)
            bb_y = (y0 - rand_y0) / (rand_y1 - rand_y0)
            bb_w = (x1 - x0) / (rand_x1 - rand_x0)
            bb_h = (y1 - y0) / (rand_y1 - rand_y0)
            samples.append((patch, bb_x, bb_y, bb_w, bb_h))

    joblib.dump(samples, os.path.join(out_dir, 'samples', '{}.pkl'.format(study_id)))
    return len(samples)
Project: CIKM2017    Author: heliarmk    | Project source | File source
def main():
    # Get the data.
    data_set = joblib.load("/mnt/guankai/CIKM/data/CIKM2017_train/train_Imp_3x3_resampled.pkl")
    for i in range(10):
        data_set = np.random.permutation(data_set)
    valid_data_num = int(len(data_set) / 10) #get 10% data for validation
    valid_set = data_set[0 : valid_data_num ]
    train_set = data_set[valid_data_num  : ]
    convert_to(train_set, "train_Imp_3x3_resampled")
    convert_to(valid_set, "valid_Imp_3x3_resampled")
    return
Project: CIKM2017    Author: heliarmk    | Project source | File source
def cvt(filename):
    output = joblib.load(filename)
    pred = output["output"]
    outputdir = filename[:filename.rfind("/")+1] + "csvfile/"
    if not os.path.isdir(outputdir):
        os.mkdir(outputdir)
    csvfile = outputdir + filename[filename.rfind("/"):filename.rfind(".")+1] + "csv"
    out = np.array(pred).reshape(2000)
    np.savetxt(fname=csvfile, X=out, fmt="%.3f",delimiter="")
Project: CIKM2017    Author: heliarmk    | Project source | File source
def agg(file_name,store_file):

    datas = joblib.load(file_name)
    new_datas = []

    for data in datas:
        new_datas.append(data)
        new_datas.append({"input":np.flip(data["input"],axis=2),"label":data["label"]})
        new_datas.append({"input":np.flip(data["input"],axis=3),"label":data["label"]})
        #new_datas.append({"input":np.rot90(m=data["input"],k=1,axes=(2,3)),"label":data["label"]})
        #new_datas.append({"input":np.rot90(m=data["input"],k=2,axes=(2,3)),"label":data["label"]})
        #new_datas.append({"input":np.rot90(m=data["input"],k=3,axes=(2,3)),"label":data["label"]})

    joblib.dump(value=new_datas,filename=store_file,compress=3)
Project: CIKM2017    Author: heliarmk    | Project source | File source
def slice_data(filename):
    data = joblib.load(filename=filename)
    for idx, i in enumerate(data):
        data[idx]["input"] = np.delete(data[idx]["input"],[3],axis=1)
        data[idx]["input"] = data[idx]["input"][:,:,46:55,46:55]
    name, suf = os.path.splitext(filename)
    outputfilename = name + "del_height_no.4_slice_7x7.pkl"
    joblib.dump(value=data, filename=outputfilename)
Project: Kaggle_the_Nature_Conservancy_Fisheries_Monitoring    Author: Sapphirine    | Project source | File source
def binary_trans(trainy, testy, i):
    y1 = trainy['image_path'].apply(lambda x: 1 if x == int(i) else 0).tolist()
    y2 = testy['image_path'].apply(lambda x: 1 if x == int(i) else 0).tolist()
    return(y1, y2)


# <=======================================================================================================>
# load the data
Project: Asynchronous-RL-agent    Author: Fritz449    | Project source | File source
def load_object(string):
    # converts string to whatever was dumps'ed in it
    return joblib.load(BytesIO(string))
Project: Kaleido    Author: vacancy    | Project source | File source
def load_data(fname):
    obj = joblib.load(os.path.join('data', fname))
    return obj
Project: senti    Author: stevenxxiu    | Project source | File source
def test_pickle(self):
        joblib.dump(CachedIterable(self.iterator(), 3), 'output')
        self.assertListEqual(list(joblib.load('output')), list(range(20)))
Project: deep_separation_contraction    Author: edouardoyallon    | Project source | File source
def normalize_image(image):
  meanstd = joblib.load(FLAGS.mean_std_path)
  mean, std = meanstd['mean'], meanstd['std']
  normed_image = (image - mean) / std
  return normed_image
Project: deep_separation_contraction    Author: edouardoyallon    | Project source | File source
def normalize_image(image):
  meanstd = joblib.load(FLAGS.mean_std_path)
  mean, std = meanstd['mean'], meanstd['std']
  normed_image = (image - mean) / std
  return normed_image
Project: dstc6_dialogue_breakdown_task    Author: JudeLee19    | Project source | File source
def __init__(self, input_size, num_hidden, num_classes, utter_embed, bow_utter_embed, config):
        self.input_size = input_size
        self.num_hidden = num_hidden
        self.num_classes = num_classes
        self.utter_embed = utter_embed
        self.bow_utter_embed = bow_utter_embed
        self.logger = config.logger
        self.config = config

        self.cate_mapping_dict = joblib.load('./dbdc3/data/cate_mapping_dict')
Project: open-database    Author: mitaffinity    | Project source | File source
def load(filename, mmap_mode=None):
    """Reconstruct a Python object from a file persisted with joblib.dump."""
    f = open(filename, 'rb')
    fobj = _read_fileobject(f, filename, mmap_mode)
    if isinstance(fobj, (str, unicode)):
        return load_compatibility(fobj)

    obj = _unpickle(fobj, filename, mmap_mode)

    return obj
Project: open-database    Author: mitaffinity    | Project source | File source
def load_compatibility(filename):
    """Reconstruct a Python object from a file persisted with joblib.dump.
    This function ensures compatibility with joblib's old persistence format (<= 0.9.3)."""
    file_handle = open(filename, 'rb')
    unpickler = ZipNumpyUnpickler(filename, file_handle=file_handle)
    try:
        obj = unpickler.load()
    finally:
        if hasattr(unpickler, 'file_handle'):
            unpickler.file_handle.close()
    return obj
Project: open-database    Author: mitaffinity    | Project source | File source
def load_from_disk(filename):
    return load(filename)
Project: open-database    Author: mitaffinity    | Project source | File source
def __init__(self, data_dir):
        self.data_dir = data_dir

        metadata_filename = os.path.join(self.data_dir, "metadata.joblib")
        if os.path.exists(metadata_filename):
            # self.tasks, self.metadata_df = joblib.load(metadata_filename)
            self.tasks, self.metadata_df = load_from_disk(metadata_filename)
        else:
            raise ValueError("No metadata found on disk")