The following 19 code examples, extracted from open-source Python projects, illustrate how to use util.load_dict().
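Most of these snippets are variants of the Nematus-style TextIterator, where load_dict reads a vocabulary file mapping each token to an integer index. The helper itself is not shown on this page; the following is a minimal sketch of what such a load_dict might look like, assuming the common convention of a JSON vocabulary file with a pickle fallback. The exact implementation in each project may differ.

import json
import pickle as pkl

def load_dict(filename):
    # Sketch only: try a JSON vocabulary first, fall back to a pickled dict.
    # Returns a mapping {token: integer index}.
    try:
        with open(filename, 'r') as f:
            return json.load(f)
    except ValueError:
        with open(filename, 'rb') as f:
            return pkl.load(f)

With a vocabulary loaded this way, the iterators below can truncate it to the n_words_source / n_words_target most frequent entries and build inverse (index-to-token) dictionaries.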
def __init__(self, source,
             source_dicts,
             batch_size=128,
             maxlen=100,
             n_words_source=-1,
             skip_empty=False,
             shuffle_each_epoch=False,
             sort_by_length=True,
             maxibatch_size=20):
    if shuffle_each_epoch:
        self.source_orig = source
        self.source = shuffle.main([self.source_orig], temporary=True)
    else:
        self.source = fopen(source, 'r')
    self.source_dicts = []
    for source_dict in source_dicts:
        self.source_dicts.append(load_dict(source_dict))

    self.batch_size = batch_size
    self.maxlen = maxlen
    self.skip_empty = skip_empty

    self.n_words_source = n_words_source

    if self.n_words_source > 0:
        for d in self.source_dicts:
            for key, idx in d.items():
                if idx >= self.n_words_source:
                    del d[key]

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.k = batch_size * maxibatch_size

    self.end_of_data = False
def __init__(self, source,
             source_dicts,
             batch_size=128,
             maxlen=100,
             n_words_source=-1,
             skip_empty=False,
             shuffle_each_epoch=False,
             sort_by_length=True,
             maxibatch_size=20):
    if shuffle_each_epoch:
        self.source_orig = source
        self.source = shuffle.main([self.source_orig], temporary=True)
        self.source = self.source[0]  # ???
        print('this had better be a file:', type(self.source))
    else:
        self.source = fopen(source, 'r')
    self.source_dicts = []
    for source_dict in source_dicts:
        self.source_dicts.append(load_dict(source_dict))

    self.batch_size = batch_size
    self.maxlen = maxlen
    self.skip_empty = skip_empty

    self.n_words_source = n_words_source

    if self.n_words_source > 0:
        for d in self.source_dicts:
            for key, idx in d.items():
                if idx >= self.n_words_source:
                    del d[key]

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.k = batch_size * maxibatch_size

    self.end_of_data = False
def __init__(self, source,
             source_dict,
             batch_size=128,
             maxlen=None,
             n_words_source=-1,
             skip_empty=False,
             shuffle_each_epoch=False,
             sort_by_length=False,
             maxibatch_size=20):
    if shuffle_each_epoch:
        self.source_orig = source
        self.source = shuffle.main([self.source_orig], temporary=True)
    else:
        self.source = data_utils.fopen(source, 'r')
    self.source_dict = load_dict(source_dict)

    self.batch_size = batch_size
    self.maxlen = maxlen
    self.skip_empty = skip_empty

    self.n_words_source = n_words_source

    if self.n_words_source > 0:
        for key, idx in self.source_dict.items():
            if idx >= self.n_words_source:
                del self.source_dict[key]

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.k = batch_size * maxibatch_size

    self.end_of_data = False
def load_inverse_dict(dict_path):
    orig_dict = load_dict(dict_path)
    idict = {}
    for words, idx in orig_dict.iteritems():
        idict[idx] = words
    return idict
def main(models, source_file, nbest_file, saveto, b=80,
         normalize=False, verbose=False, alignweights=False):
    # load model model_options
    options = []
    for model in args.models:
        try:
            with open('%s.json' % model, 'rb') as f:
                options.append(json.load(f))
        except:
            with open('%s.pkl' % model, 'rb') as f:
                options.append(pkl.load(f))

        # hacks for using old models with missing options
        if not 'dropout_embedding' in options[-1]:
            options[-1]['dropout_embedding'] = 0
        if not 'dropout_hidden' in options[-1]:
            options[-1]['dropout_hidden'] = 0
        if not 'dropout_source' in options[-1]:
            options[-1]['dropout_source'] = 0
        if not 'dropout_target' in options[-1]:
            options[-1]['dropout_target'] = 0

    dictionary, dictionary_target = options[0]['dictionaries']

    # load source dictionary and invert
    word_dict = load_dict(dictionary)
    word_idict = dict()
    for kk, vv in word_dict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # load target dictionary and invert
    word_dict_trg = load_dict(dictionary_target)
    word_idict_trg = dict()
    for kk, vv in word_dict_trg.iteritems():
        word_idict_trg[vv] = kk
    word_idict_trg[0] = '<eos>'
    word_idict_trg[1] = 'UNK'

    rescore_model(source_file, nbest_file, saveto, models, options, b,
                  normalize, verbose, alignweights)
def __init__(self, source, target,
             source_dict, target_dict,
             batch_size=128,
             maxlen=100,
             n_words_source=-1,
             n_words_target=-1,
             shuffle_each_epoch=False,
             sort_by_length=True):
    if shuffle_each_epoch:
        shuffle.main([source, target])
        self.source = fopen(source + '.shuf', 'r')
        self.target = fopen(target + '.shuf', 'r')
    else:
        self.source = fopen(source, 'r')
        self.target = fopen(target, 'r')
    self.source_dict = load_dict(source_dict)
    self.target_dict = load_dict(target_dict)

    self.batch_size = batch_size
    self.maxlen = maxlen

    self.n_words_source = n_words_source
    self.n_words_target = n_words_target

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.target_buffer = []
    self.k = batch_size * 20

    self.end_of_data = False
def _build_dictionaries(self):
    """
    Builds and inverts source and target dictionaries, taken
    from the first model since all of them must have the same
    vocabulary.
    """
    dictionaries = self._options[0]['dictionaries']
    dictionaries_source = dictionaries[:-1]
    dictionary_target = dictionaries[-1]

    # load and invert source dictionaries
    word_dicts = []
    word_idicts = []
    for dictionary in dictionaries_source:
        word_dict = load_dict(dictionary)
        if self._options[0]['n_words_src']:
            for key, idx in word_dict.items():
                if idx >= self._options[0]['n_words_src']:
                    del word_dict[key]
        word_idict = dict()
        for kk, vv in word_dict.iteritems():
            word_idict[vv] = kk
        word_idict[0] = '<eos>'
        word_idict[1] = 'UNK'
        word_dicts.append(word_dict)
        word_idicts.append(word_idict)

    self._word_dicts = word_dicts
    self._word_idicts = word_idicts

    # load and invert target dictionary
    word_dict_trg = load_dict(dictionary_target)
    word_idict_trg = dict()
    for kk, vv in word_dict_trg.iteritems():
        word_idict_trg[vv] = kk
    word_idict_trg[0] = '<eos>'
    word_idict_trg[1] = 'UNK'
    self._word_idict_trg = word_idict_trg
def __init__(self, source, target,
             source_dicts, target_dict,
             batch_size=128,
             maxlen=100,
             n_words_source=-1,
             n_words_target=-1,
             shuffle_each_epoch=False,
             sort_by_length=True,
             indomain_source='',
             indomain_target='',
             interpolation_rate=0.1,
             maxibatch_size=20):
    if shuffle_each_epoch:
        shuffle.main([source, target])
        shuffle.main([indomain_source, indomain_target])
        self.source = fopen(source + '.shuf', 'r')
        self.target = fopen(target + '.shuf', 'r')
        self.indomain_source = fopen(indomain_source + '.shuf', 'r')
        self.indomain_target = fopen(indomain_target + '.shuf', 'r')
    else:
        self.source = fopen(source, 'r')
        self.target = fopen(target, 'r')
        self.indomain_source = fopen(indomain_source, 'r')
        self.indomain_target = fopen(indomain_target, 'r')
    self.source_dicts = []
    for source_dict in source_dicts:
        self.source_dicts.append(load_dict(source_dict))
    self.target_dict = load_dict(target_dict)

    self.batch_size = batch_size
    self.maxlen = maxlen

    self.n_words_source = n_words_source
    self.n_words_target = n_words_target

    if self.n_words_source > 0:
        for d in self.source_dicts:
            for key, idx in d.items():
                if idx >= self.n_words_source:
                    del d[key]

    if self.n_words_target > 0:
        for key, idx in self.target_dict.items():
            if idx >= self.n_words_target:
                del self.target_dict[key]

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.target_buffer = []
    self.k = batch_size * maxibatch_size

    self.end_of_data = False

    self.interpolation_rate = interpolation_rate
    self.indomain_k = int(math.ceil(self.interpolation_rate * self.k))
    self.outdomain_k = self.k - self.indomain_k
def __init__(self, source, target,
             source_dicts, target_dict,
             batch_size=128,
             maxlen=100,
             n_words_source=-1,
             n_words_target=-1,
             skip_empty=False,
             shuffle_each_epoch=False,
             sort_by_length=True,
             maxibatch_size=20):
    if shuffle_each_epoch:
        self.source_orig = source
        self.target_orig = target
        self.source, self.target = shuffle.main([self.source_orig, self.target_orig], temporary=True)
    else:
        self.source = fopen(source, 'r')
        self.target = fopen(target, 'r')
    self.source_dicts = []
    for source_dict in source_dicts:
        self.source_dicts.append(load_dict(source_dict))
    self.target_dict = load_dict(target_dict)

    self.batch_size = batch_size
    self.maxlen = maxlen
    self.skip_empty = skip_empty

    self.n_words_source = n_words_source
    self.n_words_target = n_words_target

    if self.n_words_source > 0:
        for d in self.source_dicts:
            for key, idx in d.items():
                if idx >= self.n_words_source:
                    del d[key]

    if self.n_words_target > 0:
        for key, idx in self.target_dict.items():
            if idx >= self.n_words_target:
                del self.target_dict[key]

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.target_buffer = []
    self.k = batch_size * maxibatch_size

    self.end_of_data = False
def __init__(self, source, target,
             source_dict, target_dict,
             batch_size=128,
             maxlen=100,
             n_words_source=-1,
             n_words_target=-1,
             skip_empty=False,
             shuffle_each_epoch=False,
             sort_by_length=True,
             maxibatch_size=20):
    if shuffle_each_epoch:
        self.source_orig = source
        self.target_orig = target
        self.source, self.target = shuffle.main([self.source_orig, self.target_orig], temporary=True)
    else:
        self.source = data_utils.fopen(source, 'r')
        self.target = data_utils.fopen(target, 'r')
    self.source_dict = load_dict(source_dict)
    self.target_dict = load_dict(target_dict)

    self.batch_size = batch_size
    self.maxlen = maxlen
    self.skip_empty = skip_empty

    self.n_words_source = n_words_source
    self.n_words_target = n_words_target

    if self.n_words_source > 0:
        for key, idx in self.source_dict.items():
            if idx >= self.n_words_source:
                del self.source_dict[key]

    if self.n_words_target > 0:
        for key, idx in self.target_dict.items():
            if idx >= self.n_words_target:
                del self.target_dict[key]

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.target_buffer = []
    self.k = batch_size * maxibatch_size

    self.end_of_data = False
def main(models, source_file, nbest_file, saveto, b=80,
         normalize=False, verbose=False, alignweights=False):
    # load model model_options
    options = []
    for model in args.models:
        try:
            with open('%s.json' % model, 'rb') as f:
                options.append(json.load(f))
        except:
            with open('%s.pkl' % model, 'rb') as f:
                options.append(pkl.load(f))

        # hacks for using old models with missing options
        if not 'dropout_embedding' in options[-1]:
            options[-1]['dropout_embedding'] = 0
        if not 'dropout_hidden' in options[-1]:
            options[-1]['dropout_hidden'] = 0
        if not 'dropout_source' in options[-1]:
            options[-1]['dropout_source'] = 0
        if not 'dropout_target' in options[-1]:
            options[-1]['dropout_target'] = 0

    dictionaries = options[0]['dictionaries']
    dictionaries_source = dictionaries[:-1]
    dictionary_target = dictionaries[-1]

    # load source dictionary and invert
    word_dicts = []
    word_idicts = []
    for dictionary in dictionaries_source:
        word_dict = load_dict(dictionary)
        if options[0]['n_words_src']:
            for key, idx in word_dict.items():
                if idx >= options[0]['n_words_src']:
                    del word_dict[key]
        word_idict = dict()
        for kk, vv in word_dict.iteritems():
            word_idict[vv] = kk
        word_idict[0] = '<eos>'
        word_idict[1] = 'UNK'
        word_dicts.append(word_dict)
        word_idicts.append(word_idict)

    # load target dictionary and invert
    word_dict_trg = load_dict(dictionary_target)
    word_idict_trg = dict()
    for kk, vv in word_dict_trg.iteritems():
        word_idict_trg[vv] = kk
    word_idict_trg[0] = '<eos>'
    word_idict_trg[1] = 'UNK'

    rescore_model(source_file, nbest_file, saveto, models, options, b,
                  normalize, verbose, alignweights)
def __init__(self, source, target,
             source_dicts, target_dict,
             batch_size=128,
             maxlen=100,
             n_words_source=-1,
             n_words_target=-1,
             shuffle_each_epoch=False,
             sort_by_length=True,
             maxibatch_size=20):
    # source, target: file path+name
    # allow source to have many dicts
    if shuffle_each_epoch:
        shuffle.main([source, target])
        self.source = fopen(source + '.shuf', 'r')
        self.target = fopen(target + '.shuf', 'r')
    else:
        self.source = fopen(source, 'r')
        self.target = fopen(target, 'r')
    self.source_dicts = []
    for source_dict in source_dicts:
        self.source_dicts.append(load_dict(source_dict))
    self.target_dict = load_dict(target_dict)

    self.batch_size = batch_size
    self.maxlen = maxlen

    self.n_words_source = n_words_source
    self.n_words_target = n_words_target

    if self.n_words_source > 0:
        for d in self.source_dicts:
            for key, idx in d.items():
                if idx >= self.n_words_source:
                    del d[key]

    if self.n_words_target > 0:
        for key, idx in self.target_dict.items():
            if idx >= self.n_words_target:
                del self.target_dict[key]

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.target_buffer = []
    self.k = batch_size * maxibatch_size

    self.end_of_data = False
def __init__(self, source, target,
             source_dicts, target_dict,
             batch_size=128,
             maxlen=100,
             n_words_source=-1,
             n_words_target=-1,
             shuffle_each_epoch=False,
             sort_by_length=True,
             maxibatch_size=20):
    if shuffle_each_epoch:
        shuffle.main([source, target])
        self.source = fopen(source + '.shuf', 'r')
        self.target = fopen(target + '.shuf', 'r')
    else:
        self.source = fopen(source, 'r')
        self.target = fopen(target, 'r')
    self.source_dicts = []
    for source_dict in source_dicts:
        self.source_dicts.append(load_dict(source_dict))
    self.target_dict = load_dict(target_dict)

    self.batch_size = batch_size
    self.maxlen = maxlen

    self.n_words_source = n_words_source
    self.n_words_target = n_words_target

    if self.n_words_source > 0:
        for d in self.source_dicts:
            for key, idx in d.items():
                if idx >= self.n_words_source:
                    del d[key]

    if self.n_words_target > 0:
        for key, idx in self.target_dict.items():
            if idx >= self.n_words_target:
                del self.target_dict[key]

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.target_buffer = []
    self.k = batch_size * maxibatch_size

    self.end_of_data = False
def __init__(self, source, target,
             source_dicts, target_dict,
             batch_size=128,
             maxlen=100,
             n_words_source=-1,
             n_words_target=-1,
             skip_empty=False,
             shuffle_each_epoch=False,
             sort_by_length=True,
             use_factor=False,
             maxibatch_size=20):
    if shuffle_each_epoch:
        self.source_orig = source
        self.target_orig = target
        self.source, self.target = shuffle.main([self.source_orig, self.target_orig], temporary=True)
    else:
        self.source = fopen(source, 'r')
        self.target = fopen(target, 'r')
    self.source_dicts = []
    for source_dict in source_dicts:
        self.source_dicts.append(load_dict(source_dict))
    self.target_dict = load_dict(target_dict)

    self.batch_size = batch_size
    self.maxlen = maxlen
    self.skip_empty = skip_empty
    self.use_factor = use_factor

    self.n_words_source = n_words_source
    self.n_words_target = n_words_target

    if self.n_words_source > 0:
        for d in self.source_dicts:
            for key, idx in d.items():
                if idx >= self.n_words_source:
                    del d[key]

    if self.n_words_target > 0:
        for key, idx in self.target_dict.items():
            if idx >= self.n_words_target:
                del self.target_dict[key]

    self.shuffle = shuffle_each_epoch
    self.sort_by_length = sort_by_length

    self.source_buffer = []
    self.target_buffer = []
    self.k = batch_size * maxibatch_size

    self.end_of_data = False