我们从 Python 开源项目中提取了以下 1 个代码示例，用于说明如何使用 data_utils.prepare_wmt_data()。
def load_data(self, data_dir="/data/WMT15/"):
    """Prepare the WMT data and read the dev/train sets into buckets.

    Sets ``self.data_dir``, ``self.dev_set``, ``self.train_set`` and
    ``self.train_buckets_scale``.

    Args:
        data_dir: Directory passed to ``data_utils.prepare_wmt_data``.
            Defaults to the previously hard-coded ``"/data/WMT15/"``,
            so existing zero-argument callers behave as before.

    Raises:
        ZeroDivisionError: If there is at least one bucket but the
            buckets of ``self.train_set`` are all empty.
    """
    self.data_dir = data_dir
    print("Preparing WMT data in %s" % self.data_dir)
    # The last two values returned by prepare_wmt_data are not used here.
    en_train, fr_train, en_dev, fr_dev, _, _ = data_utils.prepare_wmt_data(
        self.data_dir, self.en_vocab_size, self.fr_vocab_size)

    # Read data into buckets and compute their sizes.
    print("Reading development and training data (limit: %d)."
          % self.max_train_data_size)
    self.dev_set = self.read_data(en_dev, fr_dev)
    self.train_set = self.read_data(en_train, fr_train,
                                    self.max_train_data_size)
    # `range` instead of `xrange`: works on both Python 2 and Python 3.
    train_bucket_sizes = [len(self.train_set[b])
                          for b in range(len(self._buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
    # use to select a bucket. Length of [scale[i], scale[i+1]] is
    # proportional to the size of the i-th training bucket, as used later.
    # A running total gives the same cumulative sums as re-summing each
    # prefix, without the repeated work.
    buckets_scale = []
    running_size = 0
    for bucket_size in train_bucket_sizes:
        running_size += bucket_size
        buckets_scale.append(running_size / train_total_size)
    # Assign only once fully built, so a failure above leaves no partial list.
    self.train_buckets_scale = buckets_scale