def _attention(self, query, attn_states):
    """Score the stored attention states against `query` and pool them.

    Args:
      query: Tensor passed to `_linear` to produce `self._attn_vec_size`
        features.
      attn_states: Tensor that is reshaped to
        `[-1, self._attn_length, 1, self._attn_size]` and sliced as rank 3.

    Returns:
      A pair `(new_attns, new_attn_states)`: the softmax-weighted sum of the
      states reshaped to `[-1, self._attn_size]`, and `attn_states` with the
      first entry along axis 1 dropped.
    """
    with vs.variable_scope("Attention"):
        attn_kernel = vs.get_variable(
            "AttnW", [1, 1, self._attn_size, self._attn_vec_size])
        attn_vector = vs.get_variable("AttnV", [self._attn_vec_size])

        # A 1x1 convolution projects every stored state into score space.
        states_4d = array_ops.reshape(
            attn_states, [-1, self._attn_length, 1, self._attn_size])
        state_features = nn_ops.conv2d(
            states_4d, attn_kernel, [1, 1, 1, 1], "SAME")

        # The query projection broadcasts across the attention length.
        query_features = _linear(query, self._attn_vec_size, True)
        query_features = array_ops.reshape(
            query_features, [-1, 1, 1, self._attn_vec_size])

        scores = math_ops.reduce_sum(
            attn_vector * math_ops.tanh(state_features + query_features),
            [2, 3])
        attn_weights = nn_ops.softmax(scores)

        weights_4d = array_ops.reshape(
            attn_weights, [-1, self._attn_length, 1, 1])
        context = math_ops.reduce_sum(weights_4d * states_4d, [1, 2])
        new_attns = array_ops.reshape(context, [-1, self._attn_size])

        # Drop the first slot along axis 1 of the stored states.
        new_attn_states = array_ops.slice(
            attn_states, [0, 1, 0], [-1, -1, -1])
        return new_attns, new_attn_states
def _forward(self, x):
    """Append one zero along the last axis of `x` and apply softmax.

    When `self._static_event_ndims` is 0, `x` first gains a trailing axis of
    size 1, so the padded result has a last dimension of 2.

    Args:
      x: Input tensor.

    Returns:
      `nn_ops.softmax` of the zero-padded tensor.
    """
    # Pad the last dim with a zeros vector. We need this because it lets us
    # infer the scale in the inverse function.
    if self._static_event_ndims == 0:
        y = array_ops.expand_dims(x, dim=-1)
    else:
        y = x

    # Use the static rank when it is known, otherwise a dynamic rank op.
    static_ndims = y.get_shape().ndims
    ndims = static_ndims if static_ndims is not None else array_ops.rank(y)

    # No padding on the leading dims; one trailing element on the last dim.
    leading_paddings = array_ops.zeros((ndims - 1, 2), dtype=dtypes.int32)
    paddings = array_ops.concat(0, (leading_paddings, [[0, 1]]))
    y = array_ops.pad(y, paddings=paddings)

    # Set shape hints.
    if x.get_shape().ndims is not None:
        static_shape = x.get_shape().as_list()
        if self._static_event_ndims == 0:
            static_shape.append(2)
        elif static_shape[-1] is not None:
            static_shape[-1] += 1
        static_shape = tensor_shape.TensorShape(static_shape)
        y.get_shape().assert_is_compatible_with(static_shape)
        y.set_shape(static_shape)

    # Since we only support event_ndims in [0, 1] and we do padding, we always
    # reduce over the last dimension, i.e., dim=-1 (which is the default).
    return nn_ops.softmax(y)
def _attention(self, query, attn_states):
    """Score the stored attention states against `query` and pool them.

    Args:
      query: Tensor passed to `_linear` to produce `self._attn_vec_size`
        features.
      attn_states: Tensor that is reshaped to
        `[-1, self._attn_length, 1, self._attn_size]` and sliced as rank 3.

    Returns:
      A pair `(new_attns, new_attn_states)`: the softmax-weighted sum of the
      states reshaped to `[-1, self._attn_size]`, and `attn_states` with the
      first entry along axis 1 dropped.
    """
    with tf.variable_scope("attention"):
        attn_kernel = tf.get_variable(
            "attn_w", [1, 1, self._attn_size, self._attn_vec_size])
        attn_vector = tf.get_variable("attn_v", [self._attn_vec_size])

        # A 1x1 convolution projects every stored state into score space.
        states_4d = array_ops.reshape(
            attn_states, [-1, self._attn_length, 1, self._attn_size])
        state_features = nn_ops.conv2d(
            states_4d, attn_kernel, [1, 1, 1, 1], "SAME")

        # The query projection broadcasts across the attention length.
        query_features = _linear(query, self._attn_vec_size, True)
        query_features = array_ops.reshape(
            query_features, [-1, 1, 1, self._attn_vec_size])

        scores = math_ops.reduce_sum(
            attn_vector * math_ops.tanh(state_features + query_features),
            [2, 3])
        attn_weights = nn_ops.softmax(scores)

        weights_4d = array_ops.reshape(
            attn_weights, [-1, self._attn_length, 1, 1])
        context = math_ops.reduce_sum(weights_4d * states_4d, [1, 2])
        new_attns = array_ops.reshape(context, [-1, self._attn_size])

        # Drop the first slot along axis 1 of the stored states.
        new_attn_states = array_ops.slice(
            attn_states, [0, 1, 0], [-1, -1, -1])
        return new_attns, new_attn_states
def testSoftmax3DUnknownSize(self):
    # Softmax over a rank-3 placeholder whose first two dims are unknown.
    logits = np.ones((2, 3, 2))
    exp_prediction = 0.5 * np.ones((2, 3, 2))

    # Zero one logit in two of the rows; those rows become (low, high) or
    # (high, low) while every other row stays uniform at 0.5.
    for zeroed in ((0, 0, 0), (1, 1, 1)):
        logits[zeroed] = 0
    expected_overrides = (
        ((0, 0, 0), self.low),
        ((0, 0, 1), self.high),
        ((1, 1, 0), self.high),
        ((1, 1, 1), self.low),
    )
    for index, value in expected_overrides:
        exp_prediction[index] = value

    logit_placeholder = array_ops.placeholder(
        dtypes.float32, shape=(None, None, 2))
    feed_dict = {logit_placeholder: logits}

    prediction = _layers.softmax(logit_placeholder)
    with self.test_session() as sess:
        prediction = sess.run(prediction, feed_dict=feed_dict)
        self.assertAllClose(exp_prediction, prediction)
def test_unary_ops(self):
    # Each labeled-tensor op must match the plain op of the same name in
    # `nn_ops` and carry the op name in its tensor name.
    op_names = (
        'relu',
        'relu6',
        'crelu',
        'elu',
        'softplus',
        'l2_loss',
        'softmax',
        'log_softmax',
    )
    # Resolve every op pair up front, before any graph ops are built.
    cases = [
        (op_name, getattr(nn_ops, op_name), getattr(nn, op_name))
        for op_name in op_names
    ]
    for op_name, tf_op, lt_op in cases:
        expected_tensor = tf_op(self.original_lt.tensor)
        expected_lt = core.LabeledTensor(expected_tensor, self.axes)
        result_lt = lt_op(self.original_lt)
        self.assertIn(op_name, result_lt.name)
        self.assertLabeledTensorsEqual(expected_lt, result_lt)
def _kl_categorical_categorical(a, b, name=None):
    """Calculate the batched KL divergence KL(a || b) with a and b Categorical.

    Args:
      a: instance of a Categorical distribution object.
      b: instance of a Categorical distribution object.
      name: (optional) Name to use for created operations.
        default is "kl_categorical_categorical".

    Returns:
      Batchwise KL(a || b)
    """
    with ops.name_scope(name, "kl_categorical_categorical",
                        values=[a.logits, b.logits]):
        # sum over the last axis of softmax(a) * (log_softmax(a) - log_softmax(b))
        log_probs_a = nn_ops.log_softmax(a.logits)
        log_probs_b = nn_ops.log_softmax(b.logits)
        log_ratio = log_probs_a - log_probs_b
        probs_a = nn_ops.softmax(a.logits)
        return math_ops.reduce_sum(probs_a * log_ratio, axis=-1)
def testEntropyGradient(self):
    # The Categorical entropy and its gradient w.r.t. the logits must match
    # the entropy written out directly from softmax / log_softmax.
    with self.test_session() as sess:
        logits = constant_op.constant([[1., 2., 3.], [2., 5., 1.]])

        probabilities = nn_ops.softmax(logits)
        log_probabilities = nn_ops.log_softmax(logits)
        true_entropy = - math_ops.reduce_sum(
            probabilities * log_probabilities, axis=-1)

        categorical_distribution = categorical.Categorical(
            probs=probabilities)
        categorical_entropy = categorical_distribution.entropy()

        # works
        true_entropy_g = gradients_impl.gradients(true_entropy, [logits])
        categorical_entropy_g = gradients_impl.gradients(
            categorical_entropy, [logits])

        fetches = {
            "true_entropy": true_entropy,
            "categorical_entropy": categorical_entropy,
            "true_entropy_g": true_entropy_g,
            "categorical_entropy_g": categorical_entropy_g,
        }
        res = sess.run(fetches)

        self.assertAllClose(res["true_entropy"], res["categorical_entropy"])
        self.assertAllClose(res["true_entropy_g"],
                            res["categorical_entropy_g"])
def testMeanUnivariate(self):
    # The mixture mean must equal the categorical-weighted sum of the
    # component means, for scalar and batched mixtures.
    with self.test_session() as sess:
        for batch_shape in ((), (2,), (2, 3)):
            dist = make_univariate_mixture(
                batch_shape=batch_shape, num_components=2)
            mean = dist.mean()
            self.assertEqual(batch_shape, mean.get_shape())

            cat_probs = nn_ops.softmax(dist.cat.logits)
            dist_means = [component.mean() for component in dist.components]

            fetches = [mean, cat_probs, dist_means]
            mean_value, cat_probs_value, dist_means_value = sess.run(fetches)
            self.assertEqual(batch_shape, mean_value.shape)

            # Move the component axis first so it can be zipped with the
            # per-component means.
            cat_probs_value = _swap_first_last_axes(cat_probs_value)
            true_mean = sum(
                weight * component_mean
                for weight, component_mean in zip(cat_probs_value,
                                                  dist_means_value))

            self.assertAllClose(true_mean, mean_value)
def testMeanMultivariate(self):
    # The mixture mean must equal the categorical-weighted sum of the
    # component means, for an event shape of (4,).
    with self.test_session() as sess:
        for batch_shape in ((), (2,), (2, 3)):
            expected_shape = batch_shape + (4,)
            dist = make_multivariate_mixture(
                batch_shape=batch_shape, num_components=2, event_shape=(4,))
            mean = dist.mean()
            self.assertEqual(expected_shape, mean.get_shape())

            cat_probs = nn_ops.softmax(dist.cat.logits)
            dist_means = [component.mean() for component in dist.components]

            fetches = [mean, cat_probs, dist_means]
            mean_value, cat_probs_value, dist_means_value = sess.run(fetches)
            self.assertEqual(expected_shape, mean_value.shape)

            cat_probs_value = _swap_first_last_axes(cat_probs_value)

            # Add a new innermost dimension for broadcasting to mvn vector
            # shape
            cat_probs_value = [
                np.expand_dims(weight, -1) for weight in cat_probs_value
            ]

            true_mean = sum(
                weight * component_mean
                for weight, component_mean in zip(cat_probs_value,
                                                  dist_means_value))

            self.assertAllClose(true_mean, mean_value)
def testProbScalarUnivariate(self):
    # For a scalar-batch mixture, prob(x) must equal the categorical-weighted
    # sum of the component probabilities for inputs of several shapes.
    with self.test_session() as sess:
        dist = make_univariate_mixture(batch_shape=[], num_components=2)
        inputs = [
            np.array([1.0, 2.0], dtype=np.float32),
            np.array(1.0, dtype=np.float32),
            np.random.randn(3, 4).astype(np.float32),
        ]
        for x in inputs:
            p_x = dist.prob(x)
            self.assertEqual(x.shape, p_x.get_shape())

            # The logits are wrapped in a list before softmax and the
            # result is indexed back out with [0].
            cat_probs = nn_ops.softmax([dist.cat.logits])[0]
            dist_probs = [component.prob(x) for component in dist.components]

            fetches = [p_x, cat_probs, dist_probs]
            p_x_value, cat_probs_value, dist_probs_value = sess.run(fetches)
            self.assertEqual(x.shape, p_x_value.shape)

            weighted = [
                c_p_value * d_p_value
                for c_p_value, d_p_value in zip(cat_probs_value,
                                                dist_probs_value)
            ]
            total_prob = sum(weighted)

            self.assertAllClose(total_prob, p_x_value)
def testProbBatchMultivariate(self):
    # For a [2, 3]-batch mixture with event shape [4], prob(x) must equal the
    # categorical-weighted sum of the component probabilities.
    with self.test_session() as sess:
        dist = make_multivariate_mixture(
            batch_shape=[2, 3], num_components=2, event_shape=[4])
        inputs = [
            np.random.randn(2, 3, 4).astype(np.float32),
            np.random.randn(4, 2, 3, 4).astype(np.float32),
        ]
        for x in inputs:
            expected_shape = x.shape[:-1]
            p_x = dist.prob(x)
            self.assertEqual(expected_shape, p_x.get_shape())

            cat_probs = nn_ops.softmax(dist.cat.logits)
            dist_probs = [component.prob(x) for component in dist.components]

            fetches = [p_x, cat_probs, dist_probs]
            p_x_value, cat_probs_value, dist_probs_value = sess.run(fetches)
            self.assertEqual(expected_shape, p_x_value.shape)

            # Move the component axis first so it can be zipped with the
            # per-component probabilities.
            cat_probs_value = _swap_first_last_axes(cat_probs_value)
            weighted = [
                c_p_value * d_p_value
                for c_p_value, d_p_value in zip(cat_probs_value,
                                                dist_probs_value)
            ]
            total_prob = sum(weighted)

            self.assertAllClose(total_prob, p_x_value)
def testEntropyLowerBoundMultivariate(self):
    # The entropy lower bound must equal the categorical-weighted sum of the
    # component entropies, for several batch shapes.
    with self.test_session() as sess:
        for batch_shape in ((), (2,), (2, 3)):
            dist = make_multivariate_mixture(
                batch_shape=batch_shape, num_components=2, event_shape=(4,))
            entropy_lower_bound = dist.entropy_lower_bound()
            self.assertEqual(batch_shape, entropy_lower_bound.get_shape())

            cat_probs = nn_ops.softmax(dist.cat.logits)
            dist_entropy = [
                component.entropy() for component in dist.components
            ]

            fetches = [entropy_lower_bound, cat_probs, dist_entropy]
            (entropy_lower_bound_value, cat_probs_value,
             dist_entropy_value) = sess.run(fetches)
            self.assertEqual(batch_shape, entropy_lower_bound_value.shape)

            cat_probs_value = _swap_first_last_axes(cat_probs_value)

            # entropy_lower_bound = sum_i pi_i entropy_i
            # for i in num_components, batchwise.
            true_entropy_lower_bound = sum(
                weight * component_entropy
                for weight, component_entropy in zip(cat_probs_value,
                                                     dist_entropy_value))

            self.assertAllClose(true_entropy_lower_bound,
                                entropy_lower_bound_value)
def sequence_loss_by_example(logits, targets, weights,
                             average_across_timesteps=True,
                             softmax_loss_function=None, name=None):
    """Weighted cross-entropy loss for a sequence of logits (per example).

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, default:
        "sequence_loss_by_example".

    Returns:
      1D batch-sized float Tensor: The log-perplexity for each sequence.

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    num_steps = len(logits)
    lengths_match = len(targets) == num_steps and len(weights) == num_steps
    if not lengths_match:
        raise ValueError(
            "Lengths of logits, weights, and targets must be the same "
            "%d, %d, %d." % (len(logits), len(weights), len(targets)))

    with ops.name_scope(name, "sequence_loss_by_example",
                        logits + targets + weights):
        weighted_losses = []
        for step_logits, step_targets, step_weights in zip(
                logits, targets, weights):
            if softmax_loss_function is not None:
                step_loss = softmax_loss_function(step_logits, step_targets)
            else:
                # The built-in sparse loss is fed targets flattened to 1-D.
                step_targets = array_ops.reshape(step_targets, [-1])
                step_loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
                    step_logits, step_targets)
            weighted_losses.append(step_loss * step_weights)

        log_perps = math_ops.add_n(weighted_losses)
        if average_across_timesteps:
            total_size = math_ops.add_n(weights)
            total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
            log_perps /= total_size
    return log_perps
def sequence_loss(logits, targets, weights, average_across_timesteps=True,
                  average_across_batch=True, softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.name_scope(name, "sequence_loss", logits + targets + weights):
        cost = math_ops.reduce_sum(sequence_loss_by_example(
            logits, targets, weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function))
        if average_across_batch:
            batch_size = array_ops.shape(targets[0])[0]
            # Cast to the cost's own dtype instead of a fixed float32 so the
            # division also works when the loss is not float32.
            return cost / math_ops.cast(batch_size, cost.dtype)
        else:
            return cost
def sequence_loss(logits, targets, weights,
                  average_across_timesteps=True,
                  average_across_batch=True,
                  softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (labels-batch, inputs-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.name_scope(name, "sequence_loss", logits + targets + weights):
        per_example_loss = sequence_loss_by_example(
            logits,
            targets,
            weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function)
        cost = math_ops.reduce_sum(per_example_loss)
        if not average_across_batch:
            return cost
        # The batch size is read dynamically from the first target tensor.
        batch_size = array_ops.shape(targets[0])[0]
        return cost / math_ops.cast(batch_size, cost.dtype)
def sequence_loss(targets, logits, weights, average_across_timesteps=True,
                  average_across_batch=True, softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Note that `targets` comes before `logits` in this signature.

    Args:
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (labels-batch, inputs-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.name_scope(name, "sequence_loss", logits + targets + weights):
        # Forward by keyword so the call stays correct regardless of the
        # positional order of sequence_loss_by_example's first three
        # parameters.
        cost = math_ops.reduce_sum(
            sequence_loss_by_example(
                logits=logits,
                targets=targets,
                weights=weights,
                average_across_timesteps=average_across_timesteps,
                softmax_loss_function=softmax_loss_function))
        if average_across_batch:
            batch_size = array_ops.shape(targets[0])[0]
            return cost / math_ops.cast(batch_size, cost.dtype)
        else:
            return cost
def sequence_loss(logits, targets, weights,
                  average_across_timesteps=True,
                  average_across_batch=True,
                  softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.name_scope(name, "sequence_loss", logits + targets + weights):
        per_example_loss = sequence_loss_by_example(
            logits,
            targets,
            weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function)
        cost = math_ops.reduce_sum(per_example_loss)
        if not average_across_batch:
            return cost
        # The batch size is read dynamically from the first target tensor.
        batch_size = array_ops.shape(targets[0])[0]
        return cost / math_ops.cast(batch_size, cost.dtype)
def sequence_loss(logits, targets, weights, average_across_timesteps=True,
                  average_across_batch=True, softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.name_scope(name, "sequence_loss", logits + targets + weights):
        cost = math_ops.reduce_sum(sequence_loss_by_example(
            logits, targets, weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function))
        if average_across_batch:
            batch_size = array_ops.shape(targets[0])[0]
            # Cast to the cost's own dtype instead of a fixed float32 so the
            # division also works when the loss is not float32.
            return cost / math_ops.cast(batch_size, cost.dtype)
        else:
            return cost
def _base_inference(self, data, data_spec=None, soft=False):
    """Run `data` through all layers and a final fully connected layer.

    Args:
      data: Input passed to the first layer.
      data_spec: Not used by this method.
      soft: If true, the first layer is evaluated through its
        `soft_inference_graph`; otherwise through `_do_layer_inference`.

    Returns:
      The output of a fully connected layer with a softmax activation, sized
      1 for regression and `self.params.num_classes` otherwise.
    """
    first_layer = self.layers[0]
    if not soft:
        inference_result = self._do_layer_inference(first_layer, data)
    else:
        inference_result = first_layer.soft_inference_graph(data)

    # Every later layer consumes the previous layer's result.
    for layer in self.layers[1:]:
        inference_result = self._do_layer_inference(layer, inference_result)

    if self.is_regression:
        output_size = 1
    else:
        output_size = self.params.num_classes

    return layers.fully_connected(
        inference_result, output_size, activation_fn=nn_ops.softmax)
def inference_graph(self, data, data_spec=None):
    """Returns the op that performs inference on a batch of data."""
    # Inference uses the soft path through the first layer.
    base_output = self._base_inference(data, data_spec=data_spec, soft=True)
    return nn_ops.softmax(base_output)

# pylint: disable=unused-argument
def _base_inference(self, data, data_spec=None):
    """Returns an op that performs inference without a softmax."""
    layer_iter = iter(self.layers)

    # The first layer consumes the raw data; every later layer consumes the
    # previous layer's result.
    inference_result = self._do_layer_inference(next(layer_iter), data)
    for layer in layer_iter:
        inference_result = self._do_layer_inference(layer, inference_result)

    if self.is_regression:
        output_size = 1
    else:
        output_size = self.params.num_classes

    # Identity activation leaves the fully connected output unnormalized.
    return layers.fully_connected(
        inference_result, output_size, activation_fn=array_ops.identity)
def inference_graph(self, data, data_spec=None):
    """Returns the op that performs inference on a batch of data."""
    base_output = self._base_inference(data, data_spec=data_spec)
    return nn_ops.softmax(base_output)
def training_inference_graph(self, data, data_spec=None):
    """Returns an inference-without-softmax op for training purposes."""
    base_output = self._base_inference(data, data_spec=data_spec)
    return base_output
def _cat_probs(self, log_probs):
    """Get a list of num_components batchwise probabilities.

    Args:
      log_probs: If truthy, `log_softmax` is applied to the categorical
        logits instead of `softmax`.

    Returns:
      The result unpacked along the last axis into `self.num_components`
      tensors.
    """
    if log_probs:
        normalize = nn_ops.log_softmax
    else:
        normalize = nn_ops.softmax
    stacked_probs = normalize(self.cat.logits)
    return array_ops.unpack(
        stacked_probs, num=self.num_components, axis=-1)
def _entropy(self):
    """Entropy computed as the cross entropy of softmax(logits) with itself.

    Returns:
      The per-row cross entropy reshaped to `self.batch_shape()`, with the
      static shape set from `self.get_batch_shape()`.
    """
    # Flatten every batch dimension so the logits are [-1, num_classes].
    flat_shape = array_ops.pack([-1, self.num_classes])
    logits_2d = array_ops.reshape(self.logits, flat_shape)
    histogram_2d = nn_ops.softmax(logits_2d)
    cross_entropy = nn_ops.softmax_cross_entropy_with_logits(
        logits_2d, histogram_2d)
    ret = array_ops.reshape(cross_entropy, self.batch_shape())
    ret.set_shape(self.get_batch_shape())
    return ret
def predict_proba(self, data, data_spec=None):
    """Apply a softmax named "probabilities" to the inference graph output.

    Args:
      data: Input forwarded to `self.inference_graph`.
      data_spec: Forwarded to `self.inference_graph` as `data_spec`.

    Returns:
      The softmax op over the result of `self.inference_graph`.
    """
    return nn_ops.softmax(
        self.inference_graph(data, data_spec=data_spec),
        name="probabilities")
def _entropy(self):
    """Entropy computed as the cross entropy of softmax(logits) with itself.

    Returns:
      The per-row cross entropy reshaped to `self.batch_shape()`, with the
      static shape set from `self.get_batch_shape()`.
    """
    # Logits that are already rank 2 are used as they are; anything else is
    # flattened to [-1, num_classes].
    already_2d = self.logits.get_shape().ndims == 2
    logits_2d = (
        self.logits if already_2d
        else array_ops.reshape(self.logits, [-1, self.num_classes]))
    histogram_2d = nn_ops.softmax(logits_2d)
    cross_entropy = nn_ops.softmax_cross_entropy_with_logits(
        logits_2d, histogram_2d)
    ret = array_ops.reshape(cross_entropy, self.batch_shape())
    ret.set_shape(self.get_batch_shape())
    return ret
def sequence_loss(logits, targets, weights, average_across_timesteps=True,
                  average_across_batch=True, softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.name_scope(name, "sequence_loss", logits + targets + weights):
        cost = math_ops.reduce_sum(sequence_loss_by_example(
            logits, targets, weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function))
        if average_across_batch:
            batch_size = array_ops.shape(targets[0])[0]
            # Cast to the cost's own dtype instead of a fixed float32 so the
            # division also works when the loss is not float32.
            return cost / math_ops.cast(batch_size, cost.dtype)
        return cost
def sequence_loss(logits, targets, weights, average_across_timesteps=True,
                  average_across_batch=True, softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.op_scope(logits + targets + weights, name, "sequence_loss"):
        cost = math_ops.reduce_sum(sequence_loss_by_example(
            logits, targets, weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function))
        if average_across_batch:
            batch_size = array_ops.shape(targets[0])[0]
            # Cast to the cost's own dtype instead of a fixed float32 so the
            # division also works when the loss is not float32.
            return cost / math_ops.cast(batch_size, cost.dtype)
        else:
            return cost
def sequence_loss(logits, targets, weights, average_across_timesteps=True,
                  average_across_batch=True, softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with tf.name_scope(name, "sequence_loss", logits + targets + weights):
        cost = math_ops.reduce_sum(sequence_loss_by_example(
            logits, targets, weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function))
        if average_across_batch:
            batch_size = array_ops.shape(targets[0])[0]
            # Cast to the cost's own dtype instead of a fixed float32 so the
            # division also works when the loss is not float32.
            return cost / math_ops.cast(batch_size, cost.dtype)
        else:
            return cost
def sequence_loss(logits, targets, weights,
                  average_across_timesteps=True,
                  average_across_batch=True,
                  softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.op_scope(logits + targets + weights, name, "sequence_loss"):
        per_example_loss = sequence_loss_by_example(
            logits,
            targets,
            weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function)
        cost = math_ops.reduce_sum(per_example_loss)
        if not average_across_batch:
            return cost
        # The batch size is read dynamically from the first target tensor.
        batch_size = array_ops.shape(targets[0])[0]
        return cost / math_ops.cast(batch_size, cost.dtype)
def sequence_loss(logits, targets, weights,
                  average_across_timesteps=True,
                  average_across_batch=True,
                  softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.name_scope(name, "sequence_loss", logits + targets + weights):
        per_example_loss = sequence_loss_by_example(
            logits,
            targets,
            weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function)
        cost = math_ops.reduce_sum(per_example_loss)
        if not average_across_batch:
            return cost
        # The batch size is read dynamically from the first target tensor.
        batch_size = array_ops.shape(targets[0])[0]
        return cost / math_ops.cast(batch_size, cost.dtype)
def sequence_loss(logits, targets, weights, average_across_timesteps=True,
                  average_across_batch=True, softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: List of 1D batch-sized float-Tensors of the same length as
        logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      average_across_batch: If set, divide the returned cost by the batch
        size.
      softmax_loss_function: Function (inputs-batch, labels-batch) ->
        loss-batch to be used instead of the standard softmax (the default if
        this is None).
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
      ValueError: If len(logits) is different from len(targets) or
        len(weights).
    """
    with ops.name_scope(name, "sequence_loss", logits + targets + weights):
        cost = math_ops.reduce_sum(sequence_loss_by_example(
            logits, targets, weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function))
        if average_across_batch:
            batch_size = array_ops.shape(targets[0])[0]
            # Cast to the cost's own dtype instead of a fixed float32 so the
            # division also works when the loss is not float32.
            return cost / math_ops.cast(batch_size, cost.dtype)
        else:
            return cost