我们从 Python 开源项目中提取了以下 50 个代码示例,用于说明如何使用 tensorflow.python.framework.ops.control_dependencies()。
def _check_shape(self, shape):
    """Convert the init arg `shape` to a tensor and optionally validate it.

    Args:
      shape: Value handed to `ops.convert_to_tensor`.

    Returns:
      The converted `shape` tensor.  When `self._verify_pd` is truthy it is
      returned through `control_flow_ops.with_dependencies`, tied to the
      assertions that it has at least two entries and that its last two
      entries are equal.
    """
    shape_t = ops.convert_to_tensor(shape, name="shape")
    if self._verify_pd:
        # These checks are equivalent to verifying positive definiteness: they
        # establish that this is a square matrix (and thus maps a vector space
        # R^k onto itself), which combined with the behavior of .matmul()
        # means this must be the identity operator.
        num_dims = array_ops.size(shape_t)
        has_matrix_rank = check_ops.assert_less_equal(2, num_dims)
        with ops.control_dependencies([has_matrix_rank]):
            innermost = array_ops.gather(shape_t, num_dims - 1)
            next_innermost = array_ops.gather(shape_t, num_dims - 2)
            is_square = check_ops.assert_equal(innermost, next_innermost)
        return control_flow_ops.with_dependencies(
            [has_matrix_rank, is_square], shape_t)
    return shape_t
def _mean(self):
    """Return the sum over components of (weight * component mean).

    The weights are the values returned by
    `self._cat_probs(log_probs=False)`.  Every op is created under a control
    dependency on `self._assertions`.
    """
    with ops.control_dependencies(self._assertions):
        component_means = [component.mean() for component in self.components]
        weights = self._cat_probs(log_probs=False)

        # This was checked to not be None at construction time.
        event_ndims = self.get_event_shape().ndims

        def _append_event_dims(tensor):
            # Append `event_ndims` trailing size-1 axes so that the weight
            # broadcasts correctly against a component mean.
            for _ in range(event_ndims):
                tensor = array_ops.expand_dims(tensor, -1)
            return tensor

        broadcastable_weights = [_append_event_dims(w) for w in weights]
        weighted_means = [
            weight * mean
            for weight, mean in zip(broadcastable_weights, component_means)
        ]
        # These should all be the same shape by virtue of matching
        # batch_shape and event_shape.
        return math_ops.add_n(weighted_means)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients, then update the moving averages of the variables.

    Args:
      grads_and_vars: Iterable of (gradient, variable) pairs.  It is
        materialized into a list first, so single-pass iterables are safe.
      global_step: Forwarded to the wrapped optimizer's `apply_gradients`.
      name: Forwarded to the wrapped optimizer's `apply_gradients`.

    Returns:
      The result of grouping the wrapped optimizer's train op with the
      moving-average update op under the name "train_with_avg".
    """
    # Materialize once: a single-pass iterable (e.g. `zip(...)` on Python 3)
    # would be drained by the wrapped optimizer, leaving no variables to
    # average below.
    grads_and_vars = list(grads_and_vars)

    train_op = self._optimizer.apply_gradients(
        grads_and_vars, global_step=global_step, name=name)
    # Only variables that actually received a gradient are averaged.
    var_list = [var for grad, var in grads_and_vars if grad is not None]
    self._variable_map = {}
    if self._sequential_update:
        # Make the averaging op depend on the variable update.
        with ops.control_dependencies([train_op]):
            ma_op = self._ema.apply(var_list)
    else:
        ma_op = self._ema.apply(var_list)

    # Record a two-way name mapping between each variable and its average.
    for v in var_list:
        v_avg = self._ema.average(v)
        self._variable_map[v.op.name] = v_avg
        self._variable_map[v_avg.op.name] = v
    return control_flow_ops.group(train_op, ma_op, name="train_with_avg")
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients, then clip the variables registered for clipping.

    Args:
      grads_and_vars: Iterable of (gradient, variable) pairs.  It is
        materialized into a list first, so single-pass iterables are safe.
      global_step: Forwarded to the wrapped optimizer's `apply_gradients`.
      name: Name scope for the created ops; `self._name` is the default.

    Returns:
      The result of grouping the wrapped optimizer's update op with every
      clip op that was created.
    """
    # Materialize once: the pairs are consumed by the wrapped optimizer and
    # then iterated again below, which would silently skip all clipping for a
    # single-pass iterable (e.g. `zip(...)` on Python 3).
    grads_and_vars = list(grads_and_vars)

    with ops.name_scope(name, self._name) as name:
        update_op = self._opt.apply_gradients(
            grads_and_vars, global_step=global_step)
        clip_update_ops = []
        # Clip ops are created under a dependency on the update op.
        with ops.control_dependencies([update_op]):
            for grad, var in grads_and_vars:
                if grad is None or var not in self._vars_to_clip_dims:
                    continue
                with ops.name_scope("clip_" + var.op.name):
                    if isinstance(grad, ops.Tensor):
                        clip_update_ops.append(self._clip_dense(var))
                    else:
                        clip_update_ops.append(self._clip_sparse(grad, var))

        # In case no var was clipped, still need to run the update_op.
        return control_flow_ops.group(
            *([update_op] + clip_update_ops), name=name)
def __init__(self, label_name, weight_column_name, enable_centered_bias,
             head_name, thresholds):
    """Set up a two-class head that uses hinge loss for train and eval.

    All arguments are forwarded unchanged to the parent constructor.
    """

    def loss_fn(logits, labels):
        """Hinge loss on `labels` reshaped to [batch_size, 1]."""
        rank_at_most_two = math_ops.less_equal(array_ops.rank(labels), 2)
        check_shape_op = control_flow_ops.Assert(
            rank_at_most_two,
            ["labels shape should be either [batch_size, 1] or [batch_size]"])
        with ops.control_dependencies([check_shape_op]):
            leading_dim = array_ops.shape(labels)[0]
            column_labels = array_ops.reshape(labels, shape=[leading_dim, 1])
        return losses.hinge_loss(logits, column_labels)

    super(_BinarySvmHead, self).__init__(
        train_loss_fn=loss_fn,
        eval_loss_fn=loss_fn,
        n_classes=2,
        label_name=label_name,
        weight_column_name=weight_column_name,
        enable_centered_bias=enable_centered_bias,
        head_name=head_name,
        thresholds=thresholds)
def _get_train_ops(self, features, labels):
    """See base class."""
    feature_dict = self._get_feature_dict(features)
    feature_dict, labels = self._feature_engineering_fn(feature_dict, labels)
    logits = self._logits(feature_dict, is_training=True)

    def _make_training_op(training_loss):
        """Return the global-step increment op gated on the train steps."""
        global_step = contrib_variables.get_global_step()
        assert global_step

        linear_steps = self._linear_model.get_train_step(training_loss)
        if self._dnn_model:
            dnn_steps = self._dnn_model.get_train_step(training_loss)
        else:
            dnn_steps = []

        # The increment is created under a dependency on every train step
        # and colocated with the global step.
        with ops.control_dependencies(linear_steps + dnn_steps), \
                ops.get_default_graph().colocate_with(global_step):
            increment = state_ops.assign_add(global_step, 1)
        return increment.op

    return self._head.head_ops(
        feature_dict,
        labels,
        model_fn.ModeKeys.TRAIN,
        _make_training_op,
        logits=logits)
def _dense_inner_flatten(inputs, new_rank):
    """Helper function for `inner_flatten`.

    Keeps the first `new_rank - 1` dimensions of `inputs` and collapses the
    rest into a single trailing dimension.

    Args:
      inputs: Tensor to reshape.
      new_rank: Rank of the result (Python integer or tensor).

    Returns:
      The reshaped tensor.  When `new_rank` is a Python integer and the
      static shape of `inputs` is known, the static shape is set as well.
    """
    rank_assertion = check_ops.assert_rank_at_least(
        inputs, new_rank, message='inputs has rank less than new_rank')
    with ops.control_dependencies([rank_assertion]):
        leading_dims = array_ops.slice(
            array_ops.shape(inputs), [0], [new_rank - 1])
        target_shape = array_ops.concat(0, (leading_dims, [-1]))
        reshaped = array_ops.reshape(inputs, target_shape)

    # Static shape inference is only possible for an integer `new_rank`.
    if not isinstance(new_rank, six.integer_types):
        return reshaped
    static_shape = inputs.get_shape()
    if static_shape is None or static_shape.dims is None:
        return reshaped

    dims = static_shape.as_list()
    split = new_rank - 1
    kept, collapsed = dims[:split], dims[split:]
    if any(dim is None for dim in collapsed):
        # One unknown inner dimension makes the flattened size unknown.
        flattened = None
    else:
        flattened = 1
        for dim in collapsed:
            flattened *= dim
    reshaped.set_shape(kept + [flattened])
    return reshaped
def _thin_stack_lookup_metal_gradient(op, stack1_grad, stack2_grad,
                                      buf_top_grad, _):
    """Gradient function for the thin-stack lookup op.

    Returns a 6-tuple, one entry per op input: identities of the recovered
    stack and buffer variables (created under a dependency on the gradient
    updates) followed by four `None` entries.
    """
    (stack_var, buffer_var, _unused_a, _unused_b,
     cursors, transitions) = op.inputs
    stack2_ptrs = op.outputs[3]
    timestep = op.get_attr("timestep")

    # HACK: Recover original Variable instances from op chain
    while stack_var.op.type != "Variable":
        stack_var = stack_var.op.inputs[0]
    while buffer_var.op.type != "Variable":
        assert buffer_var.op.type == "Identity"
        buffer_var = buffer_var.op.inputs[0]
    cursors = _fetch_buffer_cursors(cursors)

    updates = _thin_stack_lookup_gradient_impl(
        stack_var, buffer_var, stack2_ptrs, cursors,
        stack1_grad, stack2_grad, buf_top_grad, transitions, timestep)
    with ops.control_dependencies(updates):
        stack_out = tf.identity(stack_var)
        buffer_out = tf.identity(buffer_var)
    return stack_out, buffer_out, None, None, None, None
def _thin_stack_update_gradient(op, stack_grad, *rest):
    """Gradient function for the thin-stack update op.

    Returns a 6-tuple: the slice of the stack gradient for this timestep,
    `None`, the (possibly densified) stack gradient, then three `None`s.
    """
    stack_input = op.inputs[2]
    batch_size = op.inputs[4].get_shape().as_list()[0]
    timestep = op.get_attr("timestep")

    # We usually slice off the head of the stack output in feedforward and
    # send it off to downstream computation. The Slice feedforward op will
    # generate a sparse gradient in the backward pass. Nix this sparsity
    # at the very start.
    if isinstance(stack_grad, ops.IndexedSlices):
        # Trick: re-use our stack structure to store new gradients.
        # Recover the original stack variable from the lookup/update chain.
        dense = _fetch_stack(stack_input)
        dense = tf.assign(dense, tf.zeros_like(dense))
        dense = tf.scatter_update(
            dense, stack_grad.indices, stack_grad.values)
        stack_grad = dense

    row_start = t_offset = timestep * batch_size
    with tf.control_dependencies([stack_grad]):
        input_grad = tf.slice(stack_grad, [row_start, 0], [batch_size, -1])

    return input_grad, None, stack_grad, None, None, None
def compute_gradients(self, *args, **kwargs):
    """Compute gradients of "loss" for the variables in "var_list".

    This simply wraps the compute_gradients() from the real optimizer and
    adds logging `Print` ops: one before the gradients are computed and one
    per gradient tensor.  The gradients will be aggregated in the
    apply_gradients() so that user can modify the gradients like clipping
    with per replica global norm if needed. The global norm with aggregated
    gradients can be bad as one replica's huge gradients can hurt the
    gradients from other replicas.

    Args:
      *args: Arguments for compute_gradients().
      **kwargs: Keyword arguments for compute_gradients().

    Returns:
      A list of (gradient, variable) pairs.  Pairs whose gradient is None
      are returned unchanged.
    """
    start_marker = logging_ops.Print(
        0, [0], message="Starting to compute gradients")
    with ops.control_dependencies([start_marker]):
        grads_and_vars = self._opt.compute_gradients(*args, **kwargs)

    for index, (grad, var) in enumerate(grads_and_vars):
        if grad is None:
            # A variable without a gradient yields a None entry; None can be
            # neither a control input nor the input of a Print op, so leave
            # the pair as it is.
            continue
        with ops.control_dependencies([grad]):
            logged_grad = logging_ops.Print(
                grad, [0], message="Done computing gradient %d" % index)
        grads_and_vars[index] = (logged_grad, var)
    return grads_and_vars
def _AddShardedSaveOps(self, filename_tensor, per_device):
    """Add ops to save the params per shard.

    Args:
      filename_tensor: String Tensor.
      per_device: A list of (device, BaseSaverBuilder.VarToSave) pairs, as
        returned by _GroupByDevices().

    Returns:
      An op to save the variables.
    """
    shard_count = len(per_device)
    shard_count_t = constant_op.constant(shard_count, name="num_shards")

    save_ops = []
    for shard_index, (device, vars_to_save) in enumerate(per_device):
        # Each shard's filename and save op are created on that device.
        with ops.device(device):
            shard_path = self.sharded_filename(
                filename_tensor, shard_index, shard_count_t)
            save_ops.append(self._AddSaveOps(shard_path, vars_to_save))

    # Return the sharded name for the save path.
    save_dependencies = [saved.op for saved in save_ops]
    with ops.control_dependencies(save_dependencies):
        # pylint: disable=protected-access
        return gen_io_ops._sharded_filespec(filename_tensor, shard_count_t)
def _dense_inner_flatten(inputs, new_rank):
    """Helper function for `inner_flatten`.

    Keeps the first `new_rank - 1` dimensions of `inputs` and collapses the
    rest into a single trailing dimension.

    Args:
      inputs: Tensor to reshape.
      new_rank: Rank of the result (Python integer or tensor).

    Returns:
      The reshaped tensor.  When `new_rank` is a Python integer and the
      static shape of `inputs` is known, the static shape is set as well.
    """
    rank_assertion = check_ops.assert_rank_at_least(
        inputs, new_rank, message='inputs has rank less than new_rank')
    with ops.control_dependencies([rank_assertion]):
        leading_dims = array_ops.strided_slice(
            array_ops.shape(inputs), [0], [new_rank - 1])
        target_shape = array_ops.concat((leading_dims, [-1]), 0)
        reshaped = array_ops.reshape(inputs, target_shape)

    # Static shape inference is only possible for an integer `new_rank`.
    if not isinstance(new_rank, six.integer_types):
        return reshaped
    static_shape = inputs.get_shape()
    if static_shape is None or static_shape.dims is None:
        return reshaped

    dims = static_shape.as_list()
    split = new_rank - 1
    kept, collapsed = dims[:split], dims[split:]
    if any(dim is None for dim in collapsed):
        # One unknown inner dimension makes the flattened size unknown.
        flattened = None
    else:
        flattened = 1
        for dim in collapsed:
            flattened *= dim
    reshaped.set_shape(kept + [flattened])
    return reshaped
def test_train_override_saver(self):
    """A saver registered in the SAVERS collection is used by training."""
    with ops.Graph().as_default() as graph, self.test_session(graph):
        inference_ops = self._build_inference_graph()
        with ops.control_dependencies(inference_ops):
            train_op = state_ops.assign_add(
                variables_lib.get_global_step(), 1)
        self._assert_ckpt(self._output_dir, False)

        # Wrap a real saver in a mock so that calls on it can be inspected.
        real_saver = saver_lib.Saver()
        saver = test.mock.Mock(
            wraps=real_saver, saver_def=real_saver.saver_def)
        ops.add_to_collection(ops.GraphKeys.SAVERS, saver)

        loss = learn.graph_actions._monitored_train(  # pylint: disable=protected-access
            graph,
            output_dir=self._output_dir,
            train_op=train_op,
            loss_op=constant_op.constant(2.0),
            steps=1)

        self.assertEqual(2.0, loss)
        self._assert_ckpt(self._output_dir, True)
        self.assertTrue(saver.build.called)
        self.assertEqual(1, saver.save.call_count)

# TODO(ispir): remove following tests after deprecated train.
def test_train_summaries(self):
    """One training step writes the graph, a 'loss' summary and a ckpt."""
    with ops.Graph().as_default() as graph, self.test_session(graph):
        inference_ops = self._build_inference_graph()
        with ops.control_dependencies(inference_ops):
            train_op = state_ops.assign_add(
                variables_lib.get_global_step(), 1)
        loss_op = constant_op.constant(2.0)
        summary.scalar('loss', loss_op)

        # Nothing has been written before training starts.
        self._assert_summaries(self._output_dir)
        self._assert_ckpt(self._output_dir, False)

        loss = learn.graph_actions.train(
            graph,
            output_dir=self._output_dir,
            train_op=train_op,
            loss_op=loss_op,
            steps=1)
        # TODO(ebrevdo,ptucker,ispir): this meta_graph_def lacks the
        # SaverDef, so we can't add it to the summary assertion test below.
        # meta_graph_def = meta_graph.create_meta_graph_def()
        self.assertEqual(2.0, loss)

        summaries_by_step = {1: {'loss': 2.0}}
        self._assert_summaries(
            self._output_dir,
            expected_graphs=[graph],
            expected_summaries=summaries_by_step)
        self._assert_ckpt(self._output_dir, True)
def test_train_chief_monitor(self):
    """Both monitors are active and see a step when training as chief."""
    with ops.Graph().as_default() as graph, self.test_session(graph):
        inference_ops = self._build_inference_graph()
        with ops.control_dependencies(inference_ops):
            train_op = state_ops.assign_add(
                variables_lib.get_global_step(), 1)
        loss_op = constant_op.constant(2.0)
        summary.scalar('loss', loss_op)

        chief_exclusive_monitor = _BaseMonitorWrapper(False)
        all_workers_monitor = _BaseMonitorWrapper(True)
        monitors = [chief_exclusive_monitor, all_workers_monitor]

        loss = learn.graph_actions.train(
            graph,
            output_dir=self._output_dir,
            train_op=train_op,
            loss_op=loss_op,
            supervisor_is_chief=True,
            steps=1,
            monitors=monitors)
        self.assertEqual(2.0, loss)

        both_active = (chief_exclusive_monitor.is_active and
                       all_workers_monitor.is_active)
        self.assertTrue(both_active, 'All monitors must have been active.')
        both_stepped = (chief_exclusive_monitor.has_step and
                        all_workers_monitor.has_step)
        self.assertTrue(both_stepped, 'All monitors must have a step.')
def _init_clusters_random(self):
    """Does random initialization of clusters.

    Returns:
      Tensor of randomly initialized clusters.
    """
    rows_per_input = [array_ops.shape(inp)[0] for inp in self._inputs]
    num_data = math_ops.add_n(rows_per_input)
    # Note that for mini-batch k-means, we should ensure that the batch size
    # of data used during initialization is sufficiently large to avoid
    # duplicated clusters.
    enough_data = check_ops.assert_less_equal(self._num_clusters, num_data)
    with ops.control_dependencies([enough_data]):
        sample_shape = array_ops.reshape(self._num_clusters, [-1])
        upper_bound = math_ops.cast(num_data, dtypes.int64)
        indices = random_ops.random_uniform(
            sample_shape,
            minval=0,
            maxval=upper_bound,
            seed=self._random_seed,
            dtype=dtypes.int64)
        return embedding_lookup(
            self._inputs, indices, partition_strategy='div')
def _init_clusters_random(data, num_clusters, random_seed):
    """Does random initialization of clusters.

    Args:
      data: a list of Tensors with a matrix of data, each row is an example.
      num_clusters: an integer with the number of clusters.
      random_seed: Seed for PRNG used to initialize seeds.

    Returns:
      A Tensor with num_clusters random rows of data.
    """
    assert isinstance(data, list)
    rows_per_input = [array_ops.shape(inp)[0] for inp in data]
    num_data = math_ops.add_n(rows_per_input)
    enough_data = check_ops.assert_less_equal(num_clusters, num_data)
    with ops.control_dependencies([enough_data]):
        upper_bound = math_ops.cast(num_data, dtypes.int64)
        sampled = random_ops.random_uniform(
            [num_clusters],
            minval=0,
            maxval=upper_bound,
            seed=random_seed,
            dtype=dtypes.int64)
    # Cast to int32 and wrap into [0, num_data) before the lookup.
    row_indices = math_ops.cast(sampled, dtypes.int32) % num_data
    return embedding_lookup(data, row_indices, partition_strategy='div')
def testAssertCloseNonIntegerDtype(self):
    """assert_close passes for x vs x + 1e-8 and fails against z."""
    x = np.array([1., 5, 10, 15, 20], dtype=np.float32)
    y = x + 1e-8
    z = [2., 5, 10, 15, 20]

    def eval_guarded(lhs, rhs, fetched):
        # Evaluate `fetched` behind an assert_close(lhs, rhs) dependency.
        guard = distribution_util.assert_close(lhs, rhs)
        with ops.control_dependencies([guard]):
            array_ops.identity(fetched).eval()

    with self.test_session():
        eval_guarded(x, y, x)
        eval_guarded(y, x, x)

        with self.assertRaisesOpError("Condition x ~= y"):
            eval_guarded(x, z, x)

        with self.assertRaisesOpError("Condition x ~= y"):
            eval_guarded(y, z, y)
def testAssertCloseEpsilon(self):
    """assert_close tolerates a 1e-8 difference but not a 0.1 difference."""
    x = [0., 5, 10, 15, 20]
    # x != y
    y = [0.1, 5, 10, 15, 20]
    # x = z
    z = [1e-8, 5, 10, 15, 20]

    def eval_guarded(lhs, rhs, fetched):
        # Evaluate `fetched` behind an assert_close(lhs, rhs) dependency.
        guard = distribution_util.assert_close(lhs, rhs)
        with ops.control_dependencies([guard]):
            array_ops.identity(fetched).eval()

    with self.test_session():
        eval_guarded(x, z, x)

        with self.assertRaisesOpError("Condition x ~= y"):
            eval_guarded(x, y, x)

        with self.assertRaisesOpError("Condition x ~= y"):
            eval_guarded(y, z, y)
def state_saving_rnn(cell, inputs, state_saver, state_name,
                     sequence_length=None, scope=None):
    """RNN that accepts a state saver for time-truncated RNN calculation.

    The initial state is read from `state_saver`, the RNN is built with
    `rnn()`, and the final state is written back through `state_saver`.  The
    last output is replaced by an identity op that depends on that save.

    Args:
      cell: An instance of RNNCell.
      inputs: A length T list of inputs, each a tensor of shape
        [batch_size, input_size].
      state_saver: A state saver object with methods `state` and
        `save_state`.
      state_name: The name to use with the state_saver.
      sequence_length: (optional) An int32/int64 vector size [batch_size].
        See the documentation for rnn() for more details about
        sequence_length.
      scope: VariableScope for the created subgraph; defaults to "RNN".

    Returns:
      A pair (outputs, state) where:
        outputs is a length T list of outputs (one for each input)
        state is the final state

    Raises:
      TypeError: If "cell" is not an instance of RNNCell.
      ValueError: If inputs is None or an empty list.
      (Neither is raised by this wrapper itself; presumably both come from
      `rnn()` -- confirm against its implementation.)
    """
    start_state = state_saver.state(state_name)
    outputs, final_state = rnn(
        cell,
        inputs,
        initial_state=start_state,
        sequence_length=sequence_length,
        scope=scope)
    save_op = state_saver.save_state(state_name, final_state)

    # Route the last output through an identity created under the save op so
    # that fetching it also runs the save.
    last_output = outputs[-1]
    with ops.control_dependencies([save_op]):
        outputs[-1] = array_ops.identity(last_output)
    return (outputs, final_state)
def _maybe_mask_score(score, memory_sequence_length, score_mask_value):
    """Replace scores beyond each sequence length with a fill value.

    Args:
      score: Score tensor; its second dimension gives the mask length.
      memory_sequence_length: Sequence lengths, or None to skip masking.
      score_mask_value: Value written where the sequence mask is False.

    Returns:
      `score` unchanged when `memory_sequence_length` is None, otherwise the
      masked scores (guarded by an assertion that all lengths are positive).
    """
    if memory_sequence_length is None:
        return score

    message = ("All values in memory_sequence_length must greater than zero.")
    lengths_positive = check_ops.assert_positive(
        memory_sequence_length, message=message)
    with ops.control_dependencies([lengths_positive]):
        axis1_size = array_ops.shape(score)[1]
        keep = array_ops.sequence_mask(
            memory_sequence_length, maxlen=axis1_size)
        fill = score_mask_value * array_ops.ones_like(score)
        return array_ops.where(keep, score, fill)
def zero_state(self, batch_size, dtype):
    """Build the initial `AttentionWrapperState`.

    Args:
      batch_size: Requested batch size; asserted equal to the attention
        mechanism's batch size.
      dtype: Dtype for the zero tensors that are created.

    Returns:
      An `AttentionWrapperState` whose cell state is `_initial_cell_state`
      when that is set, otherwise the wrapped cell's zero state.
    """
    with ops.name_scope(type(self).__name__ + "ZeroState",
                        values=[batch_size]):
        if self._initial_cell_state is None:
            cell_state = self._cell.zero_state(batch_size, dtype)
        else:
            cell_state = self._initial_cell_state

        message_prefix = (
            "When calling zero_state of AttentionWrapper %s: "
            % self._base_name)
        message_suffix = (
            "Non-matching batch sizes between the memory "
            "(encoder output) and the requested batch size.  Are you using "
            "the BeamSearchDecoder?  If so, make sure your encoder output has "
            "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
            "the batch_size= argument passed to zero_state is "
            "batch_size * beam_width.")
        error_message = message_prefix + message_suffix

        batch_sizes_match = check_ops.assert_equal(
            batch_size,
            self._attention_mechanism.batch_size,
            message=error_message)

        def _checked(state_tensor):
            return array_ops.identity(state_tensor, name="checked_cell_state")

        # Re-create every cell-state tensor behind the batch-size assertion.
        with ops.control_dependencies([batch_sizes_match]):
            cell_state = nest.map_structure(_checked, cell_state)

        alignment_history = ()
        if self._alignment_history:
            alignment_history = tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True)

        return AttentionWrapperState(
            cell_state=cell_state,
            time=array_ops.zeros([], dtype=dtypes.int32),
            attention=_zero_state_tensors(
                self._attention_size, batch_size, dtype),
            alignments=self._attention_mechanism.initial_alignments(
                batch_size, dtype),
            alignment_history=alignment_history)
def _fm_scorer_grad(op, pred_grad, reg_grad):
    """Gradient function for the FM scorer op.

    Returns a 6-tuple with one entry per op input; only the second entry
    (for `op.inputs[1]`) is populated, with the result of `fm_ops.fm_grad`.
    """
    (feature_ids, feature_params, feature_vals,
     feature_poses, factor_lambda, bias_lambda) = (
         op.inputs[position] for position in range(6))

    with ops.control_dependencies([pred_grad.op, reg_grad.op]):
        params_grad = fm_ops.fm_grad(
            feature_ids, feature_params, feature_vals, feature_poses,
            factor_lambda, bias_lambda, pred_grad, reg_grad)
    return None, params_grad, None, None, None, None
def __init__(self, inputs, outputs, updates=None, name=None,
             **session_kwargs):
    """Store the inputs/outputs and build a single grouped update op.

    Args:
      inputs: List or tuple of inputs.
      outputs: List or tuple of outputs.
      updates: Optional list or tuple; each item is either a
        `(variable, new_value)` tuple or something assumed to be an op.
      name: Stored on the instance as `self.name`.
      **session_kwargs: Stored on the instance as `self.session_kwargs`.

    Raises:
      TypeError: If `inputs`, `outputs` or `updates` is not a list or tuple.
    """
    updates = updates or []
    sequence_types = (list, tuple)
    if not isinstance(inputs, sequence_types):
        raise TypeError('`inputs` to a TensorFlow backend function '
                        'should be a list or tuple.')
    if not isinstance(outputs, sequence_types):
        raise TypeError('`outputs` of a TensorFlow backend function '
                        'should be a list or tuple.')
    if not isinstance(updates, sequence_types):
        raise TypeError('`updates` in a TensorFlow backend function '
                        'should be a list or tuple.')

    self.inputs = list(inputs)
    self.outputs = list(outputs)

    def _to_update_op(update):
        if isinstance(update, tuple):
            variable, new_value = update
            return state_ops.assign(variable, new_value)
        # assumed already an op
        return update

    # Assign ops are created under a dependency on the outputs.
    with ops.control_dependencies(self.outputs):
        updates_ops = [_to_update_op(update) for update in updates]
        self.updates_op = control_flow_ops.group(*updates_ops)

    self.name = name
    self.session_kwargs = session_kwargs
def _check_multiple_of(value, multiple_of):
    """Checks that value `value` is a non-zero multiple of `multiple_of`.

    Args:
      value: an int32 scalar Tensor.
      multiple_of: an int or int32 scalar Tensor.

    Returns:
      new_value: an int32 scalar Tensor matching `value`, but which includes
        an assertion that `value` is a multiple of `multiple_of`.
    """
    assert isinstance(value, ops.Tensor)

    divides_evenly = math_ops.equal(math_ops.mod(value, multiple_of), 0)
    is_nonzero = math_ops.not_equal(value, 0)
    condition = math_ops.logical_and(divides_evenly, is_nonzero)
    failure_text = string_ops.string_join(
        ["Tensor %s should be a multiple of: " % value.name,
         string_ops.as_string(multiple_of),
         ", but saw value: ",
         string_ops.as_string(value),
         ". Consider setting pad=True."])
    check = control_flow_ops.Assert(condition, [failure_text])

    with ops.control_dependencies([check]):
        return array_ops.identity(value, name="multiple_of_checked")
def _check_rank(value, expected_rank):
    """Check the rank of Tensor `value`, via shape inference and assertions.

    Args:
      value: A Tensor, possibly with shape associated shape information.
      expected_rank: int32 scalar (optionally a `Tensor`).

    Returns:
      new_value: A Tensor matching `value`.  Accessing this tensor tests
        assertions on its rank.  If expected_rank is not a `Tensor`, then
        new_value's shape's rank has been set.

    Raises:
      ValueError: if `expected_rank` is not a `Tensor` and the rank of
        `value` is known and is not equal to `expected_rank`.
    """
    assert isinstance(value, ops.Tensor)

    ranks_match = math_ops.equal(expected_rank, array_ops.rank(value))
    failure_text = string_ops.string_join(
        ["Rank of tensor %s should be: " % value.name,
         string_ops.as_string(expected_rank),
         ", shape received:"])
    check = control_flow_ops.Assert(
        ranks_match, [failure_text, array_ops.shape(value)])

    with ops.control_dependencies([check]):
        new_value = array_ops.identity(value, name="rank_checked")

    # A tensor rank whose value is statically known is treated as an int.
    if isinstance(expected_rank, ops.Tensor):
        static_rank = tensor_util.constant_value(expected_rank)
        if static_rank is not None:
            expected_rank = int(static_rank)

    if isinstance(expected_rank, ops.Tensor):
        return new_value

    try:
        ranked_shape = new_value.get_shape().with_rank(expected_rank)
        new_value.set_shape(ranked_shape)
    except ValueError as e:
        raise ValueError("Rank check failed for %s: %s"
                         % (value.name, str(e)))
    return new_value
def _get_train_ops(self, features, targets):
    """See base class."""
    global_step = contrib_variables.get_global_step()
    assert global_step

    feature_dict = self._get_feature_dict(features)
    logits = self._logits(feature_dict, is_training=True)

    bias_steps = []
    if self._enable_centered_bias:
        bias_steps = [self._centered_bias_step(targets, feature_dict)]

    # Both losses are created under a dependency on the centered-bias step
    # (when there is one).
    with ops.control_dependencies(bias_steps):
        training_loss = self._target_column.training_loss(
            logits, targets, feature_dict)
        weighted_average_loss = self._target_column.loss(
            logits, targets, feature_dict)

    logging_ops.scalar_summary("loss", weighted_average_loss)

    linear_steps = self._linear_model.get_train_step(training_loss)
    if self._dnn_model:
        dnn_steps = self._dnn_model.get_train_step(training_loss)
    else:
        dnn_steps = []

    # The global-step increment is gated on every train step.
    with ops.control_dependencies(linear_steps + dnn_steps), \
            ops.get_default_graph().colocate_with(global_step):
        increment = state_ops.assign_add(global_step, 1)
    return increment.op, weighted_average_loss
def _log_loss_with_two_classes(logits, target):
    """Sigmoid cross entropy on `target` reshaped to [batch_size, 1].

    An assertion that `target` has rank at most 2 guards the reshape.
    """
    rank_at_most_two = math_ops.less_equal(array_ops.rank(target), 2)
    check_shape_op = control_flow_ops.Assert(
        rank_at_most_two,
        ["target's shape should be either [batch_size, 1] or [batch_size]"])
    with ops.control_dependencies([check_shape_op]):
        leading_dim = array_ops.shape(target)[0]
        column_target = array_ops.reshape(target, shape=[leading_dim, 1])
    float_target = math_ops.to_float(column_target)
    return nn.sigmoid_cross_entropy_with_logits(logits, float_target)
def _softmax_cross_entropy_loss(logits, target):
    """Sparse softmax cross entropy on `target` reshaped to [batch_size].

    An assertion that `target` has rank at most 2 guards the reshape.
    """
    rank_at_most_two = math_ops.less_equal(array_ops.rank(target), 2)
    check_shape_op = control_flow_ops.Assert(
        rank_at_most_two,
        ["target's shape should be either [batch_size, 1] or [batch_size]"])
    with ops.control_dependencies([check_shape_op]):
        leading_dim = array_ops.shape(target)[0]
        flat_target = array_ops.reshape(target, shape=[leading_dim])
    return nn.sparse_softmax_cross_entropy_with_logits(logits, flat_target)
def _hinge_loss(logits, target):
    """Hinge loss on `target` reshaped to [batch_size, 1].

    An assertion that `target` has rank at most 2 guards the reshape.
    """
    rank_at_most_two = math_ops.less_equal(array_ops.rank(target), 2)
    check_shape_op = control_flow_ops.Assert(
        rank_at_most_two,
        ["target's shape should be either [batch_size, 1] or [batch_size]"])
    with ops.control_dependencies([check_shape_op]):
        leading_dim = array_ops.shape(target)[0]
        column_target = array_ops.reshape(target, shape=[leading_dim, 1])
    return losses.hinge_loss(logits, column_target)
def _get_train_ops(self, features, targets):
    """See base class.

    Falls back to the parent implementation unless the linear optimizer is
    an `SDCAOptimizer`; in that case the train step comes from that
    optimizer's `get_train_step`.
    """
    uses_sdca = isinstance(
        self._linear_optimizer, sdca_optimizer.SDCAOptimizer)
    if not uses_sdca:
        return super(LinearRegressor, self)._get_train_ops(features, targets)
    assert not self._joint_weights, ("_joint_weights is incompatible with"
                                     " SDCAOptimizer.")

    global_step = contrib_variables.get_or_create_global_step()
    scope_name = self._linear_model.get_scope_name()

    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=self._linear_feature_columns,
            num_outputs=self._target_column.num_label_columns,
            weight_collections=[scope_name],
            scope=scope_name))

    with ops.control_dependencies([self._centered_bias()]):
        loss = self._target_column.loss(logits, targets, features)
    logging_ops.scalar_summary("loss", loss)

    _add_bias_column(self._linear_feature_columns, features, bias, targets,
                     columns_to_variables)

    train_op = self._linear_optimizer.get_train_step(
        columns_to_variables,
        self._target_column.weight_column_name,
        self._loss_type(),
        features,
        targets,
        global_step)
    return train_op, loss
def _check_labels_and_scores(boolean_labels, scores, check_shape):
    """Check the dtype/rank of labels/scores, return tensor versions.

    Args:
      boolean_labels: Value convertible to a `Tensor`; must have dtype bool.
      scores: Value convertible to a `Tensor`.
      check_shape: If truthy, attach runtime assertions that both tensors
        have rank 1.

    Returns:
      A pair `(boolean_labels, scores)` of tensors.  When `check_shape` is
      truthy these are identity ops that depend on the rank assertions.

    Raises:
      ValueError: If `boolean_labels` does not have dtype bool.
    """
    with ops.name_scope('_check_labels_and_scores',
                        values=[boolean_labels, scores]):
        boolean_labels = ops.convert_to_tensor(
            boolean_labels, name='boolean_labels')
        scores = ops.convert_to_tensor(scores, name='scores')

        if boolean_labels.dtype != dtypes.bool:
            # Interpolate the dtype into the message; passing it as a second
            # argument to ValueError left a literal "%s" in the text.
            raise ValueError(
                'Argument boolean_labels should have dtype bool. Found: %s'
                % boolean_labels.dtype)

        if not check_shape:
            return boolean_labels, scores

        labels_rank_1 = control_flow_ops.Assert(
            math_ops.equal(1, array_ops.rank(boolean_labels)),
            ['Argument boolean_labels should have rank 1. Found: ',
             boolean_labels.name, array_ops.shape(boolean_labels)])

        scores_rank_1 = control_flow_ops.Assert(
            math_ops.equal(1, array_ops.rank(scores)),
            ['Argument scores should have rank 1. Found: ',
             scores.name, array_ops.shape(scores)])

        with ops.control_dependencies([labels_rank_1, scores_rank_1]):
            # Control dependencies only attach to ops created inside this
            # context.  Returning the existing tensors would drop both
            # assertions, so hand back fresh identity ops instead.
            return (array_ops.identity(boolean_labels),
                    array_ops.identity(scores))
def kl(dist_a, dist_b, allow_nan=False, name=None):
    """Get the KL-divergence KL(dist_a || dist_b).

    Args:
      dist_a: The first distribution.
      dist_b: The second distribution.
      allow_nan: If `False` (default), a runtime error is raised if the KL
        returns NaN values for any batch entry of the given distributions.
        If `True`, the KL may return a NaN for the given entry.
      name: (optional) Name scope to use for created operations.

    Returns:
      A Tensor with the batchwise KL-divergence between dist_a and dist_b.

    Raises:
      NotImplementedError: If no KL method is defined for distribution types
        of dist_a and dist_b.
    """
    type_a, type_b = type(dist_a), type(dist_b)
    kl_fn = _DIVERGENCES.get((type_a, type_b), None)
    if kl_fn is None:
        template = (
            "No KL(dist_a || dist_b) registered for dist_a type %s and dist_b "
            "type %s")
        raise NotImplementedError(
            template % (type_a.__name__, type_b.__name__))

    with ops.name_scope("KullbackLeibler"):
        kl_t = kl_fn(dist_a, dist_b, name=name)
        if allow_nan:
            return kl_t

        # Check KL for NaNs
        kl_t = array_ops.identity(kl_t, name="kl")
        has_no_nan = math_ops.logical_not(
            math_ops.reduce_any(math_ops.is_nan(kl_t)))
        nan_text = (
            "KL calculation between %s and %s returned NaN values "
            "(and was called with allow_nan=False).  Values:"
            % (dist_a.name, dist_b.name))
        nan_check = control_flow_ops.Assert(has_no_nan, [nan_text, kl_t])

        with ops.control_dependencies([nan_check]):
            return array_ops.identity(kl_t, name="checked_kl")
def __init__(self, lam, validate_args=False, allow_nan_stats=True,
             name="Poisson"):
    """Construct Poisson distributions.

    Args:
      lam: Floating point tensor, the rate parameter of the distribution(s).
        `lam` must be positive.
      validate_args: `Boolean`, default `False`.  Whether to assert that
        `lam > 0` as well as inputs to pmf computations are non-negative
        integers. If validate_args is `False`, then `pmf` computations might
        return `NaN`, but can be evaluated at any real value.
      allow_nan_stats: `Boolean`, default `True`.  If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for
        any batch member.  If `True`, batch members with valid parameters
        leading to undefined statistics will return NaN for this statistic.
      name: A name for this distribution.
    """
    with ops.name_scope(name, values=[lam]) as ns:
        # The positivity assertion is only built when validation is on.
        if validate_args:
            lam_checks = [check_ops.assert_positive(lam)]
        else:
            lam_checks = []
        with ops.control_dependencies(lam_checks):
            self._lam = array_ops.identity(lam, name="lam")
            super(Poisson, self).__init__(
                dtype=self._lam.dtype,
                parameters={"lam": self._lam},
                is_continuous=False,
                is_reparameterized=False,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                name=ns)
def _log_prob(self, x):
    """Return the log-sum-exp over components of (cat + component log prob).

    The category terms are the values returned by
    `self._cat_probs(log_probs=True)`.  Every op is created under a control
    dependency on `self._assertions`.
    """
    with ops.control_dependencies(self._assertions):
        x_t = ops.convert_to_tensor(x, name="x")
        component_log_probs = [
            component.log_prob(x_t) for component in self.components
        ]
        cat_log_probs = self._cat_probs(log_probs=True)
        joint_log_probs = [
            cat_term + component_term
            for cat_term, component_term
            in zip(cat_log_probs, component_log_probs)
        ]
        # Stack along a new leading axis, then reduce over that axis.
        stacked = array_ops.pack(joint_log_probs, 0)
        return math_ops.reduce_logsumexp(stacked, [0])
def _assert_valid_mu(self, mu):
    """Return `mu` after validity checks and possibly with assertions.

    Args:
      mu: Tensor to validate against `self._cov`.

    Returns:
      `mu`, tied to dynamic rank/shape assertions when the static shapes
      are not fully defined and `self.validate_args` is truthy.

    Raises:
      TypeError: If `mu` and `self._cov` have different dtypes.
      ValueError: If both static shapes are fully defined and `mu`'s shape
        differs from `cov`'s shape without its last dimension.
    """
    cov = self._cov
    if mu.dtype != cov.dtype:
        raise TypeError(
            "mu and cov must have the same dtype.  Found mu.dtype = %s, "
            "cov.dtype = %s" % (mu.dtype, cov.dtype))

    # Try to validate with static checks.
    mu_shape = mu.get_shape()
    cov_shape = cov.get_shape()
    if mu_shape.is_fully_defined() and cov_shape.is_fully_defined():
        if mu_shape != cov_shape[:-1]:
            raise ValueError(
                "mu.shape and cov.shape[:-1] should match.  Found: mu.shape=%s, "
                "cov.shape=%s" % (mu_shape, cov_shape))
        return mu

    # Static checks could not be run, so possibly do dynamic checks.
    if not self.validate_args:
        return mu

    assert_same_rank = check_ops.assert_equal(
        array_ops.rank(mu) + 1,
        cov.rank(),
        data=["mu should have rank 1 less than cov.  Found: rank(mu) = ",
              array_ops.rank(mu), " rank(cov) = ", cov.rank()],
    )
    # The shape comparison is created under a dependency on the rank check.
    with ops.control_dependencies([assert_same_rank]):
        assert_same_shape = check_ops.assert_equal(
            array_ops.shape(mu),
            cov.vector_shape(),
            data=["mu.shape and cov.shape[:-1] should match.  "
                  "Found: shape(mu) = ",
                  array_ops.shape(mu), " shape(cov) = ", cov.shape()],
        )
    return control_flow_ops.with_dependencies([assert_same_shape], mu)
def get_mean_baseline(ema_decay=0.99, name=None):
    """ExponentialMovingAverage baseline.

    Args:
      ema_decay: decay rate for the ExponentialMovingAverage.
      name: name for variable scope of the ExponentialMovingAverage.

    Returns:
      Callable baseline function that takes the `StochasticTensor` (unused)
      and the downstream `loss`, and returns an EMA of the loss.
    """

    def mean_baseline(_, loss):
        with vs.variable_scope(name, default_name="MeanBaseline"):
            mean_loss = math_ops.reduce_mean(loss)
            averager = training.ExponentialMovingAverage(decay=ema_decay)
            refresh_average = averager.apply([mean_loss])

            with ops.control_dependencies([refresh_average]):
                # Using `identity` causes an op to be added in this context,
                # which triggers the update. Removing the `identity` means
                # nothing is updated.
                return array_ops.identity(averager.average(mean_loss))

    return mean_baseline
def approximate_duality_gap(self):
    """Add operations to compute the approximate duality gap.

    Returns:
      An Operation that computes the approximate duality gap over all
      examples.
    """
    with name_scope('sdca/approximate_duality_gap'):
        _, values_list = self._hashtable.export_sharded()

        shard_sums = []
        for shard_values in values_list:
            # For large tables to_double() below allocates a large temporary
            # tensor that is freed once the sum operation completes. To
            # reduce peak memory usage in cases where we have multiple large
            # tables on a single device, we serialize these operations.
            # Note that we need double precision to get accurate results.
            with ops.device(shard_values.device), \
                    ops.control_dependencies(shard_sums):
                as_double = math_ops.to_double(shard_values)
                shard_sums.append(math_ops.reduce_sum(as_double, 0))

        totals = math_ops.add_n(shard_sums)
        primal_loss = totals[1]
        dual_loss = totals[2]
        example_weights = totals[3]

        # Note: we return NaN if there are no weights or all weights are 0,
        # e.g. if no examples have been processed
        gap_numerator = (
            primal_loss + dual_loss + self._l1_loss() +
            (2.0 * self._l2_loss(self._symmetric_l2_regularization())))
        return gap_numerator / example_weights
def _get_train_ops(self, features, labels):
    """Build the training op and loss.

    Returns:
      A pair `(op, loss)`: the op of the global-step increment, created
      under a dependency on the model's train step, and the loss taken from
      the head's model-fn ops.
    """
    global_step = contrib_variables.get_global_step()
    assert global_step

    logits = self._model.build_model(
        features, self._feature_columns, is_training=True)
    head_result = self._head.head_ops(
        features,
        labels,
        tf.contrib.learn.ModeKeys.TRAIN,
        _noop_training_fn,
        logits=logits)
    train_step = self._model.get_train_step(head_result.loss)

    with ops.control_dependencies(train_step), \
            ops.get_default_graph().colocate_with(global_step):
        increment = state_ops.assign_add(global_step, 1)
    return increment.op, head_result.loss
def training_graph(self, input_data, input_labels, data_spec=None, **kwargs):
    """Build the parent training graph under a dependency on the loss op.

    Args:
      input_data: Forwarded to `_get_loss` and to the parent method.
      input_labels: Forwarded to `_get_loss` and to the parent method.
      data_spec: Forwarded to `_get_loss` only.
      **kwargs: Forwarded to the parent `training_graph`.

    Returns:
      Whatever the parent class's `training_graph` returns.
    """
    loss = self._get_loss(input_data, input_labels, data_spec=data_spec)
    parent = super(TrainingLossForest, self)
    with ops.control_dependencies([loss.op]):
        return parent.training_graph(input_data, input_labels, **kwargs)