The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.python.ops.nn_ops.xw_plus_b().
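In every example below, xw_plus_b(x, weights, biases) computes matmul(x, weights) + biases, i.e. a single fully connected layer with no activation; in the seq2seq code it typically projects a decoder output back onto the vocabulary. A minimal sketch of the call on its own (the shapes and variable names here are illustrative, not taken from any of the projects below):

import tensorflow as tf
from tensorflow.python.ops import nn_ops

x = tf.placeholder(tf.float32, [None, 128])   # e.g. a batch of decoder outputs
w = tf.get_variable("proj_w", [128, 10000])   # hypothetical output projection weights
b = tf.get_variable("proj_b", [10000])        # hypothetical output projection biases

logits = nn_ops.xw_plus_b(x, w, b)            # same result as tf.matmul(x, w) + b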
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Get a loop_function that extracts the previous symbol and embeds it.

    Args:
        embedding: embedding tensor for symbols.
        output_projection: None or a pair (W, B). If provided, each fed previous
            output will first be multiplied by W and added B.
        update_embedding: Boolean; if False, the gradients will not propagate
            through the embeddings.

    Returns:
        A loop function.
    """
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(
                prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev
    return loop_function
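The loop_function returned above is meant to be passed to a decoder (for example, the loop_function argument of the decoders in tf.contrib.legacy_seq2seq), which calls it with the previous cell output at each step. A sketch of a direct call, with made-up sizes (vocabulary 1000, embedding size 64, decoder output size 128, batch 32):

embedding = tf.get_variable("embedding", [1000, 64])
proj_w = tf.get_variable("proj_w", [128, 1000])
proj_b = tf.get_variable("proj_b", [1000])

loop_fn = _extract_argmax_and_embed(embedding, output_projection=(proj_w, proj_b))
prev_output = tf.zeros([32, 128])     # stand-in for the previous decoder cell output
emb_prev = loop_fn(prev_output, 1)    # shape [32, 64]: embedding of each argmax symbol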
def _extract_argmax_and_one_hot(one_hot_size, output_projection=None):
    """Get a loop_function that extracts the previous symbol and builds a
    one-hot vector for it.

    Args:
        one_hot_size: total size of the one-hot vector.
        output_projection: None or a pair (W, B). If provided, each fed previous
            output will first be multiplied by W and added B.

    Returns:
        A loop function.
    """
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        emb_prev = tf.one_hot(prev_symbol, one_hot_size)
        return emb_prev
    return loop_function
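Unlike the embedding variant, this loop function feeds back a one-hot vector of width one_hot_size rather than a learned embedding. A toy illustration of that last step (indices made up):

prev_symbol = tf.constant([2, 0], dtype=tf.int64)   # argmax indices for a batch of 2
emb_prev = tf.one_hot(prev_symbol, 4)               # [[0., 0., 1., 0.], [1., 0., 0., 0.]]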
def _argmax_or_mcsearch(embedding, output_projection=None, update_embedding=True, mc_search=False):
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(prev, output_projection[0], output_projection[1])

        if isinstance(mc_search, bool):
            # tf.multinomial samples one symbol per row from the distribution given
            # by prev; reshape([-1]) flattens the [batch, 1] draw to [batch].
            prev_symbol = (tf.reshape(tf.multinomial(prev, 1), [-1]) if mc_search
                           else math_ops.argmax(prev, 1))
        else:
            prev_symbol = tf.cond(mc_search,
                                  lambda: tf.reshape(tf.multinomial(prev, 1), [-1]),
                                  lambda: tf.argmax(prev, 1))

        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)  # embed the chosen symbol
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev
    return loop_function
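The mc_search branch replaces the greedy argmax with sampling: tf.multinomial treats each row of prev as unnormalized log-probabilities and draws one symbol per row, which is what a Monte Carlo rollout needs for exploration. A small comparison with made-up logits:

logits = tf.constant([[2.0, 1.0, 0.1]])
greedy = tf.argmax(logits, 1)                           # always [0]
sampled = tf.reshape(tf.multinomial(logits, 1), [-1])   # one index drawn from softmax(logits)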
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Get a loop_function that extracts the previous symbol and embeds it.

    Args:
        embedding: embedding tensor for symbols.
        output_projection: None or a pair (W, B). If provided, each fed previous
            output will first be multiplied by W and added B.
        update_embedding: Boolean; if False, the gradients will not propagate
            through the embeddings.

    Returns:
        A loop function.
    """
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(
                prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)  # index of the most probable symbol
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)  # embed that symbol
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev
    return loop_function
def sequence_loss_by_mle(logits, targets, vocab_size, sequence_length, batch_size, output_projection=None):
    # print("logits: ", np.shape(logits[0]))
    # logits: [seq_len, batch_size, emb_dim]
    # targets: [seq_len, batch_size] =====transpose====> [batch_size, seq_len]
    # labels = tf.to_int32(tf.transpose(targets))

    # targets: [seq_len, batch_size] ====reshape([-1])====> [seq_len * batch_size]
    labels = tf.to_int32(tf.reshape(targets, [-1]))

    if output_projection is not None:
        # logits = nn_ops.xw_plus_b(logits, output_projection[0], output_projection[1])
        logits = [tf.matmul(logit, output_projection[0]) + output_projection[1]
                  for logit in logits]

    reshape_logits = tf.reshape(logits, [-1, vocab_size])  # [seq_len * batch_size, vocab_size]

    prediction = tf.clip_by_value(reshape_logits, 1e-20, 1.0)

    pretrain_loss = -tf.reduce_sum(
        # [seq_len * batch_size, vocab_size]
        tf.one_hot(labels, vocab_size, 1.0, 0.0) * tf.log(prediction)
    ) / (sequence_length * batch_size)
    return pretrain_loss
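For reference, the commented-out xw_plus_b call and the list comprehension compute the same per-step projection; xw_plus_b expects a single 2-D matrix, which is presumably why the projection is applied per time step here. A sketch of the equivalence on one step, with hypothetical sizes:

step_output = tf.random_normal([32, 128])       # one decoder step
proj_w = tf.get_variable("w_proj", [128, 5000])
proj_b = tf.get_variable("b_proj", [5000])

a = nn_ops.xw_plus_b(step_output, proj_w, proj_b)
b = tf.matmul(step_output, proj_w) + proj_b     # identical to `a`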
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Get a loop_function that extracts the previous symbol and embeds it.

    Args:
        embedding: embedding tensor for symbols.
        output_projection: None or a pair (W, B). If provided, each fed previous
            output will first be multiplied by W and added B.
        update_embedding: Boolean; if False, the gradients will not propagate
            through the embeddings.

    Returns:
        A loop function.
    """
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev
    return loop_function
def project_and_apply_input_bias(logits, output_projection, input_bias):
    if output_projection is not None:
        logits = nn_ops.xw_plus_b(
            logits, output_projection[0], output_projection[1])

    # Apply softmax to ensure all tokens have a positive value.
    probs = tf.nn.softmax(logits)

    # Apply input bias, which is a mask of shape [batch, vocab len]
    # where each token from the input in addition to all "corrective"
    # tokens are set to 1.0.
    return tf.multiply(probs, input_bias)  # element-wise mask (tf.multiply; formerly tf.mul)
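The effect of the mask can be seen on a toy batch: only the vocabulary entries flagged with 1.0 in input_bias keep any probability mass (all numbers below are made up):

probs = tf.constant([[0.5, 0.2, 0.2, 0.1],
                     [0.4, 0.3, 0.2, 0.1]])
input_bias = tf.constant([[1.0, 0.0, 1.0, 0.0],   # tokens 0 and 2 allowed
                          [0.0, 1.0, 0.0, 1.0]])  # tokens 1 and 3 allowed
masked = tf.multiply(probs, input_bias)           # all other tokens go to zero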
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Get a loop_function that extracts the previous symbol and embeds it.

    Args:
        embedding: embedding tensor for symbols.
        output_projection: None or a pair (W, B). If provided, each fed previous
            output will first be multiplied by W and added B.
        update_embedding: Boolean; if False, the gradients will not propagate
            through the embeddings.

    Returns:
        A loop function.
    """
    def loop_function(prev, _):
        # decoder outputs thus far.
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(
                prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev, prev_symbol
    return loop_function
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(
                prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev
    return loop_function
def sequence_softmax(inputs, noutput, scope=None, name=None, linear_name=None):
    """Run a softmax layer over all the time steps of an input sequence.

    Args:
        inputs: (length, batch_size, depth) tensor
        noutput: output depth
        scope: optional scope name
        name: optional name for output tensor
        linear_name: name for linear (pre-softmax) output

    Returns:
        A tensor of size (length, batch_size, noutput).
    """
    length, _, ninputs = _shape(inputs)
    inputs_u = array_ops.unstack(inputs)
    output_u = []
    with variable_scope.variable_scope(scope, "SequenceSoftmax", [inputs]):
        initial_w = random_ops.truncated_normal([0 + ninputs, noutput], stddev=0.1)
        initial_b = constant_op.constant(0.1, shape=[noutput])
        w = variables.model_variable("weights", initializer=initial_w)
        b = variables.model_variable("biases", initializer=initial_b)
        for i in xrange(length):
            with variable_scope.variable_scope(scope, "SequenceSoftmaxStep",
                                               [inputs_u[i]]):
                # TODO(tmb) consider using slim.fully_connected(...,
                # activation_fn=tf.nn.softmax)
                linear = nn_ops.xw_plus_b(inputs_u[i], w, b, name=linear_name)
                output = nn_ops.softmax(linear)
                output_u += [output]
        outputs = array_ops.stack(output_u, name=name)
    return outputs
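A usage sketch, assuming the helpers this function relies on (_shape, variables.model_variable, and the TF 1.x modules imported as array_ops, nn_ops, and so on) are in scope; the sizes are illustrative:

inputs = tf.random_normal([20, 8, 32])           # (length, batch_size, depth)
outputs = sequence_softmax(inputs, noutput=10)   # shape (20, 8, 10)
# All time steps share the same "weights" and "biases" variables created above.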