The following are 8 code examples extracted from open source Python projects, illustrating how to use util.softmax().
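The projects below do not share a single `util` module, so the exact definition of `util.softmax()` varies between them. As a point of reference, a minimal sketch of such a helper, assuming it only needs to normalize a 1-D NumPy array of logits, could look like this:

import numpy as np

def softmax(x):
    # Shift by the max before exponentiating so np.exp cannot overflow;
    # softmax is invariant to a constant shift, so the result is unchanged.
    e = np.exp(x - np.max(x))
    return e / np.sum(e)

The forward_prop example further down inlines exactly this max-shift-and-normalize computation instead of calling the helper.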
def predict(self, tree):
    if tr.isleaf(tree):
        # output = word vector
        try:
            tree.vector = self.L[:, self.word_map[tree[0]]]
        except KeyError:
            # unknown word: fall back to the UNK vector
            tree.vector = self.L[:, self.word_map[tr.UNK]]
    else:
        # calculate output of child nodes
        self.predict(tree[0])
        self.predict(tree[1])
        # compute output
        lr = np.hstack([tree[0].vector, tree[1].vector])
        tree.vector = np.tanh(
            np.tensordot(self.V, np.outer(lr, lr), axes=([1, 2], [0, 1])) +
            np.dot(self.W, lr) + self.b)

    # softmax over the class scores of every node, leaves included
    import util
    tree.output = util.softmax(np.dot(self.Ws, tree.vector) + self.bs)
    label = np.argmax(tree.output)
    tree.set_label(str(label))

    return tree
def sample_noun(self, vector):
    """Sample a noun at random.

    The probability of word :math:`w` is

    .. math:: \log(p(w)) \propto w^\top v / \tau

    where :math:`v` is the poem vector, :math:`w` the word vector and
    :math:`\tau` the temperature."""
    p = util.softmax(self.noun_vectors.dot(vector) / self.tau)
    return npr.choice(self.nouns, p=p)
def sample_adjective(self, vector):
    """Sample an adjective at random (same method as sample_noun)."""
    p = util.softmax(self.adj_vectors.dot(vector) / self.tau)
    return npr.choice(self.adjs, p=p)
def proba(self, features, params):
    """Return a categorical probability distribution over the vocabulary."""
    product = np.dot(features, params)
    return softmax(product, self.T)
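The two sampling methods above divide the logits by a temperature `self.tau` before calling `util.softmax`, while `proba` instead passes the temperature `self.T` as a second argument. Assuming a signature `softmax(x, T)`, a sketch of such a temperature-scaled variant:

import numpy as np

def softmax(x, T=1.0):
    # Hypothetical temperature-scaled softmax: T > 1 flattens the
    # distribution, T < 1 sharpens it, and T = 1 recovers the
    # standard softmax.
    z = np.asarray(x) / T
    e = np.exp(z - np.max(z))
    return e / np.sum(e)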
def forward_prop(self, tree):
    cost = 0.0
    result = np.zeros((5, 5))
    if tr.isleaf(tree):
        # output = word vector
        try:
            tree.vector = self.L[:, self.word_map[tree[0]]]
        except KeyError:
            tree.vector = self.L[:, self.word_map[tr.UNK]]
        tree.fprop = True
    else:
        # calculate output of child nodes
        lcost, lresult = self.forward_prop(tree[0])
        rcost, rresult = self.forward_prop(tree[1])
        cost += lcost + rcost
        result += lresult + rresult
        # compute output
        lr = np.hstack([tree[0].vector, tree[1].vector])
        tree.vector = np.tanh(
            np.tensordot(self.V, np.outer(lr, lr), axes=([1, 2], [0, 1])) +
            np.dot(self.W, lr) + self.b)

    # softmax, inlined with the usual max shift for numerical stability;
    # computed at every node because back_prop reads tree.output everywhere
    tree.output = np.dot(self.Ws, tree.vector) + self.bs
    tree.output -= np.max(tree.output)
    tree.output = np.exp(tree.output)
    tree.output /= np.sum(tree.output)
    tree.fprop = True

    # cost and confusion-matrix entry for this node
    cost -= np.log(tree.output[int(tree.label())])
    true_label = int(tree.label())
    predicted_label = np.argmax(tree.output)
    result[true_label, predicted_label] += 1
    return cost, result
def back_prop(self, tree, error=None):
    # clear node
    tree.fprop = False

    # softmax grad: for cross-entropy loss this is p - onehot(y)
    deltas = tree.output
    deltas[int(tree.label())] -= 1.0
    self.dWs += np.outer(deltas, tree.vector)
    self.dbs += deltas
    deltas = np.dot(self.Ws.T, deltas)

    if error is not None:
        deltas += error

    deltas *= (1 - tree.vector ** 2)

    # leaf node => update word vectors
    if tr.isleaf(tree):
        try:
            index = self.word_map[tree[0]]
        except KeyError:
            index = self.word_map[tr.UNK]
        self.dL[index] += deltas
        return

    # hidden gradients
    else:
        lr = np.hstack([tree[0].vector, tree[1].vector])
        outer = np.outer(deltas, lr)
        self.dV += (np.outer(lr, lr)[..., None] * deltas).T
        self.dW += outer
        self.db += deltas
        # compute error for children
        deltas = np.dot(self.W.T, deltas)
        deltas += np.tensordot(self.V.transpose((0, 2, 1)) + self.V,
                               outer.T, axes=([1, 0], [0, 1]))
        self.back_prop(tree[0], deltas[:self.dim])
        self.back_prop(tree[1], deltas[self.dim:])
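The softmax-grad step in back_prop relies on the standard identity that for the cross-entropy loss L = -log p_y with p = softmax(z), the gradient with respect to the logits is p - onehot(y); that is exactly what `deltas[int(tree.label())] -= 1.0` computes. A standalone numerical check of this identity (not part of the original project):

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / np.sum(e)

z = np.random.randn(5)  # logits for the 5 sentiment classes
y = 2                   # an arbitrary true label

def nll(z):
    return -np.log(softmax(z)[y])

# analytic gradient: p - onehot(y)
grad = softmax(z)
grad[y] -= 1.0

# central finite differences agree with the analytic gradient
eps = 1e-6
numeric = np.array([(nll(z + eps * np.eye(5)[i]) - nll(z - eps * np.eye(5)[i])) / (2 * eps)
                    for i in range(5)])
assert np.allclose(grad, numeric, atol=1e-5)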
def __init__(self, n_primes, n_composed, features_size, markov_order,
             temperature=1.0, min_len_definitions=2, max_len_definitions=4):
    """
    markov_order: integer at least 1 such that
        p(x_t | x_{t-1}, ..., x_1) = p(x_t | x_{t-1}, ..., x_{t-markov_order})
    temperature: temperature for the softmax
    """
    self.mo = markov_order
    self.np = n_primes
    self.nc = n_composed
    self.V = self.np + self.nc
    self.T = temperature
    self.min_len_def = min_len_definitions
    self.max_len_def = max_len_definitions
    self.features_size = features_size
    # tokens are composed of a..z letters
    alphabet = ''.join([chr(c) for c in range(97, 97 + 26)])  # str(a..z)
    # tokens all have the same size tok_len
    self.tok_len = int(np.log(self.V) / np.log(len(alphabet)) + 1)
    # enumerate all the tokens
    self.vocabulary = []
    for i, tok in zip(range(self.V),
                      itertools.product(alphabet, repeat=self.tok_len)):
        self.vocabulary.append(''.join(tok))
    self.params = uniform(0, 1, (self.mo * features_size, self.V))
    self.features = uniform(0, 1, (self.V, features_size))
    self.dictionary = {}
    for i in range(self.np, self.np + self.nc):
        # sample the length of the definition, sample the definition,
        # store it in the dictionary, then compute the features as a
        # rescaled mean of the features of the definition
        len_diff = self.max_len_def - self.min_len_def
        len_def = np.random.choice(len_diff) + self.min_len_def
        definition = np.random.choice(self.np, size=len_def, replace=False)
        tok = self.vocabulary[i]
        self.dictionary[tok] = [self.vocabulary[e] for e in definition]
        #factor = np.random.beta(a=3, b=2.5)  # closer to 1 than 0
        #factor = np.random.beta(a=1, b=3)  # closer to 0 than 1
        factor = 1  # 1/(8*self.nc)
        f = factor * np.mean([self.features[e] for e in definition], axis=0)
        self.features[i] = f
    self.initial_features = uniform(0, 1, (self.mo, features_size))
def inference(documents, doc_mask, query, query_mask):
    embedding = tf.get_variable(
        'embedding', [FLAGS.vocab_size, FLAGS.embedding_size],
        initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))

    regularizer = tf.nn.l2_loss(embedding)

    doc_emb = tf.nn.dropout(tf.nn.embedding_lookup(embedding, documents),
                            FLAGS.dropout_keep_prob)
    doc_emb.set_shape([None, None, FLAGS.embedding_size])

    query_emb = tf.nn.dropout(tf.nn.embedding_lookup(embedding, query),
                              FLAGS.dropout_keep_prob)
    query_emb.set_shape([None, None, FLAGS.embedding_size])

    with tf.variable_scope('document', initializer=orthogonal_initializer()):
        fwd_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)
        back_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)

        doc_len = tf.reduce_sum(doc_mask, reduction_indices=1)
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell, back_cell, doc_emb,
            sequence_length=tf.to_int64(doc_len), dtype=tf.float32)
        #h_doc = tf.nn.dropout(tf.concat(2, h), FLAGS.dropout_keep_prob)
        h_doc = tf.concat(h, 2)

    with tf.variable_scope('query', initializer=orthogonal_initializer()):
        fwd_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)
        back_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)

        query_len = tf.reduce_sum(query_mask, reduction_indices=1)
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell, back_cell, query_emb,
            sequence_length=tf.to_int64(query_len), dtype=tf.float32)
        #h_query = tf.nn.dropout(tf.concat(2, h), FLAGS.dropout_keep_prob)
        h_query = tf.concat(h, 2)

    # pairwise document/query matching scores and their joint mask
    M = tf.matmul(h_doc, h_query, adjoint_b=True)
    M_mask = tf.to_float(tf.matmul(tf.expand_dims(doc_mask, -1),
                                   tf.expand_dims(query_mask, 1)))

    alpha = softmax(M, 1, M_mask)
    beta = softmax(M, 2, M_mask)

    #query_importance = tf.expand_dims(tf.reduce_mean(beta, reduction_indices=1), -1)
    query_importance = tf.expand_dims(
        tf.reduce_sum(beta, 1) / tf.to_float(tf.expand_dims(doc_len, -1)), -1)

    s = tf.squeeze(tf.matmul(alpha, query_importance), [2])

    # sum attention mass per vocabulary id (attention-sum over token positions)
    unpacked_s = zip(tf.unstack(s, FLAGS.batch_size),
                     tf.unstack(documents, FLAGS.batch_size))
    y_hat = tf.stack([tf.unsorted_segment_sum(attentions, sentence_ids, FLAGS.vocab_size)
                      for (attentions, sentence_ids) in unpacked_s])

    return y_hat, regularizer
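In this last example `softmax` is not `tf.nn.softmax` but a project-local helper that takes an axis and a mask, so that padded document and query positions receive zero attention weight. Assuming the signature `softmax(target, axis, mask)`, a plausible sketch in the same TF 1.x style:

import tensorflow as tf

def softmax(target, axis, mask, epsilon=1e-12):
    # Hypothetical masked softmax along `axis`: masked-out entries are
    # zeroed after exponentiation, and epsilon guards against division
    # by zero when an entire row is masked.
    max_axis = tf.reduce_max(target, axis, keep_dims=True)
    target_exp = tf.exp(target - max_axis) * mask
    normalize = tf.reduce_sum(target_exp, axis, keep_dims=True)
    return target_exp / (normalize + epsilon)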