The following 36 code examples, extracted from open-source Python projects, illustrate how to use keras.backend.softmax().
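Before the extracted examples, here is a minimal, self-contained sketch of what keras.backend.softmax() computes: it exponentiates the inputs and normalizes them along the last axis so each row becomes a probability distribution. The toy logits and the TensorFlow-backed Keras setup are assumptions for illustration only, not part of any example below.

```python
import numpy as np
from keras import backend as K

# Toy logits (illustrative values): two samples, three classes each.
logits = K.variable(np.array([[1.0, 2.0, 3.0],
                              [0.0, 0.0, 0.0]]))

probs = K.eval(K.softmax(logits))  # softmax over the last axis
print(probs)               # rows are positive and sum to 1
print(probs.sum(axis=-1))  # -> [1. 1.]
```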
def call(self, x, mask=None):
    mean = super(IntraAttention, self).call(x, mask)
    # x: (batch_size, input_length, input_dim)
    # mean: (batch_size, input_dim)
    ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)  # (1, input_length)
    # (batch_size, input_length, input_dim)
    tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
    if mask is not None:
        if K.ndim(mask) > K.ndim(x):
            # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
            # TODO: Fix Bidirectional.
            mask = K.any(mask, axis=(-2, -1))
        if K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
        x = switch(mask, x, K.zeros_like(x))
    # (batch_size, input_length, proj_dim)
    projected_combination = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
    scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
    weights = K.softmax(scores)  # (batch_size, input_length)
    attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
    return attended_x
def step(self, inputs, states):
    h_tm1 = states[0]  # previous memory
    #B_U = states[1]  # dropout matrices for recurrent units
    #B_W = states[2]
    h_tm1a = K.dot(h_tm1, self.Wa)
    eij = K.dot(K.tanh(h_tm1a + K.dot(inputs[:, :self.h_dim], self.Ua)), self.Va)
    eijs = K.repeat_elements(eij, self.h_dim, axis=1)
    #alphaij = K.softmax(eijs)  # batchsize * lenh h batchsize * lenh * ndim
    #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
    #cisum = K.sum(ci, axis=1)
    cisum = eijs*inputs[:, :self.h_dim]
    #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

    zr = K.sigmoid(K.dot(inputs[:, self.h_dim:], self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
    zi = zr[:, :self.units]
    ri = zr[:, self.units: 2 * self.units]
    si_ = K.tanh(K.dot(inputs[:, self.h_dim:], self.W) + K.dot(ri*h_tm1, self.U) + K.dot(cisum, self.C))
    si = (1-zi) * h_tm1 + zi * si_
    return si, [si]  #h_tm1, [h_tm1]
def _softmax(x, dim):
    """Computes softmax along a specified dim. Keras currently lacks this
    feature.
    """
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        return tf.nn.softmax(x, dim)
    elif K.backend() == 'cntk':
        import cntk
        return cntk.softmax(x, dim)
    elif K.backend() == 'theano':
        # Theano cannot softmax along an arbitrary dim.
        # So, we will shuffle `dim` to -1 and un-shuffle after softmax.
        perm = np.arange(K.ndim(x))
        perm[dim], perm[-1] = perm[-1], perm[dim]
        x_perm = K.permute_dimensions(x, perm)
        output = K.softmax(x_perm)

        # Permute back: the swap permutation is its own inverse.
        output = K.permute_dimensions(output, perm)
        return output
    else:
        raise ValueError("Backend '{}' not supported".format(K.backend()))
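As a usage sketch for the idea above (softmax along an arbitrary axis), the snippet below builds the same permute / softmax / permute-back pattern from documented keras.backend ops only, without the backend dispatch. The helper name `softmax_along` and the random input are illustrative assumptions, not part of the example.

```python
import numpy as np
from keras import backend as K

def softmax_along(x, axis):
    # Illustrative helper (not from the example above): move `axis` last,
    # apply K.softmax, then move it back. The swap pattern is its own inverse.
    perm = list(range(K.ndim(x)))
    perm[axis], perm[-1] = perm[-1], perm[axis]
    out = K.softmax(K.permute_dimensions(x, perm))
    return K.permute_dimensions(out, perm)

x = K.variable(np.random.rand(2, 4, 3))
probs = K.eval(softmax_along(x, axis=1))
print(probs.sum(axis=1))  # every slice along axis 1 sums to 1
```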
def call(self, x, mask=None):
    y = K.dot(x, self.att_W)
    if not self.activation:
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, y, axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, y, axes=[0, 2])
    elif self.activation == 'tanh':
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
    weights = K.softmax(weights)
    out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]), [0, 2, 1])
    if self.op == 'attsum':
        out = out.sum(axis=1)
    elif self.op == 'attmean':
        out = out.sum(axis=1) / mask.sum(axis=1, keepdims=True)
    return K.cast(out, K.floatx())
def content_addressing(memory_t, key_vector_t, key_strength_t):
    '''
    Focusing by content.
    :param memory_t: external memory.
    :param key_vector_t: key vector.
    :param key_strength_t: the strength of key.
    :return:
    '''
    # print("content addressing:")
    # print(">>memory_t")
    # print(key_vector_t)
    # print(">>key_vector_t")
    # print(key_vector_t)
    # print(">>key_strength_t")
    # print(key_strength_t)
    _weight_content_t = \
        key_strength_t * cosine_similarity_group(key_vector_t, memory_t)
    weight_content_t = softmax(_weight_content_t)
    # print("_weight_content_t")
    # print(_weight_content_t)
    return weight_content_t
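The example relies on a project-specific `cosine_similarity_group` helper. The sketch below reproduces the same content-addressing idea (key strength times cosine similarity, then softmax) with plain keras.backend ops; the memory size, key, and strength value are made-up for illustration.

```python
import numpy as np
from keras import backend as K

memory = K.variable(np.random.rand(8, 16))  # (slots, width), illustrative
key = K.variable(np.random.rand(1, 16))     # (1, width)
beta = 5.0                                  # key strength

# Cosine similarity between the key and every memory slot -> (1, slots)
dots = K.dot(key, K.transpose(memory))
norms = K.sqrt(K.sum(key * key)) * K.sqrt(K.sum(memory * memory, axis=1))
similarity = dots / (norms + K.epsilon())

weights = K.softmax(beta * similarity)  # content-based addressing weights
print(K.eval(K.sum(weights)))           # -> 1.0
```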
def _get_weight_vector(self, M, w_tm1, k, beta, g, s, gamma):
    # M = tf.Print(M, [M, w_tm1, k], message='get weights beg1: ')
    # M = tf.Print(M, [beta, g, s, gamma], message='get weights beg2: ')
    # Content addressing, see Chapter 3.3.1:
    num = beta * _cosine_distance(M, k)
    w_c = K.softmax(num)  # It turns out that equation (5) is just softmax.
    # Location addressing, see Chapter 3.3.2:
    # Equation 7:
    w_g = (g * w_c) + (1-g)*w_tm1
    # C_s is the circular convolution
    #C_w = K.sum((self.C[None, :, :, :] * w_g[:, None, None, :]),axis=3)
    # Equation 8:
    # TODO: Explain
    C_s = K.sum(K.repeat_elements(self.C[None, :, :, :], self.batch_size, axis=0) * s[:,:,None,None], axis=1)
    w_tilda = K.batch_dot(C_s, w_g)
    # Equation 9:
    w_out = _renorm(w_tilda ** gamma)
    return w_out
def register(self, info_tensor, param_tensor):
    self.info_tensor = info_tensor  #(128,1)
    if self.stddev_fix:
        self.param_tensor = param_tensor
        mean = K.clip(param_tensor[:, 0].dimshuffle(0, 'x'), self.min, self.max)
        std = 1.0
    else:
        self.param_tensor = param_tensor  # 2
        mean = K.clip(param_tensor[:, 0].dimshuffle(0, 'x'), self.min, self.max)
        # std = K.maximum( param_tensor[:, 1].dimshuffle(0, 'x'), 0)
        std = K.sigmoid( param_tensor[:, 1].dimshuffle(0, 'x') )
    e = (info_tensor-mean)/(std + K.epsilon())
    self.log_Q_c_given_x = \
        K.sum(-0.5*np.log(2*np.pi) -K.log(std+K.epsilon()) -0.5*(e**2), axis=1) * self.lmbd
    # m = Sequential([ Activation('softmax', input_shape=(self.n,)), Lambda(lambda x: K.log(x), lambda x: x) ])
    return K.reshape(self.log_Q_c_given_x, (-1, 1))
def __init__(self, vocab_size, sequence_size, setting=None,
             checkpoint_path="", temperature=10, tying=False):
    super().__init__(vocab_size, sequence_size, setting, checkpoint_path)
    self.temperature = temperature
    self.tying = tying
    self.gamma = self.setting.gamma

    if tying:
        self.model.pop()  # remove activation
        self.model.pop()  # remove projection (use self embedding)
        self.model.add(Lambda(lambda x: K.dot(x, K.transpose(self.embedding.embeddings))))
        self.model.add(Activation("softmax"))
def augmented_loss(self, y_true, y_pred):
    _y_pred = Activation("softmax")(y_pred)
    loss = K.categorical_crossentropy(_y_pred, y_true)

    # y is (batch x seq x vocab)
    y_indexes = K.argmax(y_true, axis=2)  # turn one hot to index. (batch x seq)
    y_vectors = self.embedding(y_indexes)  # lookup the vector (batch x seq x vector_length)

    #v_length = self.setting.vector_length
    #y_vectors = K.reshape(y_vectors, (-1, v_length))
    #y_t = K.map_fn(lambda v: K.dot(self.embedding.embeddings, K.reshape(v, (-1, 1))), y_vectors)
    #y_t = K.squeeze(y_t, axis=2)  # unknown but necessary operation
    #y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))

    # vector x embedding dot products (batch x seq x vocab)
    y_t = tf.tensordot(y_vectors, K.transpose(self.embedding.embeddings), 1)
    y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))  # explicitly set shape
    y_t = K.softmax(y_t / self.temperature)
    _y_pred_t = Activation("softmax")(y_pred / self.temperature)
    aug_loss = kullback_leibler_divergence(y_t, _y_pred_t)
    loss += (self.gamma * self.temperature) * aug_loss

    return loss
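The loss above divides logits by a temperature before the softmax to obtain softened targets for the KL term. A standalone sketch of that effect, with made-up logits and the same temperature value of 10:

```python
import numpy as np
from keras import backend as K

logits = K.variable(np.array([[2.0, 1.0, 0.1]]))  # illustrative logits
temperature = 10.0                                # plays the role of self.temperature

sharp = K.eval(K.softmax(logits))               # ordinary softmax: fairly peaked
soft = K.eval(K.softmax(logits / temperature))  # softened targets: close to uniform
print(sharp)
print(soft)
```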
def mean_log_Gaussian_like(y_true, parameters):
    """Mean Log Gaussian Likelihood distribution
    Note: The 'c' variable is obtained as global variable
    """

    #Note: The output size will be (c + 2) * m = 6
    c = 1  #The number of outputs we want to predict
    m = 2  #The number of distributions we want to use in the mixture
    components = K.reshape(parameters, [-1, c + 2, m])
    mu = components[:, :c, :]
    sigma = components[:, c, :]
    alpha = components[:, c + 1, :]
    alpha = K.softmax(K.clip(alpha, 1e-8, 1.))

    exponent = K.log(alpha) - .5 * float(c) * K.log(2 * np.pi) \
        - float(c) * K.log(sigma) \
        - K.sum((K.expand_dims(y_true, 2) - mu)**2, axis=1)/(2*(sigma)**2)

    log_gauss = log_sum_exp(exponent, axis=1)
    res = - K.mean(log_gauss)
    return res
def mean_log_LaPlace_like(y_true, parameters):
    """Mean Log Laplace Likelihood distribution
    Note: The 'c' variable is obtained as global variable
    """

    #Note: The output size will be (c + 2) * m = 6
    c = 1  #The number of outputs we want to predict
    m = 2  #The number of distributions we want to use in the mixture
    components = K.reshape(parameters, [-1, c + 2, m])
    mu = components[:, :c, :]
    sigma = components[:, c, :]
    alpha = components[:, c + 1, :]
    alpha = K.softmax(K.clip(alpha, 1e-2, 1.))

    exponent = K.log(alpha) - float(c) * K.log(2 * sigma) \
        - K.sum(K.abs(K.expand_dims(y_true, 2) - mu), axis=1)/(sigma)

    log_gauss = log_sum_exp(exponent, axis=1)
    res = - K.mean(log_gauss)
    return res
def summarize_memory(o_t, mem_tm1):
    '''
    This method selects the relevant parts of the memory given the read output and summarizes the
    memory. Implements Equations 2-3 or 8-11 in the paper.
    '''
    # Selecting relevant memory slots, Equation 2
    z_t = K.softmax(K.sum(K.expand_dims(o_t, dim=1) * mem_tm1, axis=2))  # (batch_size, input_length)
    # Summarizing memory, Equation 3
    m_rt = K.sum(K.expand_dims(z_t, dim=2) * mem_tm1, axis=1)  # (batch_size, output_dim)
    return z_t, m_rt
def call(self, logits):
    u = K.random_uniform(K.shape(logits), 0, 1)
    gumbel = - K.log(-K.log(u + 1e-20) + 1e-20)
    return K.in_train_phase(
        K.softmax( ( logits + gumbel ) / self.tau ),
        K.softmax( ( logits + gumbel ) / self.min ))
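A minimal sketch of the Gumbel-softmax sampling used in this call, outside of any layer. The logits and the temperature value are assumptions; the construction (uniform noise, double log, temperature-scaled softmax) mirrors the lines above.

```python
import numpy as np
from keras import backend as K

def gumbel_softmax_sample(logits, tau):
    # Add Gumbel noise to the logits, then apply a temperature-scaled softmax.
    u = K.random_uniform(K.shape(logits), 0, 1)
    gumbel = -K.log(-K.log(u + 1e-20) + 1e-20)
    return K.softmax((logits + gumbel) / tau)

logits = K.variable(np.log([[0.7, 0.2, 0.1]]))  # illustrative class probabilities
sample = K.eval(gumbel_softmax_sample(logits, tau=0.5))
print(sample)               # close to a one-hot vector at low tau
print(sample.sum(axis=-1))  # -> [1.]
```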
def loss(self):
    logits = self.logits
    q = K.softmax(logits)
    log_q = K.log(q + 1e-20)
    return - K.mean(q * (log_q - K.log(1.0/K.int_shape(logits)[-1])),
                    axis=tuple(range(1, len(K.int_shape(logits)))))
def softmax(x):
    # print("x")
    # print(x)
    _softmax = K.softmax(x)
    # print("softmax(x)")
    # print(_softmax)
    return _softmax
def call(self, x, mask=None):
    output_mu = K.dot(x, self.W_mu)
    output_sigma = K.dot(x, self.W_sigma)
    output_pi = K.dot(x, self.W_pi)
    if self.bias:
        output_mu += self.b_mu
        output_sigma += self.b_sigma
        output_pi += self.b_pi
    return K.concatenate([output_mu,
                          K.exp(output_sigma),
                          K.softmax(output_pi)],
                         axis=-1)
def create_entropy_loss(policy_t, beta):
    def entropy_loss_func(p_t):
        log_p_t = tf.nn.log_softmax(p_t)
        sigm_p_t = K.softmax(p_t)
        entropy_t = beta * K.sum(sigm_p_t * log_p_t, axis=-1, keepdims=True)
        return entropy_t

    entropy_loss_t = Lambda(entropy_loss_func, name="entropy_loss",
                            output_shape=(1,))(policy_t)
    return entropy_loss_t
def simple_context(self, X, mask):
    """
    Simple context calculation layer logic
    X = (batch_size, time_steps, units)
    time_steps are nothing but number of words in our case.
    """
    # segregate heading and desc
    desc, head = X[:, :max_len_desc, :], X[:, max_len_desc:, :]
    # segregate activation and context part
    head_activations, head_words = head[:, :, :activation_rnn_size], head[:, :, activation_rnn_size:]
    desc_activations, desc_words = desc[:, :, :activation_rnn_size], desc[:, :, activation_rnn_size:]

    # p=(batch_size, length_desc_words, rnn_units)
    # q=(batch_size, length_headline_words, rnn_units)
    # K.dot(p,q) = (batch_size, length_desc_words, length_headline_words)
    activation_energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))

    # make sure we don't use description words that are masked out
    activation_energies = activation_energies + -1e20 * K.expand_dims(
        1. - K.cast(mask[:, :max_len_desc], 'float32'), 1)

    # for every head word compute weights for every desc word
    activation_energies = K.reshape(activation_energies, (-1, max_len_desc))
    activation_weights = K.softmax(activation_energies)
    activation_weights = K.reshape(activation_weights, (-1, max_len_head, max_len_desc))

    # for every head word compute weighted average of desc words
    desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
    return K.concatenate((desc_avg_word, head_words))
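The heart of simple_context is a batched dot product between two sets of activations, a softmax over the resulting energies, and a weighted average. The standalone sketch below shows that pattern with made-up shapes (a single query vector attending over a short sequence); none of the names come from the example.

```python
import numpy as np
from keras import backend as K

query = K.variable(np.random.rand(2, 1, 8))     # (batch, 1, units), illustrative
sequence = K.variable(np.random.rand(2, 5, 8))  # (batch, steps, units)

energies = K.batch_dot(query, sequence, axes=(2, 2))   # (batch, 1, steps)
weights = K.softmax(energies)                          # softmax over the steps
context = K.batch_dot(weights, sequence, axes=(2, 1))  # (batch, 1, units)

print(K.eval(K.sum(weights, axis=-1)))  # each attention row sums to 1
```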
def neural_network(self, X):
    """pi, mu, sigma = NN(x; theta)"""
    X_image = tf.reshape(X, [-1, IMAGE_ROWS, IMAGE_COLS, 1])

    conv1 = Convolution2D(32, 5, 5, border_mode='same',
                          activation=K.relu,
                          W_regularizer=l2(0.01),
                          input_shape=(IMAGE_ROWS, IMAGE_COLS, 1))(X_image)
    pool1 = MaxPooling2D(pool_size=(2, 2), border_mode='same')(conv1)
    conv2 = Convolution2D(64, 5, 5, border_mode='same',
                          activation=K.relu,
                          W_regularizer=l2(0.01))(pool1)
    pool2 = MaxPooling2D(pool_size=(2, 2), border_mode='same')(conv2)
    pool2_flat = tf.reshape(pool2, [-1, IMAGE_ROWS//4 * IMAGE_COLS//4 * 64])
    hidden1 = Dense(1024, W_regularizer=l2(0.01), activation=K.relu)(pool2_flat)
    hidden2 = Dense(64, W_regularizer=l2(0.01), activation=K.relu)(hidden1)

    self.mus = Dense(self.K)(hidden2)
    self.sigmas = Dense(self.K, activation=K.softplus)(hidden2)
    self.pi = Dense(self.K, activation=K.softmax)(hidden2)
def step(self, x, states):
    h_tm1, c_tm1, y_tm1, B, U, H = states
    s = K.dot(c_tm1, self.W_h) + self.b_h
    s = K.repeat(s, self.input_length)
    energy = time_distributed_dense(s + H, self.W_a, self.b_a)
    energy = K.squeeze(energy, 2)
    alpha = K.softmax(energy)
    alpha = K.repeat(alpha, self.input_dim)
    alpha = K.permute_dimensions(alpha, (0, 2, 1))
    weighted_H = H * alpha
    v = K.sum(weighted_H, axis=1)
    y, new_states = super(AttentionDecoder, self).step(v, states[:-1])
    return y, new_states
def register(self, info_tensor, param_tensor):
    self.info_tensor = info_tensor
    self.param_tensor = param_tensor
    self.log_Q_c_given_x = \
        K.sum(K.log(K.softmax(param_tensor)+K.epsilon()) * info_tensor, axis=1) * self.lmbd
    # m = Sequential([ Activation('softmax', input_shape=(self.n,)), Lambda(lambda x: K.log(x), lambda x: x) ])
    return K.reshape(self.log_Q_c_given_x, (-1, 1))
def call(self, x, mask=None):
    y = K.dot(x, self.att_W)
    if not self.activation:
        weights = K.theano.tensor.tensordot(self.att_v, y, axes=[0, 2])
    elif self.activation == 'tanh':
        weights = K.theano.tensor.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
    weights = K.softmax(weights)
    out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]), [0, 2, 1])
    if self.op == 'attsum':
        out = out.sum(axis=1)
    elif self.op == 'attmean':
        out = out.sum(axis=1) / mask.sum(axis=1, keepdims=True)
    return K.cast(out, K.floatx())
def call(self, x, mask=None):
    ch_idx = self.channel_index
    l_idx = K.ndim(x) - 1  # last index
    x = K.permute_dimensions(
        x, tuple(i for i in range(K.ndim(x)) if i != ch_idx) + (ch_idx,))
    sh = K.shape(x)
    x = K.reshape(x, (-1, sh[-1]))
    x = K.softmax(x)
    x = K.reshape(x, sh)
    # Wrap the ranges in list() so the concatenation also works on Python 3.
    x = K.permute_dimensions(
        x, tuple(list(range(ch_idx)) + [l_idx] + list(range(ch_idx, l_idx))))
    return x

# Works TH and TF
def tf_test_error_rate(model, x, X_test, y_test):
    """
    Compute test error.
    """
    assert len(X_test) == len(y_test)

    # Predictions for the test set
    eval_prediction = K.softmax(model(x))

    predictions = batch_eval([x], [eval_prediction], [X_test])[0]

    return error_rate(predictions, y_test)
def perplexity(cls, y_true, y_pred):
    _y_pred = Activation("softmax")(y_pred)
    return super(AugmentedModel, cls).perplexity(y_true, _y_pred)
def compute_softmax(logits, temp):
    z = logits + sampling_gumbel( K.shape(logits) )
    return K.softmax( z / temp )
def call(self, x, mask=None):
    # x: (batch_size, input_length, input_dim) where input_length = head_size + 2
    head_encoding = x[:, :-2, :]  # (batch_size, head_size, input_dim)
    prep_encoding = x[:, -2, :]  # (batch_size, input_dim)
    child_encoding = x[:, -1, :]  # (batch_size, input_dim)
    if self.composition_type == 'HPCD':
        # TODO: The following line may not work with TF.
        # (batch_size, head_size, input_dim, 1) * (1, head_size, input_dim, proj_dim)
        head_proj_prod = K.expand_dims(head_encoding) * K.expand_dims(self.dist_proj_head, dim=0)
        head_projection = K.sum(head_proj_prod, axis=2)  # (batch_size, head_size, proj_dim)
    else:
        head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, head_size, proj_dim)
    prep_projection = K.expand_dims(K.dot(prep_encoding, self.proj_prep), dim=1)  # (batch_size, 1, proj_dim)
    child_projection = K.expand_dims(K.dot(child_encoding, self.proj_child), dim=1)  # (batch_size, 1, proj_dim)
    #(batch_size, head_size, proj_dim)
    if self.composition_type == 'HPCT':
        composed_projection = K.tanh(head_projection + prep_projection + child_projection)
    elif self.composition_type == 'HPC' or self.composition_type == "HPCD":
        prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, 1, proj_dim)
        composed_projection = K.tanh(head_projection + prep_child_projection)
    else:
        # Composition type in HC
        composed_projection = K.tanh(head_projection + child_projection)
    for hidden_layer in self.hidden_layers:
        composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, head_size, proj_dim)
    # (batch_size, head_size)
    head_word_scores = K.squeeze(K.dot(composed_projection, self.scorer), axis=-1)
    if mask is None:
        attachment_probabilities = K.softmax(head_word_scores)  # (batch_size, head_size)
    else:
        if K.ndim(mask) > 2:
            # This means this layer came after a Bidirectional layer. Keras has this bug which
            # concatenates input masks instead of output masks.
            # TODO: Fix Bidirectional instead.
            mask = K.any(mask, axis=(-2, -1))
        # We need to do a masked softmax.
        exp_scores = K.exp(head_word_scores)  # (batch_size, head_size)
        head_mask = mask[:, :-2]  # (batch_size, head_size)
        # (batch_size, head_size)
        masked_exp_scores = switch(head_mask, exp_scores, K.zeros_like(head_encoding[:, :, 0]))
        # (batch_size, 1). Adding epsilon to avoid division by 0. But epsilon is float64.
        exp_sum = K.cast(K.expand_dims(K.sum(masked_exp_scores, axis=1) + K.epsilon()), 'float32')
        attachment_probabilities = masked_exp_scores / exp_sum  # (batch_size, head_size)
    return attachment_probabilities
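When a mask is present, the layer above computes the softmax by hand: exponentiate, zero out the masked positions, and renormalize. A compact sketch of that masked softmax with made-up scores and a 0/1 padding mask:

```python
import numpy as np
from keras import backend as K

scores = K.variable(np.random.rand(2, 4))      # illustrative scores
mask = K.variable(np.array([[1., 1., 1., 0.],  # 1 = real token, 0 = padding
                            [1., 1., 0., 0.]]))

exp_scores = K.exp(scores) * mask              # zero out masked positions
probs = exp_scores / (K.sum(exp_scores, axis=1, keepdims=True) + K.epsilon())

print(K.eval(K.sum(probs, axis=1)))  # each row sums (approximately) to 1
```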
def call(self, x, mask=None):
    # x[0]: (batch_size, input_length, input_dim)
    # x[1]: (batch_size, 1) indices of prepositions
    # Optional: x[2]: (batch_size, input_length - 2)
    assert isinstance(x, list) or isinstance(x, tuple)
    encoded_sentence = x[0]
    prep_indices = K.squeeze(x[1], axis=-1)  #(batch_size,)
    batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)
    if self.with_attachment_probs:
        # We're essentially doing K.argmax(x[2]) here, but argmax is not differentiable!
        head_probs = x[2]
        head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
        # (batch_size, input_length)
        padded_head_probs = K.concatenate([head_probs, head_probs_padding])
        # (batch_size, 1)
        max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
        # (batch_size, input_length, 1)
        max_head_prob_indices = K.expand_dims(K.equal(padded_head_probs, max_head_probs))
        # (batch_size, input_length, input_dim)
        masked_head_encoding = K.switch(max_head_prob_indices, encoded_sentence, K.zeros_like(encoded_sentence))
        # (batch_size, input_dim)
        head_encoding = K.sum(masked_head_encoding, axis=1)
    else:
        head_indices = prep_indices - 1  # (batch_size,)
        head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)
    prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
    child_encoding = encoded_sentence[batch_indices, prep_indices+1, :]  # (batch_size, input_dim)
    '''
    prep_indices = x[1]
    sentence_mask = mask[0]
    if sentence_mask is not None:
        if K.ndim(sentence_mask) > 2:
            # This means this layer came after a Bidirectional layer. Keras has this bug which
            # concatenates input masks instead of output masks.
            # TODO: Fix Bidirectional instead.
            sentence_mask = K.any(sentence_mask, axis=(-2, -1))
    head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
                                                                           prep_indices)
    '''
    head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, proj_dim)
    prep_projection = K.dot(prep_encoding, self.proj_prep)  # (batch_size, proj_dim)
    child_projection = K.dot(child_encoding, self.proj_child)  # (batch_size, proj_dim)
    #(batch_size, proj_dim)
    if self.composition_type == 'HPCT':
        composed_projection = K.tanh(head_projection + prep_projection + child_projection)
    elif self.composition_type == 'HPC':
        prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, proj_dim)
        composed_projection = K.tanh(head_projection + prep_child_projection)
    else:
        # Composition type in HC
        composed_projection = K.tanh(head_projection + child_projection)
    for hidden_layer in self.hidden_layers:
        composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, proj_dim)
    # (batch_size, num_classes)
    class_scores = K.dot(composed_projection, self.scorer)
    label_probabilities = K.softmax(class_scores)
    return label_probabilities
def step(self, inputs, states):
    h_tm1 = states[0]  # previous memory
    #B_U = states[1]  # dropout matrices for recurrent units
    #B_W = states[2]
    h_tm1a = K.dot(h_tm1, self.Wa)
    eij = K.dot(K.tanh(K.repeat(h_tm1a, K.shape(self.h)[1]) + self.ha), self.Va)
    eijs = K.squeeze(eij, -1)
    alphaij = K.softmax(eijs)  # batchsize * lenh h batchsize * lenh * ndim
    ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
    cisum = K.sum(ci, axis=1)
    #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

    zr = K.sigmoid(K.dot(inputs, self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
    zi = zr[:, :self.units]
    ri = zr[:, self.units: 2 * self.units]
    si_ = K.tanh(K.dot(inputs, self.W) + K.dot(ri*h_tm1, self.U) + K.dot(cisum, self.C))
    si = (1-zi) * h_tm1 + zi * si_
    return si, [si]  #h_tm1, [h_tm1]
    '''if self.consume_less == 'gpu':
        matrix_x = K.dot(x * B_W[0], self.W) + self.b
        matrix_inner = K.dot(h_tm1 * B_U[0], self.U[:, :2 * self.units])

        x_z = matrix_x[:, :self.units]
        x_r = matrix_x[:, self.units: 2 * self.units]
        inner_z = matrix_inner[:, :self.units]
        inner_r = matrix_inner[:, self.units: 2 * self.units]

        z = self.inner_activation(x_z + inner_z)
        r = self.inner_activation(x_r + inner_r)

        x_h = matrix_x[:, 2 * self.units:]
        inner_h = K.dot(r * h_tm1 * B_U[0], self.U[:, 2 * self.units:])
        hh = self.activation(x_h + inner_h)
    else:
        if self.consume_less == 'cpu':
            x_z = x[:, :self.units]
            x_r = x[:, self.units: 2 * self.units]
            x_h = x[:, 2 * self.units:]
        elif self.consume_less == 'mem':
            x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
            x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
            x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
        else:
            raise ValueError('Unknown `consume_less` mode.')
        z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
        r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))

        hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
    h = z * h_tm1 + (1 - z) * hh
    return h, [h]'''
def step(self, inputs, states):
    h_tm1 = states[0]  # previous memory
    #B_U = states[1]  # dropout matrices for recurrent units
    #B_W = states[2]
    h_tm1a = K.dot(h_tm1, self.Wa)
    eij = K.dot(K.tanh(h_tm1a + self.ha), self.Va)
    eijs = K.repeat_elements(eij, self.h_dim, axis=1)
    #alphaij = K.softmax(eijs)  # batchsize * lenh h batchsize * lenh * ndim
    #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
    #cisum = K.sum(ci, axis=1)
    cisum = eijs*self.h
    #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

    zr = K.sigmoid(K.dot(inputs, self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
    zi = zr[:, :self.units]
    ri = zr[:, self.units: 2 * self.units]
    si_ = K.tanh(K.dot(inputs, self.W) + K.dot(ri*h_tm1, self.U) + K.dot(cisum, self.C))
    si = (1-zi) * h_tm1 + zi * si_
    return si, [si]  #h_tm1, [h_tm1]
    '''if self.consume_less == 'gpu':
        matrix_x = K.dot(x * B_W[0], self.W) + self.b
        matrix_inner = K.dot(h_tm1 * B_U[0], self.U[:, :2 * self.units])

        x_z = matrix_x[:, :self.units]
        x_r = matrix_x[:, self.units: 2 * self.units]
        inner_z = matrix_inner[:, :self.units]
        inner_r = matrix_inner[:, self.units: 2 * self.units]

        z = self.inner_activation(x_z + inner_z)
        r = self.inner_activation(x_r + inner_r)

        x_h = matrix_x[:, 2 * self.units:]
        inner_h = K.dot(r * h_tm1 * B_U[0], self.U[:, 2 * self.units:])
        hh = self.activation(x_h + inner_h)
    else:
        if self.consume_less == 'cpu':
            x_z = x[:, :self.units]
            x_r = x[:, self.units: 2 * self.units]
            x_h = x[:, 2 * self.units:]
        elif self.consume_less == 'mem':
            x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
            x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
            x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
        else:
            raise ValueError('Unknown `consume_less` mode.')
        z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
        r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))

        hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
    h = z * h_tm1 + (1 - z) * hh
    return h, [h]'''
def create_model(self,):
    """
    RNN model creation
    Layers include Embedding Layer, 3 LSTM stacked,
    Simple Context layer (manually defined),
    Time Distributed Layer
    """
    length_vocab, embedding_size = self.word2vec.shape
    print ("shape of word2vec matrix ", self.word2vec.shape)

    model = Sequential()

    # TODO: look at mask zero flag
    model.add(
        Embedding(
            length_vocab, embedding_size,
            input_length=max_length,
            weights=[self.word2vec], mask_zero=True,
            name='embedding_layer'
        )
    )

    for i in range(rnn_layers):
        lstm = LSTM(rnn_size, return_sequences=True,
                    name='lstm_layer_%d' % (i + 1)
                    )
        model.add(lstm)
        # No drop out added !

    model.add(Lambda(self.simple_context,
                     mask=lambda inputs, mask: mask[:, max_len_desc:],
                     output_shape=self.output_shape_simple_context_layer,
                     name='simple_context_layer'))

    vocab_size = self.word2vec.shape[0]
    model.add(TimeDistributed(Dense(vocab_size,
                                    name='time_distributed_layer')))

    model.add(Activation('softmax', name='activation_layer'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')
    K.set_value(model.optimizer.lr, np.float32(learning_rate))
    print (model.summary())
    return model
def _split_and_apply_activations(self, controller_output):
    """ This takes the controller output, splits it into ntm_output, read and write addressing data.
        It returns a triple of ntm_output, controller_instructions_read, controller_instructions_write.
        ntm_output is a tensor, controller_instructions_read and controller_instructions_write are lists containing
        the addressing instruction (k, beta, g, shift, gamma) and, in the case of write, also the writing instructions,
        consisting of an erase and an add vector.

        As it is necessary for stable results,
        k and add_vector are activated via tanh, erase_vector via sigmoid (this is critical!),
        shift via softmax,
        gamma is sigmoided, inverted and clipped (probably not ideal),
        g is sigmoided,
        beta is linear (probably not ideal!) """

    # splitting
    ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
                controller_output,
                np.asarray([self.output_dim,
                            self.read_heads * self.controller_read_head_emitting_dim,
                            self.write_heads * self.controller_write_head_emitting_dim]),
                axis=1)

    controller_instructions_read = tf.split(controller_instructions_read, self.read_heads, axis=1)
    controller_instructions_write = tf.split(controller_instructions_write, self.write_heads, axis=1)

    controller_instructions_read = [
        tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1) for
        single_head_data in controller_instructions_read]

    controller_instructions_write = [
        tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]), axis=1) for
        single_head_data in controller_instructions_write]

    #activation
    ntm_output = self.activation(ntm_output)
    controller_instructions_read = [
        (tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma)) for
        (k, beta, g, shift, gamma) in controller_instructions_read]

    controller_instructions_write = [
        (tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma),
         hard_sigmoid(erase_vector), tanh(add_vector)) for
        (k, beta, g, shift, gamma, erase_vector, add_vector) in controller_instructions_write]

    return (ntm_output, controller_instructions_read, controller_instructions_write)
def __init__(self, output_dim, hidden_dim, output_length, depth=1, bidirectional=True, dropout=0.1, **kwargs):
    if bidirectional and hidden_dim % 2 != 0:
        raise Exception("hidden_dim for AttentionSeq2seq should be even (Because of bidirectional RNN).")
    super(AttentionSeq2seq, self).__init__()
    if type(depth) not in [list, tuple]:
        depth = (depth, depth)
    if 'batch_input_shape' in kwargs:
        shape = kwargs['batch_input_shape']
        del kwargs['batch_input_shape']
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs['input_shape'])
        del kwargs['input_shape']
    elif 'input_dim' in kwargs:
        if 'input_length' in kwargs:
            input_length = kwargs['input_length']
        else:
            input_length = None
        shape = (None, input_length, kwargs['input_dim'])
        del kwargs['input_dim']
    self.add(Layer(batch_input_shape=shape))
    if bidirectional:
        self.add(Bidirectional(LSTMEncoder(output_dim=int(hidden_dim / 2), state_input=False, return_sequences=True, **kwargs)))
    else:
        self.add(LSTMEncoder(output_dim=hidden_dim, state_input=False, return_sequences=True, **kwargs))
    for i in range(0, depth[0] - 1):
        self.add(Dropout(dropout))
        if bidirectional:
            self.add(Bidirectional(LSTMEncoder(output_dim=int(hidden_dim / 2), state_input=False, return_sequences=True, **kwargs)))
        else:
            self.add(LSTMEncoder(output_dim=hidden_dim, state_input=False, return_sequences=True, **kwargs))
    encoder = self.layers[-1]
    self.add(Dropout(dropout))
    self.add(TimeDistributed(Dense(hidden_dim if depth[1] > 1 else output_dim)))
    decoder = AttentionDecoder(hidden_dim=hidden_dim, output_length=output_length, state_input=False, **kwargs)
    self.add(Dropout(dropout))
    self.add(decoder)
    for i in range(0, depth[1] - 1):
        self.add(Dropout(dropout))
        self.add(LSTMEncoder(output_dim=hidden_dim, state_input=False, return_sequences=True, **kwargs))
    self.add(Dropout(dropout))
    self.add(TimeDistributed(Dense(output_dim, activation='softmax')))
    self.encoder = encoder
    self.decoder = decoder
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs