项目:onto-lstm    作者:pdasigi
def call(self, x, mask=None):
    """Pool the timesteps of ``x`` with attention weights conditioned on its mean.

    NOTE(review): this block arrived with the heads of three ``K.dot`` calls
    stripped (it did not parse). The operands used below were reconstructed
    from the shape comments and must be confirmed against the upstream project.

    Args:
        x: Input tensor; (batch_size, input_length, input_dim) according to
            the original shape comments.
        mask: Optional mask. When given, masked positions of ``x`` are
            replaced with zeros before scoring.

    Returns:
        The softmax-weighted sum of ``x`` over axis 1; (batch_size, input_dim)
        according to the original shape comments.
    """
    # The parent layer's output is used as the mean; (batch_size, input_dim).
    mean = super(IntraAttention, self).call(x, mask)
    # A single row of ones, used to tile the mean along the time axis.
    ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)  # (1, input_length)
    # (batch_size, input_dim, 1) . (1, input_length) -> (batch_size, input_dim, input_length),
    # then swap the last two axes -> (batch_size, input_length, input_dim).
    tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
    if mask is not None:
        if K.ndim(mask) > K.ndim(x):
            # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
            # TODO: Fix Bidirectional.
            mask = K.any(mask, axis=(-2, -1))
        if K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
        x = switch(mask, x, K.zeros_like(x))
    # (batch_size, input_length, proj_dim)
    projected_combination = K.tanh(K.dot(x, self.vector_projector) +
                                   K.dot(tiled_mean, self.mean_projector))
    scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
    weights = K.softmax(scores)  # (batch_size, input_length)
    attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
    return attended_x
项目:recurrent-attention-for-QA-SQUAD-based-on-keras    作者:wentaozhu
def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        #B_U = states[1]  # dropout matrices for recurrent units
        #B_W = states[2]
        h_tm1a =, self.Wa)
        eij = +[:, :self.h_dim], self.Ua)), self.Va)
        eijs = K.repeat_elements(eij, self.h_dim, axis=1)

        #alphaij = K.softmax(eijs) # batchsize * lenh       h batchsize * lenh * ndim
        #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
        #cisum = K.sum(ci, axis=1)
        cisum = eijs*inputs[:, :self.h_dim]
        #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

        zr = K.sigmoid([:, self.h_dim:], self.Wzr) +, self.Uzr) +, self.Czr))
        zi = zr[:, :self.units]
        ri = zr[:, self.units: 2 * self.units]
        si_ = K.tanh([:, self.h_dim:], self.W) +*h_tm1, self.U) +, self.C))
        si = (1-zi) * h_tm1 + zi * si_
        return si, [si] #h_tm1, [h_tm1]
项目:keras-text    作者:raghakot
def _softmax(x, dim):
    """Computes softmax along a specified dim. Keras currently lacks this feature.

    Args:
        x: Backend tensor to normalise.
        dim: Axis along which the softmax is taken.

    Returns:
        The softmax of `x` along `dim`.

    Raises:
        ValueError: If the active Keras backend is not 'tensorflow', 'cntk'
            or 'theano'.
    """
    backend = K.backend()
    if backend == 'tensorflow':
        import tensorflow as tf
        return tf.nn.softmax(x, dim)
    if backend == 'cntk':
        # Compare by value: `is` on a string literal tests identity and is
        # not guaranteed to match an equal string.
        import cntk
        return cntk.softmax(x, dim)
    if backend == 'theano':
        # Theano cannot softmax along an arbitrary dim.
        # So, we will shuffle `dim` to -1 and un-shuffle after softmax.
        perm = list(range(K.ndim(x)))
        perm[dim], perm[-1] = perm[-1], perm[dim]
        perm = tuple(perm)
        output = K.softmax(K.permute_dimensions(x, perm))

        # Swapping two axes is its own inverse, so the same permutation
        # restores the original layout. It must be applied to `output`.
        return K.permute_dimensions(output, perm)
    raise ValueError("Backend '{}' not supported".format(K.backend()))
项目:aes    作者:feidong1991
def call(self, x, mask=None):
    """Weight the timesteps of ``x`` by attention scores and optionally pool them.

    The first statement arrived with its ``K.dot(x`` head stripped; ``x`` is
    the only tensor in scope at that point, so it is restored as the operand.

    Args:
        x: Input tensor. It is indexed as 3-D (``x.shape[2]``, contraction
            over axis 2).
        mask: Mask tensor. It is read only when ``self.op == 'attmean'``,
            where it must not be None.

    Returns:
        The attention-weighted input cast to ``K.floatx()``. It is summed over
        axis 1 for 'attsum', and summed then divided by the mask sum for
        'attmean'.
    """
    y = K.dot(x, self.att_W)
    if not self.activation:
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, y, axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, y, axes=[0, 2])
    elif self.activation == 'tanh':
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
    # NOTE(review): `weights` is left unassigned for any other activation or backend.
    weights = K.softmax(weights)
    # Repeat the per-timestep weights across the feature axis so they can be
    # multiplied elementwise with x.
    out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]), [0, 2, 1])
    if self.op == 'attsum':
        out = out.sum(axis=1)
    elif self.op == 'attmean':
        out = out.sum(axis=1) / mask.sum(axis=1, keepdims=True)
    return K.cast(out, K.floatx())
项目:NTM-Keras    作者:SigmaQuan
def content_addressing(memory_t,  key_vector_t, key_strength_t):
    """
    Focusing by content.

    :param memory_t: external memory.
    :param key_vector_t: key vector.
    :param key_strength_t: the strength of key.
    :return: softmax of the key-strength-scaled similarity between the key
        vector and the memory, as computed by `cosine_similarity_group`.
    """
    _weight_content_t = \
        key_strength_t * cosine_similarity_group(key_vector_t, memory_t)
    weight_content_t = softmax(_weight_content_t)
    return weight_content_t
项目:ntm_keras    作者:flomlo
def _get_weight_vector(self, M, w_tm1, k, beta, g, s, gamma):
    """Compute a head's new weighting from memory and controller outputs.

    The stages follow the chapter and equation numbers cited in the inline
    comments: content addressing, interpolation with the previous weighting,
    a shift built from ``self.C``, then sharpening and renormalisation.
    """
    # Content addressing (Chapter 3.3.1); equation (5) is just a softmax.
    content_weights = K.softmax(beta * _cosine_distance(M, k))

    # Location addressing (Chapter 3.3.2).
    # Equation 7: gate between the content weighting and the previous weighting.
    gated_weights = (g * content_weights) + (1-g)*w_tm1

    # Equation 8: tile self.C once per batch element, weight it by the shift
    # vector, and sum over the shift axis.
    # TODO: Explain
    tiled_C = K.repeat_elements(self.C[None, :, :, :], self.batch_size, axis=0)
    shift_matrix = K.sum(tiled_C * s[:,:,None,None], axis=1)
    shifted_weights = K.batch_dot(shift_matrix, gated_weights)

    # Equation 9: sharpen, then renormalise.
    return _renorm(shifted_weights ** gamma)
项目:Keras-GAN    作者:Shaofanl
def register(self, info_tensor, param_tensor):
    """Store the tensors and build the Gaussian log-likelihood term.

    As received, the block had lost the ``else:`` between its two branches.
    The fixed-stddev assignment was therefore always overwritten, and with
    ``self.stddev_fix`` false neither ``mean`` nor ``std`` was defined. The
    two branches are restored here.

    Args:
        info_tensor: Values whose likelihood is evaluated.
        param_tensor: Predicted parameters. Column 0 is read as the mean and,
            unless ``self.stddev_fix`` is set, column 1 as the pre-sigmoid
            standard deviation.

    Returns:
        ``self.log_Q_c_given_x`` reshaped to (-1, 1).
    """
    self.info_tensor = info_tensor
    self.param_tensor = param_tensor

    # The mean is computed identically in both modes, clipped to [self.min, self.max].
    mean = K.clip(param_tensor[:, 0].dimshuffle(0, 'x'), self.min, self.max)
    if self.stddev_fix:
        std = 1.0
    else:
        std = K.sigmoid(param_tensor[:, 1].dimshuffle(0, 'x'))

    e = (info_tensor-mean)/(std + K.epsilon())
    # Gaussian log-density summed over axis 1, scaled by self.lmbd.
    self.log_Q_c_given_x = \
        K.sum(-0.5*np.log(2*np.pi) -K.log(std+K.epsilon()) -0.5*(e**2), axis=1) * self.lmbd

    return K.reshape(self.log_Q_c_given_x, (-1, 1))
项目:tying-wv-and-wc    作者:icoxfog417
def __init__(self, 

        super().__init__(vocab_size, sequence_size, setting, checkpoint_path)
        self.temperature = temperature
        self.tying = tying
        self.gamma = self.setting.gamma

        if tying:
            self.model.pop()  # remove activation
            self.model.pop()  # remove projection (use self embedding)
            self.model.add(Lambda(lambda x:, K.transpose(self.embedding.embeddings))))
项目:tying-wv-and-wc    作者:icoxfog417    | 项目源码 | 文件源码
def augmented_loss(self, y_true, y_pred):
    """Cross-entropy loss plus a temperature-scaled KL term against embedding-derived targets.

    ``y_true`` is treated as one-hot over axis 2 (batch x seq x vocab, per the
    original comments). The soft targets are the softmax of the dot products
    between each target word's embedding and the whole embedding matrix.
    """
    probabilities = Activation("softmax")(y_pred)
    loss = K.categorical_crossentropy(probabilities, y_true)

    # One-hot -> index (batch x seq), then look up the vectors
    # (batch x seq x vector_length).
    target_indexes = K.argmax(y_true, axis=2)
    target_vectors = self.embedding(target_indexes)

    # vector x embedding dot products (batch x seq x vocab)
    similarities = tf.tensordot(
        target_vectors, K.transpose(self.embedding.embeddings), 1)
    # explicitly set shape
    similarities = K.reshape(
        similarities, (-1, self.sequence_size, self.vocab_size))
    soft_targets = K.softmax(similarities / self.temperature)

    tempered_probabilities = Activation("softmax")(y_pred / self.temperature)
    divergence = kullback_leibler_divergence(soft_targets, tempered_probabilities)
    loss = loss + (self.gamma * self.temperature) * divergence
    return loss
项目:DeepJet    作者:mstoye
def mean_log_Gaussian_like(y_true, parameters):
    """Mean Log Gaussian Likelihood distribution

    Negative mean log-likelihood of ``y_true`` under a Gaussian mixture whose
    parameters are packed in ``parameters``.

    Note: ``c`` (number of predicted outputs) and ``m`` (number of mixture
    components) are fixed locally to 1 and 2, so the output size is
    (c + 2) * m = 6.

    Args:
        y_true: Target values.
        parameters: Network output, reshaped to [-1, c + 2, m]. The first c
            rows are the means, row c is sigma and row c + 1 is the mixture
            weights before clipping and softmax.

    Returns:
        The negated mean of the per-sample log-sum-exp of the component
        log-densities.
    """
    c = 1 #The number of outputs we want to predict
    m = 2 #The number of distributions we want to use in the mixture
    components = K.reshape(parameters,[-1, c + 2, m])
    mu = components[:, :c, :]
    sigma = components[:, c, :]
    alpha = components[:, c + 1, :]
    alpha = K.softmax(K.clip(alpha,1e-8,1.))

    # Log of (mixture weight * Gaussian density) for each component.
    exponent = K.log(alpha) - .5 * float(c) * K.log(2 * np.pi) \
    - float(c) * K.log(sigma) \
    - K.sum((K.expand_dims(y_true,2) - mu)**2, axis=1)/(2*(sigma)**2)

    log_gauss = log_sum_exp(exponent, axis=1)
    res = - K.mean(log_gauss)
    return res
项目:DeepJet    作者:mstoye
def mean_log_LaPlace_like(y_true, parameters):
    """Mean Log Laplace Likelihood distribution

    Negative mean log-likelihood of ``y_true`` under a Laplace mixture whose
    parameters are packed in ``parameters``.

    Note: ``c`` (number of predicted outputs) and ``m`` (number of mixture
    components) are fixed locally to 1 and 2, so the output size is
    (c + 2) * m = 6.

    Args:
        y_true: Target values.
        parameters: Network output, reshaped to [-1, c + 2, m]. The first c
            rows are the locations, row c is the scale and row c + 1 is the
            mixture weights before clipping and softmax.

    Returns:
        The negated mean of the per-sample log-sum-exp of the component
        log-densities.
    """
    c = 1 #The number of outputs we want to predict
    m = 2 #The number of distributions we want to use in the mixture
    components = K.reshape(parameters,[-1, c + 2, m])
    mu = components[:, :c, :]
    sigma = components[:, c, :]
    alpha = components[:, c + 1, :]
    alpha = K.softmax(K.clip(alpha,1e-2,1.))

    # Log of (mixture weight * Laplace density) for each component.
    exponent = K.log(alpha) - float(c) * K.log(2 * sigma) \
    - K.sum(K.abs(K.expand_dims(y_true,2) - mu), axis=1)/(sigma)

    log_gauss = log_sum_exp(exponent, axis=1)
    res = - K.mean(log_gauss)
    return res
项目:onto-lstm    作者:pdasigi
def summarize_memory(o_t, mem_tm1):
    """
    This method selects the relevant parts of the memory given the read output and summarizes the
    memory. Implements Equations 2-3 or 8-11 in the paper.

    Returns a pair ``(z_t, m_rt)``: the softmax attention over memory slots and
    the attention-weighted sum of the memory.
    """
    # Selecting relevant memory slots, Equation 2
    z_t = K.softmax(K.sum(K.expand_dims(o_t, dim=1) * mem_tm1, axis=2))  # (batch_size, input_length)
    # Summarizing memory, Equation 3
    m_rt = K.sum(K.expand_dims(z_t, dim=2) * mem_tm1, axis=1)  # (batch_size, output_dim)
    return z_t, m_rt
项目:latplan    作者:guicho271828
def call(self,logits):
    """Return a Gumbel-perturbed softmax of ``logits``.

    Uniform noise is turned into Gumbel noise and added to the logits. The
    sum is divided by ``self.tau`` in the training phase and by ``self.min``
    otherwise, before the softmax.
    """
    uniform_noise = K.random_uniform(K.shape(logits), 0, 1)
    # The 1e-20 offsets keep both logarithms away from log(0).
    gumbel_noise = - K.log(-K.log(uniform_noise + 1e-20) + 1e-20)
    perturbed = logits + gumbel_noise
    training_output = K.softmax( perturbed / self.tau )
    inference_output = K.softmax( perturbed / self.min )
    return K.in_train_phase(training_output, inference_output)
项目:latplan    作者:guicho271828
def loss(self):
        logits = self.logits
        q = K.softmax(logits)
        log_q = K.log(q + 1e-20)
        return - K.mean(q * (log_q - K.log(1.0/K.int_shape(logits)[-1])),
项目:NTM-Keras    作者:SigmaQuan
def softmax(x):
    """Thin wrapper returning ``K.softmax(x)``."""
    return K.softmax(x)
项目:kfs    作者:the-moliver
def call(self, x, mask=None):
    """Project ``x`` into concatenated mixture parameters.

    The three projection statements arrived with their ``K.dot(x`` heads
    stripped; ``x`` is the only tensor in scope, so it is restored as the
    operand.

    Args:
        x: Input tensor.
        mask: Unused.

    Returns:
        The concatenation along the last axis of the mu projection, the
        exponential of the sigma projection, and the softmax of the pi
        projection.
    """
    output_mu = K.dot(x, self.W_mu)
    output_sigma = K.dot(x, self.W_sigma)
    output_pi = K.dot(x, self.W_pi)
    if self.bias:
        output_mu += self.b_mu
        output_sigma += self.b_sigma
        output_pi += self.b_pi
    # exp keeps sigma positive; softmax makes the pi values sum to one.
    return K.concatenate([output_mu, K.exp(output_sigma), K.softmax(output_pi)], axis=-1)
项目:rl    作者:Shmuma
def create_entropy_loss(policy_t, beta):
    """Wrap ``policy_t`` in a Lambda layer named "entropy_loss".

    The layer outputs ``beta * sum(p * log p)`` over the last axis (kept as a
    size-1 axis), where ``p`` is the softmax of the policy logits.
    """
    def entropy_loss_func(p_t):
        log_probabilities = tf.nn.log_softmax(p_t)
        probabilities = K.softmax(p_t)
        return beta * K.sum(probabilities * log_probabilities, axis=-1, keepdims=True)

    entropy_layer = Lambda(entropy_loss_func, name="entropy_loss", output_shape=(1,))
    return entropy_layer(policy_t)
项目:DeepNews    作者:kabrapratik28
def simple_context(self, X, mask):
    """
    Simple context calculation layer logic
    X = (batch_size, time_steps, units)
    time_steps are nothing but number of words in our case.

    Returns, for every headline position, the attention-weighted average of
    the description word features concatenated with the headline word
    features.
    """
    # segregrate heading and desc
    desc, head = X[:, :max_len_desc, :], X[:, max_len_desc:, :]
    # segregrate activation and context part
    head_activations, head_words = head[:, :, :activation_rnn_size], head[:, :, activation_rnn_size:]
    desc_activations, desc_words = desc[:, :, :activation_rnn_size], desc[:, :, activation_rnn_size:]

    # Contract the activation axis of both tensors. The reshape below treats
    # the result as (batch_size, max_len_head, max_len_desc).
    activation_energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))

    # make sure we dont use description words that are masked out
    activation_energies = activation_energies + -1e20 * K.expand_dims(1. - K.cast(mask[:, :max_len_desc], 'float32'), 1)

    # for every head word compute weights for every desc word
    activation_energies = K.reshape(activation_energies, (-1, max_len_desc))
    activation_weights = K.softmax(activation_energies)
    activation_weights = K.reshape(activation_weights, (-1, max_len_head, max_len_desc))

    # for every head word compute weighted average of desc words
    desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
    return K.concatenate((desc_avg_word, head_words))
项目:sdp    作者:tansey
def neural_network(self, X):
    """pi, mu, sigma = NN(x; theta)

    Builds two conv/max-pool stages and two dense layers on top of ``X``, then
    stores the three output heads on ``self.mus``, ``self.sigmas`` and
    ``self.pi``. Nothing is returned.
    """
    images = tf.reshape(X, [-1,IMAGE_ROWS,IMAGE_COLS,1])

    # Convolutional feature extractor.
    first_conv = Convolution2D(
        32, 5, 5, border_mode='same', activation=K.relu,
        W_regularizer=l2(0.01),
        input_shape=(IMAGE_ROWS, IMAGE_COLS, 1))(images)
    first_pool = MaxPooling2D(pool_size=(2,2), border_mode='same')(first_conv)
    second_conv = Convolution2D(
        64, 5, 5, border_mode='same', activation=K.relu,
        W_regularizer=l2(0.01))(first_pool)
    second_pool = MaxPooling2D(pool_size=(2,2), border_mode='same')(second_conv)

    # NOTE(review): `//` and `*` share precedence and associate left to right,
    # so this is ((IMAGE_ROWS//4) * IMAGE_COLS)//4 * 64. Confirm it matches
    # the pooled size for the image dimensions in use.
    flattened = tf.reshape(second_pool, [-1, IMAGE_ROWS//4 * IMAGE_COLS//4 * 64])

    # Fully connected trunk.
    trunk = Dense(1024, W_regularizer=l2(0.01), activation=K.relu)(flattened)
    trunk = Dense(64, W_regularizer=l2(0.01), activation=K.relu)(trunk)

    # Output heads: linear, softplus and softmax, each of width self.K.
    self.mus = Dense(self.K)(trunk)
    self.sigmas = Dense(self.K, activation=K.softplus)(trunk)
    self.pi = Dense(self.K, activation=K.softmax)(trunk)
项目:albemarle    作者:SeanTater
def step(self, x, states):
        h_tm1, c_tm1, y_tm1, B, U, H = states
        s =, self.W_h) + self.b_h
        s = K.repeat(s, self.input_length)
        energy = time_distributed_dense(s + H, self.W_a, self.b_a)
        energy = K.squeeze(energy, 2)
        alpha = K.softmax(energy)
        alpha = K.repeat(alpha, self.input_dim)
        alpha = K.permute_dimensions(alpha, (0, 2, 1))
        weighted_H = H * alpha
        v = K.sum(weighted_H, axis=1)
        y, new_states = super(AttentionDecoder, self).step(v, states[:-1])
        return y, new_states
项目:Keras-GAN    作者:Shaofanl
def register(self, info_tensor, param_tensor):
    """Store the tensors and build the categorical log-likelihood term.

    The term is the sum over axis 1 of ``log(softmax(param_tensor) + eps)``
    weighted by ``info_tensor``, scaled by ``self.lmbd``. It is kept on
    ``self.log_Q_c_given_x`` and returned reshaped to (-1, 1).
    """
    self.info_tensor = info_tensor
    self.param_tensor = param_tensor

    # Epsilon keeps the logarithm finite when a probability underflows to zero.
    log_probabilities = K.log(K.softmax(param_tensor)+K.epsilon())
    weighted_sum = K.sum(log_probabilities * info_tensor, axis=1)
    self.log_Q_c_given_x = weighted_sum * self.lmbd
    return K.reshape(self.log_Q_c_given_x, (-1, 1))
项目:nea    作者:nusnlp
def call(self, x, mask=None):
    """Weight the timesteps of ``x`` by attention scores and optionally pool them.

    The first statement arrived with its ``K.dot(x`` head stripped; ``x`` is
    the only tensor in scope at that point, so it is restored as the operand.

    Args:
        x: Input tensor. It is indexed as 3-D (``x.shape[2]``, contraction
            over axis 2).
        mask: Mask tensor. It is read only when ``self.op == 'attmean'``,
            where it must not be None.

    Returns:
        The attention-weighted input cast to ``K.floatx()``. It is summed over
        axis 1 for 'attsum', and summed then divided by the mask sum for
        'attmean'.
    """
    y = K.dot(x, self.att_W)
    if not self.activation:
        weights = K.theano.tensor.tensordot(self.att_v, y, axes=[0, 2])
    elif self.activation == 'tanh':
        weights = K.theano.tensor.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
    # NOTE(review): `weights` is left unassigned for any other activation value.
    weights = K.softmax(weights)
    # Repeat the per-timestep weights across the feature axis so they can be
    # multiplied elementwise with x.
    out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]), [0, 2, 1])
    if self.op == 'attsum':
        out = out.sum(axis=1)
    elif self.op == 'attmean':
        out = out.sum(axis=1) / mask.sum(axis=1, keepdims=True)
    return K.cast(out, K.floatx())
项目:mcv-m5    作者:david-vazquez
def call(self, x, mask=None):
    """Apply softmax over the axis ``self.channel_index`` of ``x``.

    The channel axis is moved to the end and the tensor flattened to 2-D.
    After the softmax, the original shape and axis order are restored.

    Args:
        x: Input tensor.
        mask: Unused.

    Returns:
        Tensor with the same axis order as ``x``, normalised along the
        channel axis.
    """
    ch_idx = self.channel_index
    l_idx = K.ndim(x) - 1  # last index
    # Move the channel axis to the last position.
    x = K.permute_dimensions(
        x, tuple(i for i in range(K.ndim(x)) if i != ch_idx) + (ch_idx,))
    sh = K.shape(x)
    x = K.reshape(x, (-1, sh[-1]))
    x = K.softmax(x)
    x = K.reshape(x, sh)
    # Put the (now last) channel axis back at ch_idx. Tuples are concatenated
    # because `range` objects do not support `+` on Python 3.
    x = K.permute_dimensions(
        x, tuple(range(ch_idx)) + (l_idx,) + tuple(range(ch_idx, l_idx)))
    return x

# Works TH and TF
项目:keras_zoo    作者:david-vazquez
def call(self, x, mask=None):
    """Apply softmax over the axis ``self.channel_index`` of ``x``.

    The channel axis is moved to the end and the tensor flattened to 2-D.
    After the softmax, the original shape and axis order are restored.

    Args:
        x: Input tensor.
        mask: Unused.

    Returns:
        Tensor with the same axis order as ``x``, normalised along the
        channel axis.
    """
    ch_idx = self.channel_index
    l_idx = K.ndim(x) - 1  # last index
    # Move the channel axis to the last position.
    x = K.permute_dimensions(
        x, tuple(i for i in range(K.ndim(x)) if i != ch_idx) + (ch_idx,))
    sh = K.shape(x)
    x = K.reshape(x, (-1, sh[-1]))
    x = K.softmax(x)
    x = K.reshape(x, sh)
    # Put the (now last) channel axis back at ch_idx. Tuples are concatenated
    # because `range` objects do not support `+` on Python 3.
    x = K.permute_dimensions(
        x, tuple(range(ch_idx)) + (l_idx,) + tuple(range(ch_idx, l_idx)))
    return x

# Works TH and TF
项目:ensemble-adv-training    作者:ftramer
def tf_test_error_rate(model, x, X_test, y_test):
    """
    Compute test error.

    :param model: callable applied to `x`; its output is passed through a softmax.
    :param x: input tensor the predictions are evaluated through.
    :param X_test: test inputs fed to `batch_eval`.
    :param y_test: test labels; must have the same length as `X_test`.
    :return: the result of `error_rate` on the predictions and `y_test`.
    """
    assert len(X_test) == len(y_test)

    # Predictions for the test set
    eval_prediction = K.softmax(model(x))

    predictions = batch_eval([x], [eval_prediction], [X_test])[0]

    return error_rate(predictions, y_test)
项目:tying-wv-and-wc    作者:icoxfog417
def perplexity(cls, y_true, y_pred):
    """Apply a softmax to ``y_pred`` and delegate to the parent class's perplexity."""
    probabilities = Activation("softmax")(y_pred)
    parent = super(AugmentedModel, cls)
    return parent.perplexity(y_true, probabilities)
项目:VASC    作者:wang-research
def compute_softmax(logits,temp):
    """Softmax of the noise-perturbed logits divided by the temperature ``temp``.

    The noise comes from ``sampling_gumbel`` and has the shape of ``logits``.
    """
    noise = sampling_gumbel( K.shape(logits) )
    perturbed = logits + noise
    return K.softmax( perturbed / temp )
项目:onto-lstm    作者:pdasigi
def call(self, x, mask=None):
        # x: (batch_size, input_length, input_dim) where input_length = head_size + 2
        head_encoding = x[:, :-2, :]  # (batch_size, head_size, input_dim)
        prep_encoding = x[:, -2, :]  # (batch_size, input_dim)
        child_encoding = x[:, -1, :]  # (batch_size, input_dim)
        if self.composition_type == 'HPCD':
            # TODO: The following line may not work with TF.
            # (batch_size, head_size, input_dim, 1) * (1, head_size, input_dim, proj_dim)
            head_proj_prod = K.expand_dims(head_encoding) * K.expand_dims(self.dist_proj_head, dim=0)
            head_projection = K.sum(head_proj_prod, axis=2)  # (batch_size, head_size, proj_dim)
            head_projection =, self.proj_head)  # (batch_size, head_size, proj_dim)
        prep_projection = K.expand_dims(, self.proj_prep), dim=1)  # (batch_size, 1, proj_dim)
        child_projection = K.expand_dims(, self.proj_child), dim=1)  # (batch_size, 1, proj_dim)
        #(batch_size, head_size, proj_dim)
        if self.composition_type == 'HPCT':
            composed_projection = K.tanh(head_projection + prep_projection + child_projection)
        elif self.composition_type == 'HPC' or self.composition_type == "HPCD":
            prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, 1, proj_dim)
            composed_projection = K.tanh(head_projection + prep_child_projection)
            # Composition type in HC
            composed_projection = K.tanh(head_projection + child_projection)
        for hidden_layer in self.hidden_layers:
            composed_projection = K.tanh(, hidden_layer))  # (batch_size, head_size, proj_dim)
        # (batch_size, head_size)
        head_word_scores = K.squeeze(, self.scorer), axis=-1)
        if mask is None:
            attachment_probabilities = K.softmax(head_word_scores)  # (batch_size, head_size)
            if K.ndim(mask) > 2:
                # This means this layer came after a Bidirectional layer. Keras has this bug which
                # concatenates input masks instead of output masks.
                # TODO: Fix Bidirectional instead.
                mask = K.any(mask, axis=(-2, -1))
            # We need to do a masked softmax.
            exp_scores = K.exp(head_word_scores)  # (batch_size, head_size)
            head_mask = mask[:, :-2]  # (batch_size, head_size)
            # (batch_size, head_size)
            masked_exp_scores = switch(head_mask, exp_scores, K.zeros_like(head_encoding[:, :, 0]))
            # (batch_size, 1). Adding epsilon to avoid divison by 0. But epsilon is float64.
            exp_sum = K.cast(K.expand_dims(K.sum(masked_exp_scores, axis=1) + K.epsilon()), 'float32')
            attachment_probabilities = masked_exp_scores / exp_sum  # (batch_size, head_size)
        return attachment_probabilities
项目:onto-lstm    作者:pdasigi    | 项目源码 | 文件源码
def call(self, x, mask=None):
        # x[0]: (batch_size, input_length, input_dim)
        # x[1]: (batch_size, 1) indices of prepositions
        # Optional: x[2]: (batch_size, input_length - 2)
        assert isinstance(x, list) or isinstance(x, tuple)
        encoded_sentence = x[0]
        prep_indices = K.squeeze(x[1], axis=-1)  #(batch_size,)
        batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)
        if self.with_attachment_probs:
            # We're essentially doing K.argmax(x[2]) here, but argmax is not differentiable!
            head_probs = x[2]
            head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
            # (batch_size, input_length)
            padded_head_probs = K.concatenate([head_probs, head_probs_padding])
            # (batch_size, 1)
            max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
            # (batch_size, input_length, 1)
            max_head_prob_indices = K.expand_dims(K.equal(padded_head_probs, max_head_probs))
            # (batch_size, input_length, input_dim)
            masked_head_encoding = K.switch(max_head_prob_indices, encoded_sentence, K.zeros_like(encoded_sentence))
            # (batch_size, input_dim)
            head_encoding = K.sum(masked_head_encoding, axis=1)
            head_indices = prep_indices - 1  # (batch_size,)
            head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)
        prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
        child_encoding = encoded_sentence[batch_indices, prep_indices+1, :]  # (batch_size, input_dim)
        prep_indices = x[1]
        sentence_mask = mask[0]
        if sentence_mask is not None:
            if K.ndim(sentence_mask) > 2:
                # This means this layer came after a Bidirectional layer. Keras has this bug which
                # concatenates input masks instead of output masks.
                # TODO: Fix Bidirectional instead.
                sentence_mask = K.any(sentence_mask, axis=(-2, -1))
        head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
        head_projection =, self.proj_head)  # (batch_size, proj_dim)
        prep_projection =, self.proj_prep)  # (batch_size, proj_dim)
        child_projection =, self.proj_child)  # (batch_size, proj_dim)
        #(batch_size, proj_dim)
        if self.composition_type == 'HPCT':
            composed_projection = K.tanh(head_projection + prep_projection + child_projection)
        elif self.composition_type == 'HPC':
            prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, proj_dim)
            composed_projection = K.tanh(head_projection + prep_child_projection)
            # Composition type in HC
            composed_projection = K.tanh(head_projection + child_projection)
        for hidden_layer in self.hidden_layers:
            composed_projection = K.tanh(, hidden_layer))  # (batch_size, proj_dim)
        # (batch_size, num_classes)
        class_scores =, self.scorer)
        label_probabilities = K.softmax(class_scores)
        return label_probabilities
项目:recurrent-attention-for-QA-SQUAD-based-on-keras    作者:wentaozhu
def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        #B_U = states[1]  # dropout matrices for recurrent units
        #B_W = states[2]
        h_tm1a =, self.Wa)
        eij =, K.shape(self.h)[1]) + self.ha), self.Va)
        eijs = K.squeeze(eij, -1)
        alphaij = K.softmax(eijs) # batchsize * lenh       h batchsize * lenh * ndim
        ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
        cisum = K.sum(ci, axis=1)
        #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

        zr = K.sigmoid(, self.Wzr) +, self.Uzr) +, self.Czr))
        zi = zr[:, :self.units]
        ri = zr[:, self.units: 2 * self.units]
        si_ = K.tanh(, self.W) +*h_tm1, self.U) +, self.C))
        si = (1-zi) * h_tm1 + zi * si_
        return si, [si] #h_tm1, [h_tm1]
        '''if self.consume_less == 'gpu':

            matrix_x = * B_W[0], self.W) + self.b
            matrix_inner = * B_U[0], self.U[:, :2 * self.units])

            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units: 2 * self.units]
            inner_z = matrix_inner[:, :self.units]
            inner_r = matrix_inner[:, self.units: 2 * self.units]

            z = self.inner_activation(x_z + inner_z)
            r = self.inner_activation(x_r + inner_r)

            x_h = matrix_x[:, 2 * self.units:]
            inner_h = * h_tm1 * B_U[0], self.U[:, 2 * self.units:])
            hh = self.activation(x_h + inner_h)
            if self.consume_less == 'cpu':
                x_z = x[:, :self.units]
                x_r = x[:, self.units: 2 * self.units]
                x_h = x[:, 2 * self.units:]
            elif self.consume_less == 'mem':
                x_z = * B_W[0], self.W_z) + self.b_z
                x_r = * B_W[1], self.W_r) + self.b_r
                x_h = * B_W[2], self.W_h) + self.b_h
                raise ValueError('Unknown `consume_less` mode.')
            z = self.inner_activation(x_z + * B_U[0], self.U_z))
            r = self.inner_activation(x_r + * B_U[1], self.U_r))

            hh = self.activation(x_h + * h_tm1 * B_U[2], self.U_h))
        h = z * h_tm1 + (1 - z) * hh
        return h, [h]'''
项目:recurrent-attention-for-QA-SQUAD-based-on-keras    作者:wentaozhu
def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        #B_U = states[1]  # dropout matrices for recurrent units
        #B_W = states[2]
        h_tm1a =, self.Wa)
        eij = + self.ha), self.Va)
        eijs = K.repeat_elements(eij, self.h_dim, axis=1)

        #alphaij = K.softmax(eijs) # batchsize * lenh       h batchsize * lenh * ndim
        #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
        #cisum = K.sum(ci, axis=1)
        cisum = eijs*self.h
        #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

        zr = K.sigmoid(, self.Wzr) +, self.Uzr) +, self.Czr))
        zi = zr[:, :self.units]
        ri = zr[:, self.units: 2 * self.units]
        si_ = K.tanh(, self.W) +*h_tm1, self.U) +, self.C))
        si = (1-zi) * h_tm1 + zi * si_
        return si, [si] #h_tm1, [h_tm1]
        '''if self.consume_less == 'gpu':

            matrix_x = * B_W[0], self.W) + self.b
            matrix_inner = * B_U[0], self.U[:, :2 * self.units])

            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units: 2 * self.units]
            inner_z = matrix_inner[:, :self.units]
            inner_r = matrix_inner[:, self.units: 2 * self.units]

            z = self.inner_activation(x_z + inner_z)
            r = self.inner_activation(x_r + inner_r)

            x_h = matrix_x[:, 2 * self.units:]
            inner_h = * h_tm1 * B_U[0], self.U[:, 2 * self.units:])
            hh = self.activation(x_h + inner_h)
            if self.consume_less == 'cpu':
                x_z = x[:, :self.units]
                x_r = x[:, self.units: 2 * self.units]
                x_h = x[:, 2 * self.units:]
            elif self.consume_less == 'mem':
                x_z = * B_W[0], self.W_z) + self.b_z
                x_r = * B_W[1], self.W_r) + self.b_r
                x_h = * B_W[2], self.W_h) + self.b_h
                raise ValueError('Unknown `consume_less` mode.')
            z = self.inner_activation(x_z + * B_U[0], self.U_z))
            r = self.inner_activation(x_r + * B_U[1], self.U_r))

            hh = self.activation(x_h + * h_tm1 * B_U[2], self.U_h))
        h = z * h_tm1 + (1 - z) * hh
        return h, [h]'''
项目:recurrent-attention-for-QA-SQUAD-based-on-keras    作者:wentaozhu
def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        #B_U = states[1]  # dropout matrices for recurrent units
        #B_W = states[2]
        h_tm1a =, self.Wa)
        eij = + self.ha), self.Va)
        eijs = K.repeat_elements(eij, self.h_dim, axis=1)

        #alphaij = K.softmax(eijs) # batchsize * lenh       h batchsize * lenh * ndim
        #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
        #cisum = K.sum(ci, axis=1)
        cisum = eijs*self.h
        #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

        zr = K.sigmoid(, self.Wzr) +, self.Uzr) +, self.Czr))
        zi = zr[:, :self.units]
        ri = zr[:, self.units: 2 * self.units]
        si_ = K.tanh(, self.W) +*h_tm1, self.U) +, self.C))
        si = (1-zi) * h_tm1 + zi * si_
        return si, [si] #h_tm1, [h_tm1]
        '''if self.consume_less == 'gpu':

            matrix_x = * B_W[0], self.W) + self.b
            matrix_inner = * B_U[0], self.U[:, :2 * self.units])

            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units: 2 * self.units]
            inner_z = matrix_inner[:, :self.units]
            inner_r = matrix_inner[:, self.units: 2 * self.units]

            z = self.inner_activation(x_z + inner_z)
            r = self.inner_activation(x_r + inner_r)

            x_h = matrix_x[:, 2 * self.units:]
            inner_h = * h_tm1 * B_U[0], self.U[:, 2 * self.units:])
            hh = self.activation(x_h + inner_h)
            if self.consume_less == 'cpu':
                x_z = x[:, :self.units]
                x_r = x[:, self.units: 2 * self.units]
                x_h = x[:, 2 * self.units:]
            elif self.consume_less == 'mem':
                x_z = * B_W[0], self.W_z) + self.b_z
                x_r = * B_W[1], self.W_r) + self.b_r
                x_h = * B_W[2], self.W_h) + self.b_h
                raise ValueError('Unknown `consume_less` mode.')
            z = self.inner_activation(x_z + * B_U[0], self.U_z))
            r = self.inner_activation(x_r + * B_U[1], self.U_r))

            hh = self.activation(x_h + * h_tm1 * B_U[2], self.U_h))
        h = z * h_tm1 + (1 - z) * hh
        return h, [h]'''
项目:DeepNews    作者:kabrapratik28
def create_model(self,):
        RNN model creation
        Layers include Embedding Layer, 3 LSTM stacked,
        Simple Context layer (manually defined),
        Time Distributed Layer
        length_vocab, embedding_size = self.word2vec.shape
        print ("shape of word2vec matrix ", self.word2vec.shape)

        model = Sequential()

        # TODO: look at mask zero flag
                        length_vocab, embedding_size,
                        weights=[self.word2vec], mask_zero=True,

        for i in range(rnn_layers):
            lstm = LSTM(rnn_size, return_sequences=True,
                name='lstm_layer_%d' % (i + 1)

            # No drop out added !

                     mask=lambda inputs, mask: mask[:, max_len_desc:],

        vocab_size = self.word2vec.shape[0]

        model.add(Activation('softmax', name='activation_layer'))

        model.compile(loss='categorical_crossentropy', optimizer='adam')
        K.set_value(, np.float32(learning_rate))
        print (model.summary())
        return model
项目:ntm_keras    作者:flomlo
def _split_and_apply_activations(self, controller_output):
        """Split the controller output into the NTM output and per-head addressing data.

        Returns a triple ``(ntm_output, controller_instructions_read,
        controller_instructions_write)``. ``ntm_output`` is a tensor. The other
        two are lists holding one tuple per head: a read-head tuple is
        ``(k, beta, g, shift, gamma)``; a write-head tuple additionally carries
        the writing instructions ``erase_vector`` and ``add_vector``.

        Activations applied here (the original author notes they are necessary
        for stable results, and marks the erase-vector one as critical):
            ntm_output           : self.activation
            k, add_vector        : tanh
            erase_vector         : hard_sigmoid
            shift                : softmax
            gamma                : 1 + 9 * sigmoid(gamma)
            g                    : sigmoid
            beta                 : hard_sigmoid(beta) + 0.5
        """

        # splitting
        # NOTE(review): the tf.split call below is truncated in this copy -- the
        # tensor argument, the start of the size list and the closing
        # parenthesis are missing. Presumably controller_output is split along
        # axis 1 into the output part, the read-head part and the write-head
        # part; restore from the upstream project before use.
        ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
                                self.read_heads * self.controller_read_head_emitting_dim,
                                self.write_heads * self.controller_write_head_emitting_dim]),

        # One equally sized chunk per head.
        controller_instructions_read = tf.split(controller_instructions_read, self.read_heads, axis=1)
        controller_instructions_write = tf.split(controller_instructions_write, self.write_heads, axis=1)

        # Per read head: k (m_depth wide), beta (1), g (1), shift (3), gamma (1).
        controller_instructions_read = [
                tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1) for 
                single_head_data in controller_instructions_read]

        # Per write head: the same five fields followed by erase_vector and
        # add_vector, each m_depth wide.
        controller_instructions_write = [
                tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]), axis=1) for 
                single_head_data in controller_instructions_write]

        # Apply the per-field activations listed in the docstring.
        ntm_output = self.activation(ntm_output)
        controller_instructions_read = [(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma)) for
                (k, beta, g, shift, gamma) in controller_instructions_read]
        controller_instructions_write = [
                (tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma), hard_sigmoid(erase_vector), tanh(add_vector))  for 
                (k, beta, g, shift, gamma, erase_vector, add_vector) in controller_instructions_write]

        return (ntm_output, controller_instructions_read, controller_instructions_write)
项目:albemarle    作者:SeanTater
def __init__(self, output_dim, hidden_dim, output_length, depth=1, bidirectional=True, dropout=0.1, **kwargs):
        """Assemble the encoder / attention-decoder layer stack.

        Args:
            output_dim: Width of the final softmax projection.
            hidden_dim: Width of the recurrent layers. Must be even when
                ``bidirectional`` is true, because each direction gets half.
            output_length: Passed to the attention decoder as ``output_length``.
            depth: Either one number used for both sides, or a
                ``(encoder_depth, decoder_depth)`` pair.
            bidirectional: Use bidirectional encoder layers instead of plain ones.
            dropout: Accepted for interface compatibility; not consumed by the
                code in this block.
            **kwargs: Input-shape keys (``batch_input_shape``, ``input_shape``
                or ``input_dim``) are removed; everything left is forwarded to
                the recurrent layers.

        Raises:
            Exception: If ``bidirectional`` is true and ``hidden_dim`` is odd.
        """
        if bidirectional and hidden_dim % 2 != 0:
            raise Exception ("hidden_dim for AttentionSeq2seq should be even (Because of bidirectional RNN).")
        super(AttentionSeq2seq, self).__init__()
        if not isinstance(depth, (list, tuple)):
            depth = (depth, depth)

        # Strip the input-shape description out of kwargs so it is not forwarded
        # to every recurrent layer below.
        # NOTE(review): `shape` (like `dropout`) is not used further down in
        # this block; presumably an input layer and Dropout layers were part of
        # the stack upstream -- confirm before relying on input-shape handling.
        if 'batch_input_shape' in kwargs:
            shape = kwargs.pop('batch_input_shape')
        elif 'input_shape' in kwargs:
            shape = (None,) + tuple(kwargs.pop('input_shape'))
        elif 'input_dim' in kwargs:
            # Fix: the length used to be overwritten with None unconditionally;
            # None is only the fallback when no 'input_length' was supplied.
            input_length = kwargs.get('input_length')
            shape = (None, input_length, kwargs.pop('input_dim'))

        def _encoder_layer():
            # Fix: build exactly one encoder flavour per position. Previously
            # the plain encoder was also added after the bidirectional one.
            if bidirectional:
                return Bidirectional(LSTMEncoder(output_dim=int(hidden_dim / 2), state_input=False, return_sequences=True, **kwargs))
            return LSTMEncoder(output_dim=hidden_dim, state_input=False, return_sequences=True, **kwargs)

        # Encoder side: depth[0] layers in total.
        self.add(_encoder_layer())
        for _ in range(depth[0] - 1):
            self.add(_encoder_layer())
        encoder = self.layers[-1]

        # Per-timestep projection feeding the decoder side.
        self.add(TimeDistributed(Dense(hidden_dim if depth[1] > 1 else output_dim)))

        # Decoder side: the attention decoder followed by depth[1] - 1 recurrent layers.
        decoder = AttentionDecoder(hidden_dim=hidden_dim, output_length=output_length, state_input=False, **kwargs)
        # Fix: the decoder was constructed and stored on self but never added to
        # the layer stack.
        self.add(decoder)
        for _ in range(depth[1] - 1):
            self.add(LSTMEncoder(output_dim=hidden_dim, state_input=False, return_sequences=True, **kwargs))
        self.add(TimeDistributed(Dense(output_dim, activation='softmax')))

        self.encoder = encoder
        self.decoder = decoder
项目:DIL    作者:FoxRow
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_confidence : tensor
        Probability estimate for whether each box contains any object.
    box_class_probs : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    # NOTE(review): the index pairs built below line up with the
    # [height, width] reshape when height == width; verify the layout before
    # using this with non-square feature maps.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    # Each anchor gets num_classes + 5 values per cell:
    # 2 for xy, 2 for wh, 1 for confidence, then the class scores.
    feats = K.reshape(feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs