Python keras.backend 模块，sum() 实例源码

我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用keras.backend.sum()。

项目：onto-lstm 作者：pdasigi | 项目源码 | 文件源码

def get_initial_states(self, onto_nse_input, input_mask=None):
        input_to_read = onto_nse_input  # (batch_size, num_words, num_senses, num_hyps, output_dim + 1)
        memory_input = input_to_read[:, :, :, :, :-1]  # (bs, words, senses, hyps, output_dim)
        if input_mask is None:
            mem_0 = K.mean(memory_input, axis=(2, 3))  # (batch_size, num_words, output_dim)
        else:
            memory_mask = input_mask
            if K.ndim(onto_nse_input) != K.ndim(input_mask):
                memory_mask = K.expand_dims(input_mask)
            memory_mask = K.cast(memory_mask / (K.sum(memory_mask) + K.epsilon()), 'float32')
            mem_0 = K.sum(memory_input * memory_mask, axis=(2,3))  # (batch_size, num_words, output_dim)
        flattened_mem_0 = K.batch_flatten(mem_0)
        initial_states = self.reader.get_initial_states(input_to_read)
        initial_states += [flattened_mem_0]
        return initial_states

项目：onto-lstm 作者：pdasigi | 项目源码 | 文件源码

def call(self, x, mask=None):
        mean = super(IntraAttention, self).call(x, mask)
        # x: (batch_size, input_length, input_dim)
        # mean: (batch_size, input_dim)
        ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)  # (1, input_length)
        # (batch_size, input_length, input_dim)
        tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
        if mask is not None:
            if K.ndim(mask) > K.ndim(x):
                # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
                # TODO: Fix Bidirectional.
                mask = K.any(mask, axis=(-2, -1))
            if K.ndim(mask) < K.ndim(x):
                mask = K.expand_dims(mask)
            x = switch(mask, x, K.zeros_like(x))
        # (batch_size, input_length, proj_dim)
        projected_combination = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
        scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
        weights = K.softmax(scores)  # (batch_size, input_length)
        attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
        return attended_x

项目：keras-fractalnet 作者：snf | 项目源码 | 文件源码

def _drop_path(self, inputs):
        count = len(inputs)
        drops = K.switch(
            self.is_global,
            self._gen_global_path(count),
            self._gen_local_drops(count, self.p)
        )
        ave = K.zeros(shape=self.average_shape)
        for i in range(0, count):
            ave += inputs[i] * drops[i]
        sum = K.sum(drops)
        # Check that the sum is not 0 (global droppath can make it
        # 0) to avoid divByZero
        ave = K.switch(
            K.not_equal(sum, 0.),
            ave/sum,
            ave)
        return ave

项目：Deep-Learning-with-Keras 作者：PacktPublishing | 项目源码 | 文件源码

def call(self, x, mask=None):
        if K.image_dim_ordering == "th":
            _, f, r, c = self.shape
        else:
            _, r, c, f = self.shape
        half_n = self.n // 2
        squared = K.square(x)
        pooled = K.pool2d(squared, (half_n, half_n), strides=(1, 1),
                         padding="same", pool_mode="avg")
        if K.image_dim_ordering == "th":
            summed = K.sum(pooled, axis=1, keepdims=True)
            averaged = (self.alpha / self.n) * K.repeat_elements(summed, f, axis=1)
        else:
            summed = K.sum(pooled, axis=3, keepdims=True)
            averaged = (self.alpha / self.n) * K.repeat_elements(summed, f, axis=3)
        denom = K.pow(self.k + averaged, self.beta)
        return x / denom

项目：segmentation_DLMI 作者：imatge-upc | 项目源码 | 文件源码

def categorical_crossentropy_3d(y_true, y_predicted):
    """
    Computes categorical cross-entropy loss for a softmax distribution in a hot-encoded 3D array
    with shape (num_samples, num_classes, dim1, dim2, dim3)

    Parameters
    ----------
    y_true : keras.placeholder [batches, dim0,dim1,dim2]
        Placeholder for data holding the ground-truth labels encoded in a one-hot representation
    y_predicted : keras.placeholder [batches,channels,dim0,dim1,dim2]
        Placeholder for data holding the softmax distribution over classes

    Returns
    -------
    scalar
        Categorical cross-entropy loss value
    """
    y_true_flatten = K.flatten(y_true)
    y_pred_flatten = K.flatten(y_predicted)
    y_pred_flatten_log = -K.log(y_pred_flatten + K.epsilon())
    num_total_elements = K.sum(y_true_flatten)
    # cross_entropy = K.dot(y_true_flatten, K.transpose(y_pred_flatten_log))
    cross_entropy = tf.reduce_sum(tf.multiply(y_true_flatten, y_pred_flatten_log))
    mean_cross_entropy = cross_entropy / (num_total_elements + K.epsilon())
    return mean_cross_entropy

项目：segmentation_DLMI 作者：imatge-upc | 项目源码 | 文件源码

def elementwise_softmax_3d(matrix):
    """
    Computes element-wise softmax for 3D arrays (volumes), that is, for a matrix with shape
    (num_samples, dim1, dim2, dim3, num_classes)

    Parameters
    ----------
    matrix : keras.placeholder
        Placeholder for the 3D array whose softmax distribution we want to compute

    Returns
    -------
    keras.placeholder
        Placeholder for a 3D array with the softmax distribution for all classes with shape
        (num_samples, dim1, dim2, dim3, num_classes)

    """
    expon = lambda x: K.exp(x)
    expon_matrix = expon(matrix)
    softmax_matrix = expon_matrix / K.sum(expon_matrix, axis=4, keepdims=True)
    return softmax_matrix

项目：segmentation_DLMI 作者：imatge-upc | 项目源码 | 文件源码

def dice(y_true, y_pred):
    """
    Computes the Sorensen-Dice metric
                    TP
        Dice = 2 -------
                  T + P
    Parameters
    ----------
    y_true : keras.placeholder
        Placeholder that contains the ground truth labels of the classes
    y_pred : keras.placeholder
        Placeholder that contains the class prediction

    Returns
    -------
    scalar
        Dice metric
    """
    y_pred_decision = tf.floor((y_pred + K.epsilon()) / K.max(y_pred, axis=4, keepdims=True))

    y_sum = K.sum(y_true * y_pred_decision)

    return (2. * y_sum + K.epsilon()) / (K.sum(y_true) + K.sum(y_pred_decision) + K.epsilon())

项目：recurrent-attention-for-QA-SQUAD-based-on-keras 作者：wentaozhu | 项目源码 | 文件源码

def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        #B_U = states[1]  # dropout matrices for recurrent units
        #B_W = states[2]
        h_tm1a = K.dot(h_tm1, self.Wa)
        eij = K.dot(K.tanh(h_tm1a + K.dot(inputs[:, :self.h_dim], self.Ua)), self.Va)
        eijs = K.repeat_elements(eij, self.h_dim, axis=1)

        #alphaij = K.softmax(eijs) # batchsize * lenh       h batchsize * lenh * ndim
        #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
        #cisum = K.sum(ci, axis=1)
        cisum = eijs*inputs[:, :self.h_dim]
        #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

        zr = K.sigmoid(K.dot(inputs[:, self.h_dim:], self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
        zi = zr[:, :self.units]
        ri = zr[:, self.units: 2 * self.units]
        si_ = K.tanh(K.dot(inputs[:, self.h_dim:], self.W) + K.dot(ri*h_tm1, self.U) + K.dot(cisum, self.C))
        si = (1-zi) * h_tm1 + zi * si_
        return si, [si] #h_tm1, [h_tm1]

项目：keras-utilities 作者：cbaziotis | 项目源码 | 文件源码

def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ? to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

项目：keras-utilities 作者：cbaziotis | 项目源码 | 文件源码

def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = K.dot(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ? to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

项目：BiMPM_keras 作者：ijinmao | 项目源码 | 文件源码

def _mean_attentive_vectors(self, x2, cosine_matrix):
        """Mean attentive vectors.

        Calculate mean attentive vector for the entire sentence by weighted
        summing all the contextual embeddings of the entire sentence

        # Arguments
            x2: sequence vectors, (batch_size, x2_timesteps, embedding_size)
            cosine_matrix: cosine similarities matrix of x1 and x2,
                           (batch_size, x1_timesteps, x2_timesteps)

        # Output shape
            (batch_size, x1_timesteps, embedding_size)
        """
        # (batch_size, x1_timesteps, x2_timesteps, 1)
        expanded_cosine_matrix = K.expand_dims(cosine_matrix, axis=-1)
        # (batch_size, 1, x2_timesteps, embedding_size)
        x2 = K.expand_dims(x2, axis=1)
        # (batch_size, x1_timesteps, embedding_size)
        weighted_sum = K.sum(expanded_cosine_matrix * x2, axis=2)
        # (batch_size, x1_timesteps, 1)
        sum_cosine = K.expand_dims(K.sum(cosine_matrix, axis=-1) + self.epsilon, axis=-1)
        # (batch_size, x1_timesteps, embedding_size)
        attentive_vector = weighted_sum / sum_cosine
        return attentive_vector

项目：KATE 作者：hugochan | 项目源码 | 文件源码

def vae_loss(self, x, x_decoded_mean):
        xent_loss =  K.sum(K.binary_crossentropy(x_decoded_mean, x), axis=-1)
        kl_loss = - 0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1)

        return xent_loss + kl_loss

    # def weighted_vae_loss(self, feature_weights):
    #     def loss(y_true, y_pred):
    #         try:
    #             x = K.binary_crossentropy(y_pred, y_true)
    #             y = tf.Variable(feature_weights.astype('float32'))
    #             # y2 = y_true / K.sum(y_true)
    #             # import pdb;pdb.set_trace()
    #             xent_loss = K.dot(x, y)
    #             kl_loss = - 0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1)
    #         except Exception as e:
    #             print e
    #             import pdb;pdb.set_trace()
    #         return xent_loss + kl_loss
    #     return loss

项目：AerialCrackDetection_Keras 作者：TTMRonald | 项目源码 | 文件源码

def rpn_loss_regr(num_anchors):
    def rpn_loss_regr_fixed_num(y_true, y_pred):
        if K.image_dim_ordering() == 'th':
            x = y_true[:, 4 * num_anchors:, :, :] - y_pred
            x_abs = K.abs(x)
            x_bool = K.less_equal(x_abs, 1.0)
            return lambda_rpn_regr * K.sum(
                y_true[:, :4 * num_anchors, :, :] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :4 * num_anchors, :, :])
        else:
            x = y_true[:, :, :, 4 * num_anchors:] - y_pred
            x_abs = K.abs(x)
            x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)

            return lambda_rpn_regr * K.sum(
                y_true[:, :, :, :4 * num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :, :4 * num_anchors])

    return rpn_loss_regr_fixed_num

项目：deep-mil-for-whole-mammogram-classification 作者：wentaozhu | 项目源码 | 文件源码

def call(self, x,mask=None):
        import theano.tensor as T
        newx = T.sort(x)
        #response = K.reverse(newx, axes=1)
        #response = K.sum(x> 0.5, axis=1) / self.k
        return newx
        #response = K.reshape(newx,[-1,1])
        #return K.concatenate([1-response, response], axis=self.label)
        #response = K.reshape(x[:,self.axis], (-1,1))
        #return K.concatenate([1-response, response], axis=self.axis)
        #e = K.exp(x - K.max(x, axis=self.axis, keepdims=True))
        #s = K.sum(e, axis=self.axis, keepdims=True)
        #return e / s

项目：deep-mil-for-whole-mammogram-classification 作者：wentaozhu | 项目源码 | 文件源码

def call(self, x,mask=None):
        newx = K.sort(x)
        #response = K.reverse(newx, axes=1)
        #response = K.sum(x> 0.5, axis=1) / self.k
        return K.concatenate([newx[:,:self.softmink], newx[:,newx.shape[1]-self.softmaxk:]], axis=-1)
        #response = K.reshape(newx,[-1,1])
        #return K.concatenate([1-response, response], axis=self.label)
        #response = K.reshape(x[:,self.axis], (-1,1))
        #return K.concatenate([1-response, response], axis=self.axis)
        #e = K.exp(x - K.max(x, axis=self.axis, keepdims=True))
        #s = K.sum(e, axis=self.axis, keepdims=True)
        #return e / s

项目：deep-mil-for-whole-mammogram-classification 作者：wentaozhu | 项目源码 | 文件源码

def call(self, x,mask=None):
        import theano.tensor as T
        newx = T.sort(x)
        #response = K.reverse(newx, axes=1)
        #response = K.sum(x> 0.5, axis=1) / self.k
        return newx
        #response = K.reshape(newx,[-1,1])
        #return K.concatenate([1-response, response], axis=self.label)
        #response = K.reshape(x[:,self.axis], (-1,1))
        #return K.concatenate([1-response, response], axis=self.axis)
        #e = K.exp(x - K.max(x, axis=self.axis, keepdims=True))
        #s = K.sum(e, axis=self.axis, keepdims=True)
        #return e / s

项目：deep-mil-for-whole-mammogram-classification 作者：wentaozhu | 项目源码 | 文件源码

def call(self, x,mask=None):
        newx = K.sort(x)
        #response = K.reverse(newx, axes=1)
        #response = K.sum(x> 0.5, axis=1) / self.k
        return K.concatenate([newx[:,:self.softmink], newx[:,newx.shape[1]-self.softmaxk:]], axis=-1)
        #response = K.reshape(newx,[-1,1])
        #return K.concatenate([1-response, response], axis=self.label)
        #response = K.reshape(x[:,self.axis], (-1,1))
        #return K.concatenate([1-response, response], axis=self.axis)
        #e = K.exp(x - K.max(x, axis=self.axis, keepdims=True))
        #s = K.sum(e, axis=self.axis, keepdims=True)
        #return e / s

项目：keras-frcnn 作者：yhenon | 项目源码 | 文件源码

def rpn_loss_regr(num_anchors):
    def rpn_loss_regr_fixed_num(y_true, y_pred):
        if K.image_dim_ordering() == 'th':
            x = y_true[:, 4 * num_anchors:, :, :] - y_pred
            x_abs = K.abs(x)
            x_bool = K.less_equal(x_abs, 1.0)
            return lambda_rpn_regr * K.sum(
                y_true[:, :4 * num_anchors, :, :] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :4 * num_anchors, :, :])
        else:
            x = y_true[:, :, :, 4 * num_anchors:] - y_pred
            x_abs = K.abs(x)
            x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)

            return lambda_rpn_regr * K.sum(
                y_true[:, :, :, :4 * num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :, :4 * num_anchors])

    return rpn_loss_regr_fixed_num

项目：latplan 作者：guicho271828 | 项目源码 | 文件源码

def to_configs(states, verbose=True, **kwargs):
    base = setting['base']
    width  = states.shape[1] // base
    height = states.shape[1] // base
    load(width,height)

    def build():
        P = len(setting['panels'])
        states = Input(shape=(height*base,width*base))
        error = build_error(states, height, width, base)

        matches = 1 - K.clip(K.sign(error - threshold),0,1)
        # a, h, w, panel
        matches = K.reshape(matches, [K.shape(states)[0], height * width, -1])
        # a, pos, panel
        matches = K.permute_dimensions(matches, [0,2,1])
        # a, panel, pos
        config = matches * K.arange(height*width,dtype='float')
        config = K.sum(config, axis=-1)
        return Model(states, wrap(states, config))

    model = build()
    return model.predict(states, **kwargs)

项目：latplan 作者：guicho271828 | 项目源码 | 文件源码

def to_configs(states, verbose=True, **kwargs):
    base = panels.shape[1]
    dim  = states.shape[1] - pad*2
    size = dim // base

    def build():
        states = Input(shape=(dim+2*pad,dim+2*pad))
        s = tensor_swirl(states, radius=dim+2*pad * relative_swirl_radius, **unswirl_args)
        error = build_errors(s,base,pad,dim,size)
        matches = 1 - K.clip(K.sign(error - threshold),0,1)
        # a, h, w, panel
        matches = K.reshape(matches, [K.shape(states)[0], size * size, -1])
        # a, pos, panel
        config = matches * K.arange(2,dtype='float')
        config = K.sum(config, axis=-1)
        # this is 0,1 configs; for compatibility, we need -1 and 1
        config = - (config - 0.5)*2
        return Model(states, wrap(states, K.round(config)))

    return build().predict(states, **kwargs)

项目：keras-text 作者：raghakot | 项目源码 | 文件源码

def call(self, x, mask=None):
        # x: [..., time_steps, features]
        # ut = [..., time_steps, attention_dims]
        ut = K.dot(x, self.kernel)
        if self.use_bias:
            ut = K.bias_add(ut, self.bias)

        ut = K.tanh(ut)
        if self.use_context:
            ut = ut * self.context_kernel

        # Collapse `attention_dims` to 1. This indicates the weight for each time_step.
        ut = K.sum(ut, axis=-1, keepdims=True)

        # Convert those weights into a distribution but along time axis.
        # i.e., sum of alphas along `time_steps` axis should be 1.
        self.at = _softmax(ut, dim=1)
        if mask is not None:
            self.at *= K.cast(K.expand_dims(mask, -1), K.floatx())

        # Weighted sum along `time_steps` axis.
        return K.sum(x * self.at, axis=-2)

项目：keratin 作者：uw-biomedical-ml | 项目源码 | 文件源码

def dice(y_true, y_pred, smooth=1.0):
    """
    The Dice coefficient, defined as ::

        \frac{2 |X \intersect Y|}{|X| + |Y|}

    Parameters
    ----------
    y_true, y_pred : tensors
        The predicted and binary classification in an image

    """
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return ((2. * intersection + smooth) /
            (K.sum(y_true_f) + K.sum(y_pred_f) + smooth))

项目：reactionrnn 作者：minimaxir | 项目源码 | 文件源码

def reactionrnn_model(weights_path, num_classes, maxlen=140):
    '''
    Builds the model architecture for textgenrnn and
    loads the pretrained weights for the model.
    '''

    input = Input(shape=(maxlen,), name='input')
    embedded = Embedding(num_classes, 100, input_length=maxlen,
                         name='embedding')(input)
    rnn = GRU(256, return_sequences=False, name='rnn')(embedded)
    output = Dense(5, name='output',
                   activation=lambda x: K.relu(x) / K.sum(K.relu(x),
                                                          axis=-1))(rnn)

    model = Model(inputs=[input], outputs=[output])
    model.load_weights(weights_path, by_name=True)
    model.compile(loss='mse', optimizer='nadam')
    return model

项目：State-Frequency-Memory-stock-prediction 作者：z331565360 | 项目源码 | 文件源码

def get_initial_states(self, x):

        init_state_h = K.zeros_like(x)
        init_state_h = K.sum(init_state_h, axis = 1)
        reducer_s = K.zeros((self.input_dim, self.hidden_dim))
        reducer_f = K.zeros((self.hidden_dim, self.freq_dim))
        reducer_p = K.zeros((self.hidden_dim, self.output_dim))
        init_state_h = K.dot(init_state_h, reducer_s)

        init_state_p = K.dot(init_state_h, reducer_p)

        init_state = K.zeros_like(init_state_h)
        init_freq = K.dot(init_state_h, reducer_f)

        init_state = K.reshape(init_state, (-1, self.hidden_dim, 1))
        init_freq = K.reshape(init_freq, (-1, 1, self.freq_dim))

        init_state_S_re = init_state * init_freq
        init_state_S_im = init_state * init_freq

        init_state_time = K.cast_to_floatx(0.)

        initial_states = [init_state_p, init_state_h, init_state_S_re, init_state_S_im, init_state_time]
        return initial_states

项目：State-Frequency-Memory-stock-prediction 作者：z331565360 | 项目源码 | 文件源码

def get_initial_states(self, x):

        init_state_h = K.zeros_like(x)
        init_state_h = K.sum(init_state_h, axis = 1)
        reducer_s = K.zeros((self.input_dim, self.hidden_dim))
        reducer_f = K.zeros((self.hidden_dim, self.freq_dim))
        reducer_p = K.zeros((self.hidden_dim, self.output_dim))
        init_state_h = K.dot(init_state_h, reducer_s)

        init_state_p = K.dot(init_state_h, reducer_p)

        init_state = K.zeros_like(init_state_h)
        init_freq = K.dot(init_state_h, reducer_f)

        init_state = K.reshape(init_state, (-1, self.hidden_dim, 1))
        init_freq = K.reshape(init_freq, (-1, 1, self.freq_dim))

        init_state_S_re = init_state * init_freq
        init_state_S_im = init_state * init_freq

        init_state_time = K.cast_to_floatx(0.)

        initial_states = [init_state_p, init_state_h, init_state_S_re, init_state_S_im, init_state_time]
        return initial_states

项目：Cat-Segmentation 作者：ardamavi | 项目源码 | 文件源码

def dice_coef(y_true, y_pred):
    smooth = 1.
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

项目：onto-lstm 作者：pdasigi | 项目源码 | 文件源码

def get_split_averages(input_tensor, input_mask, indices):
        # Splits input tensor into three parts based on the indices and
        # returns average of values prior to index, values at the index and
        # average of values after the index.
        # input_tensor: (batch_size, input_length, input_dim)
        # input_mask: (batch_size, input_length)
        # indices: (batch_size, 1)
        # (1, input_length)
        length_range = K.expand_dims(K.arange(K.shape(input_tensor)[1]), dim=0)
        # (batch_size, input_length)
        batched_range = K.repeat_elements(length_range, K.shape(input_tensor)[0], 0)
        tiled_indices = K.repeat_elements(indices, K.shape(input_tensor)[1], 1)  # (batch_size, input_length)
        greater_mask = K.greater(batched_range, tiled_indices)  # (batch_size, input_length)
        lesser_mask = K.lesser(batched_range, tiled_indices)  # (batch_size, input_length)
        equal_mask = K.equal(batched_range, tiled_indices)  # (batch_size, input_length)

        # We also need to mask these masks using the input mask.
        # (batch_size, input_length)
        if input_mask is not None:
            greater_mask = switch(input_mask, greater_mask, K.zeros_like(greater_mask))
            lesser_mask = switch(input_mask, lesser_mask, K.zeros_like(lesser_mask))

        post_sum = K.sum(switch(K.expand_dims(greater_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
        pre_sum = K.sum(switch(K.expand_dims(lesser_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
        values_at_indices = K.sum(switch(K.expand_dims(equal_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)

        post_normalizer = K.expand_dims(K.sum(greater_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)
        pre_normalizer = K.expand_dims(K.sum(lesser_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)

        return K.cast(pre_sum / pre_normalizer, 'float32'), values_at_indices, K.cast(post_sum / post_normalizer, 'float32')

项目：onto-lstm 作者：pdasigi | 项目源码 | 文件源码

def summarize_memory(o_t, mem_tm1):
        '''
        This method selects the relevant parts of the memory given the read output and summarizes the
        memory. Implements Equations 2-3 or 8-11 in the paper.
        '''
        # Selecting relevant memory slots, Equation 2
        z_t = K.softmax(K.sum(K.expand_dims(o_t, dim=1) * mem_tm1, axis=2))  # (batch_size, input_length)
        # Summarizing memory, Equation 3
        m_rt = K.sum(K.expand_dims(z_t, dim=2) * mem_tm1, axis=1)  # (batch_size, output_dim)
        return z_t, m_rt