Python keras.backend module: squeeze() code examples

We extracted the following 43 code examples from open-source Python projects to illustrate how to use keras.backend.squeeze().
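
Before the project excerpts, a quick orientation: K.squeeze(x, axis) removes one axis of size 1 from a tensor, and it is typically paired with K.expand_dims(), which inserts one. The following minimal sketch is ours, not from any of the projects below, and assumes the standalone Keras 2.x API with a TensorFlow backend:

import numpy as np
from keras import backend as K

x = K.variable(np.zeros((2, 3, 1)))  # a tensor with a dummy trailing axis
print(K.int_shape(x))                # (2, 3, 1)

y = K.squeeze(x, axis=-1)            # drop the size-1 axis
print(K.int_shape(y))                # (2, 3)

z = K.expand_dims(y, axis=-1)        # and put it back
print(K.int_shape(z))                # (2, 3, 1)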

Project: keras-utilities    Author: cbaziotis    | project source | file source
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights

    Returns:

    """
    if K.backend() == 'tensorflow':
        # todo: check that this is correct
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
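
To see why the TensorFlow branch above needs the squeeze, here is a short shape walk-through of the wrapper; the tensors are illustrative placeholders, not the project's data:

import numpy as np
from keras import backend as K

x = K.variable(np.random.rand(4, 10, 8))  # (batch, time, dim)
kernel = K.variable(np.random.rand(8))    # (dim,)

# On the TensorFlow backend the kernel is expanded to shape (8, 1), the
# product comes out as (4, 10, 1), and squeezing the last axis recovers
# the (batch, time) scores.
scores = K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
print(K.int_shape(scores))                # (4, 10)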
Project: nn_playground    Author: DingKe    | project source | file source
def call(self, x):
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        # note that the .build() method of subclasses MUST define
        # self.input_spec with a complete input shape.
        input_shape = self.input_spec[0].shape

        if self.window_size > 1:
            x = K.temporal_padding(x, (self.window_size-1, 0))
        x = K.expand_dims(x, 2)  # add a dummy dimension

        # z, g
        output = K.conv2d(x, self.kernel, strides=self.strides,
                          padding='valid',
                          data_format='channels_last')
        output = K.squeeze(output, 2)  # remove the dummy dimension
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        z = output[:, :, :self.output_dim]
        g = output[:, :, self.output_dim:]

        return self.activation(z) * K.sigmoid(g)
Project: nn_playground    Author: DingKe    | project source | file source
def preprocess_input(self, inputs, training=None):
        if self.window_size > 1:
            inputs = K.temporal_padding(inputs, (self.window_size-1, 0))
        inputs = K.expand_dims(inputs, 2)  # add a dummy dimension

        output = K.conv2d(inputs, self.kernel, strides=self.strides,
                          padding='valid',
                          data_format='channels_last')
        output = K.squeeze(output, 2)  # remove the dummy dimension
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')

        if self.dropout is not None and 0. < self.dropout < 1.:
            z = output[:, :, :self.units]
            f = output[:, :, self.units:2 * self.units]
            o = output[:, :, 2 * self.units:]
            f = K.in_train_phase(1 - _dropout(1 - f, self.dropout), f, training=training)
            return K.concatenate([z, f, o], -1)
        else:
            return output
Project: latplan    Author: guicho271828    | project source | file source
def generate_gpu(configs,**kwargs):
    configs = np.array(configs)
    import math
    size = int(math.sqrt(len(configs[0])))
    base = panels.shape[1]
    dim = base*size

    def build():
        P = 2
        configs = Input(shape=(size*size,))
        _configs = 1 - K.round((configs/2)+0.5) # from -1/1 to 1/0
        configs_one_hot = K.one_hot(K.cast(_configs,'int32'), P)
        configs_one_hot = K.reshape(configs_one_hot, [-1,P])
        _panels = K.variable(panels)
        _panels = K.reshape(_panels, [P, base*base])
        states = tf.matmul(configs_one_hot, _panels)
        states = K.reshape(states, [-1, size, size, base, base])
        states = K.permute_dimensions(states, [0, 1, 3, 2, 4])
        states = K.reshape(states, [-1, size*base, size*base, 1])
        states = K.spatial_2d_padding(states, padding=((pad,pad),(pad,pad)))
        states = K.squeeze(states, -1)
        return Model(configs, wrap(configs, states))

    return preprocess(batch_swirl(build().predict(configs,**kwargs)))
Project: latplan    Author: guicho271828    | project source | file source
def generate_gpu2(configs,**kwargs):
    configs = np.array(configs)
    import math
    size = int(math.sqrt(len(configs[0])))
    base = panels.shape[1]
    dim = base*size

    def build():
        P = 2
        configs = Input(shape=(size*size,))
        _configs = 1 - K.round((configs/2)+0.5) # from -1/1 to 1/0
        configs_one_hot = K.one_hot(K.cast(_configs,'int32'), P)
        configs_one_hot = K.reshape(configs_one_hot, [-1,P])
        _panels = K.variable(panels)
        _panels = K.reshape(_panels, [P, base*base])
        states = tf.matmul(configs_one_hot, _panels)
        states = K.reshape(states, [-1, size, size, base, base])
        states = K.permute_dimensions(states, [0, 1, 3, 2, 4])
        states = K.reshape(states, [-1, size*base, size*base, 1])
        states = K.spatial_2d_padding(states, padding=((pad,pad),(pad,pad)))
        states = K.squeeze(states, -1)
        states = tensor_swirl(states, radius=dim+2*pad * relative_swirl_radius, **swirl_args)
        return Model(configs, wrap(configs, states))

    return preprocess(build().predict(configs,**kwargs))
Project: emnlp2017-bilstm-cnn-crf    Author: UKPLab    | project source | file source
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.'''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step,
                        gamma,
                        initial_states,
                        go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
Project: SGAITagger    Author: zhiweiuu    | project source | file source
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.'''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = KC.batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step,
                        gamma,
                        initial_states,
                        go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
Project: rl    Author: Shmuma    | project source | file source
def make_model(state_shape, n_actions):
    in_t = Input(shape=(HISTORY_STEPS,) + state_shape, name='input')
    action_t = Input(shape=(1,), dtype='int32', name='action')
    advantage_t = Input(shape=(1,), name='advantage')

    fl_t = Flatten(name='flat')(in_t)
    l1_t = Dense(SIMPLE_L1_SIZE, activation='relu', name='l1')(fl_t)
    l2_t = Dense(SIMPLE_L2_SIZE, activation='relu', name='l2')(l1_t)
    policy_t = Dense(n_actions, name='policy', activation='softmax')(l2_t)

    def loss_func(args):
        p_t, act_t, adv_t = args
        oh_t = K.one_hot(act_t, n_actions)
        oh_t = K.squeeze(oh_t, 1)
        p_oh_t = K.log(1e-6 + K.sum(oh_t * p_t, axis=-1, keepdims=True))
        res_t = adv_t * p_oh_t
        return -res_t

    loss_t = Lambda(loss_func, output_shape=(1,), name='loss')([policy_t, action_t, advantage_t])

    return Model(input=[in_t, action_t, advantage_t], output=[policy_t, loss_t])
Project: AdaptationSeg    Author: YangZhang4065    | project source | file source
def layout_loss_hard(y_true,y_pred):

    y_pred=K.clip(y_pred,-50.,50.)#prevent overflow
    exp_pred=K.exp(y_pred-K.max(y_pred,axis=1,keepdims=True))
    y_pred_softmax=exp_pred/K.sum(exp_pred,axis=1,keepdims=True)

    max_pred_softmax=K.max(y_pred_softmax,axis=1,keepdims=True)
    bin_pred_softmax_a=y_pred_softmax/max_pred_softmax
    bin_pred_softmax=bin_pred_softmax_a**6.

    final_pred=K.mean(bin_pred_softmax,axis=[2,3])
    final_pred=final_pred/(K.sum(final_pred,axis=1,keepdims=True)+K.epsilon())
    y_true_s=K.squeeze(y_true,axis=3)
    y_true_s=K.squeeze(y_true_s,axis=2)
    tier_wise_loss_v=-K.clip(K.log(final_pred),-500,500)*y_true_s
    return K.mean(K.sum(tier_wise_loss_v,axis=1))


Project: devise-keras    Author: priyamtejaswin    | project source | file source
def custom_for_keras(self, ALL_word_embeds):
        ## only the top 20 rows from word_vectors is legit!
        def top_accuracy(true_word_indices, image_vectors):
            l2 = lambda x, axis: K.sqrt(K.sum(K.square(x), axis=axis, keepdims=True))
            l2norm = lambda x, axis: x/l2(x, axis)

            l2_words = l2norm(ALL_word_embeds, axis=1)
            l2_images = l2norm(image_vectors, axis=1)

            tiled_words = K.tile(K.expand_dims(l2_words, axis=1) , (1, 200, 1))
            tiled_images = K.tile(K.expand_dims(l2_images, axis=1), (1, 20, 1))

            # K.squeeze requires an explicit axis; the l2 lambda keeps a
            # trailing size-1 axis, so drop it here.
            diff = K.squeeze(l2(l2_words - l2_images, axis=2), axis=-1)

            # slice_top3 = lambda x: x[:, 0:3]
            # slice_top1 = lambda x: x[:, 0:1]

            diff_top5 = metrics.top_k_categorical_accuracy(tiled_images, diff)
            return diff_top5

        return top_accuracy
Project: anago    Author: Hironsan    | project source | file source
def _backward(gamma, mask):
    """Backward recurrence of the linear chain crf."""
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step,
                        gamma,
                        initial_states,
                        go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
Project: pose-regression    Author: dluvizon    | project source | file source
def lin_interpolation_2d(inp, dim):

    num_rows, num_cols, num_filters = K.int_shape(inp)[1:]
    conv = SeparableConv2D(num_filters, (num_rows, num_cols), use_bias=False)
    x = conv(inp)

    w = conv.get_weights()
    w[0].fill(0)
    w[1].fill(0)
    linspace = linspace_2d(num_rows, num_cols, dim=dim)

    for i in range(num_filters):
        w[0][:,:, i, 0] = linspace[:,:]
        w[1][0, 0, i, i] = 1.

    conv.set_weights(w)
    conv.trainable = False

    x = Lambda(lambda x: K.squeeze(x, axis=1))(x)
    x = Lambda(lambda x: K.squeeze(x, axis=1))(x)
    x = Lambda(lambda x: K.expand_dims(x, axis=-1))(x)

    return x
Project: tying-wv-and-wc    Author: icoxfog417    | project source | file source
def augmented_loss(self, y_true, y_pred):
        _y_pred = Activation("softmax")(y_pred)
        loss = K.categorical_crossentropy(_y_pred, y_true)

        # y is (batch x seq x vocab)
        y_indexes = K.argmax(y_true, axis=2)  # turn one hot to index. (batch x seq)
        y_vectors = self.embedding(y_indexes)  # lookup the vector (batch x seq x vector_length)

        #v_length = self.setting.vector_length
        #y_vectors = K.reshape(y_vectors, (-1, v_length))
        #y_t = K.map_fn(lambda v: K.dot(self.embedding.embeddings, K.reshape(v, (-1, 1))), y_vectors)
        #y_t = K.squeeze(y_t, axis=2)  # unknown but necessary operation
        #y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))

        # vector x embedding dot products (batch x seq x vocab)
        y_t = tf.tensordot(y_vectors, K.transpose(self.embedding.embeddings), 1)
        y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))  # explicitly set shape
        y_t = K.softmax(y_t / self.temperature)
        _y_pred_t = Activation("softmax")(y_pred / self.temperature)
        aug_loss = kullback_leibler_divergence(y_t, _y_pred_t)
        loss += (self.gamma * self.temperature) * aug_loss
        return loss
Project: pointer-networks-experiments    Author: zygmuntz    | project source | file source
def step(self, x_input, states):
        #print "x_input:", x_input, x_input.shape
        # <TensorType(float32, matrix)>

        input_shape = self.input_spec[0].shape
        en_seq = states[-1]
        _, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])

        # vt*tanh(W1*e+W2*d)
        dec_seq = K.repeat(h, input_shape[1])
        Eij = time_distributed_dense(en_seq, self.W1, output_dim=1)
        Dij = time_distributed_dense(dec_seq, self.W2, output_dim=1)
        U = self.vt * tanh(Eij + Dij)
        U = K.squeeze(U, 2)

        # make probability tensor
        pointer = softmax(U)
        return pointer, [h, c]
Project: deeppavlov    Author: deepmipt    | project source | file source
def weight_and_reduce(self, inputs):
        """Define a function for a lambda layer of a model."""

        inp, inp_cont = inputs
        reduced = K.batch_dot(inp_cont,
                              K.permute_dimensions(inp, (0,2,1)), axes=[1,2])
        return K.squeeze(reduced, 1)
Project: deeppavlov    Author: deepmipt    | project source | file source
def terminal_f(self, inp):
        val = np.concatenate((np.zeros((self.max_sequence_length-1,1)), np.ones((1,1))), axis=0)
        kcon = K.constant(value=val, dtype='float32')
        inp = Lambda(lambda x: K.permute_dimensions(x, (0,2,1)))(inp)
        last_state = Lambda(lambda x: K.permute_dimensions(K.dot(x, kcon), (0,2,1)))(inp)
        return K.squeeze(last_state, 1)
Project: deeppavlov    Author: deepmipt    | project source | file source
def terminal_b(self, inp):
        val = np.concatenate((np.ones((1,1)), np.zeros((self.max_sequence_length-1,1))), axis=0)
        kcon = K.constant(value=val, dtype='float32')
        inp = Lambda(lambda x: K.permute_dimensions(x, (0,2,1)))(inp)
        last_state = Lambda(lambda x: K.permute_dimensions(K.dot(x, kcon), (0,2,1)))(inp)
        return K.squeeze(last_state, 1)
Project: emnlp2017-bilstm-cnn-crf    Author: UKPLab    | project source | file source
def sparse_loss(self, y_true, y_pred):
        '''Linear Chain Conditional Random Field loss function with sparse
        tag sequences.
        '''
        y_true = K.cast(y_true, 'int32')
        y_true = K.squeeze(y_true, 2)
        mask = self._fetch_mask()
        return sparse_chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end, mask)
Project: MMdnn    Author: Microsoft    | project source | file source
def _layer_LRN(self):
        self.add_body(0, '''
from keras.layers.core import Layer
class LRN(Layer):

    def __init__(self, size=5, alpha=0.0005, beta=0.75, k=2, **kwargs):
        self.n = size
        self.alpha = alpha
        self.beta = beta
        self.k = k
        super(LRN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.shape = input_shape
        super(LRN, self).build(input_shape)

    def call(self, x, mask=None):
        half_n = self.n - 1
        squared = K.square(x)
        scale = self.k
        norm_alpha = self.alpha / (2 * half_n + 1)
        if K.image_dim_ordering() == "th":
            b, f, r, c = self.shape
            squared = K.expand_dims(squared, 0)
            squared = K.spatial_3d_padding(squared, padding=((half_n, half_n), (0, 0), (0,0)))
            squared = K.squeeze(squared, 0)
            for i in range(half_n*2+1):
                scale += norm_alpha * squared[:, i:i+f, :, :]
        else:
            b, r, c, f = self.shape
            squared = K.expand_dims(squared, -1)
            squared = K.spatial_3d_padding(squared, padding=((0, 0), (0,0), (half_n, half_n)))
            squared = K.squeeze(squared, -1)
            for i in range(half_n*2+1):
                scale += norm_alpha * squared[:, :, :, i:i+f]

        scale = K.pow(scale, self.beta)
        return x / scale

    def compute_output_shape(self, input_shape):
        return input_shape''')
Project: pepnet    Author: hammerlab    | project source | file source
def compute_mask(self, inputs, mask=None):
        """Computes an output mask tensor.

        # Arguments
            inputs: Tensor or list of tensors.
            mask: Tensor or list of tensors.

        # Returns
            None or a tensor (or list of tensors,
                one per output tensor of the layer).
        """
        if mask is None:
            return None
        # dimensions of mask should be (batch_size, time_steps)
        assert mask.ndim == 2
        # add a dummy dimension so that the shape is now
        # (batch_size, time_steps, 1)
        mask = K.expand_dims(mask, 2)
        # now add a fake 2nd spatial dimension
        # (batch_size, time_steps, 1, 1)
        mask = K.expand_dims(mask, 3)
        strides = self.strides + (1,)
        pool_size = self.pool_size + (1,)
        mask = K.pool2d(
            mask,
            pool_size=pool_size,
            strides=strides,
            padding=self.padding,
            data_format="channels_last",
            pool_mode='max')
        # get rid of dummy dimensions
        mask = K.squeeze(mask, 3)
        mask = K.squeeze(mask, 2)
        return mask
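
The mask pooling above can be reproduced in isolation. A minimal sketch, assuming channels_last pooling with an illustrative pool size of (2, 1); none of these values come from the project:

import numpy as np
from keras import backend as K

# A (batch=1, time=4) mask: the last two steps are padding.
mask = K.variable(np.array([[1., 1., 0., 0.]]))
mask = K.expand_dims(mask, 2)  # (1, 4, 1)
mask = K.expand_dims(mask, 3)  # (1, 4, 1, 1)

# Max-pooling keeps an output step alive if any pooled input step was.
pooled = K.pool2d(mask, pool_size=(2, 1), strides=(2, 1),
                  padding='valid', data_format='channels_last',
                  pool_mode='max')
pooled = K.squeeze(K.squeeze(pooled, 3), 2)  # back to (batch, time)
print(K.eval(pooled))  # [[1. 0.]]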
Project: kfs    Author: the-moliver    | project source | file source
def call(self, x, mask=None):
        x = K.permute_dimensions(x, (0, 2, 1))
        x = K.reshape(x, (-1, self.input_length))
        x = K.expand_dims(x, 1)
        x = K.expand_dims(x, -1)
        if self.real_filts is not None:
            conv_out_r = K.conv2d(x, self.W_r, strides=self.subsample,
                                  border_mode=self.border_mode,
                                  dim_ordering='th')
        else:
            conv_out_r = x

        if self.complex_filts is not None:
            conv_out_c1 = K.conv2d(x, self.W_c1, strides=self.subsample,
                                   border_mode=self.border_mode,
                                   dim_ordering='th')
            conv_out_c2 = K.conv2d(x, self.W_c2, strides=self.subsample,
                                   border_mode=self.border_mode,
                                   dim_ordering='th')
            conv_out_c = K.sqrt(K.square(conv_out_c1) + K.square(conv_out_c2) + K.epsilon())
            output = K.concatenate((conv_out_r, conv_out_c), axis=1)
        else:
            output = conv_out_r

        output_shape = self.get_output_shape_for((None, self.input_length, self.input_dim))
        output = K.squeeze(output, 3)  # remove the dummy 3rd dimension
        output = K.permute_dimensions(output, (2, 1, 0))
        output = K.reshape(output, (-1, output_shape[1], output.shape[1]*output.shape[2]))
        return output
Project: SGAITagger    Author: zhiweiuu    | project source | file source
def sparse_loss(self, y_true, y_pred):
        '''Linear Chain Conditional Random Field loss function with sparse
        tag sequences.
        '''
        y_true = K.cast(y_true, 'int32')
        y_true = K.squeeze(y_true, 2)
        mask = self._fetch_mask()
        return sparse_chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end, mask)
Project: rl    Author: Shmuma    | project source | file source
def create_policy_loss(policy_t, value_t, n_actions):
    """
    Policy loss 
    :param policy_t: policy tensor from prediction part
    :param value_t: value tensor from prediction part
    :param n_actions: count of actions in space
    :param entropy_beta: entropy loss scaling factor
    :return: action_t, advantage_t, policy_loss_t
    """
    action_t = Input(batch_shape=(None, 1), name='action', dtype='int32')
    reward_t = Input(batch_shape=(None, 1), name="reward")

    def policy_loss_func(args):
        p_t, v_t, act_t, rew_t = args
        log_p_t = tf.nn.log_softmax(p_t)
        oh_t = K.one_hot(act_t, n_actions)
        oh_t = K.squeeze(oh_t, 1)
        p_oh_t = K.sum(log_p_t * oh_t, axis=-1, keepdims=True)
        adv_t = (rew_t - K.stop_gradient(v_t))
        tf.summary.scalar("advantage_mean", K.mean(adv_t))
        tf.summary.scalar("advantage_rms", K.sqrt(K.mean(K.square(adv_t))))

        res_t = -adv_t * p_oh_t
        tf.summary.scalar("loss_policy_mean", K.mean(res_t))
        tf.summary.scalar("loss_policy_rms", K.sqrt(K.mean(K.square(res_t))))
        return res_t

    loss_args = [policy_t, value_t, action_t, reward_t]
    policy_loss_t = Lambda(policy_loss_func, output_shape=(1,), name='policy_loss')(loss_args)

    tf.summary.scalar("value_mean", K.mean(value_t))
    tf.summary.scalar("reward_mean", K.mean(reward_t))

    return action_t, reward_t, policy_loss_t
Project: foolbox    Author: bethgelab    | project source | file source
def predictions_and_gradient(self, image, label):
        predictions, gradient = self._pred_grad_fn([
            self._process_input(image[np.newaxis]),
            np.array([label])])
        predictions = np.squeeze(predictions, axis=0)
        gradient = np.squeeze(gradient, axis=0)
        gradient = self._process_gradient(gradient)
        assert predictions.shape == (self.num_classes(),)
        assert gradient.shape == image.shape
        return predictions, gradient
Project: foolbox    Author: bethgelab    | project source | file source
def backward(self, gradient, image):
        assert gradient.ndim == 1
        gradient = np.reshape(gradient, (-1, 1))
        gradient = self._bw_grad_fn([
            gradient,
            self._process_input(image[np.newaxis]),
        ])
        gradient = np.squeeze(gradient, axis=0)
        gradient = self._process_gradient(gradient)
        assert gradient.shape == image.shape
        return gradient
Project: AdaptationSeg    Author: YangZhang4065    | project source | file source
def binarize_label(batch_seg):
    label_tensor_to_return=np.zeros((batch_seg.shape[0],class_number,image_size[0],image_size[1]),dtype=np.bool)
    count=0
    for i in range(batch_seg.shape[0]):
        label=np.squeeze(batch_seg[i,:,:])
        label_return=np.zeros((class_number,label.shape[0],label.shape[1]),dtype=np.bool)
        it = np.nditer(label, flags=['multi_index'])
        while not it.finished:
            if np.asscalar(it[0]) <= 12 or np.asscalar(it[0]) ==15 or np.asscalar(it[0]) ==17 or np.asscalar(it[0]) ==19 or np.asscalar(it[0]) ==21:
                label_return[it[0],it.multi_index[0],it.multi_index[1]]=True
            it.iternext()
        label_return = label_return[np.newaxis, ...]
        label_tensor_to_return[count,:,:,:]=label_return
        count+=1
    return label_tensor_to_return
Project: AdaptationSeg    Author: YangZhang4065    | project source | file source
def binarize_SP(batch_seg):
    max_dim=np.amax(batch_seg)
    label_tensor_to_return=np.zeros((batch_seg.shape[0],max_dim,image_size[0],image_size[1]),dtype=np.bool)
    count=0
    for i in range(batch_seg.shape[0]):
        label=np.squeeze(batch_seg[i,:,:])
        label_return=np.zeros((max_dim,label.shape[0],label.shape[1]),dtype=np.bool)
        it = np.nditer(label, flags=['multi_index'])
        while not it.finished:
            label_return[it[0]-1,it.multi_index[0],it.multi_index[1]]=True
            it.iternext()
        label_return = label_return[np.newaxis, ...]
        label_tensor_to_return[count,:,:,:]=label_return
        count+=1
    return label_tensor_to_return
Project: albemarle    Author: SeanTater    | project source | file source
def step(self, x, states):
        h_tm1, c_tm1, y_tm1, B, U, H = states
        s = K.dot(c_tm1, self.W_h) + self.b_h
        s = K.repeat(s, self.input_length)
        energy = time_distributed_dense(s + H, self.W_a, self.b_a)
        energy = K.squeeze(energy, 2)
        alpha = K.softmax(energy)
        alpha = K.repeat(alpha, self.input_dim)
        alpha = K.permute_dimensions(alpha, (0, 2, 1))
        weighted_H = H * alpha
        v = K.sum(weighted_H, axis=1)
        y, new_states = super(AttentionDecoder, self).step(v, states[:-1])
        return y, new_states
Project: anago    Author: Hironsan    | project source | file source
def sparse_loss(self, y_true, y_pred):
        """Linear Chain Conditional Random Field loss function with sparse
        tag sequences.
        """
        y_true = K.cast(y_true, 'int32')
        y_true = K.squeeze(y_true, 2)
        mask = self._fetch_mask()
        return sparse_chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end, mask)
Project: neural-segmentation    Author: melsner    | project source | file source
def masked_categorical_crossentropy(y_true, y_pred):
    mask = K.cast(K.expand_dims(K.any(y_true, -1), axis=-1), 'float32')
    y_pred *= mask
    y_pred += 1-mask
    losses = K.categorical_crossentropy(y_pred, y_true)
    losses *= K.squeeze(mask, -1)
    ## Normalize by number of real segments, using a small non-zero denominator in cases of padding characters
    ## in order to avoid division by zero
    #losses /= (K.mean(mask) + (1e-10*(1-K.mean(mask))))
    return losses
Project: neural-segmentation    Author: melsner    | project source | file source
def masked_categorical_accuracy(y_true, y_pred):
    mask = K.cast(K.expand_dims(K.greater(K.argmax(y_true, axis=-1), 0), axis=-1), 'float32')
    accuracy = K.cast(K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), 'float32')
    accuracy *= K.squeeze(mask, -1)
    ## Normalize by number of real segments, using a small non-zero denominator in cases of padding characters
    ## in order to avoid division by zero
    #accuracy /= (K.mean(mask) + (1e-10*(1-K.mean(mask))))
    return accuracy
Project: Named-Entity-Recognition    Author: vishal1796    | project source | file source
def viterbi_decoding(self, X, mask=None):
        input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
        if self.use_boundary:
            input_energy = self.add_boundary_energy(input_energy, mask, self.left_boundary, self.right_boundary)

        argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
        argmin_tables = K.cast(argmin_tables, 'int32')

        # backward to find best path, `initial_best_idx` can be any, as all elements in the last argmin_table are the same
        argmin_tables = K.reverse(argmin_tables, 1)
        initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]  # matrix instead of vector is required by tf `K.rnn`
        if K.backend() == 'theano':
            initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

        def gather_each_row(params, indices):
            n = K.shape(indices)[0]
            if K.backend() == 'theano':
                return params[K.T.arange(n), indices]
            else:
                indices = K.transpose(K.stack([K.tf.range(n), indices]))
                return K.tf.gather_nd(params, indices)

        def find_path(argmin_table, best_idx):
            next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
            next_best_idx = K.expand_dims(next_best_idx)
            if K.backend() == 'theano':
                next_best_idx = K.T.unbroadcast(next_best_idx, 1)
            return next_best_idx, [next_best_idx]

        _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx, input_length=K.int_shape(X)[1], unroll=self.unroll)
        best_paths = K.reverse(best_paths, 1)
        best_paths = K.squeeze(best_paths, 2)

        return K.one_hot(best_paths, self.units)
Project: Neural_Artistic_Style_Transfer    Author: giuseppebonaccorso    | project source | file source
def gramian(filters):
        c_filters = K.batch_flatten(K.permute_dimensions(K.squeeze(filters, axis=0), pattern=(2, 0, 1)))
        return K.dot(c_filters, K.transpose(c_filters))
Project: CIAN    Author: yanghanxy    | project source | file source
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
Project: NeuralSentenceOrdering    Author: FudanNLP    | project source | file source
def get_output(self, train=False):
        X = train
        X = K.expand_dims(X, -1)  # add a dimension of the right
        X = K.permute_dimensions(X, (0, 2, 1, 3))
        conv_out = K.conv2d(X, self.W, strides=self.subsample,
                            border_mode=self.border_mode,
                            dim_ordering='th')

        output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1))
        output = self.activation(output)
        output = K.squeeze(output, 3)  # remove the dummy 3rd dimension
        output = K.permute_dimensions(output, (0, 2, 1))
        return output
Project: ikelos    Author: braingineer    | project source | file source
def call(self, x, mask=None):
        energy = K.squeeze(self.layer(x), 2)
        p_matrix = softmax(energy)
        if mask is not None:
            mask = self.squash_mask(mask)
            p_matrix = make_safe(p_matrix * mask) # remove unwanted items
            p_matrix = p_matrix / K.sum(p_matrix, axis=-1, keepdims=True) # renormalize
        return make_safe(p_matrix)
Project: onto-lstm    Author: pdasigi    | project source | file source
def call(self, x, mask=None):
        # x: (batch_size, input_length, input_dim) where input_length = head_size + 2
        head_encoding = x[:, :-2, :]  # (batch_size, head_size, input_dim)
        prep_encoding = x[:, -2, :]  # (batch_size, input_dim)
        child_encoding = x[:, -1, :]  # (batch_size, input_dim)
        if self.composition_type == 'HPCD':
            # TODO: The following line may not work with TF.
            # (batch_size, head_size, input_dim, 1) * (1, head_size, input_dim, proj_dim)
            head_proj_prod = K.expand_dims(head_encoding) * K.expand_dims(self.dist_proj_head, dim=0)
            head_projection = K.sum(head_proj_prod, axis=2)  # (batch_size, head_size, proj_dim)
        else:
            head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, head_size, proj_dim)
        prep_projection = K.expand_dims(K.dot(prep_encoding, self.proj_prep), dim=1)  # (batch_size, 1, proj_dim)
        child_projection = K.expand_dims(K.dot(child_encoding, self.proj_child), dim=1)  # (batch_size, 1, proj_dim)
        #(batch_size, head_size, proj_dim)
        if self.composition_type == 'HPCT':
            composed_projection = K.tanh(head_projection + prep_projection + child_projection)
        elif self.composition_type == 'HPC' or self.composition_type == "HPCD":
            prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, 1, proj_dim)
            composed_projection = K.tanh(head_projection + prep_child_projection)
        else:
            # Composition type in HC
            composed_projection = K.tanh(head_projection + child_projection)
        for hidden_layer in self.hidden_layers:
            composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, head_size, proj_dim)
        # (batch_size, head_size)
        head_word_scores = K.squeeze(K.dot(composed_projection, self.scorer), axis=-1)
        if mask is None:
            attachment_probabilities = K.softmax(head_word_scores)  # (batch_size, head_size)
        else:
            if K.ndim(mask) > 2:
                # This means this layer came after a Bidirectional layer. Keras has this bug which
                # concatenates input masks instead of output masks.
                # TODO: Fix Bidirectional instead.
                mask = K.any(mask, axis=(-2, -1))
            # We need to do a masked softmax.
            exp_scores = K.exp(head_word_scores)  # (batch_size, head_size)
            head_mask = mask[:, :-2]  # (batch_size, head_size)
            # (batch_size, head_size)
            masked_exp_scores = switch(head_mask, exp_scores, K.zeros_like(head_encoding[:, :, 0]))
            # (batch_size, 1). Adding epsilon to avoid divison by 0. But epsilon is float64.
            exp_sum = K.cast(K.expand_dims(K.sum(masked_exp_scores, axis=1) + K.epsilon()), 'float32')
            attachment_probabilities = masked_exp_scores / exp_sum  # (batch_size, head_size)
        return attachment_probabilities
Project: onto-lstm    Author: pdasigi    | project source | file source
def call(self, x, mask=None):
        # x[0]: (batch_size, input_length, input_dim)
        # x[1]: (batch_size, 1) indices of prepositions
        # Optional: x[2]: (batch_size, input_length - 2)
        assert isinstance(x, list) or isinstance(x, tuple)
        encoded_sentence = x[0]
        prep_indices = K.squeeze(x[1], axis=-1)  #(batch_size,)
        batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)
        if self.with_attachment_probs:
            # We're essentially doing K.argmax(x[2]) here, but argmax is not differentiable!
            head_probs = x[2]
            head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
            # (batch_size, input_length)
            padded_head_probs = K.concatenate([head_probs, head_probs_padding])
            # (batch_size, 1)
            max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
            # (batch_size, input_length, 1)
            max_head_prob_indices = K.expand_dims(K.equal(padded_head_probs, max_head_probs))
            # (batch_size, input_length, input_dim)
            masked_head_encoding = K.switch(max_head_prob_indices, encoded_sentence, K.zeros_like(encoded_sentence))
            # (batch_size, input_dim)
            head_encoding = K.sum(masked_head_encoding, axis=1)
        else:
            head_indices = prep_indices - 1  # (batch_size,)
            head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)
        prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
        child_encoding = encoded_sentence[batch_indices, prep_indices+1, :]  # (batch_size, input_dim)
        '''
        prep_indices = x[1]
        sentence_mask = mask[0]
        if sentence_mask is not None:
            if K.ndim(sentence_mask) > 2:
                # This means this layer came after a Bidirectional layer. Keras has this bug which
                # concatenates input masks instead of output masks.
                # TODO: Fix Bidirectional instead.
                sentence_mask = K.any(sentence_mask, axis=(-2, -1))
        head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
                                                                               prep_indices)
        '''
        head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, proj_dim)
        prep_projection = K.dot(prep_encoding, self.proj_prep)  # (batch_size, proj_dim)
        child_projection = K.dot(child_encoding, self.proj_child)  # (batch_size, proj_dim)
        #(batch_size, proj_dim)
        if self.composition_type == 'HPCT':
            composed_projection = K.tanh(head_projection + prep_projection + child_projection)
        elif self.composition_type == 'HPC':
            prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, proj_dim)
            composed_projection = K.tanh(head_projection + prep_child_projection)
        else:
            # Composition type in HC
            composed_projection = K.tanh(head_projection + child_projection)
        for hidden_layer in self.hidden_layers:
            composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, proj_dim)
        # (batch_size, num_classes)
        class_scores = K.dot(composed_projection, self.scorer)
        label_probabilities = K.softmax(class_scores)
        return label_probabilities
Project: recurrent-attention-for-QA-SQUAD-based-on-keras    Author: wentaozhu    | project source | file source
def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        #B_U = states[1]  # dropout matrices for recurrent units
        #B_W = states[2]
        h_tm1a = K.dot(h_tm1, self.Wa)
        eij = K.dot(K.tanh(K.repeat(h_tm1a, K.shape(self.h)[1]) + self.ha), self.Va)
        eijs = K.squeeze(eij, -1)
        alphaij = K.softmax(eijs) # batchsize * lenh       h batchsize * lenh * ndim
        ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
        cisum = K.sum(ci, axis=1)
        #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

        zr = K.sigmoid(K.dot(inputs, self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
        zi = zr[:, :self.units]
        ri = zr[:, self.units: 2 * self.units]
        si_ = K.tanh(K.dot(inputs, self.W) + K.dot(ri*h_tm1, self.U) + K.dot(cisum, self.C))
        si = (1-zi) * h_tm1 + zi * si_
        return si, [si] #h_tm1, [h_tm1]
        '''if self.consume_less == 'gpu':

            matrix_x = K.dot(x * B_W[0], self.W) + self.b
            matrix_inner = K.dot(h_tm1 * B_U[0], self.U[:, :2 * self.units])

            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units: 2 * self.units]
            inner_z = matrix_inner[:, :self.units]
            inner_r = matrix_inner[:, self.units: 2 * self.units]

            z = self.inner_activation(x_z + inner_z)
            r = self.inner_activation(x_r + inner_r)

            x_h = matrix_x[:, 2 * self.units:]
            inner_h = K.dot(r * h_tm1 * B_U[0], self.U[:, 2 * self.units:])
            hh = self.activation(x_h + inner_h)
        else:
            if self.consume_less == 'cpu':
                x_z = x[:, :self.units]
                x_r = x[:, self.units: 2 * self.units]
                x_h = x[:, 2 * self.units:]
            elif self.consume_less == 'mem':
                x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
                x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
                x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
            else:
                raise ValueError('Unknown `consume_less` mode.')
            z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
            r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))

            hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
        h = z * h_tm1 + (1 - z) * hh
        return h, [h]'''
Project: kfs    Author: the-moliver    | project source | file source
def call(self, x, mask=None):
        ndim = K.ndim(x)
        xshape = K.shape(x)
        W = self.kernel_activation(self.kernel)

        if self.filter_axes == self.sum_axes:
            ax1 = [a-1 for a in self.sum_axes]
            ax1 = ax1 + list(set(range(ndim)) - set(ax1))
            ax2 = list(set(range(ndim)) - set(self.sum_axes))
            permute_dims = list(range(len(ax2)))
            permute_dims.insert(self.sum_axes[0], len(ax2))
            outdims = [-1] + [xshape[a] for a in ax2[1:]] + [self.filters_complex + self.filters_simple]
            ax2 = ax2 + self.sum_axes
            W = K.permute_dimensions(W, ax1)
            W = K.reshape(W, (-1, 2 * self.filters_complex + self.filters_simple))
            x = K.permute_dimensions(x, ax2)
            x = K.reshape(x, (-1, K.shape(W)[0]))
            output = K.dot(x, W)
            output_complex = K.sqrt(K.square(output[:, :self.filters_complex]) + K.square(output[:, self.filters_complex:2*self.filters_complex]) + K.epsilon())
            output_simple = output[:, 2*self.filters_complex:]
            output = K.reshape(K.concatenate([output_complex, output_simple], axis=1), outdims)
            if self.use_bias:
                b_broadcast = [i for j, i in enumerate(self.bias_broadcast) if j not in self.sum_axes]
                b = K.squeeze(self.bias, self.sum_axes[0])
                if len(self.sum_axes) > 1:
                    b = K.squeeze(b, self.sum_axes[1] - 1)
                if len(self.sum_axes) > 2:
                    b = K.squeeze(b, self.sum_axes[2] - 2)
                if K.backend() == 'theano':
                    output += K.pattern_broadcast(b, b_broadcast)
                else:
                    output += b
            output = K.permute_dimensions(output, permute_dims)

        else:
            # bcast = list(np.where(self.broadcast)[0])
            permute_dims = list(range(ndim + 1))
            permute_dims[self.sum_axes[0]] = ndim
            permute_dims[ndim] = self.sum_axes[0]

            if K.backend() == 'theano':
                output = K.sum(x[..., None] * K.pattern_broadcast(W, self.kernel_broadcast), axis=self.sum_axes, keepdims=True)
            else:
                output = K.sum(x[..., None] * W, axis=self.sum_axes, keepdims=True)

            output_complex = K.sqrt(K.square(output[..., :self.filters_complex]) + K.square(output[..., self.filters_complex:2*self.filters_complex]) + K.epsilon())
            output_simple = output[..., 2*self.filters_complex:]
            output = K.concatenate([output_complex, output_simple], axis=-1)

            if self.use_bias:
                if K.backend() == 'theano':
                    output += K.pattern_broadcast(self.bias, self.bias_broadcast)
                else:
                    output += self.bias
            output = K.squeeze(K.permute_dimensions(output, permute_dims), ndim)
            if len(self.sum_axes) > 1:
                output = K.squeeze(output, self.sum_axes[1])

        return self.activation(output)
Project: keras-attention    Author: datalogue    | project source | file source
def step(self, x, states):

        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r)
            + K.dot(stm, self.U_r)
            + K.dot(context, self.C_r)
            + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z)
            + K.dot(stm, self.U_z)
            + K.dot(context, self.C_z)
            + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p)
            + K.dot((rt * stm), self.U_p)
            + K.dot(context, self.C_p)
            + self.b_p)

        # new hidden state:
        st = (1-zt)*stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o)
            + K.dot(stm, self.U_o)
            + K.dot(context, self.C_o)
            + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]
Project: neural-segmentation    Author: melsner    | project source | file source
def plot(self, Xs, Xs_mask, Y, logdir, prefix, iteration, batch_size=128):
        ## Initialize plotting objects
        fig = plt.figure()
        fig.set_size_inches(10, 10)
        ax_input = fig.add_subplot(311)
        ax_targ = fig.add_subplot(312)
        ax_pred = fig.add_subplot(313)

        inputs_raw = Xs
        masks_raw = Xs_mask
        preds_raw = self.predict(inputs_raw,
                                 masks_raw,
                                 batch_size)

        if inputs_raw.shape[-1] == 1 and self.charDim:
            inputs_raw = oneHot(inputs_raw, self.charDim)
        targs_raw = np.expand_dims(Y, -1)

        for u in range(len(Xs)):
            ## Set up plotting canvas
            fig.patch.set_visible(False)
            fig.suptitle('Utterance %d, Checkpoint %d' % (u, iteration))

            ## Plot inputs (heatmap)
            inputs = inputs_raw[u]
            inputs = inputs[np.where(1 - masks_raw[u])]
            inputs = np.swapaxes(inputs, 0, 1)
            ax_input.clear()
            ax_input.axis('off')
            ax_input.set_title('Input', loc='left')
            hm_input = ax_input.pcolor(inputs, cmap=plt.cm.Blues)

            ## Plot targets (bar chart)
            targs = targs_raw[u]
            targs = targs[np.where(1 - masks_raw[u])]
            ax_targ.clear()
            ax_targ.axis('off')
            ax_targ.set_title('Target', loc='left')
            ax_targ.set_ylim([0, 1])
            ax_targ.margins(0)
            hm_targ = ax_targ.bar(np.arange(len(targs)), targs)

            ## Plot predictions (bar chart)
            preds = preds_raw[u]
            preds = preds[np.where(1 - masks_raw[u])]
            preds = np.squeeze(preds, -1)
            ax_pred.clear()
            ax_pred.axis('off')
            ax_pred.set_title('Prediction', loc='left')
            ax_pred.set_ylim([0, 1])
            ax_pred.margins(0)
            hm_pred = ax_pred.bar(np.arange(len(preds)), preds)

            ## Save plot
            fig.savefig(logdir + '/barchart_' + prefix + '_utt' + str(u) + '_iter' + str(iteration) + '.jpg')

        plt.close(fig)
Project: ikelos    Author: braingineer    | project source | file source
def accumulate(attend_function, inputs, input_length,
                                mask=None, return_probabilities=False):
    '''get the running attention over a sequence.

    given a 3-dim tensor whose first axis is time, calculate the running
    attended sum. in other words, at the first time step, you only have
    that item; at the second time step, attend over the first two items;
    at the third, the first three, and so on.

    this is basically a mod on keras' rnn implementation
    author: bcm
    '''

    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'

    axes = [1,0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    indices = list(range(input_length))

    if mask is not None:
        if mask.ndim == ndim-1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)
        prev_output = None

    successive_outputs = []
    successive_pvecs = []
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim-1):
        uncover_indices = K.expand_dims(uncover_indices)
    make_subset = lambda i,X: K.switch(uncover_indices <= i, X, uncover_mask)
    for i in indices:
        inputs_i = make_subset(i, inputs)
        if mask is not None:
            # only build the masked subset when a mask actually exists
            mask_i = make_subset(i, mask)
            output = attend_function(inputs_i, mask_i)  # this should not output the time dimension; it should be marginalized over.
        else:
            output = attend_function(inputs_i)  # this should not output the time dimension; it should be marginalized over.
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)
    outputs = K.pack(successive_outputs)
    # note: K.squeeze returns a new tensor; as written, the squeezed result
    # is discarded and `outputs` keeps its packed shape.
    K.squeeze(outputs, -1)
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        # same caveat as above: this squeeze result is discarded.
        K.squeeze(out_pvecs, -1)
        out_pvecs = out_pvecs.dimshuffle(axes)
        outputs = [outputs, out_pvecs]

    return outputs
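
To make the "running attention" idea concrete, here is a NumPy-only illustration of the accumulation pattern; attend_mean is a stand-in for the attend_function argument, not anything from the project:

import numpy as np

def attend_mean(prefix):
    # stand-in attention: uniform weights over the visible prefix
    return prefix.mean(axis=0)

seq = np.array([[1., 0.],
                [0., 1.],
                [2., 2.]])  # (time, dim)

# At step i the function may only attend over items 0..i.
running = np.stack([attend_mean(seq[:i + 1]) for i in range(len(seq))])
print(running)
# [[1.  0. ]
#  [0.5 0.5]
#  [1.  1. ]]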