Python theano.tensor module: addbroadcast() code examples

The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.addbroadcast().
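Before the project excerpts, a minimal, self-contained sketch of what addbroadcast() does (toy shapes and variable names are our own): it asserts that the given axes have length 1 and marks them broadcastable, which Theano requires before it will broadcast a dimension whose size is only known at runtime.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')             # runtime shape (3, 1), but Theano only knows it is a matrix
y = T.matrix('y')             # runtime shape (3, 4)
xb = T.addbroadcast(x, 1)     # assert axis 1 has length 1 and mark it broadcastable
f = theano.function([x, y], xb + y)  # without addbroadcast, this add fails at runtime

out = f(np.ones((3, 1), dtype=theano.config.floatX),
        np.zeros((3, 4), dtype=theano.config.floatX))
print(out.shape)  # (3, 4): x was broadcast along axis 1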

Project: hippodeep    Author: bthyreau    | Project source | File source
def get_output_for(self, inputs, **kwargs):
            inputs = autocrop(inputs, self.cropping)
            # modify broadcasting pattern.
            if self.broadcastable is not None:
                for n, broadcasting_dim in enumerate(self.broadcastable):
                    for dim, broadcasting in enumerate(broadcasting_dim):
                        if broadcasting:
                            inputs[n] = T.addbroadcast(inputs[n], dim)

            output = None
            for input in inputs:
                if output is not None:
                    output = self.merge_function(output, input)
                else:
                    output = input
            return output


    # Definition of the network
Project: cbof    Author: passalis    | Project source | File source
def conv_pairwise_distance(feature_maps, codebook):
    """
    Calculates the pairwise distances between the feature maps (n_samples, filters, x, y)
    :param feature_maps: 
    :param codebook: 
    :return: 
    """
    x_square = T.sum(feature_maps ** 2, axis=1)  # n_samples, filters, x, y
    x_square = x_square.reshape((x_square.shape[0], 1, x_square.shape[1], x_square.shape[2]))
    x_square = T.addbroadcast(x_square, 1)

    y_square = T.sum(codebook ** 2, axis=1)
    y_square = y_square.reshape((1, y_square.shape[0], y_square.shape[1], y_square.shape[2]))
    y_square = T.addbroadcast(y_square, 0, 2, 3)

    inner_product = T.nnet.conv2d(feature_maps, codebook)  # 1x1 convolution = per-pixel inner products
    dist = x_square + y_square - 2 * inner_product  # ||x - y||^2 = x^2 + y^2 - 2*x.y
    dist = T.sqrt(T.maximum(dist, 0))  # clamp small negatives caused by rounding error
    return dist
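A hypothetical shape check for conv_pairwise_distance (standalone; the input shapes are assumptions inferred from the code, with the codebook holding 1x1 kernels as in the CBoF layer further below):

import numpy as np
import theano
import theano.tensor as T

feature_maps = T.tensor4('feature_maps')   # (n_samples, filters, x, y)
codebook = T.tensor4('codebook')           # (n_codewords, filters, 1, 1)
dist_fn = theano.function([feature_maps, codebook],
                          conv_pairwise_distance(feature_maps, codebook))

fm = np.random.randn(2, 8, 5, 5).astype(theano.config.floatX)
cb = np.random.randn(16, 8, 1, 1).astype(theano.config.floatX)
print(dist_fn(fm, cb).shape)  # (2, 16, 5, 5): one distance map per codeword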
Project: recnet    Author: joergfranke    | Project source | File source
def sequence_iteration(self, output, mask,use_dropout=0,dropout_value=0.5):

        dot_product = T.dot(output , self.t_w_out)

        net_o = T.add( dot_product , self.t_b_out )

        ex_net = T.exp(net_o)
        sum_net = T.sum(ex_net, axis=2, keepdims=True)
        softmax_o = ex_net / sum_net


        mask = T.addbroadcast(mask, 2)  # TODO: is this necessary?
        output = T.mul(mask, softmax_o) + T.mul((1. - mask), 1e-6)

        return output  # result
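The exp/sum block above is simply a softmax over axis 2; a standalone check with our own toy shapes:

import numpy as np
import theano
import theano.tensor as T

net = T.tensor3('net')  # (time, batch, classes)
softmax_o = T.exp(net) / T.sum(T.exp(net), axis=2, keepdims=True)
f = theano.function([net], softmax_o.sum(axis=2))
print(f(np.random.randn(3, 2, 5).astype(theano.config.floatX)))  # all entries ~1.0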


######                     Linear Layer
########################################
Project: deep-learning-models    Author: kuleshov    | Project source | File source
def create_updates(self, grads, params, alpha, opt_alg, opt_params):
    # call super-class to generate SGD/ADAM updates
    grad_updates = Model.create_updates(self, grads, params, alpha, opt_alg, opt_params)

    # create updates for centering signal

    # load neural net outputs (probabilities have been precomputed)
    _, _, _, l_cv, c, v = self.network
    log_pxz, log_qz_given_x = self.log_pxz, self.log_qz_given_x
    cv = T.addbroadcast(lasagne.layers.get_output(l_cv),1)

    # compute learning signals
    l = log_pxz - log_qz_given_x - cv
    l_avg, l_var = l.mean(), l.var()
    c_new = 0.8*c + 0.2*l_avg
    v_new = 0.8*v + 0.2*l_var

    # compute update for centering signal
    cv_updates = {c : c_new, v : v_new}

    return OrderedDict( grad_updates.items() + cv_updates.items() )
Project: deep-learning-models    Author: kuleshov    | Project source | File source
def create_updates(self, grads, params, alpha, opt_alg, opt_params):
    # call super-class to generate SGD/ADAM updates
    grad_updates = Model.create_updates(self, grads, params, alpha, opt_alg, opt_params)

    # create updates for centering signal

    # load neural net outputs (probabilities have been precomputed)
    l_px_mu, l_px_logsigma, l_pa_mu, l_pa_logsigma, \
    l_qa_mu, l_qa_logsigma, l_qz_mu, l_qz_logsigma, l_qa, l_qz, l_cv, c, v = self.network

    # load neural net outputs (probabilities have been precomputed)
    log_pxz, log_px_given_z, log_pz = self.log_pxz, self.log_px_given_z, self.log_pz
    log_qz_given_x = self.log_qz_given_x    
    cv = T.addbroadcast(lasagne.layers.get_output(l_cv),1)

    # compute learning signals
    l = log_px_given_z + log_pz - log_qz_given_x - cv
    l_avg, l_var = l.mean(), l.var()
    c_new = 0.8*c + 0.2*l_avg
    v_new = 0.8*v + 0.2*l_var

    # compute update for centering signal
    cv_updates = {c : c_new, v : v_new}

    return OrderedDict( grad_updates.items() + cv_updates.items() )
Project: KEHNN    Author: MarkWuNLP    | Project source | File source
def __call__(self, input,input_lm=None, return_list = False):
         # activation function
        if input_lm == None:
            self.h_l, _ = theano.scan(self.step2,
                        sequences=input.dimshuffle(1,0,2),
                        outputs_info=theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True))
        else:
            self.h_l, _ = theano.scan(self.step,
                        sequences=[input.dimshuffle(1,0,2),T.addbroadcast(input_lm.dimshuffle(1,0,'x'), -1)],
                        outputs_info=theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True))
        self.h_l = self.h_l.dimshuffle(1,0,2)
        if return_list == True:
            return self.h_l
        return self.h_l[:,-1,:]
Project: deep-coref    Author: clarkkev    | Project source | File source
def get_padded_shuffled_mask(self, train, X, pad=0):
        mask = self.get_input_mask(train)
        if mask is None:
            mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

        # mask is (nb_samples, time)
        mask = T.shape_padright(mask)  # (nb_samples, time, 1)
        mask = T.addbroadcast(mask, -1)  # the new dimension (the '1') is made broadcastable
        # see http://deeplearning.net/software/theano/library/tensor/basic.html#broadcasting-in-theano-vs-numpy
        mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

        if pad > 0:
            # left-pad in time with 0
            padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
            mask = T.concatenate([padding, mask], axis=0)
        return mask.astype('int8')
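A quick standalone trace of the mask pipeline above through shape_padright, addbroadcast, and dimshuffle (toy sizes are our own; note that shape_padright already returns a broadcastable axis, so the addbroadcast acts as an assertion):

import numpy as np
import theano
import theano.tensor as T

mask = T.matrix('mask')         # (nb_samples, time)
m = T.shape_padright(mask)      # (nb_samples, time, 1)
m = T.addbroadcast(m, 2)        # assert the new axis is broadcastable
m = m.dimshuffle(1, 0, 2)       # (time, nb_samples, 1)
f = theano.function([mask], m)
print(f(np.ones((4, 7), dtype=theano.config.floatX)).shape)  # (7, 4, 1)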
Project: dcnn    Author: jcatw    | Project source | File source
def __init__(self, incomings, parameters, layer_num,
                 W=lasagne.init.Normal(0.01),
                 num_features=None,
                 **kwargs):

        super(DCNNLayer, self).__init__(incomings, **kwargs)

        self.parameters = parameters

        if num_features is None:
            self.num_features = self.parameters.num_features
        else:
            self.num_features = num_features

        self.W = T.addbroadcast(
            self.add_param(W,
                           (1, parameters.num_hops + 1, self.num_features), name='DCNN_W_%d' % layer_num), 0)

        self.nonlinearity = params.nonlinearity_map[self.parameters.dcnn_nonlinearity]
Project: dcnn    Author: jcatw    | Project source | File source
def __init__(self, incomings, parameters, layer_num,
                 W=lasagne.init.Normal(0.01),
                 num_features=None,
                 **kwargs):

        super(DCNNLayer, self).__init__(incomings, **kwargs)

        self.parameters = parameters

        if num_features is None:
            self.num_features = self.parameters.num_features
        else:
            self.num_features = num_features

        self.W = T.addbroadcast(
            self.add_param(W,
                           (1, parameters.num_hops + 1, self.num_features), name='DCNN_W_%d' % layer_num), 0)

        self.nonlinearity = params.nonlinearity_map[self.parameters.dcnn_nonlinearity]
Project: dcnn    Author: jcatw    | Project source | File source
def __init__(self, incomings, parameters, layer_num,
                 W=lasagne.init.Normal(0.01),
                 num_features=None,
                 **kwargs):
        super(AggregatedDCNNLayer, self).__init__(incomings, **kwargs)

        self.parameters = parameters

        if num_features is None:
            self.num_features = self.parameters.num_features
        else:
            self.num_features = num_features

        self.W = T.addbroadcast(
            self.add_param(W, (self.parameters.num_hops + 1, 1, self.num_features), name='AGGREGATE_DCNN_W_%d' % layer_num), 1)

        self.nonlinearity = params.nonlinearity_map[self.parameters.dcnn_nonlinearity]
Project: MultiTurnResponseSelection    Author: MarkWuNLP    | Project source | File source
def __call__(self, input,input_lm=None, return_list = False):
         # activation function
        if input_lm == None:
            self.h_l, _ = theano.scan(self.step2,
                        sequences=input.dimshuffle(1,0,2),
                        outputs_info=[theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True),
                                      theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True)])
        else:
            self.h_l, _ = theano.scan(self.step,
                        sequences=[input.dimshuffle(1,0,2),T.addbroadcast(input_lm.dimshuffle(1,0,'x'), -1)],
                        outputs_info=[theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True),
                                      theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True)])
        self.h_l = self.h_l[0].dimshuffle(1,0,2)
        if return_list == True:
            return self.h_l
        return self.h_l[:,-1,:]
Project: mctest-model    Author: Maluuba    | Project source | File source
def call(self, x, mask=None):
        ax = 1 if self.is_q else 2

        def _step(v1, v2):
            cosine_score = T.tensordot(v1 / T.sqrt(T.sum(T.sqr(v1), axis=2, keepdims=True) + 1e-6),
                                       (v2) / T.sqrt(T.sum(T.sqr(v2), axis=ax, keepdims=True) + 1e-6),
                                       [[2], [ax]])
            return cosine_score

        l_s = x[0]  # n_b x n_s x n_w_s x D
        l_a = x[1]  # n_b x 4 x n_w_qa x D
        # w_qa = self.layers[2].get_output(train)  # n_b x 4 x n_w_qa x 1
        # w_qa = T.addbroadcast(w_qa, len(self.layers[2].output_shape) - 1)

        # get cosine similarity for ALL word pairs
        output, _ = theano.scan(_step, sequences=[l_s, l_a], outputs_info=None)
        if not self.is_q:
            output = output.dimshuffle(0, 1, 3, 2, 4)  # n_b x n_s x 4 x n_w_s x n_w_qa
        return output
Project: TACNTN    Author: MarkWuNLP    | Project source | File source
def __call__(self, input,input_lm=None, return_list = False):
         # activation function
        if input_lm == None:
            self.h_l, _ = theano.scan(self.step2,
                        sequences=input.dimshuffle(1,0,2),
                        outputs_info=theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True))
        else:
            self.h_l, _ = theano.scan(self.step,
                        sequences=[input.dimshuffle(1,0,2),T.addbroadcast(input_lm.dimshuffle(1,0,'x'), -1)],
                        outputs_info=theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True))
        self.h_l = self.h_l.dimshuffle(1,0,2)
        if return_list == True:
            return self.h_l
        return self.h_l[:,-1,:]
Project: cortex    Author: rdevon    | Project source | File source
def visualize(self, p0, p=None):
        if p is None:
            p = self.get_prob(*self.get_params())
        p0 = T.addbroadcast(p0, 0)
        return p - p0
Project: cbof    Author: passalis    | Project source | File source
def __init__(self, incoming, n_codewords=24, V=lasagne.init.Normal(0.1), gamma=lasagne.init.Constant(0.1),
                 eps=0.00001, input_var=None, initializers=None, spatial_level=1, **kwargs):
        """
        Creates a BoF layer

        :param incoming: 
        :param n_codewords: number of codewords
        :param V: initializer used for the codebook
        :param gamma: initializer used for the scaling factors
        :param eps: epsilon used to ensure numerical stability
        :param input_var: input_var of the model (used to compile a function that extract the features fed to layer)
        :param initializers: 
        :param spatial_level: 0 (no spatial segmentation), 1 (first spatial level)
        :param pooling_type: either 'mean' or 'max'
        :param kwargs: 
        """
        super(CBoF_Layer, self).__init__(incoming, **kwargs)

        self.n_codewords = n_codewords
        self.spatial_level = spatial_level
        n_filters = self.input_shape[1]
        self.eps = eps

        # Create parameters
        self.V = self.add_param(V, (n_codewords, n_filters, 1, 1), name='V')
        self.gamma = self.add_param(gamma, (1, n_codewords, 1, 1), name='gamma')

        # Make gammas broadcastable
        self.gamma = T.addbroadcast(self.gamma, 0, 2, 3)

        # Compile function used for feature extraction
        if input_var is not None:
            self.features_fn = theano.function([input_var], lasagne.layers.get_output(incoming, deterministic=True))

        if initializers is not None:
            initializers.append(self.initialize_layer)
Project: keraflow    Author: ipod825    | Project source | File source
def squeeze(self, x, axis):
        '''Remove a 1-dimension from the tensor at index "axis".
        '''
        x = T.addbroadcast(x, axis)
        return T.squeeze(x)
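A standalone sketch of the squeeze-via-addbroadcast idiom above: addbroadcast() marks the axis broadcastable so that T.squeeze() can drop it (toy shapes are our own):

import numpy as np
import theano
import theano.tensor as T

x = T.tensor3('x')                   # runtime shape (4, 1, 3)
y = T.squeeze(T.addbroadcast(x, 1))  # mark axis 1 broadcastable, then drop it
f = theano.function([x], y)
print(f(np.zeros((4, 1, 3), dtype=theano.config.floatX)).shape)  # (4, 3)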
Project: recnet    Author: joergfranke    | Project source | File source
def sequence_iteration(self, output, mask, use_dropout=0, dropout_value=0.5):

        dot_product = T.dot(output, self.t_w_out)

        linear_o = T.add(dot_product, self.t_b_out)


        mask = T.addbroadcast(mask, 2)  # TODO: is this necessary?
        output = T.mul(mask, linear_o) + T.mul((1. - mask), 1e-6)

        return output  # result


### TEST FUNCTIONS  # TODO: move these into a separate test file
Project: recnet    Author: joergfranke    | Project source | File source
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, w_hidden_hidden, b_act):

        pre_w_sig = T.dot(pre_out_sig, w_hidden_hidden)
        inner_act = self.activation
        out_sig = inner_act(T.add(cur_w_in_sig, pre_w_sig, b_act))

        mask = T.addbroadcast(mask, 1)
        out_sig_m = mask * out_sig + (1. - mask) * pre_out_sig
        return [out_sig_m]
Project: recnet    Author: joergfranke    | Project source | File source
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ig_c, w_fg_c, w_og_c, w_ifco, b_ifco,
                       t_n_out):

        ifco = T.add(T.dot(pre_out_sig, w_ifco), b_ifco)

        inner_act = self.activation
        gate_act = self.sigmoid()

        # Input Gate
        ig_t1 = gate_act(T.add(ifco[:, 0:t_n_out], T.mul(pre_cell_sig, w_ig_c), cur_w_in_sig[:, 0:t_n_out]))
        # Forget Gate
        fg_t1 = gate_act(T.add(ifco[:, 1 * t_n_out:2 * t_n_out], T.mul(pre_cell_sig, w_fg_c),
                               cur_w_in_sig[:, 1 * t_n_out:2 * t_n_out]))
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(
            T.add(ifco[:, 2 * t_n_out:3 * t_n_out], cur_w_in_sig[:, 2 * t_n_out:3 * t_n_out]))))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
        # functionality: cs_t1 =   T.switch(mask , cs_t1, pre_cell_sig)

        # Output Gate
        og_t1 = gate_act(
            T.add(ifco[:, 3 * t_n_out:4 * t_n_out], T.mul(cs_t1, w_og_c), cur_w_in_sig[:, 3 * t_n_out:4 * t_n_out]))
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1]
Project: recnet    Author: joergfranke    | Project source | File source
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,
                       t_n_out):

        ifco = T.add(T.dot(pre_out_sig, w_ifco), b_ifco)

        inner_act = self.activation
        gate_act = self.sigmoid()

        # Input Gate
        ig_t1 = gate_act(T.add(ifco[:, 0:t_n_out], cur_w_in_sig[:, 0:t_n_out]))
        # Forget Gate
        fg_t1 = gate_act(T.add(ifco[:, 1 * t_n_out:2 * t_n_out],
                               cur_w_in_sig[:, 1 * t_n_out:2 * t_n_out]))
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(
            T.add(ifco[:, 2 * t_n_out:3 * t_n_out], cur_w_in_sig[:, 2 * t_n_out:3 * t_n_out]))))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
        # functionality: cs_t1 =   T.switch(mask , cs_t1, pre_cell_sig)

        # Output Gate
        og_t1 = gate_act(
            T.add(ifco[:, 3 * t_n_out:4 * t_n_out], cur_w_in_sig[:, 3 * t_n_out:4 * t_n_out]))
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1]
Project: recnet    Author: joergfranke    | Project source | File source
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, w_hidden_hidden, b_act, ln_s1, ln_b1, ln_s2, ln_b2):

        pre_w_sig = T.dot(pre_out_sig, w_hidden_hidden)

        inner_act = self.activation

        pre_w_sig_ln = self.ln(pre_w_sig, ln_b1, ln_s1)
        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b2, ln_s2)

        out_sig = inner_act(T.add(cur_w_in_sig_ln, pre_w_sig_ln, b_act))

        mask = T.addbroadcast(mask, 1)
        out_sig_m = mask * out_sig + (1. - mask) * pre_out_sig
        return [out_sig_m]
Project: recnet    Author: joergfranke    | Project source | File source
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ig_c, w_fg_c, w_og_c, w_ifco, b_ifco, ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3,
                       t_n_out):

        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

        pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
        pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

        preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)

        inner_act = self.activation  # T.nnet.hard_sigmoid T.tanh
        gate_act = self.sigmoid()  # T.nnet.hard_sigmoid

        # Input Gate
        ig_t1 = gate_act(T.add(preact[:, 0:t_n_out], T.mul(pre_cell_sig, w_ig_c)))
        # Forget Gate
        fg_t1 = gate_act(T.add(preact[:, 1 * t_n_out:2 * t_n_out], T.mul(pre_cell_sig, w_fg_c),))
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act( T.add(preact[:, 2 * t_n_out:3 * t_n_out]))))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
        # functionality: cs_t1 =   T.switch(mask , cs_t1, pre_cell_sig)

        cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)

        # Output Gate
        og_t1 = gate_act(
            T.add(preact[:, 3 * t_n_out:4 * t_n_out], T.mul(cs_t1_ln, w_og_c)))
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1]
Project: recnet    Author: joergfranke    | Project source | File source
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3,
                       t_n_out):

        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

        pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
        pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

        preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)



        inner_act = self.activation # T.nnet.hard_sigmoid #T.tanh # T.nnet.hard_sigmoid T.tanh
        gate_act = self.sigmoid()  # T.nnet.hard_sigmoid #T.nnet.sigmoid

        # Input Gate
        ig_t1 = gate_act(preact[:, 0:t_n_out])
        # Forget Gate
        fg_t1 = gate_act(preact[:, 1 * t_n_out:2 * t_n_out])
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(preact[:, 2 * t_n_out:3 * t_n_out])))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig

        cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)
        # Output Gate
        og_t1 = gate_act(preact[:, 3 * t_n_out:4 * t_n_out])
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1]
Project: recnet    Author: joergfranke    | Project source | File source
def t_forward_step(self,mask, rzup_in_sig, h_pre,b_rzup, u_rz, u_up,ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3, t_n_out):



        signal_act = self.activation
        gate_act = self.sigmoid()

        rzup_in_sig_ln = self.ln(rzup_in_sig, ln_b1, ln_s1)

        rzup_b_in_sig_ln = T.add(rzup_in_sig_ln, b_rzup)

        preact = T.dot( h_pre, u_rz)

        preact_ln = self.ln(preact, ln_b2, ln_s2)

        r = gate_act( T.add( rzup_b_in_sig_ln[:, 0:t_n_out] , preact_ln[:, 0:t_n_out] ))
        z = gate_act( T.add( rzup_b_in_sig_ln[:, t_n_out:2 * t_n_out] , preact_ln[:, t_n_out:2 * t_n_out] ))

        preactx = T.dot(h_pre , u_up)
        preactx_ln = self.ln(preactx, ln_b3, ln_s3)
        h_pre_r_ln = T.mul( preactx_ln, r)

        h_update = signal_act( T.add( rzup_b_in_sig_ln[:, 2*t_n_out:3*t_n_out] , h_pre_r_ln ))

        h_new = T.add( (1.-z) * h_update , z * h_pre )

        mask = T.addbroadcast(mask, 1)
        out_sig =  T.add( mask * h_new   , (1. - mask) * h_pre )

        return out_sig
Project: deep-learning-models    Author: kuleshov    | Project source | File source
def create_gradients(self, loss, deterministic=False):
    # load networks
    l_p_mu, l_q_mu, _, l_cv, c, v = self.network

    # load params
    p_params  = lasagne.layers.get_all_params(l_p_mu, trainable=True)
    q_params  = lasagne.layers.get_all_params(l_q_mu, trainable=True)
    cv_params = lasagne.layers.get_all_params(l_cv, trainable=True)

    # load neural net outputs (probabilities have been precomputed)
    log_pxz, log_qz_given_x = self.log_pxz, self.log_qz_given_x
    cv = T.addbroadcast(lasagne.layers.get_output(l_cv),1)

    # compute learning signals
    l = log_pxz - log_qz_given_x - cv
    l_avg, l_var = l.mean(), l.var()
    c_new = 0.8*c + 0.2*l_avg
    v_new = 0.8*v + 0.2*l_var
    l = (l - c_new) / T.maximum(1, T.sqrt(v_new))

    # compute grad wrt p
    p_grads = T.grad(-log_pxz.mean(), p_params)

    # compute grad wrt q
    q_target = T.mean(dg(l) * log_qz_given_x)
    q_grads = T.grad(-0.2*q_target, q_params) # 5x slower rate for q

    # compute grad of cv net
    cv_target = T.mean(l**2)
    cv_grads = T.grad(cv_target, cv_params)

    # combine and clip gradients
    clip_grad = 1
    max_norm = 5
    grads = p_grads + q_grads + cv_grads
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]

    return cgrads
Project: neural_wfst    Author: se4u    | Project source | File source
def get_layer(self, x_in, op=lambda x: x):
        b_ = T.addbroadcast(self._params['b'], 0)
        ret = op(T.dot(x_in, self._params['U']) + b_)
        return ret
Project: PAN    Author: hworang77    | Project source | File source
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic or self.fixed:
            # use stored mean and std
            mean = self.mean
            std = self.std
        else:
            # use this batch's mean and std
            mean = input.mean(self.axes, keepdims=True)
            #std = input.std(self.axes, keepdims=True)
            std = (input.var(self.axes, keepdims=True)+self.epsilon).sqrt()
            # and update the stored mean and std:
            # we create (memory-aliased) clones of the stored mean and std
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * mean)
            running_std.default_update = ((1 - self.alpha) * running_std +
                                          self.alpha * std)
            # and include them in the graph so their default updates will be
            # applied (although the expressions will be optimized away later)
            mean += 0 * running_mean
            std += 0 * running_std
        #std += self.epsilon
        mean = T.addbroadcast(mean, *self.axes)
        std = T.addbroadcast(std, *self.axes)
        beta = T.addbroadcast(self.beta, *self.axes)
        gamma = T.addbroadcast(self.gamma, *self.axes)
#        normalized = (input - mean) * (gamma / std) + beta
        normalized = (input - mean) / std
        if self.rescale:
            normalized = normalized * gamma + beta
        return self.nonlinearity(normalized)
Project: cnn-bnn    Author: jpdz    | Project source | File source
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            # use stored mean and std
            mean = self.mean
            std = self.std
        else:
            # use this batch's mean and std
            mean = input.mean(self.axes, keepdims=True)
            std = input.std(self.axes, keepdims=True)
            # and update the stored mean and std:
            # we create (memory-aliased) clones of the stored mean and std
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * mean)
            running_std.default_update = ((1 - self.alpha) * running_std +
                                          self.alpha * std)
            # and include them in the graph so their default updates will be
            # applied (although the expressions will be optimized away later)
            mean += 0 * running_mean
            std += 0 * running_std
        std += self.epsilon
        mean = T.addbroadcast(mean, *self.axes)
        std = T.addbroadcast(std, *self.axes)
        beta = T.addbroadcast(self.beta, *self.axes)
        gamma = T.addbroadcast(self.gamma, *self.axes)
        normalized = (input - mean) * (gamma / std) + beta
        return self.nonlinearity(normalized)
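Why both batch-norm layers above need addbroadcast: a Theano shared variable created from a numpy array is not broadcastable on its 1-sized axes by default, so elementwise operations against the input would fail at runtime. A standalone sketch with our own toy shapes:

import numpy as np
import theano
import theano.tensor as T

gamma = theano.shared(np.ones((1, 3, 1, 1), dtype=theano.config.floatX), name='gamma')
print(gamma.broadcastable)          # (False, False, False, False), despite the 1-sized axes
x = T.tensor4('x')                  # (batch, channels, h, w)
g = T.addbroadcast(gamma, 0, 2, 3)  # mark the 1-sized axes broadcastable
f = theano.function([x], x * g)
print(f(np.ones((2, 3, 4, 4), dtype=theano.config.floatX)).shape)  # (2, 3, 4, 4)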
Project: keras-recommendation    Author: sonyisme    | Project source | File source
def get_padded_shuffled_mask(self, train, X, pad=0):
        mask = self.get_input_mask(train)
        if mask is None:
            mask = T.ones_like(X.sum(axis=-1)) # is there a better way to do this without a sum?

        # mask is (nb_samples, time)
        mask = T.shape_padright(mask) # (nb_samples, time, 1)
        mask = T.addbroadcast(mask, -1) # (nb_samples, time, 1); the new axis is made broadcastable
        mask = mask.dimshuffle(1, 0, 2) # (time, nb_samples, 1)

        if pad > 0:
            # left-pad in time with 0
            padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
            mask = T.concatenate([padding, mask], axis=0)
        return mask.astype('int8')
Project: reading-text-in-the-wild    Author: mathDR    | Project source | File source
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".
    '''
    x = T.addbroadcast(x, axis)
    return T.squeeze(x)
Project: DBQA    Author: nanfeng1101    | Project source | File source
def __call__(self, input, input_lm=None, h0=None):
        batch_size = input.shape[0]
        if h0 == None:
            h0 = T.alloc(np.asarray(0., dtype=theano.config.floatX), batch_size, self.n_hidden)
        if input_lm == None:
            def step(x_t, h_tm_prev):
                x_z = T.dot(x_t, self.W_z) + self.b_z
                x_r = T.dot(x_t, self.W_r) + self.b_r
                x_h = T.dot(x_t, self.W_h) + self.b_h

                z_t = self.inner_activation(x_z + T.dot(h_tm_prev, self.U_z))
                r_t = self.inner_activation(x_r + T.dot(h_tm_prev, self.U_r))
                hh_t = self.activation(x_h + T.dot(r_t * h_tm_prev, self.U_h))
                h_t = (1 - z_t) * hh_t + z_t * h_tm_prev
                return h_t

            self.h_l, _ = theano.scan(step, sequences=input.dimshuffle(1, 0, 2), outputs_info=h0)
        else:
            def step(x_t, mask, h_tm_prev):
                x_z = T.dot(x_t, self.W_z) + self.b_z
                x_r = T.dot(x_t, self.W_r) + self.b_r
                x_h = T.dot(x_t, self.W_h) + self.b_h
                z_t = self.inner_activation(x_z + T.dot(h_tm_prev, self.U_z))
                r_t = self.inner_activation(x_r + T.dot(h_tm_prev, self.U_r))

                hh = self.activation(x_h + T.dot(r_t * h_tm_prev, self.U_h))
                h_t = z_t * h_tm_prev + (1 - z_t) * hh
                h_t = mask * h_t + (1 - mask) * h_tm_prev
                return h_t

            self.h_l, _ = theano.scan(step, sequences=[input.dimshuffle(1, 0, 2), T.addbroadcast(input_lm.dimshuffle(1, 0, 'x'), -1)], outputs_info=h0)
        self.h_l = self.h_l.dimshuffle(1, 0, 2)
        return self.h_l[:, -1, :]
Project: gogh-figure    Author: joelmoniz    | Project source | File source
def get_output_for(self, input, style=None, **kwargs):

        mean = input.mean(self.axes)
        inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        pattern = [0, 1, 'x', 'x']

        if style == None:
            pattern_params = ['x', 0, 'x', 'x']
            beta = 0 if self.beta is None else self.beta.dimshuffle(pattern_params)
            gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern_params)
        else:
            pattern_params = pattern
            beta = 0 if self.beta is None else self.beta[style].dimshuffle(pattern_params)
            gamma = 1 if self.gamma is None else self.gamma[style].dimshuffle(pattern_params)
            # if self.beta is not None:
            #   beta = ifelse(T.eq(style.shape[0], 1), T.addbroadcast(beta, 0), beta)
            # if self.gamma is not None:
            #   gamma = ifelse(T.eq(style.shape[0], 1), T.addbroadcast(gamma, 0), gamma)

        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * inv_std) + beta
        return normalized
Project: dqn_vizdoom_theano    Author: mihahauke    | Project source | File source
def get_output_for(self, inputs, **kwargs):
        m = tensor.mean(inputs[0], axis=1, keepdims=True)
        sv = tensor.addbroadcast(inputs[1], 1)
        return inputs[0] + sv - m
Project: deep-motion-analysis    Author: Brimborough    | Project source | File source
def __call__(self, input): 
        mean = input.mean(self.axes, keepdims=True) 
        std = input.std(self.axes, keepdims=True) + self.epsilon 

        # Don't batch-normalise a single data point
        mean = ifelse(T.gt(input.shape[0], 1), mean, T.zeros(mean.shape, dtype=mean.dtype))
        std  = ifelse(T.gt(input.shape[0], 1), std, T.ones(std.shape, dtype=std.dtype))

        return (input - mean) * T.addbroadcast((self.gamma / std) + self.beta, *self.axes)
Project: Theano-Deep-learning    Author: GeekLiB    | Project source | File source
def grad(self, inputs, gout):
        (x,) = inputs
        (gz,) = gout
        if x.dtype not in continuous_dtypes:
            return [x.zeros_like(dtype=theano.config.floatX)]
        if self.structured:
            if self.axis is None:
                r = gz * theano.sparse.sp_ones_like(x)
            elif self.axis == 0:
                r = col_scale(theano.sparse.sp_ones_like(x), gz)
            elif self.axis == 1:
                r = row_scale(theano.sparse.sp_ones_like(x), gz)
            else:
                raise ValueError('Illegal value for self.axis.')
        else:
            o_format = x.format
            x = dense_from_sparse(x)
            if _is_sparse_variable(gz):
                gz = dense_from_sparse(gz)
            if self.axis is None:
                r = tensor.second(x, gz)
            else:
                ones = tensor.ones_like(x)
                if self.axis == 0:
                    r = tensor.addbroadcast(gz.dimshuffle('x', 0), 0) * ones
                elif self.axis == 1:
                    r = tensor.addbroadcast(gz.dimshuffle(0, 'x'), 1) * ones
                else:
                    raise ValueError('Illegal value for self.axis.')
            r = SparseFromDense(o_format)(r)
        return [r]
Project: Theano-Deep-learning    Author: GeekLiB    | Project source | File source
def test_rebroadcast_rebroadcast(self):
        mode = theano.compile.get_default_mode().including('canonicalize')
        m = T.matrix()
        s = T.addbroadcast(m, 0, 1)
        v = T.unbroadcast(s, 1)
        f = theano.function([m], v, mode=mode)
        f([[76]])
        e = f.maker.fgraph.toposort()
        rebroadcast_nodes = [n for n in e if isinstance(n.op, T.Rebroadcast)]
        assert len(rebroadcast_nodes) == 1
        assert rebroadcast_nodes[0].op.axis == {0: True}
Project: KEHNN    Author: MarkWuNLP    | Project source | File source
def __init__(self, input_l, input_r, n_in, n_hidden, n_out, activation=T.tanh,
                 output_type='real',batch_size=200,input_lm=None,input_rm=None):
        if input_lm == None:
            input_lm = theano.shared(value=np.ones((batch_size,20), dtype=theano.config.floatX),borrow=True)
        if input_rm == None:
            input_rm = theano.shared(value=np.ones((batch_size,20), dtype=theano.config.floatX),borrow=True)
        self.activation = activation
        self.output_type = output_type
        # Parameters are reshaped views of theta
        param_idx = 0  # pointer to somewhere along parameter vector

        # recurrent weights as a shared variable
        self.W = theano.shared(ortho_weight(n_hidden),borrow=True,name='W')
        # input to hidden layer weights
        self.W_in = theano.shared(glorot_uniform((n_in,n_hidden)),borrow=True,name='W_in')

        self.h0 = theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True,name='h0')
        self.bh = theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True,name='bh')
        #self.by = theano.shared(value=np.zeros((n_out,), dtype=theano.config.floatX),borrow=True,name='by')
        # for convenience
        self.params = [self.W, self.W_in, self.bh]

        # activation function
        def step(x_t, mask, h_tm1):
            h_tm1 =  mask * h_tm1
            #h_t = h_tm1 + self.bh
            h_t = T.tanh(T.dot(x_t, self.W_in) + \
                                  T.dot(h_tm1, self.W) + self.bh)
            #y_t = T.dot(h_t, self.W_out) + self.by
            return h_t
        #a = T.addbroadcast(input_lm.dimshuffle(1,0), -1)
        self.h_l, _ = theano.scan(step,
                    sequences=[input_l.dimshuffle(1,0,2),T.addbroadcast(input_lm.dimshuffle(1,0,'x'), -1)],
                    outputs_info=theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True))
        self.h_r, _ = theano.scan(step,
                    sequences=[input_r.dimshuffle(1,0,2),T.addbroadcast(input_rm.dimshuffle(1,0,'x'), -1)],
                    outputs_info=theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True))
        self.h_l = self.h_l.dimshuffle(1,0,2)
        self.h_r = self.h_r.dimshuffle(1,0,2)
Project: DCNMT    Author: SwordYork    | Project source | File source
def apply(self, char_seq, sample_matrix, char_aux):
        # Time as first dimension
        embeddings = self.lookup.apply(char_seq)
        gru_out = self.dgru.apply(
            **merge(self.gru_fork.apply(embeddings, as_dict=True),
                    {'mask': char_aux}))
        wgru_out = tensor.exp(self.wl.apply(self.bidir_w.apply(embeddings, char_aux)))

        if self.dgru_depth > 1:
            gru_out = gru_out[-1]

        gru_out = tensor.addbroadcast(wgru_out, 2) * gru_out
        sampled_representation = tensor.tanh(tensor.batched_dot(sample_matrix, gru_out.dimshuffle([1, 0, 2])))
        return sampled_representation.dimshuffle([1, 0, 2]), wgru_out
Project: dcnn    Author: jcatw    | Project source | File source
def _outer_substract(self, x, y):
        z = x.dimshuffle(0, 1, 'x')
        z = T.addbroadcast(z, 2)
        return (z - y.T).dimshuffle(0, 2, 1)
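A hypothetical shape trace for the outer-difference helper above (standalone, with our own toy sizes): it pairs every row of x with every row of y and returns the elementwise differences.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                               # (n, d)
y = T.matrix('y')                               # (m, d)
z = T.addbroadcast(x.dimshuffle(0, 1, 'x'), 2)  # (n, d, 1), axis 2 broadcastable
out = (z - y.T).dimshuffle(0, 2, 1)             # (n, d, m) -> (n, m, d): x[i] - y[j] per pair
f = theano.function([x, y], out)
print(f(np.ones((3, 4), dtype=theano.config.floatX),
        np.zeros((5, 4), dtype=theano.config.floatX)).shape)  # (3, 5, 4)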
Project: dcnn    Author: jcatw    | Project source | File source
def get_output_for(self, inputs):
        A = inputs[0]
        X = inputs[1]

        num_nodes = A.shape[0]
        structural_symbolic_loss = T.addbroadcast(
            T.reshape(
                1 + A + self._symbolic_triangles(A) + self._symbolic_arrows(A),
                [num_nodes, num_nodes, 1]
            ),
            2
        )

        feature_symbolic_loss = (
            (self._outer_substract(X, X) ** 2) *
            T.addbroadcast(self.W, 0, 1)
        )

        unnormalized_logprobs = T.sum(
            structural_symbolic_loss + feature_symbolic_loss,
            2
        )

        flat_reduction_index = T.argmax(unnormalized_logprobs)

        return self.reduce(A, [
            flat_reduction_index // num_nodes,
            flat_reduction_index % num_nodes
        ])
Project: MultiTurnResponseSelection    Author: MarkWuNLP    | Project source | File source
def __call__(self, input,input_lm=None, return_list = False, Init_input =None,check_gate = False):
         # activation function
        if Init_input == None:
            init = theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True)
        else:
            init = Init_input

        if check_gate:
            self.h_l, _ = theano.scan(self.step3,
                        sequences=[input.dimshuffle(1,0,2),T.addbroadcast(input_lm.dimshuffle(1,0,'x'), -1)],
                        outputs_info=[init, theano.shared(value=np.zeros((self.batch_size,self.n_hidden),
                                                                  dtype=theano.config.floatX),borrow=True)])
            return [self.h_l[0][:,-1,:], self.h_l[1]]



        if input_lm == None:
            self.h_l, _ = theano.scan(self.step2,
                        sequences=input.dimshuffle(1,0,2),
                        outputs_info=init)
        else:
            self.h_l, _ = theano.scan(self.step,
                        sequences=[input.dimshuffle(1,0,2),T.addbroadcast(input_lm.dimshuffle(1,0,'x'), -1)],
                        outputs_info=init)
        self.h_l = self.h_l.dimshuffle(1,0,2)
        if return_list == True:
            return self.h_l
        return self.h_l[:,-1,:]
Project: MultiTurnResponseSelection    Author: MarkWuNLP    | Project source | File source
def __init__(self, input_l, input_r, n_in, n_hidden, n_out, activation=T.tanh,
                 output_type='real',batch_size=200,input_lm=None,input_rm=None):
        if input_lm == None:
            input_lm = theano.shared(value=np.ones((batch_size,20), dtype=theano.config.floatX),borrow=True)
        if input_rm == None:
            input_rm = theano.shared(value=np.ones((batch_size,20), dtype=theano.config.floatX),borrow=True)
        self.activation = activation
        self.output_type = output_type
        # Parameters are reshaped views of theta
        param_idx = 0  # pointer to somewhere along parameter vector

        # recurrent weights as a shared variable
        self.W = theano.shared(ortho_weight(n_hidden),borrow=True,name='W')
        # input to hidden layer weights
        self.W_in = theano.shared(glorot_uniform((n_in,n_hidden)),borrow=True,name='W_in')

        self.h0 = theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True,name='h0')
        self.bh = theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True,name='bh')
        #self.by = theano.shared(value=np.zeros((n_out,), dtype=theano.config.floatX),borrow=True,name='by')
        # for convenience
        self.params = [self.W, self.W_in, self.bh]

        # activation function
        def step(x_t, mask, h_tm1):
            h_tm1 =  mask * h_tm1
            #h_t = h_tm1 + self.bh
            h_t = T.tanh(T.dot(x_t, self.W_in) + \
                                  T.dot(h_tm1, self.W) + self.bh)
            #y_t = T.dot(h_t, self.W_out) + self.by
            return h_t
        #a = T.addbroadcast(input_lm.dimshuffle(1,0), -1)
        self.h_l, _ = theano.scan(step,
                    sequences=[input_l.dimshuffle(1,0,2),T.addbroadcast(input_lm.dimshuffle(1,0,'x'), -1)],
                    outputs_info=theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True))
        self.h_r, _ = theano.scan(step,
                    sequences=[input_r.dimshuffle(1,0,2),T.addbroadcast(input_rm.dimshuffle(1,0,'x'), -1)],
                    outputs_info=theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True))
        self.h_l = self.h_l.dimshuffle(1,0,2)
        self.h_r = self.h_r.dimshuffle(1,0,2)
Project: RecommendationSystem    Author: TURuibo    | Project source | File source
def get_padded_shuffled_mask(self, train, X, pad=0):
        mask = self.get_input_mask(train)
        if mask is None:
            mask = T.ones_like(X.sum(axis=-1)) # is there a better way to do this without a sum?

        # mask is (nb_samples, time)
        mask = T.shape_padright(mask) # (nb_samples, time, 1)
        mask = T.addbroadcast(mask, -1) # (nb_samples, time, 1); the new axis is made broadcastable
        mask = mask.dimshuffle(1, 0, 2) # (time, nb_samples, 1)

        if pad > 0:
            # left-pad in time with 0
            padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
            mask = T.concatenate([padding, mask], axis=0)
        return mask.astype('int8')
Project: odin_old    Author: trungnt13    | Project source | File source
def addbroadcast(x, *axes):
    return T.addbroadcast(x, *axes)

# ===========================================================================
# Predefined data
# ===========================================================================
Project: odin_old    Author: trungnt13    | Project source | File source
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".
    '''
    x = T.addbroadcast(x, axis)
    return T.squeeze(x)
Project: mctest-model    Author: Maluuba    | Project source | File source
def call(self, x, mask=None):
        lay0 = x[0]
        lay1 = x[1]
        lay1 = T.addbroadcast(lay1, lay1.ndim - 1)
        return lay0 * lay1
Project: TACNTN    Author: MarkWuNLP    | Project source | File source
def __init__(self, input_l, input_r, n_in, n_hidden, n_out, activation=T.tanh,
                 output_type='real',batch_size=200,input_lm=None,input_rm=None):
        if input_lm == None:
            input_lm = theano.shared(value=np.ones((batch_size,20), dtype=theano.config.floatX),borrow=True)
        if input_rm == None:
            input_rm = theano.shared(value=np.ones((batch_size,20), dtype=theano.config.floatX),borrow=True)
        self.activation = activation
        self.output_type = output_type
        # Parameters are reshaped views of theta
        param_idx = 0  # pointer to somewhere along parameter vector

        # recurrent weights as a shared variable
        self.W = theano.shared(ortho_weight(n_hidden),borrow=True,name='W')
        # input to hidden layer weights
        self.W_in = theano.shared(glorot_uniform((n_in,n_hidden)),borrow=True,name='W_in')

        self.h0 = theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True,name='h0')
        self.bh = theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True,name='bh')
        #self.by = theano.shared(value=np.zeros((n_out,), dtype=theano.config.floatX),borrow=True,name='by')
        # for convenience
        self.params = [self.W, self.W_in, self.bh]

        # activation function
        def step(x_t, mask, h_tm1):
            h_tm1 =  mask * h_tm1
            #h_t = h_tm1 + self.bh
            h_t = T.tanh(T.dot(x_t, self.W_in) + \
                                  T.dot(h_tm1, self.W) + self.bh)
            #y_t = T.dot(h_t, self.W_out) + self.by
            return h_t
        #a = T.addbroadcast(input_lm.dimshuffle(1,0), -1)
        self.h_l, _ = theano.scan(step,
                    sequences=[input_l.dimshuffle(1,0,2),T.addbroadcast(input_lm.dimshuffle(1,0,'x'), -1)],
                    outputs_info=theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True))
        self.h_r, _ = theano.scan(step,
                    sequences=[input_r.dimshuffle(1,0,2),T.addbroadcast(input_rm.dimshuffle(1,0,'x'), -1)],
                    outputs_info=theano.shared(value=np.zeros((batch_size,n_hidden), dtype=theano.config.floatX),borrow=True))
        self.h_l = self.h_l.dimshuffle(1,0,2)
        self.h_r = self.h_r.dimshuffle(1,0,2)
Project: deep-learning-models    Author: kuleshov    | Project source | File source
def create_gradients(self, loss, deterministic=False):
    # load networks
    l_px_mu, l_px_logsigma, l_pa_mu, l_pa_logsigma, \
    l_qa_mu, l_qa_logsigma, l_qz_mu, l_qz_logsigma, l_qa, l_qz, l_cv, c, v = self.network

    # load params
    p_params  = lasagne.layers.get_all_params(
        # [l_px_mu], trainable=True)
        [l_px_mu, l_pa_mu, l_pa_logsigma], trainable=True)
    qa_params  = lasagne.layers.get_all_params(l_qa_mu, trainable=True)    
    qz_params  = lasagne.layers.get_all_params(l_qz, trainable=True)
    cv_params = lasagne.layers.get_all_params(l_cv, trainable=True)

    # load neural net outputs (probabilities have been precomputed)
    log_pxz, log_px_given_z, log_pz = self.log_pxz, self.log_px_given_z, self.log_pz
    log_qza_given_x = self.log_qza_given_x    
    log_qz_given_x = self.log_qz_given_x    
    log_qz_given_x_dgz = self.log_qz_given_x_dgz
    cv = T.addbroadcast(lasagne.layers.get_output(l_cv),1)

    # compute learning signals
    l0 = log_px_given_z + log_pz - log_qz_given_x #- cv # NOTE: this didn't have q(a)
    l_avg, l_var = l0.mean(), l0.var()
    c_new = 0.8*c + 0.2*l_avg
    v_new = 0.8*v + 0.2*l_var
    l = (l0 - c_new) / T.maximum(1, T.sqrt(v_new))
    l_target = (l0 - c_new) / T.maximum(1, T.sqrt(v_new))
    # l_target = log_px_given_z + log_pz - log_qz_given_x

    # compute grad wrt p
    p_grads = T.grad(-log_pxz.mean(), p_params)

    # compute grad wrt q_a
    elbo = T.mean(log_pxz - log_qza_given_x)
    qa_grads = T.grad(-elbo, qa_params)

    # compute grad wrt q_z
    qz_target = T.mean(dg(l_target) * log_qz_given_x_dgz)
    qz_grads = T.grad(-0.2*qz_target, qz_params) # 5x slower rate for q
    # qz_grads = T.grad(-0.2*T.mean(l0), qz_params) # 5x slower rate for q
    # qz_grads = T.grad(-0.2*elbo, qz_params) # 5x slower rate for q

    # compute grad of cv net
    cv_target = T.mean(l0**2)
    # cv_grads = [0.2*g for g in T.grad(cv_target, cv_params)]

    # combine and clip gradients
    clip_grad = 1
    max_norm = 5
    # grads = p_grads + qa_grads + qz_grads + cv_grads
    grads = p_grads + qa_grads + qz_grads #+ cv_grads
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]

    return cgrads
Project: DBQA    Author: nanfeng1101    | Project source | File source
def __call__(self, input, input_lm=None, h0=None, c0=None):
        batch_size = input.shape[0]  # note: input_lm may be None here, so take the batch size from input
        if h0 == None:
            h0 = T.alloc(np.asarray(0., dtype=theano.config.floatX), batch_size, self.n_hidden)
        if c0 == None:
            c0 = T.alloc(np.asarray(0., dtype=theano.config.floatX), batch_size, self.n_hidden)
        if input_lm == None:
            def step(x_t, h_tm_prev, c_tm_prev):
                x_i = T.dot(x_t, self.W_i) + self.b_i
                x_f = T.dot(x_t, self.W_f) + self.b_f
                x_c = T.dot(x_t, self.W_c) + self.b_c
                x_o = T.dot(x_t, self.W_o) + self.b_o

                i_t = self.inner_activation(x_i + T.dot(h_tm_prev, self.U_i))
                f_t = self.inner_activation(x_f + T.dot(h_tm_prev, self.U_f))
                c_t = f_t * c_tm_prev + i_t * self.activation(x_c + T.dot(h_tm_prev, self.U_c))  # internal memory
                o_t = self.inner_activation(x_o + T.dot(h_tm_prev, self.U_o))
                h_t = o_t * self.activation(c_t)  # actual hidden state

                return [h_t, c_t]

            self.h_1, _ = theano.scan(step,
                                      sequences=input.dimshuffle(1, 0, 2),
                                      outputs_info=[h0, c0]
                                      )
        else:
            def step(x_t, mask, h_tm_prev, c_tm_prev):
                x_i = T.dot(x_t, self.W_i) + self.b_i
                x_f = T.dot(x_t, self.W_f) + self.b_f
                x_c = T.dot(x_t, self.W_c) + self.b_c
                x_o = T.dot(x_t, self.W_o) + self.b_o

                i_t = self.inner_activation(x_i + T.dot(h_tm_prev, self.U_i))
                f_t = self.inner_activation(x_f + T.dot(h_tm_prev, self.U_f))
                c_t = f_t * c_tm_prev + i_t * self.activation(x_c + T.dot(h_tm_prev, self.U_c))  # internal memory
                o_t = self.inner_activation(x_o + T.dot(h_tm_prev, self.U_o))
                h_t = o_t * self.activation(c_t)  # actual hidden state

                h_t = mask * h_t + (1 - mask) * h_tm_prev
                c_t = mask * c_t + (1 - mask) * c_tm_prev

                return [h_t, c_t]

            self.h_1, _ = theano.scan(step,
                                      sequences=[input.dimshuffle(1, 0, 2),
                                                 T.addbroadcast(input_lm.dimshuffle(1, 0, 'x'), -1)],
                                      outputs_info=[h0, c0])

        self.h_1 = self.h_1[0].dimshuffle(1, 0, 2)
        return self.h_1[:, -1, :]
Project: Theano-Deep-learning    Author: GeekLiB    | Project source | File source
def test_dnn_batchnorm_train():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
            x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass
            out, x_mean, x_invstd = dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)
            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                axes = (0,) + tuple(range(2, ndim))
            x_mean2 = x.mean(axis=axes, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x.var(axis=axes, keepdims=True) + eps))
            scale2 = T.addbroadcast(scale, *axes)
            bias2 = T.addbroadcast(bias, *axes)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, dy],
                                [out, x_mean, x_invstd, out2, x_mean2, x_invstd2] +
                                grads + grads2, mode=mode_with_gpu)
            # run
            for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                outputs = f(X, Scale, Bias, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 3])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 3])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 3])  # invstd
                # compare gradients
                utt.assert_allclose(outputs[6], outputs[6 + 3], atol=1e-4)  # dx
                utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=2e-4, atol=1e-4)  # dscale
                utt.assert_allclose(outputs[8], outputs[8 + 3])  # dbias