def rbf_kernel(X0):
    """Build an RBF kernel matrix with a median-based bandwidth.

    Args:
        X0: symbolic 2-D tensor whose rows are the points (squares are summed
            over axis 1 and ``X0.shape[0]`` is used as the point count).

    Returns:
        Tuple ``(Kxy, neighbors, h)``: the kernel ``exp(-H / h**2 / 2)``,
        column 1 of the row-wise argsort of the squared distances ``H``, and
        the bandwidth ``h``.
    """
    # NOTE(review): the left operand of this dot product was missing in the
    # source; X0 is what the ||x||^2 + ||y||^2 - 2<x, y> expansion below needs.
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()
    V_sorted = T.sort(V)  # sorted once, shared by both median branches
    mid = V.shape[0] // 2

    # Median of all pairwise squared distances.
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # even length: mean of the two central values
                 T.mean(V_sorted[(mid - 1):(mid + 1)]),
                 # odd length: the central value
                 V_sorted[mid])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)
    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h
def lyr_linear(
        self, name_,
        idim_, odim_,
        init_=None, bias_=0., params_di_='params', s_x_=None):
    """Dense matrix multiplication, optionally adding a bias vector.

    NOTE(review): the scraped source lost the left operand of both dot
    products, and no parameter of the visible signature can supply it. It is
    exposed here as the trailing keyword ``s_x_`` so existing positional
    callers are unaffected -- confirm against the upstream signature. The
    ``T`` alias for theano.tensor is likewise assumed.

    Args:
        name_: prefix of the variable names (``<name_>_w`` and ``<name_>_b``).
        idim_: number of rows of the weight matrix.
        odim_: number of columns of the weight matrix / length of the bias.
        init_: keyword arguments forwarded to ``self.get_variable`` for the
            weight; defaults to ``{'init_': [1.4 / sqrt(idim_ + odim_)]}``.
        bias_: forwarded to ``self.get_variable`` for the bias; ``None``
            disables the bias term.
        params_di_: not used by the visible body.
        s_x_: symbolic input multiplied by the weight matrix.

    Returns:
        ``dot(s_x_, W)`` or ``dot(s_x_, W) + b``.

    Raises:
        ValueError: if ``s_x_`` is not supplied.
    """
    if s_x_ is None:
        raise ValueError('lyr_linear requires the input tensor s_x_')
    name_W = name_+'_w'
    name_B = name_+'_b'
    if init_ is None:
        init_ = dict(init_=[1.4/sqrt(idim_+odim_)])
    v_W = self.get_variable(name_W, (idim_,odim_), **init_)
    if bias_ is None:
        s_ret = T.dot(s_x_, v_W)
    else:
        v_B = self.get_variable(name_B, (odim_,), bias_)
        s_ret = T.dot(s_x_, v_W) + v_B
    return s_ret
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    """Compute the layer output, optionally in data-dependent init mode.

    Args:
        input: symbolic input; flattened to 2-D when it has more dimensions.
        init: when true, the pre-activation is normalised to zero mean and
            unit RMS over axis 0 and ``self.init_updates`` is set to updates
            for ``self.weight_scale`` and ``self.b``.
        deterministic: not used by the visible body.

    Returns:
        ``self.nonlinearity`` applied to the (normalised or biased)
        pre-activation.
    """
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)

    # NOTE(review): left operand was missing in the source; ``input`` assumed.
    activation = T.dot(input, self.W)

    if init:
        ma = T.mean(activation, axis=0)
        activation -= ma.dimshuffle('x',0)
        stdv = T.sqrt(T.mean(T.square(activation),axis=0))
        activation /= stdv.dimshuffle('x',0)
        self.init_updates = [(self.weight_scale, self.weight_scale/stdv), (self.b, -ma/stdv)]
    else:
        # NOTE(review): the ``else:`` line was missing in the source (bare
        # ``else:`` lines are dropped throughout this file); without it the
        # bias was only added in init mode, after the normalisation.
        activation += self.b.dimshuffle('x', 0)

    return self.nonlinearity(activation)
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    """Compute the layer output, optionally in data-dependent init mode.

    Args:
        input: symbolic input; flattened to 2-D when it has more dimensions.
        init: when true, the pre-activation is normalised to zero mean and
            unit RMS over axis 0 and ``self.init_updates`` is set to updates
            for ``self.weight_scale`` and ``self.b``.
        deterministic: not used by the visible body.

    Returns:
        ``self.nonlinearity`` applied to the (normalised or biased)
        pre-activation.
    """
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)

    # NOTE(review): left operand was missing in the source; ``input`` assumed.
    activation = T.dot(input, self.W)

    if init:
        ma = T.mean(activation, axis=0)
        activation -= ma.dimshuffle('x',0)
        stdv = T.sqrt(T.mean(T.square(activation),axis=0))
        activation /= stdv.dimshuffle('x',0)
        self.init_updates = [(self.weight_scale, self.weight_scale/stdv), (self.b, -ma/stdv)]
    else:
        # NOTE(review): the ``else:`` line was missing in the source (bare
        # ``else:`` lines are dropped throughout this file); without it the
        # bias was only added in init mode, after the normalisation.
        activation += self.b.dimshuffle('x', 0)

    return self.nonlinearity(activation)
def nn(model, text, vectors, query, k=5):
    """Print the nearest-neighbour sentences to a query.

    Args:
        model: passed through to ``encode``.
        text: list of sentences.
        vectors: the corresponding representations for ``text``.
        query: a string to search for.
        k: number of neighbours to print.
    """
    qf = encode(model, [query])
    qf /= norm(qf)
    # NOTE(review): left operand was missing in the source; the normalised
    # query encoding ``qf`` is assumed.
    scores = numpy.dot(qf, vectors.T).flatten()
    sorted_args = numpy.argsort(scores)[::-1]  # highest score first
    sentences = [text[a] for a in sorted_args[:k]]
    print('QUERY: ' + query)
    print('NEAREST: ')
    for i, s in enumerate(sentences):
        print(s, sorted_args[i])
def _step(self, x_, m_, h_, c_):
    """One masked LSTM step, used as the scan function.

    Args:
        x_: input contribution to the gate pre-activations for this step.
        m_: mask for this step; where it is 0 the previous state is kept.
        h_: previous hidden state.
        c_: previous cell state.

    Returns:
        Tuple ``(h, c)`` with the new hidden and cell states.
    """
    # NOTE(review): left operand was missing in the source; the previous
    # hidden state ``h_`` is assumed (it is otherwise only used for masking).
    preact = tensor.dot(h_, self.U) + x_

    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 3, self.hidden_dim))

    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_

    h = o * tensor.tanh(c)
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_

    return h, c
def connect(self, inputs, mask, is_train):
    """Run the recurrent layer over a whole sequence with ``theano.scan``.

    Args:
        inputs: symbolic input; axis 0 is used as the number of steps and
            axis 1 as the batch size.
        mask: per-step mask passed to ``self._step``.
        is_train: A boolean tensor.

    Returns:
        The first scan output (the hidden states returned by ``_step``).
    """
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
                    tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]
    # Dropout mask sharing for variational dropout.
    self.is_train = is_train
    if self.recurrent_dropout_layer is not None:
        self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)

    # NOTE(review): left operand was missing in the source; ``inputs`` assumed.
    inputs = tensor.dot(inputs, self.W) + self.b
    # NOTE(review): ``outputs_info`` was built but never passed in the source;
    # a two-state step function cannot run without it, so the keyword line is
    # assumed to have been lost.
    rval, _ = theano.scan(self._step, # Scan function
                sequences=[inputs, mask], # Input sequence
                outputs_info=outputs_info,
                name=_p(self.prefix, '_layers'),
                n_steps=max_length) # scan steps
    return rval[0]
def _step(self, x_, m_, h_, c_):
    """One masked LSTM step with a transform gate, used as the scan function.

    Args:
        x_: input projection for this step; slices 0-4 feed the gates and
            slice 5 is the input without non-linearity.
        m_: mask for this step; where it is 0 the previous state is kept.
        h_: previous hidden state.
        c_: previous cell state.

    Returns:
        Tuple ``(h, c)`` with the new hidden and cell states.
    """
    # NOTE(review): left operand was missing in the source; ``h_`` assumed.
    preact = tensor.dot(h_, self.U) + _slice(x_, 0, self.hidden_dim * 5)
    # i: input. f: forget. o: output. t: transform.
    # j: input w\ non-linearity. k: input w\o non-linearity.
    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    t = tensor.nnet.sigmoid(_slice(preact, 3, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 4, self.hidden_dim))
    k = _slice(x_, 5, self.hidden_dim)

    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_

    # The transform gate mixes the LSTM output with the linear input path.
    h = t * o * tensor.tanh(c) + (1. - t) * k
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_

    return h, c
def connect(self, inputs, mask, is_train):
    """Run the recurrent layer over a sequence, feeding raw and projected inputs.

    Args:
        inputs: symbolic input; axis 0 is used as the number of steps and
            axis 1 as the batch size.
        mask: per-step mask passed to ``self._step``.
        is_train: stored on ``self`` and passed to the recurrent dropout layer.

    Returns:
        The first scan output (the hidden states returned by ``_step``).
    """
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
                    tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]

    # Dropout layers
    self.is_train = is_train
    if self.recurrent_dropout_layer is not None:
        self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)

    # NOTE(review): left operand was missing in the source; ``inputs`` assumed.
    proj_inputs = tensor.dot(inputs, self.W) + self.b
    # NOTE(review): ``outputs_info`` was built but never passed in the source;
    # a two-state step function cannot run without it, so the keyword line is
    # assumed to have been lost.
    rval, _ = theano.scan(self._step, # Scan function
                sequences=[inputs, proj_inputs, mask], # Input sequence
                outputs_info=outputs_info,
                name=_p(self.prefix, '_layers'),
                n_steps=max_length) # scan steps
    return rval[0]
def _step(self, x_, px_, m_, h_, c_):
    """One masked LSTM step with a residual connection from the raw input.

    Args:
        x_: raw input for this step, added to the LSTM output.
        px_: projected input added to the recurrent pre-activation.
        m_: mask for this step; where it is 0 the previous state is kept.
        h_: previous hidden state.
        c_: previous cell state.

    Returns:
        Tuple ``(h, c)`` with the new hidden and cell states.
    """
    # NOTE(review): left operand was missing in the source; ``h_`` assumed.
    preact = tensor.dot(h_, self.U) + px_
    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 3, self.hidden_dim))

    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_

    # Residual connection.
    h = o * tensor.tanh(c) + x_
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c
def nn(model, text, vectors, query, k=5):
    """Print the nearest-neighbour sentences to a query.

    Args:
        model: passed through to ``encode``.
        text: list of sentences.
        vectors: the corresponding representations for ``text``.
        query: a string to search for.
        k: number of neighbours to print.
    """
    qf = encode(model, [query])
    qf /= norm(qf)
    # NOTE(review): left operand was missing in the source; the normalised
    # query encoding ``qf`` is assumed.
    scores = numpy.dot(qf, vectors.T).flatten()
    sorted_args = numpy.argsort(scores)[::-1]  # highest score first
    sentences = [text[a] for a in sorted_args[:k]]
    # print() calls instead of Python 2 print statements, matching the other
    # ``nn`` variant in this file.
    print('QUERY: ' + query)
    print('NEAREST: ')
    for i, s in enumerate(sentences):
        print(s, sorted_args[i])
def dot(inp, matrix, bias=None):
    """Decide the right type of dot product depending on the input.

    Integer inputs are treated as indices into the rows of ``matrix``; float
    inputs are multiplied with it. 3-D float inputs are collapsed to 2-D for
    the product and reshaped back afterwards.

    NOTE(review): every dot call lost its ``<alias>.dot(<operand>`` prefix in
    the source; ``T`` as the theano.tensor alias and ``inp`` as the operand
    are assumed.

    Args:
        inp: symbolic tensor; ``inp.dtype`` is tested as a string.
        matrix: weight matrix.
        bias: optional bias added to float products.
    """
    if 'int' in inp.dtype and inp.ndim == 2:
        return matrix[inp.flatten()]
    elif 'int' in inp.dtype:
        return matrix[inp]
    elif 'float' in inp.dtype and inp.ndim == 3:
        shape0 = inp.shape[0]
        shape1 = inp.shape[1]
        shape2 = inp.shape[2]
        flat = T.dot(inp.reshape((shape0 * shape1, shape2)), matrix)
        # ``is not None`` rather than truthiness: the default is None and a
        # tensor-valued bias should not be evaluated as a boolean.
        if bias is not None:
            flat = flat + bias
        return flat.reshape((shape0, shape1, matrix.shape[1]))
    else:
        if bias is not None:
            return T.dot(inp, matrix) + bias
        return T.dot(inp, matrix)

# Numerically stable log(sum(exp(A))). Can also be used in softmax function.

# Numerically stable log(sum(exp(A))). Can also be used in softmax function.
# Project: dnc-theano    Author: khaotik    | project source | file source
def op_ortho_loss(s_x_, axes_=(-2, -1), ndim_=None):
    """Orthogonal matrix loss, used to regularize a parameter towards unitary.

    Args:
        s_x_: (batch of) matrices.
        axes_: tuple of two integers specifying which axes form the matrix;
            defaults to the last two axes.
        ndim_: declares the matrices to be (ndim_ x ndim_); read from the
            shape of ``s_x_`` along ``axes_[0]`` when None.

    Returns:
        Element-wise squared difference between the product of the transposed
        input with the input and the identity.
    """
    if ndim_ is None:
        ax = axes_[0]
        ndim = T.shape(s_x_)[ax]
    else:
        ndim = ndim_

    # The transpose pattern permutes the axes of s_x_, so it must have one
    # entry per tensor dimension; the source used the matrix size here.
    tpat = list(range(s_x_.ndim))
    bpat = ['x'] * s_x_.ndim
    tpat[axes_[0]], tpat[axes_[1]] = tpat[axes_[1]], tpat[axes_[0]]
    bpat[axes_[0]] = 0
    bpat[axes_[1]] = 1
    # NOTE(review): the left operand lost its ``T.dot(s_x_.dimshuffle(`` prefix
    # in the source; reconstructed from the surviving ``*tpat)`` -- confirm.
    s_y = T.dot(s_x_.dimshuffle(*tpat), s_x_)
    return T.sqr(s_y - T.eye(ndim).dimshuffle(*bpat))
def op_covmat(s_x_, l1_normize_=True, eps_=1e-7):
    """Return the covariance matrix of a batch of data points.

    Args:
        s_x_: batch of row vectors (must be 2-D).
        l1_normize_: defaults to True; divide the covariance matrix by the
            number of data points.
        eps_: a small identity matrix ``I * eps_`` is added to the result,
            after the normalization.

    Returns:
        Symbolic matrix with ``s_x_.shape[1]`` rows and columns.
    """
    assert s_x_.ndim == 2
    s_mean = s_x_ - T.mean(s_x_, axis=0, keepdims=True)  # centred data
    s_shp = T.shape(s_x_)
    # NOTE(review): left operand was missing in the source; ``s_mean.T`` is
    # the operand that yields the shape matching ``T.eye(s_shp[1])`` below.
    s_covmat = T.dot(s_mean.T, s_mean)
    if l1_normize_:
        s_covmat /= s_shp[0]
    return s_covmat + T.eye(s_shp[1]) * eps_
def lyr_linear(
        self, name_,
        idim_, odim_,
        init_=None, bias_=0.,
        params_group_='params', s_x_=None):
    """Dense matrix multiplication, optionally adding a bias vector.

    NOTE(review): the scraped source lost the end of the signature, the left
    operand of both dot products and the ``else:`` line. ``params_group_`` is
    restored because the body reads it (its default is a guess), and the
    input is exposed as the trailing keyword ``s_x_`` -- confirm both against
    the upstream signature. The ``T`` alias is likewise assumed.

    Args:
        name_: prefix of the variable names (``<name_>_w`` and ``<name_>_b``).
        idim_: number of rows of the weight matrix.
        odim_: number of columns of the weight matrix / length of the bias.
        init_: passed as ``init_`` to ``self.get_variable`` for the weight;
            defaults to ``[1.4 / sqrt(idim_ + odim_)]``.
        bias_: forwarded to ``self.get_variable`` for the bias; ``None``
            disables the bias term.
        params_group_: group passed to ``self.get_group`` while the variables
            are created.
        s_x_: symbolic input multiplied by the weight matrix.

    Returns:
        ``dot(s_x_, W)`` or ``dot(s_x_, W) + b``.

    Raises:
        ValueError: if ``s_x_`` is not supplied.
    """
    if s_x_ is None:
        raise ValueError('lyr_linear requires the input tensor s_x_')
    name_W = name_+'_w'
    name_B = name_+'_b'
    if init_ is None:
        init_ = [1.4/sqrt(idim_+odim_)]
    with self.get_group(params_group_):
        v_W = self.get_variable(name_W, (idim_,odim_), init_=init_)
    if bias_ is None:
        s_ret = T.dot(s_x_, v_W)
    else:
        with self.get_group(params_group_):
            v_B = self.get_variable(name_B, (odim_,), bias_)
        s_ret = T.dot(s_x_, v_W) + v_B
    return s_ret
def sample(self, x, K):
    """Run ``K`` rounds of perturb-decode-encode starting from ``x``.

    Each round perturbs the hidden code with Gaussian noise shaped by
    ``hn * (1 - hn)`` and the weight product ``ww``, decodes it, and
    re-encodes the result.

    NOTE(review): the source is damaged here: the left operand of the dot
    product, the remaining arguments of ``srng.normal`` and the statement
    that fills ``samples`` are all missing. ``W.T``, a bare ``size=`` call
    and ``samples.append(zn)`` are assumptions -- confirm against upstream.

    Args:
        x: input; a 1-D input is reshaped to a single-row batch.
        K: number of sampling rounds.

    Returns:
        List with the decoded sample of every round.
    """
    if x.ndim == 1:
        x = x.reshape(1, x.shape[0])
    hn = self.encode(x)
    W = self.params[0]
    ww = T.dot(W.T, W)
    samples = []
    for _ in range(K):
        s = hn * (1. - hn)
        jj = ww * s.dimshuffle(0, 'x', 1) * s.dimshuffle(0, 1, 'x')
        alpha = self.srng.normal(size=hn.shape)

        delta = (alpha.dimshuffle(0, 1, 'x')*jj).sum(1)

        zn = self.decode(hn + delta)
        hn = self.encode(zn)
        samples.append(zn)
    return samples
def gru_layer(tparams, emb, options):
    """Run a GRU over ``emb`` with ``theano.scan``.

    Args:
        tparams: mapping holding ``'W_gru'``, ``'b_gru'`` and ``'U_gru'``.
        emb: symbolic input; axis 0 is the number of steps and, for 3-D
            input, axis 1 the number of samples.
        options: mapping holding ``'hiddenDimSize'``.

    Returns:
        The scan result (hidden state at every step).
    """
    hiddenDimSize = options['hiddenDimSize']
    timesteps = emb.shape[0]
    if emb.ndim == 3: n_samples = emb.shape[1]
    else: n_samples = 1

    def stepFn(wx, h, U_gru):
        # NOTE(review): left operand was missing in the source; ``h`` assumed.
        uh = T.dot(h, U_gru)
        r = T.nnet.sigmoid(_slice(wx, 0, hiddenDimSize) + _slice(uh, 0, hiddenDimSize))
        z = T.nnet.sigmoid(_slice(wx, 1, hiddenDimSize) + _slice(uh, 1, hiddenDimSize))
        h_tilde = T.tanh(_slice(wx, 2, hiddenDimSize) + r * _slice(uh, 2, hiddenDimSize))
        h_new = z * h + ((1. - z) * h_tilde)
        return h_new

    # Input projection for all steps at once.
    # NOTE(review): left operand was missing in the source; ``emb`` assumed.
    Wx = T.dot(emb, tparams['W_gru']) + tparams['b_gru']
    results, updates = theano.scan(fn=stepFn, sequences=[Wx], outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize), non_sequences=[tparams['U_gru']], name='gru_layer', n_steps=timesteps)

    return results
def one_step(self, x, h_tm1, s_tm1):
    """Run the forward pass for a single timestep of an LSTM.

    NOTE(review): each gate lost the left operands of its two dot products
    in the source; ``x`` with the ``W_*x`` weights and ``h_tm1`` with the
    ``W_*h`` weights are assumed from the weight names.

    Args:
        x: input for this timestep.
        h_tm1: initial h.
        s_tm1: initial s (cell state).

    Returns:
        Tuple ``(h, s)`` with the new hidden and cell states.
    """
    g = T.tanh(T.dot(x, self.W_gx) + T.dot(h_tm1, self.W_gh) + self.b_g)
    i = T.nnet.sigmoid(T.dot(x, self.W_ix) + T.dot(h_tm1, self.W_ih) + self.b_i)
    f = T.nnet.sigmoid(T.dot(x, self.W_fx) + T.dot(h_tm1, self.W_fh) + self.b_f)
    o = T.nnet.sigmoid(T.dot(x, self.W_ox) + T.dot(h_tm1, self.W_oh) + self.b_o)

    s = i * g + s_tm1 * f
    h = T.tanh(s) * o

    return h, s
def test_output(self, x):
    """Compute the test-time output of the gated block.

    Inputs are scaled by the keep probabilities ``1 - d_p_0`` and
    ``1 - d_p_1``, and every linear stage is normalised with the stored
    ``Mean_*`` / ``Std_*`` statistics.

    Args:
        x: symbolic input.

    Returns:
        ``h * t + x * (1 - t)``, where ``t`` is the sigmoid gate and ``h``
        the activated candidate.
    """
    d_0 = 1.0 - self.d_p_0
    d_1 = 1.0 - self.d_p_1

    # NOTE(review): the ``T.dot(x`` prefix was missing in the source; ``x`` is
    # assumed, matching the ``x * d_0`` terms further down.
    tl_raw = T.dot(x * d_0, self.W_tl)
    hl_raw = T.dot(x * d_0, self.W_hl)
    tl = (tl_raw - self.Mean_tl) / (self.Std_tl + self.epsilon)
    hl = (hl_raw - self.Mean_hl) / (self.Std_hl + self.epsilon)

    tr_raw = (tl * d_1).dot(self.W_tr) + (x * d_0 * self.D_h)
    hr_raw = (hl * d_1).dot(self.W_hr) + (x * d_0 * self.D_t)
    tr = (tr_raw - self.Mean_tr) / (self.Std_tr + self.epsilon)
    hr = (hr_raw - self.Mean_hr) / (self.Std_hr + self.epsilon)

    t  = T.nnet.sigmoid(tr * self.S_t + self.B_t)
    h  = self._act(hr * self.S_h + self.B_h)
    rv = h * t + x * (1 - t)

    return rv
def test_output(self, x):
    """Compute the test-time output of the gated block.

    Inputs are scaled by the keep probabilities ``1 - d_p_0`` and
    ``1 - d_p_1``, and every linear stage is normalised with the stored
    ``Mean_*`` / ``Std_*`` statistics.

    Args:
        x: symbolic input.

    Returns:
        ``h * t + x * (1 - t)``, where ``t`` is the sigmoid gate and ``h``
        the activated candidate.
    """
    d_0 = 1.0 - self.d_p_0
    d_1 = 1.0 - self.d_p_1

    # NOTE(review): the ``T.dot(x`` prefix was missing in the source; ``x`` is
    # assumed as it is the only input of this method.
    tl_raw = T.dot(x * d_0, self.W_tl)
    hl_raw = T.dot(x * d_0, self.W_hl)
    tl = (tl_raw - self.Mean_tl) / (self.Std_tl + self.epsilon)
    hl = (hl_raw - self.Mean_hl) / (self.Std_hl + self.epsilon)

    tr_raw = (tl * d_1).dot(self.W_tr)
    hr_raw = (hl * d_1).dot(self.W_hr)
    tr = (tr_raw - self.Mean_tr) / (self.Std_tr + self.epsilon)
    hr = (hr_raw - self.Mean_hr) / (self.Std_hr + self.epsilon)

    t  = T.nnet.sigmoid(tr * self.S_t + self.B_t)
    h  = self._act(hr * self.S_h + self.B_h)
    rv = h * t + x * (1 - t)

    return rv
def step_call(self, x, *params):
    """Feed ``x`` through the layers using explicitly passed parameters.

    Used within scan with `get_params`. Parameters are consumed as
    consecutive ``(W, b)`` pairs, one per layer; the last layer is linear and
    the others apply the activation named by ``self.h_act``.

    NOTE(review): the dot prefixes and the ``else:`` line were missing in the
    source; ``T.dot(x`` and the branch layout are reconstructed -- confirm.

    Args:
        x: symbolic input.
        *params: flat parameter list.

    Returns:
        Output of the last layer.
    """
    params = list(params)

    for l in range(self.n_layers):
        W = params.pop(0)
        b = params.pop(0)

        if l == self.n_layers - 1:
            x = T.dot(x, W) + b
        else:
            activ = self.h_act
            # NOTE(review): eval() of a configuration string; safe only while
            # h_act never comes from untrusted input.
            x = eval(activ)(T.dot(x, W) + b)

    assert len(params) == 2, params
    return x
def step_free_energy(self, x, beta, *params):
    '''Step free energy function.

    Args:
        x (T.tensor): data sample.
        beta (float): beta value for annealing.
        *params: theano shared variables.

    Returns:
        T.tensor: free energy.

    '''
    W, v_params, h_params = self.split_params(*params)

    vis_term = beta * self.v_dist.get_energy_bias(x, *v_params)
    x = self.v_dist.scale_for_energy_model(x, *v_params)
    # NOTE(review): left operand was missing in the source; the rescaled
    # sample ``x`` is assumed.
    hid_act = beta * (T.dot(x, W) + self.h_dist.get_center(*h_params))
    fe = -vis_term - T.log(1. + T.exp(hid_act)).sum(axis=1)
    return fe
def step_free_energy_h(self, h, beta, *params):
    '''Step free energy function for hidden states.

    Args:
        h (T.tensor): hidden sample.
        beta (float): beta value for annealing.
        *params: theano shared variables.

    Returns:
        T.tensor: free energy.

    '''
    W, v_params, h_params = self.split_params(*params)

    hid_term = beta * self.h_dist.get_energy_bias(h, *h_params)
    h = self.h_dist.scale_for_energy_model(h, *h_params)
    # NOTE(review): left operand was missing in the source; the rescaled
    # sample ``h`` is assumed.
    vis_act = beta * (T.dot(h, W.T) + self.v_dist.get_center(*v_params))
    fe = -hid_term - T.log(1. + T.exp(vis_act)).sum(axis=1)
    return fe
def _step(self, m, y, h_, Ur):
    '''Step function for RNN call.

    Args:
        m (T.tensor): masks.
        y (T.tensor): inputs.
        h_ (T.tensor): recurrent state.
        Ur (theano.shared): recurrent connection.

    Returns:
        T.tensor: next recurrent state.

    '''
    # NOTE(review): left operand was missing in the source; ``h_`` assumed.
    preact = T.dot(h_, Ur) + y
    h      = T.tanh(preact)
    h      = m * h + (1 - m) * h_  # keep the old state where the mask is 0
    return h
def _generate_train_model_function(self, scores):
    """Compile ``self.train_model``, a gradient step on a pairwise ranking loss.

    Creates the shared weight vector ``self.W`` (zeros, float32) and the
    shared ``self.S`` from ``scores``. The compiled function takes index
    vectors ``(u, i, j)`` and maximises ``log sigmoid(x_ui - x_uj)`` minus an
    L2 penalty on ``W``.

    NOTE(review): the left operands of the two dot products and the closing
    brackets of ``T.sum(`` and ``updates = [`` were missing in the source;
    ``self.W`` as operand is an assumption.

    Args:
        scores: initial value for the shared variable ``S``.
    """
    u = T.lvector('u')
    i = T.lvector('i')
    j = T.lvector('j')
    self.W = theano.shared(numpy.zeros((self._dim)).astype('float32'), name='W')
    self.S = theano.shared(scores, name='S')
    x_ui = T.dot(self.W, self.S[u, i, :].T)
    x_uj = T.dot(self.W, self.S[u, j, :].T)
    x_uij = x_ui - x_uj
    obj = T.sum(
        T.log(T.nnet.sigmoid(x_uij)).sum() -
        self._lambda_w * 0.5 * (self.W ** 2).sum()
    )
    cost = -obj
    g_cost_W = T.grad(cost=cost, wrt=self.W)
    updates = [
        (self.W, self.W - self._learning_rate * g_cost_W)
    ]
    self.train_model = theano.function(inputs=[u, i, j], outputs=cost, updates=updates)
def svgd_gradient(X0):
    """Combine the negated discriminator gradient with a kernel repulsion term.

    Args:
        X0: symbolic batch fed to ``discrim``.

    Returns:
        Tuple ``(grad, svgd_grad, dxkxy)``: the negated gradient of the summed
        ``mse`` with respect to ``X0``, that gradient plus half the repulsion
        term, and the repulsion term itself.
    """
    feats, _, mse = discrim(X0)
    grad = -1.0 * T.grad(mse.sum(), X0)

    # kxy is only referenced by the disabled variant mentioned below.
    kxy, neighbors, h = rbf_kernel(feats)  #TODO

    # Difference between every feature vector and that of its neighbour.
    offset = feats - feats[neighbors]
    sq_dist = T.sum(offset ** 2, axis=1)
    weight = T.exp(-sq_dist / h ** 2 / 2.0)
    v = weight.dimshuffle(0, 'x') * offset / h ** 2

    # Push v back through the discriminator evaluated at the neighbours.
    X1 = X0[neighbors]
    hidden1, _, _ = discrim(X1)
    dxkxy = T.Lop(hidden1, X1, v)

    # A disabled variant weighted the flattened grad with a dot product and
    # divided by the row sums of kxy; the plain sum below is used instead.
    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy
# Project: SteinGAN    Author: DartML    | project source | file source
def svgd_gradient(X0):
    """Combine the negated discriminator gradient with a kernel repulsion term.

    Args:
        X0: symbolic batch fed to ``discrim``.

    Returns:
        Tuple ``(grad, svgd_grad, dxkxy)``: the negated gradient of the summed
        ``mse`` with respect to ``X0``, that gradient plus half the repulsion
        term, and the repulsion term itself.
    """
    feats, _, mse = discrim(X0)
    grad = -1.0 * T.grad(mse.sum(), X0)

    # kxy is only referenced by the disabled variant mentioned below.
    kxy, neighbors, h = rbf_kernel(feats)  #TODO

    # Difference between every feature vector and that of its neighbour.
    offset = feats - feats[neighbors]
    sq_dist = T.sum(offset ** 2, axis=1)
    weight = T.exp(-sq_dist / h ** 2 / 2.0)
    v = weight.dimshuffle(0, 'x') * offset / h ** 2

    # Push v back through the discriminator evaluated at the neighbours.
    X1 = X0[neighbors]
    hidden1, _, _ = discrim(X1)
    dxkxy = T.Lop(hidden1, X1, v)

    # A disabled variant weighted the flattened grad with a dot product and
    # divided by the row sums of kxy; the plain sum below is used instead.
    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy
def gen_test(_z, _params, _batchnorm, n_layers=3, n_f=128, init_sz=4, nc=3, use_tanh=False):
    """Generator forward pass using stored batch-norm statistics.

    NOTE(review): the source lost the ``T.dot(T.clip(_z`` prefix (the
    surviving ``, -1.0, 1.0), gw0)`` implies a three-argument call wrapped in
    a dot) and the statement that grows ``hs`` inside the loop; both are
    reconstructed -- confirm against upstream.

    Args:
        _z: latent input.
        _params: flat list: ``[w, g, b]`` for the dense layer and for each of
            the ``n_layers`` deconvolutions, then the final deconvolution
            weight.
        _batchnorm: flat list holding the ``u`` statistics at indices
            ``0..n_layers`` and the ``s`` statistics after them.
        n_layers: number of intermediate deconvolution layers.
        n_f: base number of feature maps.
        init_sz: spatial size after the dense layer.
        nc: output channels; 3 applies tanh, 1 applies sigmoid.
        use_tanh: apply tanh to ``_z`` first.

    Returns:
        The generated output (defined only for ``nc`` equal to 3 or 1).
    """
    if use_tanh:
        _z = tanh(_z)
    [gw0, gg0, gb0] = _params[0:3]
    hs = []
    u = _batchnorm[0]
    s = _batchnorm[n_layers + 1]
    h0 = relu(batchnorm(T.dot(T.clip(_z, -1.0, 1.0), gw0), u=u, s=s, g=gg0, b=gb0))
    h1 = h0.reshape((h0.shape[0], n_f * 2 ** n_layers, init_sz, init_sz))
    hs.extend([h0, h1])
    for n in range(n_layers):
        [w, g, b] = _params[3 * (n + 1):3 * (n + 2)]
        hin = hs[-1]
        u = _batchnorm[n + 1]
        s = _batchnorm[n + n_layers + 2]
        hout = relu(batchnorm(deconv(hin, w, subsample=(2, 2), border_mode=(2, 2)), u=u, s=s, g=g, b=b))
        hs.append(hout)  # the next layer reads hs[-1]
    x = deconv(hs[-1], _params[-1], subsample=(2, 2), border_mode=(2, 2))
    if nc == 3:
        x_f = tanh(x)
    if nc == 1:
        x_f = sigmoid(x)
    return x_f
def gen_batchnorm(_z, _params, n_layers=3, n_f=128, init_sz=4, nc=3):
    """Generator forward pass that also collects the pre-batch-norm outputs.

    NOTE(review): the source lost the left operand of the dot product, the
    statements that grow ``hs`` / ``output`` inside the loop and the return
    statement. ``_z`` as operand and ``return x, output`` (mirroring
    ``predict_batchnorm`` in this file) are assumptions -- confirm.

    Args:
        _z: latent input.
        _params: flat list: ``[w, g, b]`` for the dense layer and for each of
            the ``n_layers`` deconvolutions, then the final deconvolution
            weight.
        n_layers: number of intermediate deconvolution layers.
        n_f: base number of feature maps.
        init_sz: spatial size after the dense layer.
        nc: output channels; 3 applies tanh, 1 applies sigmoid.

    Returns:
        Tuple ``(x, output)``: the generated output and the list of
        pre-batch-norm layer outputs.
    """
    [gw0, gg0, gb0] = _params[0:3]
    hs = []
    h0_o = T.dot(_z, gw0)
    output = [h0_o]
    h0 = relu(batchnorm(h0_o, g=gg0, b=gb0))
    h1 = h0.reshape((h0.shape[0], n_f * 2 ** n_layers, init_sz, init_sz))
    hs.extend([h0, h1])
    for n in range(n_layers):
        [w, g, b] = _params[3 * (n + 1):3 * (n + 2)]
        hin = hs[-1]
        h_o = deconv(hin, w, subsample=(2, 2), border_mode=(2, 2))
        hout = relu(batchnorm(h_o, g=g, b=b))
        hs.append(hout)
        output.append(h_o)

    if nc == 3:
        x = tanh(deconv(hs[-1], _params[-1], subsample=(2, 2), border_mode=(2, 2)))
    if nc == 1:
        x = sigmoid(deconv(hs[-1], _params[-1], subsample=(2, 2), border_mode=(2, 2)))
    return x, output

# Project: iGAN    Author: junyanz    | project source | file source
def predict_batchnorm(_x, _params, n_layers=3):
    """Predictor forward pass that also collects the pre-batch-norm outputs.

    NOTE(review): the source lost the left operand of the dot product and the
    statements that grow ``hs`` / ``output`` inside the loop; ``h`` and the
    two appends are reconstructed -- confirm. A duplicated, unreachable
    second ``return`` was removed.

    Args:
        _x: input batch.
        _params: flat list: first convolution weight, ``[w, g, b]`` for each
            of the ``n_layers`` convolutions, then the final dense weight.
        n_layers: number of batch-normalised convolution layers.

    Returns:
        Tuple ``(y, output)``: the tanh prediction and the list of
        pre-batch-norm convolution outputs.
    """
    w = _params[0]
    h0 = lrelu(dnn_conv(_x, w, subsample=(2, 2), border_mode=(2, 2)))
    hs = [h0]
    output = []
    for n in range(n_layers):
        hin = hs[-1]
        w, g, b = _params[1 + 3 * n:1 + 3 * (n + 1)]
        h_o = dnn_conv(hin, w, subsample=(2, 2), border_mode=(2, 2))
        hout = lrelu(batchnorm(h_o, g=g, b=b))
        hs.append(hout)
        output.append(h_o)
    h = T.flatten(hs[-1], 2)
    y = tanh(T.dot(h, _params[-1]))
    return y, output
def fullyconnected_layer(tparams, state_below, options, prefix, activ='lambda x: x', **kwargs):
    """Compute the forward pass for a fully connected layer.

    Parameters
    ----------
    tparams      : OrderedDict of theano shared variables, {parameter name: value}
    state_below  : theano 3d tensor, input data, dimensions: (num of time steps, batch size, dim of vector)
    options      : dictionary, {hyperparameter: value}
    prefix       : string, layer name
    activ        : string, activation function: 'liner', 'tanh', or 'rectifier'

    Returns
    -------
                 : theano 3d tensor, output data, dimensions: (num of time steps, batch size, dim of vector)
    """
    # NOTE(review): the ``tensor.dot(state_below`` prefix was missing in the
    # source; the operand is assumed from the parameter description.
    # NOTE(review): eval() of ``activ``; safe only while it never comes from
    # untrusted input.
    return eval(activ)(tensor.dot(state_below, tparams[p_name(prefix, 'W')]) + tparams[p_name(prefix, 'b')])
# Project: gated_word_char_rlm    Author: nyu-dl    | project source | file source
def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
    """Compute the forward pass for a gate layer.

    NOTE(review): the source lost the left operand of the dot product and has
    no return statement. ``X_word`` as operand and ``return eval(activ)(X)``
    (the ending of ``concat_layer`` in this file) are assumptions -- confirm.

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'liner', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
    """
    # compute gating values, Eq.(3)
    G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')]) + tparams[p_name(prefix, 'b')][0])
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)
    # NOTE(review): eval() of ``activ``; safe only while it never comes from
    # untrusted input.
    return eval(activ)(X)
# Project: gated_word_char_rlm    Author: nyu-dl    | project source | file source
def concat_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
    """Compute the forward pass for a concat layer.

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string,  layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'liner', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
    """
    # NOTE(review): the source lost the start of this expression; a
    # ``tensor.dot(tensor.concatenate(`` prefix is what the surviving
    # ``[X_word, X_char], axis=2), <W>)`` tail implies -- confirm.
    projected = tensor.dot(tensor.concatenate([X_word, X_char], axis=2),
                           tparams[p_name(prefix, 'W')]) + tparams[p_name(prefix, 'b')]
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               projected)
    # NOTE(review): eval() of ``activ``; safe only while it never comes from
    # untrusted input.
    return eval(activ)(X)
def get_output_for(self, input, **kwargs):
    """Compute a gated mix of a transformed input and the input itself.

    ``output = activation * transform + input * (1 - transform)``, where
    ``transform`` is a sigmoid gate.

    NOTE(review): the source lost the left operands of both dot products
    (``input_reshape`` assumed) and applied the nonlinearity and the sigmoid
    only inside the ``if bias is not None`` blocks, so a bias-free layer
    produced an un-squashed gate. They are applied unconditionally here --
    confirm against upstream.

    Args:
        input: symbolic input; flattened to 2-D for the products and
            reshaped back at the end.

    Returns:
        Symbolic output with the shape of ``input``.
    """
    # if the input has more than two dimensions, flatten it into a
    # batch of feature vectors.
    input_reshape = input.flatten(2) if input.ndim > 2 else input

    activation = T.dot(input_reshape, self.W_h)
    if self.b_h is not None:
        activation = activation + self.b_h.dimshuffle('x', 0)
    activation = self.nonlinearity(activation)

    transform = T.dot(input_reshape, self.W_t)
    if self.b_t is not None:
        transform = transform + self.b_t.dimshuffle('x', 0)
    transform = nonlinearities.sigmoid(transform)

    carry = 1.0 - transform

    output = activation * transform + input_reshape * carry
    # reshape output back to original input_shape
    if input.ndim > 2:
        output = T.reshape(output, input.shape)

    return output
def dot(inp, matrix):
    """Decide the right type of dot product depending on the input.

    Integer inputs are treated as indices into the rows of ``matrix``; float
    inputs are multiplied with it, 3-D ones after collapsing the first two
    axes.

    NOTE(review): both dot calls lost their ``<alias>.dot(<operand>`` prefix
    and the final ``else:`` in the source; ``T`` as the theano.tensor alias
    and ``inp`` as the operand are assumed.

    Args:
        inp: symbolic tensor; ``inp.dtype`` is tested as a string.
        matrix: weight matrix.
    """
    if 'int' in inp.dtype and inp.ndim==2:
        return matrix[inp.flatten()]
    elif 'int' in inp.dtype:
        return matrix[inp]
    elif 'float' in inp.dtype and inp.ndim == 3:
        shape0 = inp.shape[0]
        shape1 = inp.shape[1]
        shape2 = inp.shape[2]
        return T.dot(inp.reshape((shape0*shape1, shape2)), matrix)
    else:
        return T.dot(inp, matrix)
def dot(inp, matrix):
    """Decide the right type of dot product depending on the input.

    Integer inputs are treated as indices into the rows of ``matrix``; float
    inputs are multiplied with it, 3-D ones after collapsing the first two
    axes.

    NOTE(review): both dot calls lost their ``<alias>.dot(<operand>`` prefix
    and the final ``else:`` in the source; ``T`` as the theano.tensor alias
    and ``inp`` as the operand are assumed.

    Args:
        inp: symbolic tensor; ``inp.dtype`` is tested as a string.
        matrix: weight matrix.
    """
    if 'int' in inp.dtype and inp.ndim==2:
        return matrix[inp.flatten()]
    elif 'int' in inp.dtype:
        return matrix[inp]
    elif 'float' in inp.dtype and inp.ndim == 3:
        shape0 = inp.shape[0]
        shape1 = inp.shape[1]
        shape2 = inp.shape[2]
        return T.dot(inp.reshape((shape0*shape1, shape2)), matrix)
    else:
        return T.dot(inp, matrix)
def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    """Run a masked GRU layer over ``emb`` with ``theano.scan``.

    NOTE(review): all dot products lost their left operand in the source and
    the function had no return statement; ``emb`` / ``h`` / ``r`` as operands
    and ``return results`` are reconstructed -- confirm.

    Args:
        tparams: mapping holding the ``W_*``, ``U_*`` and ``b_*`` parameters
            suffixed with ``layerIndex``.
        emb: symbolic input; axis 0 is the number of steps and, for 3-D
            input, axis 1 the number of samples.
        layerIndex: string suffix selecting this layer's parameters.
        hiddenDimSize: size of the hidden state.
        mask: per-step mask; passed to scan as a sequence, so the default
            ``None`` is not usable as written.

    Returns:
        The scan result (hidden state at every step).
    """
    timesteps = emb.shape[0]
    if emb.ndim == 3: n_samples = emb.shape[1]
    else: n_samples = 1

    # Input projections for all steps at once.
    W_rx = T.dot(emb, tparams['W_r_'+layerIndex])
    W_zx = T.dot(emb, tparams['W_z_'+layerIndex])
    Wx = T.dot(emb, tparams['W_'+layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_'+layerIndex]) + tparams['b_r_'+layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_'+layerIndex]) + tparams['b_z_'+layerIndex])
        h_tilde = T.tanh(wx + T.dot(r*h, tparams['U_'+layerIndex]) + tparams['b_'+layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        # Keep the previous state where the mask is 0.
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new#, output, time

    results, updates = theano.scan(fn=stepFn, sequences=[mask,W_rx,W_zx,Wx], outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize), name='gru_layer'+layerIndex, n_steps=timesteps)

    return results

# Project: doctorai    Author: mp2893    | project source | file source
def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    """Run a masked GRU layer over ``emb`` with ``theano.scan``.

    NOTE(review): all dot products lost their left operand in the source and
    the function had no return statement; ``emb`` / ``h`` / ``r`` as operands
    and ``return results`` are reconstructed -- confirm.

    Args:
        tparams: mapping holding the ``W_*``, ``U_*`` and ``b_*`` parameters
            suffixed with ``layerIndex``.
        emb: symbolic input; axis 0 is the number of steps and, for 3-D
            input, axis 1 the number of samples.
        layerIndex: string suffix selecting this layer's parameters.
        hiddenDimSize: size of the hidden state.
        mask: per-step mask; passed to scan as a sequence, so the default
            ``None`` is not usable as written.

    Returns:
        The scan result (hidden state at every step).
    """
    timesteps = emb.shape[0]
    if emb.ndim == 3: n_samples = emb.shape[1]
    else: n_samples = 1

    # Input projections for all steps at once.
    W_rx = T.dot(emb, tparams['W_r_'+layerIndex])
    W_zx = T.dot(emb, tparams['W_z_'+layerIndex])
    Wx = T.dot(emb, tparams['W_'+layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_'+layerIndex]) + tparams['b_r_'+layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_'+layerIndex]) + tparams['b_z_'+layerIndex])
        h_tilde = T.tanh(wx + T.dot(r*h, tparams['U_'+layerIndex]) + tparams['b_'+layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        # Keep the previous state where the mask is 0.
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new

    results, updates = theano.scan(fn=stepFn, sequences=[mask,W_rx,W_zx,Wx], outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize), name='gru_layer'+layerIndex, n_steps=timesteps)

    return results

# Project: sesame-paste-noodle    Author: aissehust    | project source | file source
def forward(self, inputtensor):
    """Multiply the first input by the weights, with optional weight dropout.

    When ``self.dc`` is non-zero the weight matrix is multiplied by a random
    binomial mask with retain probability ``1 - self.dc`` before use.

    NOTE(review): the source is damaged here: two ``else:`` lines, the left
    operands of the dot products and the remaining arguments of
    ``_srng.binomial(`` are missing. The branch layout, ``inputimage`` as
    operand and a binomial call without further arguments are assumptions;
    the lost arguments may have fixed the mask dtype -- confirm.

    Args:
        inputtensor: sequence whose first element is the input tensor.

    Returns:
        One-element tuple with the layer output.

    Raises:
        IndexError: if ``self.dc`` is non-zero and outside ``(0, 1]``.
    """
    inputimage = inputtensor[0]

    if self.dc != 0.0:
        if not (0 < self.dc <= 1):
            raise IndexError
        _srng = RandomStreams(np.random.randint(1, 2147462579))
        one = T.constant(1)
        retain_prob = one - self.dc
        mask_shape = self.w.shape
        mask = _srng.binomial(mask_shape, p=retain_prob)
        self.w = self.w * mask

    if self.need_bias:
        return ((T.dot(inputimage, self.w)+self.b), )
    return (T.dot(inputimage, self.w),)
def dot(inp, matrix, bias=None):
    """Decide the right type of dot product depending on the input.

    Integer inputs are treated as indices into the rows of ``matrix``; float
    inputs are multiplied with it. 3-D float inputs are collapsed to 2-D for
    the product and reshaped back afterwards.

    NOTE(review): every dot call lost its ``<alias>.dot(<operand>`` prefix in
    the source; ``T`` as the theano.tensor alias and ``inp`` as the operand
    are assumed.

    Args:
        inp: symbolic tensor; ``inp.dtype`` is tested as a string.
        matrix: weight matrix.
        bias: optional bias added to float products.
    """
    if 'int' in inp.dtype and inp.ndim == 2:
        return matrix[inp.flatten()]
    elif 'int' in inp.dtype:
        return matrix[inp]
    elif 'float' in inp.dtype and inp.ndim == 3:
        shape0 = inp.shape[0]
        shape1 = inp.shape[1]
        shape2 = inp.shape[2]
        flat = T.dot(inp.reshape((shape0 * shape1, shape2)), matrix)
        # ``is not None`` rather than truthiness: the default is None and a
        # tensor-valued bias should not be evaluated as a boolean.
        if bias is not None:
            flat = flat + bias
        return flat.reshape((shape0, shape1, matrix.shape[1]))
    else:
        if bias is not None:
            return T.dot(inp, matrix) + bias
        return T.dot(inp, matrix)


# Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def recurrent_as_activation_function(self, Wix, Uix, h_tm1, c_tm1, y_tm1):
    """ Implement the recurrent unit as an activation function. This function is called by self.__init__().

    NOTE(review): the source lost the left operands of all dot products and
    the return statement. ``h_tm1`` / ``y_tm1`` / ``h_t`` are assigned from
    the weight names and the ``(h_t, c_t, y_t)`` return order is a guess --
    confirm against upstream.

    :param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation
    :type Wix: matrix
    :param Uix: pre-computed input term added to the output ``y_t``
    :param h_tm1: contains the hidden activation from previous time step
    :type h_tm1: matrix, each row means a hidden activation vector of a time step
    :param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM
    :param y_tm1: output of the previous time step
    :returns: h_t is the hidden activation of current time step
    """
    h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + T.dot(y_tm1, self.W_yi) + self.b_i)  #

    # recurrent output and additional input
    y_t = Uix + T.dot(h_t, self.U_hi) + T.dot(y_tm1, self.U_yi) + self.b

    c_t = h_t

    return h_t, c_t, y_t

# Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def recurrent_as_activation_function(self, Wix, Wiy, h_tm1, c_tm1):
    """ Implement the recurrent unit as an activation function. This function is called by self.__init__().

    NOTE(review): the source lost the left operand of the dot product and the
    return statement; ``h_tm1`` and ``return h_t, c_t`` are assumptions --
    confirm against upstream.

    :param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation
    :type Wix: matrix
    :param Wiy: second pre-computed term added to the pre-activation
    :param h_tm1: contains the hidden activation from previous time step
    :type h_tm1: matrix, each row means a hidden activation vector of a time step
    :param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM
    :returns: h_t is the hidden activation of current time step
    """
    h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + Wiy + self.b_i)  #

    c_t = h_t

    return h_t, c_t

# Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1):
    """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
        The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`

    NOTE(review): the source lost the left operands of all dot products and
    the return statement. ``h_tm1`` / ``y_tm1`` / ``h_t`` are assigned from
    the weight names and the ``(h_t, c_t, y_t)`` return order is a guess --
    confirm against upstream.
    """
    i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i)  #
    f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f)  #

    # The previous output feeds the cell candidate.
    c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + T.dot(y_tm1, self.W_yi) + self.b_c)

    o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)

    h_t = o_t * T.tanh(c_t)

    y_t = T.dot(h_t, self.U_ho) + self.b

    return h_t, c_t, y_t

# Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
    """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
        The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`

    NOTE(review): the source lost the left operand of every dot product and
    the return statement; ``h_tm1`` and ``return h_t, c_t`` (as in the last
    LSTM variant of this file) are assumptions -- confirm against upstream.
    """
    i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i)  #
    f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f)  #

    c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c)

    # The output gate peeks at the new cell state.
    o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)

    h_t = o_t * T.tanh(c_t)

    return h_t, c_t

# Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
    """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
        The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`

    NOTE(review): the source lost the left operand of every dot product and
    the return statement; ``h_tm1`` and ``return h_t, c_t`` (as in the last
    LSTM variant of this file) are assumptions -- confirm against upstream.
    """
    i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i)  #
    f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f)  #

    c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c)

    # The output gate peeks at the new cell state.
    o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)

    h_t = o_t * T.tanh(c_t)

    return h_t, c_t

# Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def recurrent_as_activation_function(self, Wix, h_tm1, c_tm1, y_tm1):
    """ Implement the recurrent unit as an activation function. This function is called by self.__init__().

    NOTE(review): the source lost the left operands of all dot products and
    the return statement. ``h_tm1`` / ``y_tm1`` / ``h_t`` are assigned from
    the weight names and the ``(h_t, c_t, y_t)`` return order is a guess --
    confirm against upstream.

    :param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation
    :type Wix: matrix
    :param h_tm1: contains the hidden activation from previous time step
    :type h_tm1: matrix, each row means a hidden activation vector of a time step
    :param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM
    :param y_tm1: output of the previous time step
    :returns: h_t is the hidden activation of current time step
    """
    h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + T.dot(y_tm1, self.W_yi) + self.b_i)  #

    y_t = T.dot(h_t, self.U_hi) + self.b

    c_t = h_t

    return h_t, c_t, y_t

# Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
    """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
        The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`

    NOTE(review): the source lost the left operand of every dot product and
    the return statement; ``h_tm1`` and ``return h_t, c_t`` (as in the last
    LSTM variant of this file) are assumptions -- confirm against upstream.
    """
    i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i)  #
    f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f)  #

    c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c)

    # The output gate peeks at the new cell state.
    o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)

    h_t = o_t * T.tanh(c_t)

    return h_t, c_t

# Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1):
    """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
        The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`

    NOTE(review): the source lost the left operands of all dot products and
    the return statement. ``h_tm1`` / ``y_tm1`` / ``h_t`` are assigned from
    the weight names and the ``(h_t, c_t, y_t)`` return order is a guess --
    confirm against upstream.
    """
    i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i)  #
    f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f)  #

    # The previous output feeds the cell candidate.
    c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + T.dot(y_tm1, self.W_yi) + self.b_c)

    o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)

    h_t = o_t * T.tanh(c_t)

    y_t = T.dot(h_t, self.U_ho) + self.b

    return h_t, c_t, y_t

# Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
    """ This function treats the LSTM block as an activation function, and implements the LSTM (without the output gate) activation function.
        The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`

    NOTE(review): the summary above is kept from the source, but the body
    does compute an output gate ``o_t``; what it omits compared with the
    other LSTM variants in this file are the ``w_c*`` cell-state terms in the
    gates. The left operand of every dot product was missing in the source;
    ``h_tm1`` is assumed.

    :returns: tuple ``(h_t, c_t)``
    """
    i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.b_i)
    f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.b_f)

    c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c)

    o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.b_o)

    h_t = o_t * T.tanh(c_t)

    return h_t, c_t