The following 50 code examples, extracted from open source Python projects, illustrate how to use theano.tensor.dot().
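Before the project snippets, here is a minimal, self-contained sketch of what theano.tensor.dot() does: it builds a symbolic matrix product that is only evaluated once the graph is compiled into a theano.function.

import numpy as np
import theano
import theano.tensor as T

# symbolic inputs
A = T.matrix('A')          # shape (m, k)
B = T.matrix('B')          # shape (k, n)
C = T.dot(A, B)            # symbolic matrix product, no computation yet

matmul = theano.function([A, B], C)

a = np.random.randn(3, 4).astype(theano.config.floatX)
b = np.random.randn(4, 2).astype(theano.config.floatX)
print(matmul(a, b).shape)  # (3, 2)
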
def rbf_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()

    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)

    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h

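A hedged usage sketch for rbf_kernel above: compile it over a symbolic float matrix and evaluate the kernel matrix, nearest-neighbour indices, and bandwidth on random data (this assumes T is theano.tensor, exactly as in the snippet).

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')                     # a batch of feature vectors
Kxy, neighbors, h = rbf_kernel(X)     # symbolic kernel matrix, NN indices, bandwidth
f = theano.function([X], [Kxy, neighbors, h])

data = np.random.randn(8, 16).astype(theano.config.floatX)
K, nn_idx, bw = f(data)
print(K.shape, nn_idx.shape)          # (8, 8) (8,)
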
def lyr_linear(
        self,
        name_, s_x_,
        idim_, odim_,
        init_=None, bias_=0.,
        params_di_='params'):
    '''
    dense matrix multiplication, optionally adding a bias vector
    '''
    name_W = name_ + '_w'
    name_B = name_ + '_b'
    self.set_vars(params_di_)
    if init_ is None:
        init_ = dict(init_=[1.4 / sqrt(idim_ + odim_)])
    v_W = self.get_variable(name_W, (idim_, odim_), **init_)
    if bias_ is None:
        s_ret = T.dot(s_x_, v_W)
    else:
        v_B = self.get_variable(name_B, (odim_,), bias_)
        s_ret = T.dot(s_x_, v_W) + v_B
    return s_ret

def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)

    activation = T.dot(input, self.W)

    if init:
        ma = T.mean(activation, axis=0)
        activation -= ma.dimshuffle('x', 0)
        stdv = T.sqrt(T.mean(T.square(activation), axis=0))
        activation /= stdv.dimshuffle('x', 0)
        self.init_updates = [(self.weight_scale, self.weight_scale / stdv),
                             (self.b, -ma / stdv)]
    else:
        activation += self.b.dimshuffle('x', 0)

    return self.nonlinearity(activation)

def nn(model, text, vectors, query, k=5):
    """
    Return the nearest neighbour sentences to query
    text: list of sentences
    vectors: the corresponding representations for text
    query: a string to search
    """
    qf = encode(model, [query])
    qf /= norm(qf)
    scores = numpy.dot(qf, vectors.T).flatten()
    sorted_args = numpy.argsort(scores)[::-1]
    sentences = [text[a] for a in sorted_args[:k]]
    print('QUERY: ' + query)
    print('NEAREST: ')
    for i, s in enumerate(sentences):
        print(s, sorted_args[i])

def _step(self, x_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + x_

    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 3, self.hidden_dim))

    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_
    h = o * tensor.tanh(c)

    if self.recurrent_dropout_layer != None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c

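Several of the recurrent-step examples in this collection call a _slice helper (and, later, a numpy_floatX helper) that is not shown. A minimal sketch of what these helpers conventionally look like in Theano LSTM code, assuming the usual layout where the gate pre-activations are concatenated along the last axis:

import numpy
import theano

def _slice(x, n, dim):
    # take the n-th chunk of size `dim` along the last axis,
    # e.g. one gate's pre-activation out of a concatenated projection
    if x.ndim == 3:
        return x[:, :, n * dim:(n + 1) * dim]
    return x[:, n * dim:(n + 1) * dim]

def numpy_floatX(data):
    # cast a Python/numpy number to Theano's configured float dtype
    return numpy.asarray(data, dtype=theano.config.floatX)
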
def connect(self, inputs, mask, is_train): """ is_train: A boolean tensor. """ max_length = inputs.shape[0] batch_size = inputs.shape[1] outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim), tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)] # Dropout mask sharing for variational dropout. self.is_train = is_train if self.recurrent_dropout_layer != None: self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train) inputs = tensor.dot(inputs, self.W) + self.b rval, _ = theano.scan(self._step, # Scan function sequences=[inputs, mask], # Input sequence outputs_info=outputs_info, name=_p(self.prefix, '_layers'), n_steps=max_length) # scan steps return rval[0]
def _step(self, x_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + _slice(x_, 0, self.hidden_dim * 5)

    # i: input. f: forget. o: output. t: transform.
    # j: input w\ non-linearity. k: input w\o non-linearity.
    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    t = tensor.nnet.sigmoid(_slice(preact, 3, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 4, self.hidden_dim))
    k = _slice(x_, 5, self.hidden_dim)

    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_
    h = t * o * tensor.tanh(c) + (1. - t) * k

    if self.recurrent_dropout_layer != None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c

def connect(self, inputs, mask, is_train):
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
                    tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]

    # Dropout layers
    self.is_train = is_train
    if self.recurrent_dropout_layer != None:
        self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)

    proj_inputs = tensor.dot(inputs, self.W) + self.b
    rval, _ = theano.scan(self._step,  # Scan function
                          sequences=[inputs, proj_inputs, mask],  # Input sequence
                          outputs_info=outputs_info,
                          name=_p(self.prefix, '_layers'),
                          n_steps=max_length)  # scan steps
    return rval[0]

def _step(self, x_, px_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + px_

    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 3, self.hidden_dim))

    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_
    # Residual connection.
    h = o * tensor.tanh(c) + x_

    if self.recurrent_dropout_layer != None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c

def nn(model, text, vectors, query, k=5):
    """
    Return the nearest neighbour sentences to query
    text: list of sentences
    vectors: the corresponding representations for text
    query: a string to search
    """
    qf = encode(model, [query])
    qf /= norm(qf)
    scores = numpy.dot(qf, vectors.T).flatten()
    sorted_args = numpy.argsort(scores)[::-1]
    sentences = [text[a] for a in sorted_args[:k]]
    print 'QUERY: ' + query
    print 'NEAREST: '
    for i, s in enumerate(sentences):
        print s, sorted_args[i]

def dot(inp, matrix, bias=None):
    """
    Decide the right type of dot product depending on the input
    arguments
    """
    if 'int' in inp.dtype and inp.ndim == 2:
        return matrix[inp.flatten()]
    elif 'int' in inp.dtype:
        return matrix[inp]
    elif 'float' in inp.dtype and inp.ndim == 3:
        shape0 = inp.shape[0]
        shape1 = inp.shape[1]
        shape2 = inp.shape[2]
        if bias:
            return (T.dot(inp.reshape((shape0 * shape1, shape2)), matrix) + bias).reshape((shape0, shape1, matrix.shape[1]))
        else:
            return T.dot(inp.reshape((shape0 * shape1, shape2)), matrix).reshape((shape0, shape1, matrix.shape[1]))
    else:
        if bias:
            return T.dot(inp, matrix) + bias
        else:
            return T.dot(inp, matrix)


# Numerically stable log(sum(exp(A))). Can also be used in softmax function.

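A minimal usage sketch for the polymorphic dot helper above, with a hypothetical embedding table and projection matrix standing in for real model parameters: integer inputs fall through to an embedding lookup, while 3-D float inputs are reshaped, projected with T.dot, and reshaped back.

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(0)
emb = theano.shared(rng.randn(1000, 64).astype(theano.config.floatX))  # hypothetical embedding table
W = theano.shared(rng.randn(64, 32).astype(theano.config.floatX))      # hypothetical projection

ids = T.imatrix('ids')     # (batch, time) int indices  -> embedding-lookup branch
x3d = T.tensor3('x3d')     # (time, batch, dim) floats  -> reshape + T.dot branch

lookup = dot(ids, emb)     # equivalent to emb[ids.flatten()]
proj = dot(x3d, W)         # T.dot on the flattened input, reshaped to (time, batch, 32)

f = theano.function([x3d], proj)
print(f(rng.randn(5, 2, 64).astype(theano.config.floatX)).shape)  # (5, 2, 32)
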
def op_ortho_loss(s_x_, axes_=(-2, -1), ndim_=None):
    '''
    orthogonal matrix loss
    used to regularize parameter to unitary

    Args:
        s_x_: (batch of) matrices
        axes_: tuple of two integers, specify which axes to be for matrix,
            defaults to last two axes
        ndim_: specify args to be (ndim_ x ndim_) matrices
    '''
    if ndim_ is None:
        ax = axes_[0]
        ndim = T.shape(s_x_)[ax]
    else:
        ndim = ndim_

    tpat = list(range(ndim))
    bpat = ['x'] * s_x_.ndim
    tpat[axes_[0]], tpat[axes_[1]] = tpat[axes_[1]], tpat[axes_[0]]
    bpat[axes_[0]] = 0
    bpat[axes_[1]] = 1
    s_y = T.dot(s_x_.transpose(*tpat), s_x_)
    return T.sqr(s_y - T.eye(ndim).dimshuffle(*bpat))

def op_covmat(s_x_, l1_normize_=True, eps_=1e-7):
    '''
    Return covariance matrix given a batch of data points

    Args:
        s_x_: batch of row vectors
        l1_normize_: Defaults to True. Makes the covariance matrix
            L1 normalized wrt number of data points.
        eps_: Adds a small identity matrix I*eps_ to the result;
            this is applied after L1 normalization
    '''
    assert s_x_.ndim == 2
    s_mean = s_x_ - T.mean(s_x_, axis=0, keepdims=True)
    s_shp = T.shape(s_x_)
    s_covmat = T.dot(s_mean.T, s_mean)
    if l1_normize_:
        s_covmat /= s_shp[0]
    return s_covmat + T.eye(s_shp[1]) * eps_

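A minimal check of op_covmat against NumPy, assuming floatX inputs. Note that the helper divides by N rather than N-1 and adds eps_ to the diagonal, so it is compared against the biased np.cov plus the same diagonal term:

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
f_cov = theano.function([X], op_covmat(X))

data = np.random.RandomState(0).randn(100, 5).astype(theano.config.floatX)
expected = np.cov(data, rowvar=False, bias=True) + np.eye(5) * 1e-7
print(np.allclose(f_cov(data), expected, atol=1e-5))  # True
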
def lyr_linear(
        self, name_, s_x_,
        idim_, odim_,
        init_=None, bias_=0.,
        params_group_='params'):
    '''
    dense matrix multiplication, optionally adding a bias vector
    '''
    name_W = name_ + '_w'
    name_B = name_ + '_b'
    if init_ is None:
        init_ = [1.4 / sqrt(idim_ + odim_)]
    with self.get_group(params_group_):
        v_W = self.get_variable(name_W, (idim_, odim_), init_=init_)
    if bias_ is None:
        s_ret = T.dot(s_x_, v_W)
    else:
        with self.get_group(params_group_):
            v_B = self.get_variable(name_B, (odim_,), bias_)
        s_ret = T.dot(s_x_, v_W) + v_B
    return s_ret

def sample(self, x, K):
    if x.ndim == 1:
        x = x.reshape(1, x.shape[0])
    hn = self.encode(x)
    W = self.params[0]
    ww = T.dot(W.T, W)
    samples = []
    for _ in range(K):
        s = hn * (1. - hn)
        jj = ww * s.dimshuffle(0, 'x', 1) * s.dimshuffle(0, 1, 'x')
        alpha = self.srng.normal(size=hn.shape, avg=0., std=self.sigma,
                                 dtype=theano.config.floatX)
        delta = (alpha.dimshuffle(0, 1, 'x') * jj).sum(1)
        zn = self.decode(hn + delta)
        hn = self.encode(zn)
        # zn2 = self.decode(hn)
        samples.append(zn.eval())
    return samples

def gru_layer(tparams, emb, options):
    hiddenDimSize = options['hiddenDimSize']
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    def stepFn(wx, h, U_gru):
        uh = T.dot(h, U_gru)
        r = T.nnet.sigmoid(_slice(wx, 0, hiddenDimSize) + _slice(uh, 0, hiddenDimSize))
        z = T.nnet.sigmoid(_slice(wx, 1, hiddenDimSize) + _slice(uh, 1, hiddenDimSize))
        h_tilde = T.tanh(_slice(wx, 2, hiddenDimSize) + r * _slice(uh, 2, hiddenDimSize))
        h_new = z * h + ((1. - z) * h_tilde)
        return h_new

    Wx = T.dot(emb, tparams['W_gru']) + tparams['b_gru']
    results, updates = theano.scan(fn=stepFn,
                                   sequences=[Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   non_sequences=[tparams['U_gru']],
                                   name='gru_layer',
                                   n_steps=timesteps)
    return results

def one_step(self, x, h_tm1, s_tm1):
    """
    Run the forward pass for a single timestep of a LSTM
    h_tm1: initial h
    s_tm1: initial s (cell state)
    """
    g = T.tanh(T.dot(x, self.W_gx) + T.dot(h_tm1, self.W_gh) + self.b_g)
    i = T.nnet.sigmoid(T.dot(x, self.W_ix) + T.dot(h_tm1, self.W_ih) + self.b_i)
    f = T.nnet.sigmoid(T.dot(x, self.W_fx) + T.dot(h_tm1, self.W_fh) + self.b_f)
    o = T.nnet.sigmoid(T.dot(x, self.W_ox) + T.dot(h_tm1, self.W_oh) + self.b_o)

    s = i * g + s_tm1 * f
    h = T.tanh(s) * o

    return h, s

def test_output(self, x):
    d_0 = 1.0 - self.d_p_0
    d_1 = 1.0 - self.d_p_1

    tl_raw = T.dot(x * d_0, self.W_tl)
    hl_raw = T.dot(x * d_0, self.W_hl)
    tl = (tl_raw - self.Mean_tl) / (self.Std_tl + self.epsilon)
    hl = (hl_raw - self.Mean_hl) / (self.Std_hl + self.epsilon)

    tr_raw = (tl * d_1).dot(self.W_tr) + (x * d_0 * self.D_h)
    hr_raw = (hl * d_1).dot(self.W_hr) + (x * d_0 * self.D_t)
    tr = (tr_raw - self.Mean_tr) / (self.Std_tr + self.epsilon)
    hr = (hr_raw - self.Mean_hr) / (self.Std_hr + self.epsilon)

    t = T.nnet.sigmoid(tr * self.S_t + self.B_t)
    h = self._act(hr * self.S_h + self.B_h)

    rv = h * t + x * (1 - t)
    return rv

def test_output(self, x):
    d_0 = 1.0 - self.d_p_0
    d_1 = 1.0 - self.d_p_1

    tl_raw = T.dot(x * d_0, self.W_tl)
    hl_raw = T.dot(x * d_0, self.W_hl)
    tl = (tl_raw - self.Mean_tl) / (self.Std_tl + self.epsilon)
    hl = (hl_raw - self.Mean_hl) / (self.Std_hl + self.epsilon)

    tr_raw = (tl * d_1).dot(self.W_tr)
    hr_raw = (hl * d_1).dot(self.W_hr)
    tr = (tr_raw - self.Mean_tr) / (self.Std_tr + self.epsilon)
    hr = (hr_raw - self.Mean_hr) / (self.Std_hr + self.epsilon)

    t = T.nnet.sigmoid(tr * self.S_t + self.B_t)
    h = self._act(hr * self.S_h + self.B_h)

    rv = h * t + x * (1 - t)
    return rv

def step_call(self, x, *params):
    # Used within scan with `get_params`
    params = list(params)

    for l in xrange(self.n_layers):
        W = params.pop(0)
        b = params.pop(0)

        if l == self.n_layers - 1:
            x = T.dot(x, W) + b
        else:
            activ = self.h_act
            x = eval(activ)(T.dot(x, W) + b)

    assert len(params) == 2, params
    return x

def step_free_energy(self, x, beta, *params):
    '''Step free energy function.

    Args:
        x (T.tensor): data sample.
        beta (float): beta value for annealing.
        *params: theano shared variables.

    Returns:
        T.tensor: free energy.

    '''
    W, v_params, h_params = self.split_params(*params)

    vis_term = beta * self.v_dist.get_energy_bias(x, *v_params)
    x = self.v_dist.scale_for_energy_model(x, *v_params)
    hid_act = beta * (T.dot(x, W) + self.h_dist.get_center(*h_params))
    fe = -vis_term - T.log(1. + T.exp(hid_act)).sum(axis=1)
    return fe

def step_free_energy_h(self, h, beta, *params):
    '''Step free energy function for hidden states.

    Args:
        h (T.tensor): hidden sample.
        beta (float): beta value for annealing.
        *params: theano shared variables.

    Returns:
        T.tensor: free energy.

    '''
    W, v_params, h_params = self.split_params(*params)

    hid_term = beta * self.h_dist.get_energy_bias(h, *h_params)
    h = self.h_dist.scale_for_energy_model(h, *h_params)
    vis_act = beta * (T.dot(h, W.T) + self.v_dist.get_center(*v_params))
    fe = -hid_term - T.log(1. + T.exp(vis_act)).sum(axis=1)
    return fe

def _step(self, m, y, h_, Ur):
    '''Step function for RNN call.

    Args:
        m (T.tensor): masks.
        y (T.tensor): inputs.
        h_ (T.tensor): recurrent state.
        Ur (theano.shared): recurrent connection.

    Returns:
        T.tensor: next recurrent state.

    '''
    preact = T.dot(h_, Ur) + y
    h = T.tanh(preact)
    h = m * h + (1 - m) * h_
    return h

def _generate_train_model_function(self, scores):
    u = T.lvector('u')
    i = T.lvector('i')
    j = T.lvector('j')

    self.W = theano.shared(numpy.zeros((self._dim)).astype('float32'), name='W')
    self.S = theano.shared(scores, name='S')

    x_ui = T.dot(self.W, self.S[u, i, :].T)
    x_uj = T.dot(self.W, self.S[u, j, :].T)
    x_uij = x_ui - x_uj

    obj = T.sum(
        T.log(T.nnet.sigmoid(x_uij)).sum() -
        self._lambda_w * 0.5 * (self.W ** 2).sum()
    )
    cost = -obj

    g_cost_W = T.grad(cost=cost, wrt=self.W)

    updates = [(self.W, self.W - self._learning_rate * g_cost_W)]

    self.train_model = theano.function(inputs=[u, i, j], outputs=cost, updates=updates)

def svgd_gradient(X0):
    hidden, _, mse = discrim(X0)
    grad = -1.0 * T.grad(mse.sum(), X0)

    kxy, neighbors, h = rbf_kernel(hidden)  # TODO

    coff = T.exp(-T.sum((hidden[neighbors] - hidden) ** 2, axis=1) / h ** 2 / 2.0)
    v = coff.dimshuffle(0, 'x') * (-hidden[neighbors] + hidden) / h ** 2

    X1 = X0[neighbors]
    hidden1, _, _ = discrim(X1)
    dxkxy = T.Lop(hidden1, X1, v)

    #svgd_grad = (T.dot(kxy, T.flatten(grad, 2)).reshape(dxkxy.shape) + dxkxy) / T.sum(kxy, axis=1).dimshuffle(0, 'x', 'x', 'x')
    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy

def gen_test(_z, _params, _batchnorm, n_layers=3, n_f=128, init_sz=4, nc=3, use_tanh=False):
    if use_tanh:
        _z = tanh(_z)
    [gw0, gg0, gb0] = _params[0:3]
    hs = []
    u = _batchnorm[0]
    s = _batchnorm[n_layers + 1]
    h0 = relu(batchnorm(T.dot(T.clip(_z, -1.0, 1.0), gw0), u=u, s=s, g=gg0, b=gb0))
    h1 = h0.reshape((h0.shape[0], n_f * 2 ** n_layers, init_sz, init_sz))
    hs.extend([h0, h1])
    for n in range(n_layers):
        [w, g, b] = _params[3 * (n + 1):3 * (n + 2)]
        hin = hs[-1]
        u = _batchnorm[n + 1]
        s = _batchnorm[n + n_layers + 2]
        hout = relu(batchnorm(deconv(hin, w, subsample=(2, 2), border_mode=(2, 2)), u=u, s=s, g=g, b=b))
        hs.append(hout)
    x = deconv(hs[-1], _params[-1], subsample=(2, 2), border_mode=(2, 2))

    if nc == 3:
        x_f = tanh(x)
    if nc == 1:
        x_f = sigmoid(x)
    return x_f

def gen_batchnorm(_z, _params, n_layers=3, n_f=128, init_sz=4, nc=3):
    [gw0, gg0, gb0] = _params[0:3]
    hs = []
    h0_o = T.dot(_z, gw0)
    output = [h0_o]
    h0 = relu(batchnorm(h0_o, g=gg0, b=gb0))
    h1 = h0.reshape((h0.shape[0], n_f * 2 ** n_layers, init_sz, init_sz))
    hs.extend([h0, h1])
    for n in range(n_layers):
        [w, g, b] = _params[3 * (n + 1):3 * (n + 2)]
        hin = hs[-1]
        h_o = deconv(hin, w, subsample=(2, 2), border_mode=(2, 2))
        hout = relu(batchnorm(h_o, g=g, b=b))
        hs.append(hout)
        output.append(h_o)
    if nc == 3:
        x = tanh(deconv(hs[-1], _params[-1], subsample=(2, 2), border_mode=(2, 2)))
    if nc == 1:
        x = sigmoid(deconv(hs[-1], _params[-1], subsample=(2, 2), border_mode=(2, 2)))
    return x, output

def predict_batchnorm(_x, _params, n_layers=3):
    w = _params[0]
    h0 = lrelu(dnn_conv(_x, w, subsample=(2, 2), border_mode=(2, 2)))
    hs = [h0]
    output = []
    for n in range(n_layers):
        hin = hs[-1]
        w, g, b = _params[1 + 3 * n:1 + 3 * (n + 1)]
        h_o = dnn_conv(hin, w, subsample=(2, 2), border_mode=(2, 2))
        hout = lrelu(batchnorm(h_o, g=g, b=b))
        hs.append(hout)
        output.append(h_o)
    h = T.flatten(hs[-1], 2)
    y = tanh(T.dot(h, _params[-1]))
    return y, output

def fullyconnected_layer(tparams, state_below, options, prefix, activ='lambda x: x', **kwargs):
    """ compute the forward pass for a fully connected layer

    Parameters
    ----------
    tparams     : OrderedDict of theano shared variables, {parameter name: value}
    state_below : theano 3d tensor, input data, dimensions: (num of time steps, batch size, dim of vector)
    options     : dictionary, {hyperparameter: value}
    prefix      : string, layer name
    activ       : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
                : theano 3d tensor, output data, dimensions: (num of time steps, batch size, dim of vector)

    """
    return eval(activ)(tensor.dot(state_below, tparams[p_name(prefix, 'W')]) +
                       tparams[p_name(prefix, 'b')])

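A minimal, self-contained sketch of the string-activation pattern used above, with hypothetical parameter names standing in for the tparams entries that p_name would produce:

import numpy
import theano
import theano.tensor as tensor

# hypothetical parameters for a layer with prefix 'ff'
# (stand-ins for tparams[p_name('ff', 'W')] and tparams[p_name('ff', 'b')])
tparams = {
    'ff_W': theano.shared(numpy.random.randn(16, 8).astype(theano.config.floatX)),
    'ff_b': theano.shared(numpy.zeros(8, dtype=theano.config.floatX)),
}

state_below = tensor.tensor3('state_below')   # (time, batch, 16)
activ = 'lambda x: tensor.tanh(x)'            # activation passed as a string and eval'd
out = eval(activ)(tensor.dot(state_below, tparams['ff_W']) + tparams['ff_b'])

f = theano.function([state_below], out)
print(f(numpy.random.randn(4, 2, 16).astype(theano.config.floatX)).shape)  # (4, 2, 8)
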
def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
    """ compute the forward pass for a gate layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)

    """
    # compute gating values, Eq.(3)
    G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')]) + tparams[p_name(prefix, 'b')][0])
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)
    return eval(activ)(X)

def concat_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
    """ compute the forward pass for a concat layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)

    """
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               tensor.dot(tensor.concatenate([X_word, X_char], axis=2), tparams[p_name(prefix, 'W')]) +
               tparams[p_name(prefix, 'b')])
    return eval(activ)(X)

def get_output_for(self, input, **kwargs):
    # if the input has more than two dimensions, flatten it into a
    # batch of feature vectors.
    input_reshape = input.flatten(2) if input.ndim > 2 else input

    activation = T.dot(input_reshape, self.W_h)
    if self.b_h is not None:
        activation = activation + self.b_h.dimshuffle('x', 0)
    activation = self.nonlinearity(activation)

    transform = T.dot(input_reshape, self.W_t)
    if self.b_t is not None:
        transform = transform + self.b_t.dimshuffle('x', 0)
    transform = nonlinearities.sigmoid(transform)

    carry = 1.0 - transform
    output = activation * transform + input_reshape * carry

    # reshape output back to original input_shape
    if input.ndim > 2:
        output = T.reshape(output, input.shape)

    return output

def dot(inp, matrix):
    """
    Decide the right type of dot product depending on the input
    arguments
    """
    if 'int' in inp.dtype and inp.ndim == 2:
        return matrix[inp.flatten()]
    elif 'int' in inp.dtype:
        return matrix[inp]
    elif 'float' in inp.dtype and inp.ndim == 3:
        shape0 = inp.shape[0]
        shape1 = inp.shape[1]
        shape2 = inp.shape[2]
        return TT.dot(inp.reshape((shape0 * shape1, shape2)), matrix)
    else:
        return TT.dot(inp, matrix)

def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_' + layerIndex])
    W_zx = T.dot(emb, tparams['W_z_' + layerIndex])
    Wx = T.dot(emb, tparams['W_' + layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_' + layerIndex]) + tparams['b_r_' + layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_' + layerIndex]) + tparams['b_z_' + layerIndex])
        h_tilde = T.tanh(wx + T.dot(r * h, tparams['U_' + layerIndex]) + tparams['b_' + layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new  # , output, time

    results, updates = theano.scan(fn=stepFn,
                                   sequences=[mask, W_rx, W_zx, Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   name='gru_layer' + layerIndex,
                                   n_steps=timesteps)
    return results

def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_' + layerIndex])
    W_zx = T.dot(emb, tparams['W_z_' + layerIndex])
    Wx = T.dot(emb, tparams['W_' + layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_' + layerIndex]) + tparams['b_r_' + layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_' + layerIndex]) + tparams['b_z_' + layerIndex])
        h_tilde = T.tanh(wx + T.dot(r * h, tparams['U_' + layerIndex]) + tparams['b_' + layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new

    results, updates = theano.scan(fn=stepFn,
                                   sequences=[mask, W_rx, W_zx, Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   name='gru_layer' + layerIndex,
                                   n_steps=timesteps)
    return results

def forward(self, inputtensor):
    inputimage = inputtensor[0]
    if self.dc == 0.0:
        pass
    else:
        if 0 < self.dc <= 1:
            _srng = RandomStreams(np.random.randint(1, 2147462579))
            one = T.constant(1)
            retain_prob = one - self.dc
            mask_shape = self.w.shape
            mask = _srng.binomial(mask_shape, p=retain_prob, dtype=self.w.dtype)
            self.w = self.w * mask
        else:
            raise IndexError

    if self.need_bias:
        return ((T.dot(inputimage, self.w) + self.b), )
    else:
        return (T.dot(inputimage, self.w), )

def recurrent_as_activation_function(self, Wix, Uix, h_tm1, c_tm1, y_tm1):
    """ Implement the recurrent unit as an activation function. This function is called by self.__init__().

    :param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation
    :type Wix: matrix
    :param h_tm1: contains the hidden activation from previous time step
    :type h_tm1: matrix, each row means a hidden activation vector of a time step
    :param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM
    :returns: h_t is the hidden activation of current time step
    """
    h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + T.dot(y_tm1, self.W_yi) + self.b_i)

    # simple recurrent decoder
    #y_t = T.dot(h_t, self.U_hi) + self.b

    # recurrent output and additional input
    y_t = Uix + T.dot(h_t, self.U_hi) + T.dot(y_tm1, self.U_yi) + self.b

    c_t = h_t

    return h_t, c_t, y_t

def recurrent_as_activation_function(self, Wix, Wiy, h_tm1, c_tm1):
    """ Implement the recurrent unit as an activation function. This function is called by self.__init__().

    :param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation
    :type Wix: matrix
    :param h_tm1: contains the hidden activation from previous time step
    :type h_tm1: matrix, each row means a hidden activation vector of a time step
    :param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM
    :returns: h_t is the hidden activation of current time step
    """
    h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + Wiy + self.b_i)

    c_t = h_t

    return h_t, c_t

def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1):
    """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
        The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`

    """
    i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i)
    f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f)

    c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + T.dot(y_tm1, self.W_yi) + self.b_c)

    o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)

    h_t = o_t * T.tanh(c_t)

    y_t = T.dot(h_t, self.U_ho) + self.b

    return h_t, c_t, y_t  # , i_t, f_t, o_t

def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
    """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
        The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`

    """
    i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i)
    f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f)

    c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c)

    o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)

    h_t = o_t * T.tanh(c_t)

    return h_t, c_t  # , i_t, f_t, o_t

def recurrent_as_activation_function(self, Wix, h_tm1, c_tm1, y_tm1):
    """ Implement the recurrent unit as an activation function. This function is called by self.__init__().

    :param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation
    :type Wix: matrix
    :param h_tm1: contains the hidden activation from previous time step
    :type h_tm1: matrix, each row means a hidden activation vector of a time step
    :param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM
    :returns: h_t is the hidden activation of current time step
    """
    h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + T.dot(y_tm1, self.W_yi) + self.b_i)

    y_t = T.dot(h_t, self.U_hi) + self.b

    c_t = h_t

    return h_t, c_t, y_t

def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
    """ This function treats the LSTM block as an activation function, and implements the LSTM (without the output gate) activation function.
        The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`

    """
    i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.b_i)
    f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.b_f)

    c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c)

    o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.b_o)

    h_t = o_t * T.tanh(c_t)

    return h_t, c_t