The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.alloc().
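Before the examples, a minimal usage sketch (not taken from any of the projects below; the names x, n_hidden, and h0 are only illustrative). T.alloc(value, *shape) builds a tensor of the given, possibly symbolic, shape filled with value, which is the pattern most of the snippets below use to create a zero initial state for theano.scan:

import numpy as np
import theano
import theano.tensor as T

# Minimal sketch (illustrative, not one of the collected examples):
# allocate a zeros tensor whose leading dimension is symbolic, as is
# typically done for the initial hidden state of a scan/RNN loop.
x = T.matrix('x')                       # (batch, dim)
n_hidden = 8
h0 = T.alloc(np.asarray(0., dtype=theano.config.floatX),
             x.shape[0], n_hidden)      # (batch, n_hidden) filled with 0.

f = theano.function([x], h0)
print(f(np.zeros((3, 5), dtype=theano.config.floatX)).shape)  # (3, 8)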
def connect(self, inputs, mask, is_train): """ is_train: A boolean tensor. """ max_length = inputs.shape[0] batch_size = inputs.shape[1] outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim), tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)] # Dropout mask sharing for variational dropout. self.is_train = is_train if self.recurrent_dropout_layer != None: self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train) inputs = tensor.dot(inputs, self.W) + self.b rval, _ = theano.scan(self._step, # Scan function sequences=[inputs, mask], # Input sequence outputs_info=outputs_info, name=_p(self.prefix, '_layers'), n_steps=max_length) # scan steps return rval[0]
def connect(self, inputs, mask, is_train):
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
                    tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]
    # Dropout layers
    self.is_train = is_train
    if self.recurrent_dropout_layer != None:
        self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)

    proj_inputs = tensor.dot(inputs, self.W) + self.b
    rval, _ = theano.scan(self._step,  # Scan function
                          sequences=[inputs, proj_inputs, mask],  # Input sequence
                          outputs_info=outputs_info,
                          name=_p(self.prefix, '_layers'),
                          n_steps=max_length)  # scan steps
    return rval[0]

def gru_layer(tparams, emb, options):
    hiddenDimSize = options['hiddenDimSize']
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    def stepFn(wx, h, U_gru):
        uh = T.dot(h, U_gru)
        r = T.nnet.sigmoid(_slice(wx, 0, hiddenDimSize) + _slice(uh, 0, hiddenDimSize))
        z = T.nnet.sigmoid(_slice(wx, 1, hiddenDimSize) + _slice(uh, 1, hiddenDimSize))
        h_tilde = T.tanh(_slice(wx, 2, hiddenDimSize) + r * _slice(uh, 2, hiddenDimSize))
        h_new = z * h + ((1. - z) * h_tilde)
        return h_new

    Wx = T.dot(emb, tparams['W_gru']) + tparams['b_gru']
    results, updates = theano.scan(fn=stepFn,
                                   sequences=[Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   non_sequences=[tparams['U_gru']],
                                   name='gru_layer',
                                   n_steps=timesteps)
    return results

def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_'+layerIndex])
    W_zx = T.dot(emb, tparams['W_z_'+layerIndex])
    Wx = T.dot(emb, tparams['W_'+layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_'+layerIndex]) + tparams['b_r_'+layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_'+layerIndex]) + tparams['b_z_'+layerIndex])
        h_tilde = T.tanh(wx + T.dot(r*h, tparams['U_'+layerIndex]) + tparams['b_'+layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new  # , output, time

    results, updates = theano.scan(fn=stepFn,
                                   sequences=[mask, W_rx, W_zx, Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   name='gru_layer'+layerIndex,
                                   n_steps=timesteps)
    return results

def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_'+layerIndex])
    W_zx = T.dot(emb, tparams['W_z_'+layerIndex])
    Wx = T.dot(emb, tparams['W_'+layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_'+layerIndex]) + tparams['b_r_'+layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_'+layerIndex]) + tparams['b_z_'+layerIndex])
        h_tilde = T.tanh(wx + T.dot(r*h, tparams['U_'+layerIndex]) + tparams['b_'+layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new

    results, updates = theano.scan(fn=stepFn,
                                   sequences=[mask, W_rx, W_zx, Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   name='gru_layer'+layerIndex,
                                   n_steps=timesteps)
    return results

def set_output(self):
    output_shape = self._output_shape
    padding = self._padding
    unpool_size = self._unpool_size
    unpooled_output = tensor.alloc(0.0,  # Value to fill the tensor
                                   output_shape[0],
                                   output_shape[1] + 2 * padding[0],
                                   output_shape[2],
                                   output_shape[3] + 2 * padding[1],
                                   output_shape[4] + 2 * padding[2])
    unpooled_output = tensor.set_subtensor(
        unpooled_output[:, padding[0]:output_shape[1] + padding[0]:unpool_size[0], :,
                        padding[1]:output_shape[3] + padding[1]:unpool_size[1],
                        padding[2]:output_shape[4] + padding[2]:unpool_size[2]],
        self._prev_layer.output)
    self._output = unpooled_output

def set_output(self):
    padding = self._padding
    input_shape = self._input_shape
    if np.sum(self._padding) > 0:
        padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                    input_shape[0],
                                    input_shape[1] + 2 * padding[1],
                                    input_shape[2],
                                    input_shape[3] + 2 * padding[3],
                                    input_shape[4] + 2 * padding[4])
        padded_input = tensor.set_subtensor(
            padded_input[:, padding[1]:padding[1] + input_shape[1], :,
                         padding[3]:padding[3] + input_shape[3],
                         padding[4]:padding[4] + input_shape[4]],
            self._prev_layer.output)
    else:
        padded_input = self._prev_layer.output

    self._output = conv3d2d.conv3d(padded_input, self.W.val) + \
        self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')

def set_output(self):
    padding = self._padding
    input_shape = self._input_shape
    padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                input_shape[0],
                                input_shape[1] + 2 * padding[1],
                                input_shape[2],
                                input_shape[3] + 2 * padding[3],
                                input_shape[4] + 2 * padding[4])
    padded_input = tensor.set_subtensor(
        padded_input[:, padding[1]:padding[1] + input_shape[1], :,
                     padding[3]:padding[3] + input_shape[3],
                     padding[4]:padding[4] + input_shape[4]],
        self._prev_layer.output)

    fc_output = tensor.reshape(
        tensor.dot(self._fc_layer.output, self.Wx.val), self._output_shape)
    self._output = conv3d2d.conv3d(padded_input, self.Wh.val) + \
        fc_output + self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')

def set_output(self):
    padding = self._padding
    input_shape = self._input_shape
    padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                input_shape[0],
                                input_shape[1] + 2 * padding[1],
                                input_shape[2],
                                input_shape[3] + 2 * padding[3],
                                input_shape[4] + 2 * padding[4])
    padded_input = tensor.set_subtensor(
        padded_input[:, padding[1]:padding[1] + input_shape[1], :,
                     padding[3]:padding[3] + input_shape[3],
                     padding[4]:padding[4] + input_shape[4]],
        self._prev_layer.output)

    self._output = conv3d2d.conv3d(padded_input, self.W.val) + \
        self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')

def __call__(self, c01b):
    """
    .. todo::

        WRITEME
    """
    half = self.n // 2
    sq = T.sqr(c01b)
    ch, r, c, b = c01b.shape
    extra_channels = T.alloc(0., ch + 2*half, r, c, b)
    sq = T.set_subtensor(extra_channels[half:half+ch, :, :, :], sq)
    scale = self.k
    for i in xrange(self.n):
        scale += self.alpha * sq[i:i+ch, :, :, :]
    scale = scale ** self.beta
    return c01b / scale

def get_layer(self, x_in):
    assert x_in.ndim == 2
    n_steps = x_in.shape[0]

    def __slice(x_, n, dim):
        return x_[n * dim: (n + 1) * dim]

    def __step(x_, h_, c_):
        preact = T.dot(h_, self._params['U']) + x_ + self._params['b']
        i = T.nnet.sigmoid(__slice(preact, 0, self._ydim))
        f = T.nnet.sigmoid(__slice(preact, 1, self._ydim))
        o = T.nnet.sigmoid(__slice(preact, 2, self._ydim))
        c = T.tanh(__slice(preact, 3, self._ydim))
        c = f * c_ + i * c
        h = o * T.tanh(c)
        return h, c

    x_in = T.dot(x_in, self._params['W']) + self._params['b']
    rval, updates = theano.scan(__step,
                                sequences=x_in,
                                go_backwards=self.go_backwards,
                                outputs_info=[T.alloc(np_floatX(0.), self._ydim),
                                              T.alloc(np_floatX(0.), self._ydim)],
                                name='lstm_layers',
                                n_steps=n_steps)
    return reverse(rval[0]) if self.go_backwards else rval[0]

def rnn_ff(inps, dim, hidden, batSize, prefix, params, names):
    Wx = theano.shared(randomMatrix(dim, hidden))
    Wh = theano.shared(randomMatrix(hidden, hidden))
    bh = theano.shared(numpy.zeros(hidden, dtype=theano.config.floatX))
    # model.container['bi_h0'] = theano.shared(numpy.zeros(model.container['nh'], dtype=theano.config.floatX))

    # bundle
    params += [Wx, Wh, bh]  # , model.container['bi_h0']
    names += [prefix + '_Wx', prefix + '_Wh', prefix + '_bh']  # , 'bi_h0'

    def recurrence(x_t, h_tm1):
        h_t = T.nnet.sigmoid(T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh)
        return h_t

    h, _ = theano.scan(fn=recurrence,
                       sequences=inps,
                       outputs_info=[T.alloc(0., batSize, hidden)],
                       n_steps=inps.shape[0])

    return h

def compute_emb(x, W):

    def _step(xi, emb, W):
        if prm.att_doc:
            new_shape = (xi.shape[0], xi.shape[1], xi.shape[2], prm.dim_emb)
        else:
            new_shape = (xi.shape[0], xi.shape[1], prm.dim_emb)

        out = W[xi.flatten()].reshape(new_shape).sum(-2)
        return out / tensor.maximum(1., tensor.neq(xi, -1).astype('float32').sum(-1, keepdims=True))

    if prm.att_doc:
        emb_init = tensor.alloc(0., x.shape[1], x.shape[2], prm.dim_emb)
    else:
        emb_init = tensor.alloc(0., x.shape[1], prm.dim_emb)

    (embs), scan_updates = theano.scan(_step,
                                       sequences=[x],
                                       outputs_info=[emb_init],
                                       non_sequences=[W],
                                       name='emb_scan',
                                       n_steps=x.shape[0])
    return embs

def call(self, x, mask=None):
    X = x
    half_n = self.n // 2
    input_sqr = K.square(X)
    if K._BACKEND == 'theano':
        b, ch, r, c = X.shape
        extra_channels = T.alloc(0., b, ch + 2*half_n, r, c)
        input_sqr = T.set_subtensor(
            extra_channels[:, half_n:half_n+ch, :, :], input_sqr)
    elif K._BACKEND == 'tensorflow':
        b, ch, r, c = K.int_shape(X)
        up_dims = tf.pack([tf.shape(X)[0], half_n, r, c])
        up = tf.fill(up_dims, 0.0)
        middle = input_sqr
        down_dims = tf.pack([tf.shape(X)[0], half_n, r, c])
        down = tf.fill(down_dims, 0.0)
        input_sqr = K.concatenate([up, middle, down], axis=1)

    scale = self.k
    norm_alpha = self.alpha / self.n
    for i in range(self.n):
        scale += norm_alpha * input_sqr[:, i:i+ch, :, :]
    scale = scale ** self.beta
    result = X / scale
    return result

def gru_layer(tparams, emb, name, hiddenDimSize):
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    def stepFn(wx, h, U_gru):
        uh = T.dot(h, U_gru)
        r = T.nnet.sigmoid(_slice(wx, 0, hiddenDimSize) + _slice(uh, 0, hiddenDimSize))
        z = T.nnet.sigmoid(_slice(wx, 1, hiddenDimSize) + _slice(uh, 1, hiddenDimSize))
        h_tilde = T.tanh(_slice(wx, 2, hiddenDimSize) + r * _slice(uh, 2, hiddenDimSize))
        h_new = z * h + ((1. - z) * h_tilde)
        return h_new

    Wx = T.dot(emb, tparams['W_gru_'+name]) + tparams['b_gru_'+name]
    results, updates = theano.scan(fn=stepFn,
                                   sequences=[Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   non_sequences=[tparams['U_gru_'+name]],
                                   name='gru_layer',
                                   n_steps=timesteps)
    return results

def test_gpujoin_gpualloc():
    a = T.fmatrix('a')
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    b = T.fmatrix('b')
    b_val = numpy.asarray(numpy.random.rand(3, 5), dtype='float32')

    f = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                        mode=mode_without_gpu)
    f_gpu = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)),
                            mode=mode_with_gpu)
    f_gpu2 = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                             mode=mode_with_gpu)

    assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 2
    assert sum([node.op == T.join for node in f.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc)
                for node in f_gpu.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join
                for node in f_gpu.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc)
                for node in f_gpu2.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join
                for node in f_gpu2.maker.fgraph.toposort()]) == 1
    assert numpy.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))

def local_gpua_alloc2(node):
    """
    Join(axis, {Alloc or HostFromGPU}, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.

    """
    try:
        get_context(None)
    except ContextNotDefined:
        # If there is no default context then we do not perform the move here.
        return
    if (isinstance(node.op, tensor.Alloc) and
        all(c != 'output' and
            c.op == tensor.join and
            all(i.owner and
                i.owner.op in [host_from_gpu, tensor.alloc]
                for i in c.inputs[1:])
            for c, idx in node.outputs[0].clients)):
        return [host_from_gpu(gpu_alloc(None)(*node.inputs))]

def test0(self):
    x = shared(self.rng.randn(3, 7))
    a = tensor.alloc(x, 6, 7)

    # It is a bad idea to have tensor.alloc return x directly,
    # because the shape mismatch cannot be caught.
    assert a.owner and isinstance(a.owner.op, tensor.Alloc)

    f = function([], a, mode=mode_opt)
    # The optimization should then be applied, and remove Alloc
    assert ([node.op for node in f.maker.fgraph.toposort()] ==
            [deep_copy_op])

    # In DebugMode, the shape mismatch should be detected
    if isinstance(mode_opt, compile.DebugMode):
        self.assertRaises(ValueError, f)

    # No need to check_stack_trace as the optimization
    # local_canonicalize_alloc only removes nodes.

def test2(self):
    # Test that alloc never gets instantiated during optimization
    mode = mode_opt.excluding('local_canonicalize_alloc')

    x = tensor.matrix('x')
    y = tensor.tile(x, (1,)*2)

    f = function([x], [y], mode=mode)
    op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
    print(op_classes)

    # We are supposed to test that tensor.Alloc is not in op_classes,
    # but since the proper optimization is not currently implemented
    # it will fail. Once the correct optimization is in place, we
    # should not see tensor.Alloc in op_classes and the assert below
    # will have to change.
    assert tensor.Alloc in op_classes

    # The correct opt removes nodes, no need for check_stack_trace

def test_local_reshape_dimshuffle():

    reshape_dimshuffle = out2in(local_dimshuffle_alloc)

    x = tensor.vector('x')

    out = tensor.alloc(x, 3, 2).dimshuffle('x', 'x', 0, 1)

    g = FunctionGraph([x], [out])
    reshape_dimshuffle(g)

    l = theano.gof.PerformLinker()
    l.accept(g)
    f = l.make_function()

    assert f([3, 4]).ndim == 4

    topo = g.toposort()
    assert any([not isinstance(x, DimShuffle) for x in topo])

def test_gpualloc():
    '''
    This test tries to catch the scenario when, due to infer_shape,
    the input of the alloc changes from a tensor scalar to a constant
    1. In this case the originally constructed broadcastable pattern
    will have a False for that dimension, but the new broadcastable
    pattern inserted by gpualloc will have a True, since it knows the
    dimension is 1 and therefore broadcastable.
    '''
    x = theano.shared(numpy.ones(3, dtype='float32'), 'x')
    m = (x).dimshuffle(['x', 0])
    v = tensor.alloc(1., *m.shape)
    f = theano.function([], v + x,
                        mode=mode_with_gpu.excluding("local_elemwise_alloc"))
    l = f.maker.fgraph.toposort()
    assert numpy.any([isinstance(y.op, cuda.GpuAlloc) for y in l])

def apply(self, sentences, init_hid=None):
    """
    Parameters
    ----------
    sentences: (length, batch, featuresdim)

    Returns
    -------
    hs: (n_blocks, batch, hid_size)
    """
    if sentences.ndim == 3:
        batch_size = sentences.shape[1]
        n_steps = sentences.shape[0]
    else:
        raise NotImplementedError

    if init_hid is None:
        init_hid = T.unbroadcast(T.alloc(numpy.float32(0.), batch_size, self.hid_size))

    rval, updates = theano.scan(self._step_forward,
                                sequences=[sentences],
                                outputs_info=[init_hid],
                                n_steps=n_steps)
    self.hs = rval
    return self.hs

def __Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if h0s is None:
        h0s = [None]*len(hidden_dims)

    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])

        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs

def extend_middle_dim(_2D, num):
    """
    Gets a 2D tensor (A, B), outputs a 3D tensor (A, num, B)
    :usage:
        >>> TODO
    """
    rval = _2D.dimshuffle((0, 'x', 1))
    rval = T.alloc(rval, rval.shape[0], num, rval.shape[2])
    return rval

def alloc_zeros_matrix(*dims):
    return T.alloc(np.cast[theano.config.floatX](0.), *dims)

def alloc_ones_matrix(*dims):
    return T.alloc(np.cast[theano.config.floatX](1.), *dims)

def localResponseNormalizationCrossChannel(incoming, alpha=1e-4,
                                           k=2, beta=0.75, n=5):
    """
    Implement the local response normalization across channels described
    in <ImageNet Classification with Deep Convolutional Neural Networks>,
    A. Krizhevsky et al., sec. 3.3.
    Reference of the code:
    https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/
    normalization.py
    https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/expr/normalize.py
    Parameters:
        incoming: the feature maps (output of the convolution layer).
        alpha: float scalar
        k: float scalar
        beta: float scalar
        n: integer, number of adjacent channels. Must be odd.
    """
    if n % 2 == 0:
        raise NotImplementedError("Works only with odd n")

    input_shape = incoming.shape
    half_n = n // 2
    input_sqr = T.sqr(incoming)
    b, ch, r, c = input_shape
    extra_channels = T.alloc(0., b, ch + 2*half_n, r, c)
    input_sqr = T.set_subtensor(extra_channels[:, half_n:half_n+ch, :, :],
                                input_sqr)

    scale = k
    for i in range(n):
        scale += alpha * input_sqr[:, i:i+ch, :, :]
    scale = scale ** beta
    return incoming / scale

def get_output_for(self, inputs, **kwargs):
    vals, ref = inputs
    N, _, H, W = ref.shape
    yx = tt.stack(tt.mgrid[0:H, 0:W])[np.newaxis, :, :, :]
    grid = tt.alloc(tt.cast(yx, "float32"), N, 2, H, W)
    stacked = tt.concatenate([grid, ref], axis=1)

    return super(BilateralFilterLayer, self).get_output_for(
        [vals, stacked], **kwargs)

def get_output_for(self, inputs, **kwargs):
    unary, ref = inputs

    N, _, H, W = ref.shape
    yx = tt.cast(tt.stack(tt.mgrid[0:H, 0:W]), "float32")
    grid = tt.alloc(yx[np.newaxis, :, :, :], N, 2, H, W)
    stacked = tt.concatenate([grid, ref], axis=1)

    def _bilateral(V, R):
        o = tt.ones((1, V.shape[1], V.shape[2]), "float32")
        norm = tt.sqrt(gaussian_filter(R, o, self.kstd_bf,
                                       self.ref_dim)) + 1e-8
        return gaussian_filter(R, V/norm, self.kstd_bf, self.ref_dim,
                               self.val_dim) / norm

    def _step(prev_q, U, ref, normalize=True):
        qbf = _bilateral(prev_q, ref,)
        qsf = tt.nnet.conv2d(prev_q[np.newaxis, :, :, :],
                             self.W_spatial, border_mode="half")[0]
        q_hat = -self.compat_bf * qbf + -self.compat_spatial * qsf
        q_hat = U - q_hat
        return softmax(q_hat, axis=0) if normalize else q_hat

    def _inference(unary_i, ref_i):
        U = tt.log(tt.clip(unary_i, 1e-5, 1))
        prev_q = softmax(U, axis=0)

        # This is faster than using scan.
        for i in range(self.num_iter):
            normalize = self.normalize_final_iter or i < self.num_iter-1
            prev_q = _step(prev_q, U, ref_i, normalize)
        return prev_q

    return theano.scan(fn=_inference, sequences=[unary, stacked],
                       outputs_info=None)[0]

def sample(self, x0=None, h0=None, c0=None, n_samples=10, n_steps=10,
           condition_on=None, debug=False):
    if x0 is None:
        x0, _ = self.output_net.sample(
            p=T.constant(0.5).astype(floatX),
            size=(n_samples, self.output_net.dim_out)).astype(floatX)

    if h0 is None:
        h0 = T.alloc(0., x0.shape[0], self.dim_h).astype(floatX)
    if c0 is None:
        c0 = T.alloc(0., x0.shape[0], self.dim_h).astype(floatX)

    z0 = self.output_net.preact(h0)

    seqs = []
    outputs_info = [h0, c0, x0, None]
    non_seqs = []
    step = self.step_sample
    p0 = self.output_net.distribution(z0)

    non_seqs += self.get_sample_params()

    if debug:
        return self.step_sample(h0, x0, *self.get_sample_params())

    outs = scan(step, seqs, outputs_info, non_seqs, n_steps,
                name=self.name+'_sampling', strict=False)
    (h, c, x, p), updates = outs

    x = concatenate([x0[None, :, :], x])
    h = concatenate([h0[None, :, :], h])
    p = concatenate([p0[None, :, :], p])

    return OrderedDict(x=x, p=p, h=h, x0=x0, p0=p0, h0=h0), updates

def __call__(self, x, h0=None, c0=None, condition_on=None):
    if h0 is None:
        h0 = T.alloc(0., x.shape[1], self.dim_h).astype(floatX)
    if c0 is None:
        c0 = T.alloc(0., x.shape[1], self.dim_h).astype(floatX)

    params = self.get_sample_params()

    return self.step_call(x, h0, c0, condition_on, *params)

def test_recurrent(dim_in=13, dim_h=17, n_samples=107, window=7):
    rnn = test_build(dim_in, dim_h)
    data_iter = Euclidean(n_samples=n_samples, dims=dim_in, batch_size=window)
    x = data_iter.next()[data_iter.name]

    test_dict = OrderedDict()

    X = T.matrix('x', dtype=floatX)

    Y = rnn.call_seqs(X, None, 0, *rnn.get_sample_params())[0]
    y = np.dot(x, rnn.input_net.params['W0']) + rnn.input_net.params['b0']
    test_dict['RNN preact from data'] = (X, Y, x, y, theano.OrderedUpdates())

    H0 = T.alloc(0., X.shape[0], rnn.dim_hs[0]).astype(floatX)
    H = rnn._step(1, Y, H0, rnn.Ur0)
    h0 = np.zeros((x.shape[0], rnn.dim_hs[0])).astype(floatX)
    h = np.tanh(np.dot(h0, rnn.params['Ur0']) + y)
    test_dict['step reccurent'] = (X, H, x, h, theano.OrderedUpdates())

    P = rnn.output_net.feed(H)
    p = sigmoid(np.dot(h, rnn.output_net.params['W0']) + rnn.output_net.params['b0'])
    test_dict['output'] = (X, P, x, p, theano.OrderedUpdates())

    for k, v in test_dict.iteritems():
        print 'Testing %s' % k
        inp, out, inp_np, out_np, updates = v
        f = theano.function([inp], out, updates=updates)
        out_actual = f(inp_np)
        if not np.allclose(out_np, out_actual):
            print 'np', out_np
            print 'theano', out_actual
            assert False

def init_h(h_init, X, batch_size, models, **h_args):
    '''Initializes the RNN hidden state.

    Args:
        h_init (str): type of initialization.
        X (T.tensor): input tensor for initialization through MLP.
        batch_size (int)
        models (list): list of Layer, pulls 'h_net' for initialization
            (TODO change this).
        **h_args: kwargs for different initializations.

    Returns:
        T.tensor: Full 3D tensor returned to train `h_net`.

    '''
    if h_init is None:
        h0 = None
    elif h_init == 'last':
        print 'Initializing h0 from chain'
        h0 = theano.shared(np.zeros((batch_size, rnn.dim_h)).astype(floatX))
        h0s = h0[None, :, :]
    elif h_init == 'noise':
        noise_amount = h_args['noise_amount']
        print 'Initializing h0 from noise'
        h0 = trng.normal(avg=0, std=0.1, size=(batch_size, rnn.dim_h)).astype(floatX)
        h0s = h0[None, :, :]
    elif h_init == 'average':
        print 'Initializing h0 from running average'
        averager = models['averager']
        h0 = (T.alloc(0., batch_size, rnn.dim_h) + averager.m[None, :]).astype(floatX)
        h0s = h0[None, :, :]
    elif h_init == 'mlp':
        print 'Initializing h0 from MLP'
        mlp = models['h_net']
        h0s = mlp(X)

    return h0s

def __call__(self, x, m=None, h0s=None, condition_on=None):
    '''Call function.

    For learning RNNs.

    Args:
        x (T.tensor): input sequence. window x batch x dim
        m (T.tensor): mask. window x batch. For masking in recurrent steps.
        h0s (Optional[list]): initial h0s.
        condition_on (Optional[T.tensor]): conditional for recurrent step.

    Returns:
        OrderedDict: dictionary of results: hiddens, probabilities, and preacts.
        theano.OrderedUpdates.

    '''
    if h0s is None:
        h0s = [T.alloc(0., x.shape[1], dim_h).astype(floatX) for dim_h in self.dim_hs]

    if m is None:
        m = T.ones((x.shape[0], x.shape[1])).astype(floatX)

    params = self.get_sample_params()

    return self.step_call(x, m, h0s, *params)

def ctc_interleave_blanks(Y):
    Y_ = T.alloc(-1, Y.shape[0] * 2 + 1)
    Y_ = T.set_subtensor(Y_[T.arange(Y.shape[0]) * 2 + 1], Y)
    return Y_

def fprop(self, all_states):
    if self.ntimes:
        stateshape0 = all_states.shape[0]
        shape0 = TT.switch(TT.gt(self.n, 0), self.n, all_states.shape[0])

        single_frame = TT.shape_padleft(all_states[stateshape0-1])
        mask = TT.alloc(numpy.float32(1),
                        shape0, *[1 for k in xrange(all_states.ndim-1)])
        rval = single_frame * mask
        self.out = rval
        return rval

    single_frame = all_states[all_states.shape[0]-1]
    self.out = single_frame
    return single_frame

def fprop(self, all_states):
    shape0 = all_states.shape[0]
    single_frame = all_states.min(0)
    if self.ntimes:
        single_frame = TT.shape_padleft(all_states.max(0))
        mask = TT.alloc(numpy.float32(1),
                        shape0, *[1 for k in xrange(all_states.ndim-1)])
        rval = single_frame * mask
        self.out = rval
        return rval
    self.out = single_frame
    return single_frame

def fprop(self, all_states):
    shape0 = all_states.shape[0]
    single_frame = all_states.max(0)
    if self.ntimes:
        single_frame = TT.shape_padleft(all_states.max(0))
        mask = TT.alloc(numpy.float32(1),
                        shape0, *[1 for k in xrange(all_states.ndim-1)])
        rval = single_frame * mask
        self.out = rval
        return rval
    self.out = single_frame
    return single_frame

def fprop(self, x):
    # This is black magic based on broadcasting,
    # that's why variable names don't make any sense.
    a = TT.shape_padleft(x)
    padding = [1] * x.ndim
    b = TT.alloc(numpy.float32(1), self.n_times, *padding)
    self.out = a * b
    return self.out

def forward(self, inputtensor):
    x = inputtensor[0]
    input_shape = x.shape
    ch = input_shape[1]
    bs = input_shape[0]
    half_n = self.localSize // 2
    input_sqr = T.sqr(x)
    extra_channels = T.alloc(0., bs, ch + 2*half_n, *input_shape[2:])
    input_sqr = T.set_subtensor(extra_channels[:, half_n:half_n+ch, :, :],
                                input_sqr)
    scale = 1
    for i in range(self.localSize):
        scale += self.alpha/self.localSize * input_sqr[:, i:i+ch, ...]
    scale = scale ** self.beta
    return (x/scale, )

def set_output(self):
    if sum(self._padding) > 0:
        padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                    self._input_shape[0],
                                    self._input_shape[1],
                                    self._input_shape[2] + 2 * self._padding[2],
                                    self._input_shape[3] + 2 * self._padding[3])

        padded_input = tensor.set_subtensor(
            padded_input[:, :, self._padding[2]:self._padding[2] + self._input_shape[2],
                         self._padding[3]:self._padding[3] + self._input_shape[3]],
            self._prev_layer.output)

        padded_input_shape = [self._input_shape[0], self._input_shape[1],
                              self._input_shape[2] + 2 * self._padding[2],
                              self._input_shape[3] + 2 * self._padding[3]]
    else:
        padded_input = self._prev_layer.output
        padded_input_shape = self._input_shape

    conv_out = conv.conv2d(
        input=padded_input,
        filters=self.W.val,
        filter_shape=self._filter_shape,
        image_shape=np.asarray(padded_input_shape, dtype=np.int16),
        border_mode='valid')

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self._output = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')

def alloc(self, value, shape, dtype):
    return T.alloc(value, *tuple(shape), dtype=dtype)