The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.patternbroadcast().
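Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the variable names are illustrative) of what T.patternbroadcast does: it re-tags the broadcastable pattern of a symbolic variable, so that a dimension that is known to always have size 1 at runtime can broadcast in later element-wise operations.

import numpy as np
import theano
import theano.tensor as T

# x is declared as an ordinary matrix, so its broadcastable pattern is
# (False, False) even if its second dimension always has size 1 at runtime.
x = T.matrix('x')

# Re-tag the second dimension as broadcastable so it can broadcast
# against y below; the runtime shape of x must then really be (?, 1).
x_b = T.patternbroadcast(x, (False, True))

y = T.matrix('y')
f = theano.function([x, y], x_b + y)

a = np.ones((2, 1), dtype=theano.config.floatX)
b = np.arange(6).reshape(2, 3).astype(theano.config.floatX)
print(f(a, b))   # (2, 3) result; without patternbroadcast the addition
                 # would fail because x's size-1 column could not broadcast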
def set_to_zero(list_of_tensors_and_shapes, on_gpu=True):
    """
    :param: list_of_tensors_and_shapes of the form [(tensor1, shape1), ...]
    """
    if on_gpu:
        updates = []
        for tensor, shape in list_of_tensors_and_shapes:
            if np.sum(shape) == 1:
                updates.append((tensor, 0))
            else:
                updates.append((tensor, T.patternbroadcast(
                    T.zeros(shape), [False] * tensor.ndim)))
        return updates
    else:
        updates = []
        for tensor, shape in list_of_tensors_and_shapes:
            updates.append((tensor, np.zeros(shape, dtype=config_.floatX)))
        return updates
def dropout(self, x, drop_rate, noise_shape=None):
    '''Sets entries in `x` to zero at random, while scaling the entire tensor.

    @param x: tensor
    @param drop_rate: fraction of the entries in the tensor that will be set to 0.
    @param noise_shape: shape for randomly generated keep/drop flags,
        must be broadcastable to the shape of `x`
    '''
    assert 0. <= drop_rate < 1., 'Dropout drop_rate must be in interval [0, 1).'
    retain_prob = 1. - drop_rate

    if noise_shape is None:
        random_tensor = self.random_binomial(self.shape(x), p=retain_prob)
    else:
        random_tensor = self.random_binomial(noise_shape, p=retain_prob)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])

    train_x = x * random_tensor
    train_x /= retain_prob
    return self.in_train_phase(train_x, x)
def local_abstractconv_gradweights_gemm(node):
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not isinstance(img.type, GpuArrayType) or \
            not isinstance(topgrad.type, GpuArrayType):
        return None
    ctx = infer_context_name(img, topgrad)

    rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
                                 subsample=node.op.subsample,
                                 filter_dilation=node.op.filter_dilation)(
        gpu_contiguous(img), gpu_contiguous(topgrad), shape)
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1]
    rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    rval = as_gpuarray_variable(rval, context_name=ctx)
    return [rval]
def local_abstractconv3d_gradweights_gemm(node):
    if not isinstance(node.op, AbstractConv3d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not isinstance(img.type, GpuArrayType) or \
            not isinstance(topgrad.type, GpuArrayType):
        return None
    ctx = infer_context_name(img, topgrad)

    rval = GpuCorr3dMM_gradWeights(border_mode=node.op.border_mode,
                                   subsample=node.op.subsample,
                                   filter_dilation=node.op.filter_dilation)(
        gpu_contiguous(img), gpu_contiguous(topgrad), shape)
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1, ::-1]
    rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    rval = as_gpuarray_variable(rval, context_name=ctx)
    return [rval]
def local_conv_dnn(node):
    if not dnn_available():
        return
    if isinstance(node.op, GpuConv):
        if node.op.border_mode not in ['full', 'valid']:
            return
        img, kern = node.inputs
        border_mode = node.op.border_mode
        subsample = node.op.subsample
        direction_hint = node.op.direction_hint
        rval = dnn_conv(img, kern,
                        border_mode=border_mode, subsample=subsample,
                        direction_hint=direction_hint)
        if node.outputs[0].broadcastable != rval.broadcastable:
            rval = tensor.patternbroadcast(
                rval, node.outputs[0].type.broadcastable)
        return [rval]


# This optimizer is registered in opt.py as part of the meta-optimizer.
# It tries exactly the opposite code path of what local_conv_dnn() uses,
# because for some input/kernel shape configurations, this is faster.
def local_abstractconv_gradweight_gemm(node):
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not isinstance(img.type, CudaNdarrayType) or \
            not isinstance(topgrad.type, CudaNdarrayType):
        return None

    rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
                                 subsample=node.op.subsample,
                                 filter_dilation=node.op.filter_dilation)(
        gpu_contiguous(img), gpu_contiguous(topgrad), shape)
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1]
    rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    rval = as_cuda_ndarray_variable(rval)
    return [rval]
def local_abstractconv3d_gradweight_gemm(node):
    if not isinstance(node.op, AbstractConv3d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not isinstance(img.type, CudaNdarrayType) or \
            not isinstance(topgrad.type, CudaNdarrayType):
        return None

    rval = GpuCorr3dMM_gradWeights(border_mode=node.op.border_mode,
                                   subsample=node.op.subsample,
                                   filter_dilation=node.op.filter_dilation)(
        gpu_contiguous(img), gpu_contiguous(topgrad), shape)
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1, ::-1]
    rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    rval = as_cuda_ndarray_variable(rval)
    return [rval]
def __Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if h0s is None:
        h0s = [None] * len(hidden_dims)

    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])

        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs
def cosine_sim(k, M):
    k_unit = k / (T.sqrt(T.sum(k**2)) + 1e-5)
    # T.patternbroadcast(k_unit.reshape((1, k_unit.shape[0])), (True, False))
    k_unit = k_unit.dimshuffle(('x', 0))
    k_unit.name = "k_unit"
    M_lengths = T.sqrt(T.sum(M**2, axis=1)).dimshuffle((0, 'x'))
    M_unit = M / (M_lengths + 1e-5)
    M_unit.name = "M_unit"
    return T.sum(k_unit * M_unit, axis=1)
def dropout(x, level, noise_shape=None, seed=None):
    '''Sets entries in `x` to zero at random, while scaling the entire tensor.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism.
    '''
    if level < 0. or level >= 1:
        raise Exception('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)
    retain_prob = 1. - level

    if noise_shape is None:
        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    else:
        random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])
    x *= random_tensor
    x /= retain_prob
    return x
def unflatten_tensor_variables(flatarr, shapes, symb_arrs):
    import theano.tensor as TT
    import numpy as np
    arrs = []
    n = 0
    for (shape, symb_arr) in zip(shapes, symb_arrs):
        size = np.prod(list(shape))
        arr = flatarr[n:n + size].reshape(shape)
        if arr.type.broadcastable != symb_arr.type.broadcastable:
            arr = TT.patternbroadcast(arr, symb_arr.type.broadcastable)
        arrs.append(arr)
        n += size
    return arrs
def forward(self, inputtensor):
    if self.deterministic or self.p == 0:
        return inputtensor
    else:
        x = inputtensor[0]
        # Using theano constant to prevent upcasting
        one = T.constant(1)

        retain_prob = one - self.p
        if self.rescale:
            x /= retain_prob

        mask_shape = x.shape

        # apply dropout, respecting shared axes
        if self.shared_axes:
            shared_axes = tuple(a if a >= 0 else a + x.ndim
                                for a in self.shared_axes)
            mask_shape = tuple(1 if a in shared_axes else s
                               for a, s in enumerate(mask_shape))
        mask = self._srng.binomial(mask_shape, p=retain_prob,
                                   dtype=x.dtype)
        if self.shared_axes:
            bcast = tuple(bool(s == 1) for s in mask_shape)
            mask = T.patternbroadcast(mask, bcast)
        x = x * mask
        return (x, )
def randdrop(x, level, noise_shape=None, seed=None):
    '''Sets entries in `x` to zero at random, while scaling the entire tensor.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism.
    '''
    # if level < 0. or level >= 1:
    #     raise Exception('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1337)
    rng = RandomStreams(seed=seed)
    retain_prob = 1 - level

    if noise_shape is None:
        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    else:
        random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])
    x *= random_tensor
    x /= retain_prob
    return x
def patternbroadcast(self, x, pattern):
    return T.patternbroadcast(x, pattern)


# TENSOR OPERATION
def fix_update_bcasts(updates):
    import theano.tensor as T
    for param, update in updates.items():
        if param.broadcastable != update.broadcastable:
            updates[param] = T.patternbroadcast(update, param.broadcastable)
    return updates
def get_output_for(self, input, deterministic=False, **kwargs):
    """Apply alpha dropout."""
    if deterministic or self.p == 0:
        return input
    else:
        # Using theano constant to prevent upcasting
        one = T.constant(1)

        retain_prob = one - self.p
        if self.rescale:
            input /= retain_prob

        # use nonsymbolic shape for dropout mask if possible
        mask_shape = self.input_shape
        if any(s is None for s in mask_shape):
            mask_shape = input.shape

        # apply dropout, respecting shared axes
        if self.shared_axes:
            shared_axes = tuple(a if a >= 0 else a + input.ndim
                                for a in self.shared_axes)
            mask_shape = tuple(1 if a in shared_axes else s
                               for a, s in enumerate(mask_shape))
        mask = self._srng.uniform(mask_shape, dtype=input.dtype) < retain_prob
        if self.shared_axes:
            bcast = tuple(bool(s == 1) for s in mask_shape)
            mask = T.patternbroadcast(mask, bcast)

        a = T.pow(retain_prob + self.alpha ** 2 * retain_prob *
                  (1 - retain_prob), -0.5)
        b = -a * (1 - retain_prob) * self.alpha

        return a * (input * mask + self.alpha * (1 - mask)) + b
def pattern_broadcast(x, broadcastable):
    return T.patternbroadcast(x, broadcastable)


# VALUE MANIPULATION
def dropout(x, level, noise_shape=None, seed=None):
    """Sets entries in `x` to zero at random, while scaling the entire tensor.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism.
    """
    if level < 0. or level >= 1:
        raise ValueError('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)

    if isinstance(noise_shape, list):
        noise_shape = tuple(noise_shape)

    rng = RandomStreams(seed=seed)
    retain_prob = 1. - level

    if noise_shape is None:
        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    else:
        random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])
    x *= random_tensor
    x /= retain_prob
    return x
def dropout(x, level, noise_shape=None, seed=None):
    '''Sets entries in `x` to zero at random, while scaling the entire tensor.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism.
    '''
    if level < 0. or level >= 1:
        raise ValueError('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)
    retain_prob = 1. - level

    if noise_shape is None:
        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    else:
        random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])
    x *= random_tensor
    x /= retain_prob
    return x
def Recurrence(processed_frames, h0, reset):
    """
    processed_frames.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames, DIM)
    """

    # print "warning no recurrence"
    # return T.zeros_like(processed_frames), h0

    learned_h0 = lib.param(
        'Recurrence.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    gru0 = lib.ops.LowMemGRU('Recurrence.GRU0', DIM, DIM,
                             processed_frames, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('Recurrence.GRU' + str(i), DIM, DIM,
                                grus[-1], h0=h0[:, i])
        grus.append(gru)

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (grus[-1], last_hidden)
def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if h0s is None:
        h0s = [None] * len(hidden_dims)

    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])

        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs
def perform(self, node, inputs, outputs):
    image, filt = inputs
    out = self.numba_fct(image, filt, self.mode)
    # out = T.patternbroadcast(out, (False, True, False, False))
    outputs[0][0] = out
def grad(self, inputs, gout):
    (x,) = inputs
    (gz,) = gout
    gx = dense_from_sparse(gz)
    gx = tensor.patternbroadcast(gx, x.broadcastable)
    return gx,
def grad(self, inp, grads):
    bottom, weights = inp
    top, = grads
    d_bottom = AbstractConv2d_gradInputs(self.imshp, self.kshp,
                                         self.border_mode,
                                         self.subsample,
                                         self.filter_flip,
                                         self.filter_dilation)(
        weights, top, bottom.shape[-2:])
    d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
                                           self.border_mode,
                                           self.subsample,
                                           self.filter_flip,
                                           self.filter_dilation)(
        bottom, top, weights.shape[-2:])

    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
    d_bottom = bottom.type.filter_variable(d_bottom)
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    d_weights = weights.type.filter_variable(d_weights)
    return d_bottom, d_weights
def grad(self, inp, grads):
    bottom, top = inp[:2]
    weights, = grads
    d_bottom = AbstractConv2d_gradInputs(self.imshp, self.kshp,
                                         self.border_mode,
                                         self.subsample,
                                         self.filter_flip,
                                         self.filter_dilation)(
        weights, top, bottom.shape[-2:])
    d_top = AbstractConv2d(self.imshp, self.kshp,
                           self.border_mode,
                           self.subsample,
                           self.filter_flip,
                           self.filter_dilation)(bottom, weights)

    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
    d_bottom = bottom.type.filter_variable(d_bottom)
    d_top = patternbroadcast(d_top, top.broadcastable)
    d_top = top.type.filter_variable(d_top)

    d_height_width = (theano.gradient.DisconnectedType()(),)
    return (d_bottom, d_top) + d_height_width
def grad(self, inp, grads):
    bottom, top = inp[:2]
    weights, = grads
    d_bottom = AbstractConv3d_gradInputs(self.imshp, self.kshp,
                                         self.border_mode,
                                         self.subsample,
                                         self.filter_flip,
                                         self.filter_dilation)(
        weights, top, bottom.shape[-3:])
    d_top = AbstractConv3d(self.imshp, self.kshp,
                           self.border_mode,
                           self.subsample,
                           self.filter_flip,
                           self.filter_dilation)(bottom, weights)

    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
    d_bottom = bottom.type.filter_variable(d_bottom)
    d_top = patternbroadcast(d_top, top.broadcastable)
    d_top = top.type.filter_variable(d_top)

    d_depth_height_width = (theano.gradient.DisconnectedType()(),)
    return (d_bottom, d_top) + d_depth_height_width
def grad(self, inp, grads):
    weights, top = inp[:2]
    bottom, = grads
    d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
                                           self.border_mode,
                                           self.subsample,
                                           self.filter_flip,
                                           self.filter_dilation)(
        bottom, top, weights.shape[-2:])
    d_top = AbstractConv2d(self.imshp, self.kshp,
                           self.border_mode,
                           self.subsample,
                           self.filter_flip,
                           self.filter_dilation)(bottom, weights)

    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    d_weights = weights.type.filter_variable(d_weights)
    d_top = patternbroadcast(d_top, top.broadcastable)
    d_top = top.type.filter_variable(d_top)

    d_height_width = (theano.gradient.DisconnectedType()(),)
    return (d_weights, d_top) + d_height_width
def grad(self, inp, grads):
    weights, top = inp[:2]
    bottom, = grads
    d_weights = AbstractConv3d_gradWeights(self.imshp, self.kshp,
                                           self.border_mode,
                                           self.subsample,
                                           self.filter_flip,
                                           self.filter_dilation)(
        bottom, top, weights.shape[-3:])
    d_top = AbstractConv3d(self.imshp, self.kshp,
                           self.border_mode,
                           self.subsample,
                           self.filter_flip,
                           self.filter_dilation)(bottom, weights)

    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    d_weights = weights.type.filter_variable(d_weights)
    d_top = patternbroadcast(d_top, top.broadcastable)
    d_top = top.type.filter_variable(d_top)

    d_depth_height_width = (theano.gradient.DisconnectedType()(),)
    return (d_weights, d_top) + d_depth_height_width
def test_broadcast(self):
    # Test that we can rebroadcast
    data = numpy.random.rand(10, 10).astype('float32')
    output_var = f32sc(name="output", value=data)

    up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
    output_func = theano.function(inputs=[], outputs=[],
                                  updates=[(output_var, up)])
    output_func()

    up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                 output_var.type.broadcastable)
    output_func = theano.function(inputs=[], outputs=[],
                                  updates=[(output_var, up)])
    output_func()
def local_gpu_reshape(node):
    if isinstance(node.op, GpuFromHost):
        host_input = node.inputs[0]
        if host_input.owner and \
                isinstance(host_input.owner.op, tensor.Reshape):
            x, shp = host_input.owner.inputs
            gpu_reshape = GpuReshape(**host_input.owner.op._props_dict())(
                as_cuda_ndarray_variable(x), shp)
            if gpu_reshape.broadcastable != node.outputs[0].broadcastable:
                # this can happen as we always return False for all broadcast
                # dims in GpuReshape but not in Reshape. Even if we did the
                # same thing, the constant optimization that can happen would
                # still make this necessary.
                gpu_reshape = theano.tensor.patternbroadcast(
                    gpu_reshape, node.outputs[0].broadcastable)
            return [gpu_reshape]
    if isinstance(node.op, tensor.Reshape):
        x, shp = node.inputs
        if x.owner and isinstance(x.owner.op, HostFromGpu):
            gpu_x, = x.owner.inputs
            gpu_reshape = GpuReshape(**node.op._props_dict())(gpu_x, shp)
            if gpu_reshape.broadcastable != node.outputs[0].broadcastable:
                # this can happen as we always return False for all broadcast
                # dims in GpuReshape but not in Reshape. Even if we did the
                # same thing, the constant optimization that can happen would
                # still make this necessary.
                gpu_reshape = theano.tensor.patternbroadcast(
                    gpu_reshape, node.outputs[0].broadcastable)
            return [host_from_gpu(gpu_reshape)]
    return False
def _gpu_conv_to_fftconv(node):
    # shared helper function for local_conv_fft_valid and local_conv_fft_full.
    # we import conv2d_fft locally to avoid pycuda warnings
    from theano.sandbox.cuda.fftconv import conv2d_fft
    kwargs = {'border_mode': node.op.border_mode}
    if (node.op.imshp is not None and
            node.op.imshp[-1] is not None and
            node.op.imshp[-1] % 2 == 1):
        kwargs['pad_last_dim'] = True
    # If the user supplied the full nonsymbolic image_shape and
    # filter_shape in conv2d(), we can pass it on to conv2d_fft().
    if ((node.op.imshp is not None) and
            (len(node.op.imshp) == 3) and
            (None not in node.op.imshp) and
            (node.op.bsize is not None)):
        kwargs['image_shape'] = (node.op.bsize,) + node.op.imshp
    if ((node.op.kshp is not None) and
            (None not in node.op.kshp) and
            (node.op.nkern is not None) and
            (len(node.op.imshp) == 3) and
            (node.op.imshp[0] is not None)):
        kwargs['filter_shape'] = (node.op.nkern, node.op.imshp[0]) + \
            node.op.kshp
    rval = conv2d_fft(node.inputs[0], node.inputs[1], **kwargs)
    if node.outputs[0].broadcastable != rval.broadcastable:
        # With given shape information, conv2d_fft may return a different
        # broadcast pattern than GpuConv. This is forbidden, so we fix it.
        rval = tensor.patternbroadcast(
            rval, node.outputs[0].type.broadcastable)
    return rval
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".
    '''
    broadcastable = x.broadcastable[:axis] + x.broadcastable[axis+1:]
    x = T.patternbroadcast(x, [i == axis for i in range(x.type.ndim)])
    x = T.squeeze(x)
    x = T.patternbroadcast(x, broadcastable)
    return x
def big_frame_level_rnn(input_sequences, h0, reset):
    """
    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape: (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape: ()
    output[0].shape: (batch size, n frames, DIM)
    output[1].shape: same as h0.shape
    output[2].shape: (batch size, seq len, Q_LEVELS)
    """
    learned_h0 = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] / BIG_FRAME_SIZE,
        BIG_FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS / 2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru0 = lib.ops.LowMemGRU('BigFrameLevel.GRU0', BIG_FRAME_SIZE, BIG_DIM,
                             frames, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_BIG_GRUS):
        gru = lib.ops.LowMemGRU('BigFrameLevel.GRU' + str(i), BIG_DIM, BIG_DIM,
                                grus[-1], h0=h0[:, i])
        grus.append(gru)

    output = lib.ops.Linear(
        'BigFrameLevel.Output',
        BIG_DIM,
        DIM * BIG_FRAME_SIZE / FRAME_SIZE,
        grus[-1]
    )
    output = output.reshape((output.shape[0],
                             output.shape[1] * BIG_FRAME_SIZE / FRAME_SIZE,
                             DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    independent_preds = lib.ops.Linear(
        'BigFrameLevel.IndependentPreds',
        BIG_DIM,
        Q_LEVELS * BIG_FRAME_SIZE,
        grus[-1]
    )
    independent_preds = independent_preds.reshape((
        independent_preds.shape[0],
        independent_preds.shape[1] * BIG_FRAME_SIZE,
        Q_LEVELS
    ))

    return (output, last_hidden, independent_preds)
def frame_level_rnn(input_sequences, other_input, h0, reset):
    """
    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames * FRAME_SIZE, DIM)
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] / FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS / 2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru_input = lib.ops.Linear('FrameLevel.InputExpand', FRAME_SIZE, DIM,
                               frames) + other_input

    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', DIM, DIM,
                             gru_input, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU' + str(i), DIM, DIM,
                                grus[-1], h0=h0[:, i])
        grus.append(gru)

    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        grus[-1],
        initialization='he'
    )
    output = output.reshape((output.shape[0],
                             output.shape[1] * FRAME_SIZE,
                             DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)