The following are 30 code examples, extracted from open source Python projects, that illustrate how to use theano.tensor.join().
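Before the project examples, here is a minimal sketch (not taken from any of the projects below) of what T.join does: it concatenates symbolic tensors along a given axis, analogous to numpy.concatenate. The variable names are illustrative only.

import numpy
import theano
import theano.tensor as T

# Two symbolic float32 matrices; 'x' and 'y' are illustrative names.
x = T.fmatrix('x')
y = T.fmatrix('y')

# join(axis, tensor_1, tensor_2, ...): concatenate along the given axis.
z = T.join(0, x, y)

f = theano.function([x, y], z)
a = numpy.ones((2, 3), dtype='float32')
b = numpy.zeros((1, 3), dtype='float32')
print(f(a, b).shape)  # expected: (3, 3)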
def lyr_lstm(
        self, name_,
        s_x_, s_cell_, s_hid_,
        idim_, hdim_,
        axis_=-1,
        lyr_linear_=None,
        op_act_=T.tanh,
        op_gate_=T.nnet.sigmoid):
    s_inp = T.join(axis_, s_x_, s_hid_)
    if lyr_linear_ is None:
        lyr_linear_ = self.lyr_linear
    s_gates_lin, s_inp_lin = T.split(
        lyr_linear_(name_+'_rec', s_inp, idim_+hdim_, hdim_*4),
        [hdim_*3, hdim_], 2, axis=axis_)
    s_igate, s_fgate, s_ogate = T.split(
        op_gate_(s_gates_lin), [hdim_]*3, 3, axis=axis_)
    s_cell_tp1 = s_igate*op_act_(s_inp_lin) + s_fgate*s_cell_
    s_hid_tp1 = op_act_(s_cell_tp1)*s_ogate
    return s_cell_tp1, s_hid_tp1
def lyr_gru_flat(
        self, name_,
        s_x_, s_state_,
        idim_, hdim_,
        axis_=-1,
        lyr_linear_=None,
        op_act_=T.tanh,
        op_gate_=T.nnet.sigmoid,
        params_group_='params'):
    '''
    GRU layer, flat version

    In order to use, you need to provide state variable
    '''
    if lyr_linear_ is None:
        lyr_linear_ = self.lyr_linear
    s_igate = lyr_linear_(
        name_+'_igate', idim_+hdim_, idim_, params_group_=params_group_)
    s_inp_gated = T.join(axis_, s_x_ * op_gate_(s_igate), s_state_)
    s_gate_lin, s_state_tp1_lin = T.split(
        lyr_linear_(name_+'_gate', s_inp_gated, idim_+hdim_, hdim_*2),
        [hdim_, hdim_], 2, axis_)
    s_gate = op_gate_(s_gate_lin)
    return s_state_*s_gate + op_act_(s_state_tp1_lin)*(1.-s_gate)
def _k_max_pooling(input, kmax):
    pool = input.dimshuffle(0, 2, 1, 3).flatten(ndim=3).dimshuffle(1, 0, 2).flatten(ndim=2).dimshuffle(1, 0)
    neighborsArgSorted = T.argsort(pool, axis=1)
    yy = T.sort(neighborsArgSorted[:, -kmax:], axis=1).flatten()
    xx = T.repeat(T.arange(neighborsArgSorted.shape[0]), kmax)
    pool_kmax = pool[xx, yy]
    pool_kmax_shape = T.join(0, T.as_tensor([input.shape[0], input.shape[1], input.shape[3], kmax]))
    pooled_out = pool_kmax.reshape(pool_kmax_shape, ndim=4).dimshuffle(0, 1, 3, 2)
    return pooled_out
def test_gpujoin_gpualloc():
    a = T.fmatrix('a')
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    b = T.fmatrix('b')
    b_val = numpy.asarray(numpy.random.rand(3, 5), dtype='float32')

    f = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                        mode=mode_without_gpu)
    f_gpu = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)),
                            mode=mode_with_gpu)
    f_gpu2 = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                             mode=mode_with_gpu)

    assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 2
    assert sum([node.op == T.join for node in f.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc)
                for node in f_gpu.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join
                for node in f_gpu.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc)
                for node in f_gpu2.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join
                for node in f_gpu2.maker.fgraph.toposort()]) == 1
    assert numpy.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
def local_gpua_alloc2(node):
    """
    Join(axis, {Alloc or HostFromGPU}, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.

    """
    try:
        get_context(None)
    except ContextNotDefined:
        # If there is no default context then we do not perform the move here.
        return
    if (isinstance(node.op, tensor.Alloc) and
        all(c != 'output' and
            c.op == tensor.join and
            all(i.owner and
                i.owner.op in [host_from_gpu, tensor.alloc]
                for i in c.inputs[1:])
            for c, idx in node.outputs[0].clients)):
        return [host_from_gpu(gpu_alloc(None)(*node.inputs))]
def test_no_cycle(self):
    # Optimizing this graph resulted in a cycle, see gh-1549
    # This test depends on cuda
    import theano.sandbox.cuda as cuda
    if not cuda.cuda_available:
        raise SkipTest("cuda not available")
    if sys.version_info[:2] < (2, 5):
        raise SkipTest("Test skipped due to a too old python")

    # This pickle file has undergone manual surgery due to changes
    # in scan and may or may not run correctly.  It does pass the
    # test below.
    pkl_filename = os.path.join(os.path.dirname(theano.__file__),
                                'tensor', 'tests', 'shape_opt_cycle.pkl')
    # Due to incompatibilities between python 2 and 3 in the format
    # of pickled numpy ndarray, we have to force an encoding
    from theano.misc.pkl_utils import CompatUnpickler
    with open(pkl_filename, "rb") as pkl_file:
        if PY3:
            u = CompatUnpickler(pkl_file, encoding="latin1")
        else:
            u = CompatUnpickler(pkl_file)
        fn_args = u.load()
        theano.function(**fn_args)
def test_local_join_make_vector():
    a, b, c, d, e = tensor.scalars('abcde')
    v = tensor.vector('v')
    mv = MakeVector(config.floatX)
    s = tensor.join(0, mv(a), v, mv(b, c), mv(d, e))
    f = function([a, b, c, d, e, v], s, mode=mode_opt)
    theano.printing.debugprint(f)
    val = f(1, 2, 3, 4, 6, [7, 8])
    assert numpy.all(val == [1, 7, 8, 2, 3, 4, 6])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, Join)]) == 1
    assert all([not isinstance(n.op, Join) or len(n.inputs) == 4
                for n in e if isinstance(n.op, Join)])
    assert f.maker.fgraph.outputs[0].dtype == config.floatX

    assert check_stack_trace(f, ops_to_check='all')
def lyr_sconv_gen(
        name_, s_x_,
        idim_, odim_,
        **kwargs_):
    '''
    quick & dirty implementation of fxnn convolution layer
    '''
    global g_mdl
    dilation = kwargs_.get('dilation_')
    if dilation is None:
        dilation = 1
    init_scale = kwargs_.get('init_scale_')
    bias = kwargs_.get('bias_')
    op_conv = partial(
        T.nnet.conv2d,
        border_mode='half',
        filter_dilation=(dilation, dilation))
    ir = 0.5/sqrt(idim_*5+odim_)
    s_dims = T.shape(s_x_)
    s_x = T.reshape(s_x_, (s_dims[0]*idim_, 1, s_dims[2], s_dims[3]))
    s_x1 = T.reshape(op_conv(
        s_x, g_sconv_ker,
        filter_shape=(2, 1, 1, 3), **kwargs_),
        (s_dims[0]*idim_*2, 1, s_dims[2], s_dims[3]))
    s_x2 = T.reshape(op_conv(
        s_x1, g_sconv_ker.transpose(0, 1, 3, 2),
        filter_shape=(2, 1, 3, 1),
        ), (s_dims[0], idim_*4, s_dims[2], s_dims[3]))
    s_y = T.join(1, s_x2, s_x_)
    return g_mdl.lyr_conv(
        name_, s_y, idim_*5, odim_,
        fsize_=1, init_scale_=ir, **kwargs_)
def lyr_gru(
        self, name_,
        s_x_, s_state_,
        idim_, hdim_,
        axis_=0,
        lyr_linear_=None,
        op_act_=T.tanh,
        op_gate_=T.nnet.sigmoid):
    if lyr_linear_ is None:
        lyr_linear_ = self.lyr_linear
    s_igate = lyr_linear_(name_+'_igate', idim_+hdim_, idim_)
    s_inp_gated = T.join(axis_, s_x_ * op_gate_(s_igate), s_state_)
    s_gate_lin, s_state_tp1_lin = T.split(
        lyr_linear_(name_+'_gate', s_inp_gated, idim_+hdim_, hdim_*2),
        [hdim_, hdim_], 2, axis_)
    s_gate = op_gate_(s_gate_lin)
    return s_state_*s_gate + op_act_(s_state_tp1_lin)*(1.-s_gate)
def lyr_lstm_flat(
        self, name_,
        s_x_, s_cell_, s_hid_,
        idim_, hdim_,
        axis_=-1,
        lyr_linear_=None,
        op_act_=T.tanh,
        op_gate_=T.nnet.sigmoid,
        params_group_='params'):
    '''
    LSTM layer, flat version

    In order to use, you need to provide state variable

    Returns: hidden_state, cell_state
    '''
    s_inp = T.join(axis_, s_x_, s_hid_)
    if lyr_linear_ is None:
        lyr_linear_ = self.lyr_linear
    s_gates_lin, s_inp_lin = T.split(
        lyr_linear_(name_+'_rec', s_inp, idim_+hdim_, hdim_*4),
        [hdim_*3, hdim_], 2, axis=axis_)
    s_igate, s_fgate, s_ogate = T.split(
        op_gate_(s_gates_lin), [hdim_]*3, 3, axis=axis_)
    s_cell_tp1 = s_igate*op_act_(s_inp_lin) + s_fgate*s_cell_
    s_hid_tp1 = op_act_(s_cell_tp1)*s_ogate
    return s_cell_tp1, s_hid_tp1
def max_pool_3d(inpt, inpt_shape, ds, ignore_border=True):
    # Downsize 'into the depth' by downsizing twice.
    inpt_shape_4d = (
        inpt_shape[0] * inpt_shape[1],
        inpt_shape[2],
        inpt_shape[3],
        inpt_shape[4]
    )

    inpt_as_tensor4 = T.reshape(inpt, inpt_shape_4d, ndim=4)

    # The first pooling only downsizes the height and the width.
    pool_out1 = pool.pool_2d(inpt_as_tensor4, (ds[1], ds[2]),
                             ignore_border=True)
    out_shape1 = T.join(0, inpt_shape[:-2], pool_out1.shape[-2:])

    inpt_pooled_once = T.reshape(pool_out1, out_shape1, ndim=5)

    # Shuffle dimensions so the depth is the last dimension.
    inpt_shuffled = inpt_pooled_once.dimshuffle(0, 4, 2, 3, 1)

    shuffled_shape = inpt_shuffled.shape
    # Reshape input to be 4 dimensional.
    shuffle_shape_4d = (
        shuffled_shape[0] * shuffled_shape[1],
        shuffled_shape[2],
        shuffled_shape[3],
        shuffled_shape[4]
    )

    inpt_shuffled_4d = T.reshape(inpt_shuffled, shuffle_shape_4d, ndim=4)

    pool_out2 = pool.pool_2d(inpt_shuffled_4d, (1, ds[0]),
                             ignore_border=True)
    out_shape2 = T.join(0, shuffled_shape[:-2], pool_out2.shape[-2:])

    inpt_pooled_twice = T.reshape(pool_out2, out_shape2, ndim=5)
    pool_output_fin = inpt_pooled_twice.dimshuffle(0, 4, 2, 3, 1)

    return pool_output_fin
def setUp(self):
    super(G_Join_and_Split, self).setUp()
    self.mode = mode_with_gpu.excluding('constant_folding')
    self.join_op = GpuJoin()
    self.split_op_class = GpuSplit
    # Use join instead of MakeVector since there is no MakeVector on GPU
    self.make_vector_op = GpuJoin()
    # this is to avoid errors with limited devices
    self.floatX = 'float32'
    self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE']
    self.shared = gpuarray_shared_constructor
def local_gpua_join_1(node):
    # join of a single element
    if (isinstance(node.op, GpuJoin) and
            len(node.inputs) == 2):
        return [node.inputs[1]]
def test_join(self):
    tv = numpy.asarray(self.rng.uniform(size=(10,)),
                       theano.config.floatX)
    t = theano.shared(tv)
    out = tensor.join(0, self.x, t)
    self.check_rop_lop(out, (self.in_shape[0] + 10,))
def test_equality_shapes(self):
    # Test equality where one side contains only shape-related stuff.
    if theano.config.mode == "FAST_COMPILE":
        raise SkipTest("Skip opt test as the opt is disabled")
    x = T.vector('x', dtype=config.floatX)
    for g in [x.shape[0],
              Shape_i(0)(x)]:
        f = theano.function([x], T.eq(g, 0))
        assert f([3, 3]) == 0
        assert f([]) == 1

        f = theano.function([x], T.eq(g, -1))
        self.assert_eqs_const(f, 0)
        assert f([3, 3]) == 0

    g = join(0,
             x.shape[0:],  # todo test reshape, dimshuffle
             x.shape[0:1])
    f = theano.function([x], T.eq(g, 0))
    assert (f([3, 3]) == 0).all()
    assert (f([]) == 1).all()

    f = theano.function([x], T.eq(g, -1))
    self.assert_eqs_const(f, 0, op=T.alloc)
    assert (f([3, 3]) == 0).all()
def test_constant_merge(self):
    """This tests the error in gh-1122 that is caused by the
    combination of the merge optimizer and ShapeFeature.
    """
    x = tensor.constant([0, 0])
    y = x[1:]
    x1 = x - tensor.join(0, y, y)
    x1.eval()
def test_local_useless_rebroadcast(self):
    mode = theano.compile.get_default_mode().including('canonicalize')
    v1 = T.vector()
    v2 = T.vector()
    j = T.join(0, v1, v2)
    f = theano.function([v1, v2], j, mode=mode)
    f([1, 2], [3, 4, 5])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, T.Rebroadcast)]) == 0

    assert check_stack_trace(f, ops_to_check='all')
def test_opt_gpujoin_onlyajoin():
    # from a bug in normal sampling
    _a = numpy.asarray([[1, 2], [3, 4]], dtype='float32')
    _b = numpy.asarray([[5, 6, 7], [8, 9, 10]], dtype='float32')
    a = cuda.shared_constructor(_a)
    b = cuda.shared_constructor(_b)

    c = tensor.join(1, a, b)

    f = theano.function([], c, mode=mode_with_gpu)
    f()

    graph_nodes = f.maker.fgraph.toposort()
    assert isinstance(graph_nodes[-1].op, cuda.HostFromGpu)
    assert isinstance(graph_nodes[-2].op, cuda.GpuJoin)
    assert numpy.all(f() == numpy.concatenate([_a, _b], axis=1))

    # test mixed dtype
    _b = numpy.asarray([[5, 6, 7], [8, 9, 10]], dtype='float64')
    b = theano.tensor.constant(_b)

    c = tensor.join(1, a, b)

    f = theano.function([], c, mode=mode_with_gpu)
    f()

    graph_nodes = f.maker.fgraph.toposort()
    assert isinstance(graph_nodes[-1].op, theano.tensor.Join)
    assert numpy.all(f() == numpy.concatenate([_a, _b], axis=1))
def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
    # from a bug in gpu normal sampling
    _a = numpy.asarray([1, 2, 3, 4], dtype='float32')
    _b = numpy.asarray([5, 6, 7, 8], dtype='float32')
    a = cuda.shared_constructor(_a)
    b = cuda.shared_constructor(_b)

    a_prime = tensor.cos(a)
    b_prime = tensor.sin(b)

    c = tensor.join(0, a_prime, b_prime)
    d = c[:-1]

    f = theano.function([], d, mode=mode_with_gpu)

    graph_nodes = f.maker.fgraph.toposort()
    assert isinstance(graph_nodes[-1].op, cuda.HostFromGpu)
    assert isinstance(graph_nodes[-2].op, cuda.GpuSubtensor)
    assert isinstance(graph_nodes[-3].op, cuda.GpuJoin)

    concat = numpy.concatenate([numpy.cos(_a), numpy.sin(_b)], axis=0)
    concat = concat[:-1]

    assert numpy.allclose(numpy.asarray(f()), concat)
def test_gpujoin_no_rebroadcast():
    _a = numpy.asarray([[1, 2], [3, 4]], dtype='float32')
    a = tcn.shared_constructor(_a)
    f = theano.function([], T.join(1, a))
    l = f.maker.fgraph.toposort()
    assert not any([isinstance(x.op, T.Rebroadcast) for x in l])
def unpad_dims(output, input, leftdims, rightdims):
    """Reshapes the output after pad_dims.

    This reverts the padding by `pad_dims`.
    """
    if output.ndim == input.ndim:
        return output

    # restore the output to the original shape
    outshp = tensor.join(0, input.shape[:-rightdims], output.shape[-rightdims:])
    return GpuReshape(input.ndim)(output, outshp)
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
               mode='max'):
    """
    This function is a patch to the maxpool op of Theano:
    contrary to the current implementation of maxpool, the gradient is
    backpropagated to only one input of a given patch if several inputs have
    the same value. This is consistent with the CuDNN implementation (and
    therefore the op is replaced by the CuDNN version when possible).
    """
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    if ignore_border is not None:
        # check that ignore_border is True if provided
        assert ignore_border
    ignore_border = True
    if input.ndim == 4:
        op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1]),
                                        img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
def test_local_join_empty():
    # test for vector, vector, empty to vector
    empty_vec = numpy.asarray([], dtype=config.floatX)
    a = tensor.vector('a')
    s = tensor.join(0, a, a, empty_vec)
    f = function([a], s, mode=mode_opt)
    val = f([1])
    assert numpy.all(val == [1])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, Join)]) == 1
    assert all([not isinstance(n.op, Join) or len(n.inputs) == 3
                for n in e if isinstance(n.op, Join)])
    assert f.maker.fgraph.outputs[0].dtype == config.floatX

    # test for matrix join(1,a)
    empty_mat = numpy.asarray([[]], dtype=config.floatX)
    m = tensor.matrix('m')
    s = join(1, empty_mat, m, m, m)
    f = function([m], s, mode=mode_opt)
    val = f([[1]])
    assert numpy.all(val == [[1]])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, Join)]) == 1
    assert all([not isinstance(n.op, Join) or len(n.inputs) == 4
                for n in e if isinstance(n.op, Join)])
    assert f.maker.fgraph.outputs[0].dtype == config.floatX

    # test for vector, vector, empty to matrix
    # We can't optimize this case.
    s = tensor.stack([a, a, empty_vec])
    f = function([a], s, mode=mode_opt)
    val = f([])
    assert numpy.all(val == [1])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, Join)]) == 1
    assert all([not isinstance(n.op, Join) or len(n.inputs) == 4
                for n in e if isinstance(n.op, Join)])
    assert f.maker.fgraph.outputs[0].dtype == config.floatX

    # test for matrix join(0,a)
    # We can't optimize this case.
    s = join(0, m, numpy.asarray([[2.]], dtype=config.floatX), m)
    f = function([m], s, mode=mode_opt)
    val = f([[1]])
    assert numpy.all(val == [[1], [2], [1]])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, Join)]) == 1
    assert all([not isinstance(n.op, Join) or len(n.inputs) == 4
                for n in e if isinstance(n.op, Join)])
    assert f.maker.fgraph.outputs[0].dtype == config.floatX
def local_gpu_join(node):
    """
    Inspired by the opt for convop.

    Very loose notation follows.

    Subgraphs concerned first look like
        [array of HostTensor] -> HostToGpu -> GpuToHost
        -> Join -> HostToGpu -> GpuToHost

    First we apply this Opt:

    join(host_from_gpu) -> host_from_gpu(gpu_join)

    then, as an intermediate result, there should be
    host_from_gpu(gpu_join) -> HostToGpu -> GpuToHost
    this unnecessary GpuToHost -> HostToGpu should be removed
    by other opts, leaving us with
    host_from_gpu(gpu_join)

    For intermediate places in the graph not covered by the first opt, the
    following could be useful:

    gpu_from_host(join) -> gpu_join(gpu_from_host)

    not implemented yet.

    """
    if isinstance(node.op, tensor.Join):
        # optimizing this case:
        # join(host_from_gpu) -> host_from_gpu(gpu_join)

        axis_and_tensors = node.inputs

        matches = [t.dtype == 'float32' and
                   ((t.owner is not None and
                     isinstance(t.owner.op, HostFromGpu)) or
                    isinstance(t, gof.Constant))
                   for t in axis_and_tensors[1:]]

        if all(matches):
            new_tensors = [as_cuda_ndarray_variable(t)
                           for t in axis_and_tensors[1:]]
            new_a_and_t = [axis_and_tensors[0]] + new_tensors

            replacement_node = host_from_gpu(gpu_join(*new_a_and_t))

            return [replacement_node]

# This is a copy of the same opt in tensor to make the tests happy,
# but I'm not convinced it is actually needed.
def local_gpualloc(node):
    replace = False
    if node.op == tensor.alloc:
        if node.inputs[0].owner and \
           isinstance(node.inputs[0].owner.op, HostFromGpu):
            replace = True
        elif all([c != 'output' and c.op == gpu_from_host
                  for c, idx in node.outputs[0].clients]):
            # if all clients are on gpu
            replace = True
        elif all([c != 'output' and
                  c.op == tensor.join and
                  all(i.owner and
                      i.owner.op in [host_from_gpu, tensor.alloc]
                      for i in c.inputs[1:])
                  for c, idx in node.outputs[0].clients]):
            # if the client is on gpu or alloc
            replace = True
        if replace and node.inputs[0].dtype != 'float32':
            replace = False
    if replace:
        val = node.inputs[0]
        shp = node.inputs[1:]
        old_out = node.outputs[0]
        new_out = host_from_gpu(gpu_alloc(val, *shp))

        # Sigh. it's an annoying thing about theano
        # that you can't add information to the graph.
        # If for some reason it has come to light that
        # one of the dimensions is broadcastable, we have to hide that
        # or the optimization won't go through.
        if new_out.type != old_out.type:
            assert new_out.type.ndim == old_out.type.ndim
            assert new_out.type.dtype == old_out.type.dtype
            # it seems to have happened that new_out has some broadcastable
            # dimensions that old_out did not have
            for b_old, b_new in zip(old_out.type.broadcastable,
                                    new_out.type.broadcastable):
                assert b_new or (not b_old)
            new_out = tensor.patternbroadcast(new_out, old_out.broadcastable)

        return [new_out]
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND