def _k_max_pooling(input, kmax):
    """Keep ``kmax`` entries along axis 2 of a 4D symbolic tensor.

    The entries kept are the last ``kmax`` in ``T.argsort`` order (the
    largest ones, assuming the sort is ascending). They are returned in the
    order in which they occur in ``input``, not ordered by value.

    :param input: symbolic tensor with 4 dimensions
    :param kmax: number of entries to keep along axis 2
    :return: symbolic 4D tensor of shape
        ``(input.shape[0], input.shape[1], kmax, input.shape[3])``
    """
    # Bring axis 2 to the columns and fold axes 0, 1 and 3 into the rows:
    # the result is a matrix with one row per (axis0, axis1, axis3) triple.
    folded = input.dimshuffle(0, 2, 1, 3).flatten(ndim=3)
    folded = folded.dimshuffle(1, 0, 2).flatten(ndim=2)
    rows_by_axis2 = folded.dimshuffle(1, 0)

    ranking = T.argsort(rows_by_axis2, axis=1)

    # Take the last kmax ranked column indices of every row, then sort the
    # indices themselves so that the original ordering is preserved.
    col_idx = T.sort(ranking[:, -kmax:], axis=1).flatten()
    row_idx = T.repeat(T.arange(ranking.shape[0]), kmax)
    selected = rows_by_axis2[row_idx, col_idx]

    # Unfold the rows back into (axis0, axis1, axis3) and move the kept
    # entries back to axis 2.
    unfolded_shape = T.join(
        0,
        T.as_tensor([input.shape[0], input.shape[1], input.shape[3], kmax]))
    return selected.reshape(unfolded_shape, ndim=4).dimshuffle(0, 1, 3, 2)
def make_node(self, frames, n, axis):
    """
    Compute an n-point fft of frames along given axis.

    Builds the Apply node for this op. ``frames`` is converted to a 2-d
    tensor variable, ``n`` and ``axis`` to 0-d tensor variables. The node
    has two outputs: a ``zmatrix`` holding the spectrogram and a generic
    ``buf`` variable.

    :raises TypeError: if ``self.half`` is set and ``frames`` has a
        complex dtype
    """
    frames_var = tensor.as_tensor(frames, ndim=2)
    n_var = tensor.as_tensor(n, ndim=0)
    axis_var = tensor.as_tensor(axis, ndim=0)

    if self.half:
        if frames_var.type.dtype.startswith('complex'):
            raise TypeError('Argument to HalfFFT must not be complex', frames)

    # The second output (`buf`) is present for future work: when FFTW is
    # called directly and the 'plan' it creates is re-used, buf would store
    # a CObject encapsulating that plan.
    outputs = [tensor.zmatrix(), generic()]
    return Apply(self, [frames_var, n_var, axis_var], outputs)
def __init__(self, rng, inputVar, cfgParams, copyLayer=None, layerNum=None):
    """
    Build the symbolic output of a pooling layer.

    The layer allocates no parameters: ``self.params`` and ``self.weights``
    are set to empty lists.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator; not used by this constructor
    :type inputVar: theano.tensor.dtensor4
    :param inputVar: symbolic image tensor, must have 4 dimensions
    :type cfgParams: PoolLayerParams
    :param cfgParams: layer configuration; ``poolsize``, ``inputDim``,
                      ``activation`` and ``poolType`` are read here.
                      ``poolType`` selects 0 = max pooling, 1 = average
                      pooling, 3 = strided subsampling, -1 = no pooling
    :param copyLayer: not used by this constructor
    :param layerNum: layer index, used only to name the output variable
    :raises TypeError: if ``inputVar`` does not have 4 dimensions
    :raises ValueError: if ``cfgParams.poolType`` is not 0, 1, 3 or -1
    """
    poolsize = cfgParams.poolsize
    inputDim = cfgParams.inputDim
    activation = cfgParams.activation
    poolType = cfgParams.poolType

    self.cfgParams = cfgParams
    self.layerNum = layerNum
    self.inputVar = inputVar

    if inputVar.type.ndim != 4:
        raise TypeError(
            'inputVar must be a 4D tensor, got ndim={}'.format(
                inputVar.type.ndim))

    # nothing to learn in a pooling layer
    self.params = []
    self.weights = []

    # downsample each feature map individually
    if poolType == 0:
        # max pooling
        pooled_out = pool_2d(input=self.inputVar, ds=poolsize,
                             ignore_border=True)
    elif poolType == 1:
        # average pooling: gather each poolsize patch, average over its
        # entries, then reshape the result back to 4D
        patches = theano.sandbox.neighbours.images2neibs(
            ten4=self.inputVar, neib_shape=poolsize, mode='ignore_borders')
        pooled_out = patches.mean(axis=-1)
        new_shape = T.cast(
            T.join(0,
                   self.inputVar.shape[:-2],
                   T.as_tensor([self.inputVar.shape[2] // poolsize[0]]),
                   T.as_tensor([self.inputVar.shape[3] // poolsize[1]])),
            'int64')
        pooled_out = T.reshape(pooled_out, new_shape, ndim=4)
    elif poolType == 3:
        # subsampling, ignoring the border: crop the last two axes to a
        # multiple of the pool size (taken from the configured inputDim),
        # then keep every poolsize-th element
        rows = (inputDim[2] // poolsize[0]) * poolsize[0]
        cols = (inputDim[3] // poolsize[1]) * poolsize[1]
        cropped = self.inputVar[:, :, :rows, :cols]
        pooled_out = cropped[:, :, ::poolsize[0], ::poolsize[1]]
    elif poolType == -1:
        # no pooling at all
        pooled_out = self.inputVar
    else:
        raise ValueError("Unknown pool type!")

    self.output = (pooled_out if activation is None
                   else activation(pooled_out))
    self.output.name = 'output_layer_{}'.format(self.layerNum)
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
               mode='max'):
    """
    Pool over the last two dimensions of ``input`` using ``MyPool``.

    This function is a patch to the maxpool op of Theano: contrary to the
    stock maxpool implementation, the gradient is backpropagated to only one
    input of a given patch if several inputs share the same value. This is
    consistent with the CuDNN implementation (and therefore the op is
    replaced by the CuDNN version when possible).

    A 4D input is handed to the op directly. Any other input is reshaped to
    ``(prod(leading dims), 1, height, width)``, pooled, and reshaped back so
    that the leading dimensions are those of ``input``.

    ``ignore_border`` may be omitted or truthy; the op is always built with
    ``ignore_border=True``.

    :raises NotImplementedError: if ``input`` has fewer than 2 dimensions
    :raises AssertionError: if ``ignore_border`` is given and falsy
    """
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    if ignore_border is not None:
        # check that ignore_border is True if provided
        assert ignore_border
    ignore_border = True

    def _pool(images):
        # build the op and apply it to a 4D tensor
        pool_op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
        return pool_op(images)

    if input.ndim == 4:
        return _pool(input)

    # the two trailing dimensions are the image dimensions
    image_dims = input.shape[-2:]

    # fold all leading dimensions into one batch dimension, kept as a
    # length-1 vector so it can be joined with the other shape pieces
    n_images = tensor.shape_padright(tensor.prod(input.shape[:-2]), 1)

    # 4D view with shape (batch_size, 1, height, width)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), image_dims),
        'int64')
    pooled = _pool(tensor.reshape(input, shape_4d, ndim=4))

    # restore the leading dimensions of the original input
    restored_shape = tensor.join(0, input.shape[:-2], pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    Helper for the GPU pooling ops, which need a fixed number of
    dimensions: the last `rightdims` dimensions of the input are kept
    as they are, and the dimensions in front of them are brought to exactly
    `leftdims` dimensions, either by prepending singleton dimensions or by
    merging the leading dimensions into one.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    target_ndim = leftdims + rightdims
    if input.ndim == target_ndim:
        # already the right number of dimensions
        return input

    # the trailing (pooled) dimensions are always kept unchanged
    trailing_shape = input.shape[-rightdims:]
    lead_ndim = input.ndim - rightdims

    if lead_ndim < leftdims:
        # not enough leading dimensions: prepend singleton dimensions
        ones = tensor.as_tensor([1] * (leftdims - lead_ndim))
        pieces = (ones, input.shape[:lead_ndim], trailing_shape)
    else:
        # too many leading dimensions: merge the first ones into a single
        # dimension, kept as a length-1 vector so it can be joined
        merged_ndim = lead_ndim - leftdims + 1
        merged = tensor.prod(input.shape[:merged_ndim])
        merged = tensor.shape_padright(merged, 1)
        pieces = (merged, input.shape[merged_ndim:lead_ndim], trailing_shape)

    target_shape = tensor.cast(tensor.join(0, *pieces), 'int64')
    return GpuReshape(target_ndim)(input, target_shape)
def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling: the image is downsampled by taking the max
    value over each area of shape maxpoolshp. Outputs a 2D tensor of
    shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply
                   max pooling. Assumed to be of shape
                   batch_size x img_size
    :param imgshp: tuple containing image dimensions, either
                   (height, width) or (nfeatures, height, width)
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    pool_area = numpy.int64(numpy.prod(maxpoolshp))

    # a 2-entry imgshp is (height, width): default nfeatures to 1
    if numpy.size(imgshp) == 2:
        imgshp = (1,) + imgshp
    nfeatures = imgshp[0]

    # indices and index pointers of the sparse matrix which, multiplied
    # with the input images, generates a stack of image patches
    indices, indptr, spmat_shape, sptype, outshp = \
        convolution_indices.conv_eval(imgshp, maxpoolshp,
                                      maxpoolshp, mode='valid')
    n_positions = numpy.prod(outshp)

    # build the sparse matrix, then generate the stack of image patches
    patch_matrix = theano.sparse.CSM(sptype)(numpy.ones(indices.size),
                                             indices, indptr, spmat_shape)
    patches = sparse.structured_dot(patch_matrix, images.T).T

    # one row per (image, output position), one column block per feature
    stack_shape = tensor.stack([images.shape[0] *
                                tensor.as_tensor(n_positions),
                                tensor.as_tensor(nfeatures),
                                tensor.as_tensor(pool_area)])
    patch_stack = tensor.reshape(patches, stack_shape, ndim=3)

    # max over the entries of each pooling area
    pooled = tensor.max(patch_stack, axis=2)

    pooled_shape = tensor.stack([images.shape[0],
                                 tensor.as_tensor(n_positions),
                                 tensor.as_tensor(nfeatures)])
    pooled_3d = tensor.reshape(pooled, pooled_shape, ndim=3)

    # swap the last two axes, then flatten back to a 2D tensor
    swap_axes = tensor.DimShuffle(pooled_3d.broadcastable, (0, 2, 1))
    return tensor.flatten(swap_axes(pooled_3d), 2), outshp
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    The last `rightdims` dimensions are left untouched. The number of
    dimensions in front of them is reduced or expanded to exactly
    `leftdims`, by adding extra dimensions on the left or by combining
    some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        # nothing to do: return the very same variable
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left with singleton dimensions
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions into one
        # whose size is the product of the combined sizes
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND