The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.shape().
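Before the project examples, here is a minimal usage sketch (not taken from any of the projects below; the variable names are illustrative). It shows the core behavior the examples rely on: theano.tensor.shape() returns a symbolic integer vector holding the runtime shape of its argument, which can be indexed like any other tensor expression.

import numpy as np
import theano
import theano.tensor as T

# Minimal sketch: T.shape(x) builds a symbolic shape vector for x.
x = T.matrix('x')
s_shape = T.shape(x)      # symbolic shape vector, e.g. [n_rows, n_cols]
s_rows = s_shape[0]       # symbolic number of rows
f = theano.function([x], [s_shape, s_rows])
# f(np.zeros((3, 5), dtype=theano.config.floatX)) -> [array([3, 5]), array(3)]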
def load_params(self, f_, filter_=None):
    di = pickle.load(f_)
    if filter_ is None:
        for k, v in di.items():
            p = self._vars_di[k].get_value(borrow=True)
            if p.shape != v.shape:
                raise ValueError('Shape mismatch, need %s, got %s'%(v.shape, p.shape), p.shape)
            self._vars_di[k].set_value(v)
    else:
        pat = re.compile(filter_)
        for k, v in di.items():
            if not pat.fullmatch(k):
                continue
            p = self._vars_di[k].get_value(borrow=True)
            if p.shape != v.shape:
                raise ValueError('Shape mismatch, need %s, got %s'%(v.shape, p.shape), p.shape)
            self._vars_di[k].set_value(v)
def get_output_for(self, input, **kwargs):
    a, b, c = self.scale_factor
    upscaled = input
    if self.mode == 'repeat':
        if c > 1:
            upscaled = T.extra_ops.repeat(upscaled, c, 4)
        if b > 1:
            upscaled = T.extra_ops.repeat(upscaled, b, 3)
        if a > 1:
            upscaled = T.extra_ops.repeat(upscaled, a, 2)
    elif self.mode == 'dilate':
        if c > 1 or b > 1 or a > 1:
            output_shape = self.get_output_shape_for(input.shape)
            upscaled = T.zeros(shape=output_shape, dtype=input.dtype)
            upscaled = T.set_subtensor(
                upscaled[:, :, ::a, ::b, ::c], input)
    return upscaled
def op_ortho_loss(s_x_, axes_=(-2, -1), ndim_=None):
    '''
    orthogonal matrix loss, used to regularize parameter to unitary

    Args:
        s_x_: (batch of) matrices
        axes_: tuple of two integers, specify which axes to be for matrix,
            defaults to last two axes
        ndim_: specify args to be (ndim_ x ndim_) matrices
    '''
    if ndim_ is None:
        ax = axes_[0]
        ndim = T.shape(s_x_)[ax]
    else:
        ndim = ndim_

    # build transpose/broadcast patterns over the static number of dimensions
    # (ndim may be symbolic when ndim_ is None, so it cannot be used with range())
    tpat = list(range(s_x_.ndim))
    bpat = ['x'] * s_x_.ndim
    tpat[axes_[0]], tpat[axes_[1]] = tpat[axes_[1]], tpat[axes_[0]]
    bpat[axes_[0]] = 0
    bpat[axes_[1]] = 1
    s_y = T.dot(s_x_.transpose(*tpat), s_x_)
    return T.sqr(s_y - T.eye(ndim).dimshuffle(*bpat))
def op_covmat(s_x_, l1_normize_=True, eps_=1e-7):
    '''
    Return covariance matrix given a batch of data points

    Args:
        s_x_: batch of row vectors
        l1_normize_: Defaults to True.
            Makes the covariance matrix L1-normalized wrt the number of data points.
        eps_: Adds a small identity matrix I*eps_ to the result;
            this is applied after L1 normalization.
    '''
    assert s_x_.ndim == 2
    s_mean = s_x_ - T.mean(s_x_, axis=0, keepdims=True)
    s_shp = T.shape(s_x_)
    s_covmat = T.dot(s_mean.T, s_mean)
    if l1_normize_:
        s_covmat /= s_shp[0]
    return s_covmat + T.eye(s_shp[1]) * eps_
def unflatten_into_tensors(flatparams_P, output_shapes, name=None):
    '''
    Unflattens a vector produced by flatcat into a list of tensors of the
    specified shapes.
    '''
    outputs = []
    curr_pos = 0
    for shape in output_shapes:
        size = np.prod(shape)
        flatval = flatparams_P[curr_pos:curr_pos+size]
        outputs.append(flatval.reshape(shape))
        curr_pos += size
    # assert curr_pos == flatparams_P.get_shape().num_elements()
    return outputs


# from http://arxiv.org/abs/1412.6980
# and https://gist.github.com/Newmu/acb738767acb4788bac3
# suggested lr 0.001
def adam(cost, params, lr, beta1=0.9, beta2=0.999, eps=1e-8):
    updates = []
    grads = tensor.grad(cost, params)
    assert len(params) == len(grads)
    t0 = theano.shared(np.array(0., dtype=theano.config.floatX))
    t = t0 + 1
    corr1 = (1 - beta1**t)
    corr2 = (1 - beta2**t)
    alpha = lr * tensor.sqrt(corr2) / corr1
    for p, g in zip(params, grads):
        m = theano.shared(value=np.zeros(p.get_value().shape, dtype=theano.config.floatX),
                          broadcastable=p.broadcastable)
        v = theano.shared(value=np.zeros(p.get_value().shape, dtype=theano.config.floatX),
                          broadcastable=p.broadcastable)
        m_t = beta1 * m + (1 - beta1) * g
        v_t = beta2 * v + (1 - beta2) * tensor.square(g)
        p_t = p - alpha * m_t / (tensor.sqrt(v_t) + eps)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t0, t))
    return updates
def __init__(self, input_var=None, num_styles=None, shape=(None, 3, 256, 256), net_type=1, **kwargs):
    """
    net_type: 0 (fast neural style - fns) or 1 (conditional instance norm - cin)
    """
    assert net_type in [0, 1]
    self.net_type = net_type
    self.network = {}

    if len(shape) == 2:
        shape = (None, 3, shape[0], shape[1])
    elif len(shape) == 3:
        shape = (None, shape[0], shape[1], shape[2])
    self.shape = shape

    self.num_styles = num_styles
    self.network['loss_net'] = {}
    self.setup_loss_net()
    self.load_loss_net_weights()

    self.network['transform_net'] = {}
    self.setup_transform_net(input_var)
def setup_transform_net(self, input_var=None):
    transform_net = InputLayer(shape=self.shape, input_var=input_var)
    transform_net = style_conv_block(transform_net, self.num_styles, 32, 9, 1)
    transform_net = style_conv_block(transform_net, self.num_styles, 64, 3, 2)
    transform_net = style_conv_block(transform_net, self.num_styles, 128, 3, 2)
    for _ in range(5):
        transform_net = residual_block(transform_net, self.num_styles)
    transform_net = nn_upsample(transform_net, self.num_styles)
    transform_net = nn_upsample(transform_net, self.num_styles)

    if self.net_type == 0:
        transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, tanh)
        transform_net = ExpressionLayer(transform_net, lambda X: 150.*X, output_shape=None)
    elif self.net_type == 1:
        transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, sigmoid)

    self.network['transform_net'] = transform_net
def sp_ones_like(x):
    """
    Construct a sparse matrix of ones with the same sparsity pattern.

    Parameters
    ----------
    x
        Sparse matrix to take the sparsity pattern.

    Returns
    -------
    A sparse matrix
        The same as `x` with data changed for ones.

    """
    # TODO: don't restrict to CSM formats
    data, indices, indptr, shape = csm_properties(x)
    return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape)
def sp_zeros_like(x):
    """
    Construct a sparse matrix of zeros.

    Parameters
    ----------
    x
        Sparse matrix to take the shape.

    Returns
    -------
    A sparse matrix
        The same as `x` with zero entries for all elements.

    """
    # TODO: don't restrict to CSM formats
    _, _, indptr, shape = csm_properties(x)
    return CSM(format=x.format)(data=numpy.array([], dtype=x.type.dtype),
                                indices=numpy.array([], dtype='int32'),
                                indptr=tensor.zeros_like(indptr),
                                shape=shape)
def perform(self, node, inputs, outputs):
    # for efficiency, if remap does nothing, then do not apply it
    (data, indices, indptr, shape) = inputs
    (out,) = outputs

    if len(shape) != 2:
        raise ValueError('Shape should be an array of length 2')
    if data.shape != indices.shape:
        errmsg = ('Data (shape ' + repr(data.shape) +
                  ' must have the same number of elements ' +
                  'as indices (shape' + repr(indices.shape) +
                  ')')
        raise ValueError(errmsg)
    if self.format == 'csc':
        out[0] = scipy.sparse.csc_matrix((data, indices.copy(),
                                          indptr.copy()),
                                         numpy.asarray(shape), copy=False)
    else:
        assert self.format == 'csr'
        out[0] = scipy.sparse.csr_matrix((data, indices.copy(),
                                          indptr.copy()), shape.copy(),
                                         copy=False)
def perform(self, node, inputs, outputs):
    (x_data, x_indices, x_indptr, x_shape,
     g_data, g_indices, g_indptr, g_shape) = inputs
    (g_out,) = outputs
    if len(x_indptr) - 1 == x_shape[0]:
        sp_dim = x_shape[1]
    else:
        sp_dim = x_shape[0]

    g_row = numpy.zeros(sp_dim, dtype=g_data.dtype)
    gout_data = numpy.zeros(x_data.shape, dtype=node.outputs[0].dtype)

    for i in range(len(x_indptr) - 1):
        for j_ptr in range(g_indptr[i], g_indptr[i + 1]):
            g_row[g_indices[j_ptr]] += g_data[j_ptr]

        for j_ptr in range(x_indptr[i], x_indptr[i + 1]):
            gout_data[j_ptr] = g_row[x_indices[j_ptr]]

        for j_ptr in range(g_indptr[i], g_indptr[i + 1]):
            g_row[g_indices[j_ptr]] = 0

    g_out[0] = gout_data
def perform(self, node, inputs, outputs):
    (x, s) = inputs
    (z,) = outputs
    M, N = x.shape
    assert x.format == 'csc'
    assert s.shape == (M,)

    indices = x.indices
    indptr = x.indptr

    y_data = x.data.copy()

    for j in xrange(0, N):
        for i_idx in xrange(indptr[j], indptr[j + 1]):
            y_data[i_idx] *= s[indices[i_idx]]

    z[0] = scipy.sparse.csc_matrix((y_data, indices, indptr), (M, N))
def grad(self, inputs, gout):
    (gz,) = gout
    is_continuous = [(inputs[i].dtype in tensor.continuous_dtypes)
                     for i in range(len(inputs))]

    if _is_sparse_variable(gz):
        gz = dense_from_sparse(gz)

    split = tensor.Split(len(inputs))(gz, 0,
                                      tensor.stack(
                                          [x.shape[0] for x in inputs]))
    if not isinstance(split, list):
        split = [split]

    derivative = [SparseFromDense(self.format)(s) for s in split]

    def choose(continuous, derivative):
        if continuous:
            return derivative
        else:
            return None
    return [choose(c, d) for c, d in zip(is_continuous, derivative)]
def structured_monoid(tensor_op):
    # Generic operation to perform many kinds of monoid element-wise
    # operations on the non-zeros of a sparse matrix.

    # The first parameter must always be a sparse matrix. The other parameters
    # must be scalars which will be passed as argument to the tensor_op.
    def decorator(f):
        def wrapper(*args):
            x = as_sparse_variable(args[0])
            assert x.format in ["csr", "csc"]

            xs = [scalar.as_scalar(arg) for arg in args[1:]]

            data, ind, ptr, shape = csm_properties(x)

            data = tensor_op(data, *xs)

            return CSM(x.format)(data, ind, ptr, shape)
        wrapper.__name__ = str(tensor_op.scalar_op)
        return wrapper
    return decorator
def perform(self, node, inputs, outputs):
    (a_indices, a_indptr, b, g_ab) = inputs
    (out,) = outputs
    g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
    for j in xrange(len(a_indptr) - 1):
        ind0 = a_indptr[j]
        ind1 = a_indptr[j + 1]
        for i_idx in xrange(ind0, ind1):
            i = a_indices[i_idx]
            # Depending on the type of g_ab and b (sparse or dense),
            # the following dot product can result in a scalar or
            # a (1, 1) sparse matrix.
            dot_val = numpy.dot(g_ab[i], b[j].T)
            if isinstance(dot_val, scipy.sparse.spmatrix):
                dot_val = dot_val[0, 0]
            g_a_data[i_idx] = dot_val
    out[0] = g_a_data
def perform(self, node, inputs, outputs):
    (a_indices, a_indptr, b, g_ab) = inputs
    (out,) = outputs
    g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
    for i in xrange(len(a_indptr) - 1):  # loop over rows
        ind0 = a_indptr[i]
        ind1 = a_indptr[i + 1]
        # loop over values in that row (columns)
        for j_idx in xrange(ind0, ind1):
            j = a_indices[j_idx]
            # grad is dot product of i-th row of gradient with j-th row of b
            # Depending on the type of g_ab and b (sparse or dense),
            # the following dot product can result in a scalar or
            # a (1, 1) sparse matrix.
            dot_val = numpy.dot(g_ab[i], b[j].T)
            if isinstance(dot_val, scipy.sparse.spmatrix):
                dot_val = dot_val[0, 0]
            g_a_data[j_idx] = dot_val
    out[0] = g_a_data
def test_only_nonseq_inputs(self):
    # Compile the Theano function
    n_steps = 2
    inp = tensor.matrix()
    broadcasted_inp, _ = theano.scan(lambda x: x,
                                     non_sequences=[inp],
                                     n_steps=n_steps)
    out = broadcasted_inp.sum()
    gr = tensor.grad(out, inp)
    fun = theano.function([inp], [broadcasted_inp, gr])

    # Execute the Theano function and compare outputs to the expected outputs
    inputs = numpy.array([[1, 2], [3, 4]], dtype=theano.config.floatX)
    expected_out1 = numpy.repeat(inputs[None], n_steps, axis=0)
    expected_out2 = numpy.ones(inputs.shape, dtype="int8") * n_steps

    out1, out2 = fun(inputs)
    utt.assert_allclose(out1, expected_out1)
    utt.assert_allclose(out2, expected_out2)

# simple rnn, one input, one state, weights for each; input/state
# are vectors, weights are scalars
def test_draw_as_input_to_scan(self):
    trng = theano.tensor.shared_randomstreams.RandomStreams(123)
    x = theano.tensor.matrix('x')
    y = trng.binomial(size=x.shape, p=x)
    z, updates = theano.scan(lambda a: a, non_sequences=y, n_steps=2)

    f = theano.function([x],
                        [y, z],
                        updates=updates,
                        allow_input_downcast=True)

    rng = numpy.random.RandomState(utt.fetch_seed())
    nx = rng.uniform(size=(10, 10))
    ny1, nz1 = f(nx)
    ny2, nz2 = f(nx)

    utt.assert_allclose([ny1, ny1], nz1)
    utt.assert_allclose([ny2, ny2], nz2)
    assert not numpy.allclose(ny1, ny2)
def test_infershape_seq_shorter_nsteps(self):
    raise SkipTest("This is a generic problem with "
                   "infershape that has to be discussed "
                   "and figured out")
    x = tensor.vector('x')
    [o1, o2], _ = theano.scan(lambda x, y: (x + 1, y + x),
                              sequences=x,
                              outputs_info=[None, x[0]],
                              n_steps=20)

    f = theano.function([x],
                        [o1.shape[0], o2.shape[0]],
                        mode=mode_with_opt)

    vx = numpy.ones((10,), dtype=theano.config.floatX)
    out1, out2 = f(vx)
    assert out1 == 10
    assert out2 == 10
    lssc = [x for x in f.maker.fgraph.toposort()
            if isinstance(x.op, theano.scan_module.scan_op.Scan)]
    assert len(lssc) == 0
def test_infershape_nsteps_smaller_seq_length(self):
    x = tensor.vector('x')
    [o1, o2], _ = theano.scan(lambda x, y: (x + 1, y + x),
                              sequences=x,
                              outputs_info=[None, x[0]],
                              n_steps=20)

    f = theano.function([x],
                        [o1.shape[0], o2.shape[0]],
                        mode=mode_with_opt)

    vx = numpy.ones((30,), dtype=theano.config.floatX)
    o1, o2 = f(vx)
    assert o1 == 20
    assert o2 == 20
    lssc = [x for x in f.maker.fgraph.toposort()
            if isinstance(x.op, theano.scan_module.scan_op.Scan)]
    assert len(lssc) == 0
def model_baseline(s_x_, s_pdpo_):
    '''very simple logistic regression model'''
    global g_mdl, g_dataset
    s_bsize = T.shape(s_x_)[0]
    idim, odim = reduce(int.__mul__, g_dataset.imsize), len(g_dataset.label_map)
    return T.nnet.softmax(
        g_mdl.op_dropout(g_mdl.lyr_linear(
            'm', T.reshape(s_x_, (s_bsize, idim)),
            idim, odim), s_pdpo_))
def lyr_sconv_gen(name_, s_x_, idim_, odim_, **kwargs_):
    '''
    quick & dirty implementation of fxnn convolution layer
    '''
    global g_mdl
    dilation = kwargs_.get('dilation_')
    if dilation is None:
        dilation = 1
    init_scale = kwargs_.get('init_scale_')
    bias = kwargs_.get('bias_')
    op_conv = partial(
        T.nnet.conv2d,
        border_mode='half',
        filter_dilation=(dilation, dilation))
    ir = 0.5 / sqrt(idim_*5 + odim_)
    s_dims = T.shape(s_x_)
    s_x = T.reshape(s_x_, (s_dims[0]*idim_, 1, s_dims[2], s_dims[3]))
    s_x1 = T.reshape(op_conv(
        s_x, g_sconv_ker,
        filter_shape=(2, 1, 1, 3), **kwargs_),
        (s_dims[0]*idim_*2, 1, s_dims[2], s_dims[3]))
    s_x2 = T.reshape(op_conv(
        s_x1, g_sconv_ker.transpose(0, 1, 3, 2),
        filter_shape=(2, 1, 3, 1),
        ), (s_dims[0], idim_*4, s_dims[2], s_dims[3]))
    s_y = T.join(1, s_x2, s_x_)
    return g_mdl.lyr_conv(
        name_, s_y, idim_*5, odim_,
        fsize_=1, init_scale_=ir, **kwargs_)
def op_noise_uniform(self, s_x_, s_scale_):
    return s_x_ + self.rng.uniform(
        low=-s_scale_, high=s_scale_,
        size=T.shape(s_x_), dtype=th.config.floatX)
def get_output_shape_for(self, input_shapes):
    input_shapes = autocrop_array_shapes(input_shapes, self.cropping)
    # Infer the output shape by grabbing, for each axis, the first
    # input size that is not `None` (if there is any)
    output_shape = input_shapes[1]
    return output_shape
def get_output_for(self, upscaled, **kwargs):
    a, b = self.scale_factor
    # get output for pooling and pre-pooling layer
    inp, out = \
        lasagne.layers.get_output([self.pool2d_layer_in, self.pool2d_layer])
    # upscale the input feature map by scale_factor
    if b > 1:
        upscaled = T.extra_ops.repeat(upscaled, b, 3)
    if a > 1:
        upscaled = T.extra_ops.repeat(upscaled, a, 2)
    # get the shapes for pre-pooling layer and upscaled layer
    sh_pool2d_in = T.shape(inp)
    sh_upscaled = T.shape(upscaled)
    # in case the shape is different left-bottom-pad with zero
    tmp = T.zeros(sh_pool2d_in)

    indx = (slice(None),
            slice(None),
            slice(0, sh_upscaled[2]),
            slice(0, sh_upscaled[3]))
    upscaled = T.set_subtensor(tmp[indx], upscaled)
    # get max pool indices
    indices_pool = T.grad(None, wrt=inp,
                          known_grads={out: T.ones_like(out)})
    # mask values using indices_pool
    f = indices_pool * upscaled
    return f
def get_output_for(self, input, deterministic=False, **kwargs):
    """
    Parameters
    ----------
    input : tensor
        output from the previous layer
    deterministic : bool
        If true noise is disabled, see notes
    """
    if deterministic or self.sigma == 0:
        return input
    else:
        return softmax4D(input + self._srng.normal(input.shape,
                                                   avg=0.0,
                                                   std=self.sigma))
def get_output_for(self, input, deterministic=False, **kwargs):
    """
    Parameters
    ----------
    input : tensor
        output from the previous layer
    deterministic : bool
        If true noise is disabled, see notes
    """
    if deterministic or self.sigma == 0:
        return input
    else:
        return T.clip(input + self._srng.normal(input.shape,
                                                avg=0.0,
                                                std=self.sigma),
                      0.0, 1.0)
def get_output_for(self, input, **kwargs):
    all_dims = range(len(T.shape(input)))
    print all_dims
    return T.Unbroadcast(input, *all_dims)
def op_matmul(s_x_, s_y_, axes_=(-2, -1)):
    '''
    limited implementation of np.matmul, does not support broadcasting

    Args:
        s_x_: (batch of) matrix(matrices)
        s_y_: (batch of) matrix(matrices)
        axes_: tuple of int, the axes for the matrix
    '''
    assert s_x_.ndim == s_y_.ndim
    ndim = s_x_.ndim
    assert -ndim <= axes_[0] < ndim
    assert -ndim <= axes_[1] < ndim
    assert ndim >= 2
    axes = axes_[0] % ndim, axes_[1] % ndim
    if ndim == 2:
        if axes == (0, 1):
            return T.dot(s_x_, s_y_)
        else:
            return T.dot(s_y_, s_x_)
    s_shp = T.shape(s_x_)
    s_size = reduce(T.mul, [s_shp[i] for i in range(s_x_.ndim) if i not in axes])
    s_szu = s_shp[axes[0]]
    s_szv = s_shp[axes[1]]
    s_szw = T.shape(s_y_)[axes[1]]
    transpp = list(range(ndim))
    transpp[axes[0]], transpp[ndim-2] = transpp[ndim-2], transpp[axes[0]]
    transpp[axes[1]], transpp[ndim-1] = transpp[ndim-1], transpp[axes[1]]
    s_shp2 = [s_shp[a] for a in transpp]
    s_shp2[axes[1]] = s_szw
    s_x = s_x_.transpose(*transpp).reshape((s_size, s_szu, s_szv))
    s_y = s_y_.transpose(*transpp).reshape((s_size, s_szv, s_szw))
    return T.batched_dot(s_x, s_y).reshape(s_shp2).transpose(transpp)
def op_unitary_loss(s_re_, s_im_, axes_=None, size_=None):
    '''
    unitary matrix loss of real/imag part,
    used to regularize parameter to unitary

    Args:
        s_re_: real part, square matrix
        s_im_: imag part, square matrix
        size_: specify args to be (size_ x size_) matrices
        axes_: tuple of two integers, specify which axes to be for matrix,
            defaults to last two axes
    '''
    if axes_ is None:
        axes_ = (-2, -1)
    if size_ is None:
        ax = axes_[0]
        size = T.shape(s_re_)[ax]
    else:
        size = size_

    assert s_re_.ndim == s_im_.ndim

    tpat = list(range(s_re_.ndim))
    bpat = ['x'] * s_re_.ndim
    tpat[axes_[0]], tpat[axes_[1]] = tpat[axes_[1]], tpat[axes_[0]]
    bpat[axes_[0]] = 0
    bpat[axes_[1]] = 1

    s_y_re_ = T.dot(s_re_.transpose(*tpat), s_re_) + T.dot(s_im_.transpose(*tpat), s_im_)
    s_tmp = T.dot(s_re_.transpose(*tpat), s_im_)
    s_y_im_ = s_tmp - s_tmp.transpose(*tpat)
    return T.mean(T.sqr(s_y_re_ - T.eye(size).dimshuffle(*bpat)) + T.sqr(s_y_im_))
def load_params(self, f_, format_=Default, filter_=None):
    '''
    This loads parameters into the current group.
    A dict containing string->shared_variable pairs will be loaded.

    Args:
        f_: readable file or filename string
        format_: string, file format. Default is to interpret from file name.
            Supported formats: "pkl"
        filter_: string or None, regex pattern to filter

    Notes:
        This will update the current group with newly loaded key-value pairs,
        rather than clear-then-load.
    '''
    if isinstance(f_, str):
        f_ = open(f_, 'rb')
    di = pickle.load(f_)
    if filter_ is None:
        self._current_group_di.update(di)
    else:
        pat = re.compile(filter_)
        self._current_group_di.update({k: v for k, v in di.items() if pat.fullmatch(k)})
        for k, v in di.items():
            if not pat.fullmatch(k):
                continue
            p = self._current_group_di[k].get_value(borrow=True)
            if p.shape != v.shape:
                raise ValueError('Shape mismatch, need %s, got %s' % (v.shape, p.shape), p.shape)
            self._current_group_di[k].set_value(v)
def forward(self, inputtensor):
    x = inputtensor[0]
    #input_shape=(self.batchSize,
    #             self.inputFeatureDim,
    #             int(self.dataSize[0]*self.subsample[0]),
    #             int(self.dataSize[1]*self.subsample[1]))
    #filter_shape=(self.outputFeatureDim,
    #              self.inputFeatureDim,
    #              *self.filterSize)
    #print("{}, {}".format(input_shape, filter_shape))
    if self.isAfterFullConn:
        x = T.reshape(x, (T.shape(x)[0], self.outputFeature, 1, 1))

    # All input/output are referring to the convolutional operator,
    # so when using it, think in the opposite way.
    y = T.nnet.abstract_conv.conv2d_grad_wrt_inputs(
        x,
        self.w,
        input_shape=(None,
                     self.inputFeatureDim,  # None is also ok for inputFeatureDim
                     int(self.dataSize[0]*self.subsample[0]),
                     int(self.dataSize[1]*self.subsample[1])),
        filter_shape=(self.outputFeatureDim,
                      self.inputFeatureDim,
                      *self.filterSize),
        border_mode='valid',
        subsample=self.subsample)
    return (y,)
def binarize_weights(W, H, srng=None, deterministic=False):
    """
    Copied from BinaryNet by Matthieu Courbariaux, https://github.com/MatthieuCourbariaux/BinaryNet
    :param W:
    :param H:
    :param srng:
    :param deterministic:
    :return: quantized weights
    """
    if srng is None:
        rng = np.random.RandomState(666)
        srng = theano.sandbox.rng_mrg.MRG_RandomStreams(rng.randint(999999))

    # [-1,1] -> [0,1]
    Wb = T.clip(((W / H) + 1.) / 2., 0, 1)

    # Deterministic BinaryConnect (round to nearest)
    if deterministic:
        # print("det")
        Wb = T.cast(GradPreserveRoundTensor(Wb), theano.config.floatX)
    # Stochastic BinaryConnect
    else:
        # print("stoch")
        Wb = T.cast(srng.binomial(n=1, p=Wb, size=T.shape(Wb)), theano.config.floatX)

    Wb = T.cast(T.switch(Wb, H, -H), theano.config.floatX)

    return Wb
def ternarize_weights(W, W0, deterministic=False, srng=None):
    """
    Changed copy of the code from TernaryConnect by Zhouhan Lin, Matthieu Courbariaux,
    https://github.com/hantek/BinaryConnect/tree/ternary
    :param W: Weights
    :param W0: W0=0.5
    :param deterministic: deterministic rounding
    :param srng: random number generator
    :return: quantized weights
    """
    Wb = None
    #print 'Current W0: ', W0
    if srng is None:
        rng = np.random.RandomState(666)
        srng = theano.sandbox.rng_mrg.MRG_RandomStreams(rng.randint(999999))
    if deterministic:
        #print 'Deterministic Ternarization!'
        larger_than_neg_0_5 = T.gt(W, -W0/2.)
        larger_than_pos_0_5 = T.gt(W, W0/2.)
        W_val = larger_than_neg_0_5 * 1 + larger_than_pos_0_5 * 1 - 1
        Wb = W_val * W0
    else:
        #print 'Stochastic Ternarization!'
        w_sign = T.gt(W, 0) * 2 - 1
        p = T.clip(T.abs_(W / (W0)), 0, 1)
        Wb = W0 * w_sign * T.cast(srng.binomial(n=1, p=p, size=T.shape(W)), theano.config.floatX)
    return Wb
def quantize_weights(W, srng=None, bitlimit=None, deterministic=False):
    """
    Exponential quantization
    :param W: Weights
    :param srng: random number generator
    :param bitlimit: limit values to be in power of 2 range,
        e.g. for values in 2^-22 to 2^9 set it to [-22, 9]
    :param deterministic: deterministic rounding
    :return: quantized weights
    """
    bitlimit = [-22, 9]  # hardcoded for experiments
    if srng is None:
        rng = np.random.RandomState(666)
        srng = theano.sandbox.rng_mrg.MRG_RandomStreams(rng.randint(999999))

    if bitlimit:
        index_low = T.clip(
            T.switch(W > 0., T.floor(T.log2(W)), T.floor(T.log2(-W))),
            bitlimit[0], bitlimit[1])
    else:
        index_low = T.switch(
            W > 0., T.floor(T.log2(W)), T.floor(T.log2(-W)))
    sign = T.switch(W > 0., 1., -1.)
    p_up = sign * W / 2 ** (index_low) - 1  # percentage of upper index.
    if deterministic:
        index_deterministic = index_low + T.switch(p_up > 0.5, 1, 0)
        quantized_W = sign * 2 ** index_deterministic
    else:
        index_random = index_low + srng.binomial(
            n=1, p=p_up, size=T.shape(W), dtype=theano.config.floatX)
        quantized_W = sign * 2 ** index_random
    return quantized_W
def compute_estimator(self, log_p_all, log_q_all):
    n_samples = tt.shape(log_p_all)[1]

    # See equation 14, for definition of I see equation 2
    f_x_h = log_p_all - log_q_all              # f_x_h: (batch_size, n_samples)
    sum_p_over_q = logsumexp(f_x_h, axis=1)    # sum_p_over_q: (batch_size, )
    L = sum_p_over_q - tt.log(n_samples)       # L: (batch_size, )

    # Equation 10
    sum_min_i = logsubexp(sum_p_over_q.dimshuffle(0, 'x'), f_x_h)
    sum_min_i_normalized = sum_min_i - np.log(n_samples - 1).astype(theano.config.floatX)
    L_h_given_h = L.dimshuffle(0, 'x') - sum_min_i_normalized  # equation (10)

    # Get gradient of log Q and scale
    part_1 = L_h_given_h * log_q_all  # equation 11, part 1

    weights = f_x_h - sum_p_over_q.dimshuffle(0, 'x')
    exp_weights = tt.exp(weights)
    part_2 = exp_weights * f_x_h

    estimator = (part_1 + part_2).sum() / self.batch_size

    gradients = tt.grad(estimator,
                        self.params.values(),
                        consider_constant=[exp_weights, L_h_given_h])

    likelihood = L.sum() / self.batch_size

    return likelihood, gradients
def train(self, learning_rate, epoch):
    batch_likelihood_list = np.array([])
    batch_order = self.get_batch_order(self.N_train)
    self.prng.shuffle(batch_order)

    for i, batch in enumerate(batch_order):
        samples = self.sample_train(batch)
        batch_L = self.update_train(learning_rate, epoch, *samples)
        batch_likelihood_list = np.append(batch_likelihood_list, batch_L)

    assert(batch_likelihood_list.shape[0] == len(batch_order))
    return np.mean(batch_likelihood_list)
def valid(self):
    batch_likelihood_list = np.array([])
    batch_order = self.get_batch_order(self.N_valid)
    self.prng.shuffle(batch_order)

    for i, batch in enumerate(batch_order):
        samples = self.sample_valid(batch)
        batch_L = self.likelihood_valid(*samples)
        batch_likelihood_list = np.append(batch_likelihood_list, batch_L)

    assert(batch_likelihood_list.shape[0] == len(batch_order))
    return np.mean(batch_likelihood_list)
def test(self):
    batch_likelihood_list = np.array([])
    batch_order = self.get_batch_order(self.N_test)
    self.prng.shuffle(batch_order)

    for i, batch in enumerate(batch_order):
        samples = self.sample_test(batch)
        batch_L = self.likelihood_test(*samples)
        batch_likelihood_list = np.append(batch_likelihood_list, batch_L)

    assert(batch_likelihood_list.shape[0] == len(batch_order))
    return np.mean(batch_likelihood_list)
def compute_samples(self, srng, Z_below, layer):
    q_z_above_given_z_below = sigmoid(tt.dot(Z_below, self.params['W_enc_' + str(layer)])
                                      + self.params['b_enc_' + str(layer)])
    U = srng.uniform(q_z_above_given_z_below.shape)
    Z = tt.cast(q_z_above_given_z_below > U, dtype=theano.config.floatX)
    return Z
def binarization(W, H, binary=True, deterministic=False, stochastic=False, srng=None):
    # (deterministic == True) <-> test-time <-> inference-time
    if not binary or (deterministic and stochastic):
        # print("not binary")
        Wb = W
    else:
        # [-1,1] -> [0,1]
        Wb = hard_sigmoid(W/H)

        # Stochastic BinaryConnect
        if stochastic:
            # print("stoch")
            Wb = T.cast(srng.binomial(n=1, p=Wb, size=T.shape(Wb)), theano.config.floatX)
        # Deterministic BinaryConnect (round to nearest)
        else:
            # print("det")
            Wb = T.round(Wb)

        # 0 or 1 -> -1 or 1
        Wb = T.cast(T.switch(Wb, H, -H), theano.config.floatX)

    return Wb

# This class extends the Lasagne DenseLayer to support BinaryConnect
def zeros(shape, dtype=np.float32):
    return np.zeros(shape, dtype)

# TODO: convert this to a theano function
def gaussian_kl(means1_N_D, stdevs1_N_D, means2_N_D, stdevs2_N_D):
    '''
    KL divergences between Gaussians with diagonal covariances.
    Covariance matrices are specified with square roots of the diagonal (standard deviations).
    '''
    D = tensor.shape(means1_N_D)[1]
    return (
        .5 * (tensor.sqr(stdevs1_N_D/stdevs2_N_D).sum(axis=1) +
              tensor.sqr((means2_N_D-means1_N_D)/stdevs2_N_D).sum(axis=1) +
              2.*(tensor.log(stdevs2_N_D).sum(axis=1) - tensor.log(stdevs1_N_D).sum(axis=1)) -
              D))
def gaussian_log_density(means_N_D, stdevs_N_D, x_N_D):
    '''Log density of a Gaussian distribution with diagonal covariance (specified as standard deviations).'''
    D = tensor.shape(means_N_D)[1]
    lognormconsts_B = -.5*(D*np.log(2.*np.pi) +
                           2.*tensor.log(stdevs_N_D).sum(axis=1))  # log normalization constants
    logprobs_B = -.5*tensor.sqr((x_N_D - means_N_D)/stdevs_N_D).sum(axis=1) + lognormconsts_B
    return logprobs_B
def function(inputs, outputs, **kwargs):
    # Cache compiled function
    f = theano.function(inputs, outputs, **kwargs)

    def wrapper(*args):
        # Execute
        out = f(*args)
        # Find output elements with shape == () and convert them to scalars
        is_list = isinstance(out, (list, tuple))
        out_as_list = list(out) if is_list else [out]
        for i in xrange(len(out_as_list)):
            if isinstance(out_as_list[i], np.ndarray) and out_as_list[i].shape == ():
                out_as_list[i] = np.asscalar(out_as_list[i])
        return out_as_list if is_list else out_as_list[0]
    return wrapper
def setup_loss_net(self):
    """
    Create a network of convolution layers based on the VGG16 architecture
    from the paper:
    "Very Deep Convolutional Networks for Large-Scale Image Recognition"

    Original source: https://gist.github.com/ksimonyan/211839e770f7b538e2d8
    License: see http://www.robots.ox.ac.uk/~vgg/research/very_deep/

    Based on code in the Lasagne Recipes repository:
    https://github.com/Lasagne/Recipes
    """
    loss_net = self.network['loss_net']
    loss_net['input'] = InputLayer(shape=self.shape)
    loss_net['conv1_1'] = ConvLayer(loss_net['input'], 64, 3, pad=1, flip_filters=False)
    loss_net['conv1_2'] = ConvLayer(loss_net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    loss_net['pool1'] = PoolLayer(loss_net['conv1_2'], 2)
    loss_net['conv2_1'] = ConvLayer(loss_net['pool1'], 128, 3, pad=1, flip_filters=False)
    loss_net['conv2_2'] = ConvLayer(loss_net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    loss_net['pool2'] = PoolLayer(loss_net['conv2_2'], 2)
    loss_net['conv3_1'] = ConvLayer(loss_net['pool2'], 256, 3, pad=1, flip_filters=False)
    loss_net['conv3_2'] = ConvLayer(loss_net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    loss_net['conv3_3'] = ConvLayer(loss_net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    loss_net['pool3'] = PoolLayer(loss_net['conv3_3'], 2)
    loss_net['conv4_1'] = ConvLayer(loss_net['pool3'], 512, 3, pad=1, flip_filters=False)
    loss_net['conv4_2'] = ConvLayer(loss_net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    loss_net['conv4_3'] = ConvLayer(loss_net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    loss_net['pool4'] = PoolLayer(loss_net['conv4_3'], 2)
    loss_net['conv5_1'] = ConvLayer(loss_net['pool4'], 512, 3, pad=1, flip_filters=False)
    loss_net['conv5_2'] = ConvLayer(loss_net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    loss_net['conv5_3'] = ConvLayer(loss_net['conv5_2'], 512, 3, pad=1, flip_filters=False)
def batched_gram5d(self, fmap):
    # (layer, batch, featuremaps, height*width)
    fmap = fmap.flatten(ndim=4)

    # (layer*batch, featuremaps, height*width)
    fmap2 = fmap.reshape((-1, fmap.shape[-2], fmap.shape[-1]))

    # The T.prod term can't be taken outside as a T.mean in style_loss(),
    # since the width and height of the image might vary
    return T.batched_dot(fmap2, fmap2.dimshuffle(0, 2, 1)).reshape(fmap.shape) / T.prod(fmap.shape[-2:])
def style_loss5d(self, out_layer, target_style_layer):
    # Each input is a 5D tensor: (style loss layer, batch, feature map, height, width)
    return T.mean(T.sum(T.sqr(self.batched_gram(out_layer) -
                              T.tile(self.batched_gram(target_style_layer),
                                     (1, T.shape(out_layer)[0], 1, 1))),
                        axis=(2, 3)),
                  axis=1)