The following are 50 code examples, extracted from open-source Python projects, showing how to use theano.tensor.ge().
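Before the project examples, here is a minimal usage sketch (not taken from any of the projects below): T.ge(a, b) builds a symbolic elementwise "a >= b" comparison that broadcasts like other elementwise ops and evaluates to a tensor of 0/1 values.

    import numpy as np
    import theano
    import theano.tensor as T

    # Minimal sketch: elementwise >= on symbolic vectors.
    a = T.vector('a')
    b = T.vector('b')
    mask = T.ge(a, b)  # 0/1 tensor with the broadcasted shape of a and b

    f = theano.function([a, b], mask)
    print(f(np.asarray([1., 2., 3.], dtype=theano.config.floatX),
            np.asarray([2., 2., 2.], dtype=theano.config.floatX)))  # -> [0 1 1]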
def _encode(self, application_call, text, mask, def_embs=None, def_map=None, text_name=None):
    if not self._random_unk:
        text = (
            tensor.lt(text, self._num_input_words) * text
            + tensor.ge(text, self._num_input_words) * self._vocab.unk)
    if text_name:
        application_call.add_auxiliary_variable(
            unk_ratio(text, mask, self._vocab.unk),
            name='{}_unk_ratio'.format(text_name))
    embs = self._lookup.apply(text)
    if self._random_unk:
        embs = (
            tensor.lt(text, self._num_input_words)[:, :, None] * embs
            + tensor.ge(text, self._num_input_words)[:, :, None] * disconnected_grad(embs))
    if def_embs:
        embs, _, _ = self._combiner.apply(embs, mask, def_embs, def_map)
    add_role(embs, EMBEDDINGS)
    encoded = flip01(
        self._encoder_rnn.apply(
            self._encoder_fork.apply(
                flip01(embs)),
            mask=mask.T)[0])
    return encoded
def apply(self, application_call, defs, def_mask):
    """
    Returns vector per each word in sequence using the dictionary based lookup
    """
    # Short listing
    defs_sl_main = (T.lt(defs, self._num_input_words) * defs
                    + T.ge(defs, self._num_input_words) * self._vocab.unk)
    defs_sl_cache = (T.ge(defs, self._num_input_words) * defs
                     + T.lt(defs, self._num_input_words) * self._vocab.unk)

    application_call.add_auxiliary_variable(
        unk_ratio(defs_sl_main, def_mask, self._vocab.unk),
        name='def_unk_ratio')

    embedded_def_words = self._def_lookup.apply(defs_sl_main)
    cached_embeddings = self._cache.apply(defs_sl_cache)
    final_embeddings = (T.lt(defs, self._num_input_words).dimshuffle(0, 1, 'x') * embedded_def_words
                        + T.ge(defs, self._num_input_words).dimshuffle(0, 1, 'x') * cached_embeddings)

    def_embeddings = self._def_rnn.apply(
        T.transpose(self._def_fork.apply(final_embeddings), (1, 0, 2)),
        mask=def_mask.T)[0][-1]

    return def_embeddings
def log_cross_entropy_extended(x, x_theta, log_distribution, k_max, eps = 0.0):
    p_k = x_theta["p_k"]
    F = x.shape[1]

    p_k = T.clip(p_k, eps, 1.0)
    x_k = T.clip(x, 0, k_max)

    p_k = T.reshape(p_k, (-1, k_max + 1))
    x_k = T.reshape(x_k, (-1, 1))

    y_cross_entropy = objectives.categorical_crossentropy(p_k, x_k)
    y_cross_entropy = T.reshape(y_cross_entropy, (-1, F))

    y_log_distribution = T.ge(x, k_max) * log_distribution(x - k_max, x_theta, eps)

    # y = - T.lt(x, 0) * y_cross_entropy + y_log_distribution
    y = - y_cross_entropy + T.lt(x, 0) * y_log_distribution
    # y = - y_cross_entropy + y_log_distribution

    return y
def log_softmax_poisson(x, p_k, log_lambda, k_max = 10, eps = 0.0):
    F = x.shape[1]

    p_k = T.clip(p_k, eps, 1.0 - eps)
    x_k = T.clip(x, 0, k_max)

    p_k = T.reshape(p_k, (-1, k_max + 1))
    x_k = T.reshape(x_k, (-1, 1))

    y_cross_entropy = objectives.categorical_crossentropy(p_k, x_k)
    y_cross_entropy = T.reshape(y_cross_entropy, (-1, F))

    y_log_poisson = T.ge(x, k_max) * log_poisson(x - k_max, log_lambda, eps)

    y = - y_cross_entropy + y_log_poisson

    return y
def log_softmax_negative_binomial(x, p_k, p, log_r, k_max = 10, eps = 0.0):
    F = x.shape[1]

    p_k = T.clip(p_k, eps, 1.0 - eps)
    x_k = T.clip(x, 0, k_max)

    p_k = T.reshape(p_k, (-1, k_max + 1))
    x_k = T.reshape(x_k, (-1, 1))

    y_cross_entropy = objectives.categorical_crossentropy(p_k, x_k)
    y_cross_entropy = T.reshape(y_cross_entropy, (-1, F))

    y_log_negative_binomial = T.ge(x, k_max) \
        * log_negative_binomial(x - k_max, p, log_r, eps)

    y = - y_cross_entropy + y_log_negative_binomial

    return y
def SGD(tparams, cost, inps, lr, clip_norm=5):
    """ default: lr=0.01 """

    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        updated_p = p - lr * g
        updates.append((p, updated_p))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def Adagrad(tparams, cost, inps, lr, epsilon=1e-6, clip_norm=5):
    """ default: lr=0.01 """

    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        acc = theano.shared(p.get_value() * 0.)
        acc_t = acc + g ** 2
        updates.append((acc, acc_t))
        p_t = p - (lr / tensor.sqrt(acc_t + epsilon)) * g
        updates.append((p, p_t))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def in_top_k(predictions, targets, k):
    """Returns whether the `targets` are in the top `k` `predictions`.

    # Arguments
        predictions: A tensor of shape `(batch_size, classes)` and type `float32`.
        targets: A 1D tensor of length `batch_size` and type `int32` or `int64`.
        k: An `int`, number of top elements to consider.

    # Returns
        A 1D tensor of length `batch_size` and type `bool`. `output[i]` is `True` if
        `predictions[i, targets[i]]` is within top-`k` values of `predictions[i]`.
    """
    # handle k < 1 and k >= predictions.shape[1] cases to match TF behavior
    if k < 1:
        # dtype='bool' is only available since Theano 0.9.0
        try:
            return T.zeros_like(targets, dtype='bool')
        except TypeError:
            return T.zeros_like(targets, dtype='int8')

    if k >= int_shape(predictions)[1]:
        try:
            return T.ones_like(targets, dtype='bool')
        except TypeError:
            return T.ones_like(targets, dtype='int8')

    predictions_k = T.sort(predictions)[:, -k]
    targets_values = predictions[T.arange(targets.shape[0]), targets]
    return T.ge(targets_values, predictions_k)


# CONVOLUTIONS
def test_inequality_with_self(self):
    x = T.scalar('x', dtype=config.floatX)
    mode = theano.compile.get_default_mode().including('local_useless_elemwise_comparison')

    f = theano.function([x], T.lt(x, x), mode=mode)
    self.assert_eqs_const(f, 0)

    f = theano.function([x], T.le(x, x), mode=mode)
    self.assert_eqs_const(f, 1)

    f = theano.function([x], T.gt(x, x), mode=mode)
    self.assert_eqs_const(f, 0)

    f = theano.function([x], T.ge(x, x), mode=mode)
    self.assert_eqs_const(f, 1)

    f = theano.function([x], T.minimum(x, x), mode=mode)
    self.assert_identity(f)

    f = theano.function([x], T.maximum(x, x), mode=mode)
    self.assert_identity(f)
def test_elemwise_comparaison_cast():
    """
    test if an elemwise comparaison followed by a cast to float32 are
    pushed to gpu.
    """
    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4, 4), dtype='float32')
    bv = numpy.ones((4, 4), dtype='float32')

    for g, ans in [(tensor.lt, av < bv), (tensor.gt, av > bv),
                   (tensor.le, av <= bv), (tensor.ge, av >= bv)]:
        f = pfunc([a, b], tensor.cast(g(a, b), 'float32'), mode=mode_with_gpu)
        out = f(av, bv)
        assert numpy.all(out == ans)
        assert any([isinstance(node.op, cuda.GpuElemwise)
                    for node in f.maker.fgraph.toposort()])
def SGD(tparams, cost, inps, lr):
    """ default: lr=0.01 """

    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, 5):
        grads = [g*5/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        updated_p = p - lr * g
        updates.append((p, updated_p))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def greater_equal(x, y):
    return T.ge(x, y)
def apply(self, application_call, defs, def_mask):
    """
    Returns vector per each word in sequence using the dictionary based lookup
    """
    # Short listing
    defs = (T.lt(defs, self._num_input_words) * defs
            + T.ge(defs, self._num_input_words) * self._vocab.unk)

    # Memory bottleneck:
    # For instance (16101,52,300) ~= 32GB.
    # [(16786, 52, 1), (16786, 52, 100)]
    # TODO: Measure memory consumption here and check if it is in sensible range
    # or maybe introduce some control in Retrieval?
    defs_emb = self._def_lookup.apply(defs)

    application_call.add_auxiliary_variable(
        unk_ratio(defs, def_mask, self._vocab.unk),
        name='def_unk_ratio')

    if self._translate:
        logger.info("Translating in MeanPoolReadDefinitions")
        # Translate. Crucial for recovering useful information from embeddings
        defs_emb = self._def_translate.apply(defs_emb)

    def_emb_mask = def_mask[:, :, None]
    defs_emb = (def_emb_mask * defs_emb).sum(axis=1)
    if self._normalize:
        defs_emb = defs_emb / def_emb_mask.sum(axis=1)

    return defs_emb
def binary_accuracy(probs, y, mask, length_var):
    # (n_samples, n_timesteps_f)
    probs_shp = probs.shape
    predicted = T.ge(probs, 0.5)
    # (n_samples * n_timesteps_f)
    y_flat = y.flatten()
    acc = lasagne.objectives.binary_accuracy(probs.flatten(), y_flat)
    # (n_samples, n_timesteps_f)
    acc = acc.reshape(probs_shp)
    acc = acc * mask
    acc = T.sum(acc, axis=1) / length_var
    return acc, predicted
def error(self, y, threshold=0.5):
    return tensor.mean(tensor.eq(tensor.ge(self.prediction(), threshold), y))
def ge(self, l, r):
    return T.ge(l, r)
def clip_gradients(stack_config, grad_param):
    '''
    TODO
    Gradients need to be clipped while updating.
    Params
    ------
    stack_config :
    grads :
    params :
    '''
    threshold = stack_config['clipping_value']
    print 'clip_gradients threshold', threshold
    if threshold > 0:
        gradients_to_clip = []
        gradients_not_to_clip = []
        for (g, p) in grad_param:
            if (hasattr(p, 'clip_gradient') and p.clip_gradient):
                gradients_to_clip.append((g, p))
                print p.name, 'gradient is being clipped in optimizer.clip_gradients'
            else:
                gradients_not_to_clip.append((g, p))
        if len(gradients_to_clip) == 0:
            return grad_param

        total_grad_norm = tensor.sqrt(tensor.sum(
            [tensor.sum(g * g) for (g, _) in gradients_to_clip]))
        grad_norm_gt_threshold = tensor.ge(total_grad_norm, threshold)
        grad_thresholder = lambda _g: (tensor.switch(
            grad_norm_gt_threshold, _g * (threshold / total_grad_norm), _g))

        clipped_grad_param = []
        for (g, p) in gradients_to_clip:
            cg = grad_thresholder(g)
            cg.wrt_name = g.wrt_name
            clipped_grad_param.append((cg, p))
        clipped_grad_param += gradients_not_to_clip
        return clipped_grad_param
    else:
        return grad_param
def __call__(self, p):
    p *= T.ge(p, 0)
    return p
def Momentum(tparams, cost, inps, lr, momentum=0.9, clip_norm=5):
    """ default: lr=0.01 """

    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        m = theano.shared(p.get_value() * 0.)
        m_new = momentum * m - lr * g
        updates.append((m, m_new))

        updated_p = p + m_new
        updates.append((p, updated_p))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def NAG(tparams, cost, inps, lr, momentum=0.9, clip_norm=5):
    """ default: lr=0.01 """

    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        m = theano.shared(p.get_value() * 0.)
        m_new = momentum * m - lr * g
        updates.append((m, m_new))

        updated_p = p + momentum * m_new - lr * g
        updates.append((p, updated_p))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def Adadelta(tparams, cost, inps, lr, rho=0.95, epsilon=1e-6, clip_norm=5):
    """ default: lr=0.5 """

    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        acc = theano.shared(p.get_value() * 0.)
        acc_delta = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        updates.append((acc, acc_new))

        update = g * tensor.sqrt(acc_delta + epsilon) / tensor.sqrt(acc_new + epsilon)
        updated_p = p - lr * update
        updates.append((p, updated_p))

        acc_delta_new = rho * acc_delta + (1 - rho) * update ** 2
        updates.append((acc_delta, acc_delta_new))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def RMSprop_v2(tparams, cost, inps, lr, rho=0.95, momentum=0.9, epsilon=1e-4, clip_norm=5):
    """ default: lr=0.0001
        This is the implementation of the RMSprop algorithm used in
        http://arxiv.org/pdf/1308.0850v5.pdf
    """
    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        acc = theano.shared(p.get_value() * 0.)
        acc2 = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1.-rho) * g
        acc2_new = rho * acc + (1.-rho) * (g ** 2)
        updates.append((acc, acc_new))
        updates.append((acc2, acc2_new))

        updir = theano.shared(p.get_value() * 0.)
        updir_new = momentum * updir - lr * g / tensor.sqrt(acc2_new - acc_new ** 2 + epsilon)
        updates.append((updir, updir_new))

        updated_p = p + updir_new
        updates.append((p, updated_p))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def Adam(tparams, cost, inps, lr, b1=0.1, b2=0.001, e=1e-8, clip_norm=5):
    """ default: lr=0.0002
        This is the implementation of the Adam algorithm
        Reference: http://arxiv.org/pdf/1412.6980v8.pdf
    """
    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    i = theano.shared(numpy_floatX(0.))
    i_t = i + 1.
    fix1 = 1. - b1**(i_t)
    fix2 = 1. - b2**(i_t)
    lr_t = lr * (tensor.sqrt(fix2) / fix1)

    for p, g in zip(tparams.values(), gshared):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * tensor.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (tensor.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def binarize_conv_filters(W):
    """Binarize convolution weights and find the weight scaling factor
    W : theano tensor : convolution layer weight of dimension no_filters x no_feat_maps x h x w
    """
    # symbolic binary weight
    Wb = T.cast(T.switch(T.ge(W, 0), 1, -1), theano.config.floatX)
    # BinaryNet method
    #Wb = T.cast(T.switch(T.round(hard_sigmoid(W),1,-1)), theano.config.floatX)

    # weight scaling factor
    # FIXME: directly compute the mean along axis 1,2,3 instead of reshaping
    alpha = T.mean(
        T.reshape(T.abs_(W), (W.shape[0], W.shape[1]*W.shape[2]*W.shape[3])), axis=1)

    return Wb, alpha
def binarize_fc_weights(W):
    # symbolic binary weight
    Wb = T.cast(T.switch(T.ge(W, 0), 1, -1), theano.config.floatX)
    # BinaryNet method
    #Wb = T.cast(T.switch(T.round(hard_sigmoid(W)),1,-1), theano.config.floatX)

    alpha = T.mean(T.abs_(W), axis=0)
    return Wb, alpha
def SignTheano(x):
    return T.cast(2.*T.ge(x, 0)-1., theano.config.floatX)
def sylu(gain=10, spread=0.1):
    return lambda x: switch(T.ge(x, (1 / spread)), gain, 0) + \
                     switch(T.and_(T.gt((1 / spread), x), T.gt(x, -(1 / spread))), gain * spread * x, 0) + \
                     switch(T.le(x, -(1 / spread)), -gain, 0)


# Exponential Linear Unit
def RMSprop_v1(tparams, cost, inps, lr, rho=0.9, epsilon=1e-6, cutoff=1e10):
    """ default: lr=0.001
        This is the implementation of the RMSprop algorithm used in
        http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf.
    """
    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, 5):
        grads = [g*5/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        updates.append((acc, acc_new))

        updated_p = p - lr * (g / tensor.sqrt(acc_new + epsilon))
        updated_p = tensor.switch(tensor.ge(updated_p, cutoff), cutoff, updated_p)
        updated_p = tensor.switch(tensor.le(updated_p, -cutoff), -cutoff, updated_p)
        updates.append((p, updated_p))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def RMSprop_v2(tparams, cost, inps, lr, rho=0.95, momentum=0.9, epsilon=1e-4):
    """ default: lr=0.0001
        This is the implementation of the RMSprop algorithm used in
        http://arxiv.org/pdf/1308.0850v5.pdf
    """
    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, 5):
        grads = [g*5/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        acc = theano.shared(p.get_value() * 0.)
        acc2 = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1.-rho) * g
        acc2_new = rho * acc + (1.-rho) * (g ** 2)
        updates.append((acc, acc_new))
        updates.append((acc2, acc2_new))

        updir = theano.shared(p.get_value() * 0.)
        updir_new = momentum * updir - lr * g / tensor.sqrt(acc2_new - acc_new ** 2 + epsilon)
        updates.append((updir, updir_new))

        updated_p = p + updir_new
        updates.append((p, updated_p))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def Adam(tparams, cost, inps, lr, b1=0.1, b2=0.001, e=1e-8):
    """ default: lr=0.0002
        This is the implementation of the Adam algorithm
        Reference: http://arxiv.org/pdf/1412.6980v8.pdf
    """
    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, 5):
        grads = [g*5/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    i = theano.shared(numpy_floatX(0.))
    i_t = i + 1.
    fix1 = 1. - b1**(i_t)
    fix2 = 1. - b2**(i_t)
    lr_t = lr * (tensor.sqrt(fix2) / fix1)

    for p, g in zip(tparams.values(), gshared):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * tensor.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (tensor.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def pSGLD(tparams, cost, inps, ntrain, lr, rho=0.9, epsilon=1e-6, clip_norm=5):
    """ default: lr=0.001 """

    trng = RandomStreams(123)

    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    for p, g in zip(tparams.values(), gshared):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        updates.append((acc, acc_new))

        G = tensor.sqrt(acc_new + epsilon)

        eps = trng.normal(p.get_value().shape, avg=0.0, std=1.0,
                          dtype=theano.config.floatX)
        updated_p = p - lr * (g - p/ntrain) / G + tensor.sqrt(lr/G)*2./ntrain * eps
        updates.append((p, updated_p))

    f_update = theano.function([lr, ntrain], [], updates=updates)

    return f_grad_shared, f_update
def pSGLD_test(tparams, cost, inps, lr, rho=0.99, epsilon=1e-6, eta=0.01, anne_rate=0.55, clip_norm=5):
    """ default: lr=0.001 """

    trng = RandomStreams(123)

    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]

    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)

    updates = []

    i = theano.shared(numpy_floatX(0.))
    i_t = i + 1.

    for p, g in zip(tparams.values(), gshared):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        updates.append((acc, acc_new))

        G = tensor.sqrt(acc_new + epsilon)

        eps = trng.normal(p.get_value().shape, avg=0.0, std=1.0,
                          dtype=theano.config.floatX)
        updated_p = p - lr * g / G + tensor.sqrt(lr/G) * eta/(1+i_t)**anne_rate * eps
        updates.append((p, updated_p))

    updates.append((i, i_t))

    f_update = theano.function([lr], [], updates=updates)

    return f_grad_shared, f_update
def test_shape_inequality_with_self(self):
    x = T.vector('x', dtype=config.floatX)
    mode = theano.compile.get_default_mode().including('local_useless_elemwise_comparison',
                                                       'local_shape_to_shape_i',
                                                       'local_track_shape_i',
                                                       'local_subtensor_make_vector')
    f = theano.function([x], T.lt(x.shape[0], 0), mode=mode)
    self.assert_eqs_const(f, 0)

    f = theano.function([x], T.ge(x.shape[0], 0), mode=mode)
    self.assert_eqs_const(f, 1)

    f = theano.function([x], T.maximum(x.shape[0], 0), mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, Shape_i), topo[0].op
    x_val = numpy.ones(100, dtype=config.floatX)
    assert f(x_val) == x_val.shape[0]

    f = theano.function([x], T.maximum(0, x.shape[0]), mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, Shape_i), topo[0].op
    x_val = numpy.ones(100, dtype=config.floatX)
    assert f(x_val) == x_val.shape[0]

    f = theano.function([x], T.minimum(x.shape[0], 0), mode=mode)
    self.assert_eqs_const(f, 0)
    assert f(x_val) == 0

    f = theano.function([x], T.minimum(0, x.shape[0]), mode=mode)
    self.assert_eqs_const(f, 0)
    assert f(x_val) == 0

    f = theano.function([x], T.minimum([0, 0], x.shape[0]), mode=mode)
    # This case isn't optimized.
    # self.assert_eqs_const(f, 0)
    utt.assert_allclose(f(x_val), [0, 0])
def test_shape_add_inequality(self):
    x = T.vector('x', dtype=config.floatX)
    mode = theano.compile.get_default_mode().including('local_useless_elemwise_comparison',
                                                       'local_shape_to_shape_i',
                                                       'local_track_shape_i',
                                                       'local_subtensor_make_vector')
    y = T.vector('y', dtype=config.floatX)

    f = theano.function([x, y], T.lt(x.shape[0]+y.shape[0], 0), mode=mode)
    self.assert_eqs_const(f, 0)

    f = theano.function([x, y], T.ge(x.shape[0]+y.shape[0], 0), mode=mode)
    self.assert_eqs_const(f, 1)
def test_elemwise(self):
    # float Ops
    mats = theano.tensor.matrices('cabxy')
    c, a, b, x, y = mats
    s1 = T.switch(c, a, b)
    s2 = T.switch(c, x, y)
    for op in (T.add, T.sub, T.mul, T.true_div, T.int_div, T.floor_div,
               T.minimum, T.maximum, T.gt, T.lt, T.ge, T.le, T.eq, T.neq,
               T.pow):
        g = optimize(FunctionGraph(mats, [op(s1, s2)]))
        assert str(g).count('Switch') == 1
    # integer Ops
    mats = theano.tensor.imatrices('cabxy')
    c, a, b, x, y = mats
    s1 = T.switch(c, a, b)
    s2 = T.switch(c, x, y)
    for op in (T.and_, T.or_, T.xor,
               T.bitwise_and, T.bitwise_or, T.bitwise_xor):
        g = optimize(FunctionGraph(mats, [op(s1, s2)]))
        assert str(g).count('Switch') == 1
    # add/mul with more than two inputs
    u, v = theano.tensor.matrices('uv')
    s3 = T.switch(c, u, v)
    for op in (T.add, T.mul):
        g = optimize(FunctionGraph(mats + [u, v], [op(s1, s2, s3)]))
        assert str(g).count('Switch') == 1
def __call__(self, p):
    p = theano.shared(p)
    p *= T.ge(p, 0.)
    return p