def euclidean_distance(vects):
    """Compute the Euclidean distance between two backend tensors along axis 1.

    Parameters
    ----------
    vects : list
        Pair ``[x, y]`` of tensors to compare.

    Returns
    -------
    tensor
        ``sqrt(max(sum((x - y) ** 2, axis=1), 1e-08))`` with the reduced
        axis kept (``keepdims=True``).
    """
    first, second = vects
    floor = 1e-08
    squared_distance = K.sum(K.square(first - second), axis=1, keepdims=True)
    # Clamp before the square root so the argument is never below the floor.
    return K.sqrt(K.maximum(squared_distance, floor))
def gradient_penalty_loss(y_true, y_pred, averaged_samples, gradient_penalty_weight):
    """Calculate the gradient penalty loss for a batch of "averaged" samples.

    In Improved WGANs, the 1-Lipschitz constraint is enforced by adding a term
    to the loss function that penalizes the network if the gradient norm moves
    away from 1. It is impossible to evaluate this at all points in the input
    space, so the compromise used in the paper is to choose random points on
    the lines between real and generated samples and check the gradients at
    these points. Note that it is the gradient w.r.t. the input averaged
    samples, not the weights of the discriminator, that is penalized.

    This loss needs the averaged samples as input, but Keras only passes
    ``y_true`` and ``y_pred`` to loss functions. Callers are expected to bind
    ``averaged_samples`` and ``gradient_penalty_weight`` (e.g. with
    ``functools.partial``) before handing the function to Keras.

    Parameters
    ----------
    y_true : tensor
        Unused; present only to satisfy the Keras loss signature.
    y_pred : tensor
        Discriminator output for ``averaged_samples``.
    averaged_samples : tensor
        Input tensor the gradients are taken with respect to.
    gradient_penalty_weight : float
        Multiplier applied to the penalty.

    Returns
    -------
    tensor
        Scalar: the batch mean of ``weight * (1 - ||grad||_2) ** 2`` where the
        norm is computed separately for every sample.
    """
    # Passing a list makes every backend return a list, so [0] is always the
    # gradient tensor itself (never a slice of it).
    gradients = K.gradients(K.sum(y_pred), [averaged_samples])[0]
    # The penalty is defined per sample: reduce over every axis except the
    # batch axis instead of collapsing the whole batch into a single norm.
    sample_axes = list(range(1, K.ndim(gradients)))
    gradient_l2_norm = K.sqrt(K.sum(K.square(gradients), axis=sample_axes))
    gradient_penalty = gradient_penalty_weight * K.square(1 - gradient_l2_norm)
    return K.mean(gradient_penalty)
def visualize(model, layer_name):
    """Write gradient-ascent visualisations for 10 random filters of a layer.

    For each sampled filter index the mean activation of that filter is
    maximised by 100 gradient-ascent steps (step size 3) starting from the
    image read from ``'visimage.jpg'``; the result is written to
    ``'../activations/out<filter_index>.jpg'``.

    Parameters
    ----------
    model
        Keras model; the input of ``model.layers[0]`` is what gets optimised.
    layer_name : str
        Name of the layer whose filters are visualised. The layer must expose
        ``nb_filter`` and its output is indexed as ``[:, filter, :, :]``.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Model loaded.')
    layer_dict = {layer.name: layer for layer in model.layers}
    target_layer = layer_dict[layer_name]
    # These lookups do not depend on the filter, so do them once.
    layer_output = target_layer.output
    model_input = model.layers[0].input

    for filter_index in sample(range(0, target_layer.nb_filter), 10):
        loss = K.mean(layer_output[:, filter_index, :, :])
        grads = K.gradients(loss, model_input)[0]
        # Normalise the gradient by its RMS so the step size is comparable
        # across filters; 1e-5 avoids division by zero.
        grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)
        iterate = K.function([model_input, K.learning_phase()], [loss, grads])

        input_img_data = np.asarray([read_image('visimage.jpg')])
        for _ in range(100):
            # 0 is passed for the learning phase.
            loss_value, grads_value = iterate([input_img_data, 0])
            input_img_data += grads_value * 3

        img = deprocess_image(input_img_data[0])
        write_image(img, '../activations/out{}.jpg'.format(filter_index))
def call(self, x, mask=None):
    """Return a gradient-norm value per sample as a column tensor.

    ``x`` must hold exactly two items (an output and a target). The losses
    come from ``_per_sample_loss``. With ``self.fast`` set, the squared
    gradients of the losses w.r.t. ``self.parameter_list`` are summed along
    axis 1 and combined in one pass; otherwise ``self._grad_norm`` is mapped
    over every sample index. The result is reshaped to ``(-1, 1)``.
    """
    # x should be an output and a target
    assert len(x) == 2
    per_sample = _per_sample_loss(self.loss, mask, x)

    if not self.fast:
        batch_size = K.shape(per_sample)[0]
        norms = K.map_fn(
            lambda idx: self._grad_norm(per_sample[idx]),
            K.arange(0, batch_size),
            dtype=K.floatx()
        )
    else:
        squared_terms = [
            K.sum(K.square(grad), axis=1)
            for grad in K.gradients(per_sample, self.parameter_list)
        ]
        norms = K.sqrt(sum(squared_terms))

    return K.reshape(norms, (-1, 1))
def call(self, x, mask=None):
    """Convolve ``x`` and merge paired filters into complex-cell responses.

    The first ``2 * self.filters_complex`` channels of the convolution output
    are treated as two halves that are combined as
    ``sqrt(a ** 2 + b ** 2 + K.epsilon())``; the remaining channels are passed
    through unchanged and concatenated after them. An optional bias is added
    before ``self.activation`` is applied. The channel axis is 1 for
    ``'channels_first'`` and 3 for ``'channels_last'``.
    """
    response = K.conv2d(
        x,
        self.W,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        filter_shape=self.kernel_shape,
    )

    if self.data_format == 'channels_first':
        n_complex = self.filters_complex
        # Complex-cell filter operation
        energy = K.sqrt(
            K.square(response[:, :n_complex, :, :])
            + K.square(response[:, n_complex:2*n_complex, :, :])
            + K.epsilon()
        )
        # Simple-cell filter operation
        combined = K.concatenate([energy, response[:, 2*n_complex:, :, :]], axis=1)
    elif self.data_format == 'channels_last':
        n_complex = self.filters_complex
        # Complex-cell filter operation
        energy = K.sqrt(
            K.square(response[:, :, :, :n_complex])
            + K.square(response[:, :, :, n_complex:2*n_complex])
            + K.epsilon()
        )
        # Simple-cell filter operation
        combined = K.concatenate([energy, response[:, :, :, 2*n_complex:]], axis=3)

    if self.bias:
        if self.data_format == 'channels_first':
            bias_shape = (1, self.filters_complex + self.filters_simple, 1, 1)
            combined += K.reshape(self.b, bias_shape)
        elif self.data_format == 'channels_last':
            bias_shape = (1, 1, 1, self.filters_complex + self.filters_simple)
            combined += K.reshape(self.b, bias_shape)

    return self.activation(combined)
def call(self, inputs):
    """Weight the stimulus with shifted Gaussian windows and sum it.

    ``inputs[0]`` is the stimulus and ``inputs[1]`` supplies a per-sample
    centre whose columns 0 and 1 are subtracted from ``self.XX`` and
    ``self.YY`` together with ``self.centers``. The resulting Gaussian is
    reshaped to ``self.kernel_shape``, multiplied with the stimulus and
    summed over ``self.filter_axes``.
    """
    stim = inputs[0]
    center = inputs[1]

    offset_x = (self.XX[None, :, :, None]
                - center[:, 0, None, None, None]
                - self.centers[0][None, None, None, :])
    offset_y = (self.YY[None, :, :, None]
                - center[:, 1, None, None, None]
                - self.centers[1][None, None, None, :])
    sigma = self.stds[None, None, None, :]

    amplitude = K.square(self.dx) / (2 * np.pi * K.square(sigma) + K.epsilon())
    envelope = K.exp(-(K.square(offset_x) + K.square(offset_y)) / (2.0 * K.square(sigma)))
    gauss = self.gauss_scale * amplitude * envelope
    # Alternative normalisations left disabled in the original:
    # gauss = (1 / K.sqrt(2 * np.pi * K.square(senv) + K.epsilon()))*K.exp(-(K.square(centers_x) + K.square(centers_y))/(2.0 * K.square(senv)))
    # gauss /= K.max(gauss, axis=(1, 2), keepdims=True)
    gauss = K.reshape(gauss, self.kernel_shape)

    if K.backend() == 'theano':
        # Theano needs the broadcast pattern declared explicitly.
        weights = K.pattern_broadcast(gauss, self.kernel_broadcast)
    else:
        weights = gauss
    return K.sum(stim[..., None] * weights, axis=self.filter_axes, keepdims=False)
def step(self, x, states):
    """Run one recurrent step using the 'soft_thresh' activation.

    Parameters
    ----------
    x : tensor
        Input for this time step. Used as-is when
        ``self.consume_less == 'cpu'``, otherwise projected through
        ``self.W`` after multiplying by ``states[2]``.
    states : list
        ``[previous output, B_U, B_W]``; ``B_U`` and ``B_W`` multiply the
        previous output and the input respectively.

    Returns
    -------
    tuple
        ``(output, [output])``.

    Raises
    ------
    NotImplementedError
        If ``self.activation`` is anything other than ``'soft_thresh'``.
    """
    prev_output = states[0]
    B_U = states[1]
    B_W = states[2]

    if self.consume_less == 'cpu':
        h = x
    else:
        h = K.dot(x * B_W, self.W)

    if self.activation != 'soft_thresh':
        # Build the message once: a single-argument print() emits the same
        # text on Python 2 and 3, and the exception now carries it as well.
        message = "Activation %s not implemented" % (self.activation,)
        print(message)
        raise NotImplementedError(message)

    preactivation = h + K.dot(prev_output * B_U, self.Uaug)
    # Magnitude built from each entry and its partner selected by
    # self.swap_re_im; epsilon keeps the square root away from zero.
    preactivation_abs = K.sqrt(
        self.epsilon + preactivation**2 + preactivation[:, self.swap_re_im]**2
    )
    # Shift the magnitude by baug, clip at zero, and rescale the preactivation.
    rescale = K.maximum(preactivation_abs + self.baug, 0.) / (preactivation_abs + self.epsilon)
    output = preactivation * rescale
    return output, [output]
def make_patches_grid(x, patch_size, patch_stride):
    '''Split image `x` into a 2-D grid of square patches and their norms.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)

    Returns the patch grid together with the L2 norm of each grid cell,
    reduced over axes (2, 3, 4) with the reduced axes kept.
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T

    batched = K.expand_dims(x, 0)
    dims = K.shape(batched)
    num_channels = dims[-3]
    num_rows = 1 + (dims[-2] - patch_size) // patch_stride
    num_cols = 1 + (dims[-1] - patch_size) // patch_stride

    window = (patch_size, patch_size)
    step = (patch_stride, patch_stride)
    neibs = images2neibs(batched, window, step, mode='valid')

    # neibs are sorted per-channel
    per_channel_shape = (num_channels, K.shape(neibs)[0] // num_channels,
                         patch_size, patch_size)
    per_channel = K.reshape(neibs, per_channel_shape)
    patch_major = K.permute_dimensions(per_channel, (1, 0, 2, 3))

    # arrange in a 2d-grid (rows, cols, channels, px, py)
    grid_shape = (num_rows, num_cols, num_channels, patch_size, patch_size)
    grid = K.reshape(patch_major, grid_shape)
    grid_norm = K.sqrt(K.sum(K.square(grid), axis=(2,3,4), keepdims=True))
    return grid, grid_norm
# get tensor representations of our images
def sample_adaptive_normal_noise(inputs, **kwargs):
    """Draw standard-normal noise, optionally shifted and scaled.

    When ``inputs`` is a list it is unpacked as ``(mu, sigma2)`` and the
    result is ``mu + sqrt(sigma2) * noise``; otherwise plain noise with one
    row per leading-axis entry of ``inputs`` is returned.

    Keyword arguments: ``seed`` (default 7), ``latent_dim`` (default 2) and,
    for the list form only, ``n_samples`` (default ``shape(mu)[0]``).
    """
    from keras.backend import shape, random_normal, sqrt

    seed = kwargs.get('seed', 7)
    latent_dim = kwargs.get('latent_dim', 2)

    if not isinstance(inputs, list):
        return random_normal(shape=(shape(inputs)[0], latent_dim),
                             mean=0, stddev=1, seed=seed)

    mu, sigma2 = inputs
    n_samples = kwargs.get('n_samples', shape(mu)[0])
    noise = random_normal(shape=(n_samples, latent_dim),
                          mean=0, stddev=1, seed=seed)
    return mu + sqrt(sigma2) * noise
def get_weightnorm_params_and_grads(p, g):
    """Re-express parameter ``p`` and gradient ``g`` in weight-norm form.

    Norms are taken over every axis of ``p`` except the last one.

    Returns
    -------
    tuple
        ``(V, V_norm, V_scaler, g_param, grad_g, grad_V)``.
    """
    ps = K.get_variable_shape(p)
    norm_axes = list(range(len(ps) - 1))
    # Shape that broadcasts a per-output vector against p.
    broadcast_shape = [1] * len(norm_axes) + [-1]

    # construct weight scaler: V_scaler = g/||V||
    # (assumes we're using tensorflow!)
    # init to ones, so effective parameters don't change
    V_scaler = K.ones((ps[-1],))

    # get V parameters = ||V||/g * W
    V = p / tf.reshape(V_scaler, broadcast_shape)

    # split V_scaler into ||V|| and g parameters
    V_norm = tf.sqrt(tf.reduce_sum(tf.square(V), norm_axes))
    g_param = V_scaler * V_norm

    # get grad in V,g parameters
    grad_g = tf.reduce_sum(g * V, norm_axes) / V_norm
    projection = tf.reshape(grad_g / V_norm, broadcast_shape) * V
    grad_V = tf.reshape(V_scaler, broadcast_shape) * (g - projection)

    return V, V_norm, V_scaler, g_param, grad_g, grad_V
def call(self, x, mask=None):
    """Normalise ``x`` over its last two axes, then scale and shift.

    The mean and variance are computed by summing over axes -1 and -2 and
    dividing by ``x.shape[2] * x.shape[3]``. The normalised tensor is
    multiplied by ``self.scale`` and offset by ``self.shift``, both expanded
    with one leading and two trailing singleton axes.
    """
    def add_spatial_axes(tensor):
        # Append two trailing singleton axes.
        return K.expand_dims(K.expand_dims(tensor, -1), -1)

    def add_batch_and_spatial_axes(tensor):
        # Prepend one singleton axis, then append two.
        return add_spatial_axes(K.expand_dims(tensor, 0))

    spatial_size = K.cast(x.shape[2] * x.shape[3], K.floatx())
    mean = add_spatial_axes(K.sum(x, [-1, -2]) / spatial_size)
    variance = K.sum(K.square(x - mean), [-1, -2]) / spatial_size
    normalised = (x - mean) / (K.sqrt(add_spatial_axes(variance)) + K.epsilon())

    scale = add_batch_and_spatial_axes(self.scale)
    shift = add_batch_and_spatial_axes(self.shift)
    return scale*normalised + shift
    # else:
    #     raise NotImplemented("Please complete `CycGAN/layers/padding.py` to run on backend {}.".format(K.backend()))
def semantic_matrix(argv):
    """Return the pairwise cosine-similarity matrix of two batched sequences.

    ``argv`` must contain exactly two tensors ``q`` and ``a``. Norms are
    taken over axis 2; the batched dot product of ``q`` with ``a`` (axes 1
    and 2 swapped) is divided by the matching product of norms plus
    ``SAFE_EPSILON``.
    """
    assert len(argv) == 2
    q, a = argv[0], argv[1]

    q_norm = K.sqrt((q ** 2).sum(axis=2, keepdims=True))
    a_norm = K.sqrt((a ** 2).sum(axis=2, keepdims=True))

    norm_products = K.batch_dot(q_norm, K.permute_dimensions(a_norm, [0,2,1]))
    dot_products = K.batch_dot(q, K.permute_dimensions(a, [0,2,1]))
    return dot_products / (norm_products + SAFE_EPSILON)
# NOTE: the comments that followed this function in the original were lost to
# a character-encoding error; the one surviving reference is
# https://groups.google.com/forum/#!topic/theano-users/7gUdN6E00Dc
def iterate_softmax(model, neuron):
    """Build a function returning loss and input gradient for one output unit.

    Parameters
    ----------
    model
        Keras model; ``model.input`` is the tensor the gradient is taken
        with respect to.
    neuron : int
        Column of ``model.output`` whose mean activation is maximised.

    Returns
    -------
    callable
        ``K.function`` mapping ``[input]`` to ``[loss, normalised_grads]``.
    """
    # this is a placeholder tensor that will contain our generated images
    input_tensor = model.input

    # build a loss function that maximizes the activation of the selected
    # output unit. The mean over the *whole* output ignores `neuron`, and for
    # a softmax output it is constant, which yields a zero gradient.
    activation = model.output[:, neuron]
    print('X shape', activation)
    loss = K.mean(activation)

    # compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, input_tensor)[0]

    # normalization trick: we normalize the gradient by its RMS
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)

    # this function returns the loss and grads given the input picture
    return K.function([input_tensor], [loss, grads])
def custom_for_keras(self, ALL_word_embeds):
    """Return a metric function closed over ``ALL_word_embeds``.

    The returned ``top_accuracy(true_word_indices, image_vectors)`` L2
    normalises both the word embeddings and the image vectors along axis 1
    and passes values derived from them to
    ``metrics.top_k_categorical_accuracy``.
    """
    ## only the top 20 rows from word_vectors is legit!
    def top_accuracy(true_word_indices, image_vectors):
        # NOTE(review): `true_word_indices` is never read in this function.
        # l2: Euclidean norm along `axis`, keeping the reduced axis.
        l2 = lambda x, axis: K.sqrt(K.sum(K.square(x), axis=axis, keepdims=True))
        # l2norm: divide by that norm (no epsilon guard against zero vectors).
        l2norm = lambda x, axis: x/l2(x, axis)
        l2_words = l2norm(ALL_word_embeds, axis=1)
        l2_images = l2norm(image_vectors, axis=1)
        # NOTE(review): `tiled_words` is computed but never used below;
        # the hard-coded 200 / 20 presumably are batch and vocabulary sizes
        # -- confirm against the caller.
        tiled_words = K.tile(K.expand_dims(l2_words, axis=1) , (1, 200, 1))
        tiled_images = K.tile(K.expand_dims(l2_images, axis=1), (1, 20, 1))
        # NOTE(review): the difference uses the un-tiled tensors, reduces over
        # axis=2, and K.squeeze is called without an axis argument -- verify
        # that this runs on the backend in use and matches the intent.
        diff = K.squeeze(l2(l2_words - l2_images, axis=2))
        # slice_top3 = lambda x: x[:, 0:3]
        # slice_top1 = lambda x: x[:, 0:1]
        # NOTE(review): `tiled_images` is passed in the y_true position and
        # `diff` in the y_pred position -- confirm this is intended.
        diff_top5 = metrics.top_k_categorical_accuracy(tiled_images, diff)
        return diff_top5
    return top_accuracy
def build_mdl(len_words, embed_dim, embeds, len_sent1, len_sent2):
    """Build and compile a two-input LSTM model scored by cosine similarity.

    Both integer-encoded sentences go through one shared, masked
    ``Embedding`` layer, separate ``LSTM(64)`` encoders and ``Dense(32)``
    projections; the model output is the cosine similarity of the two
    projections. The model is compiled with ``rmsprop`` and ``mse``.

    Parameters
    ----------
    len_words : int
        Vocabulary size excluding the padding index.
    embed_dim : int
        Embedding dimensionality.
    embeds : sequence of numpy arrays
        Initial embedding vectors. A zero row for the padding index is
        prepended to a copy; the caller's sequence is left untouched so the
        function can be called repeatedly with the same ``embeds``.
    len_sent1, len_sent2 : int
        Lengths of the first and second input sequences.

    Returns
    -------
    Model
        The compiled Keras model.
    """
    # Row 0 is the padding vector (index 0 is masked via mask_zero=True).
    # Build a new list instead of embeds.insert(...), which would add another
    # padding row to the caller's list on every call.
    padding_row = np.zeros(embeds[0].shape, dtype='float32')
    embedding_weights = np.array([padding_row] + list(embeds))

    input_q = Input(shape=(len_sent1,), dtype='int32')
    input_a = Input(shape=(len_sent2,), dtype='int32')

    embed = Embedding(mask_zero=True, input_dim=len_words+1, output_dim=embed_dim,
                      weights=[embedding_weights], dropout=0.2)
    x_q = embed(input_q)
    x_a = embed(input_a)

    rnn_q = LSTM(64, input_dim=embed_dim, return_sequences=False,
                 input_length=len_sent1)(x_q)
    rnn_a = LSTM(64, input_dim=embed_dim, return_sequences=False,
                 input_length=len_sent2)(x_a)

    dense_q = Dense(32)(rnn_q)
    dense_a = Dense(32)(rnn_a)

    def cosine(x):
        axis = len(x[0]._keras_shape) - 1

        def dot(a, b):
            return K.batch_dot(a, b, axes=axis)

        # Clamp the product of squared norms so a zero vector cannot cause a
        # division by zero (or an infinite sqrt gradient).
        squared_norms = K.maximum(dot(x[0], x[0]) * dot(x[1], x[1]), K.epsilon())
        return dot(x[0], x[1]) / K.sqrt(squared_norms)

    # https://github.com/fchollet/keras/issues/2299
    cosine_sim = merge([dense_q, dense_a], mode=cosine, output_shape=(1,))

    model = Model(input=[input_q, input_a], output=[cosine_sim])
    model.compile(optimizer='rmsprop', loss='mse')
    return model
def ori_loss(y_true, y_pred, lamb=1.):
    """Masked cross-entropy plus a coherence penalty.

    The region of interest is every position where ``y_true`` summed over the
    last axis is positive. The cross-entropy is summed inside that region and
    divided by its size. The coherence term smooths the outputs of
    ``ori2angle(y_pred)`` with a 3x3 kernel and penalises low coherence
    inside the region. The result is ``logloss + lamb * coherenceloss``.
    """
    # clip
    y_pred = K.tf.clip_by_value(y_pred, K.epsilon(), 1 - K.epsilon())

    # get ROI
    roi = K.sum(y_true, axis=-1, keepdims=True)
    roi = K.tf.cast(K.tf.greater(roi, 0), K.tf.float32)

    # weighted cross entropy loss
    lamb_pos, lamb_neg = 1., 1.
    positive_term = lamb_pos*y_true*K.log(y_pred)
    negative_term = lamb_neg*(1-y_true)*K.log(1-y_pred)
    masked_logloss = (positive_term + negative_term) * roi  # apply ROI
    logloss = -K.sum(masked_logloss) / (K.sum(roi) + K.epsilon())

    # coherence loss, nearby ori should be as near as possible
    mean_kernal = np.reshape(np.ones((3, 3), dtype=np.float32)/8, [3, 3, 1, 1])
    sin2angle_ori, cos2angle_ori, modulus_ori = ori2angle(y_pred)
    smooth_sin = K.conv2d(sin2angle_ori, mean_kernal, padding='same')
    smooth_cos = K.conv2d(cos2angle_ori, mean_kernal, padding='same')
    smooth_modulus = K.conv2d(modulus_ori, mean_kernal, padding='same')
    resultant = K.sqrt(K.square(smooth_sin) + K.square(smooth_cos))
    coherence = resultant / (smooth_modulus + K.epsilon())
    coherenceloss = K.sum(roi) / (K.sum(coherence*roi) + K.epsilon()) - 1

    return logloss + lamb*coherenceloss
def get_gradients(self, loss, params):
    '''
    Replacement for the default keras get_gradients() function.

    If the object carries a ``grads`` attribute it is used as the gradient
    list; otherwise the gradients come from automatic differentiation.
    Norm clipping (``clipnorm``) and value clipping (``clipvalue``) are then
    applied when the attribute exists and is positive.
    '''
    grads = self.grads if hasattr(self, 'grads') else K.gradients(loss, params)

    if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        # Global norm across all gradients.
        squared_sums = [K.sum(K.square(g)) for g in grads]
        norm = K.sqrt(sum(squared_sums))
        grads = [clip_norm(g, self.clipnorm, norm) for g in grads]

    if hasattr(self, 'clipvalue') and self.clipvalue > 0:
        limit = self.clipvalue
        grads = [K.clip(g, -limit, limit) for g in grads]

    return grads
def _get_output_functions(self):
    """Build one backend function exposing layer outputs and scaled gradients.

    For the layers at positions 2, 5, 9 and 10 of ``self.tweet_classifier``
    the function returns, in order, each layer's output followed by the
    gradient of the mean model output w.r.t. that layer's output, divided by
    its RMS plus 1e-5. Inputs are the first layer's input and the learning
    phase.
    """
    # if you name your layers you can use model.get_layer('recurrent_layer')
    model = self.tweet_classifier
    # recurrent, attention, merged and output layers, in that order
    tracked_layers = [model.layers[index] for index in (2, 5, 9, 10)]

    outputs = []
    for layer in tracked_layers:
        outputs.append(layer.output)
        loss = K.mean(model.output)
        # NOTE(review): the value returned by K.gradients is used directly
        # (not indexed) -- confirm the resulting shape is what callers expect.
        grads = K.gradients(loss, layer.output)
        rms = K.sqrt(K.mean(K.square(grads)))
        outputs.append(grads / (rms + 1e-5))

    return K.function([model.layers[0].input, K.learning_phase()], outputs)
def call(self, x, mask=None):
    """Cosine similarity between all word pairs of two inputs via theano.scan.

    ``x[0]`` and ``x[1]`` are scanned in lockstep along their first axis.
    Each pair is normalised (axis 2 for the first, axis 1 or 2 for the second
    depending on ``self.is_q``) and contracted with ``tensordot``. When
    ``self.is_q`` is false, axes 2 and 3 of the result are swapped.
    """
    ax = 1 if self.is_q else 2

    def _step(v1, v2):
        # 1e-6 keeps the norms away from zero.
        v1_unit = v1 / T.sqrt(T.sum(T.sqr(v1), axis=2, keepdims=True) + 1e-6)
        v2_unit = (v2) / T.sqrt(T.sum(T.sqr(v2), axis=ax, keepdims=True) + 1e-6)
        return T.tensordot(v1_unit, v2_unit, [[2], [ax]])

    l_s = x[0]  # n_b x n_s x n_w_s x D
    l_a = x[1]  # n_b x 4 x n_w_qa x D

    # get cosine similarity for ALL word pairs
    output, _ = theano.scan(_step, sequences=[l_s, l_a], outputs_info=None)
    if not self.is_q:
        output = output.dimshuffle(0, 1, 3, 2, 4)  # n_b x n_s x 4 x n_w_s x n_w_qa
    return output
def call(self, x, mask=None):
    """Combine two score tensors with the mean selected by ``self.mean_type``.

    Supported values are ``'harmonic'``, ``'geometric'`` and ``'bilinear'``;
    anything else yields the arithmetic mean. ``x[0]`` gets a trailing
    broadcast axis before it is combined with ``x[1]``.
    """
    # add broadcast dimension to end of l_q
    l_q = x[0].dimshuffle(0, 1, 'x')  # n_b x n_s (before the new axis)
    l_a = x[1]  # n_b x n_s x 4
    mean_type = self.mean_type

    if mean_type == 'harmonic':
        # compute harmonic mean of two scores
        output = 2. * l_q * l_a / (l_q + l_a + 0.00001) * self.beta
    elif mean_type == 'geometric':
        # compute geometric mean of two scores
        output = T.sqrt(l_q * l_a + 0.00001) * self.beta
    elif mean_type == 'bilinear':
        output = l_q * l_a * self.alpha + self.beta * l_a + self.gama * l_q
    else:
        # compute arithmetic mean
        output = (l_q + l_a) / 2.

    # The zero-weighted term references alpha, beta and gama in every branch
    # without changing the value.
    return output + 0 * (self.alpha + self.beta + self.gama)
def _cosine_similarity(self, x1, x2):
    """Cosine similarity along the last axis.

    # Arguments:
        x1: (..., embedding_size)
        x2: (..., embedding_size)

    Each squared norm is clamped to at least ``self.epsilon`` before the
    square root.
    """
    def clamped_norm(tensor):
        return K.sqrt(K.maximum(K.sum(K.square(tensor), axis=-1), self.epsilon))

    dot_product = K.sum(x1 * x2, axis=-1)
    x1_norm = clamped_norm(x1)
    x2_norm = clamped_norm(x2)
    return dot_product / x1_norm / x2_norm
def LN(x, gamma, beta, epsilon=1e-6, axis=-1):
    """Layer-normalise ``x`` along ``axis`` and apply ``gamma`` / ``beta``.

    ``epsilon`` is added both inside the square root of the variance and to
    the resulting standard deviation.
    """
    mean = K.mean(x, axis=axis, keepdims=True)
    variance = K.var(x, axis=axis, keepdims=True)
    std = K.sqrt(variance + epsilon)
    centred = x - mean
    return gamma * (centred / (std + epsilon)) + beta
def call(self, inputs):
    """Dense forward pass with a weight-normalised kernel.

    The kernel is rescaled as ``kernel * g / ||kernel||`` with the norm taken
    over axis 0; bias and activation are applied when configured.
    """
    column_norms = K.sqrt(K.sum(K.square(self.kernel), axis=0))
    normalised_kernel = self.kernel * self.g / column_norms

    result = K.dot(inputs, normalised_kernel)
    if self.use_bias:
        result = K.bias_add(result, self.bias)
    if self.activation is None:
        return result
    return self.activation(result)
def call(self, x):
    """2-D convolution with a weight-normalised kernel.

    The kernel is rescaled as ``kernel * g / ||kernel||`` with the norm taken
    over axes 0, 1 and 2 (reduced axes kept); bias and activation are applied
    when configured.
    """
    filter_norms = K.sqrt(
        K.sum(K.square(self.kernel), axis=[0, 1, 2], keepdims=True)
    )
    normalised_kernel = self.kernel * self.g / filter_norms

    result = K.conv2d(
        x,
        normalised_kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
    )
    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format=self.data_format)
    if self.activation is None:
        return result
    return self.activation(result)
def step(self, x, states):
    """Run one GRU step with weight-normalised kernels.

    Every input and recurrent kernel ``v`` is reparameterised as
    ``v * g / ||v||`` (norm over axis 0) before use.

    Parameters
    ----------
    x : tensor
        Input for this time step.
    states : list
        ``[h_tm1, B_U, B_W]``: previous hidden state, then the dropout
        masks for the recurrent units and for the inputs (three entries
        each, indexed z / r / h).

    Returns
    -------
    tuple
        ``(h, [h])`` with the new hidden state.
    """
    def _weight_norm(direction, gain):
        # Divide by the norm of the kernel itself, not of the gain: dividing
        # by ||gain|| would leave the kernel un-normalised.
        return direction * gain / K.sqrt(K.sum(K.square(direction), axis=0))

    h_tm1 = states[0]  # previous memory
    B_U = states[1]  # dropout matrices for recurrent units
    B_W = states[2]

    kernel_z = _weight_norm(self.kernel_z, self.g_kernel_z)
    kernel_r = _weight_norm(self.kernel_r, self.g_kernel_r)
    kernel_h = _weight_norm(self.kernel_h, self.g_kernel_h)
    recurrent_kernel_z = _weight_norm(self.recurrent_kernel_z, self.g_recurrent_kernel_z)
    recurrent_kernel_r = _weight_norm(self.recurrent_kernel_r, self.g_recurrent_kernel_r)
    recurrent_kernel_h = _weight_norm(self.recurrent_kernel_h, self.g_recurrent_kernel_h)

    x_z = K.dot(x * B_W[0], kernel_z)
    x_r = K.dot(x * B_W[1], kernel_r)
    x_h = K.dot(x * B_W[2], kernel_h)
    if self.use_bias:
        x_z += self.bias_z
        x_r += self.bias_r
        x_h += self.bias_h

    # Update gate, reset gate and candidate state.
    z = self.recurrent_activation(x_z + K.dot(h_tm1 * B_U[0], recurrent_kernel_z))
    r = self.recurrent_activation(x_r + K.dot(h_tm1 * B_U[1], recurrent_kernel_r))
    hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], recurrent_kernel_h))

    h = z * h_tm1 + (1 - z) * hh
    return h, [h]
# Aliases
def euclidean_distance(vects):
    """Compute the Euclidean distance between two backend tensors along axis 1.

    Parameters
    ----------
    vects : list
        Pair ``[x, y]`` of tensors to compare.

    Returns
    -------
    tensor
        ``sqrt(max(sum((x - y) ** 2, axis=1), K.epsilon()))`` with the
        reduced axis kept.
    """
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    # The derivative of sqrt is unbounded at 0, so identical inputs would
    # produce NaN gradients; clamp to a tiny positive floor first.
    return K.sqrt(K.maximum(sum_square, K.epsilon()))
def normalize(x):
    """Scale tensor ``x`` by its root-mean-square value.

    The divisor is ``sqrt(mean(x ** 2)) + 1e-5``; the added constant avoids
    division by zero.
    """
    rms = K.sqrt(K.mean(K.square(x)))
    return x / (rms + 1e-5)
def cost(self,source, target):
    """Return the square root of the MMD estimate between two samples.

    The estimate is ``mean(k(s, s)) - 2 * mean(k(s, t)) + mean(k(t, t))``
    with ``k = self.kernel``.
    """
    # the three kernel terms of the MMD
    source_source = self.kernel(source, source)
    source_target = self.kernel(source, target)
    target_target = self.kernel(target, target)

    # biased MMD estimator (cannot be less than 0)
    mmd = K.mean(source_source) - 2 * K.mean(source_target) + K.mean(target_target)

    # the square root is returned because it optimizes better
    return K.sqrt(mmd)
def cor(self,y1, y2, lamda):
    """Sum of per-column correlations between ``y1`` and ``y2``, times ``lamda``.

    Both inputs are centred along axis 0. Each column's covariance sum is
    divided by the product of the two root sums of squares, each with 1e-8
    added under the square root.
    """
    centred_1 = y1 - K.mean(y1, axis=0)
    centred_2 = y2 - K.mean(y2, axis=0)

    numerator = K.sum(centred_1 * centred_2, axis=0)
    scale_1 = K.sqrt(K.sum(centred_1 * centred_1, axis=0) + 1e-8)
    scale_2 = K.sqrt(K.sum(centred_2 * centred_2, axis=0) + 1e-8)

    correlation = numerator / (scale_1 * scale_2)
    return K.sum(correlation) * lamda
def sum_corr(model):
    """Print the summed per-dimension correlation of the two projected views.

    Loads ``MFCC_Test.npy`` and ``XRMB_Test.npy``, projects each view with
    the other one zeroed out, and accumulates the correlation of matching
    output columns. Nothing is returned; the total is printed.
    """
    view1 = np.load("MFCC_Test.npy")
    view2 = np.load("XRMB_Test.npy")
    x = project(model,[view1,np.zeros_like(view2)])
    y = project(model,[np.zeros_like(view1),view2])
    print ("test correlation")

    corr = 0
    for i in range(0,len(x[0])):
        column_x = x[:,i]
        column_y = y[:,i]
        # centre each column on its mean
        x1 = column_x - (np.ones(len(x))*(sum(column_x)/len(x)))
        x2 = column_y - (np.ones(len(y))*(sum(column_y)/len(y)))
        covariance = sum(x1 * x2)
        scale = math.sqrt(sum(x1*x1))*math.sqrt(sum(x2*x2))
        corr += covariance/scale
    print (corr)
def get_updates(self, params, constraints, loss):
    """Build the list of update ops for one optimisation step.

    For every parameter a running average ``v`` of the squared gradient is
    kept with weight ``gamma / t`` (``t`` = iteration + 1) and the parameter
    moves by ``lr * g / (sqrt(t * v_t) + delta)``.

    Parameters
    ----------
    params : list
        Parameters to update.
    constraints : dict
        Maps a parameter to a callable applied to its new value.
    loss : tensor
        Loss whose gradients drive the update.

    Returns
    -------
    list
        ``self.updates``.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * self.iterations))

    t = self.iterations + 1
    vs = [K.zeros(K.get_variable_shape(p)) for p in params]
    self.weights = [self.iterations] + vs

    for p, g, v in zip(params, grads, vs):
        v_t = (1 - (self.gamma / t)) * v + (self.gamma / t) * K.square(g)
        # Use the decayed rate computed above; referring to self.lr here
        # would silently disable the learning-rate decay.
        p_t = p - lr * g / (K.sqrt(t * v_t) + self.delta)

        self.updates.append(K.update(v, v_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def _grad_norm(self, loss):
    """Return the global L2 norm of the gradients of ``loss``.

    Gradients are taken w.r.t. ``self.parameter_list``; their squared entries
    are summed across all parameters before the square root.
    """
    squared_sums = [
        K.sum(K.square(grad))
        for grad in K.gradients(loss, self.parameter_list)
    ]
    return K.sqrt(sum(squared_sums))
def lifted_loss(margin=1):
    """
        Lifted loss, per "Deep Metric Learning via Lifted Structured Feature
        Embedding" by Song et al.

        Implemented in `keras`. Returns a loss function ``f(target, score)``
        closed over ``margin``.

        See also the `pytorch` implementation at:
            https://gist.github.com/bkj/565c5e145786cfd362cffdbd8c089cf4
    """
    def f(target, score):
        # Pair mask: -1 for different class, 1 for same class, 0 on the diagonal
        same_or_not = (2 * K.equal(0, target - K.reshape(target, (-1, 1))) - 1)
        mask = (same_or_not - K.eye(score.shape[0]))
        is_positive = K.equal(mask, 1)
        is_negative = K.equal(mask, -1)

        # Pairwise distance between rows of `score`
        sq_norms = (score ** 2).sum(axis=-1)
        sq_norms = K.tile(sq_norms, (sq_norms.shape[0], 1))
        dist = (sq_norms + sq_norms.T - 2 * score.dot(score.T))
        dist = K.sqrt(K.maximum(0, dist))

        # Negative component (points from different class should be far)
        l_n = K.sum((K.exp(margin - dist) * is_negative), axis=-1)
        l_n = K.tile(l_n, (score.shape[0], 1))
        l_n = K.log(l_n + K.transpose(l_n))
        l_n = l_n * is_positive

        # Positive component (points from same class should be close)
        l_p = dist * is_positive

        loss = K.sum((K.maximum(0, l_n + l_p) ** 2))
        n_pos = K.sum(is_positive)
        loss /= (2 * n_pos)
        return loss

    return f
def get_updates(self, params, constraints, loss):
    """Build the update ops for one step of this adaptive optimiser.

    Per parameter it tracks running averages of the gradient (``m``) and the
    squared gradient (``v``) plus a memory term (``mem``) that sets the
    averaging rate ``r = 1 / (1 + mem)``. The step size is
    ``min(lr, m_t ** 2 / (v_t + epsilon))`` divided by
    ``sqrt(v_t) + epsilon``.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))

    shapes = [K.get_variable_shape(param) for param in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    mems = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs + mems

    for param, grad, m, v, mem in zip(params, grads, ms, vs, mems):
        rate = 1. / (1. + mem)
        m_t = (1. - rate) * m + rate * grad
        v_t = (1. - rate) * v + rate * K.square(grad)
        denoise = K.square(m_t) / (v_t + self.epsilon)
        step = K.minimum(lr, denoise)
        updated = param - grad * step / (K.sqrt(v_t) + self.epsilon)
        mem_t = 1. + mem * (1. - denoise)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        self.updates.append(K.update(mem, mem_t))

        # apply constraints
        if param in constraints:
            updated = constraints[param](updated)
        self.updates.append(K.update(param, updated))
    return self.updates
def get_updates(self, params, constraints, loss):
    """Build the update ops for one step of this adaptive optimiser.

    Per parameter it tracks running averages of the gradient (``m``) and the
    squared gradient (``v``), a memory term (``mem``) and a smoothed
    signal-to-noise estimate (``denoise``). The averaging rate is
    ``1 / (1 + mem)`` clipped to ``[0.005, 0.2]`` and the step is
    ``g * denoise_t / (sqrt(v_t) + epsilon)``.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    shapes = [K.get_variable_shape(param) for param in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    mems = [K.zeros(shape) for shape in shapes]
    denoises = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs + mems + denoises

    state = zip(params, grads, ms, vs, mems, denoises)
    for param, grad, m, v, mem, denoise in state:
        rate = K.minimum(0.2, K.maximum(0.005, 1. / (1. + mem)))
        # Memory implied by the clipped rate.
        clipped_mem = 1. / rate - 1.
        m_t = (1. - rate) * m + rate * grad
        v_t = (1. - rate) * v + rate * K.square(grad)
        denoise_t = 0.99 * denoise + 0.01 * K.square(m_t) / (v_t + self.epsilon)
        updated = param - grad * denoise_t / (K.sqrt(v_t) + self.epsilon)
        mem_t = K.maximum(0., 1. + clipped_mem * (1. - denoise_t))

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        self.updates.append(K.update(mem, mem_t))
        self.updates.append(K.update(denoise, denoise_t))

        # apply constraints
        if param in constraints:
            updated = constraints[param](updated)
        self.updates.append(K.update(param, updated))
    return self.updates
def __call__(self, p):
    """Orthogonalise and normalise paired filters (and optional singles).

    The last axis of ``p`` is rotated to the front and treated as the filter
    axis. Filters are split into pairs ``(v, w)`` (interleaved or blocked by
    ``self.m``); ``w`` is made orthogonal to ``v`` and each pair is divided by
    its joint norm. With ``self.singles`` the remaining filters are
    normalised individually. The result is concatenated as pairs first, then
    singles, and the filter axis is rotated back to the end.
    """
    # if self.data_format == 'channels_last':
    axes = list(range(K.ndim(p)))
    p = K.permute_dimensions(p, [axes[-1]] + axes[:-1])

    sumaxes = tuple(range(1, K.ndim(p)))
    if not self.interleave:
        v = p[:self.m]
        w = p[self.m:2*self.m]
    elif self.singles:
        v = p[::3]
        w = p[1::3]
    else:
        v = p[::2]
        w = p[1::2]

    # Remove from w its projection onto v.
    projection = K.sum(v*w, axis=sumaxes, keepdims=True)/K.sum(v*v, axis=sumaxes, keepdims=True)
    v2 = w - v*projection
    norms_paired = K.sqrt(K.sum(v**2 + v2**2, axis=sumaxes, keepdims=True))
    v /= norms_paired
    v2 /= norms_paired

    if self.singles:
        if self.interleave:
            x = p[2::3]
        else:
            x = p[2*self.m:]
        norms_single = K.sqrt(K.sum(x**2, axis=sumaxes, keepdims=True))
        x /= norms_single
        out = K.concatenate((v, v2, x), axis=0)
    else:
        out = K.concatenate((v, v2), axis=0)

    # if self.dim_ordering == 'tf':
    out_axes = list(range(K.ndim(out)))
    out = K.permute_dimensions(out, out_axes[1:] + [out_axes[0]])
    return out
def __call__(self, x):
    """Return the total-variation penalty for ``x``.

    ``self.axes`` are moved to the front. A first-order term weighted by
    ``self.TV`` and a second-order term weighted by ``self.TV2`` are added
    when the respective weight is truthy. ``K.epsilon()`` is added under each
    square root.
    """
    other_axes = list(set(range(K.ndim(x))) - set(self.axes))
    p = K.permute_dimensions(x, self.axes + other_axes)

    penalty = 0
    if self.TV:
        first_order = K.sqrt(
            K.square(diffr(p)) + K.square(diffc(p)) + K.epsilon()
        )
        penalty += self.TV*K.sum(first_order)
    if self.TV2:
        second_order = K.sqrt(
            K.square(diffrr(p)) + K.square(diffcc(p))
            + 2*K.square(diffrc(p)) + K.epsilon()
        )
        penalty += self.TV2*K.sum(second_order)
    return penalty