def step(self, x, states):
    """Compute one recurrent step using the soft-threshold activation.

    Args:
        x: input for the current time step. Used as-is when
            ``self.consume_less == 'cpu'``; otherwise it is multiplied
            elementwise by ``states[2]`` and projected through ``self.W``.
        states: sequence whose first three entries are the previous output,
            the multiplier applied to the previous output (``B_U``) and the
            multiplier applied to the input (``B_W``).
            NOTE(review): B_U / B_W look like dropout masks -- confirm
            against the code that builds ``states``.

    Returns:
        ``(output, [output])``: the new output and the new state list.

    Raises:
        NotImplementedError: if ``self.activation`` is not ``'soft_thresh'``.
            The explanatory message is carried by the exception instead of
            being printed first, which also removes the Python 2-only
            ``print`` statement.
    """
    prev_output = states[0]
    B_U = states[1]
    B_W = states[2]

    if self.consume_less == 'cpu':
        h = x
    else:
        h = K.dot(x * B_W, self.W)

    if self.activation != 'soft_thresh':
        raise NotImplementedError(
            "Activation %s not implemented" % (self.activation,))

    preactivation = h + K.dot(prev_output * B_U, self.Uaug)
    # Combine each column with the column selected by self.swap_re_im.
    # NOTE(review): presumably this pairs real and imaginary parts to get a
    # complex modulus -- confirm how swap_re_im is built.
    preactivation_abs = K.sqrt(self.epsilon
                               + preactivation ** 2
                               + preactivation[:, self.swap_re_im] ** 2)
    # Scale by max(|z| + b, 0) / |z|; epsilon keeps the division finite.
    rescale = (K.maximum(preactivation_abs + self.baug, 0.)
               / (preactivation_abs + self.epsilon))
    output = preactivation * rescale
    return output, [output]
def compile(self, optimizer, **kwargs):
    """Build and compile the training and prediction models.

    The training model takes (subject, relation, good object, bad object)
    and outputs ``max(1e-6, margin - good_score + bad_score)``, with
    ``margin`` read from ``self.config['margin']``. It is compiled with a
    loss that simply returns its prediction. The prediction model scores
    (subject, relation, good object) and is compiled with
    ``'binary_crossentropy'``.

    Args:
        optimizer: optimizer passed to both ``compile`` calls.
        **kwargs: forwarded unchanged to both ``compile`` calls.
    """
    scorer = self.get_qa_model()
    positive_inputs = [self.subject, self.relation, self.object_good]
    negative_inputs = [self.subject, self.relation, self.object_bad]

    good_output = scorer(positive_inputs)
    bad_output = scorer(negative_inputs)

    # x[0] is the good score and x[1] the bad score.
    hinge = lambda x: K.maximum(1e-6, self.config['margin'] - x[0] + x[1])
    loss = merge([good_output, bad_output],
                 mode=hinge,
                 output_shape=lambda x: x[0])

    self.training_model = Model(
        input=[self.subject, self.relation,
               self.object_good, self.object_bad],
        output=loss)
    # The model output already is the loss; y_true is added and subtracted
    # so that it still appears in the expression.
    passthrough = lambda y_true, y_pred: y_pred + y_true - y_true
    self.training_model.compile(loss=passthrough, optimizer=optimizer,
                                **kwargs)

    self.prediction_model = Model(
        input=[self.subject, self.relation, self.object_good],
        output=good_output)
    self.prediction_model.compile(loss='binary_crossentropy',
                                  optimizer=optimizer, **kwargs)

    self.training_model.summary()
def register(self, info_tensor, param_tensor):
    """Store the tensors and build the scaled Gaussian log-likelihood.

    Column 0 of ``param_tensor`` is the mean, clipped to
    ``[self.min, self.max]``. The standard deviation is the constant 1.0
    when ``self.stddev_fix`` is truthy, otherwise the sigmoid of column 1.

    Args:
        info_tensor: tensor being scored (the original note says its shape
            is (128, 1) -- TODO confirm).
        param_tensor: tensor holding the distribution parameters per row.

    Returns:
        ``self.log_Q_c_given_x`` reshaped to ``(-1, 1)``.
    """
    self.info_tensor = info_tensor
    self.param_tensor = param_tensor

    # dimshuffle(0, 'x') turns the selected column into a column vector.
    mean = K.clip(param_tensor[:, 0].dimshuffle(0, 'x'), self.min, self.max)
    if self.stddev_fix:
        std = 1.0
    else:
        # Alternative kept from the original:
        # std = K.maximum(param_tensor[:, 1].dimshuffle(0, 'x'), 0)
        std = K.sigmoid(param_tensor[:, 1].dimshuffle(0, 'x'))

    e = (info_tensor - mean) / (std + K.epsilon())
    log_density = (-0.5 * np.log(2 * np.pi)
                   - K.log(std + K.epsilon())
                   - 0.5 * (e ** 2))
    self.log_Q_c_given_x = K.sum(log_density, axis=1) * self.lmbd
    # m = Sequential([ Activation('softmax', input_shape=(self.n,)), Lambda(lambda x: K.log(x), lambda x: x) ])
    return K.reshape(self.log_Q_c_given_x, (-1, 1))
def mix_gaussian_loss(x, mu, log_sig, w):
    '''
    Negative log-likelihood of ``x`` under a mixture of Gaussians.

    The mixture density and the loss are combined in one function so the
    log-sum-exp trick can be used for numerical stability:

        log(sum_k w_k * exp(g_k)) = m + log(sum_k w_k * exp(g_k - m))

    so the negative log-likelihood is ``-log(out) - m``. The previous code
    added ``m`` back with the wrong sign (``+ max_gauss``), which changed
    the loss whenever the largest log-density was positive.

    Args:
        x: target tensor; on the TensorFlow backend its static shape is set
            to ``[None, 1]``.
        mu: component means; ``mu.shape[1]`` is used as the number of
            mixture components.
        log_sig: component log standard deviations, as expected by
            ``log_norm_pdf``.
        w: mixture weights, multiplied elementwise with the densities.

    Returns:
        Mean negative log-likelihood over the batch.
    '''
    if K.backend() == "tensorflow":
        x.set_shape([None, 1])

    # Repeat x once per mixture component so it lines up with mu/log_sig.
    gauss = log_norm_pdf(K.repeat_elements(x=x, rep=mu.shape[1], axis=1),
                         mu, log_sig)
    # TODO: get rid of clipping.
    gauss = K.clip(gauss, -40, 40)
    # Shift by the (non-negative) global maximum before exponentiating.
    max_gauss = K.maximum((0.), K.max(gauss))
    # log sum exp trick...
    gauss = gauss - max_gauss
    out = K.sum(w * K.exp(gauss), axis=1)
    # Undo the shift: log p = log(out) + max_gauss, so NLL subtracts it.
    loss = K.mean(-K.log(out) - max_gauss)
    return loss
def contrastiveLoss(y_true, y_pred):
    """Contrastive loss built from the module-level ``Margin``.

    Computes ``mean(0.5 * (y_true * y_pred
    + (1 - y_true) * max(0, Margin - sqrt(y_pred)) ** 2))``.

    NOTE(review): the formula treats ``y_pred`` as a squared distance and
    ``y_true`` as a 0/1 "same pair" flag -- confirm against the model.
    """
    same_pair_term = y_true * y_pred
    margin_gap = K.maximum(0, Margin - K.T.sqrt(y_pred))
    diff_pair_term = (1 - y_true) * K.T.power(margin_gap, 2)
    return K.T.mean(0.5 * (same_pair_term + diff_pair_term))
def rank_hinge_loss(kwargs=None):
    """Create a pairwise hinge ranking loss.

    Args:
        kwargs: optional dict; its ``'margin'`` entry, if present, replaces
            the default margin of 1.

    Returns:
        A ``loss(y_true, y_pred)`` function. It treats even rows of
        ``y_pred`` as positive scores and odd rows as negative scores, and
        returns ``mean(max(0, margin + neg - pos))``. ``y_true`` is unused.
    """
    has_margin = isinstance(kwargs, dict) and 'margin' in kwargs
    margin = kwargs['margin'] if has_margin else 1.

    def _margin_loss(y_true, y_pred):
        # Rows 0, 2, 4, ... are positives; rows 1, 3, 5, ... are negatives.
        y_pos = Lambda(lambda a: a[::2, :], output_shape=(1,))(y_pred)
        y_neg = Lambda(lambda a: a[1::2, :], output_shape=(1,))(y_pred)
        return K.mean(K.maximum(0., margin + y_neg - y_pos))

    return _margin_loss
def actual_accuracy(act, pred):
    '''
    Calculate accuracy for each batch, ignoring padding, start and end
    positions. Keras' standard calculation factors in our padding classes.
    We don't.

    A position is excluded when its actual class vector sums to zero
    (padding) or when its actual class index is 0 (start) or 1 (end).

    Changes from the previous version:
      * the padding test is per position (``axis=-1``); it used to sum the
        whole tensor, so it only fired when the entire batch was zero;
      * the denominator is floored at ``K.epsilon()``, so a batch with no
        scored positions gives 0 instead of NaN.
    NOTE(review): the original carried "FIXME: Not always working"; the
    whole-tensor sum is one likely cause -- confirm on real data.

    :param act: actual class scores / one-hot vectors, classes on last axis
    :param pred: predicted class scores, classes on last axis
    :return: scalar accuracy over the non-excluded positions
    '''
    act_argm = K.argmax(act, axis=-1)    # Indices of act. classes
    pred_argm = K.argmax(pred, axis=-1)  # Indices of pred. classes

    incorrect = K.cast(K.not_equal(act_argm, pred_argm), dtype='float32')
    correct = K.cast(K.equal(act_argm, pred_argm), dtype='float32')

    # Per-position flags, 1.0 where the position must be ignored.
    padding = K.cast(K.equal(K.sum(act, axis=-1), 0), dtype='float32')
    start = K.cast(K.equal(act_argm, 0), dtype='float32')
    end = K.cast(K.equal(act_argm, 1), dtype='float32')

    pad_start = K.maximum(padding, start)
    pad_start_end = K.maximum(pad_start, end)  # 1 where pad, start or end

    # Subtract pad_start_end from correct, then check equality to 1
    # E.g.: act:       [pad, pad, pad, <s>, tag, tag, tag, </s>]
    #       pred:      [pad, tag, pad, <s>, tag, tag, err, </s>]
    #       correct:   [1,   0,   1,   1,   1,   1,   0,   1]
    #       p_s_e:     [1,   1,   1,   1,   0,   0,   0,   1]
    #       corr-pse:  [0,  -1,   0,   0,   1,   1,   0,   0]  # Subtraction
    #       actu_corr: [0,   0,   0,   0,   1,   1,   0,   0]  # Check equality to 1
    corr_preds = K.sum(K.cast(K.equal(correct - pad_start_end, 1),
                              dtype='float32'))
    incorr_preds = K.sum(K.cast(K.equal(incorrect - pad_start_end, 1),
                                dtype='float32'))
    total = corr_preds + incorr_preds
    # Guard against a batch with no scored positions.
    accuracy = corr_preds / K.maximum(total, K.epsilon())
    return accuracy
def my_logloss(act, pred):
    """Binary log loss summed over all elements, divided by the batch size.

    Args:
        act: tensor of actual labels.
        pred: tensor of predicted probabilities; clipped to
            ``[1e-15, 1 - 1e-15]`` before the logarithms.

    Returns:
        Scalar tensor ``-sum(act*log(pred) + (1-act)*log(1-pred)) / N``
        where ``N`` is the size of the first axis of ``act``.

    The batch size from ``K.shape`` is an integer tensor. It is now cast to
    the dtype of the loss before dividing, because dividing a float tensor
    by an integer tensor is rejected on the TensorFlow backend.

    NOTE(review): in float32, ``1 - 1e-15`` rounds to 1.0, so the upper
    clip may be a no-op -- consider a larger epsilon.
    """
    epsilon = 1e-15
    pred = K.maximum(epsilon, pred)
    pred = K.minimum(1 - epsilon, pred)
    ll = K.sum(act * K.log(pred) + (1 - act) * K.log(1 - pred))
    batch_size = K.cast(K.shape(act)[0], K.dtype(ll))
    ll = ll * -1.0 / batch_size
    return ll
def logloss(act, pred):
    '''
    Binary log loss (cross-entropy) averaged over the first axis.

    Predictions are clipped to [1e-15, 1 - 1e-15] so the logarithms stay
    finite. NumPy is now called directly instead of SciPy's deprecated
    top-level aliases (``sp.maximum``, ``sp.log``, ...), and both arguments
    may be array-likes such as plain lists.

    :param act: actual labels (0/1), array-like
    :param pred: predicted probabilities, array-like of the same shape
    :return: -sum(act*log(pred) + (1-act)*log(1-pred)) / len(act), with the
        sum taken along the first axis
    :raises ZeroDivisionError: if ``act`` is empty (same as before)
    '''
    # Local import keeps this block self-contained.
    import numpy as np

    n_samples = len(act)
    if n_samples == 0:
        # NumPy would return nan with a warning; keep the old exception.
        raise ZeroDivisionError("logloss of an empty sample is undefined")

    epsilon = 1e-15
    act = np.asarray(act, dtype=float)
    pred = np.clip(np.asarray(pred, dtype=float), epsilon, 1 - epsilon)

    # axis=0 matches the built-in sum() the original used.
    ll = np.sum(act * np.log(pred) + (1 - act) * np.log(1 - pred), axis=0)
    return ll * -1.0 / n_samples
def ranking_loss(y_true, y_pred):
    """Pairwise ranking loss: ``mean(-sigmoid(pos - neg))``.

    Column 0 of ``y_pred`` is the positive score and column 1 the negative
    score. ``y_true`` only enters as ``0 * y_true``, so it does not change
    the value.
    """
    score_gap = y_pred[:, 0] - y_pred[:, 1]
    # use K.maximum(1.0 + neg - pos, 0.0) if you want to use margin ranking loss
    pairwise = -K.sigmoid(score_gap)
    return K.mean(pairwise) + 0 * y_true
def Margin_Loss(y_true, y_pred):
    """Loss ``mean(max(0, 1 - sigmoid(best - predicted)))``.

    ``y_pred[0]`` is the "best" score and ``y_pred[1]`` the predicted
    score. ``y_true`` only enters as ``0 * y_true``.

    NOTE(review): this indexes the first axis (``y_pred[0]``), not columns
    (``y_pred[:, 0]``) -- confirm that is the intended layout.
    """
    gap = y_pred[0] - y_pred[1]
    hinge = K.maximum(0.0, 1.0 - K.sigmoid(gap))
    return K.mean(hinge) + 0 * y_true
def lifted_loss(margin=1):
    """
    Build the lifted structured loss from "Deep Metric Learning via Lifted
    Structured Feature Embedding" (Song et al.), implemented in `keras`.

    See also the `pytorch` implementation at:
        https://gist.github.com/bkj/565c5e145786cfd362cffdbd8c089cf4

    Returns a function ``f(target, score)`` that computes the scalar loss.
    """
    def f(target, score):
        # Pair mask: 1 for same class, -1 for different class, 0 on the
        # diagonal.
        same_class = K.equal(0, target - K.reshape(target, (-1, 1)))
        mask = (2 * same_class - 1)
        mask = (mask - K.eye(score.shape[0]))
        positive_pairs = K.equal(mask, 1)
        negative_pairs = K.equal(mask, -1)

        # Pairwise distances between rows: |a|^2 + |b|^2 - 2 a.b
        sq_norm = (score ** 2).sum(axis=-1)
        sq_norm = K.tile(sq_norm, (sq_norm.shape[0], 1))
        sq_dist = (sq_norm + sq_norm.T - 2 * score.dot(score.T))
        dist = K.sqrt(K.maximum(0, sq_dist))

        # Negative component (points from different class should be far)
        neg_term = K.sum((K.exp(margin - dist) * negative_pairs), axis=-1)
        neg_term = K.tile(neg_term, (score.shape[0], 1))
        neg_term = K.log(neg_term + K.transpose(neg_term))
        neg_term = neg_term * positive_pairs

        # Positive component (points from same class should be close)
        pos_term = dist * positive_pairs

        total = K.sum((K.maximum(0, neg_term + pos_term) ** 2))
        n_pos = K.sum(positive_pairs)
        return total / (2 * n_pos)

    return f

# --
def _loss_sqeuclidean(self, inputs):
    """Triplet hinge loss on squared Euclidean distances.

    Computes ``max(d(inputs[0], inputs[1]) + self.margin
    - d(inputs[0], inputs[2]), 0)``, where ``d`` is the squared difference
    summed over the last axis (kept as a size-1 axis).

    NOTE(review): presumably inputs are (anchor, positive, negative) --
    confirm against the caller.
    """
    reference, closer, farther = inputs[0], inputs[1], inputs[2]
    dist_pos = K.sum(K.square(reference - closer), axis=-1, keepdims=True)
    dist_neg = K.sum(K.square(reference - farther), axis=-1, keepdims=True)
    return K.maximum(dist_pos + self.margin - dist_neg, 0)
def _loss_cosine(self, inputs):
    """Triplet hinge loss on negated dot products.

    Computes ``max(-<inputs[0], inputs[1]> + self.margin
    + <inputs[0], inputs[2]>, 0)``, with dot products taken over the last
    axis (kept as a size-1 axis).

    NOTE(review): this is a cosine similarity only if the inputs are
    already L2-normalised; nothing is normalised here.
    """
    reference, closer, farther = inputs[0], inputs[1], inputs[2]
    neg_sim_pos = -K.sum(reference * closer, axis=-1, keepdims=True)
    neg_sim_neg = -K.sum(reference * farther, axis=-1, keepdims=True)
    return K.maximum(neg_sim_pos + self.margin - neg_sim_neg, 0)
def get_updates(self, params, constraints, loss):
    """Build the list of update ops for one optimisation step.

    For every parameter four zero-initialised slots are kept: a gradient
    average (``m``), a squared-gradient average (``v``), a memory length
    (``mem``) and a "denoise" factor (``denoise``).

    Args:
        params: parameters to optimise.
        constraints: mapping from parameter to a callable applied to the
            new parameter value before it is written back.
        loss: loss whose gradients are taken with respect to ``params``.

    Returns:
        ``self.updates``: the iteration increment followed, per parameter,
        by the updates of ``m``, ``v``, ``mem``, ``denoise`` and the
        parameter itself.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]
    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    mems = [K.zeros(shape) for shape in shapes]
    denoises = [K.zeros(shape) for shape in shapes]
    # Order matters: weights are saved/restored in this layout.
    self.weights = [self.iterations] + ms + vs + mems + denoises
    for p, g, m, v, mem, denoise in zip(params, grads, ms, vs, mems, denoises):
        # Averaging rate from the memory length, clamped to [0.005, 0.2].
        r = K.minimum(0.2, K.maximum(0.005, 1. / (1. + mem)))
        # Memory length implied by the clamped rate.
        mem_t = 1. / r - 1.
        # Exponential moving averages of gradient and squared gradient.
        m_t = (1. - r) * m + r * g
        v_t = (1. - r) * v + r * K.square(g)
        # Slow average (0.99 / 0.01) of m_t^2 / v_t.
        denoise_t = 0.99 * denoise + 0.01 * K.square(m_t) / (v_t + self.epsilon)
        # Gradient step scaled by the denoise factor over sqrt(v_t).
        p_t = p - g * denoise_t / (K.sqrt(v_t) + self.epsilon)
        # Next memory length: shrinks as denoise_t grows; never negative.
        mem_t = K.maximum(0., 1. + mem_t * (1. - denoise_t))
        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        self.updates.append(K.update(mem, mem_t))
        self.updates.append(K.update(denoise, denoise_t))
        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def clip_relu(x):
    """ReLU capped at 1: returns ``min(max(x, 0), 1)`` elementwise."""
    return K.minimum(K.maximum(x, 0), 1)
def cross_entropy(self, y_true, y_pred):
    """Categorical cross-entropy over the last axis.

    ``y_pred`` is first divided by its sum over the last axis, then clipped
    to ``[1e-15, 1 - 1e-15]`` before the logarithm.

    The normalisation now uses the backend's ``K.sum(..., keepdims=True)``
    instead of ``tf.reduce_sum(..., keep_dims=True)``. ``keep_dims`` is a
    deprecated TensorFlow keyword, and the rest of this function (like the
    sibling ``_softmax_loss``) already uses ``K``.

    Args:
        y_true: target distribution, classes on the last axis.
        y_pred: non-negative predicted scores, classes on the last axis.

    Returns:
        Tensor of per-sample losses ``-sum(y_true * log(y_pred), axis=-1)``.

    NOTE(review): in float32, ``1 - 1e-15`` rounds to 1.0, so the upper
    clip may be a no-op.
    """
    y_pred = y_pred / K.sum(y_pred, axis=-1, keepdims=True)
    y_pred = K.maximum(K.minimum(y_pred, 1 - 1e-15), 1e-15)
    cross_entropy_loss = - K.sum(y_true * K.log(y_pred), axis=-1)
    return cross_entropy_loss
def _softmax_loss(self, y_true, y_pred):
    """Cross-entropy ``-sum(y_true * log(y_pred))`` over the last axis.

    ``y_pred`` is clipped to ``[1e-15, 1 - 1e-15]`` before the logarithm.
    """
    upper_bound = 1 - 1e-15
    lower_bound = 1e-15
    clipped = K.maximum(K.minimum(y_pred, upper_bound), lower_bound)
    return - K.sum(y_true * K.log(clipped), axis=-1)
def tpr(y_true, y_pred):
    """True-positive rate with a 0.5 threshold.

    Values >= 0.5 count as positive. Counts are summed over axes -3 and -2
    and the denominator is floored at 1.0 to avoid division by zero.
    NOTE(review): axes -3/-2 are presumably the spatial axes of an image
    tensor -- confirm the data layout.
    """
    reduce_axes = [-3, -2]
    actual_pos = 1 - K.cast(K.less(y_true, 0.5), dtype='float32')
    predicted_pos = 1 - K.cast(K.less(y_pred, 0.5), dtype='float32')
    hits = K.sum(actual_pos * predicted_pos, axis=reduce_axes)
    n_actual_pos = K.maximum(K.sum(actual_pos, axis=reduce_axes), 1.0)
    return hits / n_actual_pos
def tnr(y_true, y_pred):
    """True-negative rate with a 0.5 threshold.

    Values < 0.5 count as negative. Counts are summed over axes -3 and -2
    and the denominator is floored at 1.0 to avoid division by zero.
    """
    reduce_axes = [-3, -2]
    actual_neg = K.cast(K.less(y_true, 0.5), dtype='float32')
    predicted_neg = K.cast(K.less(y_pred, 0.5), dtype='float32')
    hits = K.sum(actual_neg * predicted_neg, axis=reduce_axes)
    n_actual_neg = K.maximum(K.sum(actual_neg, axis=reduce_axes), 1.0)
    return hits / n_actual_neg
def balanced_loss(y_true, y_pred):
    """Class-balanced squared error.

    The squared error is averaged separately over positions where
    ``y_true > 0.5`` and where ``y_true < 0.5`` (sums over axes -3 and -2,
    counts floored at 1.0). The two means are combined with equal weight.
    Positions where ``y_true`` is exactly 0.5 belong to neither group.
    """
    reduce_axes = [-3, -2]
    squared_err = K.square(y_pred - y_true)
    is_pos = K.cast(K.greater(y_true, 0.5), dtype='float')
    is_neg = K.cast(K.less(y_true, 0.5), dtype='float')

    pos_mean = (K.sum(is_pos * squared_err, axis=reduce_axes)
                / K.maximum(K.sum(is_pos, axis=reduce_axes), 1.0))
    neg_mean = (K.sum(is_neg * squared_err, axis=reduce_axes)
                / K.maximum(K.sum(is_neg, axis=reduce_axes), 1.0))

    pos_weight = 0.5
    return (pos_weight * pos_mean + (1 - pos_weight) * neg_mean)
def calculate_P(X):
    """Compute symmetrised, normalised joint probabilities batch by batch.

    ``X`` is processed in consecutive slices of ``batch_size`` rows (a
    module-level constant). For each slice, the matrix from ``x2p`` has its
    NaNs zeroed, is symmetrised (``P + P.T``), normalised to sum to 1 and
    floored at 1e-12.

    Changes from the previous version:
      * ``print(...)`` and ``range`` replace the Python 2-only ``print``
        statement and ``xrange``; the output is the same on Python 2;
      * a final slice with fewer than ``batch_size`` rows no longer raises a
        shape error. Its smaller matrix goes into the top-left corner of
        its rows and the remaining columns stay zero.

    Args:
        X: 2-D array with one sample per row.

    Returns:
        Array of shape ``(n, batch_size)``; rows ``i .. i + batch_size``
        hold the matrix of the batch starting at row ``i``.
    """
    print("Computing pairwise distances...")
    n = X.shape[0]
    P = np.zeros([n, batch_size])
    for i in range(0, n, batch_size):
        P_batch = x2p(X[i:i + batch_size])
        P_batch[np.isnan(P_batch)] = 0
        P_batch = P_batch + P_batch.T
        P_batch = P_batch / P_batch.sum()
        P_batch = np.maximum(P_batch, 1e-12)
        # Use the actual batch shape so a short final batch still fits.
        rows, cols = P_batch.shape
        P[i:i + rows, :cols] = P_batch
    return P
def KLdivergence(P, Y):
    """KL divergence between ``P`` and a Student-t similarity matrix of ``Y``.

    ``Q`` is built from the pairwise squared distances of the rows of
    ``Y`` with a kernel of ``alpha = low_dim - 1`` degrees of freedom. Its
    diagonal is zeroed, then it is normalised to sum to 1 and floored at
    ``eps``. Relies on the module-level ``low_dim`` and ``batch_size``.

    Returns:
        Scalar ``sum(P * log((P + eps) / (Q + eps)))``.
    """
    alpha = low_dim - 1.
    row_sq_norm = K.sum(K.square(Y), axis=1)
    eps = K.variable(10e-15)

    # |a|^2 + |b|^2 - 2 a.b for every pair of rows.
    sq_dist = (row_sq_norm
               + K.reshape(row_sq_norm, [-1, 1])
               - 2 * K.dot(Y, K.transpose(Y)))

    Q = K.pow(1 + sq_dist / alpha, -(alpha + 1) / 2)
    # Remove self-similarities before normalising.
    off_diagonal = K.variable(1 - np.eye(batch_size))
    Q = Q * off_diagonal
    Q = Q / K.sum(Q)
    Q = K.maximum(Q, eps)

    log_ratio = K.log((P + eps) / (Q + eps))
    return K.sum(P * log_ratio)
def eucLL(y_true, y_pred):
    """Mean binary cross-entropy over the last axis, on clipped predictions.

    ``y_pred`` is clipped to ``[1e-15, 1 - 1e-15]`` and passed as the first
    argument of ``K.binary_crossentropy``, with ``y_true`` as the second.
    NOTE(review): the argument order of ``K.binary_crossentropy`` differs
    between Keras versions -- confirm it matches the installed one.
    """
    myEps = 1e-15
    upper_clipped = K.minimum(y_pred, 1 - myEps)
    probs = K.maximum(upper_clipped, myEps)
    return K.mean(K.binary_crossentropy(probs, y_true), axis=-1)

# Compile
# initLR = 0.001
# momentum = 0.9
# sgd = SGD(lr=initLR, momentum=momentum, decay=0, nesterov=False)
def euclidean_distance(vects):
    """Euclidean distance between two batches of vectors.

    Args:
        vects: pair ``(x, y)`` of tensors.

    Returns:
        ``sqrt(max(sum((x - y)^2, axis=1), K.epsilon()))`` with the reduced
        axis kept. The floor keeps the square root away from zero.
    """
    left, right = vects
    squared = K.sum(K.square(left - right), axis=1, keepdims=True)
    return K.sqrt(K.maximum(squared, K.epsilon()))
def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    Returns ``mean(y_true * y_pred^2
    + (1 - y_true) * max(margin - y_pred, 0)^2)`` with ``margin = 1``.
    '''
    margin = 1
    pull_term = y_true * K.square(y_pred)
    push_term = (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))
    return K.mean(pull_term + push_term)

# Accuracy
def eucLL(y_true, y_pred):
    """Mean binary cross-entropy against the inverted labels.

    ``y_pred`` is clipped to ``[1e-15, 1 - 1e-15]`` and passed as the first
    argument of ``K.binary_crossentropy``, with ``1 - y_true`` as the
    second. The result is averaged over the last axis.
    NOTE(review): the argument order of ``K.binary_crossentropy`` differs
    between Keras versions -- confirm it matches the installed one.
    """
    myEps = 1e-15
    upper_clipped = K.minimum(y_pred, 1 - myEps)
    probs = K.maximum(upper_clipped, myEps)
    flipped_labels = 1 - y_true
    return K.mean(K.binary_crossentropy(probs, flipped_labels), axis=-1)

# Compile
def compile_rt(self, optimizer, **kwargs):
    """Build and compile the ``_rt`` training and prediction models.

    The training model takes (subject, bad subject, relation, good object)
    and outputs ``max(1e-6, margin - good_score + bad_score)``, with
    ``margin`` read from ``self.config['margin']``. The bad score comes
    from replacing the subject with ``self.subject_bad``. The prediction
    model scores (subject, relation, good object) and is compiled with
    ``'binary_crossentropy'``.

    Args:
        optimizer: optimizer passed to both ``compile`` calls.
        **kwargs: forwarded unchanged to both ``compile`` calls.
    """
    scorer = self.get_qa_model_rt()
    positive_inputs = [self.subject, self.relation, self.object_good]
    negative_inputs = [self.subject_bad, self.relation, self.object_good]

    good_output = scorer(positive_inputs)
    bad_output = scorer(negative_inputs)

    # x[0] is the good score and x[1] the bad score.
    hinge = lambda x: K.maximum(1e-6, self.config['margin'] - x[0] + x[1])
    loss = merge([good_output, bad_output],
                 mode=hinge,
                 output_shape=lambda x: x[0])

    self.training_model_rt = Model(
        input=[self.subject, self.subject_bad,
               self.relation, self.object_good],
        output=loss)
    # The model output already is the loss; y_true is added and subtracted
    # so that it still appears in the expression.
    passthrough = lambda y_true, y_pred: y_pred + y_true - y_true
    self.training_model_rt.compile(loss=passthrough, optimizer=optimizer,
                                   **kwargs)

    self.prediction_model_rt = Model(
        input=[self.subject, self.relation, self.object_good],
        output=good_output)
    self.prediction_model_rt.compile(loss='binary_crossentropy',
                                     optimizer=optimizer, **kwargs)
def contrastive_loss(y_true, y_pred):
    """Contrastive loss with a fixed margin of 1.

    Returns ``mean(y_true * y_pred^2
    + (1 - y_true) * max(margin - y_pred, 0)^2)``.
    """
    margin = 1
    pull_term = y_true * K.square(y_pred)
    push_term = (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))
    return K.mean(pull_term + push_term)
def nrlu(x):
    """Noisy rectified linear unit.

    Adds Gaussian noise to ``x`` and rectifies: ``max(x + eta, 0)``. The
    noise standard deviation is the mean of ``sigmoid(x)`` over all
    elements. Uses the module-level ``random_normal``.
    """
    noise_std = K.mean(K.sigmoid(x))
    eta = random_normal(shape=x.shape, std=noise_std)
    return K.maximum(x + eta, 0)
def sample_h_given_x(self, x):
    """Sample the hidden units given the visible input ``x``.

    Returns:
        ``(h_samp, h_pre, h_sigm)`` where ``h_pre = x . W + bh``,
        ``h_sigm = max(scaling_h_given_x * h_pre, 0)`` and ``h_samp`` is
        ``nrlu(h_pre)``.
    """
    h_pre = K.dot(x, self.W) + self.bh
    h_sigm = K.maximum(self.scaling_h_given_x * h_pre, 0)
    # Inline alternative kept from the original (scaled noise std):
    # std = K.mean(K.sigmoid(self.scaling_h_given_x * h_pre))
    # eta = random_normal(shape=h_pre.shape, std=std)
    # h_samp = K.maximum(h_pre + eta, 0)
    return nrlu(h_pre), h_pre, h_sigm
def weighted_bce_loss(y_true, y_pred, weight):
    """Weighted binary cross-entropy computed from logits.

    ``y_pred`` is clipped to ``[1e-7, 1 - 1e-7]`` and converted back to
    logits. The loss follows the formulation of
    https://www.tensorflow.org/api_docs/python/tf/nn/weighted_cross_entropy_with_logits
    with ``weight`` applied where ``y_true`` is set.

    Returns:
        ``sum(loss) / sum(weight)``.
    """
    # avoiding overflow
    epsilon = 1e-7
    clipped = K.clip(y_pred, epsilon, 1. - epsilon)
    logit_y_pred = K.log(clipped / (1. - clipped))

    positive_scale = 1. + (weight - 1.) * y_true
    # Stable form of log(1 + exp(-logit)).
    softplus_neg = (K.log(1. + K.exp(-K.abs(logit_y_pred)))
                    + K.maximum(-logit_y_pred, 0.))
    loss = (1. - y_true) * logit_y_pred + positive_scale * softplus_neg
    return K.sum(loss) / K.sum(weight)
def ranking_loss(self, y_true, y_pred):
    """Pairwise ranking loss: ``mean(1 - sigmoid(pos - neg))``.

    Column 0 of ``y_pred`` is the positive score and column 1 the negative
    score. ``y_true`` is not used.
    """
    #import pdb;pdb.set_trace()
    score_gap = y_pred[:, 0] - y_pred[:, 1]
    # Margin alternative: K.maximum(1.0 + neg_pred - pos_pred, 0.0)
    pairwise = 1 - K.sigmoid(score_gap)
    return K.mean(pairwise)  # + 0 * y_true
def nrlu(x):
    """Noisy rectified linear unit.

    Adds Gaussian noise from ``K.random_normal`` to ``x`` and rectifies:
    ``max(x + eta, 0)``. The noise standard deviation is the mean of
    ``sigmoid(x)`` over all elements.
    """
    noise_std = K.mean(K.sigmoid(x))
    eta = K.random_normal(shape=x.shape, std=noise_std)
    return K.maximum(x + eta, 0)
def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    Returns ``mean(y_true * y_pred^2
    + (1 - y_true) * max(margin - y_pred, 0)^2)`` with ``margin = 1``.
    '''
    margin = 1
    pull_term = y_true * K.square(y_pred)
    push_term = (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))
    return K.mean(pull_term + push_term)
def ramp(y_true, y_pred):
    """Ramp loss: the hinge ``1 - beta * y_true * y_pred`` clipped to [0, 1].

    ``beta`` is fixed at 1.0. The result is averaged over the last axis.
    """
    beta = 1.0
    hinge = K.maximum(0., 1. - beta * y_true * y_pred)
    capped = K.minimum(1., hinge)
    return K.mean(capped, axis=-1)
def triplet_loss(self, y_true, y_pred):
    """Margin loss between each class probability and the target's.

    ``y_pred`` goes through a sigmoid. ``p_plus`` is the sum of the
    probabilities selected by ``y_true`` in each row. The loss for a row is
    ``sum(max(0, p - p_plus + self.margin))`` over axis 1.
    NOTE(review): the target entries themselves contribute
    ``max(0, margin)`` each -- confirm this is intended.
    """
    probs = K.sigmoid(y_pred)
    p_plus = K.sum(y_true * probs, axis=1, keepdims=True)
    violations = K.maximum(0, probs - p_plus + self.margin)
    # return T.max(L, axis=1)
    return K.sum(violations, axis=1)
def amplitude_to_decibel(x, amin=1e-10, dynamic_range=80.0):
    """[K] Convert a (linear) amplitude tensor to decibels, peak at 0 dB.

    Computes ``10 * log10(max(x, amin))``, subtracts the global maximum so
    the largest value is 0, then floors the result at ``-dynamic_range``.

    x: Keras tensor or variable.
    amin: minimum amplitude. Amplitudes smaller than `amin` are set to it.
    dynamic_range: dynamic range in decibels.
    """
    ln10 = np.log(10).astype(K.floatx())
    floored = K.maximum(x, amin)
    log_spec = 10 * K.log(floored) / ln10
    log_spec = log_spec - K.max(log_spec)  # [-?, 0]
    return K.maximum(log_spec, -1 * dynamic_range)  # [-dynamic_range, 0]
def build_discriminator(self):
    """Build the discriminator with validity, label and continuous heads.

    A shared convolutional trunk maps an image of shape ``self.img_shape``
    to a 1024-unit feature vector. Three outputs are built on top of it:
      * ``validity``: one sigmoid unit on the trunk features;
      * ``label``: softmax over ``self.num_classes``;
      * ``cont``: a linear mean concatenated with a log standard deviation
        that is floored at -16.
    The last two share a 128-unit hidden branch.

    Returns:
        ``Model(img, [validity, label, cont])``.
    """
    def linmax(x):
        # Floor the log standard deviation at -16.
        return K.maximum(x, -16)

    def linmax_shape(input_shape):
        return input_shape

    model = Sequential()
    trunk_layers = (
        Conv2D(64, kernel_size=4, strides=2, input_shape=self.img_shape,
               padding="same"),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(128, kernel_size=4, strides=2, padding="same"),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        Dropout(0.25),
        Flatten(),
        Dense(1024),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
    )
    for layer in trunk_layers:
        model.add(layer)
    model.summary()

    img = Input(shape=self.img_shape)
    features = model(img)

    validity = Dense(1, activation="sigmoid")(features)

    # Shared hidden branch for the label and continuous heads.
    c_model = Dense(128)(features)
    c_model = LeakyReLU(alpha=0.2)(c_model)
    c_model = BatchNormalization(momentum=0.8)(c_model)

    label = Dense(self.num_classes, activation="softmax")(c_model)

    mean = Dense(1, activation="linear")(c_model)
    log_stddev = Dense(1)(c_model)
    log_stddev = Lambda(linmax, output_shape=linmax_shape)(log_stddev)
    cont = concatenate([mean, log_stddev], axis=1)

    return Model(img, [validity, label, cont])
def build_model2(self):
    """Build and compile the three-input triplet model with an auxiliary classifier.

    Three images go through the shared ``self.vision_model`` to get three
    hash vectors. The main output concatenates the squared distances
    hash1-hash2 and hash1-hash3 and is trained with a triplet hinge loss.
    The auxiliary output is a softmax over ``self.nb_classes`` on hash1.
    The result is stored in ``self.model``.
    """
    def euclidean_distance(vecs):
        # Squared Euclidean distance (no square root) with axis 1 kept.
        x, y = vecs
        return K.sum(K.square(x - y), axis=1, keepdims=True)

    def euclidean_dist_output_shape(shapes):
        # One distance per sample: (batch, 1).
        shape1, _ = shapes
        return shape1[0], 1

    def triplet_loss(y_true, y_pred):
        # Use y_true as alpha
        # Column 0 is the distance to input 2, column 1 to input 3.
        mse0, mse1 = y_pred[:, 0], y_pred[:, 1]
        return K.maximum(0.0, mse0 - mse1 + y_true[:, 0])

    # input image dimensions
    img_rows, img_cols, img_channel = 100, 100, 3
    # number of convolutional filters to use
    nb_filters = 20
    # size of pooling area for max pooling
    nb_pool = 2
    # convolution kernel size
    nb_conv = 5

    # build a vision model
    # Layers are appended to the existing self.vision_model; inputs are
    # declared channels-first: (img_channel, img_rows, img_cols).
    self.vision_model.add(Convolution2D(nb_filters, nb_conv, nb_conv, activation='relu', input_shape=(img_channel, img_rows, img_cols)))
    self.vision_model.add(Convolution2D(nb_filters, nb_conv, nb_conv, activation='relu'))
    self.vision_model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    self.vision_model.add(Dropout(0.25))
    self.vision_model.add(Convolution2D(nb_filters, nb_conv, nb_conv, activation='relu'))
    self.vision_model.add(Convolution2D(nb_filters, nb_conv, nb_conv, activation='relu'))
    self.vision_model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    self.vision_model.add(Flatten())
    self.vision_model.add(Dense(self.hash_len))  # TODO: tunable!

    img1 = Input(shape=(img_channel, img_rows, img_cols), name='X1')
    img2 = Input(shape=(img_channel, img_rows, img_cols), name='X2')
    img3 = Input(shape=(img_channel, img_rows, img_cols), name='X3')
    # The same vision model (shared weights) encodes all three images.
    hash1, hash2 = self.vision_model(img1), self.vision_model(img2)
    hash3 = self.vision_model(img3)
    # Auxiliary classifier on the first image's hash.
    vid = Dense(self.nb_classes, activation='softmax', name='aux_output')(hash1)
    distance_layer = Lambda(euclidean_distance, output_shape=euclidean_dist_output_shape)
    dist12 = distance_layer([hash1, hash2])
    dist13 = distance_layer([hash1, hash3])
    # main_output columns: [dist12, dist13], as read by triplet_loss.
    merged_out = merge([dist12, dist13], mode='concat', name='main_output')
    self.model = Model(input=[img1, img2, img3], output=[merged_out, vid])
    self.model.summary()
    print(self.model.output_shape)
    print('DeepID dim:', self.hash_len)
    self.model.compile(optimizer='adadelta', loss={'main_output': triplet_loss, 'aux_output': 'categorical_crossentropy'}, loss_weights={'main_output': 1., 'aux_output': self.aux_weight})
def hinge_rank_loss(word_vectors, image_vectors, TESTING=False):
    """
    Custom hinge rank loss per (image, label) example - Page4.

    word_vectors is y_true
    image_vectors is y_pred

    Row 0 of each tensor is the correct item and the remaining
    INCORRECT_BATCH rows are wrong ones. After L2-normalising every row,
    the loss is
    ``sum(max(MARGIN - <w_correct, i_correct> + <w_correct, i_wrong>, 0))
    / INCORRECT_BATCH``. Relies on the module-level WORD_DIM,
    INCORRECT_BATCH and MARGIN. When TESTING is true the intermediate
    shapes are evaluated and asserted.
    """
    slice_first = lambda x: x[0:1 , :]
    slice_but_first = lambda x: x[1:, :]

    def unit_rows(t):
        # Divide every row by its L2 norm.
        return t / K.sqrt(K.sum(K.square(t), axis=1, keepdims=True))

    wrong_shape = (INCORRECT_BATCH, WORD_DIM)

    # separate correct/wrong images
    correct_image = Lambda(slice_first, output_shape=(1, WORD_DIM))(image_vectors)
    wrong_images = Lambda(slice_but_first, output_shape=wrong_shape)(image_vectors)

    # separate correct/wrong words
    correct_word = Lambda(slice_first, output_shape=(1, WORD_DIM))(word_vectors)
    wrong_words = Lambda(slice_but_first, output_shape=wrong_shape)(word_vectors)

    # replicate the single correct row to line up with the wrong rows,
    # then convert everything to unit vectors
    correct_words = unit_rows(K.tile(correct_word, (INCORRECT_BATCH, 1)))
    wrong_words = unit_rows(wrong_words)
    correct_images = unit_rows(K.tile(correct_image, (INCORRECT_BATCH, 1)))
    wrong_images = unit_rows(wrong_images)

    # correct_image VS incorrect_words | Note the singular/plurals
    # cost_images = MARGIN - K.sum(correct_images * correct_words, 1) + K.sum(correct_images * wrong_words, 1)
    # cost_images = K.maximum(cost_images, 0.0)

    # correct_word VS incorrect_images | Note the singular/plurals
    matched_sim = K.sum(correct_words * correct_images, axis=1)
    mismatched_sim = K.sum(correct_words * wrong_images, axis=1)
    cost_words = K.maximum(MARGIN - matched_sim + mismatched_sim, 0.0)

    # cost_words is a vector at this point - reduce it to a scalar
    # cost_images = K.sum(cost_images, axis=-1)
    cost_words = K.sum(cost_words, axis=-1)

    if TESTING:
        # ipdb.set_trace()
        wrong_words_shape = K.eval(wrong_words).shape
        correct_words_shape = K.eval(correct_words).shape
        wrong_images_shape = K.eval(wrong_images).shape
        correct_images_shape = K.eval(correct_images).shape
        assert wrong_words_shape[0] == INCORRECT_BATCH
        assert correct_words_shape[0] == INCORRECT_BATCH
        assert wrong_images_shape[0] == INCORRECT_BATCH
        assert correct_images_shape[0] == INCORRECT_BATCH
        assert correct_words_shape == correct_images_shape
        assert wrong_words_shape == wrong_images_shape
        assert correct_words_shape == wrong_images_shape

    # return cost_words + cost_images
    return cost_words/INCORRECT_BATCH