The following 50 code examples, extracted from open-source Python projects, illustrate how to use keras.backend.tanh().
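Before the project examples, a quick orientation: K.tanh applies the element-wise hyperbolic tangent to a backend tensor, squashing values into (-1, 1). A minimal stand-alone sketch (not taken from any of the projects below):

from keras import backend as K
import numpy as np

# Build a small constant tensor and squash it with tanh.
x = K.constant(np.array([[-2.0, 0.0, 2.0]]))
y = K.tanh(x)
print(K.eval(y))  # approximately [[-0.964, 0.0, 0.964]]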
def call(self, x, mask=None):
    mean = super(IntraAttention, self).call(x, mask)
    # x: (batch_size, input_length, input_dim)
    # mean: (batch_size, input_dim)
    ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)  # (1, input_length)
    # (batch_size, input_length, input_dim)
    tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
    if mask is not None:
        if K.ndim(mask) > K.ndim(x):
            # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
            # TODO: Fix Bidirectional.
            mask = K.any(mask, axis=(-2, -1))
        if K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
        x = switch(mask, x, K.zeros_like(x))
    # (batch_size, input_length, proj_dim)
    projected_combination = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
    scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
    weights = K.softmax(scores)  # (batch_size, input_length)
    attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
    return attended_x
def __init__(self, units, h, h_dim,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             #activation='tanh', inner_activation='hard_sigmoid',
             #W_regularizer=None, U_regularizer=None, b_regularizer=None,
             #dropout_W=0., dropout_U=0.,
             **kwargs):
    self.units = units
    self.h = h[:, -1, :]
    self.h_dim = h_dim
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    #self.activation = activations.get(activation)
    #self.inner_activation = activations.get(inner_activation)
    #self.W_regularizer = regularizers.get(W_regularizer)
    #self.U_regularizer = regularizers.get(U_regularizer)
    #self.b_regularizer = regularizers.get(b_regularizer)
    #self.dropout_W = dropout_W
    #self.dropout_U = dropout_U
    #if self.dropout_W or self.dropout_U:
    #    self.uses_learning_phase = True
    super(Attention, self).__init__(**kwargs)
def __init__(self, units, h, h_dim,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             #activation='tanh', inner_activation='hard_sigmoid',
             #W_regularizer=None, U_regularizer=None, b_regularizer=None,
             #dropout_W=0., dropout_U=0.,
             **kwargs):
    self.units = units
    self.h = h
    self.h_dim = h_dim
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    #self.activation = activations.get(activation)
    #self.inner_activation = activations.get(inner_activation)
    #self.W_regularizer = regularizers.get(W_regularizer)
    #self.U_regularizer = regularizers.get(U_regularizer)
    #self.b_regularizer = regularizers.get(b_regularizer)
    #self.dropout_W = dropout_W
    #self.dropout_U = dropout_U
    #if self.dropout_W or self.dropout_U:
    #    self.uses_learning_phase = True
    super(SimpleAttention, self).__init__(**kwargs)
def __init__(self, units, h, h_dim,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             #activation='tanh', inner_activation='hard_sigmoid',
             #W_regularizer=None, U_regularizer=None, b_regularizer=None,
             #dropout_W=0., dropout_U=0.,
             **kwargs):
    self.units = units
    self.h = h[:, -1, :]
    self.h_dim = h_dim
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    #self.activation = activations.get(activation)
    #self.inner_activation = activations.get(inner_activation)
    #self.W_regularizer = regularizers.get(W_regularizer)
    #self.U_regularizer = regularizers.get(U_regularizer)
    #self.b_regularizer = regularizers.get(b_regularizer)
    #self.dropout_W = dropout_W
    #self.dropout_U = dropout_U
    #if self.dropout_W or self.dropout_U:
    #    self.uses_learning_phase = True
    super(SSimpleAttention, self).__init__(**kwargs)
def step(self, inputs, states):
    h_tm1 = states[0]  # previous memory
    #B_U = states[1]   # dropout matrices for recurrent units
    #B_W = states[2]
    h_tm1a = K.dot(h_tm1, self.Wa)
    eij = K.dot(K.tanh(h_tm1a + K.dot(inputs[:, :self.h_dim], self.Ua)), self.Va)
    eijs = K.repeat_elements(eij, self.h_dim, axis=1)
    #alphaij = K.softmax(eijs)  # batchsize * lenh h batchsize * lenh * ndim
    #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2, 0, 1]) * alphaij, [1, 2, 0])
    #cisum = K.sum(ci, axis=1)
    cisum = eijs * inputs[:, :self.h_dim]
    #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)
    zr = K.sigmoid(K.dot(inputs[:, self.h_dim:], self.Wzr) +
                   K.dot(h_tm1, self.Uzr) +
                   K.dot(cisum, self.Czr))
    zi = zr[:, :self.units]
    ri = zr[:, self.units: 2 * self.units]
    si_ = K.tanh(K.dot(inputs[:, self.h_dim:], self.W) +
                 K.dot(ri * h_tm1, self.U) +
                 K.dot(cisum, self.C))
    si = (1 - zi) * h_tm1 + zi * si_
    return si, [si]  #h_tm1, [h_tm1]
def call(self, x, mask=None):
    eij = dot_product(x, self.W)
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())
    # in some cases, especially in the early stages of training, the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number (epsilon) to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
def call(self, x, mask=None):
    uit = dot_product(x, self.W)
    if self.bias:
        uit += self.b
    uit = K.tanh(uit)
    ait = K.dot(uit, self.u)
    a = K.exp(ait)
    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())
    # in some cases, especially in the early stages of training, the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number (epsilon) to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
def call(self, x, mask=None):
    # x: [..., time_steps, features]
    # ut: [..., time_steps, attention_dims]
    ut = K.dot(x, self.kernel)
    if self.use_bias:
        ut = K.bias_add(ut, self.bias)
    ut = K.tanh(ut)
    if self.use_context:
        ut = ut * self.context_kernel
    # Collapse `attention_dims` to 1. This indicates the weight for each time_step.
    ut = K.sum(ut, axis=-1, keepdims=True)
    # Convert those weights into a distribution along the time axis,
    # i.e., the sum of alphas along the `time_steps` axis should be 1.
    self.at = _softmax(ut, dim=1)
    if mask is not None:
        self.at *= K.cast(K.expand_dims(mask, -1), K.floatx())
    # Weighted sum along the `time_steps` axis.
    return K.sum(x * self.at, axis=-2)
def call(self, x, mask=None):
    y = K.dot(x, self.att_W)
    if not self.activation:
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, y, axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, y, axes=[0, 2])
    elif self.activation == 'tanh':
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
    weights = K.softmax(weights)
    out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]), [0, 2, 1])
    if self.op == 'attsum':
        out = out.sum(axis=1)
    elif self.op == 'attmean':
        out = out.sum(axis=1) / mask.sum(axis=1, keepdims=True)
    return K.cast(out, K.floatx())
def custom_experiment_parameters():
    """ Here we define the experiment parameters.
    We are using use_default_values=True, which will initialize all the parameters
    with their default values. These parameters are then fixed for the duration of
    the experiment and won't evolve.
    That means that we need to manually specify which parameters we want to test,
    and the possible values, either intervals or lists of values.
    If we want to test all the parameters and possible values, we can set
    use_default_values to False. In that case, random values will be generated
    and tested during the experiment. We can redefine some parameters if we want
    to fix their values.
    Reference parameters and default values are defined in minos.model.parameters
    """
    experiment_parameters = ExperimentParameters(use_default_values=True)
    experiment_parameters.layout_parameter('rows', 1)
    experiment_parameters.layout_parameter('blocks', int_param(1, 5))
    experiment_parameters.layout_parameter('layers', int_param(1, 5))
    experiment_parameters.layer_parameter('Dense.output_dim', int_param(10, 100))
    experiment_parameters.layer_parameter('Dense.activation', string_param(['relu', 'tanh', 'custom_activation_1']))
    experiment_parameters.layer_parameter('Dropout.p', float_param(0.1, 0.9))
    return experiment_parameters
def setup_output(self): """ Setup output tensor """ coordinates = get_coordinates(self.output_shape, input_channels=self.input_channels, num_filters=self.num_filters) num_parameters = np.prod(self.output_shape) * self.num_filters * \ self.input_channels print (num_parameters) # self.z_r = K.repeat_elements(self.z, rep=num_parameters, axis=0) self.z_r = self.init((num_parameters, 4)) # coordinates = K.concatenate([self.z_r, coordinates], axis=1) output = K.tanh(K.dot(self.z_r, self.weights[0]) + self.biases[0]) for i in range(1, len(self.weights) - 1): output = K.tanh(K.dot(output, self.weights[i]) + self.biases[i]) output = K.sigmoid(K.dot(output, self.weights[-1]) + self.biases[-1]) self.output = K.reshape(output, (self.num_filters, self.input_channels, *self.output_shape))
def get_output(self, x): """ Generate filters for given input """ # Assuming 'th' ordering # Input shape (batch, channels, rows, columns) # Output shape (batch, filter_size ** 2, rows, columns) # Use input to generate filter # (batch, 15, rows, columns) output = K.relu(K.conv2d(x, self.kernel1, border_mode="same")) # (batch, rows, columns, 15) output = K.permute_dimensions(output, (0, 2, 3, 1)) # (batch, rows, columns, 20) # output = K.tanh(K.dot(output, self.w1) + self.b1) # (batch, rows, columns, fs**2) output = K.tanh(K.dot(output, self.w2) + self.b2) # (batch, fs**2, rows, columns) output = K.permute_dimensions(output, (0, 3, 1, 2)) return output
def __call__(self, model):
    if self.crop_right:
        model = Lambda(lambda x: x[:, :, :K.int_shape(x)[2] - 1, :])(model)
    if self.v is not None:
        model = Merge(mode='sum')([model, self.v])
    if self.h is not None:
        hV = Dense(output_dim=2 * self.filters)(self.h)
        hV = Reshape((1, 1, 2 * self.filters))(hV)
        model = Lambda(lambda x: x[0] + x[1])([model, hV])
    model_f = Lambda(lambda x: x[:, :, :, :self.filters])(model)
    model_g = Lambda(lambda x: x[:, :, :, self.filters:])(model)
    model_f = Lambda(lambda x: K.tanh(x))(model_f)
    model_g = Lambda(lambda x: K.sigmoid(x))(model_g)
    res = Merge(mode='mul')([model_f, model_g])
    return res
def call(self, inputs, mask=None):
    if type(inputs) is not list or len(inputs) <= 1:
        raise Exception('BilinearTensorLayer must be called on a list of tensors '
                        '(at least 2). Got: ' + str(inputs))
    e1 = inputs[0]
    e2 = inputs[1]
    batch_size = K.shape(e1)[0]
    k = self.output_dim
    # print([e1, e2])
    feed_forward_product = K.dot(K.concatenate([e1, e2]), self.V)
    # print(feed_forward_product)
    bilinear_tensor_products = [K.sum((e2 * K.dot(e1, self.W[0])) + self.b, axis=1)]
    # print(bilinear_tensor_products)
    for i in range(1, k):
        btp = K.sum((e2 * K.dot(e1, self.W[i])) + self.b, axis=1)
        bilinear_tensor_products.append(btp)
    result = K.tanh(K.reshape(K.concatenate(bilinear_tensor_products, axis=0),
                              (batch_size, k)) + feed_forward_product)
    # print(result)
    return result
def step(self, inputs, states):
    vP_t = inputs
    hP_tm1 = states[0]
    _ = states[1:3]  # ignore internal dropout/masks
    vP, WP_v, WPP_v, v, W_g2 = states[3:8]
    vP_mask, = states[8:]

    WP_v_Dot = K.dot(vP, WP_v)
    WPP_v_Dot = K.dot(K.expand_dims(vP_t, axis=1), WPP_v)

    s_t_hat = K.tanh(WPP_v_Dot + WP_v_Dot)
    s_t = K.dot(s_t_hat, v)
    s_t = K.batch_flatten(s_t)
    a_t = softmax(s_t, mask=vP_mask, axis=1)
    c_t = K.batch_dot(a_t, vP, axes=[1, 1])

    GRU_inputs = K.concatenate([vP_t, c_t])
    g = K.sigmoid(K.dot(GRU_inputs, W_g2))
    GRU_inputs = g * GRU_inputs

    hP_t, s = super(SelfAttnGRU, self).step(GRU_inputs, states)
    return hP_t, s
def step(self, inputs, states):
    # input
    ha_tm1 = states[0]                        # (B, 2H)
    _ = states[1:3]                           # ignore internal dropout/masks
    hP, WP_h, Wa_h, v = states[3:7]           # hP: (B, P, 2H)
    hP_mask, = states[7:8]

    WP_h_Dot = K.dot(hP, WP_h)                                # (B, P, H)
    Wa_h_Dot = K.dot(K.expand_dims(ha_tm1, axis=1), Wa_h)     # (B, 1, H)

    s_t_hat = K.tanh(WP_h_Dot + Wa_h_Dot)     # (B, P, H)
    s_t = K.dot(s_t_hat, v)                   # (B, P, 1)
    s_t = K.batch_flatten(s_t)                # (B, P)
    a_t = softmax(s_t, mask=hP_mask, axis=1)  # (B, P)
    c_t = K.batch_dot(hP, a_t, axes=[1, 1])   # (B, 2H)

    GRU_inputs = c_t
    ha_t, (ha_t_,) = super(PointerGRU, self).step(GRU_inputs, states)

    return a_t, [ha_t]
def call(self, inputs, mask=None):
    assert(isinstance(inputs, list) and len(inputs) == 5)
    uQ, WQ_u, WQ_v, v, VQ_r = inputs
    uQ_mask = mask[0] if mask is not None else None

    ones = K.ones_like(K.sum(uQ, axis=1, keepdims=True))  # (B, 1, 2H)
    s_hat = K.dot(uQ, WQ_u)
    s_hat += K.dot(ones, K.dot(WQ_v, VQ_r))
    s_hat = K.tanh(s_hat)
    s = K.dot(s_hat, v)
    s = K.batch_flatten(s)

    a = softmax(s, mask=uQ_mask, axis=1)
    rQ = K.batch_dot(uQ, a, axes=[1, 1])

    return rQ
def step(self, inputs, states):
    uP_t = inputs
    vP_tm1 = states[0]
    _ = states[1:3]  # ignore internal dropout/masks
    uQ, WQ_u, WP_v, WP_u, v, W_g1 = states[3:9]
    uQ_mask, = states[9:10]

    WQ_u_Dot = K.dot(uQ, WQ_u)                               # WQ_u
    WP_v_Dot = K.dot(K.expand_dims(vP_tm1, axis=1), WP_v)    # WP_v
    WP_u_Dot = K.dot(K.expand_dims(uP_t, axis=1), WP_u)      # WP_u

    s_t_hat = K.tanh(WQ_u_Dot + WP_v_Dot + WP_u_Dot)
    s_t = K.dot(s_t_hat, v)                                  # v
    s_t = K.batch_flatten(s_t)
    a_t = softmax(s_t, mask=uQ_mask, axis=1)
    c_t = K.batch_dot(a_t, uQ, axes=[1, 1])

    GRU_inputs = K.concatenate([uP_t, c_t])
    g = K.sigmoid(K.dot(GRU_inputs, W_g1))                   # W_g1
    GRU_inputs = g * GRU_inputs

    vP_t, s = super(QuestionAttnGRU, self).step(GRU_inputs, states)
    return vP_t, s
def call(self, x, mask=None):
    #print '\nhi in attention'
    #print x._keras_shape
    uit = K.dot(x, self.W)
    #print '\nuit'
    #print uit._keras_shape
    uit += self.bw
    uit = K.tanh(uit)
    ait = K.dot(uit, self.uw)
    a = K.exp(ait)
    # apply mask after the exp. will be re-normalized next
    #print mask
    if mask is not None:
        a *= K.cast(mask, K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = K.expand_dims(a)
    #print "in att ", K.shape(a)
    weighted_input = x * a
    #print weighted_input
    ssi = K.sum(weighted_input, axis=1)
    #print "type ", type(ssi)
    #print "in att si ", theano.tensor.shape(ssi)
    return [a, ssi]
def __init__(self, op='attsum', activation='tanh', init_stdev=0.01, **kwargs):
    self.supports_masking = True
    assert op in {'attsum', 'attmean'}
    assert activation in {None, 'tanh'}
    self.op = op
    self.activation = activation
    self.init_stdev = init_stdev
    super(Attention, self).__init__(**kwargs)
def custom_activation(x):
    return 1 + backend.tanh(x)
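A custom activation like this can be passed directly wherever Keras accepts an activation callable. A minimal usage sketch (the model and layer sizes here are placeholders, not part of the original project):

from keras import backend
from keras.models import Sequential
from keras.layers import Dense

def custom_activation(x):
    # shifts the tanh output from (-1, 1) to (0, 2)
    return 1 + backend.tanh(x)

model = Sequential()
model.add(Dense(32, input_dim=8, activation=custom_activation))  # callable passed directly
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')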
def call(self, x, mask=None):
    # Note: dimshuffle is Theano-specific, so this layer assumes the Theano backend.
    eij = K.tanh(K.dot(x, self.W))
    ai = K.exp(eij)
    weights = ai / K.sum(ai, axis=1).dimshuffle(0, 'x')
    weighted_input = x * weights.dimshuffle(0, 1, 'x')
    return weighted_input.sum(axis=1)
def setup_output(self): """ Setup output tensor """ coordinates = get_coordinates_2D(self.output_shape, scale=self.scale) output = K.sin(K.dot(coordinates, self.weights[0]) + self.biases[0]) for i in range(1, len(self.weights) - 1): output = K.tanh(K.dot(output, self.weights[i]) + self.biases[i]) output = K.sigmoid(K.dot(output, self.weights[-1]) + self.biases[-1]) self.output = K.reshape(output, self.output_shape)
def get_output(self, z):
    """ Return output using the given z
    z has shape (batch_size, z_dim)
    """
    assert len(z.shape) == 2
    assert self.z_dim == z.shape[1]
    total_values = np.prod(self.output_shape)
    batch_total = total_values * z.shape[0]
    z_rep = K.repeat_elements(K.expand_dims(z, 1), total_values, 1)
    coords_rep = K.repeat_elements(K.expand_dims(self.coordinates, 0), z.shape[0], 0)
    coords_rep = K.reshape(coords_rep, (batch_total, self.coordinates.shape[1]))
    z_rep = K.reshape(z_rep, (batch_total, z.shape[1]))
    # Add z and coords to first layer
    output = K.sin(K.dot(coords_rep, self.weights[0]) +
                   self.biases[0] +
                   K.dot(z_rep, self.weights[-1]))
    for i in range(1, len(self.layer_sizes)):
        output = K.tanh(K.dot(output, self.weights[i]) + self.biases[i])
    # Using -2 for weights since -1 is z vector weight
    output = K.sigmoid(K.dot(output, self.weights[-2]) + self.biases[-1])
    return K.reshape(output, (z.shape[0], *self.output_shape))
def get_output(self, x): """ Generate filters for given input """ # Assuming 'th' ordering # Input shape (batch, channels, rows, columns) # Output shape (batch, filter_size ** 2, rows, columns) # Use input to generate filter # (batch, 10, rows, columns) output = K.relu(K.conv2d(x, self.kernel1, border_mode="same")) # (batch, 15, rows, columns) output = K.concatenate([output, self.coordinates], axis=1) # (batch, rows, columns, 15) output = K.permute_dimensions(output, (0, 2, 3, 1)) # (batch, rows, columns, 20) # output = K.tanh(K.dot(output, self.w1) + self.b1) # (batch, rows, columns, fs**2) output = K.tanh(K.dot(output, self.w2) + self.b2) # (batch, fs**2, rows, columns) output = K.permute_dimensions(output, (0, 3, 1, 2)) return output
def __call__(self, p):
    return K.tanh(p) * self.c
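This __call__ signature matches a Keras weight constraint, which maps each weight tensor to a bounded version after every update. A minimal sketch of how such a constraint might be wrapped and attached to a layer; the class name, the value of c, and the layer are assumptions for illustration, not the original project's code:

from keras import backend as K
from keras.constraints import Constraint
from keras.layers import Dense

class TanhBound(Constraint):  # hypothetical name
    """Squash weights into (-c, c) via tanh."""
    def __init__(self, c=1.0):
        self.c = c

    def __call__(self, p):
        return K.tanh(p) * self.c

    def get_config(self):
        return {'c': self.c}

# attach the constraint to a layer's kernel
layer = Dense(16, kernel_constraint=TanhBound(c=0.5))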
def call(self, x, mask=None):
    hidden = K.tanh(K.dot(x, self.Wh) + self.bh)
    output = K.dot(hidden, self.Wo) + self.bo
    return output
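The two lines above compute essentially what a small Dense stack does: a tanh hidden layer followed by a linear output. A minimal equivalent sketch for comparison (the layer sizes are placeholders, not taken from the original project):

from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(64, activation='tanh', input_dim=32))  # hidden = tanh(x . Wh + bh)
model.add(Dense(10))                                   # output = hidden . Wo + bo (linear)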
def call(self, x, mask=None):
    x_abs = K.sqrt(self.epsilon + x**2 + x[:, self.swap_re_im]**2)
    if self.flag_clip:
        x_abs = K.clip(x_abs, self.clip_min, self.clip_max)
    rescale = K.tanh(x_abs) / (x_abs + self.epsilon)
    return rescale * x
def __call__(self, xW, layer_idx):
    '''calculate gated activation maps given input maps '''
    if self.stack_name == 'vertical':
        stack_tag = 'v'
    elif self.stack_name == 'horizontal':
        stack_tag = 'h'

    if self.crop_right:
        xW = Lambda(self._crop_right, name='h_crop_right_' + str(layer_idx))(xW)

    if self.v_map is not None:
        xW = merge([xW, self.v_map], mode='sum', name='h_merge_v_' + str(layer_idx))

    if self.h is not None:
        hV = Dense(output_dim=2 * self.nb_filters, name=stack_tag + '_dense_latent_' + str(layer_idx))(self.h)
        hV = Reshape((1, 1, 2 * self.nb_filters), name=stack_tag + '_reshape_latent_' + str(layer_idx))(hV)
        #xW = merge([xW, hV], mode=lambda x: x[0]+x[1])
        xW = Lambda(lambda x: x[0] + x[1], name=stack_tag + '_merge_latent_' + str(layer_idx))([xW, hV])

    xW_f = Lambda(lambda x: x[:, :, :, :self.nb_filters], name=stack_tag + '_Wf_' + str(layer_idx))(xW)
    xW_g = Lambda(lambda x: x[:, :, :, self.nb_filters:], name=stack_tag + '_Wg_' + str(layer_idx))(xW)

    xW_f = Lambda(lambda x: K.tanh(x), name=stack_tag + '_tanh_' + str(layer_idx))(xW_f)
    xW_g = Lambda(lambda x: K.sigmoid(x), name=stack_tag + '_sigmoid_' + str(layer_idx))(xW_g)

    res = merge([xW_f, xW_g], mode='mul', name=stack_tag + '_merge_gate_' + str(layer_idx))
    #print(type(res), K.int_shape(res), hasattr(res, '_keras_history'))
    return res
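The split-then-gate pattern in the last few Lambda layers above is the PixelCNN gated activation, y = tanh(x_f) * sigmoid(x_g). A compact backend-level restatement, written as a stand-alone helper purely for illustration (the 4D channels-last layout and the split at nb_filters are the only assumptions carried over):

from keras import backend as K

def gated_activation(xW, nb_filters):
    # xW: (batch, rows, cols, 2 * nb_filters); first half feeds tanh, second half the gate
    f = K.tanh(xW[:, :, :, :nb_filters])
    g = K.sigmoid(xW[:, :, :, nb_filters:])
    return f * g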
def call(self, x, mask=None):
    x_reshaped = tf.reshape(x, [K.shape(x)[0] * K.shape(x)[1], K.shape(x)[-1]])
    ui = K.tanh(K.dot(x_reshaped, self.W) + self.bw)
    intermed = tf.reduce_sum(tf.multiply(self.uw, ui), axis=1)
    weights = tf.nn.softmax(tf.reshape(intermed, [K.shape(x)[0], K.shape(x)[1]]), dim=-1)
    weights = tf.expand_dims(weights, axis=-1)
    weighted_input = x * weights
    return K.sum(weighted_input, axis=1)
def call(self, x, mask=None):
    u_i = K.tanh(K.dot(x, self.pre_weight) + self.pre_bias)
    ai = K.exp(K.dot(u_i, self.u_w))
    ai_sum = K.expand_dims(K.sum(ai, axis=1), axis=2)
    weights = ai / ai_sum
    att_output = x * weights
    if self.to_compose:
        att_output = K.sum(att_output, axis=1)
    return att_output
def call(self, x, mask=None):
    u_i = K.tanh(x)
    ai = K.exp(K.dot(u_i, self.u_w))
    ai_sum = K.expand_dims(K.sum(ai, axis=1), axis=2)
    weights = ai / ai_sum
    weighted_input = x * weights
    return weighted_input
def build(self):
    subject = self.subject
    relation = self.relation
    object_ = self.get_object()

    # add embedding layers
    weights = self.model_params.get('initial_embed_weights', None)
    weights = weights if weights is None else [weights]
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=self.model_params.get('n_embed_dims', 100),
                          weights=weights,
                          mask_zero=True)
    subject_embedding = embedding(subject)
    relation_embedding = embedding(relation)
    object_embedding = embedding(object_)

    # dropout
    dropout = Dropout(0.5)
    subject_dropout = dropout(subject_embedding)
    relation_dropout = dropout(relation_embedding)
    object_dropout = dropout(object_embedding)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    subject_maxpool = maxpool(subject_dropout)
    relation_maxpool = maxpool(relation_dropout)
    object_maxpool = maxpool(object_dropout)

    # activation
    activation = Activation('tanh')
    subject_output = activation(subject_maxpool)
    relation_output = activation(relation_maxpool)
    object_output = activation(object_maxpool)

    return subject_output, relation_output, object_output
def build(self):
    subject = self.get_subject()
    relation = self.relation
    object_ = self.object_good

    # add embedding layers
    weights = self.model_params.get('initial_embed_weights', None)
    weights = weights if weights is None else [weights]
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=self.model_params.get('n_embed_dims', 100),
                          weights=weights,
                          mask_zero=True)
    subject_embedding = embedding(subject)
    relation_embedding = embedding(relation)
    object_embedding = embedding(object_)

    # dropout
    dropout = Dropout(0.5)
    subject_dropout = dropout(subject_embedding)
    relation_dropout = dropout(relation_embedding)
    object_dropout = dropout(object_embedding)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    subject_maxpool = maxpool(subject_dropout)
    relation_maxpool = maxpool(relation_dropout)
    object_maxpool = maxpool(object_dropout)

    # activation
    activation = Activation('tanh')
    subject_output = activation(subject_maxpool)
    relation_output = activation(relation_maxpool)
    object_output = activation(object_maxpool)

    return subject_output, relation_output, object_output
    # unused !!!!!!
def call(self, x, mask=None):
    y = K.dot(x, self.att_W)
    if not self.activation:
        weights = K.theano.tensor.tensordot(self.att_v, y, axes=[0, 2])
    elif self.activation == 'tanh':
        weights = K.theano.tensor.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
    weights = K.softmax(weights)
    out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]), [0, 2, 1])
    if self.op == 'attsum':
        out = out.sum(axis=1)
    elif self.op == 'attmean':
        out = out.sum(axis=1) / mask.sum(axis=1, keepdims=True)
    return K.cast(out, K.floatx())
def call(self, x, mask=None):
    # eij = K.dot(x, self.W)  # TF backend doesn't support it, hence the reshape below
    # features_dim = self.W.shape[0]
    # step_dim = x._keras_shape[1]
    features_dim = self.features_dim
    step_dim = self.step_dim

    eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                          K.reshape(self.W, (features_dim, 1))),
                    (-1, step_dim))
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())
    # in some cases, especially in the early stages of training, the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number (epsilon) to the sum.
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
def call(self, x, mask=None):
    # eij = K.dot(x, self.W)  # TF backend doesn't support it, hence the reshape below
    # features_dim = self.W.shape[0]
    # step_dim = x._keras_shape[1]
    features_dim = self.features_dim
    step_dim = self.step_dim

    eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                          K.reshape(self.W, (features_dim, 1))),
                    (-1, step_dim))
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())
    # in some cases, especially in the early stages of training, the sum may be almost zero
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = K.expand_dims(a)
    weighted_input = x * a
    #print weighted_input.shape
    return K.sum(weighted_input, axis=1)
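For intuition, here is a NumPy restatement of the weighting scheme used in the two call() methods above: score each time step with tanh, mask padded steps after the exp, renormalize, and take the weighted sum. It is illustrative only; the shapes and the 1e-7 stabilizer standing in for K.epsilon() are assumptions:

import numpy as np

def masked_attention_pool(x, W, b, mask):
    # x: (batch, steps, features), W: (features,), b: (steps,), mask: (batch, steps)
    eij = np.tanh(np.einsum('bsf,f->bs', x, W) + b)   # score per time step
    a = np.exp(eij) * mask                            # drop padded steps after exp
    a = a / (a.sum(axis=1, keepdims=True) + 1e-7)     # renormalize over time
    return (x * a[:, :, None]).sum(axis=1)            # weighted sum -> (batch, features)

x = np.random.randn(2, 4, 3)
W = np.random.randn(3)
b = np.zeros(4)
mask = np.array([[1, 1, 1, 0], [1, 1, 0, 0]], dtype=float)
print(masked_attention_pool(x, W, b, mask).shape)     # (2, 3)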