The following 49 code examples, extracted from open-source Python projects, illustrate how keras.backend.dot() is used.
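Before the project examples, here is a minimal sketch (not taken from any of the projects below; shapes are illustrative) of what keras.backend.dot() computes: an ordinary matrix product for 2-D tensors, and a contraction of the last axis of the first argument with the first axis of the second for higher-rank inputs.

# Minimal sketch of keras.backend.dot() semantics (illustrative shapes only).
import numpy as np
from keras import backend as K

x = K.variable(np.random.rand(3, 4))         # shape (3, 4)
w = K.variable(np.random.rand(4, 5))         # shape (4, 5)
y = K.dot(x, w)                              # shape (3, 5): ordinary matrix product

batch = K.variable(np.random.rand(2, 3, 4))  # shape (2, 3, 4)
out = K.dot(batch, w)                        # shape (2, 3, 5): contracts the last axis of
                                             # `batch` with the first axis of `w`

print(K.int_shape(y), K.int_shape(out))      # (3, 5) (2, 3, 5)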
def switch(condition, then_tensor, else_tensor):
    """
    Keras' implementation of switch for tensorflow uses tf.switch which accepts only scalar conditions.
    It should use tf.select instead.
    """
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        condition_shape = condition.get_shape()
        input_shape = then_tensor.get_shape()
        if condition_shape[-1] != input_shape[-1] and condition_shape[-1] == 1:
            # This means the last dim is an embedding dim. Keras does not mask this dimension. But tf wants
            # the condition and the then and else tensors to be the same shape.
            condition = K.dot(tf.cast(condition, tf.float32), tf.ones((1, input_shape[-1])))
        return tf.select(tf.cast(condition, dtype=tf.bool), then_tensor, else_tensor)
    else:
        import theano.tensor as T
        return T.switch(condition, then_tensor, else_tensor)
def call(self, x, mask=None):
    mean = super(IntraAttention, self).call(x, mask)
    # x: (batch_size, input_length, input_dim)
    # mean: (batch_size, input_dim)
    ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)  # (1, input_length)
    # (batch_size, input_length, input_dim)
    tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
    if mask is not None:
        if K.ndim(mask) > K.ndim(x):
            # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
            # TODO: Fix Bidirectional.
            mask = K.any(mask, axis=(-2, -1))
        if K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
        x = switch(mask, x, K.zeros_like(x))
    # (batch_size, input_length, proj_dim)
    projected_combination = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
    scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
    weights = K.softmax(scores)  # (batch_size, input_length)
    attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
    return attended_x
def categorical_crossentropy_3d(y_true, y_predicted):
    """
    Computes categorical cross-entropy loss for a softmax distribution in a hot-encoded 3D array
    with shape (num_samples, num_classes, dim1, dim2, dim3)

    Parameters
    ----------
    y_true : keras.placeholder [batches, dim0,dim1,dim2]
        Placeholder for data holding the ground-truth labels encoded in a one-hot representation
    y_predicted : keras.placeholder [batches,channels,dim0,dim1,dim2]
        Placeholder for data holding the softmax distribution over classes

    Returns
    -------
    scalar
        Categorical cross-entropy loss value
    """
    y_true_flatten = K.flatten(y_true)
    y_pred_flatten = K.flatten(y_predicted)
    y_pred_flatten_log = -K.log(y_pred_flatten + K.epsilon())
    num_total_elements = K.sum(y_true_flatten)
    # cross_entropy = K.dot(y_true_flatten, K.transpose(y_pred_flatten_log))
    cross_entropy = tf.reduce_sum(tf.multiply(y_true_flatten, y_pred_flatten_log))
    mean_cross_entropy = cross_entropy / (num_total_elements + K.epsilon())
    return mean_cross_entropy
def preprocess_input(self, inputs, training=None):
    #if self.consume_less == 'cpu':
    #    input_shape = K.int_shape(x)
    #    input_dim = input_shape[2]
    #    timesteps = input_shape[1]
    #    x_z = time_distributed_dense(x, self.W_z, self.b_z, self.dropout_W,
    #                                 input_dim, self.units, timesteps)
    #    x_r = time_distributed_dense(x, self.W_r, self.b_r, self.dropout_W,
    #                                 input_dim, self.units, timesteps)
    #    x_h = time_distributed_dense(x, self.W_h, self.b_h, self.dropout_W,
    #                                 input_dim, self.units, timesteps)
    #    return K.concatenate([x_z, x_r, x_h], axis=2)
    #else:
    #    return x
    self.ha = K.dot(self.h, self.Ua)  # time_distributed_dense(self.h, self.Ua)
    return inputs
def step(self, inputs, states):
    h_tm1 = states[0]  # previous memory
    #B_U = states[1]  # dropout matrices for recurrent units
    #B_W = states[2]
    h_tm1a = K.dot(h_tm1, self.Wa)
    eij = K.dot(K.tanh(h_tm1a + K.dot(inputs[:, :self.h_dim], self.Ua)), self.Va)
    eijs = K.repeat_elements(eij, self.h_dim, axis=1)
    #alphaij = K.softmax(eijs)  # batchsize * lenh h batchsize * lenh * ndim
    #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
    #cisum = K.sum(ci, axis=1)
    cisum = eijs * inputs[:, :self.h_dim]
    #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)
    zr = K.sigmoid(K.dot(inputs[:, self.h_dim:], self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
    zi = zr[:, :self.units]
    ri = zr[:, self.units: 2 * self.units]
    si_ = K.tanh(K.dot(inputs[:, self.h_dim:], self.W) + K.dot(ri * h_tm1, self.U) + K.dot(cisum, self.C))
    si = (1 - zi) * h_tm1 + zi * si_
    return si, [si]  #h_tm1, [h_tm1]
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        # todo: check that this is correct
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
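A hypothetical usage of the wrapper above (shapes are illustrative, not from the source project): scoring each time step of a 3-D input against a 1-D weight vector, as the attention layers that follow do.

# Hypothetical usage of dot_product() above with illustrative shapes.
import numpy as np
from keras import backend as K

x = K.variable(np.random.rand(2, 5, 8))   # (batch=2, time_steps=5, features=8)
kernel = K.variable(np.random.rand(8))    # (features=8,) attention context vector
scores = dot_product(x, kernel)           # (2, 5): one scalar score per time step
print(K.int_shape(scores))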
def call(self, x, mask=None):
    uit = dot_product(x, self.W)
    if self.bias:
        uit += self.b
    uit = K.tanh(uit)
    ait = K.dot(uit, self.u)
    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number epsilon to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
def step(self, x, states):
    h_tm1 = states[0]
    c_tm1 = states[1]
    B_U = states[2]
    B_W = states[3]

    z = LN(K.dot(x * B_W[0], self.kernel), self.gamma_1, self.beta_1) + \
        LN(K.dot(h_tm1 * B_U[0], self.recurrent_kernel), self.gamma_2, self.beta_2)
    if self.use_bias:
        z = K.bias_add(z, self.bias)

    z0 = z[:, :self.units]
    z1 = z[:, self.units: 2 * self.units]
    z2 = z[:, 2 * self.units: 3 * self.units]
    z3 = z[:, 3 * self.units:]

    i = self.recurrent_activation(z0)
    f = self.recurrent_activation(z1)
    c = f * c_tm1 + i * self.activation(z2)
    o = self.recurrent_activation(z3)

    h = o * self.activation(LN(c, self.gamma_3, self.beta_3))
    return h, [h, c]
def call(self, inputs):
    if self.data_format == 'channels_first':
        sq = K.mean(inputs, [2, 3])
    else:
        sq = K.mean(inputs, [1, 2])

    ex = K.dot(sq, self.kernel1)
    if self.use_bias:
        ex = K.bias_add(ex, self.bias1)
    ex = K.relu(ex)

    ex = K.dot(ex, self.kernel2)
    if self.use_bias:
        ex = K.bias_add(ex, self.bias2)
    ex = K.sigmoid(ex)

    if self.data_format == 'channels_first':
        ex = K.expand_dims(ex, -1)
        ex = K.expand_dims(ex, -1)
    else:
        ex = K.expand_dims(ex, 1)
        ex = K.expand_dims(ex, 1)

    return inputs * ex
def vae_loss(self, x, x_decoded_mean):
    xent_loss = K.sum(K.binary_crossentropy(x_decoded_mean, x), axis=-1)
    kl_loss = - 0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1)
    return xent_loss + kl_loss

# def weighted_vae_loss(self, feature_weights):
#     def loss(y_true, y_pred):
#         try:
#             x = K.binary_crossentropy(y_pred, y_true)
#             y = tf.Variable(feature_weights.astype('float32'))
#             # y2 = y_true / K.sum(y_true)
#             # import pdb;pdb.set_trace()
#             xent_loss = K.dot(x, y)
#             kl_loss = - 0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1)
#         except Exception as e:
#             print e
#             import pdb;pdb.set_trace()
#         return xent_loss + kl_loss
#     return loss
def call(self, x, mask=None):
    # x: [..., time_steps, features]
    # ut = [..., time_steps, attention_dims]
    ut = K.dot(x, self.kernel)
    if self.use_bias:
        ut = K.bias_add(ut, self.bias)

    ut = K.tanh(ut)
    if self.use_context:
        ut = ut * self.context_kernel

    # Collapse `attention_dims` to 1. This indicates the weight for each time_step.
    ut = K.sum(ut, axis=-1, keepdims=True)

    # Convert those weights into a distribution but along time axis.
    # i.e., sum of alphas along `time_steps` axis should be 1.
    self.at = _softmax(ut, dim=1)
    if mask is not None:
        self.at *= K.cast(K.expand_dims(mask, -1), K.floatx())

    # Weighted sum along `time_steps` axis.
    return K.sum(x * self.at, axis=-2)
def call(self, x, mask=None):
    y = K.dot(x, self.att_W)
    if not self.activation:
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, y, axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, y, axes=[0, 2])
    elif self.activation == 'tanh':
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
    weights = K.softmax(weights)
    out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]), [0, 2, 1])
    if self.op == 'attsum':
        out = out.sum(axis=1)
    elif self.op == 'attmean':
        out = out.sum(axis=1) / mask.sum(axis=1, keepdims=True)
    return K.cast(out, K.floatx())
def get_initial_states(self, x):
    init_state_h = K.zeros_like(x)
    init_state_h = K.sum(init_state_h, axis=1)
    reducer_s = K.zeros((self.input_dim, self.hidden_dim))
    reducer_f = K.zeros((self.hidden_dim, self.freq_dim))
    reducer_p = K.zeros((self.hidden_dim, self.output_dim))
    init_state_h = K.dot(init_state_h, reducer_s)
    init_state_p = K.dot(init_state_h, reducer_p)

    init_state = K.zeros_like(init_state_h)
    init_freq = K.dot(init_state_h, reducer_f)

    init_state = K.reshape(init_state, (-1, self.hidden_dim, 1))
    init_freq = K.reshape(init_freq, (-1, 1, self.freq_dim))

    init_state_S_re = init_state * init_freq
    init_state_S_im = init_state * init_freq

    init_state_time = K.cast_to_floatx(0.)

    initial_states = [init_state_p, init_state_h, init_state_S_re, init_state_S_im, init_state_time]
    return initial_states
def reading(memory_t, weight_t):
    """
    Reading memory.

    :param memory_t: the $N \times M$ memory matrix at time $t$, where $N$ is the number of
        memory locations, and $M$ is the vector size at each location.
    :param weight_t: $w_t$ is a vector of weightings over the $N$ locations emitted by a reading
        head at time $t$.

        Since all weightings are normalized, the $N$ elements $w_t(i)$ of $\textbf{w}_t$ obey the
        following constraints:
        $$\sum_{i=1}^{N} w_t(i) = 1, \quad 0 \le w_t(i) \le 1, \forall i$$

        The length $M$ read vector $r_t$ returned by the head is defined as a convex combination
        of the row-vectors $M_t(i)$ in memory:
        $$\textbf{r}_t \leftarrow \sum_{i=1}^{N} w_t(i)\textbf{M}_t(i)$$
    :return: the content reading from memory.
    """
    r_t = K.dot(memory_t, weight_t)
    return r_t
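A small NumPy sketch of the convex-combination formula in the docstring above (the N = 3 locations and M = 2-wide vectors are made-up values, not from the source project):

# Numeric illustration of r_t = sum_i w_t(i) * M_t(i) with made-up values.
import numpy as np

memory = np.array([[1.0, 0.0],
                   [0.0, 1.0],
                   [2.0, 2.0]])        # N x M memory matrix
weights = np.array([0.5, 0.25, 0.25])  # normalized weighting over the N locations
r = weights.dot(memory)                # convex combination of the memory rows
print(r)                               # [1.   0.75]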
def setup_output(self, x):
    """
    Setup output tensor
    """
    x_max = K.max(x, axis=1)
    x_max = K.flatten(x_max)
    z = K.dot(x_max, self.w_proj_to_z)  #+ self.b_proj_to_z
    hidden = K.dot(z, self.weights[0]) + self.biases[0]
    hidden = K.reshape(hidden, shape=(self.input_channels, self.hidden_dim))
    output = K.dot(hidden, self.weights[1]) + self.biases[1]
    self.output = K.reshape(output, (self.num_filters, self.input_channels, *self.output_shape))
    return self.output
def setup_output(self):
    """
    Setup output tensor
    """
    coordinates = get_coordinates(self.output_shape, input_channels=self.input_channels,
                                  num_filters=self.num_filters)
    num_parameters = np.prod(self.output_shape) * self.num_filters * \
        self.input_channels
    print(num_parameters)
    # self.z_r = K.repeat_elements(self.z, rep=num_parameters, axis=0)
    self.z_r = self.init((num_parameters, 4))
    # coordinates = K.concatenate([self.z_r, coordinates], axis=1)
    output = K.tanh(K.dot(self.z_r, self.weights[0]) + self.biases[0])
    for i in range(1, len(self.weights) - 1):
        output = K.tanh(K.dot(output, self.weights[i]) + self.biases[i])
    output = K.sigmoid(K.dot(output, self.weights[-1]) + self.biases[-1])
    self.output = K.reshape(output, (self.num_filters, self.input_channels, *self.output_shape))
def get_output(self, x):
    """
    Generate filters for given input
    """
    # Assuming 'th' ordering
    # Input shape (batch, channels, rows, columns)
    # Output shape (batch, filter_size ** 2, rows, columns)

    # Use input to generate filter
    # (batch, 15, rows, columns)
    output = K.relu(K.conv2d(x, self.kernel1, border_mode="same"))
    # (batch, rows, columns, 15)
    output = K.permute_dimensions(output, (0, 2, 3, 1))
    # (batch, rows, columns, 20)
    # output = K.tanh(K.dot(output, self.w1) + self.b1)
    # (batch, rows, columns, fs**2)
    output = K.tanh(K.dot(output, self.w2) + self.b2)
    # (batch, fs**2, rows, columns)
    output = K.permute_dimensions(output, (0, 3, 1, 2))
    return output
def call(self, x):
    s, s_hat = x

    # Compute the variables defined in the class comment
    S2 = K.sum(s)
    S1 = s_hat[0, 1]
    N = s_hat[0, 0]

    # Compute the unbiased weights
    a2 = (S1 + S2) / N / s

    # Compute the biased weights and the scaling factor t
    a1 = K.pow(a2, self.k)
    sT = K.transpose(s)
    t = K.dot(sT, a2) / K.dot(sT, a1)

    return K.stop_gradient([a1 * t])[0]
def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
def _step(self, x_tm1, h_tm1, c_tm1, u_i, u_f, u_o, u_c, w_i, w_f, w_c, w_o, w_x, b_i, b_f, b_c, b_o, b_x):
    xi_t = K.dot(x_tm1, w_i) + b_i
    xf_t = K.dot(x_tm1, w_f) + b_f
    xc_t = K.dot(x_tm1, w_c) + b_c
    xo_t = K.dot(x_tm1, w_o) + b_o

    i_t = self.inner_activation(xi_t + K.dot(h_tm1, u_i))
    f_t = self.inner_activation(xf_t + K.dot(h_tm1, u_f))
    c_t = f_t * c_tm1 + i_t * self.activation(xc_t + K.dot(h_tm1, u_c))
    o_t = self.inner_activation(xo_t + K.dot(h_tm1, u_o))
    h_t = o_t * self.activation(c_t)

    x_t = K.dot(h_t, w_x) + b_x
    return x_t, h_t, c_t
def sstep(self, x_tm1, h_tm1, c_tm1, v, u_i, u_f, u_o, u_c, w_i, w_f, w_c, w_o, w_x, v_i, v_f, v_c, v_o, b_i, b_f, b_c, b_o, b_x):
    # Inputs = output from previous time step, vector from encoder
    xi_t = K.dot(x_tm1, w_i) + K.dot(v, v_i) + b_i
    xf_t = K.dot(x_tm1, w_f) + K.dot(v, v_f) + b_f
    xc_t = K.dot(x_tm1, w_c) + K.dot(v, v_c) + b_c
    xo_t = K.dot(x_tm1, w_o) + K.dot(v, v_o) + b_o

    i_t = self.inner_activation(xi_t + K.dot(h_tm1, u_i))
    f_t = self.inner_activation(xf_t + K.dot(h_tm1, u_f))
    c_t = f_t * c_tm1 + i_t * self.activation(xc_t + K.dot(h_tm1, u_c))
    o_t = self.inner_activation(xo_t + K.dot(h_tm1, u_o))
    h_t = o_t * self.activation(c_t)

    x_t = K.dot(h_t, w_x) + b_x
    return x_t, h_t, c_t
def get_output(self, train=False):
    X = self.get_input(train)

    def out_step(X_i, states):
        def in_step(x, in_states):
            output = K.dot(x, self.W) + self.b
            return output, []

        _, in_outputs, _ = K.rnn(in_step, X_i, initial_states=[], mask=None)
        return in_outputs, []

    _, outputs, _ = K.rnn(out_step, X, initial_states=[], mask=None)
    outputs = self.activation(outputs)
    return outputs
def times_reflection(input, n_hidden, reflection):
    input_re = input[:, :n_hidden]
    input_im = input[:, n_hidden:]
    reflect_re = reflection[:n_hidden]
    reflect_im = reflection[n_hidden:]

    vstarv = (reflection**2).sum()

    input_re_reflect_re = K.dot(input_re, reflect_re)
    input_re_reflect_im = K.dot(input_re, reflect_im)
    input_im_reflect_re = K.dot(input_im, reflect_re)
    input_im_reflect_im = K.dot(input_im, reflect_im)

    a = Kouter(input_re_reflect_re - input_im_reflect_im, reflect_re)
    b = Kouter(input_re_reflect_im + input_im_reflect_re, reflect_im)
    c = Kouter(input_re_reflect_re - input_im_reflect_im, reflect_im)
    d = Kouter(input_re_reflect_im + input_im_reflect_re, reflect_re)

    output = input
    output = T.inc_subtensor(output[:, :n_hidden], - 2. / vstarv * (a + b))
    output = T.inc_subtensor(output[:, n_hidden:], - 2. / vstarv * (d - c))

    return output
def step(self, x, states):
    prev_output = states[0]
    B_U = states[1]
    B_W = states[2]

    if self.consume_less == 'cpu':
        h = x
    else:
        h = K.dot(x * B_W, self.W)

    if (self.activation == 'soft_thresh'):
        preactivation = h + K.dot(prev_output * B_U, self.Uaug)
        preactivation_abs = K.sqrt(self.epsilon + preactivation**2 + preactivation[:, self.swap_re_im]**2)
        rescale = K.maximum(preactivation_abs + self.baug, 0.) / (preactivation_abs + self.epsilon)
        output = preactivation * rescale
    else:
        print "Activation", self.activation, "not implemented"
        raise NotImplementedError
    return output, [output]
def x2p(X):
    tol = 1e-5
    n = X.shape[0]
    logU = np.log(perplexity)

    sum_X = np.sum(np.square(X), axis=1)
    D = sum_X + (sum_X.reshape([-1, 1]) - 2 * np.dot(X, X.T))

    idx = (1 - np.eye(n)).astype(bool)
    D = D[idx].reshape([n, -1])

    def generator():
        for i in xrange(n):
            yield i, D[i], tol, logU

    pool = mp.Pool(n_jobs)
    result = pool.map(x2p_job, generator())
    P = np.zeros([n, n])
    for i, thisP in result:
        P[i, idx[i]] = thisP

    return P
def gram_matrix(x):
    """
    Computes the outer-product of the input tensor x.

    Input
    -----
    - x: input tensor of shape (C x H x W)

    Returns
    -------
    - x . x^T

    Note that this can be computed efficiently if x is reshaped
    as a tensor of shape (C x H*W).
    """
    # assert K.ndim(x) == 3
    if K.image_dim_ordering() == 'th':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    return K.dot(features, K.transpose(features))
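For context, a hedged sketch of how a Gram matrix like the one above is typically consumed in a neural style transfer loss; the style_loss name, its arguments, and the normalization constant are assumptions for illustration, not taken from the source project.

# Hypothetical style loss built on gram_matrix() above; `channels` and `size`
# (H * W) describe the feature maps being compared. Names are illustrative.
def style_loss(style_features, combination_features, channels, size):
    S = gram_matrix(style_features)
    C = gram_matrix(combination_features)
    return K.sum(K.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))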
def step(self, x, states):
    h_tm1 = states[0]  # previous memory
    c_c = states[1]
    c_z = states[2]
    c_r = states[3]

    matrix_x = K.dot(x, self.W) + self.b
    matrix_inner = K.dot(h_tm1, self.U[:, :2 * self.output_dim])

    x_z = matrix_x[:, :self.output_dim]
    x_r = matrix_x[:, self.output_dim: 2 * self.output_dim]
    inner_z = matrix_inner[:, :self.output_dim]
    inner_r = matrix_inner[:, self.output_dim: 2 * self.output_dim]

    z = self.inner_activation(x_z + inner_z + c_z)
    r = self.inner_activation(x_r + inner_r + c_r)

    x_h = matrix_x[:, 2 * self.output_dim:]
    inner_h = r * (K.dot(h_tm1, self.U[:, 2 * self.output_dim:]) + c_c)
    hh = self.activation(x_h + inner_h)

    h = z * h_tm1 + (1 - z) * hh
    return h, [h]
def step(self, x, states):
    h_st, B_U, B_W = states

    if self.consume_less == 'cpu':
        x_t = x[:, :self.output_dim]
        x_h = x[:, self.output_dim: 2 * self.output_dim]
    elif self.consume_less == 'mem':
        x_t = K.dot(x * B_W[0], self.W_t) + self.b_t
        x_h = K.dot(x * B_W[1], self.W_h) + self.b_h
    else:
        raise Exception('Unknown `consume_less` mode.')

    for l in xrange(self.L):
        if l == 0:
            t = self.inner_activation(x_t + K.dot(h_st * B_U[0], self.U_ts[l]) + self.b_ts[l])
            h = self.activation(x_h + K.dot(h_st * B_U[1], self.U_hs[l]) + self.b_hs[l])
        else:
            t = self.inner_activation(K.dot(h_st * B_U[0], self.U_ts[l]) + self.b_ts[l])
            h = self.activation(K.dot(h_st * B_U[1], self.U_hs[l]) + self.b_hs[l])
        h_st = h * t + h_st * (1 - t)

    return h_st, [h_st]
def step(self, x, states):
    hidden_input = states[0]
    B_U = states[1]
    B_W = states[2]

    # Make last hidden input the residual of the prediction and
    # the last available feature.
    if self.inner_input_dim > 0:
        update = K.expand_dims(hidden_input[:, -1] - x[:, -1])
        hidden_input = K.concatenate((hidden_input[:, :-1], update))

    if self.ma_only:
        h = self.b
    else:
        h = K.dot(x * B_W, self.W) + self.b

    if self.inner_input_dim > 0:
        output = self.activation(h + K.dot(hidden_input * B_U, self.U))
        new_state = K.concatenate((hidden_input[:, 1:], output))
        return output, [new_state]
    else:
        output = self.activation(h)
        return output, [output]
def step(self, x, states):
    hidden_input = states[0]
    B_U = states[1]  # Dropout mask for U
    B_W = states[2]  # Dropout mask for W

    # Make last hidden input the residual of the prediction and
    # the last available feature.
    if self.inner_input_dim > 0:
        update = K.expand_dims(hidden_input[:, -1] - x[:, -1])
        hidden_input = K.concatenate((hidden_input[:, :-1], update))

    if self.ma_only:
        h = self.b
    else:
        h = K.dot(x * B_W, self.W) + self.b

    if self.inner_input_dim > 0:
        output = self.activation(h + K.dot(hidden_input * B_U, self.U))
        new_state = K.concatenate((hidden_input[:, 1:], output))
        return output, [new_state]
    else:
        output = self.activation(h)
        return output, [output]
def data_generator(H, out_dimZ, out_dimX, in_dim, p, batch_size=512, size=None, normcenterstab=False, normcentererr=False):
    c = 0
    q = (1 - p) / 3
    while True:
        flips = np.empty((batch_size, out_dimZ + out_dimX), dtype=int)  # TODO dtype? byte?
        for i in range(batch_size):
            flips[i, :] = nonzeroflips(q, out_dimZ, out_dimX)
        stabs = np.dot(flips, H.T) % 2
        if normcenterstab:
            stabs = do_normcenterstab(stabs, p)
        if normcentererr:
            flips = do_normcentererr(flips, p)
        yield (stabs, flips)
        c += 1
        if size and c == size:
            raise StopIteration
def test_sparse_dot(self):
    x_d = np.array([0, 7, 2, 3], dtype=np.float32)
    x_r = np.array([0, 2, 2, 3], dtype=np.int64)
    x_c = np.array([4, 3, 2, 3], dtype=np.int64)

    x_sparse = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5))
    x_dense = x_sparse.toarray()

    W = np.random.random((5, 4))

    backends = [KTF]
    if KTH.th_sparse_module:
        # Theano has some dependency issues for sparse
        backends.append(KTH)

    for K in backends:
        t_W = K.variable(W)
        k_s = K.eval(K.dot(K.variable(x_sparse), t_W))
        k_d = K.eval(K.dot(K.variable(x_dense), t_W))

        assert k_s.shape == k_d.shape
        assert_allclose(k_s, k_d, atol=1e-05)
def call(self, x, mask=None):
    def get_node_w(node):
        return self.W[self.node_indices[node], :, :]

    def get_node_b(node):
        return self.b[self.node_indices[node], :]

    def compute_output(input, node=self.root_node):
        if not hasattr(node, 'left'):
            return zeros((K.shape(input)[0],)) + self.node_indices[node]
        else:
            node_output = K.dot(x, get_node_w(node))
            if self.bias:
                node_output += get_node_b(node)
            left_prob = node_output[:, 0]
            right_prob = 1 - node_output[:, 0]
            left_node_output = compute_output(input, node.left)
            right_node_output = compute_output(input, node.right)
            return K.switch(left_prob > right_prob, left_node_output, right_node_output)

    return K.cast(compute_output(x), 'int32')
def sample_h_given_x(self, x):
    """
    Draw sample from p(h|x).

    For Bernoulli RBM the conditional probability distribution can be derived to be
    p(h_j=1|x) = sigmoid(x^T W[:,j] + bh_j).
    """
    h_pre = K.dot(x, self.W) + self.bh  # pre-sigmoid (used in cross-entropy error calculation for better numerical stability)
    #h_sigm = K.sigmoid(h_pre)  # mean of Bernoulli distribution ('p', prob. of variable taking value 1), sometimes called mean-field value
    h_sigm = self.activation(self.scaling_h_given_x * h_pre)

    # drop out noise
    if (0.0 < self.p < 1.0):
        noise_shape = self._get_noise_shape(h_sigm)
        h_sigm = K.in_train_phase(K.dropout(h_sigm, self.p, noise_shape), h_sigm)

    h_samp = random_binomial(shape=h_sigm.shape, n=1, p=h_sigm)
    # random sample
    #   \hat{h} = 1, if p(h=1|x) > uniform(0, 1)
    #             0, otherwise
    return h_samp, h_pre, h_sigm
def sample_x_given_h(self, h):
    """
    Draw sample from p(x|h).

    For Bernoulli RBM the conditional probability distribution can be derived to be
    p(x_i=1|h) = sigmoid(W[i,:] h + bx_i).
    """
    # pre-sigmoid (used in cross-entropy error calculation for better numerical stability)
    x_pre = K.dot(h, self.W.T) + self.bx

    # mean of Bernoulli distribution ('p', prob. of variable taking value 1), sometimes called mean-field value
    x_sigm = K.sigmoid(self.scaling_x_given_h * x_pre)
    #x_sigm = self.activation(self.scaling_x_given_h * x_pre)

    x_samp = random_binomial(shape=x_sigm.shape, n=1, p=x_sigm)
    # random sample
    #   \hat{x} = 1, if p(x=1|h) > uniform(0, 1)
    #             0, otherwise

    # pre and sigm are returned to compute cross-entropy
    return x_samp, x_pre, x_sigm
def gram_matrix(x):
    if K.image_dim_ordering() == "th":
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
def categorical_crossentropy_3d_SW(y_true_sw, y_predicted):
    """
    Computes categorical cross-entropy loss for a softmax distribution in a hot-encoded 3D array
    with shape (num_samples, num_classes, dim1, dim2, dim3)

    Parameters
    ----------
    y_true : keras.placeholder [batches, dim0,dim1,dim2]
        Placeholder for data holding the ground-truth labels encoded in a one-hot representation
    y_predicted : keras.placeholder [batches,channels,dim0,dim1,dim2]
        Placeholder for data holding the softmax distribution over classes

    Returns
    -------
    scalar
        Categorical cross-entropy loss value
    """
    sw = y_true_sw[:, :, :, :, K.int_shape(y_predicted)[-1]:]
    y_true = y_true_sw[:, :, :, :, :K.int_shape(y_predicted)[-1]]

    y_true_flatten = K.flatten(y_true * sw)
    y_pred_flatten = K.flatten(y_predicted)
    y_pred_flatten_log = -K.log(y_pred_flatten + K.epsilon())
    num_total_elements = K.sum(y_true_flatten)
    # cross_entropy = K.dot(y_true_flatten, K.transpose(y_pred_flatten_log))
    cross_entropy = tf.reduce_sum(tf.multiply(y_true_flatten, y_pred_flatten_log))
    mean_cross_entropy = cross_entropy / (num_total_elements + K.epsilon())
    return mean_cross_entropy
def categorical_crossentropy_3d_masked(vectors):
    """
    Computes categorical cross-entropy loss for a softmax distribution in a hot-encoded 3D array
    with shape (num_samples, num_classes, dim1, dim2, dim3)

    Parameters
    ----------
    y_true : keras.placeholder [batches, dim0,dim1,dim2]
        Placeholder for data holding the ground-truth labels encoded in a one-hot representation
    y_predicted : keras.placeholder [batches,channels,dim0,dim1,dim2]
        Placeholder for data holding the softmax distribution over classes

    Returns
    -------
    scalar
        Categorical cross-entropy loss value
    """
    y_predicted, mask, y_true = vectors

    y_true_flatten = K.flatten(y_true)
    y_pred_flatten = K.flatten(y_predicted)
    y_pred_flatten_log = -K.log(y_pred_flatten + K.epsilon())
    num_total_elements = K.sum(mask)
    # cross_entropy = K.dot(y_true_flatten, K.transpose(y_pred_flatten_log))
    cross_entropy = tf.reduce_sum(tf.multiply(y_true_flatten, y_pred_flatten_log))
    mean_cross_entropy = cross_entropy / (num_total_elements + K.epsilon())
    return mean_cross_entropy
def categorical_crossentropy_3d_lambda(vectors):
    y_true, y_pred = vectors

    y_true_flatten = K.flatten(y_true)
    y_pred_flatten = K.flatten(y_pred)
    y_pred_flatten_log = -K.log(y_pred_flatten + K.epsilon())
    # cross_entropy = K.dot(y_true_flatten, K.transpose(y_pred_flatten_log))
    cross_entropy = tf.reduce_sum(tf.multiply(y_true_flatten, y_pred_flatten_log))
    mean_cross_entropy = cross_entropy / (K.sum(y_true) + K.epsilon())
    return mean_cross_entropy
def call(self, h, mask=None):
    print(h.get_shape(), self.hatt.get_shape())
    Ec = K.dot(h, self.W)
    Eq = K.dot(self.hatt, self.U)
    E = np.zeros((h.shape[0], self.hatt.shape[0]))
    for i in range(E.shape[0]):
        for j in range(E.shape[1]):
            E[i, j] = K.dot(K.tanh(Ec[i, :] + Eq[j, :]), self.V)
    return h
def step(self, x, states):
    h, [h, c] = super(AttentionLSTM, self).step(x, states)
    attention = states[4]

    m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
    # Intuitively it makes more sense to use a sigmoid (was getting some NaN problems
    # which I think might have been caused by the exponential function -> gradients blow up)
    s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

    if self.single_attention_param:
        h = h * K.repeat_elements(s, self.output_dim, axis=1)
    else:
        h = h * s

    return h, [h, c]
def get_constants(self, x):
    constants = super(AttentionLSTM, self).get_constants(x)
    constants.append(K.dot(self.attention_vec, self.U_m) + self.b_m)
    return constants
def step(self, x, states):
    h, [h, c] = self.layer.step(x, states)
    attention = states[4]

    m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
    s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

    if self.single_attention_param:
        h = h * K.repeat_elements(s, self.layer.output_dim, axis=1)
    else:
        h = h * s

    return h, [h, c]
def get_constants(self, x):
    constants = self.layer.get_constants(x)
    constants.append(K.dot(self.attention_vec, self.U_m) + self.b_m)
    return constants