We have extracted the following 50 code examples from open-source Python projects to illustrate how to use theano.tensor.batched_dot().
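Before the project snippets, here is a minimal, self-contained sketch (variable and function names are illustrative, not taken from any project below) showing the basic semantics of T.batched_dot: it iterates over the leading (batch) axis and takes the dot product of the corresponding slices, so two inputs of shapes (B, M, K) and (B, K, N) yield a result of shape (B, M, N).

import numpy as np
import theano
import theano.tensor as T

x = T.tensor3('x')             # (batch, M, K)
y = T.tensor3('y')             # (batch, K, N)
z = T.batched_dot(x, y)        # (batch, M, N): one matrix product per batch element

f = theano.function([x, y], z)
a = np.random.rand(4, 2, 3).astype(theano.config.floatX)
b = np.random.rand(4, 3, 5).astype(theano.config.floatX)
print(f(a, b).shape)           # prints (4, 2, 5)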
def get_output_for(self, inputs, attention_only=False, **kwargs):
    # inputs[0]: B x N x D
    # inputs[1]: B x Q x D
    # inputs[2]: B x N x Q / B x Q x N
    # self.mask: B x Q
    if self.transpose:
        M = inputs[2].dimshuffle((0, 2, 1))
    else:
        M = inputs[2]
    alphas = T.nnet.softmax(T.reshape(M, (M.shape[0] * M.shape[1], M.shape[2])))
    alphas_r = T.reshape(alphas, (M.shape[0], M.shape[1], M.shape[2])) * \
        self.mask[:, np.newaxis, :]                               # B x N x Q
    alphas_r = alphas_r / alphas_r.sum(axis=2)[:, :, np.newaxis]  # B x N x Q
    q_rep = T.batched_dot(alphas_r, inputs[1])                    # B x N x D
    return eval(self.gating_fn)(inputs[0], q_rep)
def get_output_for(self, inputs, **kwargs):
    p_gru, q_gru, q_mask, feature = tuple(inputs)
    time_p = p_gru.shape[1]
    time_q = q_gru.shape[1]
    p_gru_re = p_gru.dimshuffle(0, 1, 'x', 2)    # (batch, time_p, 1, units)
    q_gru_re = q_gru.dimshuffle(0, 'x', 1, 2)    # (batch, 1, time_q, units)
    gru_merge = T.tanh(p_gru_re * q_gru_re).reshape((-1, time_q, self.units))  # (batch * time_p, time_q, units)

    att = T.dot(gru_merge, self.v1).reshape((-1, time_p, time_q))  # (batch, time_p, time_q)
    att_q = T.dot(q_gru, self.v2).squeeze()                        # (batch, time_q)
    att = att + att_q.dimshuffle(0, 'x', 1) + feature              # (batch, time_p, time_q)

    att = T.nnet.softmax(att.reshape((-1, time_q)))                # (batch * time_p, time_q)
    att = att.reshape((-1, time_p, time_q)) * q_mask.dimshuffle(0, 'x', 1)  # (batch, time_p, time_q)
    att = att / (att.sum(axis=2, keepdims=True) + 1e-8)            # (batch, time_p, time_q)
    att = att.reshape((-1, time_q))

    output = T.batched_dot(att, gru_merge)                         # (batch * time_p, units)
    output = output.reshape((-1, time_p, self.units))
    return output
def get_output_for(self, inputs, **kwargs):
    sequence_length = inputs[0].shape[1] / 2
    input_first = inputs[0][(slice(None),) + (slice(0, sequence_length),)]
    input_second = inputs[1]
    mask = inputs[self.mask_incoming_index]
    if input_second.ndim == 3:
        input_second = input_second[(slice(None), -1)]
    M = nonlinearities.tanh(T.dot(input_first, self.W_y)
                            + T.dot(input_second.dimshuffle(0, 'x', 1), self.W_h))  # M.shape = N * L * k
    alpha = nonlinearities.softmax(T.dot(M, self.w.T).reshape((inputs[0].shape[0], sequence_length)))  # alpha.shape = N * L
    alpha = alpha * mask
    r = T.batched_dot(alpha, input_first)  # r.shape = N * k
    h_star = nonlinearities.tanh(T.dot(r, self.W_p) + T.dot(input_second, self.W_x))
    return h_star
def output_func(self, input):
    q = input[0]
    all_list = [q]
    for i in xrange(self.position):
        dot = T.batched_dot(q, T.dot(input[i + 1], self.W[i].T))
        all_list.append(dot.dimshuffle(0, 'x'))
        all_list.append(input[i + 1])
    begin_index = self.position
    for i in range(1, self.position):
        for j in range(0, i):
            dot = T.batched_dot(input[j + 1], T.dot(input[i + 1], self.W[begin_index].T))
            all_list.append(dot.dimshuffle(0, 'x'))
            # begin_index += 1
    out = T.concatenate(all_list, axis=1)
    # dot = T.batched_dot(q, T.batched_dot(a, self.W))
    # dot = T.batched_dot(q, T.dot(a, self.W.T))
    # out = T.concatenate([dot.dimshuffle(0, 'x'), q, a], axis=1)
    return out
def score_batch(self, e1, e2, r_index):
    """
    :param e1: (batch, entity_dim, )
    :param e2: (batch, entity_dim, )
    :param r_index: (batch, )
    :return:
    """
    # (batch, entity_dim) dot (batch, entity_dim, entity_dim, hidden) dot (batch, entity_dim) -> hidden * (batch, )
    hidden1_sep, _ = theano.scan(fn=self.step_batch,
                                 sequences=[self.slice_seq],
                                 non_sequences=[e1, e2, self.W[r_index]],
                                 name='batch_scan')
    # hidden * (batch, ) -> (batch, hidden)
    hidden1 = T.concatenate([hidden1_sep], axis=1).transpose()
    if self.keep_normal:
        # (batch, 2 * entity_dim) dot (batch, 2 * entity_dim, hidden) -> (batch, hidden, )
        hidden2 = T.batched_dot(T.concatenate([e1, e2], axis=1), self.V[r_index])
        # (batch, hidden) + (batch, hidden) + (batch, hidden) -> (batch, hidden)
        hidden = hidden1 + hidden2 + self.b[r_index]
    else:
        hidden = hidden1
    # (batch, hidden) -> (batch, hidden)
    act_hidden = self.act.activate(hidden)
    # (batch, hidden) dot (batch, hidden) -> (batch, )
    return T.sum(act_hidden * self.U[r_index], axis=1)
def grams(X):
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'tf':
        X = K.permute_dimensions(X, (0, 3, 1, 2))
    (samples, c, h, w) = get_shape(X)
    X_reshaped = K.reshape(X, (-1, c, h * w))
    X_T = K.permute_dimensions(X_reshaped, (0, 2, 1))
    if K._BACKEND == 'theano':
        X_gram = T.batched_dot(X_reshaped, X_T)
    else:
        X_gram = tf.batch_matmul(X_reshaped, X_T)
    X_gram /= c * h * w
    return X_gram
def op_matmul(s_x_, s_y_, axes_=(-2, -1)):
    '''
    limited implementation of np.matmul, does not support broadcasting

    Args:
        s_x_: (batch of) matrix(matrices)
        s_y_: (batch of) matrix(matrices)
        axes_: tuple of int, the axes for the matrix
    '''
    assert s_x_.ndim == s_y_.ndim
    ndim = s_x_.ndim
    assert -ndim <= axes_[0] < ndim
    assert -ndim <= axes_[1] < ndim
    assert ndim >= 2
    axes = axes_[0] % ndim, axes_[1] % ndim
    if ndim == 2:
        if axes == (0, 1):
            return T.dot(s_x_, s_y_)
        else:
            return T.dot(s_y_, s_x_)
    s_shp = T.shape(s_x_)
    s_size = reduce(T.mul, [s_shp[i] for i in range(s_x_.ndim) if i not in axes])
    s_szu = s_shp[axes[0]]
    s_szv = s_shp[axes[1]]
    s_szw = T.shape(s_y_)[axes[1]]
    transpp = list(range(ndim))
    transpp[axes[0]], transpp[ndim - 2] = transpp[ndim - 2], transpp[axes[0]]
    transpp[axes[1]], transpp[ndim - 1] = transpp[ndim - 1], transpp[axes[1]]
    s_shp2 = [s_shp[a] for a in transpp]
    s_shp2[axes[1]] = s_szw
    s_x = s_x_.transpose(*transpp).reshape((s_size, s_szu, s_szv))
    s_y = s_y_.transpose(*transpp).reshape((s_size, s_szv, s_szw))
    return T.batched_dot(s_x, s_y).reshape(s_shp2).transpose(transpp)
def get_output_for(self, inputs, **kwargs):
    # inputs[0]: B x N x D
    # inputs[1]: B x Q x D
    # self.mask: B x Q
    q_shuf = inputs[1].dimshuffle(0, 2, 1)    # B x D x Q
    return T.batched_dot(inputs[0], q_shuf)   # B x N x Q
def get_output_for(self, inputs, **kwargs):
    # inputs[0]: B x N x D
    # inputs[1]: B x Q x D
    # self.aggregator: B x N x C
    # self.pointer: B x 1
    # self.mask: B x N
    q = inputs[1][T.arange(inputs[1].shape[0]), self.pointer, :]  # B x D
    p = T.batched_dot(inputs[0], q)              # B x N
    pm = T.nnet.softmax(p) * self.mask           # B x N
    pm = pm / pm.sum(axis=1)[:, np.newaxis]      # B x N
    return T.batched_dot(pm, self.aggregator)
def get_output_for(self, input, **kwargs):
    act = T.batched_dot(T.tensordot(input, self.V, axes=[1, 2]), input) \
          + T.dot(input, self.W) + self.b.dimshuffle('x', 0)
    return self.nonlinearity(act)
def dot_time_distributed_merge(model, layers, cos_norm=False):
    """ Merging two time series layers into one, producing a new time series that
    contains a dot-product scalar for each time step.

    If cos_norm=True, actually computes cosine similarity. """
    def batched_batched_dot(s):
        """ from (x,y,z)-shaped pair, produce (x,y)-shaped pair that replaces
        the z-vector pairs by their dot-products """
        import theano
        import theano.tensor as T
        return theano.scan(fn=lambda xm, ym: T.batched_dot(xm, ym),
                           outputs_info=None, sequences=s, non_sequences=None)[0]

    def batched_cos_sim(s):
        """ from (x,y,z)-shaped pair, produce (x,y)-shaped pair that replaces
        the z-vector pairs by their cosine similarities """
        import theano
        import theano.tensor as T
        return theano.scan(fn=lambda xm, ym: T.batched_dot(xm, ym) / T.sqrt(T.batched_dot(xm, xm) * T.batched_dot(ym, ym)),
                           outputs_info=None, sequences=s, non_sequences=None)[0]

    if cos_norm:
        lmb = batched_cos_sim
    else:
        lmb = batched_batched_dot

    return Lambda([model.nodes[l] for l in layers], lmb,
                  lambda s: (s[1][0], s[1][1]))
def batch_cosine(self, doc_batch_proj, query_batch_proj):
    dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)
    doc_square = T.sqr(doc_batch_proj)
    query_square = T.sqr(query_batch_proj)
    doc_norm = T.sqrt(T.sum(doc_square, axis=1))
    query_norm = T.sqrt(T.sum(query_square, axis=1))
    batch_cosine_vec = dot_prod / (doc_norm * query_norm)
    return batch_cosine_vec
def batch_cosine(self, doc_batch_proj, query_batch_proj):
    dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)
    doc_square = doc_batch_proj ** 2
    query_square = query_batch_proj ** 2
    doc_norm = T.sqrt(doc_square.sum(axis=1)).sum()
    query_norm = T.sqrt(query_square.sum(axis=1)).sum()
    batch_cosine_vec = dot_prod / (doc_norm * query_norm)
    return batch_cosine_vec
def get_output_for(self, input_, **kwargs):
    W = T.tril(self.W, -1)
    interactions = T.batched_dot(T.dot(input_, W), input_)
    # clamp elementwise before the square root (T.maximum, not T.max, which would
    # treat 1e-6 as an axis argument)
    interactions = T.sqrt(T.maximum(interactions, 1e-6))
    return self.nonlinearity(input_ + interactions)
def get_output_for(self, inputs, **kwargs):
    # inputs[0]: B x N x D, doc
    # inputs[1]: B x Q x D, query
    # self.aggregator: B x N x C
    # self.pointer: B x 1
    # self.mask: B x N
    q = inputs[1][T.arange(inputs[1].shape[0]), self.pointer, :]  # B x D
    p = T.batched_dot(inputs[0], q)              # B x N
    pm = T.nnet.softmax(p) * self.mask           # B x N
    pm = pm / pm.sum(axis=1)[:, np.newaxis]      # B x N
    return T.batched_dot(pm, self.aggregator)
def __call__(self, q, a):
    return T.batched_dot(T.tensordot(q, self.W, axes=[1, 0]), a)
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a = input[0], input[1]
    dot = T.batched_dot(q, T.dot(a, self.W))
    self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
    self.prob = self.p_y_given_x[:, -1]
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    return self.y_pred
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a, feats = input[0], input[1], input[2]
    dot = T.batched_dot(q, T.dot(a, self.W))
    feats_dot = T.dot(feats, self.W_feats)
    l = self.lamda.dimshuffle('x', 0)
    self.p_y_given_x = T.nnet.softmax(l * dot + (1 - l) * feats_dot + self.b.dimshuffle('x', 0))
    self.prob = self.p_y_given_x[:, -1]
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    return self.y_pred
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a = input[0], input[1]
    # dot = T.batched_dot(q, T.dot(a, self.W.T))
    dot = T.batched_dot(q, T.dot(a, self.W))
    self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
    self.prob = self.p_y_given_x[:, -1]
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    return self.y_pred
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a, feats = input[0], input[1], input[2]
    dot = T.batched_dot(q, T.dot(a, self.W))
    feats_dot = T.dot(feats, self.W_feats)
    self.p_y_given_x = T.nnet.softmax(dot + feats_dot + self.b.dimshuffle('x', 0))
    self.prob = self.p_y_given_x[:, -1]
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    return self.y_pred
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a, feats = input[0], input[1], input[2]
    # dot = T.batched_dot(q, T.batched_dot(a, self.W))
    dot = T.batched_dot(q, T.dot(a, self.W.T))
    out = T.concatenate([dot.dimshuffle(0, 'x'), q, a, feats], axis=1)
    return out
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a, feats = input[0], input[1], input[2]
    # dot = T.batched_dot(q, T.batched_dot(a, self.W))
    dot = T.batched_dot(q, T.dot(a, self.W.T))
    out = T.concatenate([dot.dimshuffle(0, 'x'), feats], axis=1)
    # out = feats
    return out
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a = input[0], input[1]
    # dot = T.batched_dot(q, T.batched_dot(a, self.W))
    dot = T.batched_dot(q, T.dot(a, self.W.T))
    out = T.concatenate([dot.dimshuffle(0, 'x'), q, a], axis=1)
    return out
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a = input[0], input[1]
    # dot = T.batched_dot(q, T.batched_dot(a, self.W))
    qdot = T.dot(q, self.Wq)
    adot = T.dot(a, self.Wa)
    dot = T.batched_dot(qdot, adot)
    out = T.concatenate([dot.dimshuffle(0, 'x'), q, a], axis=1)
    return out
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a = input[0], input[1]
    # dot = T.batched_dot(q, T.batched_dot(a, self.W))
    out = T.batched_dot(q, T.dot(a, self.W.T)).dimshuffle(0, 'x')
    return out
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a, feats = input[0], input[1], input[2]
    dot = T.batched_dot(q, T.dot(a, self.W))
    feats_dot = T.dot(feats, self.W_feats)
    self.p_y_given_x = T.nnet.softmax(dot + feats_dot + T.dot(q, self.W_q)
                                      + T.dot(a, self.W_a) + self.b.dimshuffle('x', 0))
    self.prob = self.p_y_given_x[:, -1]
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    return self.y_pred
def LayerNormalization(x, gamma, mask, estimated_mean=0.0, estimated_var=1.0):
    assert x.ndim == 3 or x.ndim == 2
    if x.ndim == 3:
        x_mean = T.mean(x, axis=2).dimshuffle(0, 1, 'x')
        x_var = T.var(x, axis=2).dimshuffle(0, 1, 'x')
        return gamma * ((x - x_mean) / T.sqrt(x_var + 1e-7)), x_mean[0, 0], x_var[0, 0]
    elif x.ndim == 2:
        x_mean = T.mean(x, axis=1).dimshuffle(0, 'x')
        x_var = T.var(x, axis=1).dimshuffle(0, 'x')
        return gamma * ((x - x_mean) / T.sqrt(x_var + 1e-7)), x_mean[0], x_var[0]

# Does theano.batched_dot. If last_axis is on it will loop over the last axis,
# otherwise it will loop over the first axis.
def BatchedDot(x, y, last_axis=False):
    if last_axis == False:
        return T.batched_dot(x, y)
    elif last_axis:
        if x.ndim == 2:
            shuffled_x = x.dimshuffle(1, 0)
        elif x.ndim == 3:
            shuffled_x = x.dimshuffle(2, 0, 1)
        elif x.ndim == 4:
            shuffled_x = x.dimshuffle(3, 0, 1, 2)
        else:
            raise ValueError('BatchedDot inputs must have between 2-4 dimensions, '
                             'but x has ' + str(x.ndim) + ' dimensions')

        if y.ndim == 2:
            shuffled_y = y.dimshuffle(1, 0)
        elif y.ndim == 3:
            shuffled_y = y.dimshuffle(2, 0, 1)
        elif y.ndim == 4:
            shuffled_y = y.dimshuffle(3, 0, 1, 2)
        else:
            raise ValueError('BatchedDot inputs must have between 2-4 dimensions, '
                             'but y has ' + str(y.ndim) + ' dimensions')

        dot = T.batched_dot(shuffled_x, shuffled_y)
        if dot.ndim == 2:
            return dot.dimshuffle(1, 0)
        elif dot.ndim == 3:
            return dot.dimshuffle(1, 2, 0)
        elif dot.ndim == 4:
            return dot.dimshuffle(1, 2, 3, 0)
def batched_gram5d(self, fmap):
    # (layer, batch, featuremaps, height*width)
    fmap = fmap.flatten(ndim=4)

    # (layer*batch, featuremaps, height*width)
    fmap2 = fmap.reshape((-1, fmap.shape[-2], fmap.shape[-1]))

    # The T.prod term can't be taken outside as a T.mean in style_loss(),
    # since the width and height of the image might vary
    return T.batched_dot(fmap2, fmap2.dimshuffle(0, 2, 1)).reshape(fmap.shape) / T.prod(fmap.shape[-2:])
def batched_gram(self, fmap):
    # (batch, featuremaps, height*width)
    fmap = fmap.flatten(ndim=3)

    # The T.prod term can't be taken outside as a T.mean in style_loss(),
    # since the width and height of the image might vary
    if self.net_type == 0:
        return T.batched_dot(fmap, fmap.dimshuffle(0, 2, 1)) / T.prod(fmap.shape[-2:])
    elif self.net_type == 1:
        return T.batched_dot(fmap, fmap.dimshuffle(0, 2, 1)) / T.prod(fmap.shape[-1])
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a = input[0], input[1]
    dot = T.batched_dot(q, T.dot(a, self.W))
    self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
    self.prob = self.p_y_given_x[:, -1]
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    return self.y_pred
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a, feats = input[0], input[1], input[2]
    dot = T.batched_dot(q, T.dot(a, self.W))
    feats_dot = T.dot(feats, self.W_feats)
    l = self.lamda.dimshuffle('x', 0)
    self.p_y_given_x = T.nnet.softmax(l * dot + (1 - l) * feats_dot + self.b.dimshuffle('x', 0))
    self.prob = self.p_y_given_x[:, -1]
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    return self.y_pred
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a = input[0], input[1]
    # dot = T.batched_dot(q, T.dot(a, self.W.T))
    dot = T.batched_dot(q, T.dot(a, self.W))
    self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
    self.prob = self.p_y_given_x[:, -1]
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    return self.y_pred
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q, a, feats = input[0], input[1], input[2]
    dot = T.batched_dot(q, T.dot(a, self.W))
    feats_dot = T.dot(feats, self.W_feats)
    self.p_y_given_x = T.nnet.softmax(dot + feats_dot + self.b.dimshuffle('x', 0))
    self.prob = self.p_y_given_x[:, -1]
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    return self.y_pred
def output_func(self, input):
    # P(Y|X) = softmax(W.X + b)
    q = input[0]
    all_list = [q]
    for i in xrange(self.position):
        dot = T.batched_dot(q, T.dot(input[i + 1], self.W[i].T))
        all_list.append(dot.dimshuffle(0, 'x'))
        all_list.append(input[i + 1])
    # dot = T.batched_dot(q, T.batched_dot(a, self.W))
    # dot = T.batched_dot(q, T.dot(a, self.W.T))
    # out = T.concatenate([dot.dimshuffle(0, 'x'), q, a], axis=1)
    out = T.concatenate(all_list, axis=1)
    return out