The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.tile().
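Before the extracted examples, here is a minimal self-contained sketch (not taken from any of the projects below) showing the basic semantics of theano.tensor.tile, which mirror numpy.tile: the second argument gives the number of repetitions along each axis.

import numpy as np
import theano
import theano.tensor as T

# Build a symbolic graph that tiles a matrix 2x along rows and 3x along columns.
x = T.matrix('x')
y = T.tile(x, (2, 3))
f = theano.function([x], y)

a = np.arange(4).reshape(2, 2).astype(theano.config.floatX)
print(f(a).shape)                             # (4, 6)
print(np.allclose(f(a), np.tile(a, (2, 3))))  # True: same behaviour as numpy.tile
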
def dist_info_sym(self, obs_var, latent_var=None):  # this is meant to be for one path!
    # now this is not doing anything! And for computing the dist_info_vars of npo_snn_rewardMI it doesn't work
    if latent_var is None:
        latent_var1 = theano.shared(np.expand_dims(self.latent_fix, axis=0))  # new fix to avoid putting the latent as an input: just take the one fixed!
        latent_var = TT.tile(latent_var1, [obs_var.shape[0], 1])

    # generate the generalized input (append latents to obs.)
    if self.bilinear_integration:
        extended_obs_var = TT.concatenate([obs_var, latent_var,
                                           TT.flatten(obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :],
                                                      outdim=2)],
                                          axis=1)
    else:
        extended_obs_var = TT.concatenate([obs_var, latent_var], axis=1)
    mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], extended_obs_var)
    if self.min_std is not None:
        log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
    return dict(mean=mean_var, log_std=log_std_var)

def symbolic_distance_matrix(A, B):
    """
    Defines the symbolic matrix that contains the distances between the vectors of A and B
    :param A:
    :param B:
    :return:
    """
    aa = T.sum(A * A, axis=1)
    bb = T.sum(B * B, axis=1)
    AB = T.dot(A, T.transpose(B))

    AA = T.transpose(T.tile(aa, (bb.shape[0], 1)))
    BB = T.tile(bb, (aa.shape[0], 1))

    D = AA + BB - 2 * AB
    D = T.maximum(D, 0)
    D = T.sqrt(D)
    return D

def nll_of_x_given_o(self, input, ordering):
    """ Returns the theano graph that computes $-\ln p(\bx|o)$.

    Parameters
    ----------
    input: 1D vector
        One image with shape (nb_channels * images_height * images_width).

    ordering: 1D vector of int
        List of pixel indices representing the input ordering.
    """
    D = int(np.prod(self.image_shape))
    mask_o_d = T.zeros((D, D), dtype=theano.config.floatX)
    mask_o_d = T.set_subtensor(mask_o_d[T.arange(D), ordering], 1.)

    mask_o_lt_d = T.cumsum(mask_o_d, axis=0)
    mask_o_lt_d = T.set_subtensor(mask_o_lt_d[1:], mask_o_lt_d[:-1])
    mask_o_lt_d = T.set_subtensor(mask_o_lt_d[0, :], 0.)

    input = T.tile(input[None, :], (D, 1))
    nll = -T.sum(self.lnp_x_o_d_given_x_o_lt_d(input, mask_o_d, mask_o_lt_d))
    return nll

def _create_constant_uas_across_datapoints(self):
    """
    Helper function. Creates and returns new theano variables representing noise, where noise is the same across
    datapoints in the minibatch. Useful for binding the original noise variables in an evaluation function where
    randomness is required but the same predictions are needed across the minibatch.
    """
    n_data = tt.iscalar('n_data')

    net_uas = [tt.tile(self.srng.normal((n_units,), dtype=dtype), [n_data, 1]) for n_units in self.net.n_units[1:]]
    uaa = tt.tile(self.srng.normal((self.n_components,), dtype=dtype), [n_data, 1])
    uams = [tt.tile(self.srng.normal((self.n_outputs,), dtype=dtype), [n_data, 1]) for _ in xrange(self.n_components)]
    uaUs = [tt.tile(self.srng.normal((self.n_outputs**2,), dtype=dtype), [n_data, 1]) for _ in xrange(self.n_components)]

    # NOTE: order matters here
    uas = net_uas + [uaa] + uams + uaUs

    return n_data, uas

def output(self, x, indexes, samples=0, use_indices=True):
    if samples == 0:
        samples = self.n_samples
    if use_indices == True:
        self.v_z = 1e-6 + self.logistic(self.log_var_param_z[indexes, 0:1]) * (self.v_prior_z - 2e-6)
        self.m_z = self.mean_param_z[indexes, 0:1]
        self.z = self.randomness_z[:, indexes, :] * T.tile(T.sqrt(self.v_z), [samples, 1, 1]) + \
            T.tile(self.m_z, [self.n_samples, 1, 1])
    else:
        self.z = self.randomness_z[:, 0:x.shape[1], :] * T.tile(T.sqrt(self.v_prior_z), [samples, 1, 1])

    x = T.concatenate((x, self.z[:, 0:x.shape[1], :]), 2)
    for layer in self.layers:
        x = layer.output(x, samples)
    return x

def quad_kappa_loss(y, t, y_pow=1, eps=1e-15):
    num_scored_items = y.shape[0]
    num_ratings = 3
    tmp = T.tile(T.arange(0, num_ratings).reshape((num_ratings, 1)),
                 reps=(1, num_ratings)).astype(theano.config.floatX)
    weights = (tmp - tmp.T) ** 2 / (num_ratings - 1) ** 2

    y_ = y ** y_pow
    y_norm = y_ / (eps + y_.sum(axis=1).reshape((num_scored_items, 1)))

    hist_rater_a = y_norm.sum(axis=0)
    hist_rater_b = t.sum(axis=0)

    conf_mat = T.dot(y_norm.T, t)

    nom = T.sum(weights * conf_mat)
    denom = T.sum(weights * T.dot(hist_rater_a.reshape((num_ratings, 1)),
                                  hist_rater_b.reshape((1, num_ratings))) /
                  num_scored_items.astype(theano.config.floatX))
    return - (1 - nom / denom)

def note_to_encoding(self, chosen_note, relative_position, low_bound, high_bound):
    assert chosen_note.ndim == 1
    n_batch = chosen_note.shape[0]

    dont_play_version = T.switch(
        T.shape_padright(T.eq(chosen_note, 0)),
        T.tile(np.array([[1, 0] + [0] * (self.ENCODING_WIDTH - 2)], dtype=np.float32), (n_batch, 1)),
        T.tile(np.array([[0, 1] + [0] * (self.ENCODING_WIDTH - 2)], dtype=np.float32), (n_batch, 1)))

    rcp = T.tile(np.array([0, 0, 1], dtype=np.float32), (n_batch, 1))
    circle_1 = T.eye(4)[(chosen_note - 2) % 4]
    circle_2 = T.eye(3)[(chosen_note - 2) % 3]
    octave = T.eye(self.num_octaves)[(chosen_note - 2 + low_bound - self.octave_start) // 12]

    play_version = T.concatenate([rcp, circle_1, circle_2, octave], 1)

    encoded_form = T.switch(T.shape_padright(T.lt(chosen_note, 2)), dont_play_version, play_version)
    return encoded_form

def _L(x):
    # initialize with zeros
    batch_size = x.shape[0]
    a = T.zeros((batch_size, num_actuators, num_actuators))

    # set diagonal elements
    batch_idx = T.extra_ops.repeat(T.arange(batch_size), num_actuators)
    diag_idx = T.tile(T.arange(num_actuators), batch_size)
    b = T.set_subtensor(a[batch_idx, diag_idx, diag_idx], T.flatten(T.exp(x[:, :num_actuators])))

    # set lower triangle
    cols = np.concatenate([np.array(range(i), dtype=np.uint) for i in xrange(num_actuators)])
    rows = np.concatenate([np.array([i] * i, dtype=np.uint) for i in xrange(num_actuators)])
    cols_idx = T.tile(T.as_tensor_variable(cols), batch_size)
    rows_idx = T.tile(T.as_tensor_variable(rows), batch_size)
    batch_idx = T.extra_ops.repeat(T.arange(batch_size), len(cols))
    c = T.set_subtensor(b[batch_idx, rows_idx, cols_idx], T.flatten(x[:, num_actuators:]))
    return c

def test2(self):
    # Test that alloc never gets instantiated during optimization
    mode = mode_opt.excluding('local_canonicalize_alloc')

    x = tensor.matrix('x')
    y = tensor.tile(x, (1,) * 2)
    f = function([x], [y], mode=mode)
    op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
    print(op_classes)

    # We are supposed to test that tensor.Alloc is not in op_classes,
    # but since the proper optimization is not currently implemented,
    # that test would fail. Once the correct optimization is in place,
    # tensor.Alloc should no longer appear in op_classes and the
    # assert below has to be changed accordingly.
    assert tensor.Alloc in op_classes

    # The correct opt removes nodes, no need for check_stack_trace

def _get_hidden_layer_connectivity(self, layerIdx):
    layer_size = self._hidden_sizes[layerIdx]
    if layerIdx == 0:
        p_vals = self._get_p(T.min(self.layers_connectivity[layerIdx]))
    else:
        p_vals = self._get_p(T.min(self.layers_connectivity_updates[layerIdx - 1]))

    # Implementations of np.choose in theano GPU
    # return T.nonzero(self._mrng.multinomial(pvals=[self._p_vals] * layer_size, dtype=theano.config.floatX))[1].astype(dtype=theano.config.floatX)
    # return T.argmax(self._mrng.multinomial(pvals=[self._p_vals] * layer_size, dtype=theano.config.floatX), axis=1)
    return T.sum(T.cumsum(self._mrng.multinomial(pvals=T.tile(p_vals[::-1][None, :], (layer_size, 1)),
                                                 dtype=theano.config.floatX), axis=1), axis=1)

def get_output_for(self, all_obs_var, **kwargs):
    # n_batch = all_obs_var.shape[:-1]
    # out = TT.tile(self.output_var, (n_batch, 1))
    # out = TT.tile(self.output_var, TT.concatenate([n_batch, [1]]))
    # return out
    ndim = all_obs_var.ndim
    reshaped_cnt = TT.reshape(self.output_var, (1,) * (ndim - 1) + self.output_var.get_value().shape)
    tile_arg = TT.concatenate([all_obs_var.shape[:-1], [1]])
    tiled = TT.tile(reshaped_cnt, tile_arg, ndim=ndim)
    return tiled

def get_actions(self, observations):
    observations = np.array(observations)  # needed to do the outer product for the bilinear
    if self.latent_dim:
        if self.resample:
            latents = [self.latent_dist.sample(self.latent_dist_info) for _ in observations]
            print('resampling the latents')
        else:
            if not np.size(self.latent_fix) == self.latent_dim:  # we decide to reset based on if something is in the fix
                self.reset()
            if len(self.pre_fix_latent) == self.latent_dim:  # If we have a pre_fix, reset will put the latent to it
                self.reset()  # this overwrites the latent sampled or in latent_fix
            latents = np.tile(self.latent_fix, [len(observations), 1])  # maybe a broadcast operation better...
        if self.bilinear_integration:
            extended_obs = np.concatenate([observations, latents,
                                           np.reshape(observations[:, :, np.newaxis] * latents[:, np.newaxis, :],
                                                      (observations.shape[0], -1))],
                                          axis=1)
        else:
            extended_obs = np.concatenate([observations, latents], axis=1)
    else:
        latents = np.array([[]] * len(observations))
        extended_obs = observations
    # make mean, log_std also depend on the latents (as observ.)
    mean, log_std = self._f_dist(extended_obs)

    if self._set_std_to_0:
        actions = mean
        log_std = -1e6 * np.ones_like(log_std)
    else:
        rnd = np.random.normal(size=mean.shape)
        actions = rnd * np.exp(log_std) + mean
    return actions, dict(mean=mean, log_std=log_std, latents=latents)

def tile(x, n):
    return T.tile(x, n)

def get_output_for(self, input, **kwargs):
    ndim = input.ndim
    reshaped_param = TT.reshape(self.param, (1,) * (ndim - 1) + (self.num_units,))
    tile_arg = TT.concatenate([input.shape[:-1], [1]])
    tiled = TT.tile(reshaped_param, tile_arg, ndim=ndim)
    return tiled

def get_output_for(self, input, **kwargs):
    n_batches = input.shape[0]
    n_steps = input.shape[1]
    input = TT.reshape(input, (n_batches, n_steps, -1))
    h0s = TT.tile(TT.reshape(self.h0, (1, self.num_units)), (n_batches, 1))
    # flatten extra dimensions
    shuffled_input = input.dimshuffle(1, 0, 2)
    hs, _ = theano.scan(fn=self.step, sequences=[shuffled_input], outputs_info=h0s)
    shuffled_hs = hs.dimshuffle(1, 0, 2)
    return shuffled_hs

def invert(self, constraints, z_i):
    [_invert, z_updates, z, beta_r, z_const] = self.opt_model
    constraints_t = self.preprocess_constraints(constraints)
    [im_c_t, mask_c_t, im_e_t, mask_e_t] = constraints_t

    results = _invert(im_c_t, mask_c_t, im_e_t, mask_e_t, z_i.astype(np.float32))
    [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge] = results

    gx_t = (255 * self.inverse_transform(gx, npx=self.npx, nc=self.nc)).astype(np.uint8)
    if self.nc == 1:
        gx_t = np.tile(gx_t, (1, 1, 1, 3))

    z_t = np.tanh(z.get_value()).copy()
    return gx_t, z_t, cost_all

def preprocess_constraints(self, constraints):
    [im_c_o, mask_c_o, im_e_o, mask_e_o] = constraints
    im_c = self.transform(im_c_o[np.newaxis, :], self.nc)
    mask_c = self.transform_mask(mask_c_o[np.newaxis, :])
    im_e = self.transform(im_e_o[np.newaxis, :], self.nc)
    mask_t = self.transform_mask(mask_e_o[np.newaxis, :])
    mask_e = self.hog.comp_mask(mask_t)

    shp = [self.batch_size, 1, 1, 1]
    im_c_t = np.tile(im_c, shp)
    mask_c_t = np.tile(mask_c, shp)
    im_e_t = np.tile(im_e, shp)
    mask_e_t = np.tile(mask_e, shp)
    return [im_c_t, mask_c_t, im_e_t, mask_e_t]

def gen_samples(self, z0):
    samples = self.model.gen_samples(z0=z0)
    if self.nc == 1:
        samples = np.tile(samples, [1, 1, 1, 3])
    return samples

def __init__(self, rng, x, d):
    self.input = x
    self.out_len = d

    self.encoded_output = self.encode_final_state()

### default seq-to-seq model: tile C as input to all frames ###

def encode_final_state(self):
    context_vector = self.input[-1, ]
    tiled_context_vector = T.tile(context_vector, (self.out_len, 1))
    return tiled_context_vector

def __init__(self, rng, x, d):
    self.input = x
    self.dur_input = d

    self.encoded_output = self.encode_final_state()

### default seq-to-seq model: tile C as input to all frames ###

def encode_final_state(self):
    context_vector = self.input[-1, ]
    tiled_context_vector = T.tile(context_vector, (T.sum(self.dur_input), 1))
    return tiled_context_vector

def __init__(self, rng, x, d):
    self.input = x
    self.dur_input = d

    self.encoded_output = self.encode_all_states()

### Distributed seq-to-seq model: tile C_1-C_n as input to corresponding decoder frames ###

def tile(self, x, n):
    return T.tile(x, n)

def tile(x, n):
    # TODO: `keras_shape` inference.
    return T.tile(x, n)

def tile(self, t, n):
    return T.tile(t, n)

def eval(self, x, rand=False):
    """Evaluate net at locations in x."""
    if rand:
        # compile theano computation graph, if haven't already done so
        if self.eval_f_rand == None:
            n_data = tt.iscalar('n_data')
            uas = [tt.tile(self.srng.normal((n_units,), dtype=dtype), [n_data, 1]) for n_units in self.n_units[1:]]

            self.eval_f_rand = theano.function(
                inputs=[self.hs[0], n_data],
                outputs=self.hs[-1],
                givens=zip(self.uas, uas)
            )

        return self.eval_f_rand(x.astype(dtype), x.shape[0])

    else:
        # compile theano computation graph, if haven't already done so
        if self.eval_f == None:
            self.eval_f = theano.function(
                inputs=[self.hs[0]],
                outputs=self.hs[-1],
                givens=zip(self.zas, self.mas)
            )

        return self.eval_f(x.astype(dtype))

def reset(model):
    '''Given a Keras model consisting only of GraphFP, Dense, and Dropout layers,
    this function will reset the trainable weights to save time for CV tests.'''

    for layer in model.layers:
        # Note: these are custom depending on the layer type
        if '.GraphFP' in str(layer):
            W_inner = layer.init_inner((layer.inner_dim, layer.inner_dim))
            b_inner = np.zeros((1, layer.inner_dim))
            # Inner weights
            layer.W_inner.set_value((T.tile(W_inner, (layer.depth + 1, 1, 1)).eval() +
                initializers.uniform((layer.depth + 1, layer.inner_dim, layer.inner_dim)).eval()).astype(np.float32))
            layer.b_inner.set_value((T.tile(b_inner, (layer.depth + 1, 1, 1)).eval() +
                initializers.uniform((layer.depth + 1, 1, layer.inner_dim)).eval()).astype(np.float32))

            # Outer weights
            W_output = layer.init_output((layer.inner_dim, layer.output_dim), scale=layer.scale_output)
            b_output = np.zeros((1, layer.output_dim))
            # Initialize weights tensor
            layer.W_output.set_value((T.tile(W_output, (layer.depth + 1, 1, 1)).eval()).astype(np.float32))
            layer.b_output.set_value((T.tile(b_output, (layer.depth + 1, 1, 1)).eval()).astype(np.float32))
            print('graphFP layer reset')

        elif '.Dense' in str(layer):
            layer.W.set_value((layer.init(layer.W.shape.eval()).eval()).astype(np.float32))
            layer.b.set_value(np.zeros(layer.b.shape.eval(), dtype=np.float32))
            print('dense layer reset')

        elif '.Dropout' in str(layer):
            print('dropout unchanged')
        else:
            print('Not resetting weights for {}'.format(str(layer)))
    print('Reset model weights')
    return model

def simple_upsample3d(inpt, up_factor):
    inpt = T.repeat(inpt, up_factor[0], axis=3)
    inpt = T.repeat(inpt, up_factor[1], axis=4)
    inpt = T.repeat(inpt, up_factor[2], axis=1)

    # rep = [1, up_factor[2], 1, up_factor[0], up_factor[1]]
    # inpt = T.tile(inpt, rep, ndim=5)

    return inpt

def forward_pass(self):
    def recurrence(x_t, h_tm1, c_tm1):
        i = T.nnet.sigmoid(T.dot(x_t, self.wi) + T.dot(h_tm1, self.wih) + self.bi)    # input gate
        c_proposed = T.tanh(T.dot(x_t, self.wc) + T.dot(h_tm1, self.wch) + self.bc)   # proposed memory cell content
        f = T.nnet.sigmoid(T.dot(x_t, self.wf) + T.dot(h_tm1, self.wfh) + self.bf)    # forget gate
        c_t = (T.tile(i, self.memory_size) * c_proposed) + (T.tile(f, self.memory_size) * c_tm1)  # new memory cell content
        o = T.nnet.sigmoid(T.dot(x_t, self.wo) + T.dot(h_tm1, self.woh) + self.bo)    # output gate
        h_t = T.tile(o, self.memory_size) * T.tanh(c_t)
        return [h_t, c_t]

    [h, c], _ = theano.scan(fn=recurrence,
                            sequences=self.input,
                            outputs_info=[self.h0, self.c0],
                            n_steps=self.input.shape[0])
    return h, c

def get_regs(self, states_0_, states, M):
    """
    Additional regularization terms.
    """
    regs = 0

    if self.L1_Wrec > 0:
        W = self.params['Wrec']
        regs += self.L1_Wrec * tensor.mean(abs(W))

    if self.L2_Wrec > 0:
        W = self.params['Wrec']
        regs += self.L2_Wrec * tensor.mean(tensor.sqr(W))

    #---------------------------------------------------------------------------------
    # Firing rates
    #---------------------------------------------------------------------------------

    if self.L2_r > 0:
        baseline = 0.

        M_ = (tensor.tile(M.T, (states.shape[-1], 1, 1))).T
        states_all = tensor.concatenate(
            [states_0_.reshape((1, states_0_.shape[0], states_0_.shape[1])), states],
            axis=0
            )
        r = self.f_hidden(states_all)
        regs += self.L2_r * tensor.sum(tensor.sqr(r - baseline) * M_) / tensor.sum(M_)

    #---------------------------------------------------------------------------------

    return regs

def convolve1d_4D(input, W, mode='full'):
    batch_size, nchannels, nwords, ndim = input.shape
    nkernels_out, nkernels_in, filter_width, ndim = W.shape

    # Unroll filter along columns
    W_unrolled = W.dimshuffle(0, 2, 1, 3).flatten(ndim=3)
    # Replicate input filters 'batch_size' times and squash out_filters along column axis.
    # W_tiled = T.tile(W_unrolled, (1, 1, batch_size)).dimshuffle(1, 0, 2).flatten(ndim=2)  # doesn't give a gradient
    W_tiled = T.alloc(W_unrolled, batch_size, W_unrolled.shape[0], W_unrolled.shape[1],
                      W_unrolled.shape[2]).dimshuffle(1, 2, 0, 3).flatten(ndim=3).dimshuffle(1, 0, 2).flatten(ndim=2)

    # Unroll input and pad to fit the output filters.
    input_reshaped = input.dimshuffle(0, 2, 1, 3).flatten(ndim=3).dimshuffle(1, 0, 2).flatten(ndim=2)
    # input_tiled = T.tile(input_reshaped, (1, nkernels_out))
    input_tiled = T.alloc(input_reshaped, nkernels_out, input_reshaped.shape[0],
                          input_reshaped.shape[1]).dimshuffle(1, 0, 2).flatten(ndim=2)

    conv_res = convolve1d_2D(input_tiled, W_tiled, mode=mode)
    if mode == 'full':
        new_shape = (nwords + filter_width - 1, nkernels_out, batch_size, nkernels_in, ndim)
    elif mode == 'valid':
        new_shape = (nwords - filter_width + 1, nkernels_out, batch_size, nkernels_in, ndim)
    conv_out = conv_res.reshape(new_shape).dimshuffle(2, 1, 0, 3, 4).sum(axis=3)
    return conv_out

##########################################
### Using einsum for 4d matrices
##########################################

def convolve1d_4D_scan(input, W, mode='full'):
    batch_size, nchannels, nwords, ndim = input.shape
    nkernels_out, nkernels_in, filter_width, ndim = W.shape

    # Unroll filter along columns
    W_unrolled = W.dimshuffle(0, 2, 1, 3).flatten(ndim=3)
    # Replicate input filters 'batch_size' times and squash out_filters along column axis.
    # W_tiled = T.tile(W_unrolled, (1, 1, batch_size)).dimshuffle(1, 0, 2).flatten(ndim=2)  # doesn't give a gradient
    W_tiled = T.alloc(W_unrolled, batch_size, W_unrolled.shape[0], W_unrolled.shape[1],
                      W_unrolled.shape[2]).dimshuffle(1, 2, 0, 3).flatten(ndim=3).dimshuffle(1, 0, 2).flatten(ndim=2)
    W_tiled = W_tiled[::-1]
    # reverse_slicing = [slice(None, None, None)] * W_tiled.ndim
    # reverse_slicing[0] = slice(None, None, -1)
    # reverse_slicing = tuple(reverse_slicing)
    # W_tiled = W_tiled[reverse_slicing]  # flip the kernel

    # Unroll input and pad to fit the output filters.
    input_reshaped = input.dimshuffle(0, 2, 1, 3).flatten(ndim=3).dimshuffle(1, 0, 2).flatten(ndim=2)
    # input_tiled = T.tile(input_reshaped, (1, nkernels_out))
    input_tiled = T.alloc(input_reshaped, nkernels_out, input_reshaped.shape[0],
                          input_reshaped.shape[1]).dimshuffle(1, 0, 2).flatten(ndim=2)

    if mode == 'full':
        pad = T.zeros((filter_width - 1, nkernels_out * batch_size * nchannels * ndim))
        input_padded = T.concatenate([pad, input_tiled, pad])
        conv_out, _ = theano.scan(fn=lambda i: (W_tiled * input_padded[i:i + filter_width]).sum(axis=0),
                                  outputs_info=None,
                                  sequences=[T.arange(0, nwords + filter_width - 1)])
        new_shape = (nwords + filter_width - 1, nkernels_out, batch_size, nkernels_in, ndim)
    elif mode == 'valid':
        conv_out, _ = theano.scan(fn=lambda i: (W_tiled * input_tiled[i:i + filter_width]).sum(axis=0),
                                  outputs_info=None,
                                  sequences=[T.arange(0, nwords - filter_width + 1)])
        new_shape = (nwords - filter_width + 1, nkernels_out, batch_size, nkernels_in, ndim)

    conv_reshaped = conv_out.reshape(new_shape).dimshuffle(2, 1, 0, 3, 4).sum(axis=3)
    return conv_reshaped

def style_loss5d(self, out_layer, target_style_layer):
    # Each input is a 5D tensor: (style loss layer, batch, feature map, height, width)
    return T.mean(T.sum(T.sqr(self.batched_gram(out_layer) -
                              T.tile(self.batched_gram(target_style_layer), (1, T.shape(out_layer)[0], 1, 1))),
                        axis=(2, 3)),
                  axis=1)

def style_loss(self, out_layer, target_style_layer):
    # Each input is a 4D tensor: (batch, feature map, height, width)
    # TODO: Make the first dim broadcastable instead of tiling
    return T.mean(T.sqr(self.batched_gram(out_layer) -
                        T.tile(self.batched_gram(target_style_layer), (T.shape(out_layer)[0], 1, 1))))

def style_loss_pg(self, out_layer, target_style_gram):
    # Each input is a 4D tensor: (batch, feature map, height, width)
    # TODO: Make the first dim broadcastable instead of tiling
    return T.mean(T.sqr(self.batched_gram(out_layer) -
                        T.tile(target_style_gram, (T.shape(out_layer)[0], 1, 1))))

def build_model(tparams, options):
    """
    Construct computation graph for the whole model
    """
    # inputs (image, sentence, contrast images, contrast sentences)
    im = tensor.matrix('im', dtype='float32')
    s = tensor.matrix('s', dtype='float32')
    cim = tensor.matrix('cim', dtype='float32')
    cs = tensor.matrix('cs', dtype='float32')

    # image embedding
    lim = get_layer('ff')[1](tparams, im, options, prefix='ff_im', activ='linear')
    lcim = get_layer('ff')[1](tparams, cim, options, prefix='ff_im', activ='linear')

    # sentence embedding
    ls = get_layer('ff')[1](tparams, s, options, prefix='ff_s', activ='linear')
    lcs = get_layer('ff')[1](tparams, cs, options, prefix='ff_s', activ='linear')

    # L2 norm for sentences
    ls = l2norm(ls)
    lcs = l2norm(lcs)

    # Tile by number of contrast terms
    lim = tensor.tile(lim, (options['ncon'], 1))
    ls = tensor.tile(ls, (options['ncon'], 1))

    # pairwise ranking loss
    cost_im = options['margin'] - (lim * ls).sum(axis=1) + (lim * lcs).sum(axis=1)
    cost_im = cost_im * (cost_im > 0.)
    cost_im = cost_im.sum(0)

    cost_s = options['margin'] - (ls * lim).sum(axis=1) + (ls * lcim).sum(axis=1)
    cost_s = cost_s * (cost_s > 0.)
    cost_s = cost_s.sum(0)

    cost = cost_im + cost_s
    return [im, s, cim, cs], cost

# build an encoder