The following 32 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.mul().
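Before the project examples, a minimal self-contained sketch of the call itself may be useful; the variable names and sample values here are illustrative only and do not come from any of the projects below. theano.tensor.mul is the element-wise multiplication op: T.mul(x, y) builds the same graph as x * y, it accepts more than two inputs, and broadcasting follows the usual Theano rules.

import numpy
import theano
import theano.tensor as T

# Illustrative sketch (not from the projects below): element-wise product of two vectors.
x = T.vector('x')
y = T.vector('y')
z = T.mul(x, y)  # same graph as x * y; T.mul also accepts more than two inputs
f = theano.function([x, y], z)

print(f(numpy.asarray([1., 2., 3.], dtype=theano.config.floatX),
        numpy.asarray([4., 5., 6.], dtype=theano.config.floatX)))
# expected output: [  4.  10.  18.]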
def rbf_kernel(X):
    XY = T.dot(X, X.T)
    x2 = T.sum(X**2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2. * XY

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(.5 * h / T.log(H.shape[0].astype('float32') + 1.))

    # compute the rbf kernel
    kxy = T.exp(-H / (h ** 2) / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / (h ** 2)

    return kxy, dxkxy
def fit(self, weights, o_error, tpo):

    gradients = T.grad(o_error, weights)
    updates = []
    for c, v, w, g in zip(self.t_cache, self.t_velocity, weights, gradients):
        new_velocity = T.sub(T.mul(tpo["momentum_rate"], v),
                             T.mul(tpo["learn_rate"], g))
        new_cache = T.add(T.mul(tpo["decay_rate"], c),
                          T.mul(T.sub(1, tpo["decay_rate"]), T.sqr(g)))
        new_weights = T.sub(T.add(w, new_velocity),
                            T.true_div(T.mul(g, tpo["learn_rate"]),
                                       T.sqrt(T.add(new_cache, 0.1**8))))
        updates.append((w, new_weights))
        updates.append((v, new_velocity))
        updates.append((c, new_cache))

    return updates


###### Nesterov momentum ########################################
def sequence_iteration(self, output, mask, use_dropout=0, dropout_value=0.5):
    dot_product = T.dot(output, self.t_w_out)

    net_o = T.add(dot_product, self.t_b_out)

    ex_net = T.exp(net_o)
    sum_net = T.sum(ex_net, axis=2, keepdims=True)
    softmax_o = ex_net / sum_net

    mask = T.addbroadcast(mask, 2)  # TODO: necessary?
    output = T.mul(mask, softmax_o) + T.mul((1. - mask), 1e-6)

    return output  # result


###### Linear Layer ########################################
def t_forward_step(self, mask, rzup_in_sig, h_pre, u_rz, u_up, t_n_out):  # u_r, u_z,

    signal_act = self.activation
    gate_act = self.sigmoid()

    preact = T.dot(h_pre, u_rz)

    r = gate_act(T.add(rzup_in_sig[:, 0:t_n_out], preact[:, 0:t_n_out]))  # T.dot( h_pre, u_r) ) )
    z = gate_act(T.add(rzup_in_sig[:, t_n_out:2 * t_n_out], preact[:, t_n_out:2 * t_n_out]))  # T.dot(h_pre, u_z) ))

    h_update = signal_act(T.add(rzup_in_sig[:, 2 * t_n_out:3 * t_n_out], T.dot(T.mul(h_pre, r), u_up)))

    h_new = T.add((1. - z) * h_update, z * h_pre)

    mask = T.addbroadcast(mask, 1)
    out_sig = T.add(mask * h_new, (1. - mask) * h_pre)

    return out_sig
def test_recursive_lift(self):
    v = T.vector(dtype="float64")
    m = T.matrix(dtype="float64")
    out = ((v + 42) * (m + 84)).T
    g = FunctionGraph([v, m], [out])
    init_str_g = ("[InplaceDimShuffle{1,0}(Elemwise{mul,no_inplace}"
                  "(InplaceDimShuffle{x,0}(Elemwise{add,no_inplace}"
                  "(<TensorType(float64, vector)>, "
                  "InplaceDimShuffle{x}(TensorConstant{42}))), "
                  "Elemwise{add,no_inplace}"
                  "(<TensorType(float64, matrix)>, "
                  "InplaceDimShuffle{x,x}(TensorConstant{84}))))]")
    self.assertTrue(str(g) == init_str_g)
    new_out = local_dimshuffle_lift.transform(g.outputs[0].owner)[0]
    new_g = FunctionGraph(g.inputs, [new_out])
    opt_str_g = ("[Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}"
                 "(InplaceDimShuffle{0,x}(<TensorType(float64, vector)>), "
                 "InplaceDimShuffle{x,x}(TensorConstant{42})), "
                 "Elemwise{add,no_inplace}(InplaceDimShuffle{1,0}"
                 "(<TensorType(float64, matrix)>), "
                 "InplaceDimShuffle{x,x}(TensorConstant{84})))]")
    self.assertTrue(str(new_g) == opt_str_g)

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(check_stack_trace(new_g, ops_to_check='all'))
def test_kording_bug(self):
    x, y = vectors('xy')
    eps = scalar('eps')
    s = scalar('s')

    # r = theano.tensor.mul(theano.tensor.fill(x, 2.*a), x/a , (y+z) , a)
    # r = theano.tensor.mul((x/a+y) , a, z)
    r = tensor.mul(s - 1, eps + x / s, eps + y / s, s)

    f = function([s, eps, x, y], r ** 2)

    s_val = numpy.asarray(4, dtype=config.floatX)
    eps_val = numpy.asarray(1.e-6, dtype=config.floatX)
    x_val = numpy.asarray([1.5, 2], dtype=config.floatX)
    y_val = numpy.asarray([2.3, 3.1], dtype=config.floatX)

    r0 = f(s_val, eps_val, x_val, y_val)
    r1 = f(s_val, eps_val, x_val, y_val)
    r2 = f(s_val, eps_val, x_val, y_val)

    assert numpy.all(r0 == r1)
    assert numpy.all(r0 == r2)
def test_mul(self):
    x = T.dmatrix()
    y = T.dmatrix()
    f = theano.function([x], T.mul(x), mode=self.mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert topo[0].op == deep_copy_op

    f2 = theano.function([x, y], T.mul(x, y), mode=self.mode)
    assert numpy.all(f2(vx, vy) == vx * vy)
    topo2 = f2.maker.fgraph.toposort()
    assert len(topo2) == 1
    assert isinstance(topo2[0].op, T.Elemwise)
    assert isinstance(topo2[0].op.scalar_op, theano.scalar.Mul)
def test_local_one_plus_erf(self):
    val = numpy.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
                        dtype=config.floatX)
    x = T.vector()

    f = theano.function([x], 1 + T.erf(x), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [
        T.mul, T.erfc], f.maker.fgraph.toposort()
    f(val)

    f = theano.function([x], T.erf(x) + 1, mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [
        T.mul, T.erfc], f.maker.fgraph.toposort()
    f(val)

    f = theano.function([x], T.erf(x) + 2, mode=self.mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert topo[0].op == T.erf
    assert isinstance(topo[1].op, T.Elemwise)
    assert isinstance(topo[1].op.scalar_op, scal.Add)
    f(val)
def test_local_erf_minus_one(self):
    val = numpy.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
                        dtype=config.floatX)
    x = T.vector()

    f = theano.function([x], T.erf(x) - 1, mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]
    print(f(val))

    f = theano.function([x], T.erf(x) + (-1), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]
    print(f(val))

    f = theano.function([x], -1 + T.erf(x), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]
    print(f(val))

    f = theano.function([x], T.erf(x) - 2, mode=self.mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert topo[0].op == T.erf
    assert isinstance(topo[1].op, T.Elemwise)
    assert isinstance(topo[1].op.scalar_op, scal.Add)\
        or isinstance(topo[1].op.scalar_op, scal.Sub)
    print(f(val))
def create_attention(self, gru_con, in_con_mask, condition, batch_size,
                     n_hidden_con, **kwargs):
    # (batch_size, n_attention)
    gru_cond2 = non_flattening_dense_layer(
        gru_con, self.in_con_mask, self.n_attention, nonlinearity=None)
    gru_que2 = DenseLayer(condition, self.n_attention, nonlinearity=None)
    gru_que2 = dimshuffle(gru_que2, (0, 'x', 1))

    att = ElemwiseSumLayer([gru_cond2, gru_que2])
    att = NonlinearityLayer(att, T.tanh)
    att = SliceLayer(non_flattening_dense_layer(
        att, self.in_con_mask, 1, nonlinearity=None), indices=0, axis=2)

    att_softmax = SequenceSoftmax(att, self.in_con_mask)

    rep = ElemwiseMergeLayer(
        [ForgetSizeLayer(dimshuffle(att_softmax, (0, 1, 'x'))),
         gru_con], T.mul)

    return ExpressionLayer(rep, lambda x: T.sum(x, axis=1),
                           lambda s: (s[0],) + s[2:])
def SlopeLinInv(slope):
    """
    Truncated linear unit
    :param slope: slope of negative quadrant
    :return: x if x > 0 else x*slope
    """
    import theano.tensor as T

    def inner(x):
        return T.switch(T.gt(x, 0), x, T.mul(x, slope))
    return inner
def op_matmul(s_x_, s_y_, axes_=(-2, -1)):
    '''
    limited implementation of np.matmul, does not support broadcasting

    Args:
        s_x_: (batch of) matrix(matrices)
        s_y_: (batch of) matrix(matrices)
        axes_: tuple of int, the axes for the matrix
    '''
    assert s_x_.ndim == s_y_.ndim
    ndim = s_x_.ndim
    assert -ndim <= axes_[0] < ndim
    assert -ndim <= axes_[1] < ndim
    assert ndim >= 2
    axes = axes_[0] % ndim, axes_[1] % ndim
    if ndim == 2:
        if axes == (0, 1):
            return T.dot(s_x_, s_y_)
        else:
            return T.dot(s_y_, s_x_)

    s_shp = T.shape(s_x_)
    s_size = reduce(T.mul, [s_shp[i] for i in range(s_x_.ndim) if i not in axes])
    s_szu = s_shp[axes[0]]
    s_szv = s_shp[axes[1]]
    s_szw = T.shape(s_y_)[axes[1]]
    transpp = list(range(ndim))
    transpp[axes[0]], transpp[ndim - 2] = transpp[ndim - 2], transpp[axes[0]]
    transpp[axes[1]], transpp[ndim - 1] = transpp[ndim - 1], transpp[axes[1]]
    s_shp2 = [s_shp[a] for a in transpp]
    s_shp2[axes[1]] = s_szw
    s_x = s_x_.transpose(*transpp).reshape((s_size, s_szu, s_szv))
    s_y = s_y_.transpose(*transpp).reshape((s_size, s_szv, s_szw))
    return T.batched_dot(s_x, s_y).reshape(s_shp2).transpose(transpp)
def _get_normalised_relevance_layer(self, layer, feeder):

    def add_epsilon(Zs):
        tmp = (T.cast(Zs >= 0, theano.config.floatX) * 2.0 - 1.0)
        return Zs + self.epsilon * tmp

    if isinstance(layer, L.DenseLayer):
        forward_layer = L.DenseLayer(layer.input_layer,
                                     layer.num_units,
                                     W=layer.W,
                                     b=layer.b,
                                     nonlinearity=None)
    elif isinstance(layer, L.Conv2DLayer):
        forward_layer = L.Conv2DLayer(layer.input_layer,
                                      num_filters=layer.num_filters,
                                      W=layer.W,
                                      b=layer.b,
                                      stride=layer.stride,
                                      filter_size=layer.filter_size,
                                      flip_filters=layer.flip_filters,
                                      untie_biases=layer.untie_biases,
                                      pad=layer.pad,
                                      nonlinearity=None)
    else:
        raise NotImplementedError()

    forward_layer = L.ExpressionLayer(forward_layer,
                                      lambda x: 1.0 / add_epsilon(x))
    feeder = L.ElemwiseMergeLayer([forward_layer, feeder],
                                  merge_function=T.mul)
    return feeder
def _invert_DenseLayer(self, layer, feeder):
    # Warning they are swapped here
    feeder = self._put_rectifiers(feeder, layer)
    feeder = self._get_normalised_relevance_layer(layer, feeder)

    output_units = np.prod(L.get_output_shape(layer.input_layer)[1:])
    output_layer = L.DenseLayer(feeder, num_units=output_units)
    W = output_layer.W

    tmp_shape = np.asarray((-1,) + L.get_output_shape(output_layer)[1:])
    x_layer = L.ReshapeLayer(layer.input_layer, tmp_shape.tolist())
    output_layer = L.ElemwiseMergeLayer(incomings=[x_layer, output_layer],
                                        merge_function=T.mul)
    output_layer.W = W
    return output_layer
def output_error(self, input_sequence, true_output, mask):
    outputs = T.pow(true_output - input_sequence, 2)
    outputs = T.sum(outputs, axis=2) / outputs.shape[2]
    outputs = T.mul(outputs.dimshuffle(0, 1, 'x'), mask)
    return T.sum(outputs) / T.sum(mask)


###### 2-class weighted cross entropy ########################################
def output_error(self, input_sequence, true_output, mask):
    outputs = self._w_crossentropy(input_sequence, true_output)
    # outputs = T.mul(outputs.dimshuffle(0,1,'x'), mask)  # todo correct mask implementation? influence on result?
    return T.sum(outputs) / T.sum(mask)


###### Standard cross entropy ########################################
def output_error(self, input_sequence, true_output, mask):
    outputs = T.nnet.categorical_crossentropy(input_sequence, true_output)
    outputs = T.mul(outputs.dimshuffle(0, 1, 'x'), mask)
    return T.sum(outputs) / T.sum(mask)
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ig_c, w_fg_c,
                   w_og_c, w_ifco, b_ifco, t_n_out):
    ifco = T.add(T.dot(pre_out_sig, w_ifco), b_ifco)

    inner_act = self.activation
    gate_act = self.sigmoid()

    # Input Gate
    ig_t1 = gate_act(T.add(ifco[:, 0:t_n_out],
                           T.mul(pre_cell_sig, w_ig_c),
                           cur_w_in_sig[:, 0:t_n_out]))
    # Forget Gate
    fg_t1 = gate_act(T.add(ifco[:, 1 * t_n_out:2 * t_n_out],
                           T.mul(pre_cell_sig, w_fg_c),
                           cur_w_in_sig[:, 1 * t_n_out:2 * t_n_out]))
    # Cell State
    cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(
        T.add(ifco[:, 2 * t_n_out:3 * t_n_out], cur_w_in_sig[:, 2 * t_n_out:3 * t_n_out]))))

    mask = T.addbroadcast(mask, 1)
    cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
    # functionality: cs_t1 = T.switch(mask , cs_t1, pre_cell_sig)

    # Output Gate
    og_t1 = gate_act(
        T.add(ifco[:, 3 * t_n_out:4 * t_n_out],
              T.mul(cs_t1, w_og_c),
              cur_w_in_sig[:, 3 * t_n_out:4 * t_n_out]))
    # Output LSTM
    out_sig = T.mul(og_t1, inner_act(cs_t1))

    out_sig = mask * out_sig + (1. - mask) * pre_out_sig

    return [out_sig, cs_t1]
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,
                   t_n_out):
    ifco = T.add(T.dot(pre_out_sig, w_ifco), b_ifco)

    inner_act = self.activation
    gate_act = self.sigmoid()

    # Input Gate
    ig_t1 = gate_act(T.add(ifco[:, 0:t_n_out], cur_w_in_sig[:, 0:t_n_out]))
    # Forget Gate
    fg_t1 = gate_act(T.add(ifco[:, 1 * t_n_out:2 * t_n_out],
                           cur_w_in_sig[:, 1 * t_n_out:2 * t_n_out]))
    # Cell State
    cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(
        T.add(ifco[:, 2 * t_n_out:3 * t_n_out], cur_w_in_sig[:, 2 * t_n_out:3 * t_n_out]))))

    mask = T.addbroadcast(mask, 1)
    cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
    # functionality: cs_t1 = T.switch(mask , cs_t1, pre_cell_sig)

    # Output Gate
    og_t1 = gate_act(
        T.add(ifco[:, 3 * t_n_out:4 * t_n_out], cur_w_in_sig[:, 3 * t_n_out:4 * t_n_out]))
    # Output LSTM
    out_sig = T.mul(og_t1, inner_act(cs_t1))

    out_sig = mask * out_sig + (1. - mask) * pre_out_sig

    return [out_sig, cs_t1]
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ig_c, w_fg_c,
                   w_og_c, w_ifco, b_ifco, ln_b1, ln_s1, ln_b2, ln_s2, ln_b3, ln_s3, t_n_out):

    cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)
    pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
    pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)
    preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)

    inner_act = self.activation  # T.nnet.hard_sigmoid T.tanh
    gate_act = self.sigmoid()  # T.nnet.hard_sigmoid

    # Input Gate
    ig_t1 = gate_act(T.add(preact[:, 0:t_n_out], T.mul(pre_cell_sig, w_ig_c)))
    # Forget Gate
    fg_t1 = gate_act(T.add(preact[:, 1 * t_n_out:2 * t_n_out], T.mul(pre_cell_sig, w_fg_c),))
    # Cell State
    cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(
        T.add(preact[:, 2 * t_n_out:3 * t_n_out]))))

    mask = T.addbroadcast(mask, 1)
    cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
    # functionality: cs_t1 = T.switch(mask , cs_t1, pre_cell_sig)

    cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)

    # Output Gate
    og_t1 = gate_act(
        T.add(preact[:, 3 * t_n_out:4 * t_n_out], T.mul(cs_t1_ln, w_og_c)))
    # Output LSTM
    out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

    out_sig = mask * out_sig + (1. - mask) * pre_out_sig

    return [out_sig, cs_t1]
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,
                   ln_b1, ln_s1, ln_b2, ln_s2, ln_b3, ln_s3, t_n_out):

    cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)
    pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
    pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)
    preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)

    inner_act = self.activation  # T.nnet.hard_sigmoid #T.tanh
    gate_act = self.sigmoid()  # T.nnet.hard_sigmoid #T.nnet.sigmoid

    # Input Gate
    ig_t1 = gate_act(preact[:, 0:t_n_out])
    # Forget Gate
    fg_t1 = gate_act(preact[:, 1 * t_n_out:2 * t_n_out])
    # Cell State
    cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig),
                  T.mul(ig_t1, inner_act(preact[:, 2 * t_n_out:3 * t_n_out])))

    mask = T.addbroadcast(mask, 1)
    cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig

    cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)

    # Output Gate
    og_t1 = gate_act(preact[:, 3 * t_n_out:4 * t_n_out])
    # Output LSTM
    out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

    out_sig = mask * out_sig + (1. - mask) * pre_out_sig

    return [out_sig, cs_t1]
def build_sampler(self, mu_layer, logsd_layer, noise_input_layer):
    sigma_layer = lasagne.layers.ExpressionLayer(logsd_layer,
                                                 lambda x: T.exp(0.5 * x),
                                                 output_shape='auto')
    noise_layer = lasagne.layers.ElemwiseMergeLayer(
        [sigma_layer, noise_input_layer], T.mul)
    z_layer = lasagne.layers.ElemwiseSumLayer(
        [mu_layer, noise_layer], [1, 1])
    return z_layer
def get_output_for(self, inputs, **kwargs):
    img = inputs[0]
    att = inputs[1].dimshuffle(0, 'x', 1, 2)
    output = T.mul(img, att)
    return output
def test_local_mul_specialize():
    mode = theano.config.mode
    if mode == 'FAST_COMPILE':
        mode = 'FAST_RUN'
    mode = compile.mode.get_mode(mode)
    mode = mode.excluding('fusion')

    v = T.vector()
    m = T.vector()

    f = function([v], v * 1, mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    nodes == [deep_copy_op]

    f = function([v], v * 0, mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [Shape_i(0), T.alloc]

    f = function([v], v * (-1), mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [T.neg]

    f = function([v, m], v * 1 * (-m), mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [T.mul]

    f = function([v, m], v * 0 * (-m), mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [Shape_i(0), T.alloc]

    f = function([v, m], v * (-1) * (-m), mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [T.mul]

    f = function([v, m], v * (-1) * m, mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [T.mul]
def test_elemwise(self):
    # float Ops
    mats = theano.tensor.matrices('cabxy')
    c, a, b, x, y = mats
    s1 = T.switch(c, a, b)
    s2 = T.switch(c, x, y)
    for op in (T.add, T.sub, T.mul, T.true_div, T.int_div, T.floor_div,
               T.minimum, T.maximum, T.gt, T.lt, T.ge, T.le, T.eq, T.neq,
               T.pow):
        g = optimize(FunctionGraph(mats, [op(s1, s2)]))
        assert str(g).count('Switch') == 1
    # integer Ops
    mats = theano.tensor.imatrices('cabxy')
    c, a, b, x, y = mats
    s1 = T.switch(c, a, b)
    s2 = T.switch(c, x, y)
    for op in (T.and_, T.or_, T.xor,
               T.bitwise_and, T.bitwise_or, T.bitwise_xor):
        g = optimize(FunctionGraph(mats, [op(s1, s2)]))
        assert str(g).count('Switch') == 1
    # add/mul with more than two inputs
    u, v = theano.tensor.matrices('uv')
    s3 = T.switch(c, u, v)
    for op in (T.add, T.mul):
        g = optimize(FunctionGraph(mats + [u, v], [op(s1, s2, s3)]))
        assert str(g).count('Switch') == 1
def apply_mask(x, mask):
    '''
    x : 3D tensor
    mask : 2D tensor

    Example
    -------
    >>> Input: [128, 500, 120]
    >>> Mask:  [1, 1, 0]
    >>> Output: [128, 500, 0]
    '''
    return T.mul(x, expand_dims(mask, -1))
def _invert_Conv2DLayer(self, layer, feeder):
    # Warning they are swapped here
    feeder = self._put_rectifiers(feeder, layer)
    feeder = self._get_normalised_relevance_layer(layer, feeder)

    f_s = layer.filter_size
    if layer.pad == 'same':
        pad = 'same'
    elif layer.pad == 'valid' or layer.pad == (0, 0):
        pad = 'full'
    else:
        raise RuntimeError("Define your padding as full or same.")

    # By definition, flip_filters must be on to be a proper deconvolution.
    num_filters = L.get_output_shape(layer.input_layer)[1]
    if layer.stride == (4, 4):
        # Todo: similar code gradient based explainers. Merge.
        feeder = L.Upscale2DLayer(feeder, layer.stride, mode='dilate')
        output_layer = L.Conv2DLayer(feeder, num_filters=num_filters,
                                     filter_size=f_s, stride=1, pad=pad,
                                     nonlinearity=None, b=None,
                                     flip_filters=True)
        conv_layer = output_layer
        tmp = L.SliceLayer(output_layer, slice(0, -3), axis=3)
        output_layer = L.SliceLayer(tmp, slice(0, -3), axis=2)
        output_layer.W = conv_layer.W
    else:
        output_layer = L.Conv2DLayer(feeder, num_filters=num_filters,
                                     filter_size=f_s, stride=1, pad=pad,
                                     nonlinearity=None, b=None,
                                     flip_filters=True)
    W = output_layer.W

    # Do the multiplication.
    x_layer = L.ReshapeLayer(layer.input_layer,
                             (-1,) + L.get_output_shape(output_layer)[1:])
    output_layer = L.ElemwiseMergeLayer(incomings=[x_layer, output_layer],
                                        merge_function=T.mul)
    output_layer.W = W
    return output_layer
def build(self):
    """build the model. This method should be called after self.add_data.
    """
    x_sym = sparse.csr_matrix('x', dtype='float32')
    y_sym = T.imatrix('y')
    g_sym = T.imatrix('g')
    gy_sym = T.vector('gy')
    ind_sym = T.ivector('ind')

    l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=x_sym)
    l_g_in = lasagne.layers.InputLayer(shape=(None, 2), input_var=g_sym)
    l_ind_in = lasagne.layers.InputLayer(shape=(None, ), input_var=ind_sym)
    l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)

    num_ver = max(self.graph.keys()) + 1
    l_emb_in = lasagne.layers.SliceLayer(l_g_in, indices=0, axis=1)
    l_emb_in = lasagne.layers.EmbeddingLayer(l_emb_in, input_size=num_ver, output_size=self.embedding_size)
    l_emb_out = lasagne.layers.SliceLayer(l_g_in, indices=1, axis=1)
    if self.neg_samp > 0:
        l_emb_out = lasagne.layers.EmbeddingLayer(l_emb_out, input_size=num_ver, output_size=self.embedding_size)

    l_emd_f = lasagne.layers.EmbeddingLayer(l_ind_in, input_size=num_ver, output_size=self.embedding_size, W=l_emb_in.W)
    l_x_hid = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)

    if self.use_feature:
        l_emd_f = layers.DenseLayer(l_emd_f, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
        l_y = lasagne.layers.ConcatLayer([l_x_hid, l_emd_f], axis=1)
        l_y = layers.DenseLayer(l_y, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    else:
        l_y = layers.DenseLayer(l_emd_f, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)

    py_sym = lasagne.layers.get_output(l_y)
    loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
    if self.layer_loss and self.use_feature:
        hid_sym = lasagne.layers.get_output(l_x_hid)
        loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean()
        emd_sym = lasagne.layers.get_output(l_emd_f)
        loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean()

    if self.neg_samp == 0:
        l_gy = layers.DenseLayer(l_emb_in, num_ver, nonlinearity=lasagne.nonlinearities.softmax)
        pgy_sym = lasagne.layers.get_output(l_gy)
        g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, lasagne.layers.get_output(l_emb_out)).sum()
    else:
        l_gy = lasagne.layers.ElemwiseMergeLayer([l_emb_in, l_emb_out], T.mul)
        pgy_sym = lasagne.layers.get_output(l_gy)
        g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis=1) * gy_sym)).sum()

    params = [l_emd_f.W, l_emd_f.b, l_x_hid.W, l_x_hid.b, l_y.W, l_y.b] if self.use_feature else [l_y.W, l_y.b]
    if self.update_emb:
        params = lasagne.layers.get_all_params(l_y)
    updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate)

    self.train_fn = theano.function([x_sym, y_sym, ind_sym], loss, updates=updates, on_unused_input='ignore')
    self.test_fn = theano.function([x_sym, ind_sym], py_sym, on_unused_input='ignore')
    self.l = [l_gy, l_y]

    g_params = lasagne.layers.get_all_params(l_gy, trainable=True)
    g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate=self.g_learning_rate)
    self.g_fn = theano.function([g_sym, gy_sym], g_loss, updates=g_updates, on_unused_input='ignore')
def build(self):
    """build the model. This method should be called after self.add_data.
    """
    x_sym = sparse.csr_matrix('x', dtype='float32')
    self.x_sym = x_sym
    y_sym = T.imatrix('y')
    gx_sym = sparse.csr_matrix('gx', dtype='float32')
    gy_sym = T.ivector('gy')
    gz_sym = T.vector('gz')

    l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=x_sym)
    l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=gx_sym)
    l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)

    l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size)
    W = l_x_2.W
    l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    if self.use_feature:
        l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1)
        l_x = layers.DenseLayer(l_x, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    else:
        l_x = l_x_2

    l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W=W)
    if self.neg_samp > 0:
        l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size=self.num_ver, output_size=self.embedding_size)
        l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
        pgy_sym = lasagne.layers.get_output(l_gx)
        g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis=1) * gz_sym)).sum()
    else:
        l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity=lasagne.nonlinearities.softmax)
        pgy_sym = lasagne.layers.get_output(l_gx)
        g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, gy_sym).sum()

    self.l = [l_x, l_gx]

    py_sym = lasagne.layers.get_output(l_x)
    loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
    if self.layer_loss and self.use_feature:
        hid_sym = lasagne.layers.get_output(l_x_1)
        loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean()
        emd_sym = lasagne.layers.get_output(l_x_2)
        loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean()

    params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b] if self.use_feature else [l_x.W, l_x.b]
    if self.update_emb:
        params = lasagne.layers.get_all_params(l_x)
    updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate)
    self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates)

    g_params = lasagne.layers.get_all_params(l_gx)
    g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate=self.g_learning_rate)
    self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates=g_updates, on_unused_input='ignore')

    self.test_fn = theano.function([x_sym], py_sym)
def test_local_fill_useless():
    # Test opt local_fill_cut
    x = dvector()
    y = dvector()
    z = lvector()
    m = dmatrix()

    x_ = numpy.random.rand(5,)
    y_ = numpy.random.rand(5,)
    z_ = (numpy.random.rand(5,) * 5).astype("int64")
    m_ = numpy.random.rand(5, 5)

    # basic case
    f = function([x], T.fill(x, x) * 2, mode=mode_opt)
    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
    f(x_)

    # basic case
    f = function([x, y], T.second(y, x) * 2, mode=mode_opt)
    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
    f(x_, y_)

    # basic case
    f = function([x, y], T.fill(x, y) * 2, mode=mode_opt)
    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
    f(x_, y_)

    # now with different type(cast)
    f = function([x, z], T.fill(z, x) * 2, mode=mode_opt)
    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
    f(x_, z_)

    # now with different type(cast)
    f = function([x, z], T.fill(x, z) * 2, mode=mode_opt)
    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
    f(x_, z_)

    # now cutting out the input ??
    f = function([x, y], T.fill(x, y) * 2, mode=mode_opt)
    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
    f(x_, y_)

    # Test with different number of dimensions
    # The fill is not useless, so it should stay
    f = function([m, x], T.fill(m, x) * 2, mode=mode_opt)
    ops = [node.op.__class__ for node in f.maker.fgraph.toposort()]
    assert T.Alloc in ops
    f(m_, x_)
def test_gemm_canonicalize():
    X, Y, Z, a, b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar(
        'a'), T.scalar('b')
    R, S, U, c, d = T.matrix('R'), T.matrix('S'), T.matrix('U'), T.scalar(
        'c'), T.scalar('d')
    u = T.row('u')
    v = T.vector('v')
    w = T.col('w')

    can = []
    _gemm_canonicalize(X + Y + Z, 1.0, can, 0)
    assert can == [(1.0, X), (1.0, Y), (1.0, Z)]

    can = []
    _gemm_canonicalize(X + Y + u, 1.0, can, 0)
    assert can == [(1.0, X), (1.0, Y), (1.0, u)], can

    can = []
    _gemm_canonicalize(X + Y + v, 1.0, can, 0)
    # [(1.0, X), (1.0, Y), (1.0, InplaceDimShuffle{x,0}(v))]
    assert can[:2] == [(1.0, X), (1.0, Y)]
    assert isinstance(can[2], tuple)
    assert len(can[2]) == 2
    assert can[2][0] == 1.0
    assert can[2][1].owner
    assert isinstance(can[2][1].owner.op, T.DimShuffle)
    assert can[2][1].owner.inputs == [v]

    can = []
    _gemm_canonicalize(X + Y + w, 1.0, can, 0)
    assert can == [(1.0, X), (1.0, Y), (1.0, w)], can

    can = []
    _gemm_canonicalize(a * X + Y - b * Z * c, 1.0, can, 0)
    assert can[0] == (a, X)
    assert can[1] == (1.0, Y)
    assert can[2][0].owner.op == T.mul
    assert can[2][0].owner.inputs[0].owner.op == T.neg
    assert can[2][0].owner.inputs[0].owner.inputs[0] == c
    assert can[2][0].owner.inputs[1] == b

    can = []
    _gemm_canonicalize((-d) * X - (a * X + Y - b * Z * c), 1.0, can, 0)
    # print can
    assert can[0][0].owner.op == T.neg
    assert can[0][0].owner.inputs[0] == d
    assert can[0][1] == X
    assert can[1][0].owner.op == T.neg
    assert can[1][0].owner.inputs[0] == a
    assert can[2] == (-1.0, Y)
    assert can[3][0].owner.op == T.mul
    assert can[3][0].owner.inputs == [c, b]
def test_local_dot22_to_dot22scalar():
    """
    This tests that the bug in gh-1507 is really fixed
    """
    A = T.dmatrix()
    mode = theano.compile.mode.get_default_mode()
    opt = theano.tensor.opt.in2out(
        theano.tensor.blas.local_dot22_to_dot22scalar)
    mode = mode.__class__(optimizer=opt)

    x = T.dscalar()
    y = T.dscalar()
    z = T.dscalar()
    # make sure to don't have dimshuffle as we don't opt those cases
    m = T.dmatrix()
    r = T.drow()
    for idx, node in enumerate([
            # Old working cases
            T.mul(_dot22(A, A), x),
            T.mul(_dot22(A, A), x, y),
            T.mul(_dot22(A, A), x, r),
            T.mul(_dot22(A, A), m, x),
            T.mul(_dot22(A, A), x, m),
            T.mul(_dot22(A, A), x, (m * y)),
            T.mul(_dot22(A, A), (m * y), x),
            T.mul(_dot22(A, A), x, (r * y)),
            T.mul(_dot22(A, A), (r * y), x),
            T.mul(_dot22(A, A), (x * y), (m * x)),
            T.mul(_dot22(A, A), (r * y), (y * x)),

            # Case that was raising an assert that is fixed in gh-1507
            T.mul(_dot22(A, A), (m * y), m),
            T.mul(_dot22(A, A), m, (m * y)),
            T.mul(_dot22(A, A), (r * y), (m * x)),

            # assert fixed in gh-1507 and opt case added in gh-1515
            T.mul(_dot22(A, A), (m * y * z), m),
            T.mul(_dot22(A, A), m, (m * y * z)),

            # Opt case added in gh-1515
            T.mul(_dot22(A, A), T.mul(m, y, z), m),
            T.mul(_dot22(A, A), m, T.mul(m, y, z)),

            # Case that opt later in gh-1515
            T.mul(_dot22(A, A), (r * m), (m * x)),
    ]):
        node2 = theano.tensor.blas.local_dot22_to_dot22scalar.transform(
            node.owner)
        assert node2
        f = theano.function([x, y, z, m, r, A], node,
                            mode=mode, on_unused_input='ignore')
        f(.1, .2, .3, [[1, 2], [3, 4]], [[5, 6]], [[7, 8], [9, 10]])