The following 38 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.add().
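Before the project examples, here is a minimal self-contained sketch of theano.tensor.add() itself (the variable names x, y, z and f are illustrative, not taken from any of the projects below): it builds a symbolic elementwise sum, broadcasts like NumPy, and, as several examples below rely on, accepts more than two operands.

import numpy as np
import theano
import theano.tensor as T

# T.add builds a symbolic elementwise sum; it broadcasts like NumPy and
# accepts two or more operands (T.add(a, b, c) is equivalent to a + b + c).
x = T.dmatrix('x')
y = T.dvector('y')
z = T.add(x, y, 1.0)            # matrix + broadcast vector + scalar constant

f = theano.function([x, y], z)
print(f(np.ones((2, 3)), np.arange(3.0)))   # [[2. 3. 4.] [2. 3. 4.]]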
def rbf_kernel(X):
    XY = T.dot(X, X.T)
    x2 = T.sum(X**2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2. * XY

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(.5 * h / T.log(H.shape[0].astype('float32') + 1.))

    # compute the rbf kernel
    kxy = T.exp(-H / (h ** 2) / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / (h ** 2)

    return kxy, dxkxy
def fit(self, weights, o_error, tpo):

    gradients = T.grad(o_error, weights)
    updates = []
    for c, v, w, g in zip(self.t_cache, self.t_velocity, weights, gradients):
        new_velocity = T.sub(T.mul(tpo["momentum_rate"], v),
                             T.mul(tpo["learn_rate"], g))
        new_cache = T.add(T.mul(tpo["decay_rate"], c),
                          T.mul(T.sub(1, tpo["decay_rate"]), T.sqr(g)))
        new_weights = T.sub(T.add(w, new_velocity),
                            T.true_div(T.mul(g, tpo["learn_rate"]),
                                       T.sqrt(T.add(new_cache, 0.1**8))))
        updates.append((w, new_weights))
        updates.append((v, new_velocity))
        updates.append((c, new_cache))

    return updates


###### Nesterov momentum ########################################
def sequence_iteration(self, output, mask, use_dropout=0, dropout_value=0.5):

    dot_product = T.dot(output, self.t_w_out)

    net_o = T.add(dot_product, self.t_b_out)

    ex_net = T.exp(net_o)
    sum_net = T.sum(ex_net, axis=2, keepdims=True)
    softmax_o = ex_net / sum_net

    mask = T.addbroadcast(mask, 2)  # todo: necessary?
    output = T.mul(mask, softmax_o) + T.mul((1. - mask), 1e-6)

    return output  # result


###### Linear Layer ########################################
def t_forward_step(self, mask, rzup_in_sig, h_pre, u_rz, u_up, t_n_out):  # u_r, u_z,

    signal_act = self.activation
    gate_act = self.sigmoid()

    preact = T.dot(h_pre, u_rz)

    r = gate_act(T.add(rzup_in_sig[:, 0:t_n_out], preact[:, 0:t_n_out]))  # T.dot(h_pre, u_r)
    z = gate_act(T.add(rzup_in_sig[:, t_n_out:2 * t_n_out], preact[:, t_n_out:2 * t_n_out]))  # T.dot(h_pre, u_z)

    h_update = signal_act(T.add(rzup_in_sig[:, 2 * t_n_out:3 * t_n_out],
                                T.dot(T.mul(h_pre, r), u_up)))

    h_new = T.add((1. - z) * h_update, z * h_pre)

    mask = T.addbroadcast(mask, 1)
    out_sig = T.add(mask * h_new, (1. - mask) * h_pre)

    return out_sig
def sequence_iteration(self, in_seq, mask, use_dropout, dropout_value=1):

    in_seq_d = T.switch(use_dropout,
                        (in_seq *
                         self.trng.binomial(in_seq.shape,
                                            p=dropout_value, n=1,
                                            dtype=in_seq.dtype)),
                        in_seq)

    rz_in_seq = T.add(T.dot(in_seq_d, self.weights[0]), self.weights[1])

    out_seq, updates = theano.scan(
        fn=self.t_forward_step,
        sequences=[mask, rz_in_seq],  # in_seq_d],
        outputs_info=[self.t_ol_t00],
        non_sequences=[i for i in self.weights][2:] + [self.t_n_out],
        go_backwards=self.go_backwards,
        truncate_gradient=-1,
        # n_steps=50,
        strict=True,
        allow_gc=False,
    )
    return out_seq
def test_recursive_lift(self):
    v = T.vector(dtype="float64")
    m = T.matrix(dtype="float64")
    out = ((v + 42) * (m + 84)).T
    g = FunctionGraph([v, m], [out])
    init_str_g = ("[InplaceDimShuffle{1,0}(Elemwise{mul,no_inplace}"
                  "(InplaceDimShuffle{x,0}(Elemwise{add,no_inplace}"
                  "(<TensorType(float64, vector)>, "
                  "InplaceDimShuffle{x}(TensorConstant{42}))), "
                  "Elemwise{add,no_inplace}"
                  "(<TensorType(float64, matrix)>, "
                  "InplaceDimShuffle{x,x}(TensorConstant{84}))))]")
    self.assertTrue(str(g) == init_str_g)
    new_out = local_dimshuffle_lift.transform(g.outputs[0].owner)[0]
    new_g = FunctionGraph(g.inputs, [new_out])
    opt_str_g = ("[Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}"
                 "(InplaceDimShuffle{0,x}(<TensorType(float64, vector)>), "
                 "InplaceDimShuffle{x,x}(TensorConstant{42})), "
                 "Elemwise{add,no_inplace}(InplaceDimShuffle{1,0}"
                 "(<TensorType(float64, matrix)>), "
                 "InplaceDimShuffle{x,x}(TensorConstant{84})))]")
    self.assertTrue(str(new_g) == opt_str_g)

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(check_stack_trace(new_g, ops_to_check='all'))
def test1(self):
    # basic test that the optimization works with scalar broadcasted
    x = tensor.matrix('x')
    y = tensor.scalar('y')
    z = tensor.matrix('z')
    f = function([x, y, z], tensor.exp(x + y + z)[0], mode=mode_opt)

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(check_stack_trace(f, ops_to_check=[
        Subtensor, tensor.DimShuffle]))

    prog = f.maker.fgraph.toposort()
    assert isinstance(prog[0].op, tensor.Subtensor)
    assert isinstance(prog[1].op, tensor.DimShuffle)
    assert isinstance(prog[2].op, tensor.Subtensor)
    assert isinstance(prog[3].op.scalar_op, theano.scalar.Composite)  # Composite{add,add}
    assert len(prog) == 4
    f([[0, 1], [2, 3]], 4, [[4, 5], [6, 7]])  # let debugmode test something
def test2(self):
    # as 1, but take a slice
    x = tensor.matrix('x')
    y = tensor.scalar('y')
    z = tensor.matrix('z')
    f = function([x, y, z], tensor.exp(x + y + z)[0:2], mode=mode_opt)

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(check_stack_trace(f, ops_to_check=[
        Subtensor, tensor.DimShuffle]))

    prog = f.maker.fgraph.toposort()
    assert isinstance(prog[0].op, tensor.Subtensor)
    assert isinstance(prog[1].op, tensor.DimShuffle)
    assert isinstance(prog[2].op, tensor.Subtensor)
    assert isinstance(prog[3].op.scalar_op, theano.scalar.Composite)  # Composite{add,add}
    assert len(prog) == 4
    f([[0, 1], [2, 3]], 4, [[4, 5], [6, 7]])  # let debugmode test something
def test4(self):
    # basic test that the optimization doesn't work with broadcasting
    # ... It *could* be extended to,
    # ... but right now it doesn't, so it shouldn't try.
    x = tensor.matrix('x')
    y = tensor.vector('y')
    f = function([x, y], tensor.exp(x + y)[0], mode=mode_opt)

    # Opt doesn't apply, so no need for check_stack_trace
    # self.assertTrue(check_stack_trace(f, ops_to_check='all'))

    prog = f.maker.fgraph.toposort()
    assert isinstance(prog[0].op, tensor.DimShuffle)
    assert prog[1].op == tensor.add
    assert isinstance(prog[2].op, tensor.Subtensor)  # first subtensor
    assert prog[3].op == inplace.exp_inplace
    assert len(prog) == 4
    f([[0, 1], [2, 3]], [4, 5])  # let debugmode test something
def test5(self):
    # test that we don't lift when we reuse the output of the
    # elemwise for other computation.
    x = tensor.matrix('x')
    y = tensor.vector('y')
    f = function([x, y], [tensor.exp(x + y)[0], tensor.exp(x + y) + x],
                 mode=mode_opt)

    # Opt doesn't apply, so no need for check_stack_trace
    # self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))

    prog = f.maker.fgraph.toposort()
    assert isinstance(prog[0].op, tensor.DimShuffle)
    assert isinstance(prog[1].op.scalar_op, theano.scalar.Composite)  # Composite{add,exp}
    assert prog[2].op == tensor.add
    assert isinstance(prog[3].op, tensor.Subtensor)  # first subtensor
    assert len(prog) == 4
    f([[0, 1], [2, 3]], [4, 5])  # let debugmode test something
def test_add(self):
    x = T.dmatrix()
    y = T.dmatrix()
    f = theano.function([x], T.add(x), mode=self.mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert topo[0].op == deep_copy_op

    f2 = theano.function([x, y], T.add(x, y), mode=self.mode)
    assert numpy.all(f2(vx, vy) == vx + vy)
    topo2 = f2.maker.fgraph.toposort()
    assert len(topo2) == 1
    assert isinstance(topo2[0].op, T.Elemwise)
    assert isinstance(topo2[0].op.scalar_op, theano.scalar.Add)
def test_local_add_specialize():
    # test of non-zero dimension
    a = tensor.vector()
    s = tensor.add(tensor.zeros_like(a))
    assert local_add_specialize.transform(s.owner)

    # test of 0-d
    a = tensor.scalar()
    s = tensor.add(tensor.zeros_like(a))
    assert local_add_specialize.transform(s.owner)

    # Test when the 0 input is forcing upcasting
    a = tensor.constant(0, dtype='int64')
    b = tensor.constant(1, dtype='int32')
    s = a + b
    transformed = local_add_specialize.transform(s.owner)
    assert transformed
    assert transformed[0].type == s.type
def replace_input(layer, m, done=set({})):
    if layer in m:
        return m[layer]
    if layer in done:
        return layer
    done.add(layer)
    if hasattr(layer, 'input_layer'):
        if layer.input_layer in m:
            layer.input_layer = m[layer.input_layer]
        else:
            replace_input(layer.input_layer, m, done)
    if hasattr(layer, 'input_layers'):
        for i, t in enumerate(layer.input_layers):
            if t in m:
                layer.input_layers[i] = m[t]
            else:
                replace_input(t, m, done)
    return layer
def editdist_np(source, target):
    if len(source) < len(target):
        return editdist_np(target, source)
    if len(target) == 0:
        return len(source)
    previous_row = np.arange(target.size + 1)
    for s in source:
        current_row = previous_row + 1
        current_row[1:] = np.minimum(current_row[1:],
                                     np.add(previous_row[:-1], target != s))
        current_row[1:] = np.minimum(current_row[1:], current_row[0:-1] + 1)
        previous_row = current_row

    return previous_row[-1]


# Pure python version
# from [https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python, the 6th version]
def _editdist(s, t):
    """
    Levenshtein's edit distance function
    :param s: vector, source string
    :param t: vector, target string
    :return: edit distance, scalar
    """
    def update(x, previous_row):
        current_row = previous_row + 1
        current_row = tensor.set_subtensor(
            current_row[1:],
            tensor.minimum(current_row[1:],
                           tensor.add(previous_row[:-1], tensor.neq(target, x))))
        current_row = tensor.set_subtensor(
            current_row[1:],
            tensor.minimum(current_row[1:], current_row[0:-1] + 1))
        return current_row

    source, target = ifelse(tensor.lt(s.shape[0], t.shape[0]), (t, s), (s, t))
    previous_row = tensor.arange(target.size + 1, dtype=theano.config.floatX)
    result, updates = theano.scan(fn=update, sequences=source,
                                  outputs_info=previous_row, name='editdist')
    return result[-1, -1]
def get_output_for(self, inputs, deterministic=False, **kwargs):
    alpha, beta = inputs
    # return 2*T.true_div(alpha, T.add(alpha, beta)+1e-8)-1
    return 2 * (alpha / (alpha + beta + 1e-8)) - 1


# Convenience Function to produce a residual pre-activation MDCL block
def sequence_iteration(self, output, mask, use_dropout=0, dropout_value=0.5):

    dot_product = T.dot(output, self.t_w_out)

    linear_o = T.add(dot_product, self.t_b_out)

    mask = T.addbroadcast(mask, 2)  # todo: necessary?
    output = T.mul(mask, linear_o) + T.mul((1. - mask), 1e-6)

    return output  # result


### TEST FUNCTIONS
# todo: make new file with test functions
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, w_hidden_hidden, b_act):

    pre_w_sig = T.dot(pre_out_sig, w_hidden_hidden)
    inner_act = self.activation
    out_sig = inner_act(T.add(cur_w_in_sig, pre_w_sig, b_act))

    mask = T.addbroadcast(mask, 1)
    out_sig_m = mask * out_sig + (1. - mask) * pre_out_sig
    return [out_sig_m]
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig,
                   w_ig_c, w_fg_c, w_og_c, w_ifco, b_ifco, t_n_out):

    ifco = T.add(T.dot(pre_out_sig, w_ifco), b_ifco)

    inner_act = self.activation
    gate_act = self.sigmoid()

    # Input Gate
    ig_t1 = gate_act(T.add(ifco[:, 0:t_n_out],
                           T.mul(pre_cell_sig, w_ig_c),
                           cur_w_in_sig[:, 0:t_n_out]))
    # Forget Gate
    fg_t1 = gate_act(T.add(ifco[:, 1 * t_n_out:2 * t_n_out],
                           T.mul(pre_cell_sig, w_fg_c),
                           cur_w_in_sig[:, 1 * t_n_out:2 * t_n_out]))
    # Cell State
    cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig),
                  T.mul(ig_t1, inner_act(T.add(ifco[:, 2 * t_n_out:3 * t_n_out],
                                               cur_w_in_sig[:, 2 * t_n_out:3 * t_n_out]))))

    mask = T.addbroadcast(mask, 1)
    cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
    # functionality: cs_t1 = T.switch(mask, cs_t1, pre_cell_sig)

    # Output Gate
    og_t1 = gate_act(T.add(ifco[:, 3 * t_n_out:4 * t_n_out],
                           T.mul(cs_t1, w_og_c),
                           cur_w_in_sig[:, 3 * t_n_out:4 * t_n_out]))
    # Output LSTM
    out_sig = T.mul(og_t1, inner_act(cs_t1))

    out_sig = mask * out_sig + (1. - mask) * pre_out_sig

    return [out_sig, cs_t1]
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig,
                   w_ifco, b_ifco, t_n_out):

    ifco = T.add(T.dot(pre_out_sig, w_ifco), b_ifco)

    inner_act = self.activation
    gate_act = self.sigmoid()

    # Input Gate
    ig_t1 = gate_act(T.add(ifco[:, 0:t_n_out],
                           cur_w_in_sig[:, 0:t_n_out]))
    # Forget Gate
    fg_t1 = gate_act(T.add(ifco[:, 1 * t_n_out:2 * t_n_out],
                           cur_w_in_sig[:, 1 * t_n_out:2 * t_n_out]))
    # Cell State
    cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig),
                  T.mul(ig_t1, inner_act(T.add(ifco[:, 2 * t_n_out:3 * t_n_out],
                                               cur_w_in_sig[:, 2 * t_n_out:3 * t_n_out]))))

    mask = T.addbroadcast(mask, 1)
    cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
    # functionality: cs_t1 = T.switch(mask, cs_t1, pre_cell_sig)

    # Output Gate
    og_t1 = gate_act(T.add(ifco[:, 3 * t_n_out:4 * t_n_out],
                           cur_w_in_sig[:, 3 * t_n_out:4 * t_n_out]))
    # Output LSTM
    out_sig = T.mul(og_t1, inner_act(cs_t1))

    out_sig = mask * out_sig + (1. - mask) * pre_out_sig

    return [out_sig, cs_t1]
def sequence_iteration(self, in_seq, mask, use_dropout, dropout_value=1):

    in_seq_d = T.switch(use_dropout,
                        (in_seq *
                         self.trng.binomial(in_seq.shape,
                                            p=dropout_value, n=1,
                                            dtype=in_seq.dtype)),
                        in_seq)

    w_in_seq = T.add(T.dot(in_seq_d, self.weights[2]), self.weights[3])

    [out_seq, cell_seq], updates = theano.scan(
        fn=self.t_forward_step,
        sequences=[mask, w_in_seq],
        outputs_info=[self.t_ol_t00, self.t_cs_t00],
        non_sequences=self.weights[:2] + [self.t_n_out],
        go_backwards=self.go_backwards,
        truncate_gradient=-1,
        # n_steps=50,
        strict=True,
        allow_gc=False,
    )
    return out_seq


###### GRU Layer ########################################
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, w_hidden_hidden, b_act,
                   ln_s1, ln_b1, ln_s2, ln_b2):

    pre_w_sig = T.dot(pre_out_sig, w_hidden_hidden)
    inner_act = self.activation

    pre_w_sig_ln = self.ln(pre_w_sig, ln_b1, ln_s1)
    cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b2, ln_s2)

    out_sig = inner_act(T.add(cur_w_in_sig_ln, pre_w_sig_ln, b_act))

    mask = T.addbroadcast(mask, 1)
    out_sig_m = mask * out_sig + (1. - mask) * pre_out_sig
    return [out_sig_m]
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,
                   ln_b1, ln_s1, ln_b2, ln_s2, ln_b3, ln_s3, t_n_out):

    cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

    pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
    pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

    preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)

    inner_act = self.activation  # T.nnet.hard_sigmoid # T.tanh
    gate_act = self.sigmoid()  # T.nnet.hard_sigmoid # T.nnet.sigmoid

    # Input Gate
    ig_t1 = gate_act(preact[:, 0:t_n_out])
    # Forget Gate
    fg_t1 = gate_act(preact[:, 1 * t_n_out:2 * t_n_out])
    # Cell State
    cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig),
                  T.mul(ig_t1, inner_act(preact[:, 2 * t_n_out:3 * t_n_out])))

    mask = T.addbroadcast(mask, 1)
    cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig

    cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)

    # Output Gate
    og_t1 = gate_act(preact[:, 3 * t_n_out:4 * t_n_out])
    # Output LSTM
    out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

    out_sig = mask * out_sig + (1. - mask) * pre_out_sig

    return [out_sig, cs_t1]
def t_forward_step(self, mask, rzup_in_sig, h_pre, b_rzup, u_rz, u_up,
                   ln_b1, ln_s1, ln_b2, ln_s2, ln_b3, ln_s3, t_n_out):

    signal_act = self.activation
    gate_act = self.sigmoid()

    rzup_in_sig_ln = self.ln(rzup_in_sig, ln_b1, ln_s1)
    rzup_b_in_sig_ln = T.add(rzup_in_sig_ln, b_rzup)

    preact = T.dot(h_pre, u_rz)
    preact_ln = self.ln(preact, ln_b2, ln_s2)

    r = gate_act(T.add(rzup_b_in_sig_ln[:, 0:t_n_out], preact_ln[:, 0:t_n_out]))
    z = gate_act(T.add(rzup_b_in_sig_ln[:, t_n_out:2 * t_n_out], preact_ln[:, t_n_out:2 * t_n_out]))

    preactx = T.dot(h_pre, u_up)
    preactx_ln = self.ln(preactx, ln_b3, ln_s3)
    h_pre_r_ln = T.mul(preactx_ln, r)

    h_update = signal_act(T.add(rzup_b_in_sig_ln[:, 2 * t_n_out:3 * t_n_out], h_pre_r_ln))

    h_new = T.add((1. - z) * h_update, z * h_pre)

    mask = T.addbroadcast(mask, 1)
    out_sig = T.add(mask * h_new, (1. - mask) * h_pre)

    return out_sig
def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 1, 28, 28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32, 32)
    else:
        raise AssertionError
    layers = [ll.InputLayer(input_size)]
    self.penalty = theano.shared(np.array(0.))

    # conv1
    layers.append(Conv2DLayerWithReg(args, layers[-1], 20, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # conv2
    layers.append(Conv2DLayerWithReg(args, layers[-1], 50, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # fc1
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=500))
    self.add_params_to_self(args, layers[-1])
    # softmax
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=10,
                                    nonlinearity=nonlinearities.softmax))
    self.add_params_to_self(args, layers[-1])

    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)

    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print "loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty)
def satt2(img, ref, img_shape, ref_size=10, num_int_ch=32, l_name=None, dropout=None,
          W_ini=lasagne.init.GlorotUniform(), nonlinearity=lasagne.nonlinearities.softmax,
          batch_norm=True, rtn_att_map=False, context_box=(1, 1)):
    if l_name:
        l_img_name = l_name + '[img]'
        l_q_name = l_name + '[q]'
        l_att_name = l_name + '[mix]'
    else:
        l_img_name = l_q_name = l_att_name = None
    # l_img = conv(img, num_filters=num_int_ch, filter_size=(1, 1), name=l_img_name, W=W_ini, nonlinearity=None)
    l_img = conv(img, num_filters=num_int_ch, filter_size=context_box, name=l_img_name,
                 W=W_ini, nonlinearity=None, pad='same')
    l_q = lasagne.layers.EmbeddingLayer(ref, input_size=ref_size, output_size=num_int_ch,
                                        W=W_ini, name=l_q_name)
    l_mix = SpatialElemwiseMergeLayer(l_img, l_q, T.add)
    l_nonlin = lasagne.layers.NonlinearityLayer(l_mix)
    if batch_norm:
        l_bn = BatchNormLayer(l_nonlin)
    else:
        l_bn = l_nonlin
    if dropout:
        l_drp = lasagne.layers.DropoutLayer(l_bn)
    else:
        l_drp = l_nonlin
    l_weight = conv(l_drp, num_filters=1, filter_size=(1, 1), name=l_att_name,
                    W=W_ini, nonlinearity=None)
    l_weight_flat = lasagne.layers.FlattenLayer(l_weight)
    l_prob_flat = lasagne.layers.NonlinearityLayer(l_weight_flat, nonlinearity)
    l_prob = lasagne.layers.ReshapeLayer(l_prob_flat, (-1,) + img_shape)
    return l_prob
def satt(img, ref, img_shape, ref_size=10, num_int_ch=32, l_name=None, dropout=None,
         W_ini=lasagne.init.GlorotUniform(), nonlinearity=lasagne.nonlinearities.softmax,
         batch_norm=True, rtn_att_map=False, context_box=(1, 1)):
    if l_name:
        l_img_name = l_name + '[img]'
        l_q_name = l_name + '[q]'
        l_att_name = l_name + '[mix]'
    else:
        l_img_name = l_q_name = l_att_name = None
    # l_img = conv(img, num_filters=num_int_ch, filter_size=(1, 1), name=l_img_name, W=W_ini, nonlinearity=None)
    l_img = conv(img, num_filters=num_int_ch, filter_size=context_box, name=l_img_name,
                 W=W_ini, nonlinearity=None, pad='same')
    l_q = lasagne.layers.EmbeddingLayer(ref, input_size=ref_size, output_size=num_int_ch,
                                        W=W_ini, name=l_q_name)
    l_mix = SpatialElemwiseMergeLayer(l_img, l_q, T.add)
    l_nonlin = lasagne.layers.NonlinearityLayer(l_mix)
    if batch_norm:
        l_bn = BatchNormLayer(l_nonlin)
    else:
        l_bn = l_nonlin
    if dropout:
        l_drp = lasagne.layers.DropoutLayer(l_bn)
    else:
        l_drp = l_nonlin
    l_weight = conv(l_drp, num_filters=1, filter_size=(1, 1), name=l_att_name,
                    W=W_ini, nonlinearity=None)
    l_weight_flat = lasagne.layers.FlattenLayer(l_weight)
    l_prob_flat = lasagne.layers.NonlinearityLayer(l_weight_flat, nonlinearity)
    l_prob = lasagne.layers.ReshapeLayer(l_prob_flat, (-1,) + img_shape)
    l_att_out = SpatialAttentionLayer(img, l_prob)
    if rtn_att_map:
        return l_att_out, l_prob
    else:
        return l_att_out
def __add__(left, right):
    return add(left, right)
def __radd__(right, left):
    return add(left, right)
def test_lift(self):
    x, y, z = inputs([False] * 1, [False] * 2, [False] * 3)
    e = x + y + z
    g = FunctionGraph([x, y, z], [e])

    # It does not really matter if the DimShuffles are inplace
    # or not.
    init_str_g_inplace = (
        "[Elemwise{add,no_inplace}(InplaceDimShuffle{x,0,1}"
        "(Elemwise{add,no_inplace}(InplaceDimShuffle{x,0}(x), y)), z)]")
    init_str_g_noinplace = (
        "[Elemwise{add,no_inplace}(DimShuffle{x,0,1}"
        "(Elemwise{add,no_inplace}(DimShuffle{x,0}(x), y)), z)]")
    self.assertTrue(str(g) in (init_str_g_inplace, init_str_g_noinplace),
                    str(g))

    opt_str_g_inplace = (
        "[Elemwise{add,no_inplace}(Elemwise{add,no_inplace}"
        "(InplaceDimShuffle{x,x,0}(x), InplaceDimShuffle{x,0,1}(y)), z)]")
    opt_str_g_noinplace = (
        "[Elemwise{add,no_inplace}(Elemwise{add,no_inplace}"
        "(DimShuffle{x,x,0}(x), DimShuffle{x,0,1}(y)), z)]")
    dimshuffle_lift.optimize(g)
    self.assertTrue(str(g) in (opt_str_g_inplace, opt_str_g_noinplace),
                    str(g))

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(check_stack_trace(g, ops_to_check='all'))
def test_log_add():
    m = theano.config.mode
    if m == 'FAST_COMPILE':
        m = 'FAST_RUN'
    m = compile.mode.get_mode(m)
    m = m.excluding('fusion')
    m = copy.copy(m)
    # No need to put them back as we have a new object
    m.check_isfinite = False

    # check some basic cases
    x = dvector()
    y = dvector()
    f = function([x, y], T.log(T.exp(x) + T.exp(y)), mode=m)

    f([10000], [10000])  # causes overflow if handled incorrectly
    assert numpy.isfinite(f([10000], [10000]))
    utt.assert_allclose(f([10000], [10000]), 10000 + numpy.log1p(1))

    # test that it gives the same result when it doesn't overflow
    f([10], [10])  # doesn't cause overflow
    utt.assert_allclose(f([10], [10]), 10 + numpy.log1p(1))

    # test that it also works with more than two args (this currently fails)
    x = dvector()
    y = dvector()
    f = function([x, y],
                 T.log(T.exp(x) + T.exp(y) + T.exp(x - y) + T.exp(x + y)),
                 mode=m)

    try:
        f([10000], [10000])  # causes overflow if handled incorrectly
        utt.assert_allclose(f([10000], [10000]), 20000)
    except utt.WrongValue:
        raise SkipTest("log(add(exp)) is not stabilized when adding "
                       "more than 2 elements, see #623")

    # TODO: test that the optimization works in the presence of broadcasting.
    # TODO: (write and) test that the optimization works with Sum in addition to working with Add.
def test_elemwise(self):
    # float Ops
    mats = theano.tensor.matrices('cabxy')
    c, a, b, x, y = mats
    s1 = T.switch(c, a, b)
    s2 = T.switch(c, x, y)
    for op in (T.add, T.sub, T.mul, T.true_div, T.int_div, T.floor_div,
               T.minimum, T.maximum, T.gt, T.lt, T.ge, T.le, T.eq, T.neq,
               T.pow):
        g = optimize(FunctionGraph(mats, [op(s1, s2)]))
        assert str(g).count('Switch') == 1
    # integer Ops
    mats = theano.tensor.imatrices('cabxy')
    c, a, b, x, y = mats
    s1 = T.switch(c, a, b)
    s2 = T.switch(c, x, y)
    for op in (T.and_, T.or_, T.xor,
               T.bitwise_and, T.bitwise_or, T.bitwise_xor):
        g = optimize(FunctionGraph(mats, [op(s1, s2)]))
        assert str(g).count('Switch') == 1
    # add/mul with more than two inputs
    u, v = theano.tensor.matrices('uv')
    s3 = T.switch(c, u, v)
    for op in (T.add, T.mul):
        g = optimize(FunctionGraph(mats + [u, v], [op(s1, s2, s3)]))
        assert str(g).count('Switch') == 1
def test_softmax_optimizations_w_bias2(self):
    x = tensor.matrix('x')
    b = tensor.vector('b')
    c = tensor.vector('c')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot

    fgraph = gof.FunctionGraph(
        [x, b, c, one_of_n],
        [op(softmax_op(T.add(x, b, c)), one_of_n)])
    assert fgraph.outputs[0].owner.op == op

    # print 'BEFORE'
    # for node in fgraph.toposort():
    #     print node.op
    # print '----'
    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
    # print 'AFTER'
    # for node in fgraph.toposort():
    #     print node.op
    # print '===='

    assert len(fgraph.toposort()) == 3

    assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
    assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
            crossentropy_softmax_argmax_1hot_with_bias)
def __init__(self, incomings, coeffs=Normal(std=0.01, mean=1.0), cropping=None, **kwargs):
    super(AdaptiveElemwiseSumLayer, self).__init__(incomings, T.add,
                                                   cropping=cropping, **kwargs)
    '''
    if isinstance(coeffs, list):
        if len(coeffs) != len(incomings):
            raise ValueError("Mismatch: got %d coeffs for %d incomings" %
                             (len(coeffs), len(incomings)))
    else:
        coeffs = [coeffs] * len(incomings)
    '''
    self.coeffs = []
    for i in range(len(incomings)):
        coeff = theano.shared(np.float32(1.0), 'adacoeff{}'.format(i))
        self.coeffs.append(self.add_param(coeff, coeff.shape, trainable=True,
                                          scaling_param=True))
def add_handler(layers, flags, stacks, this_model):
    return lasagne.layers.ElemwiseMergeLayer(layers, T.add)
def editdist(s, t):
    def update(x, previous_row):
        current_row = previous_row + 1
        current_row = tensor.set_subtensor(
            current_row[1:],
            tensor.minimum(current_row[1:],
                           tensor.add(previous_row[:-1], tensor.neq(target, x))))
        current_row = tensor.set_subtensor(
            current_row[1:],
            tensor.minimum(current_row[1:], current_row[0:-1] + 1))
        return current_row

    source, target = ifelse(tensor.lt(s.shape[0], t.shape[0]), (t, s), (s, t))
    previous_row = tensor.arange(target.size + 1, dtype=theano.config.floatX)
    result, updates = theano.scan(fn=update, sequences=source,
                                  outputs_info=previous_row, name='editdist')
    return result[-1, -1]


# numpy version
# from [https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python, the 5th version]
def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 28*28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32*32)
    else:
        raise AssertionError
    layers = [ll.InputLayer(input_size)]
    penalty = theano.shared(np.array(0.))
    for (k, num) in enumerate(args.MLPlayer):

        # the last layer should use softmax
        if k == len(args.MLPlayer) - 1:
            # layers.append(ll.DenseLayer(layers[-1], num, nonlinearity=nonlinearities.softmax))
            layers.append(DenseLayerWithReg(args, layers[-1], num_units=num,
                                            nonlinearity=nonlinearities.softmax))
        else:
            # layers.append(ll.DenseLayer(layers[-1], num))
            layers.append(DenseLayerWithReg(args, layers[-1], num_units=num))

        if layers[-1].W is not None:
            self.params_theta += [layers[-1].W, layers[-1].b]
            self.params_weight += [layers[-1].W]

            # define new regularization term for a layer
            if args.regL2 is True:
                tempL2 = layers[-1].L2 * T.sqr(layers[-1].W)
                penalty += T.sum(tempL2)
                self.params_lambda += [layers[-1].L2]
            if args.regL1 is True:
                tempL1 = layers[-1].L1 * layers[-1].W
                penalty += T.sum(tempL1)
                self.params_lambda += [layers[-1].L1]

    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    self.penalty = penalty
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)

    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print "loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty)
    # self.classError = T.mean(T.cast(T.neq(self.prediction, y), 'float32'))
def add(x, y):
    """
    Add two matrices, at least one of which is sparse.

    This method will provide the right op according to the inputs.

    Parameters
    ----------
    x
        A matrix variable.
    y
        A matrix variable.

    Returns
    -------
    A sparse matrix
        `x` + `y`

    Notes
    -----
    At least one of `x` and `y` must be a sparse matrix.

    The grad will be structured only when one of the variables is a dense
    matrix.

    """
    if hasattr(x, 'getnnz'):
        x = as_sparse_variable(x)
    if hasattr(y, 'getnnz'):
        y = as_sparse_variable(y)
    if not isinstance(x, theano.Variable):
        x = theano.tensor.as_tensor_variable(x)
    if not isinstance(y, theano.Variable):
        y = theano.tensor.as_tensor_variable(y)

    x_is_sparse_variable = _is_sparse_variable(x)
    y_is_sparse_variable = _is_sparse_variable(y)

    assert x_is_sparse_variable or y_is_sparse_variable
    if x_is_sparse_variable and y_is_sparse_variable:
        return add_s_s(x, y)
    elif x_is_sparse_variable and not y_is_sparse_variable:
        return add_s_d(x, y)
    elif y_is_sparse_variable and not x_is_sparse_variable:
        return add_s_d(y, x)
    else:
        raise NotImplementedError()