The following code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.eq().
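Before the project examples, here is a minimal standalone sketch (ours, not taken from any of the projects below) of what theano.tensor.eq() itself does: it builds a symbolic elementwise comparison that yields 1 where its two arguments are equal and 0 elsewhere, broadcasting like other elemwise ops; the int8 result is typically cast or averaged, as in the examples that follow.

import numpy as np
import theano
import theano.tensor as T

a = T.ivector('a')
b = T.ivector('b')
# T.eq only builds a symbolic graph; compile it into a callable function
eq_fn = theano.function([a, b], T.eq(a, b))
print(eq_fn(np.array([1, 2, 3], dtype='int32'),
            np.array([1, 0, 3], dtype='int32')))  # -> [1 0 1]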
def crossentropy(y_pred, y_true, void_labels, one_hot=False):
    # Clip predictions
    y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON)

    if one_hot:
        y_true = T.argmax(y_true, axis=1)

    # Create mask
    mask = T.ones_like(y_true, dtype=_FLOATX)
    for el in void_labels:
        mask = T.set_subtensor(mask[T.eq(y_true, el).nonzero()], 0.)

    # Modify y_true temporarily
    y_true_tmp = y_true * mask
    y_true_tmp = y_true_tmp.astype('int32')

    # Compute cross-entropy
    loss = T.nnet.categorical_crossentropy(y_pred, y_true_tmp)

    # Compute masked mean loss
    loss *= mask
    loss = T.sum(loss) / T.sum(mask)

    return loss
def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode,
               activ='lambda x: x', **kwargs):
    """
    compute the forward pass for a gate layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
    """
    # compute gating values, Eq.(3)
    G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')])
                            + tparams[p_name(prefix, 'b')][0])
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)),
                      X_word, X_char),
               G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)
    return eval(activ)(X)
def concat_layer(tparams, X_word, X_char, options, prefix, pretrain_mode,
                 activ='lambda x: x', **kwargs):
    """
    compute the forward pass for a concat layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
    """
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)),
                      X_word, X_char),
               tensor.dot(tensor.concatenate([X_word, X_char], axis=2),
                          tparams[p_name(prefix, 'W')]) + tparams[p_name(prefix, 'b')])
    return eval(activ)(X)
def compile_eval_function(nnet):
    X = T.tensor4()
    y = T.ivector()

    # get prediction from the fully convolutional network
    prediction = lasagne.layers.get_output(nnet.dense3_conv_layer,
                                           deterministic=True, inputs=X)

    # get output scores on first dim
    # before flattening on 2 dims and then get scores on second dim
    prediction = prediction.transpose((1, 0, 2, 3))\
        .flatten(2).transpose((1, 0))
    prediction = T.nnet.softmax(prediction)

    # spatial averaging
    prediction = T.mean(prediction, axis=0)

    # compute top1 and top5 accuracies
    sorted_pred = T.argsort(prediction)
    top1_acc = T.mean(T.eq(sorted_pred[-1], y), dtype='floatX')
    top5_acc = T.mean(T.any(T.eq(sorted_pred[-5:], T.shape_padright(y)),
                            axis=1), dtype='floatX')

    return theano.function([X, y], [top1_acc, top5_acc])
def _ctc_normal(self, predict, labels):
    n = labels.shape[0]

    labels2 = T.concatenate((labels, [self.tpo["CTC_blank"], self.tpo["CTC_blank"]]))
    sec_diag = T.neq(labels2[:-2], labels2[2:]) * \
               T.eq(labels2[1:-1], self.tpo["CTC_blank"])

    recurrence_relation = \
        T.eye(n) + \
        T.eye(n, k=1) + \
        T.eye(n, k=2) * sec_diag.dimshuffle((0, 'x'))

    pred_y = predict[:, labels]

    probabilities, _ = theano.scan(
        lambda curr, accum: curr * T.dot(accum, recurrence_relation),
        sequences=[pred_y],
        outputs_info=[T.eye(n)[0]]
    )

    labels_probab = T.sum(probabilities[-1, -2:])
    return -T.log(labels_probab)
def log_zero_inflated_poisson(x, pi, log_lambda, eps=0.0):
    """
    Compute log pdf of a zero-inflated Poisson distribution with
    zero-inflation probability pi and rate lambda = exp(log_lambda),
    at values x.

    A simple variation of Stirling's approximation is used:
    log x! = x log x - x.
    """
    pi = T.clip(pi, eps, 1.0 - eps)

    lambda_ = T.exp(log_lambda)
    lambda_ = T.clip(lambda_, eps, lambda_)

    y_0 = T.log(pi + (1 - pi) * T.exp(-lambda_))
    y_1 = T.log(1 - pi) + log_poisson(x, log_lambda, eps)

    y = T.eq(x, 0) * y_0 + T.gt(x, 0) * y_1

    return y
def get_output(self, input_, label):
    """
    This function overrides the parents' one.
    Computes the loss by model prediction and real label.

    Parameters
    ----------
    input_: TensorVariable
        an array of (batch size, prediction).
        for accuracy task, "input_" is 2D matrix.
    label: TensorVariable
        an array of (batch size, answer) or (batchsize,) if label is a list of class labels.
        for classification, highly recommend second one.
        should make label as integer.

    Returns
    -------
    TensorVariable
        a symbolic tensor variable which is scalar.
    """
    # do
    # TODO: Not tested
    return T.mean(T.eq(T.gt(input_, 0.5), label))
def generate(self, relative_position, **kwargs):
    """
    Generate a position input for a given timestep.

    Parameters:
        relative_position: A theano tensor (int32) of shape (n_parallel), giving the
            current relative position for this timestep

    Returns:
        piece: A theano tensor (float32) of shape (n_parallel, PART_WIDTH)
    """
    delta = (self.up_bound - self.low_bound) / (self.PART_WIDTH - 1)
    indicator_pos = np.array([[i * delta + self.low_bound
                               for i in range(self.PART_WIDTH)]], np.float32)

    # differences[i][j] is the difference between relative_position[i] and indicator_pos[j]
    differences = T.cast(T.shape_padright(relative_position), 'float32') - indicator_pos

    # We want each indicator to activate at its division point, and fall off linearly
    # around it, capped from 0 to 1.
    activities = T.maximum(0, 1 - abs(differences / delta))

    # activities = theano.printing.Print("PositionInputPart")(activities)
    # activities = T.opt.Assert()(activities, T.eq(activities.shape[1], self.PART_WIDTH))
    return activities
def generate(self, relative_position, cur_chord_root, cur_chord_type, **kwargs):
    """
    Generate a chord input for a given timestep.

    Parameters:
        relative_position: A theano tensor (int32) of shape (n_parallel), giving the
            current relative position for this timestep
        cur_chord_root: A theano tensor (int32) of shape (n_parallel) giving the
            unshifted chord root
        cur_chord_type: A theano tensor (int32) of shape (n_parallel, CHORD_WIDTH),
            giving the unshifted chord type representation, parsed from the leadsheet

    Returns:
        piece: A theano tensor (float32) of shape (n_parallel, PART_WIDTH)
    """
    def _map_fn(pos, chord):
        # Now pos is scalar and chord is of shape (CHORD_WIDTH), so we can roll
        return T.roll(chord, (-pos) % 12, 0)

    shifted_chords, _ = theano.map(_map_fn,
                                   sequences=[relative_position - cur_chord_root,
                                              cur_chord_type])

    # shifted_chords = theano.printing.Print("ChordShiftInputPart")(shifted_chords)
    # shifted_chords = T.opt.Assert()(shifted_chords, T.eq(shifted_chords.shape[1], self.PART_WIDTH))
    return shifted_chords
def generate(self, timestep, **kwargs):
    """
    Generate a beat input for a given timestep.

    Parameters:
        timestep: A theano int of shape (n_parallel). The current timestep to
            generate the beat input for.

    Returns:
        piece: A theano tensor (float32) of shape (n_parallel, PART_WIDTH)
    """
    result = T.eq(T.shape_padright(timestep) % np.expand_dims(self.BEAT_PERIODS, 0), 0)
    # result = theano.printing.Print("BeatInputPart")(result)
    # result = T.opt.Assert()(result, T.eq(result.shape[1], self.PART_WIDTH))
    return result
def accuracy(self, outputs):
    '''Build a theano expression for computing the network accuracy.

    Parameters
    ----------
    outputs : dict mapping str to theano expression
        A dictionary of all outputs generated by the layers in this network.

    Returns
    -------
    acc : theano expression
        A theano expression representing the network accuracy.
    '''
    predict = TT.argmax(outputs[self.output_name()], axis=-1)
    correct = TT.eq(predict, self.labels)
    acc = correct.mean()
    if self.weighted:
        acc = (self.weights * correct).sum() / self.weights.sum()
    return acc
def score(self, x, y, w=None):
    '''Compute the mean accuracy on a set of labeled data.

    Parameters
    ----------
    x : ndarray (num-examples, num-variables)
        An array containing examples to classify. Examples are given as the
        rows in this array.
    y : ndarray (num-examples, )
        A vector of integer class labels, one for each row of input data.
    w : ndarray (num-examples, )
        A vector of weights, one for each row of input data.

    Returns
    -------
    score : float
        The (possibly weighted) mean accuracy of the model on the data.
    '''
    eq = y == self.predict(x)
    if w is not None:
        return (w * eq).sum() / w.sum()
    return eq.mean()
def test_eq(self):
    x = T.dmatrix()
    y = T.dmatrix()
    f = theano.function([x, y], T.eq(x, y), mode=self.mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx, vy)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Elemwise)
    assert isinstance(topo[0].op.scalar_op, theano.scalar.EQ)
    f2 = theano.function([x], T.eq(x, x), mode=self.mode)
    assert numpy.all(f2(vx) == numpy.ones((5, 4)))
    topo2 = f2.maker.fgraph.toposort()
    # Shape_i{1}(<TensorType(float64, matrix)>),
    # Shape_i{0}(<TensorType(float64, matrix)>),
    # Alloc([[1]], Shape_i{0}.0, Shape_i{1}.0)
    assert len(topo2) == 3
    assert isinstance(topo2[-1].op, T.Alloc)
def accuracy_instance(predictions, targets, n=[1, 2, 3, 4, 5, 10],
                      nb_classes=5, nb_samples_per_class=10, batch_size=1):
    accuracy_0 = theano.shared(np.zeros((batch_size, nb_samples_per_class),
                                        dtype=theano.config.floatX))
    indices_0 = theano.shared(np.zeros((batch_size, nb_classes),
                                       dtype=np.int32))
    batch_range = T.arange(batch_size)

    def step_(p, t, acc, idx):
        acc = T.inc_subtensor(acc[batch_range, idx[batch_range, t]], T.eq(p, t))
        idx = T.inc_subtensor(idx[batch_range, t], 1)
        return (acc, idx)

    (raw_accuracy, _), _ = theano.foldl(step_,
                                        sequences=[predictions.dimshuffle(1, 0),
                                                   targets.dimshuffle(1, 0)],
                                        outputs_info=[accuracy_0, indices_0])
    accuracy = T.mean(raw_accuracy / nb_classes, axis=0)

    return accuracy
def compute_loss(self, o, y_sym, weights_sym, train=True):
    if train:
        logger.info("Trainable Parameters")
        logger.info("-" * 40)
        for param in self.parameters:
            logger.info("%s %s", param, param.get_value().shape)
        logger.info("-" * 40)

    loss_sym = (T.nnet.binary_crossentropy(o, y_sym) * weights_sym).mean()

    reged_loss_sym = loss_sym
    if self.reg_type and train:
        reged_loss_sym = loss_sym + self.reg_rate * self.reg

    # accuracy function
    probas = T.concatenate(
        [(1 - o).reshape((-1, 1)), o.reshape((-1, 1))], axis=1)
    pred_sym = T.argmax(probas, axis=1)
    acc_sym = T.mean(T.eq(pred_sym, y_sym))

    return reged_loss_sym, loss_sym, acc_sym, pred_sym, probas
def _remove_adjdup(x):
    """
    Remove adjacent duplicate items of a vector

    x : vector
    return a vector with adjacent duplicate items removed,
    for example [1,2,2,2,3,3,4] -> [1,2,3,4]
    """
    def update(x, nondup, idx):
        nondup = tensor.switch(tensor.eq(nondup[idx], x), nondup,
                               tensor.set_subtensor(nondup[idx + 1], x))
        # tensor.switch is much faster than ifelse
        idx = tensor.switch(tensor.eq(nondup[idx], x), idx, idx + 1)
        return nondup, idx

    nondup = x
    idx = tensor.as_tensor_variable(0)
    idx = tensor.cast(idx, 'int32')
    result, updates = theano.scan(fn=update, sequences=x,
                                  outputs_info=[nondup, idx],
                                  name='remove_adjdup')
    nondup = result[0][-1]
    idx = result[1][-1]
    return nondup[0:idx + 1]
def _recurrence_relation(queryseq_padded, queryseq_mask_padded=None, blank_symbol=None):
    """
    Generate structured matrix r2 & r3 for dynamic programming recurrence
    :param queryseq_padded: (2L+1, B)
    :param queryseq_mask_padded: (2L+1, B)
    :param blank_symbol: = C
    :return: r2 (2L+1, 2L+1), r3 (2L+1, 2L+1, B)
    """
    L2 = queryseq_padded.shape[0]                                        # = 2L+1
    blanks = tensor.zeros((2, queryseq_padded.shape[1])) + blank_symbol  # (2, B)
    ybb = tensor.concatenate((queryseq_padded, blanks), axis=0).T        # (2L+3, B) -> (B, 2L+3)
    sec_diag = tensor.neq(ybb[:, :-2], ybb[:, 2:]) * \
               tensor.eq(ybb[:, 1:-1], blank_symbol)                     # (B, 2L+1)
    if queryseq_mask_padded is not None:
        sec_diag *= queryseq_mask_padded.T
    r2 = tensor.eye(L2, k=1)                                             # upper diagonal matrix (2L+1, 2L+1)
    r3 = tensor.eye(L2, k=2).dimshuffle(0, 1, 'x') * \
         sec_diag.dimshuffle(1, 'x', 0)                                  # (2L+1, 2L+1, B)
    return r2, r3
def one_hot_max(x, axis=-1):
    '''
    Example
    -------
    >>> Input:  [[0.0, 0.0, 0.5],
    >>>          [0.0, 0.3, 0.1],
    >>>          [0.6, 0.0, 0.2]]
    >>> Output: [[0.0, 0.0, 1.0],
    >>>          [0.0, 1.0, 0.0],
    >>>          [1.0, 0.0, 0.0]]
    '''
    return T.cast(
        T.eq(T.arange(x.shape[axis])[None, :],
             T.argmax(x, axis=axis, keepdims=True)),
        _FLOATX
    )
def get_corrupted_input(rng, input, corruption_level, ntype='zeromask'):
    ''' depending on requirement, returns input corrupted by zeromask/gaussian/salt&pepper'''
    MRG = RNG_MRG.MRG_RandomStreams(rng.randint(2 ** 30))
    # theano_rng = RandomStreams()
    if corruption_level == 0.0:
        return input

    if ntype == 'zeromask':
        return MRG.binomial(size=input.shape, n=1, p=1 - corruption_level,
                            dtype=theano.config.floatX) * input
    elif ntype == 'gaussian':
        return input + MRG.normal(size=input.shape, avg=0.0,
                                  std=corruption_level,
                                  dtype=theano.config.floatX)
    elif ntype == 'salt_pepper':
        # salt and pepper noise
        print 'DAE uses salt and pepper noise'
        a = MRG.binomial(size=input.shape, n=1,
                         p=1 - corruption_level, dtype=theano.config.floatX)
        b = MRG.binomial(size=input.shape, n=1,
                         p=corruption_level, dtype=theano.config.floatX)
        c = T.eq(a, 0) * b
        return input * a + c
def build_model(model_):
    global fn_predict, fn_record
    global g_ozer, g_mdl

    g_ozer = dict(simple=VanillaSGD, adam=AdamSGD)[OZER]()
    g_ozer.lr = LEARN_RATE

    s_x = T.tensor4('x')
    s_y = T.ivector('y')
    s_pdpo = T.scalar()
    s_out = model_(s_x, s_pdpo)

    s_y_onehot = T.extra_ops.to_one_hot(s_y, len(g_dataset.label_map))
    s_loss = T.mean(-s_y_onehot * T.log(s_out + 1e-3))
    s_accr = T.mean(T.switch(
        T.eq(T.argmax(s_out, axis=1), T.argmax(s_y_onehot, axis=1)), 1, 0))

    no_dropout = [(s_pdpo, T.constant(0., dtype=th.config.floatX))]
    fn_predict = th.function(
        [s_x, s_y],
        {'pred': s_out, 'accr': s_accr, 'loss': s_loss},
        givens=no_dropout, profile=PROFILE)
    rec_fetches = {
        'x': s_x, 'y': s_y,
        'pred': s_out}
    rec_fetches.update(g_mdl.params_di)
    fn_record = th.function(
        [s_x, s_y], rec_fetches, givens=no_dropout, profile=PROFILE)
    g_ozer.compile(
        [s_x, s_y],
        s_loss,
        g_mdl.params_di.values(),
        fetches_={'pred': s_out, 'loss': s_loss, 'accr': s_accr},
        givens_=[(s_pdpo, T.constant(TRAIN_PDPO, dtype=th.config.floatX))],
        profile_=PROFILE)
def geoseries_sum(r, t_end, t_start):
    """
    Sum of r**t from t=t_start to t=t_end, inclusive
    :param r:
    :param t_end:
    :param t_start:
    :return:
    """
    return ifelse(tt.eq(r, 1),
                  (t_end - t_start + 1).astype(theano.config.floatX),
                  (r ** (t_end + 1) - r ** t_start) / (r - 1))
def jaccard(y_pred, y_true, n_classes, one_hot=False):
    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)

    # y_pred to indices
    if y_pred.ndim == 2:
        y_pred = T.argmax(y_pred, axis=1)

    if one_hot:
        y_true = T.argmax(y_true, axis=1)

    # Compute confusion matrix
    cm = T.zeros((n_classes, n_classes))
    for i in range(n_classes):
        for j in range(n_classes):
            cm = T.set_subtensor(
                cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j)))

    # Compute Jaccard Index
    TP_perclass = T.cast(cm.diagonal(), _FLOATX)
    FP_perclass = cm.sum(1) - TP_perclass
    FN_perclass = cm.sum(0) - TP_perclass

    num = TP_perclass
    denom = TP_perclass + FP_perclass + FN_perclass

    return T.stack([num, denom], axis=0)
def accuracy(y_pred, y_true, void_labels, one_hot=False):
    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)

    # y_pred to indices
    if y_pred.ndim == 2:
        y_pred = T.argmax(y_pred, axis=1)

    if one_hot:
        y_true = T.argmax(y_true, axis=1)

    # Compute accuracy
    acc = T.eq(y_pred, y_true).astype(_FLOATX)

    # Create mask
    mask = T.ones_like(y_true, dtype=_FLOATX)
    for el in void_labels:
        indices = T.eq(y_true, el).nonzero()
        if any(indices):
            mask = T.set_subtensor(mask[indices], 0.)

    # Apply mask
    acc *= mask
    acc = T.sum(acc) / T.sum(mask)

    return acc
def equal(x, y):
    return T.eq(x, y)
def unk_ratio(words, mask, unk):
    num_unk = (tensor.eq(words, unk) * mask).sum()
    return num_unk / mask.sum()
def get_f1_acc(outputs, y_labels):
    outputs_i = outputs + 0.5
    outputs_i = outputs_i.astype('int32')
    y_ilab = y_labels.astype('int32')
    gd_num = T.sum(y_ilab, axis=0)
    pr_num = T.sum(outputs_i, axis=0)

    sum_ones = y_ilab + outputs_i
    pr_rtm = sum_ones // 2  # 1 only where prediction and label are both 1
    # pr_rtm = T.eq(outputs_i, y_ilab)
    pr_rt = T.sum(pr_rtm, axis=0)

    # prevent nan from destroying the f1
    pr_rt = pr_rt.astype('float32')
    gd_num = gd_num.astype('float32')
    pr_num = pr_num.astype('float32')

    acc = pr_rt / outputs.shape[0]

    # smooth counts that are all zero; a symbolic T.eq() cannot be tested
    # in a Python `if`, so guard with T.switch instead
    zero_scale = T.zeros_like(T.min(pr_rt))
    gd_num = T.switch(T.eq(zero_scale, T.min(gd_num)), gd_num + 1, gd_num)
    pr_num = T.switch(T.eq(zero_scale, T.min(pr_num)), pr_num + 1, pr_num)
    pr_rt = T.switch(T.eq(zero_scale, T.min(pr_rt)), pr_rt + 0.01, pr_rt)

    recall = pr_rt / gd_num
    precision = pr_rt / pr_num
    f1 = 2 * recall * precision / (recall + precision)
    # return T.min(pr_rt)
    return acc, f1
def _recurrence_relation(y, y_mask, blank_symbol):
    """
    Construct a permutation matrix and tensor for computing CTC transitions.

    Parameters
    ----------
    y : matrix (L, B)
        the target label sequences
    y_mask : matrix (L, B)
        indicates which values of y to use
    blank_symbol : integer
        indicates the symbol that signifies a blank label.

    Returns
    -------
    matrix (L, L)
    tensor3 (L, L, B)
    """
    n_y = y.shape[0]
    blanks = tensor.zeros((2, y.shape[1])) + blank_symbol
    ybb = tensor.concatenate((y, blanks), axis=0).T
    sec_diag = (tensor.neq(ybb[:, :-2], ybb[:, 2:]) *
                tensor.eq(ybb[:, 1:-1], blank_symbol) *
                y_mask.T)

    # r1: LxL
    # r2: LxL
    # r3: LxLxB
    eye2 = tensor.eye(n_y + 2)
    r2 = eye2[2:, 1:-1]  # tensor.eye(n_y, k=1)
    r3 = (eye2[2:, :-2].dimshuffle(0, 1, 'x') *
          sec_diag.dimshuffle(1, 'x', 0))

    return r2, r3
def equal(self, x, y):
    return T.eq(x, y)
def acc_fn(model_predict, target_var):
    """Theano function to calculate accuracy of input data"""
    return T.mean(T.eq(T.argmax(model_predict, axis=1), target_var),
                  dtype=theano.config.floatX)

#------------------------------------------------------------------------------#
def index_fn(model_predict, input_var, target_var):
    """
    Theano function returns an array containing boolean values
    that indicate whether predicted label matches target_var
    """
    index_temp = T.eq(T.argmax(model_predict, axis=1), target_var)
    return theano.function([input_var, target_var], index_temp,
                           allow_input_downcast=True)

#------------------------------------------------------------------------------#
def eq(self, l, r):
    return T.eq(l, r)
def log_zero_inflated_negative_binomial(x, pi, p, log_r, eps=0.0):
    pi = T.clip(pi, eps, 1.0 - eps)
    p = T.clip(p, eps, 1.0 - eps)

    r = T.exp(log_r)
    r = T.clip(r, eps, r)

    y_0 = T.log(pi + (1 - pi) * T.pow(1 - p, r))
    y_1 = T.log(1 - pi) + log_negative_binomial(x, p, log_r, eps)

    y = T.eq(x, 0) * y_0 + T.gt(x, 0) * y_1

    return y
def compute_cost_t(lin_output, y_t):
    RNN_output = T.nnet.softmax(lin_output)
    CE = T.nnet.categorical_crossentropy(RNN_output, y_t)
    cost_t = CE.mean()
    acc_t = (T.eq(T.argmax(RNN_output, axis=-1), y_t)).mean(dtype=theano.config.floatX)
    return cost_t, acc_t
def get_output(self, predict, label):
    """
    This function overrides the parents' one.
    Computes the loss by model prediction and real label.
    use theano implemented categorical_crossentropy directly.

    Parameters
    ----------
    predict: TensorVariable
        an array of (batch size, prediction).
        for accuracy task, "predict" is 2D matrix.
    label: TensorVariable
        an array of (batch size, answer) or (batchsize,) if label is a list of class labels.
        for classification, highly recommend second one.

    Returns
    -------
    TensorVariable
        a symbolic tensor variable which is scalar.
    """
    if label.ndim == 1:
        one_hot_label = T.eq(label.dimshuffle(0, 'x'),
                             T.arange(self.num_class).dimshuffle('x', 0))
    elif label.ndim == 2:
        one_hot_label = label
    predict_max = T.argmax(predict, axis=-1)
    one_hot_predict = T.eq(predict_max.dimshuffle(0, 'x'),
                           T.arange(self.num_class).dimshuffle('x', 0))
    confusion = T.dot(T.transpose(one_hot_label), one_hot_predict)
    confusion_sum = T.sum(confusion, axis=1)
    confusion_norm = confusion / (confusion_sum.dimshuffle(0, 'x') + 1e-7)
    return T.nlinalg.trace(confusion_norm)
def eq(a, b):
    return T.eq(a, b)
def get_output_mask(self, train=None):
    X = self.get_input(train)
    if not self.mask_zero:
        return None
    else:
        return T.ones_like(X) * (1 - T.eq(X, 0))
def one_hot(t, r=None):
    """Compute one hot encoding.

    given a tensor t of dimension d with integer values from range(r), return a
    new tensor of dimension d + 1 with values 0/1, where the last dimension
    gives a one-hot representation of the values in t.

    if r is not given, r is set to max(t) + 1
    """
    if r is None:
        r = tensor.max(t) + 1

    ranges = tensor.shape_padleft(tensor.arange(r), t.ndim)
    return tensor.eq(ranges, tensor.shape_padright(t, 1))
def __init__(self, embedding_dim=100, num_hidden_layers=2, hidden_dim=200,
             in_dropout_p=0.2, hidden_dropout_p=0.5,
             update_hyperparams={'learning_rate': 0.01}):
    self.embedding_dim = embedding_dim
    self.num_hidden_layers = num_hidden_layers
    self.hidden_dim = hidden_dim
    self.in_dropout_p = in_dropout_p
    self.hidden_dropout_p = hidden_dropout_p  # was mistakenly assigned update_hyperparams

    print >> sys.stderr, 'Building computation graph for discriminator...'
    self.input_var = T.matrix('input')
    self.input_var_extra = T.matrix('input_extra')
    self.target_var = T.matrix('target')

    self.cos_feats = cosine_sim(self.input_var,
                                T.repeat(self.input_var_extra, 2, axis=0)).reshape((-1, 1))
    self.total_input = T.concatenate([self.input_var, self.cos_feats], axis=1)

    self.l_in = lasagne.layers.InputLayer(shape=(None, self.embedding_dim + 1),
                                          input_var=self.total_input, name='l_in')
    self.l_in_dr = lasagne.layers.DropoutLayer(self.l_in, self.in_dropout_p)
    self.layers = [self.l_in, self.l_in_dr]
    for i in xrange(self.num_hidden_layers):
        l_hid = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
            self.layers[-1], num_units=self.hidden_dim,
            nonlinearity=lasagne.nonlinearities.leaky_rectify,
            W=lasagne.init.GlorotUniform(gain=leaky_relu_gain),
            name=('l_hid_%s' % i)))
        l_hid_dr = lasagne.layers.DropoutLayer(l_hid, self.hidden_dropout_p)
        self.layers.append(l_hid)
        self.layers.append(l_hid_dr)

    self.l_preout = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
        self.layers[-1], num_units=1, nonlinearity=None, name='l_preout'))
    self.l_out = lasagne.layers.NonlinearityLayer(
        self.l_preout, nonlinearity=lasagne.nonlinearities.sigmoid, name='l_out')

    self.prediction = lasagne.layers.get_output(self.l_out)
    self.loss = lasagne.objectives.binary_crossentropy(self.prediction, self.target_var).mean()
    self.accuracy = T.eq(T.ge(self.prediction, 0.5), self.target_var).mean()

    self.params = lasagne.layers.get_all_params(self.l_out, trainable=True)
    self.updates = lasagne.updates.adam(self.loss, self.params, **update_hyperparams)

    print >> sys.stderr, 'Compiling discriminator...'
    self.train_fn = theano.function([self.input_var, self.input_var_extra, self.target_var],
                                    [self.loss, self.accuracy], updates=self.updates)
    self.eval_fn = theano.function([self.input_var, self.input_var_extra, self.target_var],
                                   [self.loss, self.accuracy])
def __init__(self, embedding_dim=100, num_hidden_layers=2, hidden_dim=200,
             in_dropout_p=0.2, hidden_dropout_p=0.5, hidden2out_dropout_p=0.5,
             update_hyperparams={'learning_rate': 0.01}):
    self.embedding_dim = embedding_dim
    self.num_hidden_layers = num_hidden_layers
    self.hidden_dim = hidden_dim
    self.in_dropout_p = in_dropout_p
    self.hidden_dropout_p = hidden_dropout_p
    self.hidden2out_dropout_p = hidden2out_dropout_p
    self.update_hyperparameters = update_hyperparams

    print >> sys.stderr, 'Building computation graph for discriminator...'
    self.input_var = T.matrix('input')
    self.target_var = T.matrix('target')

    self.l_in = lasagne.layers.InputLayer(shape=(None, self.embedding_dim),
                                          input_var=T.tanh(self.input_var), name='l_in')
    self.l_in_dr = lasagne.layers.DropoutLayer(self.l_in, self.in_dropout_p)
    self.l_prehid = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
        self.l_in_dr, num_units=self.hidden_dim,
        nonlinearity=lasagne.nonlinearities.leaky_rectify,
        W=lasagne.init.GlorotUniform(gain=leaky_relu_gain), name='l_prehid'))
    self.layers = [self.l_in, self.l_in_dr, self.l_prehid]
    for i in xrange(self.num_hidden_layers):
        l_hid_predr = lasagne.layers.DropoutLayer(self.layers[-1], self.hidden_dropout_p)
        l_hid = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
            l_hid_predr, num_units=self.hidden_dim,
            nonlinearity=lasagne.nonlinearities.leaky_rectify,
            W=lasagne.init.GlorotUniform(gain=leaky_relu_gain),
            name=('l_hid_%s' % i)))
        l_hid_sum = lasagne.layers.ElemwiseSumLayer([self.layers[-1], l_hid])
        self.layers.append(l_hid_predr)
        self.layers.append(l_hid)
        self.layers.append(l_hid_sum)

    self.l_preout_predr = lasagne.layers.DropoutLayer(self.layers[-1], self.hidden2out_dropout_p)
    self.l_preout = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
        self.l_preout_predr, num_units=1, nonlinearity=None, name='l_preout'))
    self.l_out = lasagne.layers.NonlinearityLayer(
        self.l_preout, nonlinearity=lasagne.nonlinearities.sigmoid, name='l_out')

    self.prediction = lasagne.layers.get_output(self.l_out)
    self.loss = lasagne.objectives.binary_crossentropy(self.prediction, self.target_var).mean()
    self.accuracy = T.eq(T.ge(self.prediction, 0.5), self.target_var).mean()

    self.params = lasagne.layers.get_all_params(self.l_out, trainable=True)
    self.updates = lasagne.updates.adam(self.loss, self.params, **update_hyperparams)

    print >> sys.stderr, 'Compiling discriminator...'
    self.train_fn = theano.function([self.input_var, self.target_var],
                                    [self.loss, self.accuracy], updates=self.updates)
    self.eval_fn = theano.function([self.input_var, self.target_var],
                                   [self.loss, self.accuracy])
def __init__(self, embedding_dim=100, num_hidden_layers=2, hidden_dim=200,
             in_dropout_p=0.2, hidden_dropout_p=0.5,
             update_hyperparams={'learning_rate': 0.01}):
    self.embedding_dim = embedding_dim
    self.num_hidden_layers = num_hidden_layers
    self.hidden_dim = hidden_dim
    self.in_dropout_p = in_dropout_p
    self.hidden_dropout_p = hidden_dropout_p  # was mistakenly assigned update_hyperparams

    print >> sys.stderr, 'Building computation graph for discriminator...'
    self.input_var = T.matrix('input')
    self.target_var = T.matrix('target')

    self.l_in = lasagne.layers.InputLayer(shape=(None, self.embedding_dim),
                                          input_var=T.tanh(self.input_var), name='l_in')
    self.l_in_dr = lasagne.layers.DropoutLayer(self.l_in, self.in_dropout_p)
    self.layers = [self.l_in, self.l_in_dr]
    for i in xrange(self.num_hidden_layers):
        l_hid = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
            self.layers[-1], num_units=self.hidden_dim,
            nonlinearity=lasagne.nonlinearities.leaky_rectify,
            W=lasagne.init.GlorotUniform(gain=leaky_relu_gain),
            name=('l_hid_%s' % i)))
        l_hid_dr = lasagne.layers.DropoutLayer(l_hid, self.hidden_dropout_p)
        self.layers.append(l_hid)
        self.layers.append(l_hid_dr)

    self.l_preout = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
        self.layers[-1], num_units=1, nonlinearity=None, name='l_preout'))
    self.l_out = lasagne.layers.NonlinearityLayer(
        self.l_preout, nonlinearity=lasagne.nonlinearities.sigmoid, name='l_out')

    self.prediction = lasagne.layers.get_output(self.l_out)
    self.loss = lasagne.objectives.binary_crossentropy(self.prediction, self.target_var).mean()
    self.accuracy = T.eq(T.ge(self.prediction, 0.5), self.target_var).mean()

    self.params = lasagne.layers.get_all_params(self.l_out, trainable=True)
    self.updates = lasagne.updates.adam(self.loss, self.params, **update_hyperparams)

    print >> sys.stderr, 'Compiling discriminator...'
    self.train_fn = theano.function([self.input_var, self.target_var],
                                    [self.loss, self.accuracy], updates=self.updates)
    self.eval_fn = theano.function([self.input_var, self.target_var],
                                   [self.loss, self.accuracy])
def __init__(self, embedding_dim=100, num_hidden_layers=2, hidden_dim=200,
             in_dropout_p=0.2, hidden_dropout_p=0.5,
             update_hyperparams={'learning_rate': 0.01}):
    self.embedding_dim = embedding_dim
    self.num_hidden_layers = num_hidden_layers
    self.hidden_dim = hidden_dim
    self.in_dropout_p = in_dropout_p
    self.hidden_dropout_p = hidden_dropout_p
    self.update_hyperparameters = update_hyperparams

    print >> sys.stderr, 'Building computation graph for discriminator...'
    self.input_var = T.matrix('input')
    self.target_var = T.matrix('target')

    self.l_in = lasagne.layers.InputLayer(shape=(None, self.embedding_dim),
                                          input_var=T.tanh(self.input_var), name='l_in')
    self.l_in_dr = lasagne.layers.DropoutLayer(self.l_in, self.in_dropout_p)
    self.layers = [self.l_in, self.l_in_dr]
    for i in xrange(self.num_hidden_layers):
        l_hid = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
            self.layers[-1], num_units=self.hidden_dim,
            nonlinearity=lasagne.nonlinearities.leaky_rectify,
            W=lasagne.init.GlorotUniform(gain=leaky_relu_gain),
            name=('l_hid_%s' % i)))
        l_hid_dr = lasagne.layers.DropoutLayer(l_hid, self.hidden_dropout_p)
        self.layers.append(l_hid)
        self.layers.append(l_hid_dr)

    self.l_preout = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
        self.layers[-1], num_units=1, nonlinearity=None, name='l_preout'))
    self.l_out = lasagne.layers.NonlinearityLayer(
        self.l_preout, nonlinearity=lasagne.nonlinearities.sigmoid, name='l_out')

    self.prediction = lasagne.layers.get_output(self.l_out)
    self.loss = lasagne.objectives.binary_crossentropy(self.prediction, self.target_var).mean()
    self.accuracy = T.eq(T.ge(self.prediction, 0.5), self.target_var).mean()

    self.params = lasagne.layers.get_all_params(self.l_out, trainable=True)
    self.updates = lasagne.updates.adam(self.loss, self.params, **update_hyperparams)

    print >> sys.stderr, 'Compiling discriminator...'
    self.train_fn = theano.function([self.input_var, self.target_var],
                                    [self.loss, self.accuracy], updates=self.updates)
    self.eval_fn = theano.function([self.input_var, self.target_var],
                                   [self.loss, self.accuracy])