The following 45 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.LogSoftmax().
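Before the project examples, here is a minimal, self-contained sketch of the pattern most of them rely on; the tensor shapes and variable names below are illustrative only, not taken from any of the projects. Note that many of the snippets construct nn.LogSoftmax() without a dim argument, which older PyTorch releases allowed; newer versions expect an explicit dim.

import torch
import torch.nn as nn

# Minimal usage sketch: LogSoftmax over the class dimension, paired with
# nn.NLLLoss, which expects log-probabilities as input.
log_softmax = nn.LogSoftmax(dim=1)
nll_loss = nn.NLLLoss()

logits = torch.randn(4, 10)           # a batch of 4 examples, 10 classes
targets = torch.tensor([1, 0, 4, 9])  # ground-truth class indices

log_probs = log_softmax(logits)       # shape (4, 10); exp(log_probs) sums to 1 per row
loss = nll_loss(log_probs, targets)   # equivalent to nn.CrossEntropyLoss on the raw logits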
def call_nn_op(op, epsilon):
    """
    a helper function that adds appropriate parameters when calling
    an nn module representing an operation like Softmax

    :param op: the nn.Module operation to instantiate
    :param epsilon: a scaling parameter for certain custom modules
    :return: instantiation of the op module with appropriate parameters
    """
    if op in [ClippedSoftmax]:
        try:
            return op(epsilon, dim=1)
        except TypeError:
            # Support older pytorch 0.2 release.
            return op(epsilon)
    elif op in [ClippedSigmoid]:
        return op(epsilon)
    elif op in [nn.Softmax, nn.LogSoftmax]:
        return op(dim=1)
    else:
        return op()

def new_instance(src_dict, trg_dict, model_params=None, random_seed=None, gpu_ids=None, init_value=0.1):
    if model_params is None:
        from nmmt import NMTEngine
        model_params = NMTEngine.Parameters()

    if gpu_ids is not None and len(gpu_ids) > 0:
        torch.cuda.set_device(gpu_ids[0])

    encoder = Models.Encoder(model_params, src_dict)
    decoder = Models.Decoder(model_params, trg_dict)
    generator = nn.Sequential(nn.Linear(model_params.rnn_size, trg_dict.size()), nn.LogSoftmax())

    model = Models.NMTModel(encoder, decoder)

    if gpu_ids is not None and len(gpu_ids) > 0:
        model.cuda()
        generator.cuda()

        if len(gpu_ids) > 1:
            model = nn.DataParallel(model, device_ids=gpu_ids, dim=1)
            generator = nn.DataParallel(generator, device_ids=gpu_ids, dim=0)
    else:
        model.cpu()
        generator.cpu()

    model.generator = generator

    for p in model.parameters():
        p.data.uniform_(-init_value, init_value)

    optim = Optim(model_params.optim, model_params.learning_rate, model_params.max_grad_norm,
                  lr_decay=model_params.learning_rate_decay, start_decay_at=model_params.start_decay_at)
    optim.set_parameters(model.parameters())

    return NMTEngineTrainer(model, optim, src_dict, trg_dict,
                            model_params=model_params, gpu_ids=gpu_ids, random_seed=random_seed)

def __init__(self, embeddings_in, embeddings_out, **kwargs):
    super(Seq2SeqAttnModel, self).__init__(embeddings_in, embeddings_out)
    self.hsz = kwargs['hsz']
    nlayers = kwargs['layers']
    rnntype = kwargs['rnntype']
    pdrop = kwargs.get('dropout', 0.5)
    dsz = embeddings_in.dsz
    self.gpu = kwargs.get('gpu', True)
    self.encoder_rnn = pytorch_rnn(dsz, self.hsz, rnntype, nlayers, pdrop)
    self.dropout = nn.Dropout(pdrop)
    self.decoder_rnn = pytorch_rnn_cell(self.hsz + dsz, self.hsz, rnntype, nlayers, pdrop)
    self.preds = nn.Linear(self.hsz, self.nc)
    self.probs = nn.LogSoftmax()
    self.output_to_attn = nn.Linear(self.hsz, self.hsz, bias=False)
    self.attn_softmax = nn.Softmax()
    self.attn_out = nn.Linear(2 * self.hsz, self.hsz, bias=False)
    self.attn_tanh = pytorch_activation("tanh")
    self.nlayers = nlayers

def __init__(self, opt, data_agent):
    super().__init__()
    self.opt = opt

    self.input_emb = nn.Embedding(data_agent.wordcnt, opt['embedding_dim'], padding_idx=0)
    self.action_type_emb = nn.Embedding(data_agent.get_num_actions(), opt['action_type_emb_dim'])
    self.encoder = nn.GRU(opt['embedding_dim'], opt['rnn_h'], opt['rnn_layers'],
                          batch_first=True, bidirectional=opt['bidir'])
    self.decoder = nn.Sequential(
        nn.Linear(opt['rnn_h'], 1),
    )
    self.log_softmax = nn.LogSoftmax()
    self.trans = nn.Sequential(
        nn.Linear(opt['rnn_h'] * (2 if opt['bidir'] else 1), opt['embedding_dim']),
        nn.Tanh(),
    )
    counter_emb = opt['counter_emb_dim']
    if opt['counter_ablation']:
        counter_emb = 0
    self.dec_gru = nn.GRU(opt['rnn_h'] * (2 if opt['bidir'] else 1) + counter_emb
                          + (opt['embedding_dim'] if not opt['room_ablation'] else 0)
                          + opt['action_type_emb_dim']
                          + opt['action_type_emb_dim']
                          + opt['embedding_dim']
                          + opt['embedding_dim']
                          + opt['rnn_h'] * (2 if opt['bidir'] else 1),
                          opt['rnn_h'], opt['rnn_layers'], batch_first=True)
    self.merge = nn.Sequential(
        nn.Linear(opt['rnn_h'] * 2, opt['rnn_h']),
        nn.Tanh(),
    )
    self.counter_emb = nn.Embedding(opt['counter_max'] + 1, opt['counter_emb_dim'])

def __init__(self, opt, data_agent):
    super().__init__()
    self.opt = opt
    self.y_dim = data_agent.y_dim

    self.input_emb = nn.Embedding(data_agent.wordcnt, opt['embedding_dim'], padding_idx=0)
    self.encoder = nn.GRU(opt['embedding_dim'], opt['rnn_h'], opt['rnn_layers'], batch_first=True)
    self.decoder = nn.GRU(self.y_dim, opt['rnn_h'], opt['rnn_layers'], batch_first=True)
    self.mapping = nn.Sequential(
        nn.Linear(opt['rnn_h'] * 2, self.y_dim),
        nn.LogSoftmax(),
    )

def __init__(self, vocab_size, hidden_size, embedding_size, num_layers=1):
    super().__init__()
    self.embed = nn.Embedding(vocab_size, embedding_size)
    self.gru = nn.GRU(embedding_size, hidden_size, num_layers)
    self.fc = nn.Linear(hidden_size, vocab_size)
    self.softmax = nn.LogSoftmax()

def __init__(self, opt):
    self.opt = opt
    self.tt = torch.cuda if opt.cuda else torch

    checkpoint = torch.load(opt.model)
    model_opt = checkpoint['settings']
    self.model_opt = model_opt

    model = Transformer(
        model_opt.src_vocab_size,
        model_opt.tgt_vocab_size,
        model_opt.max_token_seq_len,
        proj_share_weight=model_opt.proj_share_weight,
        embs_share_weight=model_opt.embs_share_weight,
        d_k=model_opt.d_k,
        d_v=model_opt.d_v,
        d_model=model_opt.d_model,
        d_word_vec=model_opt.d_word_vec,
        d_inner_hid=model_opt.d_inner_hid,
        n_layers=model_opt.n_layers,
        n_head=model_opt.n_head,
        dropout=model_opt.dropout)

    prob_projection = nn.LogSoftmax()

    model.load_state_dict(checkpoint['model'])
    print('[Info] Trained model state loaded.')

    if opt.cuda:
        model.cuda()
        prob_projection.cuda()
    else:
        model.cpu()
        prob_projection.cpu()

    model.prob_projection = prob_projection

    self.model = model
    self.model.eval()

def __init__(self, args, dropout=0.5):
    super(SentimentModule, self).__init__()
    self.cuda_flag = args.cuda
    self.mem_dim = args.mem_dim
    self.num_classes = args.num_classes
    self.dropout = dropout
    self.linear_layer = nn.Linear(self.mem_dim, self.num_classes)
    self.logsoftmax = nn.LogSoftmax()
    self.softmax = nn.Softmax()
    if self.cuda_flag:
        self.linear_layer = self.linear_layer.cuda()

def load_from_checkpoint(checkpoint_path, using_cuda):
    checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)

    model_opt = NMTEngine.Parameters()
    model_opt.__dict__.update(checkpoint['opt'])

    src_dict = checkpoint['dicts']['src']
    trg_dict = checkpoint['dicts']['tgt']

    encoder = Models.Encoder(model_opt, src_dict)
    decoder = Models.Decoder(model_opt, trg_dict)
    model = Models.NMTModel(encoder, decoder)
    model.load_state_dict(checkpoint['model'])

    generator = nn.Sequential(nn.Linear(model_opt.rnn_size, trg_dict.size()), nn.LogSoftmax())
    generator.load_state_dict(checkpoint['generator'])

    if using_cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    model.generator = generator
    model.eval()

    optim = checkpoint['optim']
    optim.set_parameters(model.parameters())
    optim.optimizer.load_state_dict(checkpoint['optim'].optimizer.state_dict())

    return NMTEngine(model_opt, src_dict, trg_dict, model, optim, checkpoint, using_cuda)

def __init__(self, opt):
    self.opt = opt
    self.tt = torch.cuda if opt.cuda else torch
    self.beam_accum = None

    checkpoint = torch.load(opt.model, map_location=lambda storage, loc: storage)

    model_opt = checkpoint['opt']
    self.src_dict = checkpoint['dicts']['src']
    self.tgt_dict = checkpoint['dicts']['tgt']
    self._type = model_opt.encoder_type \
        if "encoder_type" in model_opt else "text"

    if self._type == "text":
        encoder = onmt.Models.Encoder(model_opt, self.src_dict)
    elif self._type == "img":
        loadImageLibs()
        encoder = onmt.modules.ImageEncoder(model_opt)

    decoder = onmt.Models.Decoder(model_opt, self.tgt_dict)
    model = onmt.Models.NMTModel(encoder, decoder)

    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, self.tgt_dict.size()),
        nn.LogSoftmax())

    model.load_state_dict(checkpoint['model'])
    generator.load_state_dict(checkpoint['generator'])

    if opt.cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    model.generator = generator

    self.model = model
    self.model.eval()

def __init__(self, opt, model=None, src_dict=None, tgt_dict=None):
    self.opt = opt
    self.tt = torch.cuda if opt.cuda else torch

    if model is None:
        checkpoint = torch.load(opt.model, map_location=lambda storage, loc: storage)

        model_opt = checkpoint['opt']
        src_dict = checkpoint['dicts']['src']
        tgt_dict = checkpoint['dicts']['tgt']

        encoder = onmt.Models.Encoder(model_opt, src_dict)
        decoder = onmt.Models.Decoder(model_opt, tgt_dict)
        model = onmt.Models.NMTModel(encoder, decoder)

        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, tgt_dict.size()),
            nn.LogSoftmax())

        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

        if opt.cuda:
            model.cuda()
            generator.cuda()
        else:
            model.cpu()
            generator.cpu()

        model.generator = generator

    self.src_dict = src_dict
    self.tgt_dict = tgt_dict

    self.model = model
    self.model.eval()

def __init__(self, input_dims, hidden_dims, output_dims):
    """Init discriminator."""
    super(Discriminator, self).__init__()

    self.restored = False

    self.layer = nn.Sequential(
        nn.Linear(input_dims, hidden_dims),
        nn.ReLU(),
        nn.Linear(hidden_dims, hidden_dims),
        nn.ReLU(),
        nn.Linear(hidden_dims, output_dims),
        nn.LogSoftmax()
    )

def log_softmax():
    return nn.LogSoftmax()

def __init__(self, config, emb_data):
    super(SquadModel, self).__init__()
    # an embedding layer to lookup pre-trained word embeddings
    self.embed = nn.Embedding(config.vocab_size, config.emb_dim)
    self.embed.weight.requires_grad = False  # do not propagate into the pre-trained word embeddings
    self.embed.weight.data.copy_(emb_data)
    # used for eq(6) does FFNN(p_i)*FFNN(q_j)
    self.ff_align = nn.Linear(config.emb_dim, config.ff_dim)
    # used for eq(10) does FFNN(q_j')
    self.ff_q_indep = nn.Linear(2 * config.hidden_dim, config.ff_dim)
    # used for eq(2) does FFNN(h_a) in a simplified form so that it can be re-used,
    # note: h_a = [u,v] where u and v are start and end words respectively
    # we have 2*config.hidden_dim since we are using a bi-directional LSTM
    self.p_end_ff = nn.Linear(2 * config.hidden_dim, config.ff_dim)
    self.p_start_ff = nn.Linear(2 * config.hidden_dim, config.ff_dim)
    # used for eq(2) plays the role of w_a
    self.w_a = nn.Linear(config.ff_dim, 1, bias=False)
    # used for eq(10) plays the role of w_q
    self.w_q = nn.Linear(config.ff_dim, 1, bias=False)
    self.relu = nn.ReLU()
    self.softmax = nn.Softmax()
    self.logsoftmax = nn.LogSoftmax()
    self.dropout = nn.Dropout(0.6)
    self.hidden_qindp = self.init_hidden(config.num_layers, config.hidden_dim, config.batch_size)
    self.hidden = self.init_hidden(config.num_layers, config.hidden_dim, config.batch_size)
    # since we are using q_align and p_emb as p_star we have input as 2*emb_dim
    # num_layers = 2 and dropout = 0.1
    self.gru = nn.LSTM(input_size=2 * config.emb_dim + 2 * config.hidden_dim, hidden_size=config.hidden_dim,
                       num_layers=config.num_layers, dropout=0.6, bidirectional=True)
    self.q_indep_bilstm = nn.LSTM(input_size=config.emb_dim, hidden_size=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=0.6, bidirectional=True)
    # change init_hidden when you change this gru/lstm
    parameters = ifilter(lambda p: p.requires_grad, self.parameters())
    for p in parameters:
        self.init_param(p)

def __init__(self, config, emb_data):
    super(SquadModel, self).__init__()
    # an embedding layer to lookup pre-trained word embeddings
    self.embed = nn.Embedding(config.vocab_size, config.emb_dim)
    self.embed.weight.requires_grad = False  # do not propagate into the pre-trained word embeddings
    self.embed.weight.data.copy_(emb_data)
    # used for eq(6) does FFNN(p_i)*FFNN(q_j)
    self.ff_align = nn.Linear(config.emb_dim, config.ff_dim)
    # used for eq(10) does FFNN(q_j')
    self.ff_q_indep = nn.Linear(2 * config.hidden_dim, config.ff_dim)
    # used for eq(2) does FFNN(h_a) in a simplified form so that it can be re-used,
    # note: h_a = [u,v] where u and v are start and end words respectively
    # we have 2*config.hidden_dim since we are using a bi-directional LSTM
    self.p_end_ff = nn.Linear(2 * config.hidden_dim, config.ff_dim)
    self.p_start_ff = nn.Linear(2 * config.hidden_dim, config.ff_dim)
    # used for eq(2) plays the role of w_a
    self.w_a = nn.Linear(config.ff_dim, 1, bias=False)
    # used for eq(10) plays the role of w_q
    self.w_q = nn.Linear(config.ff_dim, 1, bias=False)
    self.relu = nn.ReLU()
    self.softmax = nn.Softmax()
    self.logsoftmax = nn.LogSoftmax()
    self.dropout = nn.Dropout(0.2)
    self.hidden_qindp = self.init_hidden(config.num_layers, config.hidden_dim, config.batch_size)
    self.hidden = self.init_hidden(config.num_layers, config.hidden_dim, config.batch_size)
    # since we are using q_align and p_emb as p_star we have input as 2*emb_dim
    # num_layers = 2 and dropout = 0.1
    self.gru = nn.LSTM(input_size=2 * config.emb_dim + 1 + 2 * config.hidden_dim, hidden_size=config.hidden_dim,
                       num_layers=config.num_layers, dropout=0.1, bidirectional=True)
    self.q_indep_bilstm = nn.LSTM(input_size=config.emb_dim, hidden_size=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=0.1, bidirectional=True)
    # change init_hidden when you change this gru/lstm
    parameters = ifilter(lambda p: p.requires_grad, self.parameters())
    for p in parameters:
        self.init_param(p)

def __init__(self, n_in, n_hidden, n_out):
    super(MLPDropout, self).__init__()
    self.hidden_layer = nn.Linear(n_in, n_hidden)
    self.tanh = nn.Tanh()
    self.logistic_layer = nn.Linear(n_hidden, n_out)
    self.softmax = nn.LogSoftmax()

def __init__(self, n_in, n_hidden, n_out):
    super(MLP, self).__init__()
    self.mlp = nn.Sequential(nn.Linear(n_in, n_hidden),
                             nn.Tanh(),
                             nn.Linear(n_hidden, n_out),
                             nn.LogSoftmax())

def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,
             rnn_mode, hidden_size, num_layers, num_labels, tag_space=0,
             embedd_word=None, embedd_char=None, p_in=0.2, p_rnn=0.5):
    super(BiRecurrentConv, self).__init__()

    self.word_embedd = Embedding(num_words, word_dim, init_embedding=embedd_word)
    self.char_embedd = Embedding(num_chars, char_dim, init_embedding=embedd_char)
    self.conv1d = nn.Conv1d(char_dim, num_filters, kernel_size, padding=kernel_size - 1)
    self.dropout_in = nn.Dropout(p=p_in)
    self.dropout_rnn = nn.Dropout(p_rnn)

    if rnn_mode == 'RNN':
        RNN = nn.RNN
    elif rnn_mode == 'LSTM':
        RNN = nn.LSTM
    elif rnn_mode == 'GRU':
        RNN = nn.GRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    self.rnn = RNN(word_dim + num_filters, hidden_size, num_layers=num_layers,
                   batch_first=True, bidirectional=True, dropout=p_rnn)

    self.dense = None
    out_dim = hidden_size * 2
    if tag_space:
        self.dense = nn.Linear(out_dim, tag_space)
        out_dim = tag_space
    self.dense_softmax = nn.Linear(out_dim, num_labels)

    # TODO set dim for log_softmax and set reduce=False to NLLLoss
    self.logsoftmax = nn.LogSoftmax()
    self.nll_loss = nn.NLLLoss(size_average=False)

def __init__(self, word_dim, num_words, char_dim, num_chars, pos_dim, num_pos, num_filters, kernel_size,
             rnn_mode, hidden_size, num_layers, num_labels, arc_space, type_space,
             embedd_word=None, embedd_char=None, embedd_pos=None,
             p_in=0.2, p_out=0.5, p_rnn=(0.5, 0.5), biaffine=True):
    super(BiRecurrentConvBiAffine, self).__init__()

    self.word_embedd = Embedding(num_words, word_dim, init_embedding=embedd_word)
    self.char_embedd = Embedding(num_chars, char_dim, init_embedding=embedd_char)
    self.pos_embedd = Embedding(num_pos, pos_dim, init_embedding=embedd_pos)
    self.conv1d = nn.Conv1d(char_dim, num_filters, kernel_size, padding=kernel_size - 1)
    self.dropout_in = nn.Dropout2d(p=p_in)
    self.dropout_out = nn.Dropout2d(p=p_out)
    self.num_labels = num_labels

    if rnn_mode == 'RNN':
        RNN = VarMaskedRNN
    elif rnn_mode == 'LSTM':
        RNN = VarMaskedLSTM
    elif rnn_mode == 'FastLSTM':
        RNN = VarMaskedFastLSTM
    elif rnn_mode == 'GRU':
        RNN = VarMaskedGRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    self.rnn = RNN(word_dim + num_filters + pos_dim, hidden_size, num_layers=num_layers,
                   batch_first=True, bidirectional=True, dropout=p_rnn)

    out_dim = hidden_size * 2
    self.arc_h = nn.Linear(out_dim, arc_space)
    self.arc_c = nn.Linear(out_dim, arc_space)
    self.attention = BiAAttention(arc_space, arc_space, 1, biaffine=biaffine)

    self.type_h = nn.Linear(out_dim, type_space)
    self.type_c = nn.Linear(out_dim, type_space)
    self.bilinear = BiLinear(type_space, type_space, self.num_labels)
    self.logsoftmax = nn.LogSoftmax()

def _init_output(self, input_dim, nc):
    self.output = nn.Sequential()
    append2seq(self.output, (
        nn.Linear(input_dim, nc),
        nn.LogSoftmax()
    ))

def __init__(self, embeddings_in, embeddings_out, **kwargs):
    super(Seq2SeqModel, self).__init__(embeddings_in, embeddings_out)
    self.hsz = kwargs['hsz']
    nlayers = kwargs['layers']
    rnntype = kwargs['rnntype']
    pdrop = kwargs.get('dropout', 0.5)
    dsz = embeddings_in.dsz
    self.gpu = kwargs.get('gpu', True)
    self.dropout = nn.Dropout(pdrop)
    self.encoder_rnn = pytorch_rnn(dsz, self.hsz, rnntype, nlayers, pdrop)
    self.preds = nn.Linear(self.hsz, self.nc)
    self.decoder_rnn = pytorch_rnn_cell(dsz, self.hsz, rnntype, nlayers, pdrop)
    self.probs = nn.LogSoftmax()

def __init__(self, input_size, hidden_size, output_size):
    super(ANN, self).__init__()
    self.i2h = nn.Linear(input_size, hidden_size)
    # self.h2h = nn.Linear(hidden_size, hidden_size)
    self.h2o = nn.Linear(hidden_size, output_size)
    self.softmax = nn.LogSoftmax()

def build_output(self, hid_dim, deepout_layers, deepout_act, tie_weights):
    """
    Create output projection (from decoder output to softmax)
    """
    output = []
    if deepout_layers > 0:
        output.append(
            Highway(hid_dim, num_layers=deepout_layers, activation=deepout_act))
    emb_dim = self.embeddings.embedding_dim
    vocab_size = self.embeddings.num_embeddings
    if not tie_weights:
        proj = nn.Linear(hid_dim, vocab_size)
    else:
        proj = nn.Linear(emb_dim, vocab_size)
        proj.weight = self.embeddings.weight
        if emb_dim != hid_dim:
            # inp embeddings are (vocab x emb_dim); output is (hid x vocab)
            # if emb_dim != hidden, we insert a projection
            logging.warn("When tying weights, output layer and "
                         "embedding layer should have equal size. "
                         "A projection layer will be insterted.")
            proj = nn.Sequential(nn.Linear(hid_dim, emb_dim), proj)
    output.append(proj)
    output.append(nn.LogSoftmax(dim=1))
    return nn.Sequential(*output)

def forward(self, g, h_in, e):
    h = []

    # Padding to some larger dimension d
    h_t = torch.cat([h_in, Variable(
        torch.Tensor(h_in.size(0), h_in.size(1), self.args['out'] - h_in.size(2)).type_as(h_in.data).zero_())], 2)

    h.append(h_t.clone())

    # Layer
    for t in range(0, self.n_layers):
        h_t = Variable(torch.zeros(h[0].size(0), h[0].size(1), h[0].size(2)).type_as(h_in.data))

        # Apply one layer pass (Message + Update)
        for v in range(0, h_in.size(1)):
            m = self.m[0].forward(h[t][:, v, :], h[t], e[:, v, :])

            # Nodes without edge set message to 0
            m = g[:, v, :, None].expand_as(m) * m
            m = torch.sum(m, 1)

            # Update
            h_t[:, v, :] = self.u[0].forward(h[t][:, v, :], m)

        # Delete virtual nodes
        h_t = (torch.sum(torch.abs(h_in), 2).expand_as(h_t) > 0).type_as(h_t) * h_t

        h.append(h_t.clone())

    # Readout
    res = self.r.forward(h)

    if self.type == 'classification':
        res = nn.LogSoftmax()(res)
    return res

def forward(self, g, h_in, e):
    h = []
    h.append(h_in)

    # Layer
    for t in range(0, len(self.m)):
        u_args = self.u[t].get_args()
        h_t = Variable(torch.zeros(h_in.size(0), h_in.size(1), u_args['out']).type_as(h[t].data))

        # Apply one layer pass (Message + Update)
        for v in range(0, h_in.size(1)):
            m = self.m[t].forward(h[t][:, v, :], h[t], e[:, v, :, :])

            # Nodes without edge set message to 0
            m = g[:, v, :, None].expand_as(m) * m
            m = torch.sum(m, 1)

            # Interaction Net
            opt = {}
            opt['x_v'] = Variable(torch.Tensor([]).type_as(m.data))

            h_t[:, v, :] = self.u[t].forward(h[t][:, v, :], m, opt)

        h.append(h_t.clone())

    # Readout
    res = self.r.forward(h)

    if self.type == 'classification':
        res = nn.LogSoftmax()(res)

    return res

def forward(self, g, h_in, e):
    h = []

    # Padding to some larger dimension d
    h_t = torch.cat([h_in, Variable(
        torch.zeros(h_in.size(0), h_in.size(1), self.args['out'] - h_in.size(2)).type_as(h_in.data))], 2)

    h.append(h_t.clone())

    # Layer
    for t in range(0, self.n_layers):
        e_aux = e.view(-1, e.size(3))
        h_aux = h[t].view(-1, h[t].size(2))

        m = self.m[0].forward(h[t], h_aux, e_aux)
        m = m.view(h[0].size(0), h[0].size(1), -1, m.size(1))

        # Nodes without edge set message to 0
        m = torch.unsqueeze(g, 3).expand_as(m) * m
        m = torch.squeeze(torch.sum(m, 1))

        h_t = self.u[0].forward(h[t], m)

        # Delete virtual nodes
        h_t = (torch.sum(h_in, 2).expand_as(h_t) > 0).type_as(h_t) * h_t
        h.append(h_t)

    # Readout
    res = self.r.forward(h)

    if self.type == 'classification':
        res = nn.LogSoftmax()(res)

    return res

def __init__(self, batch_size, size):
    super(Pool, self).__init__()
    self.size = size
    self.inputs = Variable(torch.FloatTensor(batch_size, 1, size, size)).cuda()
    self.targets = Variable(torch.LongTensor(batch_size)).cuda()

    self.medium = nn.Parameter(torch.randn(num_media, 1, size, size) * 0.02, requires_grad=True)

    self.conv0 = nn.Conv2d(1, 1, 3, padding=1, bias=False)
    self.fc0_size = 8 * 8
    self.fc0 = nn.Linear(self.fc0_size, num_classes)
    self.maxPool = nn.AvgPool2d(8)
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.logSoftmax = nn.LogSoftmax()
    self.loss = nn.NLLLoss()

    learning_rate = 0.0005

    self.conv0.weight.requires_grad = False
    s = 0.25
    kernel = torch.FloatTensor([0.0, s, 0.0, s, 0.0, s, 0.0, s, 0.0]).view(3, 3)
    self.conv0.weight.data.copy_(kernel)

    parameters = ifilter(lambda p: p.requires_grad, self.parameters())
    parameters = list(parameters)
    parameters.append(self.medium)
    self.optimizer = optim.RMSprop(parameters, lr=learning_rate, momentum=0.0)

def __init__(self):
    super(Net_cls, self).__init__()
    self.cls_model = nn.Sequential(OrderedDict([
        ('fc4', nn.Linear(1024, 512)),
        ('relu', nn.ReLU()),
        ('fc5', nn.Linear(512, 2)),
        ('log_softmax', nn.LogSoftmax()),
    ]))

def __init__(self, input_size, hidden_size, output_size):
    super(RNN, self).__init__()
    self.hidden_size = hidden_size
    self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
    self.i2o = nn.Linear(input_size + hidden_size, output_size)
    self.softmax = nn.LogSoftmax(dim=1)

def __init__(self, hidden_size, output_size, n_layers=1):
    super(DecoderRNN, self).__init__()
    self.n_layers = n_layers
    self.hidden_size = hidden_size

    self.embedding = nn.Embedding(output_size, hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)
    self.softmax = nn.LogSoftmax(dim=1)

def __init__(self, input_size, hidden_size, output_size):
    super(RNN, self).__init__()
    self.hidden_size = hidden_size
    self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size)
    self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
    self.o2o = nn.Linear(hidden_size + output_size, output_size)
    self.dropout = nn.Dropout(0.1)
    self.softmax = nn.LogSoftmax(dim=1)

def __init__(self, num_emb, emb_dim, hidden_dim, use_cuda):
    super(TargetLSTM, self).__init__()
    self.num_emb = num_emb
    self.emb_dim = emb_dim
    self.hidden_dim = hidden_dim
    self.use_cuda = use_cuda
    self.emb = nn.Embedding(num_emb, emb_dim)
    self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True)
    self.lin = nn.Linear(hidden_dim, num_emb)
    self.softmax = nn.LogSoftmax()
    self.init_params()

def __init__(self, num_emb, emb_dim, hidden_dim, use_cuda):
    super(Generator, self).__init__()
    self.num_emb = num_emb
    self.emb_dim = emb_dim
    self.hidden_dim = hidden_dim
    self.use_cuda = use_cuda
    self.emb = nn.Embedding(num_emb, emb_dim)
    self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True)
    self.lin = nn.Linear(hidden_dim, num_emb)
    self.softmax = nn.LogSoftmax()
    self.init_params()

def __init__(self, num_classes, vocab_size, emb_dim, filter_sizes, num_filters, dropout):
    super(Discriminator, self).__init__()
    self.emb = nn.Embedding(vocab_size, emb_dim)
    self.convs = nn.ModuleList([
        nn.Conv2d(1, n, (f, emb_dim)) for (n, f) in zip(num_filters, filter_sizes)
    ])
    self.highway = nn.Linear(sum(num_filters), sum(num_filters))
    self.dropout = nn.Dropout(p=dropout)
    self.lin = nn.Linear(sum(num_filters), num_classes)
    self.softmax = nn.LogSoftmax()
    self.init_parameters()

def __init__(self, vocab_size, embedding_dim, hidden_dim):
    super(PoetryModel, self).__init__()
    self.hidden_dim = hidden_dim
    self.embeddings = nn.Embedding(vocab_size, embedding_dim)
    self.lstm = nn.LSTM(embedding_dim, self.hidden_dim)
    self.linear1 = nn.Linear(self.hidden_dim, vocab_size)
    # self.dropout = nn.Dropout(0.2)
    self.softmax = nn.LogSoftmax()

def __init__(self, hidden_size, output_size, n_layers=1):
    super(DecoderRNN, self).__init__()
    self.n_layers = n_layers
    self.hidden_size = hidden_size

    self.embedding = nn.Embedding(output_size, hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)
    self.softmax = nn.LogSoftmax()

def __init__(self, opts):
    super(CM_Discriminator, self).__init__()

    cnn_feat_map = {'resnet18': 512, 'resnet50': 2048, 'vgg16': 2048}
    self.cnn_feat_size = cnn_feat_map[opts.cnn]
    self.num_cls = opts.D_num_cls

    # net1: parallel net
    hidden_lst1 = [self.cnn_feat_size] + opts.D_hidden
    layers1 = OrderedDict()
    if opts.input_relu == 1:
        layers1['relu'] = nn.ReLU()
    for n, (dim_in, dim_out) in enumerate(zip(hidden_lst1, hidden_lst1[1::])):
        layers1['fc%d' % n] = nn.Linear(dim_in, dim_out, bias=False)
        layers1['bn%d' % n] = nn.BatchNorm1d(dim_out)
        layers1['leaky_relu%d' % n] = nn.LeakyReLU(0.2)
    self.net1 = nn.Sequential(layers1)

    # net2: fusing net
    hidden_lst2 = [2 * hidden_lst1[-1]] + opts.D_hidden2 + [self.num_cls + 1]
    layers2 = OrderedDict()
    for n, (dim_in, dim_out) in enumerate(zip(hidden_lst2, hidden_lst2[1::])):
        layers2['fc%d' % n] = nn.Linear(dim_in, dim_out, bias=False)
        if n < len(hidden_lst2) - 2:
            layers2['bn%d' % n] = nn.BatchNorm1d(dim_out)
            layers2['leaky_relu%d' % n] = nn.LeakyReLU(0.2)
    layers2['logsoftmax'] = nn.LogSoftmax()
    self.net2 = nn.Sequential(layers2)

def __init__(self, cuda, mem_dim, num_classes, dropout=False):
    super(SentimentModule, self).__init__()
    self.cudaFlag = cuda
    self.mem_dim = mem_dim
    self.num_classes = num_classes
    self.dropout = dropout
    # torch.manual_seed(456)
    self.l1 = nn.Linear(self.mem_dim, self.num_classes)
    self.logsoftmax = nn.LogSoftmax()
    if self.cudaFlag:
        self.l1 = self.l1.cuda()

def __init__(self, embedding, n_holistic_filters, n_per_dim_filters, filter_widths, hidden_layer_units,
             num_classes, dropout, ext_feats):
    super(MPCNN, self).__init__()
    self.embedding = embedding
    self.n_word_dim = embedding.weight.size(1)
    self.n_holistic_filters = n_holistic_filters
    self.n_per_dim_filters = n_per_dim_filters
    self.filter_widths = filter_widths
    self.ext_feats = ext_feats
    holistic_conv_layers = []
    per_dim_conv_layers = []

    for ws in filter_widths:
        if np.isinf(ws):
            continue

        holistic_conv_layers.append(nn.Sequential(
            nn.Conv1d(self.n_word_dim, n_holistic_filters, ws),
            nn.Tanh()
        ))

        per_dim_conv_layers.append(nn.Sequential(
            nn.Conv1d(self.n_word_dim, self.n_word_dim * n_per_dim_filters, ws, groups=self.n_word_dim),
            nn.Tanh()
        ))

    self.holistic_conv_layers = nn.ModuleList(holistic_conv_layers)
    self.per_dim_conv_layers = nn.ModuleList(per_dim_conv_layers)

    # compute number of inputs to first hidden layer
    COMP_1_COMPONENTS_HOLISTIC, COMP_1_COMPONENTS_PER_DIM, COMP_2_COMPONENTS = 2 + n_holistic_filters, 2 + self.n_word_dim, 2
    EXT_FEATS = 4 if ext_feats else 0
    n_feat_h = 3 * len(self.filter_widths) * COMP_2_COMPONENTS
    n_feat_v = (
        # comparison units from holistic conv for min, max, mean pooling for non-infinite widths
        3 * ((len(self.filter_widths) - 1) ** 2) * COMP_1_COMPONENTS_HOLISTIC +
        # comparison units from holistic conv for min, max, mean pooling for infinite widths
        3 * 3 +
        # comparison units from per-dim conv
        2 * (len(self.filter_widths) - 1) * n_per_dim_filters * COMP_1_COMPONENTS_PER_DIM
    )
    n_feat = n_feat_h + n_feat_v + EXT_FEATS

    self.final_layers = nn.Sequential(
        nn.Linear(n_feat, hidden_layer_units),
        nn.Tanh(),
        nn.Dropout(dropout),
        nn.Linear(hidden_layer_units, num_classes),
        nn.LogSoftmax()
    )

def loss_calc(pred, label, gpu):
    """
    This function returns cross entropy loss for semantic segmentation
    """
    # out shape batch_size x channels x h x w -> batch_size x channels x h x w
    # label shape h x w x 1 x batch_size -> batch_size x 1 x h x w
    label = torch.from_numpy(label).long()
    label = Variable(label).cuda(gpu)
    m = nn.LogSoftmax()
    criterion = CrossEntropy2d().cuda(gpu)
    pred = m(pred)
    return criterion(pred, label)

def __init__(self, word_dim, num_words, char_dim, num_chars, pos_dim, num_pos, num_filters, kernel_size,
             rnn_mode, hidden_size, num_layers, num_labels, arc_space, type_space,
             embedd_word=None, embedd_char=None, embedd_pos=None,
             p_in=0.2, p_out=0.5, p_rnn=(0.5, 0.5), biaffine=True,
             prior_order='deep_first', skipConnect=False, biasArc=False, biasType=False):
    super(StackPtrNet, self).__init__()

    self.word_embedd = Embedding(num_words, word_dim, init_embedding=embedd_word)
    self.char_embedd = Embedding(num_chars, char_dim, init_embedding=embedd_char)
    self.pos_embedd = Embedding(num_pos, pos_dim, init_embedding=embedd_pos)
    self.conv1d = nn.Conv1d(char_dim, num_filters, kernel_size, padding=kernel_size - 1)
    self.dropout_in = nn.Dropout2d(p=p_in)
    self.dropout_out = nn.Dropout2d(p=p_out)
    self.num_labels = num_labels

    if prior_order in ['deep_first', 'shallow_first']:
        self.prior_order = PriorOrder.DEPTH
    elif prior_order == 'inside_out':
        self.prior_order = PriorOrder.INSIDE_OUT
    elif prior_order == 'left2right':
        self.prior_order = PriorOrder.LEFT2RIGTH
    else:
        raise ValueError('Unknown prior order: %s' % prior_order)

    self.skipConnect = skipConnect
    self.biasArc = biasArc
    self.biasType = biasType

    if rnn_mode == 'RNN':
        RNN_ENCODER = VarMaskedRNN
        RNN_DECODER = SkipConnectRNN if skipConnect else VarMaskedRNN
    elif rnn_mode == 'LSTM':
        RNN_ENCODER = VarMaskedLSTM
        RNN_DECODER = SkipConnectLSTM if skipConnect else VarMaskedLSTM
    elif rnn_mode == 'FastLSTM':
        RNN_ENCODER = VarMaskedFastLSTM
        RNN_DECODER = SkipConnectFastLSTM if skipConnect else VarMaskedFastLSTM
    elif rnn_mode == 'GRU':
        RNN_ENCODER = VarMaskedGRU
        RNN_DECODER = SkipConnectGRU if skipConnect else VarMaskedGRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    self.encoder = RNN_ENCODER(word_dim + num_filters + pos_dim, hidden_size, num_layers=num_layers,
                               batch_first=True, bidirectional=True, dropout=p_rnn)

    self.decoder = RNN_DECODER(word_dim + num_filters + pos_dim, hidden_size, num_layers=num_layers,
                               batch_first=True, bidirectional=False, dropout=p_rnn)

    self.hx_dense = nn.Linear(2 * hidden_size, hidden_size)

    self.arc_h = nn.Linear(hidden_size * 3, arc_space) if self.biasArc else nn.Linear(hidden_size, arc_space)  # arc dense for decoder
    self.arc_c = nn.Linear(hidden_size * 2, arc_space)  # arc dense for encoder
    self.attention = BiAAttention(arc_space, arc_space, 1, biaffine=biaffine)

    self.type_h = nn.Linear(hidden_size * 3, type_space) if self.biasType else nn.Linear(hidden_size, type_space)  # type dense for decoder
    self.type_c = nn.Linear(hidden_size * 2, type_space)  # type dense for encoder
    self.bilinear = BiLinear(type_space, type_space, self.num_labels)

    self.logsoftmax = nn.LogSoftmax()

def forward(self, g, h_in, e, plotter=None):
    h = []
    h.append(h_in)

    # Layer
    for t in range(0, len(self.m)):
        u_args = self.u[t].get_args()
        h_t = Variable(torch.zeros(h_in.size(0), h_in.size(1), u_args['out']).type_as(h[t].data))

        # Apply one layer pass (Message + Update)
        for v in range(0, h_in.size(1)):
            m = self.m[t].forward(h[t][:, v, :], h[t], e[:, v, :])

            # Nodes without edge set message to 0
            m = g[:, v, :, None].expand_as(m) * m
            m = torch.sum(m, 1)

            # Duvenaud
            deg = torch.sum(g[:, v, :].data, 1)

            # Separate degrees
            for i in range(len(u_args['deg'])):
                ind = deg == u_args['deg'][i]
                ind = Variable(torch.squeeze(torch.nonzero(torch.squeeze(ind))), volatile=True)

                opt = {'deg': i}

                # Update
                if len(ind) != 0:
                    aux = self.u[t].forward(torch.index_select(h[t], 0, ind)[:, v, :],
                                            torch.index_select(m, 0, ind), opt)

                    ind = ind.data.cpu().numpy()
                    for j in range(len(ind)):
                        h_t[ind[j], v, :] = aux[j, :]

        if plotter is not None:
            num_feat = h_t.size(2)
            color = h_t[0, :, :].data.cpu().numpy()
            for i in range(num_feat):
                plotter(color[:, i], 'layer_' + str(t) + '_element_' + str(i) + '.png')

        h.append(h_t.clone())

    # Readout
    res = self.r.forward(h)

    if self.type == 'classification':
        res = nn.LogSoftmax()(res)

    return res

def run(net, loader, optimizer, tracker, train=False, prefix='', epoch=0):
    """ Run an epoch over the given loader """
    if train:
        net.train()
        tracker_class, tracker_params = tracker.MovingMeanMonitor, {'momentum': 0.99}
    else:
        net.eval()
        tracker_class, tracker_params = tracker.MeanMonitor, {}
        answ = []
        idxs = []
        accs = []

    tq = tqdm(loader, desc='{} E{:03d}'.format(prefix, epoch), ncols=0)
    loss_tracker = tracker.track('{}_loss'.format(prefix), tracker_class(**tracker_params))
    acc_tracker = tracker.track('{}_acc'.format(prefix), tracker_class(**tracker_params))

    log_softmax = nn.LogSoftmax().cuda()
    for v, q, a, idx, q_len in tq:
        var_params = {
            'volatile': not train,
            'requires_grad': False,
        }
        v = Variable(v.cuda(async=True), **var_params)
        q = Variable(q.cuda(async=True), **var_params)
        a = Variable(a.cuda(async=True), **var_params)
        q_len = Variable(q_len.cuda(async=True), **var_params)

        out = net(v, q, q_len)
        nll = -log_softmax(out)
        loss = (nll * a / 10).sum(dim=1).mean()
        acc = utils.batch_accuracy(out.data, a.data).cpu()

        if train:
            global total_iterations
            update_learning_rate(optimizer, total_iterations)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_iterations += 1
        else:
            # store information about evaluation of this minibatch
            _, answer = out.data.cpu().max(dim=1)
            answ.append(answer.view(-1))
            accs.append(acc.view(-1))
            idxs.append(idx.view(-1).clone())

        loss_tracker.append(loss.data[0])
        acc_tracker.append(acc.mean())
        fmt = '{:.4f}'.format
        tq.set_postfix(loss=fmt(loss_tracker.mean.value), acc=fmt(acc_tracker.mean.value))

    if not train:
        answ = list(torch.cat(answ, dim=0))
        accs = list(torch.cat(accs, dim=0))
        idxs = list(torch.cat(idxs, dim=0))
        return answ, accs, idxs

def __init__(self, args):
    super(CharCNN, self).__init__()

    self.conv1 = nn.Sequential(
        nn.Conv1d(args.num_features, 256, kernel_size=7, stride=1),
        nn.ReLU(),
        nn.MaxPool1d(kernel_size=3, stride=3)
    )

    self.conv2 = nn.Sequential(
        nn.Conv1d(256, 256, kernel_size=7, stride=1),
        nn.ReLU(),
        nn.MaxPool1d(kernel_size=3, stride=3)
    )

    self.conv3 = nn.Sequential(
        nn.Conv1d(256, 256, kernel_size=3, stride=1),
        nn.ReLU()
    )

    self.conv4 = nn.Sequential(
        nn.Conv1d(256, 256, kernel_size=3, stride=1),
        nn.ReLU()
    )

    self.conv5 = nn.Sequential(
        nn.Conv1d(256, 256, kernel_size=3, stride=1),
        nn.ReLU()
    )

    self.conv6 = nn.Sequential(
        nn.Conv1d(256, 256, kernel_size=3, stride=1),
        nn.ReLU(),
        nn.MaxPool1d(kernel_size=3, stride=3)
    )

    self.fc1 = nn.Sequential(
        nn.Linear(8704, 1024),
        nn.ReLU(),
        nn.Dropout(p=args.dropout)
    )

    self.fc2 = nn.Sequential(
        nn.Linear(1024, 1024),
        nn.ReLU(),
        nn.Dropout(p=args.dropout)
    )

    self.fc3 = nn.Linear(1024, 4)
    self.log_softmax = nn.LogSoftmax()

def __init__(self, num_features):
    super(CharCNN, self).__init__()

    self.num_features = num_features

    self.conv1 = nn.Sequential(
        nn.Conv2d(1, 256, kernel_size=(7, self.num_features), stride=1),
        nn.ReLU()
    )
    self.maxpool1 = nn.MaxPool2d(kernel_size=(3, 1), stride=(3, 1))

    self.conv2 = nn.Sequential(
        nn.Conv2d(1, 256, kernel_size=(7, 256), stride=1),
        nn.ReLU()
    )
    self.maxpool2 = nn.MaxPool2d(kernel_size=(3, 1), stride=(3, 1))

    self.conv3 = nn.Sequential(
        nn.Conv2d(1, 256, kernel_size=(3, 256), stride=1),
        nn.ReLU()
    )

    self.conv4 = nn.Sequential(
        nn.Conv2d(1, 256, kernel_size=(3, 256), stride=1),
        nn.ReLU()
    )

    self.conv5 = nn.Sequential(
        nn.Conv2d(1, 256, kernel_size=(3, 256), stride=1),
        nn.ReLU()
    )

    self.conv6 = nn.Sequential(
        nn.Conv2d(1, 256, kernel_size=(3, 256), stride=1),
        nn.ReLU()
    )
    self.maxpool6 = nn.MaxPool2d(kernel_size=(3, 1), stride=(3, 1))

    self.fc1 = nn.Sequential(
        nn.Linear(8704, 1024),
        nn.ReLU(),
        nn.Dropout(p=0.5)
    )

    self.fc2 = nn.Sequential(
        nn.Linear(1024, 1024),
        nn.ReLU(),
        nn.Dropout(p=0.5)
    )

    self.fc3 = nn.Linear(1024, 4)
    self.softmax = nn.LogSoftmax()  # nn.LogSoftmax()
    # self.inference_log_softmax = InferenceBatchLogSoftmax()