The following 31 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.NLLLoss().
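Before the project examples, here is a minimal usage sketch (not taken from any of the projects below): nn.NLLLoss expects log-probabilities, typically produced by nn.LogSoftmax, together with integer class labels; nn.CrossEntropyLoss fuses the two steps into one module.

import torch
import torch.nn as nn

# Minimal sketch: NLLLoss consumes log-probabilities and integer targets.
log_softmax = nn.LogSoftmax(dim=1)
criterion = nn.NLLLoss()

logits = torch.randn(4, 3, requires_grad=True)  # batch of 4, 3 classes
targets = torch.tensor([0, 2, 1, 0])            # class index per example

loss = criterion(log_softmax(logits), targets)
loss.backward()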
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
    if not hasattr(self, 'criterion'):
        self.criterion = nn.NLLLoss()
    if not hasattr(self, 'optimizer'):
        self.optimizer = optim.Adam(self.parameters(), lr=self.options['LR'],
                                    betas=(0.9, 0.999), eps=1e-08,
                                    weight_decay=self.options['L2'])
    self.optimizer.zero_grad()
    preds = self.__call__(premise_batch, hypothesis_batch, training=True)
    loss = self.criterion(preds, y_batch)
    loss.backward()
    self.optimizer.step()
    _, pred_labels = torch.max(preds, dim=-1, keepdim=True)
    y_true = self._get_numpy_array_from_variable(y_batch)
    y_pred = self._get_numpy_array_from_variable(pred_labels)
    acc = accuracy_score(y_true, y_pred)
    ret_loss = self._get_numpy_array_from_variable(loss)[0]
    return ret_loss, acc
def train(output, input, ann, learning_rate=.005):
    # function for training the neural net
    criterion = nn.NLLLoss()
    ann.zero_grad()  # zero the gradients
    # predicting the output
    output_p = ann(input)  # input --> hidden_layer --> output
    loss = criterion(output_p, output)  # comparing the predicted output with the actual output
    loss.backward()  # backpropagating to compute gradients with respect to loss
    for p in ann.parameters():
        # manual SGD step: move each parameter against its gradient, scaled by the learning rate
        p.data.add_(-learning_rate, p.grad.data)
    return output, loss.data[0]  # returning the target and the loss

#n_iters=100000
def __init__(self, input_size, hidden_size, batch_size):
    super(PtrNet_tanh, self).__init__()
    self.rnn_layers = 1
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.input_size = input_size
    self.n = 16
    self.init_var = 0.08
    self.init_token = nn.Parameter(torch.zeros((self.input_size)))
    self.W1 = nn.Parameter(torch.randn((self.hidden_size, self.hidden_size)) * self.init_var)
    self.W2 = nn.Parameter(torch.randn((self.hidden_size, self.hidden_size)) * self.init_var)
    self.v = nn.Parameter(torch.randn((self.hidden_size, 1)) * self.init_var)
    # cells
    self.encoder_cell = nn.GRUCell(input_size, hidden_size)
    self.decoder_cell = nn.GRUCell(input_size, hidden_size)
    self.NLLoss = nn.NLLLoss(size_average=True)
    # initialize weights
    self.init_weights()
def __init__(self, input_size, hidden_size, batch_size):
    super(PtrNet_tanh, self).__init__()
    print('Initializing Parameters Merge')
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.input_size = input_size
    self.n = 12
    self.init_var = 0.08
    self.init_token = nn.Parameter(-1 * torch.ones((self.input_size)))
    self.pad_token = nn.Parameter(-1 * torch.ones((self.input_size)))
    self.end_state = nn.Parameter(-1 * torch.ones((self.hidden_size)))
    self.W1 = nn.Parameter(torch.randn((self.hidden_size, self.hidden_size)) * self.init_var)
    self.W2 = nn.Parameter(torch.randn((self.hidden_size, self.hidden_size)) * self.init_var)
    self.v = nn.Parameter(torch.randn((self.hidden_size, 1)) * self.init_var)
    # cells
    self.encoder_cell = nn.GRUCell(input_size, hidden_size)
    self.decoder_cell = nn.GRUCell(input_size, hidden_size)
    self.NLLoss = nn.NLLLoss(size_average=True)
    # initialize weights
    self.init_weights()
def __init__(self, parameter):
    super(LSTMTagger, self).__init__()
    self.hidden_dim = parameter['hidden_dim']
    self.word_embeddings = nn.Embedding(parameter['vocab_size'], parameter['embedding_dim'])
    self.embedding_dim = parameter['embedding_dim']
    # The LSTM takes word embeddings and capitalization embeddings as inputs,
    # and outputs hidden states with dimensionality hidden_dim.
    self.lstm = nn.LSTM(self.embedding_dim, parameter['hidden_dim'])
    # The linear layer that maps from hidden state space to tag space
    self.hidden2tag = nn.Linear(parameter['hidden_dim'], parameter['tagset_size'])
    self.hidden = self.init_hidden()
    self.loss_function = nn.NLLLoss()
def batchNLLLoss(self, inp, target):
    """
    Returns the NLL Loss for predicting target sequence.

    Inputs: inp, target
        - inp: batch_size x seq_len
        - target: batch_size x seq_len

        inp should be target with <s> (start letter) prepended
    """
    loss_fn = nn.NLLLoss()
    batch_size, seq_len = inp.size()
    inp = inp.permute(1, 0)        # seq_len x batch_size
    target = target.permute(1, 0)  # seq_len x batch_size

    h = self.init_hidden(batch_size)
    loss = 0
    for i in range(seq_len):
        out, h = self.forward(inp[i], h)
        loss += loss_fn(out, target[i])

    return loss  # per batch
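The loop above accumulates one NLLLoss call per timestep, each a mean over the batch. A small standalone sketch (shapes are illustrative assumptions, not values from the original project) showing that the same total can be computed in a single call on the flattened tensors, using reduction='sum', the newer spelling of size_average=False:

import torch
import torch.nn as nn

# Summing per-timestep NLLLoss (mean over the batch) equals one call on the
# flattened tensors with reduction='sum', divided by the batch size.
seq_len, batch_size, vocab_size = 5, 3, 7
log_probs = torch.log_softmax(torch.randn(seq_len, batch_size, vocab_size), dim=-1)
targets = torch.randint(vocab_size, (seq_len, batch_size))

step_loss = sum(nn.NLLLoss()(log_probs[t], targets[t]) for t in range(seq_len))
flat_loss = nn.NLLLoss(reduction='sum')(log_probs.view(-1, vocab_size),
                                        targets.view(-1)) / batch_size
assert torch.allclose(step_loss, flat_loss)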
def __init__(self, params):
    # memorize params
    for field, value in params.items():
        setattr(self, field, value)
    self.aBot = Answerer(params)
    self.qBot = Questioner(params)
    self.criterion = nn.NLLLoss()
    self.reward = torch.Tensor(self.batchSize, 1)
    self.totalReward = None
    self.rlNegReward = -10 * self.rlScale

    # ship to gpu if needed
    if self.useGPU:
        self.aBot = self.aBot.cuda()
        self.qBot = self.qBot.cuda()
        self.reward = self.reward.cuda()

    print(self.aBot)
    print(self.qBot)
    # switch to train
def __init__(self, generator, tgt_vocab, label_smoothing=0.0):
    super(NMTLossCompute, self).__init__(generator, tgt_vocab)

    # CHECK
    assert (label_smoothing >= 0.0 and label_smoothing <= 1.0)
    # END CHECK

    if label_smoothing > 0:
        # When label smoothing is turned on,
        # KL-divergence between q_{smoothed ground truth prob.}(w)
        # and p_{prob. computed by model}(w) is minimized.
        # If label smoothing value is set to zero, the loss
        # is equivalent to NLLLoss or CrossEntropyLoss.
        # All non-true labels are uniformly set to low-confidence.
        self.criterion = nn.KLDivLoss(size_average=False)
        one_hot = torch.randn(1, len(tgt_vocab))
        one_hot.fill_(label_smoothing / (len(tgt_vocab) - 2))
        one_hot[0][self.padding_idx] = 0
        self.register_buffer('one_hot', one_hot)
    else:
        weight = torch.ones(len(tgt_vocab))
        weight[self.padding_idx] = 0
        self.criterion = nn.NLLLoss(weight, size_average=False)
    self.confidence = 1.0 - label_smoothing
def __init__(self):
    super(Criterion, self).__init__()
    #self.loss = nn.MultiLabelMarginLoss()
    self.loss = nn.MultiLabelSoftMarginLoss()
    #self.loss = nn.MultiMarginLoss()
    #self.loss = nn.CrossEntropyLoss()
    #self.loss = nn.NLLLoss()
def load_best_model(model_filename, args):
    model = torch.load(model_filename)
    trainer = SentimentTrainer(args, model, criterion=nn.NLLLoss(), optimizer=None)
    return trainer
def _new_nmt_criterion(self, vocab_size):
    weight = torch.ones(vocab_size)
    weight[Constants.PAD] = 0
    criterion = nn.NLLLoss(weight, size_average=False)
    if self._gpu_ids is not None:
        criterion.cuda()
    return criterion
def NMTCriterion(vocabSize, gpus):
    weight = torch.ones(vocabSize)
    weight[Constants.PAD] = 0
    crit = nn.NLLLoss(weight, size_average=False)
    if gpus:
        crit.cuda()
    return crit
def log_loss(loss_weight=None, size_ave=True, dim=2):
    if dim == 1:
        return nn.NLLLoss(weight=loss_weight, size_average=size_ave)
    elif dim == 2:
        return nn.NLLLoss2d(weight=loss_weight, size_average=size_ave)
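In more recent PyTorch releases nn.NLLLoss2d is deprecated: nn.NLLLoss itself accepts higher-dimensional inputs, so the dim == 2 branch above can be served by the same class. A small sketch of the 2-D case under that newer API (the shapes are illustrative assumptions):

import torch
import torch.nn as nn

# Newer PyTorch: nn.NLLLoss handles (N, C, H, W) log-probabilities with
# (N, H, W) integer targets directly, replacing nn.NLLLoss2d.
criterion = nn.NLLLoss()
log_probs = torch.log_softmax(torch.randn(2, 4, 8, 8), dim=1)  # N=2, C=4, 8x8 map
targets = torch.randint(4, (2, 8, 8))
loss = criterion(log_probs, targets)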
def main():
    if torch.cuda.is_available():
        use_cuda = True
    else:
        use_cuda = False

    # Dataset
    trainset = datasets.MNIST('../../data', download=True, train=True,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307,), (0.3081,))]))
    train_loader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)

    # Model
    model = Net()

    # NLLLoss
    nllloss = nn.NLLLoss()  # CrossEntropyLoss = log_softmax + NLLLoss
    # CenterLoss
    loss_weight = 1.0
    centerloss = CenterLoss(10, 2, loss_weight)
    if use_cuda:
        nllloss = nllloss.cuda()
        centerloss = centerloss.cuda()
        model = model.cuda()
    criterion = [nllloss, centerloss]

    # optimzer4nn
    optimizer4nn = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)
    sheduler = lr_scheduler.StepLR(optimizer4nn, 20, gamma=0.8)

    # optimzer4center
    optimzer4center = optim.SGD(centerloss.parameters(), lr=0.5)

    for epoch in range(50):
        sheduler.step()
        # print optimizer4nn.param_groups[0]['lr']
        train(train_loader, model, criterion, [optimizer4nn, optimzer4center], epoch + 1, use_cuda)
def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,
             rnn_mode, hidden_size, num_layers, num_labels,
             tag_space=0, embedd_word=None, embedd_char=None, p_in=0.2, p_rnn=0.5):
    super(BiRecurrentConv, self).__init__()

    self.word_embedd = Embedding(num_words, word_dim, init_embedding=embedd_word)
    self.char_embedd = Embedding(num_chars, char_dim, init_embedding=embedd_char)
    self.conv1d = nn.Conv1d(char_dim, num_filters, kernel_size, padding=kernel_size - 1)
    self.dropout_in = nn.Dropout(p=p_in)
    self.dropout_rnn = nn.Dropout(p_rnn)

    if rnn_mode == 'RNN':
        RNN = nn.RNN
    elif rnn_mode == 'LSTM':
        RNN = nn.LSTM
    elif rnn_mode == 'GRU':
        RNN = nn.GRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    self.rnn = RNN(word_dim + num_filters, hidden_size, num_layers=num_layers,
                   batch_first=True, bidirectional=True, dropout=p_rnn)

    self.dense = None
    out_dim = hidden_size * 2
    if tag_space:
        self.dense = nn.Linear(out_dim, tag_space)
        out_dim = tag_space
    self.dense_softmax = nn.Linear(out_dim, num_labels)

    # TODO set dim for log_softmax and set reduce=False to NLLLoss
    self.logsoftmax = nn.LogSoftmax()
    self.nll_loss = nn.NLLLoss(size_average=False)
def create_loss(self):
    return nn.NLLLoss()
def __init__(self, LossFn=nn.NLLLoss):
    super(SequenceCriterion, self).__init__()
    self.crit = LossFn(ignore_index=0, size_average=False)
def __init__(self, weight=None, mask=None, size_average=True):
    self.mask = mask
    self.size_average = size_average
    if mask is not None:
        if weight is None:
            raise ValueError("Must provide weight with a mask.")
        weight[mask] = 0

    super(NLLLoss, self).__init__(
        self._NAME,
        nn.NLLLoss(weight=weight, size_average=size_average))
def seqtoseq_train(n_iters, training_data, print_every=1000, learning_rate=0.01, tfl=False):
    print_loss_total = 0
    hidden_size = 256
    in_lang, out_lang, inwords, outwords = dataclean(training_data)
    metadata = open('app/brain/seqtoseq_meta.pkl', 'wb')
    pk.dump([in_lang, out_lang], metadata)
    if tfl == False:
        encoder = EncoderRNN(inwords, hidden_size)
        decoder = AttnDecoderRNN(hidden_size, outwords, dropout_p=0.1)
    else:
        encoder = torch.load('app/brain/encoder.pt')
        decoder = torch.load('app/brain/decoder.pt')
    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_data = [variablesFromPair(random.choice(training_data), in_lang, out_lang)
                     for i in range(n_iters)]
    criterion = nn.NLLLoss()
    if use_cuda:
        criterion = criterion.cuda()
    for iter in range(1, n_iters + 1):
        training_pair = training_data[iter - 1]
        input_variable = training_pair[0]
        target_variable = training_pair[1]
        loss = train(input_variable, target_variable, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        accuracy = 100 - (loss * 100)
        if accuracy < 0:
            accuracy = 0
        if iter % 1000 == 0:
            print(accuracy, "%")
    torch.save(encoder, 'app/brain/encoder.pt')
    torch.save(decoder, 'app/brain/decoder.pt')
def make_criterion(vocab_size, pad):
    weight = torch.ones(vocab_size)
    weight[pad] = 0
    # don't average batches since num words is variable (depending on padding)
    criterion = nn.NLLLoss(weight, size_average=False)
    return criterion
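The zeroed weight entry above keeps padding tokens from contributing to the loss. In newer PyTorch the same effect is usually obtained with ignore_index, which skips those target positions entirely; a hedged sketch, where the padding id is an illustrative assumption:

import torch.nn as nn

# Alternative: ignore_index skips padded target positions, matching the effect
# of zeroing the pad entry in the weight vector above.
PAD = 0  # illustrative padding id, an assumption
criterion = nn.NLLLoss(ignore_index=PAD, reduction='sum')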
def __init__(self, batch_size, size):
    super(Pool, self).__init__()
    self.size = size
    self.inputs = Variable(torch.FloatTensor(batch_size, 1, size, size)).cuda()
    self.targets = Variable(torch.LongTensor(batch_size)).cuda()
    self.medium = nn.Parameter(torch.randn(num_media, 1, size, size) * 0.02, requires_grad=True)
    self.conv0 = nn.Conv2d(1, 1, 3, padding=1, bias=False)
    self.fc0_size = 8 * 8
    self.fc0 = nn.Linear(self.fc0_size, num_classes)
    self.maxPool = nn.AvgPool2d(8)
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.logSoftmax = nn.LogSoftmax()
    self.loss = nn.NLLLoss()

    learning_rate = 0.0005
    self.conv0.weight.requires_grad = False
    s = 0.25
    kernel = torch.FloatTensor([0.0, s, 0.0,
                                s, 0.0, s,
                                0.0, s, 0.0]).view(3, 3)
    self.conv0.weight.data.copy_(kernel)
    parameters = filter(lambda p: p.requires_grad, self.parameters())
    parameters = list(parameters)
    parameters.append(self.medium)
    self.optimizer = optim.RMSprop(parameters, lr=learning_rate, momentum=0.0)
def __init__(self):
    super(seq2seq, self).__init__()
    self.max_epoches = 100000
    self.batch_index = 0
    self.GO_token = 2
    self.EOS_token = 1
    self.input_size = 14
    self.output_size = 15
    self.hidden_size = 100
    self.max_length = 15
    self.show_epoch = 100
    self.use_cuda = USE_CUDA
    self.model_path = "./model/"
    self.n_layers = 1
    self.dropout_p = 0.05
    self.beam_search = True
    self.top_k = 5
    self.alpha = 0.5

    self.enc_vec = []
    self.dec_vec = []

    # initialize the encoder and decoder
    self.encoder = EncoderRNN(self.input_size, self.hidden_size, self.n_layers)
    self.decoder = AttnDecoderRNN('general', self.hidden_size, self.output_size,
                                  self.n_layers, self.dropout_p, self.max_length)
    if USE_CUDA:
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()
    self.encoder_optimizer = optim.Adam(self.encoder.parameters())
    self.decoder_optimizer = optim.Adam(self.decoder.parameters())
    self.criterion = nn.NLLLoss()
def evaluate(data, model, verbose=False):
    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0
    criterion = nn.NLLLoss()

    for sentence, actions in data:
        if len(sentence) > 1:
            outputs, _, actions_done = model(sentence, actions)

            loss = ag.Variable(torch.FloatTensor([0]))
            action_idxs = [ag.Variable(torch.LongTensor([a])) for a in actions_done]
            for output, act in zip(outputs, action_idxs):
                loss += criterion(output.view((-1, 3)), act)

            tot_loss += utils.to_scalar(loss.data)
            instance_count += 1

            for gold, output in zip(actions_done, outputs):
                pred_act = utils.argmax(output.data)
                if pred_act == gold:
                    correct_actions += 1
            total_actions += len(outputs)

    acc = float(correct_actions) / total_actions
    loss = float(tot_loss) / instance_count
    if verbose:
        print("Number of instances: {}    Number of network actions: {}".format(instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(float(correct_actions) / total_actions, tot_loss / instance_count))
    return acc, loss
def train(train_dataset, dev_dataset, vocab, args):
    # Optionally reweight loss per class to the distribution of classes in
    # the public dataset
    weight = torch.Tensor([1/0.024, 1/0.820, 1/0.156]) if args.reweight else None
    criterion = nn.NLLLoss(weight=weight)

    # initialize model, criterion/loss_function, optimizer
    embedding_model = load_embedding_model(args, vocab)
    model = TreeLSTMSentiment(args=args, criterion=criterion, embeddings=embedding_model, vocab=vocab)
    if args.cuda:
        model.cuda()
        criterion.cuda()
    optimizer = choose_optimizer(args, model)

    # create trainer object for training and testing
    trainer = SentimentTrainer(args, model, criterion, optimizer, embedding_model)

    experiment_dir = os.path.join(os.getcwd(), args.saved, "models_" + args.name)
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)
    open(experiment_dir + "/" + "config.txt", "w+").write(str(args))

    max_dev = 0
    max_dev_epoch = 0
    for epoch in range(args.epochs):
        train_loss = trainer.train(train_dataset)
        dev_loss, dev_acc, _, _ = trainer.test(dev_dataset)
        dev_acc = torch.mean(dev_acc)
        print('==> Train loss : %f \t' % train_loss, end="")
        print('Epoch ', epoch, 'dev percentage ', dev_acc)
        model_filename = experiment_dir + '/' + 'model_' + str(epoch) + '.pth'
        torch.save(model, model_filename)
        if dev_acc > max_dev:
            max_dev = dev_acc
            max_dev_epoch = epoch
            max_model_filename = model_filename
        gc.collect()
    print('epoch ' + str(max_dev_epoch) + ' dev score of ' + str(max_dev))
    return max_dev_epoch, max_dev, max_model_filename
def get_loss(self, model, target, output):
    backend = model.get_backend()
    if backend.get_name() == 'keras':

        return keras_wrap(model, target, output, 'categorical_crossentropy')

    elif backend.get_name() == 'pytorch':

        # pylint: disable=import-error
        import torch
        import torch.nn as nn
        # pylint: enable=import-error

        loss = model.data.move(nn.NLLLoss())

        def do_loss(truth, prediction):
            """ Calculates CCE loss. """
            # Truth will be one-hot: (batch_size, ..., n_words)
            # But PyTorch only uses class labels (rather than one-hot).
            # PyTorch doesn't automatically broadcast loss into higher
            # dimensions, so we need to flatten it out.

            # There is only one input for this loss function.
            truth = truth[0]
            # Flatten it out into: (lots of entries, number of classes)
            truth = truth.view(-1, truth.size(truth.dim() - 1))
            # Convert one-hot to class label: (lots of entries, )
            truth = torch.max(truth, 1)[1].squeeze(1)

            # Flatten out the prediction into:
            # (lots of entries, number of classes)
            prediction = prediction.view(
                -1, prediction.size(prediction.dim() - 1)
            )

            return loss(prediction, truth)

        return [
            [
                (target, model.data.placeholder(target))
            ],
            do_loss
        ]

    else:
        raise ValueError('Unsupported backend "{}" for loss function "{}"'
                         .format(backend.get_name(), self.get_name()))
def train(data, model, optimizer, verbose=True):
    criterion = nn.NLLLoss()
    if model.use_cuda:
        criterion.cuda()

    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0

    for sentence, actions in data:
        if len(sentence) <= 2:
            continue
        optimizer.zero_grad()
        model.refresh()

        outputs, _, actions_done = model(sentence, actions)

        if model.use_cuda:
            loss = ag.Variable(cuda.FloatTensor([0]))
            action_idxs = [ag.Variable(cuda.LongTensor([a])) for a in actions_done]
        else:
            loss = ag.Variable(torch.FloatTensor([0]))
            action_idxs = [ag.Variable(torch.LongTensor([a])) for a in actions_done]

        for output, act in zip(outputs, action_idxs):
            loss += criterion(output.view(-1, 3), act)

        tot_loss += utils.to_scalar(loss.data)
        instance_count += 1

        for gold, output in zip(actions_done, outputs):
            pred_act = utils.argmax(output.data)
            if pred_act == gold:
                correct_actions += 1
        total_actions += len(outputs)

        loss.backward()
        optimizer.step()

    acc = float(correct_actions) / total_actions
    loss = float(tot_loss) / instance_count
    if verbose:
        print("Number of instances: {}    Number of network actions: {}".format(instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(float(correct_actions) / total_actions, tot_loss / instance_count))
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0   # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [variablesFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_variable = training_pair[0]
        target_variable = training_pair[1]

        loss = train(input_variable, target_variable, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)


######################################################################
# Plotting results
# ----------------
#
# Plotting is done with matplotlib, using the array of loss values
# ``plot_losses`` saved while training.
#
def train_matching_network(model, file_handle, base_classes, m=389, n=10, initlr=0.1,
                           momentum=0.9, wd=0.001, step_after=20000, niter=60000):
    model = model.cuda()
    lr = initlr
    optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum,
                                dampening=momentum, weight_decay=wd)
    loss_fn = nn.NLLLoss()
    all_labels = file_handle['all_labels'][...]
    total_loss = 0.0
    loss_count = 0.0
    for it in range(niter):
        optimizer.zero_grad()
        rand_labels = np.random.choice(base_classes, m, replace=False)
        num = np.random.choice(n, m) + 1
        batchsize = int(np.sum(num))
        train_feats = torch.zeros(batchsize, model.feat_dim)
        train_Y = torch.zeros(batchsize, m)
        test_feats = torch.zeros(m, model.feat_dim)
        test_labels = torch.range(0, m - 1)
        count = 0
        for j in range(m):
            idx = np.where(all_labels == rand_labels[j])[0]
            train_idx = np.sort(np.random.choice(idx, num[j], replace=False))
            test_idx = np.random.choice(idx)
            F_tmp = file_handle['all_feats'][list(train_idx)]
            train_feats[count:count + num[j]] = torch.Tensor(F_tmp)
            train_Y[count:count + num[j], j] = 1
            F_tmp = file_handle['all_feats'][test_idx]
            test_feats[j] = torch.Tensor(F_tmp)
            count = count + num[j]

        train_feats = Variable(train_feats.cuda())
        train_Y = Variable(train_Y.cuda())
        test_feats = Variable(test_feats.cuda())
        test_labels = Variable(test_labels.long().cuda())

        logprob = model(test_feats, train_feats, train_Y)
        loss = loss_fn(logprob, test_labels)
        loss.backward()
        optimizer.step()

        if (it + 1) % step_after == 0:
            lr = lr / 10
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        total_loss = total_loss + loss.data[0]
        loss_count = loss_count + 1
        if (it + 1) % 1 == 0:
            print('{:d}:{:f}'.format(it, total_loss / loss_count))
            total_loss = 0.0
            loss_count = 0.0
    return model