The following 47 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.CrossEntropyLoss().
def eval_model(model, valid):
    """Evaluate a language model on `valid` and return its perplexity.

    `valid` is a (inputs, targets) pair of pre-batched tensors; evaluation
    is done in unrolled chunks of `model.args.unroll_size` with batch size 1.
    """
    model.eval()
    args = model.args
    total_loss = 0.0
    unroll_size = model.args.unroll_size
    # Sum (not average) so the total can be normalised by token count below.
    criterion = nn.CrossEntropyLoss(size_average=False)
    hidden = model.init_hidden(1)
    N = (len(valid[0])-1)//unroll_size + 1
    for i in range(N):
        x = valid[0][i*unroll_size:(i+1)*unroll_size]
        y = valid[1][i*unroll_size:(i+1)*unroll_size].view(-1)
        x, y = Variable(x, volatile=True), Variable(y)
        # Re-wrap the hidden state so the autograd graph does not grow
        # across chunks (truncated evaluation); tuple for LSTM, single
        # tensor otherwise.
        hidden = (Variable(hidden[0].data), Variable(hidden[1].data)) if args.lstm \
            else Variable(hidden.data)
        output, hidden = model(x, hidden)
        loss = criterion(output, y)
        total_loss += loss.data[0]
    avg_loss = total_loss / valid[1].numel()
    ppl = np.exp(avg_loss)
    return ppl
def forward(self, x): encoder_outs = [] # encoder pathway, save outputs for merging for i, module in enumerate(self.down_convs): x, before_pool = module(x) encoder_outs.append(before_pool) for i, module in enumerate(self.up_convs): before_pool = encoder_outs[-(i+2)] x = module(before_pool, x) # No softmax is used. This means you need to use # nn.CrossEntropyLoss is your training script, # as this module includes a softmax already. x = self.conv_final(x) return x
def initialize(is_gpu, dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker):
    """Build the data loaders, network, criterion, optimizer and LR scheduler."""
    trainloader, testloader, li_class = make_dataloader_custom_file(
        dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    net = Net_gap()
    criterion = nn.CrossEntropyLoss()
    if is_gpu:
        net.cuda()
        criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    # Anneal the learning rate once the monitored metric plateaus.
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=1, patience=8,
                                  epsilon=0.00001, min_lr=0.000001)
    return trainloader, testloader, net, criterion, optimizer, scheduler, li_class
def _loss_fn(self, image, label):
    """Return the model's cross-entropy loss on one image as a numpy value."""
    # lazy import
    import torch
    import torch.nn as nn
    from torch.autograd import Variable

    image = self._process_input(image)
    target = torch.from_numpy(np.array([label]))
    if self.cuda:  # pragma: no cover
        target = target.cuda()
    target = Variable(target)
    batch = torch.from_numpy(image[None])
    if self.cuda:  # pragma: no cover
        batch = batch.cuda()
    batch = Variable(batch, volatile=True)
    predictions = self._model(batch)
    loss = nn.CrossEntropyLoss()(predictions, target)
    loss = loss.data
    if self.cuda:  # pragma: no cover
        loss = loss.cpu()
    return loss.numpy()
def __init__(self, training_file='../res/trump_tweets.txt', model_file='../res/model.pt',
             n_epochs=1000000, hidden_size=256, n_layers=2, learning_rate=0.001,
             chunk_len=140):
    """Set up training state, resuming from a saved model when one exists."""
    self.training_file = training_file
    self.model_file = model_file
    self.n_epochs = n_epochs
    self.hidden_size = hidden_size
    self.n_layers = n_layers
    self.learning_rate = learning_rate
    self.chunk_len = chunk_len
    self.file, self.file_len = read_file(training_file)
    # Resume from a checkpoint when available, otherwise build a fresh RNN.
    if os.path.isfile(model_file):
        self.decoder = torch.load(model_file)
        print('Loaded old model!')
    else:
        self.decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
        print('Constructed new model!')
    self.decoder_optimizer = torch.optim.Adam(self.decoder.parameters(), learning_rate)
    self.criterion = nn.CrossEntropyLoss()
    self.generator = Generator(self.decoder)
def training_loop(lowshot_dataset, num_classes, params, batchsize=1000, maxiters=1000):
    """Train a linear classifier on low-shot features with SGD and return it."""
    model = nn.Linear(lowshot_dataset.featdim(), num_classes).cuda()
    optimizer = torch.optim.SGD(model.parameters(), params.lr,
                                momentum=params.momentum,
                                dampening=params.momentum,
                                weight_decay=params.wd)
    loss_function = nn.CrossEntropyLoss().cuda()
    for step in range(maxiters):
        x, y = lowshot_dataset.get_sample(batchsize)
        optimizer.zero_grad()
        scores = model(Variable(x.cuda()))
        loss = loss_function(scores, Variable(y.cuda()))
        loss.backward()
        optimizer.step()
        # Periodic progress report.
        if step % 100 == 0:
            print('{:d}: {:f}'.format(step, loss.data[0]))
    return model
def __init__(self, params, inputdim, nclasses, l2reg=0., batch_size=64, seed=1111,
             cudaEfficient=False):
    """Build an MLP (or logistic-regression) classifier on GPU.

    PARAMETERS:
    -nhid:       number of hidden units (0: Logistic Regression)
    -optim:      optimizer ("sgd,lr=0.1", "adam", "rmsprop" ..)
    -tenacity:   how many times dev acc does not increase before stopping
    -epoch_size: each epoch corresponds to epoch_size pass on the train set
    -max_epoch:  max number of epoches
    -dropout:    dropout for MLP
    """
    super(self.__class__, self).__init__(inputdim, nclasses, l2reg, batch_size,
                                         seed, cudaEfficient)
    # Hyper-parameters, with defaults applied when absent from `params`.
    self.nhid = 0 if "nhid" not in params else params["nhid"]
    self.optim = "adam" if "optim" not in params else params["optim"]
    self.tenacity = 5 if "tenacity" not in params else params["tenacity"]
    self.epoch_size = 4 if "epoch_size" not in params else params["epoch_size"]
    self.max_epoch = 200 if "max_epoch" not in params else params["max_epoch"]
    self.dropout = 0. if "dropout" not in params else params["dropout"]
    self.batch_size = 64 if "batch_size" not in params else params["batch_size"]
    # BUG FIX: use self.nhid (which has a default) rather than params["nhid"],
    # which raised KeyError whenever "nhid" was not supplied.
    if self.nhid == 0:
        # Logistic regression: single linear layer.
        self.model = nn.Sequential(
            nn.Linear(self.inputdim, self.nclasses),
        ).cuda()
    else:
        # One hidden layer with dropout and tanh non-linearity.
        self.model = nn.Sequential(
            nn.Linear(self.inputdim, self.nhid),
            nn.Dropout(p=self.dropout),
            nn.Tanh(),
            nn.Linear(self.nhid, self.nclasses),
        ).cuda()
    # Summed (not averaged) cross-entropy loss.
    self.loss_fn = nn.CrossEntropyLoss().cuda()
    self.loss_fn.size_average = False
    optim_fn, optim_params = utils.get_optimizer(self.optim)
    self.optimizer = optim_fn(self.model.parameters(), **optim_params)
    self.optimizer.param_groups[0]['weight_decay'] = self.l2reg
def eval_model(model_path, mode='dev'):
    """Load a StackBiLSTMMaxout checkpoint and print MNLI dev (acc, loss) scores."""
    torch.manual_seed(6)
    snli_d, mnli_d, embd = data_loader.load_data_sm(
        config.DATA_ROOT, config.EMBD_FILE, reseversed=False,
        batch_sizes=(32, 32, 32, 32, 32), device=0)
    m_train, m_dev_m, m_dev_um, m_test_m, m_test_um = mnli_d
    # Evaluate in a fixed order: disable shuffling/sorting on every split.
    for split in (m_dev_um, m_dev_m, m_test_um, m_test_m):
        split.shuffle = False
        split.sort = False
    model = StackBiLSTMMaxout()
    model.Embd.weight.data = embd
    if torch.cuda.is_available():
        embd.cuda()
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    model.load_state_dict(torch.load(model_path))
    model.max_l = 150
    m_pred = model_eval(model, m_dev_m, criterion)
    um_pred = model_eval(model, m_dev_um, criterion)
    print("dev_mismatched_score (acc, loss):", um_pred)
    print("dev_matched_score (acc, loss):", m_pred)
def __init__(self):
    """Criterion wrapping the multi-label soft-margin loss."""
    super(Criterion, self).__init__()
    # Alternatives previously tried: MultiLabelMarginLoss, MultiMarginLoss,
    # CrossEntropyLoss, NLLLoss.
    self.loss = nn.MultiLabelSoftMarginLoss()
def test_as_2d_criterion(self):
    """Smoke-test As2DCriterion wrapping CrossEntropyLoss on 4D predictions."""
    from inferno.extensions.criteria.core import As2DCriterion
    import torch
    import torch.nn as nn
    from torch.autograd import Variable

    scores = Variable(torch.FloatTensor(2, 10, 100, 100).uniform_())
    prediction = nn.Softmax2d()(scores)
    target = Variable(torch.LongTensor(2, 100, 100).fill_(0))
    criterion = As2DCriterion(nn.CrossEntropyLoss())
    loss = criterion(prediction, target)
def eval_model(niter, model, valid_x, valid_y):
    """Return the validation error rate (1 - accuracy) of `model`."""
    model.eval()
    N = len(valid_x)
    criterion = nn.CrossEntropyLoss()
    correct = 0.0
    cnt = 0
    total_loss = 0.0
    for bx, by in zip(valid_x, valid_y):
        bx, by = Variable(bx, volatile=True), Variable(by)
        output = model(bx)
        # Weight the batch loss by its batch size (dim 1).
        total_loss += criterion(output, by).data[0] * bx.size(1)
        pred = output.data.max(1)[1]
        correct += pred.eq(by.data).cpu().sum()
        cnt += by.numel()
    model.train()
    return 1.0 - correct / cnt
def train_model(epoch, model, optimizer, train_x, train_y, valid_x, valid_y,
                test_x, test_y, best_valid, test_err):
    """Train the classifier for one epoch; return updated (best_valid, test_err).

    Evaluates on the validation set after every batch and re-evaluates on the
    test set whenever validation error improves.
    """
    model.train()
    args = model.args
    N = len(train_x)
    niter = epoch*len(train_x)
    criterion = nn.CrossEntropyLoss()
    cnt = 0
    for x, y in zip(train_x, train_y):
        niter += 1
        cnt += 1
        model.zero_grad()
        x, y = Variable(x), Variable(y)
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        # NOTE(review): evaluating on valid (and possibly test) after every
        # batch is expensive; presumably intentional for small datasets.
        valid_err = eval_model(niter, model, valid_x, valid_y)
        sys.stdout.write("Epoch={} iter={} lr={:.6f} train_loss={:.6f} valid_err={:.6f}\n".format(
            epoch, niter,
            optimizer.param_groups[0]['lr'],
            loss.data[0],
            valid_err
        ))
        if valid_err < best_valid:
            best_valid = valid_err
            test_err = eval_model(niter, model, test_x, test_y)
        sys.stdout.write("\n")
    return best_valid, test_err
def __init__(self, hidden_size, vocab_size, num_hop=3, qa=None):
    """DMN+ model: input, question, episodic-memory and answer modules."""
    super(DMNPlus, self).__init__()
    self.num_hop = num_hop
    self.qa = qa
    # Sparse embedding with padding index 0, weights ~ Uniform(-sqrt(3), sqrt(3)).
    self.word_embedding = nn.Embedding(vocab_size, hidden_size, padding_idx=0,
                                       sparse=True).cuda()
    init.uniform(self.word_embedding.state_dict()['weight'], a=-(3**0.5), b=3**0.5)
    # Summed (not averaged) cross-entropy.
    self.criterion = nn.CrossEntropyLoss(size_average=False)
    self.input_module = InputModule(vocab_size, hidden_size)
    self.question_module = QuestionModule(vocab_size, hidden_size)
    self.memory = EpisodicMemory(hidden_size)
    self.answer_module = AnswerModule(vocab_size, hidden_size)
def test(classifier, generator, data_loader, dataset="MNIST"):
    """Evaluate classifier(generator(x)) over `data_loader`; print avg loss/accuracy."""
    # Put both networks into eval mode (Dropout / BatchNorm).
    generator.eval()
    classifier.eval()
    loss = 0
    acc = 0
    criterion = nn.CrossEntropyLoss()
    for images, labels in data_loader:
        images = make_variable(images, volatile=True)
        labels = make_variable(labels.squeeze_())
        preds = classifier(generator(images))
        loss += criterion(preds, labels).data[0]
        pred_cls = preds.data.max(1)[1]
        acc += pred_cls.eq(labels.data).cpu().sum()
    # Average loss per batch, accuracy per example.
    loss /= len(data_loader)
    acc /= len(data_loader.dataset)
    print("Avg Loss = {:.5f}, Avg Accuracy = {:2.5%}".format(loss, acc))
def __init__(self, opt):
    """Build the classifier net; attach CE loss only in 'classify' mode."""
    self.net = Classifier(opt)
    self.criterion = nn.CrossEntropyLoss() if opt['classify'] else []
    # Total number of scalar parameters in the network.
    self.nParams = sum(p.view(-1).size()[0] for p in self.net.parameters())
    self.outSize = self.net.outSize
def __init__(self, opt):
    """Build the classifier net; attach CE loss only in 'classify' mode."""
    self.net = Classifier(opt)
    if opt['classify']:
        self.criterion = nn.CrossEntropyLoss()
    else:
        self.criterion = []
    # BUG FIX: np.sum([1 for i in ...]) counted the number of parameter
    # *tensors*, not parameters. Count scalar elements, matching the sibling
    # class that computes sum of view(-1).size()[0].
    self.nParams = sum(i.view(-1).size()[0] for i in self.net.parameters())
    self.outSize = self.net.outSize
def __init__(self, opt):
    """Matching network: cosine-similarity matcher over a local embedding model."""
    super(MatchingNet, self).__init__()
    # Cosine-similarity layer used to compare embeddings.
    self.cosineSim = nn.CosineSimilarity()
    # Local embedding model (simple or FCE), built from the configured module path.
    self.embedModel = importlib.import_module(opt['embedModel']).build(opt)
    self.embedModel.setCuda(opt['useCUDA'])
    # Loss is kept here because it does not load with the model.
    self.lossF = nn.CrossEntropyLoss()
def build_network(self):
    """Construct the refiner/discriminator pair, their optimizers and losses."""
    print('=' * 50)
    print('Building network...')
    self.R = Refiner(4, cfg.img_channels, nb_features=64)
    self.D = Discriminator(input_features=cfg.img_channels)
    if cfg.cuda_use:
        self.R.cuda(cfg.cuda_num)
        self.D.cuda(cfg.cuda_num)
    self.opt_R = torch.optim.Adam(self.R.parameters(), lr=cfg.r_lr)
    self.opt_D = torch.optim.SGD(self.D.parameters(), lr=cfg.d_lr)
    # Summed L1 self-regularisation; averaged adversarial cross-entropy.
    self.self_regularization_loss = nn.L1Loss(size_average=False)
    self.local_adversarial_loss = nn.CrossEntropyLoss(size_average=True)
    self.delta = cfg.delta
def __init__(self, dictionary, device_id=None, bad_toks=[], size_average=True):
    """Cross-entropy over the vocabulary with `bad_toks` masked out (weight 0)."""
    weights = torch.Tensor(len(dictionary)).fill_(1)
    # Zero out tokens that must never contribute to the loss.
    for tok in bad_toks:
        weights[dictionary.get_idx(tok)] = 0.0
    if device_id is not None:
        weights = weights.cuda(device_id)
    self.crit = nn.CrossEntropyLoss(weights, size_average=size_average)
def span_loss(config, q_mask, logits_start, start, logits_end, end):
    """Masked cross-entropy loss over answer-span start/end logits.

    Returns the sum of the masked mean CE losses for the start and end
    positions, flattened over max_num_sents * max_sent_size positions.
    """
    size = config.max_num_sents * config.max_sent_size
    loss_mask = reduce_mask(q_mask, 1)
    # BUG FIX: nn.CrossEntropyLoss is a module, not a function — the old code
    # passed tensors to its *constructor* and then referenced the undefined
    # names `losses` and `loss_mean`. Instantiate it unreduced so per-example
    # losses can be masked before averaging.
    criterion = nn.CrossEntropyLoss(reduction='none')
    # NOTE(review): `start`/`end` appear to be one-hot over positions (they are
    # viewed as (-1, size)); convert to class indices for the CE target — confirm.
    losses_start = criterion(logits_start, start.view(-1, size).max(1)[1])
    ce_loss_start = torch.mean(loss_mask * losses_start)
    losses_end = criterion(logits_end, end.view(-1, size).max(1)[1])
    ce_loss_end = torch.mean(loss_mask * losses_end)
    # BUG FIX: the total span loss is the sum of both terms, not a difference.
    return ce_loss_start + ce_loss_end
def build_model(self):
    """Create the ResNet-101 model, CE loss, SGD optimizer and LR scheduler."""
    print('==> Build model and setup loss and optimizer')
    # Pretrained backbone with the configured number of input channels.
    self.model = resnet101(pretrained=True, channel=self.channel).cuda()
    self.criterion = nn.CrossEntropyLoss().cuda()
    self.optimizer = torch.optim.SGD(self.model.parameters(), self.lr, momentum=0.9)
    # Drop the LR when validation loss stops improving.
    self.scheduler = ReduceLROnPlateau(self.optimizer, 'min', patience=1, verbose=True)
def build_model(self):
    """Create a 3-channel ResNet-101, CE loss, SGD optimizer and LR scheduler."""
    print('==> Build model and setup loss and optimizer')
    # Pretrained backbone over RGB (3-channel) input.
    self.model = resnet101(pretrained=True, channel=3).cuda()
    self.criterion = nn.CrossEntropyLoss().cuda()
    self.optimizer = torch.optim.SGD(self.model.parameters(), self.lr, momentum=0.9)
    # Drop the LR when validation loss stops improving.
    self.scheduler = ReduceLROnPlateau(self.optimizer, 'min', patience=1, verbose=True)
def main_voc2007():
    """Train/evaluate a WILDCAT ResNet-101 classifier.

    NOTE(review): despite the name `main_voc2007`, this function uses the
    Mit67 dataset (67 classes) and saves under ../expes/models/mit67/.
    """
    global args, best_prec1, use_gpu
    args = parser.parse_args()
    use_gpu = torch.cuda.is_available()
    # define dataset
    train_dataset = Mit67(args.data, 'train')
    val_dataset = Mit67(args.data, 'test')
    num_classes = 67
    # load model
    model = resnet101_wildcat(num_classes, pretrained=True, kmax=args.k,
                              alpha=args.alpha, num_maps=args.maps)
    print('classifier', model.classifier)
    print('spatial pooling', model.spatial_pooling)
    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss()
    # define optimizer
    optimizer = torch.optim.SGD(model.get_config_optim(args.lr, args.lrp),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    state = {'batch_size': args.batch_size, 'image_size': args.image_size,
             'max_epochs': args.epochs, 'evaluate': args.evaluate,
             'resume': args.resume}
    state['difficult_examples'] = True
    state['save_model_path'] = '../expes/models/mit67/'
    engine = MulticlassEngine(state)
    engine.learning(model, criterion, train_dataset, val_dataset, optimizer)
def factory(opt, cuda=True):
    """Return a CrossEntropyLoss criterion, moved to GPU when `cuda` is true."""
    criterion = nn.CrossEntropyLoss()
    # Module.cuda() returns the module itself, so this preserves identity.
    return criterion.cuda() if cuda else criterion
def eval_src(encoder, classifier, data_loader):
    """Evaluate classifier for source domain."""
    # eval mode switches Dropout and BatchNorm to inference behaviour.
    encoder.eval()
    classifier.eval()
    loss = 0
    acc = 0
    criterion = nn.CrossEntropyLoss()
    for images, labels in data_loader:
        images = make_variable(images, volatile=True)
        labels = make_variable(labels)
        preds = classifier(encoder(images))
        loss += criterion(preds, labels).data[0]
        acc += preds.data.max(1)[1].eq(labels.data).cpu().sum()
    # Average loss per batch, accuracy per example.
    loss /= len(data_loader)
    acc /= len(data_loader.dataset)
    print("Avg Loss = {}, Avg Accuracy = {:2%}".format(loss, acc))
def eval_tgt(encoder, classifier, data_loader):
    """Evaluation for target encoder by source classifier on target dataset."""
    # eval mode switches Dropout and BatchNorm to inference behaviour.
    encoder.eval()
    classifier.eval()
    loss = 0
    acc = 0
    criterion = nn.CrossEntropyLoss()
    for images, labels in data_loader:
        images = make_variable(images, volatile=True)
        labels = make_variable(labels).squeeze_()
        preds = classifier(encoder(images))
        loss += criterion(preds, labels).data[0]
        acc += preds.data.max(1)[1].eq(labels.data).cpu().sum()
    # Average loss per batch, accuracy per example.
    loss /= len(data_loader)
    acc /= len(data_loader.dataset)
    print("Avg Loss = {}, Avg Accuracy = {:2%}".format(loss, acc))
def prepare(self, p): """Set up model.""" # The CPU version is slow... p['batch_size'] = 32 if p.cuda else 4 def cast(tensor): return tensor.cuda() if p.cuda else tensor self.model = memnn.MemNN(p, p.num_features) self.criterion = nn.CrossEntropyLoss() self.data_batches = [ [ # memories, queries, memory_lengths, query_lengths Variable(cast(torch.zeros(p.batch_size * p.mem_size).long())), Variable(cast(torch.zeros(p.batch_size * 28).long())), Variable(cast(torch.ones(p.batch_size, p.mem_size).long())), Variable(cast(torch.LongTensor(p.batch_size).fill_(28).long())), ] for _ in range(p.num_batches) ] self.cand_batches = [ Variable(cast(torch.zeros(p.batch_size * 14, p.embedding_size))) for _ in range(p.num_batches) ] self.target_batches = [ Variable(cast(torch.ones(p.batch_size).long())) for _ in range(p.num_batches) ] if p.cuda: self.model.cuda() self.criterion.cuda()
def prepare(self, p):
    """Build a word-level RNN language-model benchmark: model, criterion, batches."""
    def get_rnn():
        # LSTM/GRU use their class directly; vanilla RNNs pick a nonlinearity.
        if p.rnn_type in ['LSTM', 'GRU']:
            return getattr(nn, p.rnn_type)(p.embedding_size, p.hidden_size,
                                           p.num_layers, dropout=p.dropout)
        else:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[p.rnn_type]
            return nn.RNN(p.embedding_size, p.hidden_size, p.num_layers,
                          nonlinearity=nonlinearity, dropout=p.dropout)

    class Model(nn.Module):
        # Embedding -> dropout -> RNN -> dropout -> per-timestep decoder.
        def __init__(self):
            super(Model, self).__init__()
            self.drop = nn.Dropout(p.dropout)
            self.rnn = get_rnn()
            self.encoder = nn.Embedding(p.num_tokens, p.embedding_size)
            self.decoder = nn.Linear(p.hidden_size, p.num_tokens)

        def forward(self, input):
            emb = self.drop(self.encoder(input))
            output, hidden = self.rnn(emb)
            output = self.drop(output)
            # Flatten (time, batch) to project every timestep to vocab logits,
            # then restore the 3D shape.
            decoded = self.decoder(output.view(output.size(0) * output.size(1),
                                               output.size(2)))
            return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden

    def cast(tensor):
        return tensor.long().cuda() if p.cuda else tensor.long()

    self.model = Model()
    self.criterion = nn.CrossEntropyLoss()
    self.data_batches = [Variable(cast(torch.zeros(p.bptt, p.batch_size)))
                         for _ in range(p.num_batches)]
    self.target_batches = [Variable(cast(torch.zeros(p.bptt * p.batch_size)))
                           for _ in range(p.num_batches)]
    if p.cuda:
        self.model.cuda()
        self.criterion.cuda()
def prepare(self, p):
    """Build a batch-norm LSTM (sequential MNIST) benchmark: model, criterion, batches."""
    # The CPU version is slow...
    p['batch_size'] = 20 if p.cuda else 5

    class Model(nn.Module):
        # BN-LSTM over the pixel sequence, classified from the final hidden state.
        def __init__(self):
            super(Model, self).__init__()
            self.rnn = bnlstm.LSTM(cell_class=bnlstm.BNLSTMCell, input_size=1,
                                   hidden_size=p.hidden_size, batch_first=True,
                                   max_length=p.max_length)
            self.fc = nn.Linear(in_features=p.hidden_size, out_features=10)  # 10 digits in mnist

        def forward(self, data):
            hx = None
            if not p.pmnist:
                # Random initial hidden/cell states for the non-permuted task.
                h0 = Variable(data.data.new(data.size(0), p.hidden_size)
                              .normal_(0, 0.1))
                c0 = Variable(data.data.new(data.size(0), p.hidden_size)
                              .normal_(0, 0.1))
                hx = (h0, c0)
            _, (h_n, _) = self.rnn(input_=data, hx = hx)
            # Classify from the final hidden state of the (only) layer.
            logits = self.fc(h_n[0])
            return logits

    def cast(tensor):
        return tensor.cuda() if p.cuda else tensor

    self.model = Model()
    self.criterion = nn.CrossEntropyLoss()
    # Each image is fed as a 784-step sequence of single pixels.
    self.data_batches = [Variable(cast(torch.zeros(p.batch_size, 28 * 28, 1)))
                         for _ in range(p.num_batches)]
    self.target_batches = [Variable(cast(torch.zeros(p.batch_size)).long())
                           for _ in range(p.num_batches)]
    if p.cuda:
        self.model.cuda()
        self.criterion.cuda()
def ce_loss(loss_weight=None, size_ave=True):
    """Build a CrossEntropyLoss with optional class weights and averaging flag."""
    criterion = nn.CrossEntropyLoss(weight=loss_weight, size_average=size_ave)
    return criterion
def initialize(mode, is_gpu, dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker):
    """Build data loaders (selected by `mode`), network, loss, optimizer and scheduler."""
    if mode == 'TORCHVISION_MEMORY':
        # This maker takes no image-extension argument.
        trainloader, testloader, li_class = make_dataloader_torchvison_memory(
            dir_data, di_set_transform, n_img_per_batch, n_worker)
    else:
        makers = {
            'TORCHVISION_IMAGEFOLDER': make_dataloader_torchvison_imagefolder,
            'CUSTOM_MEMORY': make_dataloader_custom_memory,
            'CUSTOM_FILE': make_dataloader_custom_file,
        }
        maker = makers.get(mode, make_dataloader_custom_tensordataset)
        trainloader, testloader, li_class = maker(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    net = Net()
    criterion = nn.CrossEntropyLoss()
    if is_gpu:
        net.cuda()
        criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    # Anneal the learning rate when the monitored metric plateaus.
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=1, patience=8,
                                  epsilon=0.00001, min_lr=0.000001)
    return trainloader, testloader, net, criterion, optimizer, scheduler, li_class
def set_temperature(self, valid_loader):
    """
    Tune the tempearature of the model (using the validation set).
    We're going to set it to optimize NLL.
    valid_loader (DataLoader): validation set loader

    Returns self so calls can be chained.
    """
    self.cuda()
    nll_criterion = nn.CrossEntropyLoss().cuda()
    ece_criterion = _ECELoss().cuda()
    # First: collect all the logits and labels for the validation set
    logits_list = []
    labels_list = []
    for input, label in valid_loader:
        input_var = Variable(input, volatile=True).cuda()
        logits_var = self.model(input_var)
        logits_list.append(logits_var.data)
        labels_list.append(label)
    logits = torch.cat(logits_list).cuda()
    labels = torch.cat(labels_list).cuda()
    logits_var = Variable(logits)
    labels_var = Variable(labels)
    # Calculate NLL and ECE before temperature scaling
    before_temperature_nll = nll_criterion(logits_var, labels_var).data[0]
    before_temperature_ece = ece_criterion(logits_var, labels_var).data[0]
    print('Before temperature - NLL: %.3f, ECE: %.3f' % (before_temperature_nll, before_temperature_ece))
    # Next: optimize the temperature w.r.t. NLL
    optimizer = optim.LBFGS([self.temperature], lr=0.01, max_iter=50)
    # NOTE(review): `eval` shadows the builtin; kept as-is for compatibility.
    def eval():
        # LBFGS closure: recompute the NLL at the current temperature and
        # backprop into self.temperature.
        loss = nll_criterion(self.temperature_scale(logits_var), labels_var)
        loss.backward()
        return loss
    optimizer.step(eval)
    # Calculate NLL and ECE after temperature scaling
    after_temperature_nll = nll_criterion(self.temperature_scale(logits_var), labels_var).data[0]
    after_temperature_ece = ece_criterion(self.temperature_scale(logits_var), labels_var).data[0]
    print('Optimal temperature: %.3f' % self.temperature.data[0])
    print('After temperature - NLL: %.3f, ECE: %.3f' % (after_temperature_nll, after_temperature_ece))
    return self
def run_main():
    """Entry point: run the InnerAtt model through the shared training routine."""
    import torch.nn as nn
    from repeval.models.inner_att import InnerAtt
    from repeval.routines import main

    args = parser.parse_args()
    main(args, InnerAtt, Corpora, EmbeddingsList, nn.CrossEntropyLoss())
def __init__(self, input_size, hidden_size, output_size, n_layers=1, gpu=-1):
    """Create the character RNN with its Adam optimizer and CE criterion."""
    self.decoder = RNN(input_size, hidden_size, output_size, n_layers, gpu)
    # gpu >= 0 selects CUDA; negative means CPU.
    if gpu >= 0:
        print("Use GPU %d" % torch.cuda.current_device())
        self.decoder.cuda()
    self.optimizer = torch.optim.Adam(self.decoder.parameters(), lr=0.01)
    self.criterion = nn.CrossEntropyLoss()
def __init__(self):
    """Classification head trained with the cross-entropy loss."""
    super(Classification, self).__init__()
    self.loss = nn.CrossEntropyLoss()
def __init__(self, aux_loss_type, aux_loss_wt, num_classes):
    """Combine cross-entropy with a configurable, weighted auxiliary loss."""
    # Map the configured name to its auxiliary loss implementation.
    aux_loss_fns = dict(l2=l2_loss,
                        sgm=SGMLoss(num_classes),
                        batchsgm=BatchSGMLoss(num_classes))
    self.aux_loss_fn = aux_loss_fns[aux_loss_type]
    self.aux_loss_type = aux_loss_type
    self.cross_entropy_loss = nn.CrossEntropyLoss()
    self.aux_loss_wt = aux_loss_wt
def train_classifier(filehandle, base_classes, cachefile, networkfile, total_num_classes = 1000, lr=0.1, wd=0.0001, momentum=0.9, batchsize=1000, niter=10000):
    """Train (or load) a linear classifier over precomputed features.

    Priority: cached classifier weights > weights extracted from a saved
    network checkpoint > fresh SGD training on examples of `base_classes`.
    Returns the nn.Linear model on GPU.
    """
    # either use pre-existing classifier or train one
    all_labels = filehandle['all_labels'][...]
    all_labels = all_labels.astype(int)
    all_feats = filehandle['all_feats']
    # Indices of examples belonging to the base classes.
    base_class_ids = np.where(np.in1d(all_labels, base_classes))[0]
    loss = nn.CrossEntropyLoss().cuda()
    model = nn.Linear(all_feats[0].size, total_num_classes).cuda()
    if os.path.isfile(cachefile):
        # Fast path: reuse previously trained classifier weights.
        tmp = torch.load(cachefile)
        model.load_state_dict(tmp)
    elif os.path.isfile(networkfile):
        # Extract the classifier layer out of a full-network checkpoint.
        tmp = torch.load(networkfile)
        if 'module.classifier.bias' in tmp['state']:
            state_dict = {'weight':tmp['state']['module.classifier.weight'],
                          'bias':tmp['state']['module.classifier.bias']}
        else:
            # Checkpoint has no bias term: rebuild the layer without one.
            model = nn.Linear(all_feats[0].size, total_num_classes, bias=False).cuda()
            state_dict = {'weight':tmp['state']['module.classifier.weight']}
        model.load_state_dict(state_dict)
    else:
        # Train from scratch with plain SGD over random base-class batches.
        optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum,
            weight_decay=wd, dampening=0)
        for i in range(niter):
            optimizer.zero_grad()
            # Sorted indices: h5py fancy indexing requires increasing order.
            idx = np.sort(np.random.choice(base_class_ids, batchsize, replace=False))
            F = all_feats[idx,:]
            F = Variable(torch.Tensor(F)).cuda()
            L = Variable(torch.LongTensor(all_labels[idx])).cuda()
            S = model(F)
            loss_val = loss(S, L)
            loss_val.backward()
            optimizer.step()
            if i % 100 == 0:
                print('Classifier training {:d}: {:f}'.format(i, loss_val.data[0]))
        torch.save(model.state_dict(), cachefile)
    return model
def train(model, db, exp, args, use_cuda=False):
    """Train cnnT2 on the full dataset, checkpointing whenever loss improves.

    NOTE(review): the train/test split samplers are commented out, so both
    training and "test" accuracy are measured on the same loader.
    """
    print("Training...")
    init_time = time.clock()
    #trainsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(0, int(len(db) * 0.7)))
    #trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=trainsampler)
    #testsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(int(len(db) * 0.7), len(db) - 1))
    #testloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=testsampler)
    trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    best_loss = 100000
    for epoch in range(args.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            # Convert one-hot targets to class indices for CrossEntropyLoss.
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0
            # Checkpoint on improvement or every 100 iterations.
            if last_loss < best_loss or i % 100 == 0:
                best_loss = last_loss
                #acc = evaluate(model, testloader, args, use_cuda)
                acc = evaluate(model, trainloader, args, use_cuda)
                exp.add_scalar_value('train_loss', last_loss, time.clock() - init_time)
                exp.add_scalar_value('test_accu', acc, time.clock() - init_time)
                torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT2_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    # Final evaluation and save.
    #acc = evaluate(model, testloader, args, use_cuda)
    acc = evaluate(model, trainloader, args, use_cuda)
    torch.save(model.state_dict(),
               os.path.join('saved_model', 'cnnT2_all_acc_{}.t7'.format(acc)))
    print("Finished Training!")
def train(model, db, exp, args, use_cuda=False):
    """Train on a 70/30 split of `db`, checkpointing whenever loss improves."""
    print("Training...")
    init_time = time.clock()
    # Sequential 70/30 split into train/test samplers.
    trainsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(0, int(len(db) * 0.7)))
    trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=trainsampler)
    testsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(int(len(db) * 0.7), len(db) - 1))
    testloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=testsampler)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    best_loss = 100000
    for epoch in range(args.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            # Re-enter train mode each batch (evaluate() may switch to eval).
            model.train()
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            # Convert one-hot targets to class indices for CrossEntropyLoss.
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0
            # Checkpoint on improvement or every 100 iterations.
            if last_loss < best_loss or i % 100 == 0:
                best_loss = last_loss
                acc = evaluate(model, testloader, args, use_cuda)
                exp.add_scalar_value('train_loss', last_loss, time.clock() - init_time)
                exp.add_scalar_value('test_accu', acc, time.clock() - init_time)
                torch.save(model.state_dict(), os.path.join('saved_model', '{}_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(args.name, epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    # Final evaluation and save.
    acc = evaluate(model, testloader, args, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', '{}_all_acc_{}.t7'.format(args.name, acc)))
    print("Finished Training!")
def train(model, db, exp, args, use_cuda=False):
    """Train on a 70/30 split of `db`, checkpointing whenever loss improves.

    Variant: checkpoints every 200 iterations (not 100) and logs 'test_acc'.
    """
    print("Training...")
    init_time = time.clock()
    # Sequential 70/30 split into train/test samplers.
    trainsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(0, int(len(db) * 0.7)))
    trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=trainsampler)
    testsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(int(len(db) * 0.7), len(db) - 1))
    testloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=testsampler)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    best_loss = 100000
    for epoch in range(args.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            # Convert one-hot targets to class indices for CrossEntropyLoss.
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0
            # Checkpoint on improvement or every 200 iterations.
            if last_loss < best_loss or i % 200 == 0:
                best_loss = last_loss
                acc = evaluate(model, testloader, args, use_cuda)
                exp.add_scalar_value('train_loss', last_loss, time.clock() - init_time)
                exp.add_scalar_value('test_acc', acc, time.clock() - init_time)
                torch.save(model.state_dict(), os.path.join('saved_model', '{}_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(args.name, epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    # Final evaluation and save.
    acc = evaluate(model, testloader, args, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', '{}_all_acc_{}.t7'.format(args.name, acc)))
    print("Finished Training!")
def train(model, db, exp, args, use_cuda=False):
    """Train on a 70/30 split of `db`, checkpointing whenever loss improves."""
    print("Training...")
    init_time = time.clock()
    # Sequential 70/30 split into train/test samplers.
    trainsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(0, int(len(db) * 0.7)))
    trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=trainsampler)
    testsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(int(len(db) * 0.7), len(db) - 1))
    testloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=testsampler)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    best_loss = 100000
    for epoch in range(args.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            # Convert one-hot targets to class indices for CrossEntropyLoss.
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0
            # Checkpoint on improvement or every 100 iterations.
            if last_loss < best_loss or i % 100 == 0:
                best_loss = last_loss
                acc = evaluate(model, testloader, args, use_cuda)
                exp.add_scalar_value('train_loss', last_loss, time.clock() - init_time)
                exp.add_scalar_value('test_accu', acc, time.clock() - init_time)
                torch.save(model.state_dict(), os.path.join('saved_model', '{}_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(args.name, epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    # Final evaluation and save.
    acc = evaluate(model, testloader, args, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', '{}_all_acc_{}.t7'.format(args.name, acc)))
    print("Finished Training!")
def build_kaggle_submission_file(model_path):
    """Generate Kaggle MNLI submission CSVs (matched/mismatched) from a checkpoint."""
    torch.manual_seed(6)
    snli_d, mnli_d, embd = data_loader.load_data_sm(
        config.DATA_ROOT, config.EMBD_FILE, reseversed=False,
        batch_sizes=(32, 32, 32, 32, 32), device=0)
    m_train, m_dev_m, m_dev_um, m_test_m, m_test_um = mnli_d
    # Keep test iteration order fixed so row indices line up with pair IDs.
    m_test_um.shuffle = False
    m_test_m.shuffle = False
    m_test_um.sort = False
    m_test_m.sort = False
    model = StackBiLSTMMaxout()
    model.Embd.weight.data = embd
    # model.display()
    if torch.cuda.is_available():
        embd.cuda()
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    model.load_state_dict(torch.load(model_path))
    m_pred = model_eval(model, m_test_m, criterion, pred=True)
    um_pred = model_eval(model, m_test_um, criterion, pred=True)
    # NOTE(review): max_l is set *after* the predictions above, whereas the
    # sibling eval function sets it before — confirm this is intended.
    model.max_l = 150
    print(um_pred)
    print(m_pred)
    with open('./sub_um.csv', 'w+') as f:
        # Map predicted class indices back to label strings.
        index = ['entailment', 'contradiction', 'neutral']
        f.write("pairID,gold_label\n")
        for i, k in enumerate(um_pred):
            f.write(str(i) + "," + index[k] + "\n")
    with open('./sub_m.csv', 'w+') as f:
        index = ['entailment', 'contradiction', 'neutral']
        f.write("pairID,gold_label\n")
        for j, k in enumerate(m_pred):
            # Matched pair IDs start at 9847 in the Kaggle file.
            f.write(str(j + 9847) + "," + index[k] + "\n")
def __init__(self, opt, shared=None):
    """Set up the MemNN agent: dictionary, model, optimizers and (optionally)
    a generative decoder -- or attach to a shared instance's answer buffer.

    opt : option dict (batchsize, mem_size, output mode, optimizer, ...).
    shared : when given, reuse another agent's state instead of building
             a fresh model.
    """
    # Decide once whether to run on GPU and record the decision in opt.
    opt['cuda'] = not opt['no_cuda'] and torch.cuda.is_available()
    if opt['cuda']:
        print('[ Using CUDA ]')
        torch.cuda.device(opt['gpu'])
    if not shared:
        self.opt = opt
        self.id = 'MemNN'
        self.dict = DictionaryAgent(opt)
        # Per-batch-slot cache of the previous answers.
        self.answers = [None] * opt['batchsize']
        self.model = MemNN(opt, len(self.dict))
        self.mem_size = opt['mem_size']
        self.loss_fn = CrossEntropyLoss()
        self.decoder = None
        self.longest_label = 1
        self.END = self.dict.end_token
        self.END_TENSOR = torch.LongTensor(self.dict.parse(self.END))
        self.START = self.dict.start_token
        self.START_TENSOR = torch.LongTensor(self.dict.parse(self.START))
        # 'generate'/'g' adds an RNN decoder on top of the memory network;
        # 'rank'/'r' scores candidates with the base model alone.
        if opt['output'] == 'generate' or opt['output'] == 'g':
            self.decoder = Decoder(opt['embedding_size'], opt['embedding_size'],
                                   opt['rnn_layers'], opt, self.dict)
        elif opt['output'] != 'rank' and opt['output'] != 'r':
            raise NotImplementedError('Output type not supported.')
        # Only optimize parameters that require gradients (frozen ones are
        # skipped).
        optim_params = [p for p in self.model.parameters() if p.requires_grad]
        lr = opt['learning_rate']
        if opt['optimizer'] == 'sgd':
            self.optimizers = {'memnn': optim.SGD(optim_params, lr=lr)}
            if self.decoder is not None:
                self.optimizers['decoder'] = optim.SGD(self.decoder.parameters(), lr=lr)
        elif opt['optimizer'] == 'adam':
            self.optimizers = {'memnn': optim.Adam(optim_params, lr=lr)}
            if self.decoder is not None:
                self.optimizers['decoder'] = optim.Adam(self.decoder.parameters(), lr=lr)
        else:
            raise NotImplementedError('Optimizer not supported.')
        if opt['cuda']:
            # NOTE(review): under CUDA the base model is only shared across
            # processes while the decoder *is* moved to GPU -- confirm this
            # asymmetry is intentional.
            self.model.share_memory()
            if self.decoder is not None:
                self.decoder.cuda()
        if opt.get('model_file') and os.path.isfile(opt['model_file']):
            print('Loading existing model parameters from ' + opt['model_file'])
            self.load(opt['model_file'])
    else:
        # Shared setup: reuse the parent agent's answer buffer.
        self.answers = shared['answers']
    self.episode_done = True
    self.last_cands, self.last_cands_list = None, None
    super().__init__(opt, shared)
def train_model(epoch, model, train):
    """Run one epoch of truncated-BPTT language-model training.

    train : (inputs, targets) pair of tensors, presumably laid out as
            (seq_len, batch) -- TODO confirm against the data pipeline.
    Returns exp(mean per-token loss), i.e. the training perplexity.
    """
    model.train()
    args = model.args
    unroll_size = args.unroll_size
    batch_size = args.batch_size
    N = (len(train[0])-1)//unroll_size + 1
    lr = args.lr
    total_loss = 0.0
    # Sum (not average) the loss so normalization by batch / sequence length
    # can be done explicitly below.
    criterion = nn.CrossEntropyLoss(size_average=False)
    hidden = model.init_hidden(batch_size)
    for i in range(N):
        x = train[0][i*unroll_size:(i+1)*unroll_size]
        y = train[1][i*unroll_size:(i+1)*unroll_size].view(-1)
        x, y = Variable(x), Variable(y)
        # Detach the hidden state between unroll windows (truncated BPTT);
        # LSTMs carry an (h, c) tuple, other RNNs a single tensor.
        hidden = (Variable(hidden[0].data), Variable(hidden[1].data)) if args.lstm \
            else Variable(hidden.data)
        model.zero_grad()
        output, hidden = model(x, hidden)
        assert x.size(1) == batch_size
        loss = criterion(output, y) / x.size(1)  # mean over the batch
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip_grad)
        # Manual SGD with decoupled weight decay: shrink weights first, then
        # apply the gradient step.
        for p in model.parameters():
            if p.requires_grad:
                if args.weight_decay > 0:
                    p.data.mul_(1.0-args.weight_decay)
                p.data.add_(-lr, p.grad.data)
        if math.isnan(loss.data[0]) or math.isinf(loss.data[0]):
            # Training diverged; abort the whole process.
            # (BUG FIX: an unreachable `return` after sys.exit() was removed.)
            sys.exit(0)
        total_loss += loss.data[0] / x.size(0)  # per-token loss for this window
        if i%10 == 0:
            sys.stdout.write("\r{}".format(i))
            sys.stdout.flush()
    return np.exp(total_loss/N)
def predictions_and_gradient(self, image, label):
    """Return the model's logits for `image` and the gradient of the
    cross-entropy loss with respect to the input.

    image : numpy array with ndim == 3 (a single image without the batch
            axis; exact channel layout depends on self._process_input).
    label : integer class index used as the cross-entropy target.

    Returns (predictions, gradient): predictions has shape (num_classes,),
    gradient has the same shape as the processed `image`.
    """
    # lazy import
    import torch
    import torch.nn as nn
    from torch.autograd import Variable
    image = self._process_input(image)
    target = np.array([label])  # the loss expects a batch of targets
    target = torch.from_numpy(target)
    if self.cuda:  # pragma: no cover
        target = target.cuda()
    target = Variable(target)
    assert image.ndim == 3
    images = image[np.newaxis]  # add batch axis -> shape (1, ...)
    images = torch.from_numpy(images)
    if self.cuda:  # pragma: no cover
        images = images.cuda()
    # requires_grad=True so that backward() populates images.grad.
    images = Variable(images, requires_grad=True)
    predictions = self._model(images)
    ce = nn.CrossEntropyLoss()
    loss = ce(predictions, target)
    loss.backward()
    grad = images.grad
    # Move results to CPU numpy and strip the batch axis.
    predictions = predictions.data
    if self.cuda:  # pragma: no cover
        predictions = predictions.cpu()
    predictions = predictions.numpy()
    predictions = np.squeeze(predictions, axis=0)
    assert predictions.ndim == 1
    assert predictions.shape == (self.num_classes(),)
    grad = grad.data
    if self.cuda:  # pragma: no cover
        grad = grad.cpu()
    grad = grad.numpy()
    grad = self._process_gradient(grad)
    grad = np.squeeze(grad, axis=0)
    assert grad.shape == image.shape
    return predictions, grad
def train(model, train_set, valid_set, test_set, save, train_size=0,
          valid_size=5000, n_epochs=1, batch_size=64, lr=0.1, wd=0.0001,
          momentum=0.9, seed=None):
    """Train `model` with SGD (Nesterov momentum, weight decay), validating
    each epoch and saving the best checkpoint to <save>/model.t7.

    Parameters
    ----------
    model : network to train; moved to GPU(s) when CUDA is available.
    train_set, valid_set, test_set : datasets handed to _make_dataloaders.
    save : directory in which 'model.t7' is written.
    train_size, valid_size : split sizes forwarded to _make_dataloaders.
    n_epochs, batch_size, lr, wd, momentum : optimization hyperparameters.
    seed : optional torch RNG seed for reproducibility.
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Make criterion, optimizer, data loaders.
    train_loader, valid_loader, test_loader = _make_dataloaders(
        train_set=train_set,
        valid_set=valid_set,
        test_set=test_set,
        train_size=train_size,
        valid_size=valid_size,
        batch_size=batch_size,
    )
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum,
                          nesterov=True, weight_decay=wd)

    # BUG FIX: the original called model.cuda() unconditionally and crashed
    # on CPU-only machines; fall back to the CPU model when CUDA is absent.
    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            # Wrap model if multiple GPUs are present.
            model_wrapper = torch.nn.DataParallel(model).cuda()
        else:
            model_wrapper = model.cuda()
    else:
        model_wrapper = model

    # Train model, tracking the best validation error seen so far.
    best_error = 1
    for epoch in range(1, n_epochs + 1):
        _set_lr(optimizer, epoch, n_epochs, lr)
        train_results = run_epoch(
            loader=train_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=True,
        )
        valid_results = run_epoch(
            loader=valid_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=False,
        )

        # Determine if model is the best; checkpoint only on improvement.
        _, _, valid_error = valid_results
        if valid_error[0] < best_error:
            best_error = valid_error[0]
            print('New best error: %.4f' % best_error)
            torch.save(model.state_dict(), os.path.join(save, 'model.t7'))
def main():
    """Train a SPINN model on SNLI for 10 epochs, optionally batching the
    tree computations with torchfold (args.fold); prints the running average
    wall time per iteration every 10 iterations."""
    # Build the SNLI fields: parsed text plus shift-reduce transition
    # sequences, and a non-sequential label field.
    inputs = datasets.snli.ParsedTextField(lower=True)
    transitions = datasets.snli.ShiftReduceField()
    answers = data.Field(sequential=False)
    train, dev, test = datasets.SNLI.splits(inputs, answers, transitions)
    inputs.build_vocab(train, dev, test)
    answers.build_vocab(train)
    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        (train, dev, test), batch_size=args.batch_size,
        device=0 if args.cuda else -1)
    model = SPINN(3, 100, 1000)
    criterion = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=0.01)
    for epoch in range(10):
        start = time.time()
        iteration = 0
        for batch_idx, batch in enumerate(train_iter):
            opt.zero_grad()
            all_logits, all_labels = [], []
            fold = torchfold.Fold(cuda=args.cuda)
            for example in batch.dataset:
                tree = Tree(example, inputs.vocab, answers.vocab)
                if args.fold:
                    # Defer evaluation; fold batches the tree ops at apply().
                    all_logits.append(encode_tree_fold(fold, tree))
                else:
                    all_logits.append(encode_tree_regular(model, tree))
                all_labels.append(tree.label)
            if args.fold:
                res = fold.apply(model, [all_logits, all_labels])
                loss = criterion(res[0], res[1])
            else:
                # NOTE(review): the label tensor is built on CPU here; if
                # args.cuda places the logits on GPU this would mismatch --
                # confirm against encode_tree_regular.
                loss = criterion(torch.cat(all_logits, 0),
                                 Variable(torch.LongTensor(all_labels)))
            loss.backward(); opt.step()
            iteration += 1
            if iteration % 10 == 0:
                print("Avg. Time: %fs" % ((time.time() - start) / iteration))