Python torch.nn module: CrossEntropyLoss() usage examples

The following 47 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.CrossEntropyLoss().
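
A minimal, self-contained usage sketch first (written against the current tensor API; the project excerpts below date from the pre-0.4 Variable era): the criterion takes raw logits of shape (N, C) and integer class indices of shape (N,), and applies log-softmax internally.

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 10)           # batch of 4 examples, 10 classes
targets = torch.tensor([1, 0, 9, 3])  # gold class indices in [0, 10)
loss = criterion(logits, targets)     # scalar mean loss over the batch
print(loss.item())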

Project: sru    Author: taolei87
def eval_model(model, valid):
    model.eval()
    args = model.args
    total_loss = 0.0
    unroll_size = model.args.unroll_size
    criterion = nn.CrossEntropyLoss(size_average=False)
    hidden = model.init_hidden(1)
    N = (len(valid[0])-1)//unroll_size + 1
    for i in range(N):
        x = valid[0][i*unroll_size:(i+1)*unroll_size]
        y = valid[1][i*unroll_size:(i+1)*unroll_size].view(-1)
        x, y = Variable(x, volatile=True), Variable(y)
        hidden = (Variable(hidden[0].data), Variable(hidden[1].data)) if args.lstm \
            else Variable(hidden.data)
        output, hidden = model(x, hidden)
        loss = criterion(output, y)
        total_loss += loss.data[0]
    avg_loss = total_loss / valid[1].numel()
    ppl = np.exp(avg_loss)
    return ppl
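
A note on this pattern: size_average=False makes the criterion return the summed (rather than averaged) loss, so dividing the running total by valid[1].numel() yields the average per-token negative log-likelihood, whose exponential is the perplexity. On PyTorch 0.4.1 and later the same behaviour is spelled reduction='sum'; a minimal sketch of the equivalent computation:

import numpy as np
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss(reduction='sum')  # old API: size_average=False
logits = torch.randn(128, 10000)                  # (num_tokens, vocab_size)
targets = torch.randint(0, 10000, (128,))         # gold token ids
ppl = float(np.exp(criterion(logits, targets).item() / targets.numel()))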
Project: unet-pytorch    Author: jaxony
def forward(self, x):
        encoder_outs = []

        # encoder pathway, save outputs for merging
        for i, module in enumerate(self.down_convs):
            x, before_pool = module(x)
            encoder_outs.append(before_pool)

        for i, module in enumerate(self.up_convs):
            before_pool = encoder_outs[-(i+2)]
            x = module(before_pool, x)

        # No softmax is used. This means you need to use
        # nn.CrossEntropyLoss in your training script,
        # as that loss function includes a softmax already.
        x = self.conv_final(x)
        return x
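
As the comment says, the network emits raw scores because nn.CrossEntropyLoss fuses LogSoftmax and NLLLoss. For segmentation the criterion also accepts spatial inputs: logits of shape (N, C, H, W) scored against integer masks of shape (N, H, W). A minimal sketch of both points:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(2, 5, 64, 64)        # (N, num_classes, H, W) raw scores
masks = torch.randint(0, 5, (2, 64, 64))  # (N, H, W) integer class labels
loss = criterion(logits, masks)
# Equivalent decomposition: log-softmax over the class dim, then NLL
same = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), masks)
assert torch.allclose(loss, same)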
Project: pytorch_60min_blitz    Author: kyuhyoung
def initialize(is_gpu, dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker):

    trainloader, testloader, li_class = make_dataloader_custom_file(
        dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)

    #net = Net().cuda()
    net = Net_gap()
    #t1 = net.cuda()
    criterion = nn.CrossEntropyLoss()
    if is_gpu:
        net.cuda()
        criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=1, patience = 8, epsilon=0.00001, min_lr=0.000001) # set up scheduler

    return trainloader, testloader, net, criterion, optimizer, scheduler, li_class
Project: foolbox    Author: bethgelab
def _loss_fn(self, image, label):
        # lazy import
        import torch
        import torch.nn as nn
        from torch.autograd import Variable

        image = self._process_input(image)
        target = np.array([label])
        target = torch.from_numpy(target)
        if self.cuda:  # pragma: no cover
            target = target.cuda()
        target = Variable(target)

        images = torch.from_numpy(image[None])
        if self.cuda:  # pragma: no cover
            images = images.cuda()
        images = Variable(images, volatile=True)
        predictions = self._model(images)
        ce = nn.CrossEntropyLoss()
        loss = ce(predictions, target)
        loss = loss.data
        if self.cuda:  # pragma: no cover
            loss = loss.cpu()
        loss = loss.numpy()
        return loss
Project: autotrump    Author: Rabrg
def __init__(self, training_file='../res/trump_tweets.txt', model_file='../res/model.pt', n_epochs=1000000,
                 hidden_size=256, n_layers=2, learning_rate=0.001, chunk_len=140):
        self.training_file = training_file
        self.model_file = model_file
        self.n_epochs = n_epochs
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.learning_rate = learning_rate
        self.chunk_len = chunk_len
        self.file, self.file_len = read_file(training_file)
        if os.path.isfile(model_file):
            self.decoder = torch.load(model_file)
            print('Loaded old model!')
        else:
            self.decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
            print('Constructed new model!')
        self.decoder_optimizer = torch.optim.Adam(self.decoder.parameters(), learning_rate)
        self.criterion = nn.CrossEntropyLoss()
        self.generator = Generator(self.decoder)
Project: low-shot-shrink-hallucinate    Author: facebookresearch
def training_loop(lowshot_dataset, num_classes, params, batchsize=1000, maxiters=1000):
    featdim = lowshot_dataset.featdim()
    model = nn.Linear(featdim, num_classes)
    model = model.cuda()
    optimizer = torch.optim.SGD(model.parameters(), params.lr, momentum=params.momentum, dampening=params.momentum, weight_decay=params.wd)

    loss_function = nn.CrossEntropyLoss()
    loss_function = loss_function.cuda()
    for i in range(maxiters):
        (x,y) = lowshot_dataset.get_sample(batchsize)
        optimizer.zero_grad()

        x = Variable(x.cuda())
        y = Variable(y.cuda())
        scores = model(x)

        loss = loss_function(scores,y)
        loss.backward()
        optimizer.step()
        if (i%100==0):
            print('{:d}: {:f}'.format(i, loss.data[0]))

    return model
Project: SentEval    Author: facebookresearch
def __init__(self, params, inputdim, nclasses, l2reg=0., batch_size=64,
                 seed=1111, cudaEfficient=False):
        super(self.__class__, self).__init__(inputdim, nclasses, l2reg,
                                             batch_size, seed, cudaEfficient)
        """
        PARAMETERS:
        -nhid:       number of hidden units (0: Logistic Regression)
        -optim:      optimizer ("sgd,lr=0.1", "adam", "rmsprop" ..)
        -tenacity:   how many times dev acc does not increase before stopping
        -epoch_size: each epoch corresponds to epoch_size pass on the train set
        -max_epoch:  max number of epoches
        -dropout:    dropout for MLP
        """

        self.nhid = 0 if "nhid" not in params else params["nhid"]
        self.optim = "adam" if "optim" not in params else params["optim"]
        self.tenacity = 5 if "tenacity" not in params else params["tenacity"]
        self.epoch_size = 4 if "epoch_size" not in params else params["epoch_size"]
        self.max_epoch = 200 if "max_epoch" not in params else params["max_epoch"]
        self.dropout = 0. if "dropout" not in params else params["dropout"]
        self.batch_size = 64 if "batch_size" not in params else params["batch_size"]

        if params["nhid"] == 0:
            self.model = nn.Sequential(
                nn.Linear(self.inputdim, self.nclasses),
                ).cuda()
        else:
            self.model = nn.Sequential(
                nn.Linear(self.inputdim, params["nhid"]),
                nn.Dropout(p=self.dropout),
                nn.Tanh(),
                nn.Linear(params["nhid"], self.nclasses),
                ).cuda()

        self.loss_fn = nn.CrossEntropyLoss().cuda()
        self.loss_fn.size_average = False

        optim_fn, optim_params = utils.get_optimizer(self.optim)
        self.optimizer = optim_fn(self.model.parameters(), **optim_params)
        self.optimizer.param_groups[0]['weight_decay'] = self.l2reg
Project: multiNLI_encoder    Author: easonnie
def eval_model(model_path, mode='dev'):
    torch.manual_seed(6)

    snli_d, mnli_d, embd = data_loader.load_data_sm(
        config.DATA_ROOT, config.EMBD_FILE, reseversed=False, batch_sizes=(32, 32, 32, 32, 32), device=0)

    m_train, m_dev_m, m_dev_um, m_test_m, m_test_um = mnli_d

    m_dev_um.shuffle = False
    m_dev_m.shuffle = False
    m_dev_um.sort = False
    m_dev_m.sort = False

    m_test_um.shuffle = False
    m_test_m.shuffle = False
    m_test_um.sort = False
    m_test_m.sort = False

    model = StackBiLSTMMaxout()
    model.Embd.weight.data = embd

    if torch.cuda.is_available():
        embd.cuda()
        model.cuda()

    criterion = nn.CrossEntropyLoss()

    model.load_state_dict(torch.load(model_path))

    model.max_l = 150
    m_pred = model_eval(model, m_dev_m, criterion)
    um_pred = model_eval(model, m_dev_um, criterion)

    print("dev_mismatched_score (acc, loss):", um_pred)
    print("dev_matched_score (acc, loss):", m_pred)
Project: MIL.pytorch    Author: gujiuxiang
def __init__(self):
        super(Criterion, self).__init__()
        #self.loss = nn.MultiLabelMarginLoss()
        self.loss = nn.MultiLabelSoftMarginLoss()
        #self.loss = nn.MultiMarginLoss()
        #self.loss = nn.CrossEntropyLoss()
        #self.loss = nn.NLLLoss()
Project: inferno    Author: inferno-pytorch
def test_as_2d_criterion(self):
        from inferno.extensions.criteria.core import As2DCriterion
        import torch
        import torch.nn as nn
        from torch.autograd import Variable

        prediction = Variable(torch.FloatTensor(2, 10, 100, 100).uniform_())
        prediction = nn.Softmax2d()(prediction)
        target = Variable(torch.LongTensor(2, 100, 100).fill_(0))
        criterion = As2DCriterion(nn.CrossEntropyLoss())
        loss = criterion(prediction, target)
Project: sru    Author: taolei87
def eval_model(niter, model, valid_x, valid_y):
    model.eval()
    N = len(valid_x)
    criterion = nn.CrossEntropyLoss()
    correct = 0.0
    cnt = 0
    total_loss = 0.0
    for x, y in zip(valid_x, valid_y):
        x, y = Variable(x, volatile=True), Variable(y)
        output = model(x)
        loss = criterion(output, y)
        total_loss += loss.data[0]*x.size(1)
        pred = output.data.max(1)[1]
        correct += pred.eq(y.data).cpu().sum()
        cnt += y.numel()
    model.train()
    return 1.0-correct/cnt
Project: sru    Author: taolei87
def train_model(epoch, model, optimizer,
        train_x, train_y, valid_x, valid_y,
        test_x, test_y,
        best_valid, test_err):

    model.train()
    args = model.args
    N = len(train_x)
    niter = epoch*len(train_x)
    criterion = nn.CrossEntropyLoss()

    cnt = 0
    for x, y in zip(train_x, train_y):
        niter += 1
        cnt += 1
        model.zero_grad()
        x, y = Variable(x), Variable(y)
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

    valid_err = eval_model(niter, model, valid_x, valid_y)

    sys.stdout.write("Epoch={} iter={} lr={:.6f} train_loss={:.6f} valid_err={:.6f}\n".format(
        epoch, niter,
        optimizer.param_groups[0]['lr'],
        loss.data[0],
        valid_err
    ))

    if valid_err < best_valid:
        best_valid = valid_err
        test_err = eval_model(niter, model, test_x, test_y)
    sys.stdout.write("\n")
    return best_valid, test_err
Project: Dynamic-memory-networks-plus-Pytorch    Author: dandelin
def __init__(self, hidden_size, vocab_size, num_hop=3, qa=None):
        super(DMNPlus, self).__init__()
        self.num_hop = num_hop
        self.qa = qa
        self.word_embedding = nn.Embedding(vocab_size, hidden_size, padding_idx=0, sparse=True).cuda()
        init.uniform(self.word_embedding.state_dict()['weight'], a=-(3**0.5), b=3**0.5)
        self.criterion = nn.CrossEntropyLoss(size_average=False)

        self.input_module = InputModule(vocab_size, hidden_size)
        self.question_module = QuestionModule(vocab_size, hidden_size)
        self.memory = EpisodicMemory(hidden_size)
        self.answer_module = AnswerModule(vocab_size, hidden_size)
Project: pytorch-arda    Author: corenel
def test(classifier, generator, data_loader, dataset="MNIST"):
    """Evaluate classifier on source or target domains."""
    # set eval state for Dropout and BN layers
    generator.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0

    # set loss function
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    for (images, labels) in data_loader:
        images = make_variable(images, volatile=True)
        labels = make_variable(labels.squeeze_())

        preds = classifier(generator(images))
        loss += criterion(preds, labels).data[0]

        pred_cls = preds.data.max(1)[1]
        acc += pred_cls.eq(labels.data).cpu().sum()

    loss /= len(data_loader)
    acc /= len(data_loader.dataset)

    print("Avg Loss = {:.5f}, Avg Accuracy = {:2.5%}".format(loss, acc))
Project: FewShotLearning    Author: gitabcworld
def __init__(self, opt):

        self.net = Classifier(opt)
        if opt['classify']:
            self.criterion = nn.CrossEntropyLoss()
        else:
            self.criterion = []
        self.nParams = sum([i.view(-1).size()[0] for i in self.net.parameters()])
        self.outSize = self.net.outSize
Project: FewShotLearning    Author: gitabcworld
def __init__(self, opt):

        self.net = Classifier(opt)
        if opt['classify']:
            self.criterion = nn.CrossEntropyLoss()
        else:
            self.criterion = []
        self.nParams = np.sum([1 for i in self.net.parameters()])
        self.outSize = self.net.outSize
Project: FewShotLearning    Author: gitabcworld
def __init__(self, opt):
        super(MatchingNet, self).__init__()

        # function cosine-similarity layer
        self.cosineSim = nn.CosineSimilarity()

        # local embedding model (simple or FCE)
        self.embedModel = importlib.import_module(opt['embedModel']).build(opt)
        # set Cuda
        self.embedModel.setCuda(opt['useCUDA'])

        # load the loss. Why is it not loaded with the model?
        self.lossF = nn.CrossEntropyLoss()

    # Set training or evaluation mode
Project: SimGAN_pytorch    Author: AlexHex7
def build_network(self):
        print('=' * 50)
        print('Building network...')
        self.R = Refiner(4, cfg.img_channels, nb_features=64)
        self.D = Discriminator(input_features=cfg.img_channels)

        if cfg.cuda_use:
            self.R.cuda(cfg.cuda_num)
            self.D.cuda(cfg.cuda_num)

        self.opt_R = torch.optim.Adam(self.R.parameters(), lr=cfg.r_lr)
        self.opt_D = torch.optim.SGD(self.D.parameters(), lr=cfg.d_lr)
        self.self_regularization_loss = nn.L1Loss(size_average=False)
        self.local_adversarial_loss = nn.CrossEntropyLoss(size_average=True)
        self.delta = cfg.delta
Project: end-to-end-negotiator    Author: facebookresearch
def __init__(self, dictionary, device_id=None, bad_toks=[], size_average=True):
        w = torch.Tensor(len(dictionary)).fill_(1)
        for tok in bad_toks:
            w[dictionary.get_idx(tok)] = 0.0
        if device_id is not None:
            w = w.cuda(device_id)
        self.crit = nn.CrossEntropyLoss(w, size_average=size_average)
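
This constructor illustrates the weight argument: a per-class weight vector with one entry per vocabulary token, where the "bad" tokens are zeroed out so they contribute nothing to the loss (and nothing to the mean's denominator). A minimal sketch of the effect:

import torch
import torch.nn as nn

vocab_size = 5
w = torch.ones(vocab_size)
w[3] = 0.0                            # zero out, e.g., a padding token id
criterion = nn.CrossEntropyLoss(weight=w)
logits = torch.randn(4, vocab_size)
targets = torch.tensor([0, 3, 3, 2])  # the two id-3 targets are masked out
loss = criterion(logits, targets)     # mean over the non-zero-weight targets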
Project: BiDAF-PyTorch    Author: kelayamatoz
def span_loss(config, q_mask, logits_start, start, logits_end, end):
    size = config.max_num_sents * config.max_sent_size
    loss_mask = reduce_mask(q_mask, 1)
    # nn.CrossEntropyLoss is a module: instantiate it once, then call it on
    # (logits, targets); the constructor itself does not take logits.
    criterion = nn.CrossEntropyLoss()
    losses_start = criterion(logits_start, start.view(-1, size))
    ce_loss_start = torch.mean(loss_mask * losses_start)
    losses_end = criterion(logits_end, end.view(-1, size))
    ce_loss_end = torch.mean(loss_mask * losses_end)
    return ce_loss_start + ce_loss_end
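
Per-position masking, as this function intends, requires the criterion to return unreduced losses; on modern PyTorch that is reduction='none' with class-index targets. A sketch under those assumptions (not the repository's code):

import torch
import torch.nn as nn

def masked_span_loss(logits_start, start_idx, logits_end, end_idx, loss_mask):
    # logits_*: (N, size) scores over positions; *_idx: (N,) gold indices;
    # loss_mask: (N,) with 1.0 for real examples and 0.0 for padding.
    criterion = nn.CrossEntropyLoss(reduction='none')
    ce_start = torch.mean(loss_mask * criterion(logits_start, start_idx))
    ce_end = torch.mean(loss_mask * criterion(logits_end, end_idx))
    return ce_start + ce_end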
Project: two-stream-action-recognition    Author: jeffreyhuang1
def build_model(self):
        print ('==> Build model and setup loss and optimizer')
        #build model
        self.model = resnet101(pretrained= True, channel=self.channel).cuda()
        #print self.model
        #Loss function and optimizer
        self.criterion = nn.CrossEntropyLoss().cuda()
        self.optimizer = torch.optim.SGD(self.model.parameters(), self.lr, momentum=0.9)
        self.scheduler = ReduceLROnPlateau(self.optimizer, 'min', patience=1,verbose=True)
Project: two-stream-action-recognition    Author: jeffreyhuang1
def build_model(self):
        print ('==> Build model and setup loss and optimizer')
        #build model
        self.model = resnet101(pretrained= True, channel=3).cuda()
        #Loss function and optimizer
        self.criterion = nn.CrossEntropyLoss().cuda()
        self.optimizer = torch.optim.SGD(self.model.parameters(), self.lr, momentum=0.9)
        self.scheduler = ReduceLROnPlateau(self.optimizer, 'min', patience=1,verbose=True)
Project: wildcat.pytorch    Author: durandtibo
def main_voc2007():
    global args, best_prec1, use_gpu
    args = parser.parse_args()

    use_gpu = torch.cuda.is_available()

    # define dataset
    train_dataset = Mit67(args.data, 'train')
    val_dataset = Mit67(args.data, 'test')
    num_classes = 67

    # load model
    model = resnet101_wildcat(num_classes, pretrained=True, kmax=args.k, alpha=args.alpha, num_maps=args.maps)
    print('classifier', model.classifier)
    print('spatial pooling', model.spatial_pooling)

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss()

    # define optimizer
    optimizer = torch.optim.SGD(model.get_config_optim(args.lr, args.lrp),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    state = {'batch_size': args.batch_size, 'image_size': args.image_size, 'max_epochs': args.epochs,
             'evaluate': args.evaluate, 'resume': args.resume}
    state['difficult_examples'] = True
    state['save_model_path'] = '../expes/models/mit67/'

    engine = MulticlassEngine(state)
    engine.learning(model, criterion, train_dataset, val_dataset, optimizer)
Project: vqa.pytorch    Author: Cadene
def factory(opt, cuda=True):
    criterion = nn.CrossEntropyLoss()
    if cuda:
        criterion = criterion.cuda()
    return criterion
Project: pytorch-adda    Author: corenel
def eval_src(encoder, classifier, data_loader):
    """Evaluate classifier for source domain."""
    # set eval state for Dropout and BN layers
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0

    # set loss function
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    for (images, labels) in data_loader:
        images = make_variable(images, volatile=True)
        labels = make_variable(labels)

        preds = classifier(encoder(images))
        loss += criterion(preds, labels).data[0]

        pred_cls = preds.data.max(1)[1]
        acc += pred_cls.eq(labels.data).cpu().sum()

    loss /= len(data_loader)
    acc /= len(data_loader.dataset)

    print("Avg Loss = {}, Avg Accuracy = {:2%}".format(loss, acc))
Project: pytorch-adda    Author: corenel
def eval_tgt(encoder, classifier, data_loader):
    """Evaluation for target encoder by source classifier on target dataset."""
    # set eval state for Dropout and BN layers
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0

    # set loss function
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    for (images, labels) in data_loader:
        images = make_variable(images, volatile=True)
        labels = make_variable(labels).squeeze_()

        preds = classifier(encoder(images))
        loss += criterion(preds, labels).data[0]

        pred_cls = preds.data.max(1)[1]
        acc += pred_cls.eq(labels.data).cpu().sum()

    loss /= len(data_loader)
    acc /= len(data_loader.dataset)

    print("Avg Loss = {}, Avg Accuracy = {:2%}".format(loss, acc))
Project: benchmark    Author: pytorch
def prepare(self, p):
        """Set up model."""
        # The CPU version is slow...
        p['batch_size'] = 32 if p.cuda else 4

        def cast(tensor):
            return tensor.cuda() if p.cuda else tensor

        self.model = memnn.MemNN(p, p.num_features)
        self.criterion = nn.CrossEntropyLoss()
        self.data_batches = [
            [  # memories, queries, memory_lengths, query_lengths
                Variable(cast(torch.zeros(p.batch_size * p.mem_size).long())),
                Variable(cast(torch.zeros(p.batch_size * 28).long())),
                Variable(cast(torch.ones(p.batch_size, p.mem_size).long())),
                Variable(cast(torch.LongTensor(p.batch_size).fill_(28).long())),
            ]
            for _ in range(p.num_batches)
        ]
        self.cand_batches = [
            Variable(cast(torch.zeros(p.batch_size * 14, p.embedding_size)))
            for _ in range(p.num_batches)
        ]
        self.target_batches = [
            Variable(cast(torch.ones(p.batch_size).long()))
            for _ in range(p.num_batches)
        ]
        if p.cuda:
            self.model.cuda()
            self.criterion.cuda()
Project: benchmark    Author: pytorch
def prepare(self, p):
        def get_rnn():
            if p.rnn_type in ['LSTM', 'GRU']:
                return getattr(nn, p.rnn_type)(p.embedding_size, p.hidden_size, p.num_layers, dropout=p.dropout)
            else:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[p.rnn_type]
                return nn.RNN(p.embedding_size, p.hidden_size, p.num_layers, nonlinearity=nonlinearity, dropout=p.dropout)

        class Model(nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                self.drop = nn.Dropout(p.dropout)
                self.rnn = get_rnn()
                self.encoder = nn.Embedding(p.num_tokens, p.embedding_size)
                self.decoder = nn.Linear(p.hidden_size, p.num_tokens)

            def forward(self, input):
                emb = self.drop(self.encoder(input))
                output, hidden = self.rnn(emb)
                output = self.drop(output)
                decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
                return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden

        def cast(tensor):
            return tensor.long().cuda() if p.cuda else tensor.long()

        self.model = Model()
        self.criterion = nn.CrossEntropyLoss()
        self.data_batches = [Variable(cast(torch.zeros(p.bptt, p.batch_size))) for _ in range(p.num_batches)]
        self.target_batches = [Variable(cast(torch.zeros(p.bptt * p.batch_size))) for _ in range(p.num_batches)]
        if p.cuda:
            self.model.cuda()
            self.criterion.cuda()
Project: benchmark    Author: pytorch
def prepare(self, p):
        # The CPU version is slow...
        p['batch_size'] = 20 if p.cuda else 5
        class Model(nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                self.rnn = bnlstm.LSTM(cell_class=bnlstm.BNLSTMCell, input_size=1, hidden_size=p.hidden_size, batch_first=True, max_length=p.max_length)
                self.fc = nn.Linear(in_features=p.hidden_size, out_features=10) # 10 digits in mnist

            def forward(self, data):
                hx = None
                if not p.pmnist:
                    h0 = Variable(data.data.new(data.size(0), p.hidden_size)
                                  .normal_(0, 0.1))
                    c0 = Variable(data.data.new(data.size(0), p.hidden_size)
                                  .normal_(0, 0.1))
                    hx = (h0, c0)
                _, (h_n, _) = self.rnn(input_=data, hx = hx)
                logits = self.fc(h_n[0])
                return logits

        def cast(tensor):
            return tensor.cuda() if p.cuda else tensor

        self.model = Model()
        self.criterion = nn.CrossEntropyLoss()
        self.data_batches = [Variable(cast(torch.zeros(p.batch_size, 28 * 28, 1))) for _ in range(p.num_batches)]
        self.target_batches = [Variable(cast(torch.zeros(p.batch_size)).long()) for _ in range(p.num_batches)]
        if p.cuda:
            self.model.cuda()
            self.criterion.cuda()
Project: covfefe    Author: deepnn
def ce_loss(loss_weight=None, size_ave=True):
    return nn.CrossEntropyLoss(weight=loss_weight, size_average=size_ave)
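
size_average was deprecated in PyTorch 0.4.1 and later removed; on a current release the same wrapper would be written with reduction (a hedged sketch with a similar but hypothetical signature):

import torch.nn as nn

def ce_loss(loss_weight=None, sum_loss=False):
    # reduction='sum' replaces size_average=False; 'mean' is the default.
    return nn.CrossEntropyLoss(weight=loss_weight,
                               reduction='sum' if sum_loss else 'mean')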
Project: pytorch_60min_blitz    Author: kyuhyoung
def initialize(mode, is_gpu, dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker):

    if 'TORCHVISION_MEMORY' == mode:
        trainloader, testloader, li_class = make_dataloader_torchvison_memory(
            dir_data, di_set_transform, n_img_per_batch, n_worker)
    elif 'TORCHVISION_IMAGEFOLDER' == mode:
        trainloader, testloader, li_class = make_dataloader_torchvison_imagefolder(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    elif 'CUSTOM_MEMORY' == mode:
        trainloader, testloader, li_class = make_dataloader_custom_memory(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    elif 'CUSTOM_FILE' == mode:
        trainloader, testloader, li_class = make_dataloader_custom_file(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    else:
        trainloader, testloader, li_class = make_dataloader_custom_tensordataset(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)


    #net = Net().cuda()
    net = Net()
    #t1 = net.cuda()
    criterion = nn.CrossEntropyLoss()
    if is_gpu:
        net.cuda()
        criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=1, patience = 8, epsilon=0.00001, min_lr=0.000001) # set up scheduler

    return trainloader, testloader, net, criterion, optimizer, scheduler, li_class
Project: temperature_scaling    Author: gpleiss
def set_temperature(self, valid_loader):
        """
        Tune the temperature of the model (using the validation set).
        We're going to set it to optimize NLL.
        valid_loader (DataLoader): validation set loader
        """
        self.cuda()
        nll_criterion = nn.CrossEntropyLoss().cuda()
        ece_criterion = _ECELoss().cuda()

        # First: collect all the logits and labels for the validation set
        logits_list = []
        labels_list = []
        for input, label in valid_loader:
            input_var = Variable(input, volatile=True).cuda()
            logits_var = self.model(input_var)
            logits_list.append(logits_var.data)
            labels_list.append(label)
        logits = torch.cat(logits_list).cuda()
        labels = torch.cat(labels_list).cuda()
        logits_var = Variable(logits)
        labels_var = Variable(labels)

        # Calculate NLL and ECE before temperature scaling
        before_temperature_nll = nll_criterion(logits_var, labels_var).data[0]
        before_temperature_ece = ece_criterion(logits_var, labels_var).data[0]
        print('Before temperature - NLL: %.3f, ECE: %.3f' % (before_temperature_nll, before_temperature_ece))

        # Next: optimize the temperature w.r.t. NLL
        optimizer = optim.LBFGS([self.temperature], lr=0.01, max_iter=50)
        def eval():
            loss = nll_criterion(self.temperature_scale(logits_var), labels_var)
            loss.backward()
            return loss
        optimizer.step(eval)

        # Calculate NLL and ECE after temperature scaling
        after_temperature_nll = nll_criterion(self.temperature_scale(logits_var), labels_var).data[0]
        after_temperature_ece = ece_criterion(self.temperature_scale(logits_var), labels_var).data[0]
        print('Optimal temperature: %.3f' % self.temperature.data[0])
        print('After temperature - NLL: %.3f, ECE: %.3f' % (after_temperature_nll, after_temperature_ece))

        return self
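
temperature_scale is defined elsewhere in this class; in temperature scaling it simply divides the logits by one learned scalar T, the single parameter LBFGS optimizes above (T > 1 softens the softmax, T < 1 sharpens it). A minimal sketch of that presumed form:

import torch
import torch.nn as nn

def temperature_scale(logits, temperature):
    # temperature: a scalar nn.Parameter; broadcasting divides every
    # logit in the (N, C) batch by the same learned value.
    return logits / temperature

temperature = nn.Parameter(torch.ones(1) * 1.5)
scaled = temperature_scale(torch.randn(8, 10), temperature)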
Project: repeval_rivercorners    Author: jabalazs
def run_main():
    import torch.nn as nn
    from repeval.models.inner_att import InnerAtt
    from repeval.routines import main

    args = parser.parse_args()

    Model = InnerAtt

    loss_function = nn.CrossEntropyLoss()

    main(args, Model, Corpora, EmbeddingsList, loss_function)
Project: char-rnn    Author: hiepph
def __init__(self, input_size, hidden_size, output_size, n_layers=1, gpu=-1):
        self.decoder = RNN(input_size, hidden_size, output_size, n_layers, gpu)
        if gpu >= 0:
            print("Use GPU %d" % torch.cuda.current_device())
            self.decoder.cuda()

        self.optimizer = torch.optim.Adam(self.decoder.parameters(), lr=0.01)
        self.criterion = nn.CrossEntropyLoss()
Project: pytorchnet    Author: human-analysis
def __init__(self):
        super(Classification, self).__init__()
        self.loss = nn.CrossEntropyLoss()
Project: low-shot-shrink-hallucinate    Author: facebookresearch
def __init__(self,aux_loss_type, aux_loss_wt, num_classes):
        aux_loss_fns = dict(l2=l2_loss, sgm=SGMLoss(num_classes), batchsgm=BatchSGMLoss(num_classes))
        self.aux_loss_fn = aux_loss_fns[aux_loss_type]
        self.aux_loss_type = aux_loss_type
        self.cross_entropy_loss = nn.CrossEntropyLoss()
        self.aux_loss_wt = aux_loss_wt
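
The forward pass is not shown here, but the fields suggest the objective is cross-entropy plus a weighted auxiliary term. A hedged sketch of how these pieces presumably combine (method and argument names are assumptions, not the repository's code):

def total_loss(self, scores, labels):
    # hypothetical: CE on the classifier scores plus the weighted aux term
    return (self.cross_entropy_loss(scores, labels)
            + self.aux_loss_wt * self.aux_loss_fn(scores, labels))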
Project: low-shot-shrink-hallucinate    Author: facebookresearch
def train_classifier(filehandle, base_classes, cachefile, networkfile, total_num_classes = 1000, lr=0.1, wd=0.0001, momentum=0.9, batchsize=1000, niter=10000):
    # either use pre-existing classifier or train one
    all_labels = filehandle['all_labels'][...]
    all_labels = all_labels.astype(int)
    all_feats = filehandle['all_feats']
    base_class_ids = np.where(np.in1d(all_labels, base_classes))[0]
    loss = nn.CrossEntropyLoss().cuda()
    model = nn.Linear(all_feats[0].size, total_num_classes).cuda()
    if os.path.isfile(cachefile):
        tmp = torch.load(cachefile)
        model.load_state_dict(tmp)
    elif os.path.isfile(networkfile):
        tmp = torch.load(networkfile)
        if 'module.classifier.bias' in tmp['state']:
            state_dict = {'weight':tmp['state']['module.classifier.weight'], 'bias':tmp['state']['module.classifier.bias']}
        else:
            model = nn.Linear(all_feats[0].size, total_num_classes, bias=False).cuda()
            state_dict = {'weight':tmp['state']['module.classifier.weight']}
        model.load_state_dict(state_dict)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=wd, dampening=0)
        for i in range(niter):
            optimizer.zero_grad()
            idx = np.sort(np.random.choice(base_class_ids, batchsize, replace=False))
            F = all_feats[idx,:]
            F = Variable(torch.Tensor(F)).cuda()
            L = Variable(torch.LongTensor(all_labels[idx])).cuda()
            S = model(F)
            loss_val = loss(S, L)
            loss_val.backward()
            optimizer.step()
            if i % 100 == 0:
                print('Classifier training {:d}: {:f}'.format(i, loss_val.data[0]))
        torch.save(model.state_dict(), cachefile)

    return model
Project: future-price-predictor    Author: htfy96
def train(model, db, exp, args, use_cuda=False):
    print("Training...")
    init_time = time.clock()

    #trainsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(0, int(len(db) * 0.7)))
    #trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=trainsampler)
    #testsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(int(len(db) * 0.7), len(db) - 1))
    #testloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=testsampler)
    trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    best_loss = 100000

    for epoch in range(args.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0

            if last_loss < best_loss or i % 100 == 0:
                best_loss = last_loss
                #acc = evaluate(model, testloader, args, use_cuda)
                acc = evaluate(model, trainloader, args, use_cuda)
                exp.add_scalar_value('train_loss', last_loss, time.clock() - init_time)
                exp.add_scalar_value('test_accu', acc, time.clock() - init_time)
                torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT2_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    #acc = evaluate(model, testloader, args, use_cuda)
    acc = evaluate(model, trainloader, args, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT2_all_acc_{}.t7'.format(acc)))

    print("Finished Training!")
Project: future-price-predictor    Author: htfy96
def train(model, db, exp, args, use_cuda=False):
    print("Training...")
    init_time = time.clock()

    trainsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(0, int(len(db) * 0.7)))
    trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=trainsampler)
    testsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(int(len(db) * 0.7), len(db) - 1))
    testloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=testsampler)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    best_loss = 100000

    for epoch in range(args.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            model.train()
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0

            if last_loss < best_loss or i % 100 == 0:
                best_loss = last_loss
                acc = evaluate(model, testloader, args, use_cuda)
                exp.add_scalar_value('train_loss', last_loss, time.clock() - init_time)
                exp.add_scalar_value('test_accu', acc, time.clock() - init_time)
                torch.save(model.state_dict(), os.path.join('saved_model', '{}_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(args.name, epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    acc = evaluate(model, testloader, args, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', '{}_all_acc_{}.t7'.format(args.name, acc)))

    print("Finished Training!")
Project: future-price-predictor    Author: htfy96
def train(model, db, exp, args, use_cuda=False):
    print("Training...")
    init_time = time.clock()

    trainsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(0, int(len(db) * 0.7)))
    trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=trainsampler)
    testsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(int(len(db) * 0.7), len(db) - 1))
    testloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=testsampler)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    best_loss = 100000

    for epoch in range(args.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0

            if last_loss < best_loss or i % 200 == 0:
                best_loss = last_loss
                acc = evaluate(model, testloader, args, use_cuda)
                exp.add_scalar_value('train_loss', last_loss, time.clock() - init_time)
                exp.add_scalar_value('test_acc', acc, time.clock() - init_time)
                torch.save(model.state_dict(), os.path.join('saved_model', '{}_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(args.name, epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    acc = evaluate(model, testloader, args, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', '{}_all_acc_{}.t7'.format(args.name, acc)))

    print("Finished Training!")
Project: future-price-predictor    Author: htfy96
def train(model, db, exp, args, use_cuda=False):
    print("Training...")
    init_time = time.clock()

    trainsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(0, int(len(db) * 0.7)))
    trainloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=trainsampler)
    testsampler = data_utils.sampler.SubsetRandomSampler(SequentialIndexList(int(len(db) * 0.7), len(db) - 1))
    testloader = data_utils.DataLoader(dataset=db, batch_size=args.batch_size, shuffle=True, sampler=testsampler)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    best_loss = 100000

    for epoch in range(args.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0

            if last_loss < best_loss or i % 100 == 0:
                best_loss = last_loss
                acc = evaluate(model, testloader, args, use_cuda)
                exp.add_scalar_value('train_loss', last_loss, time.clock() - init_time)
                exp.add_scalar_value('test_accu', acc, time.clock() - init_time)
                torch.save(model.state_dict(), os.path.join('saved_model', '{}_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(args.name, epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    acc = evaluate(model, testloader, args, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', '{}_all_acc_{}.t7'.format(args.name, acc)))

    print("Finished Training!")
Project: multiNLI_encoder    Author: easonnie
def build_kaggle_submission_file(model_path):
    torch.manual_seed(6)

    snli_d, mnli_d, embd = data_loader.load_data_sm(
        config.DATA_ROOT, config.EMBD_FILE, reseversed=False, batch_sizes=(32, 32, 32, 32, 32), device=0)

    m_train, m_dev_m, m_dev_um, m_test_m, m_test_um = mnli_d

    m_test_um.shuffle = False
    m_test_m.shuffle = False
    m_test_um.sort = False
    m_test_m.sort = False

    model = StackBiLSTMMaxout()
    model.Embd.weight.data = embd
    # model.display()

    if torch.cuda.is_available():
        embd.cuda()
        model.cuda()

    criterion = nn.CrossEntropyLoss()

    model.load_state_dict(torch.load(model_path))

    m_pred = model_eval(model, m_test_m, criterion, pred=True)
    um_pred = model_eval(model, m_test_um, criterion, pred=True)

    model.max_l = 150
    print(um_pred)
    print(m_pred)

    with open('./sub_um.csv', 'w+') as f:
        index = ['entailment', 'contradiction', 'neutral']
        f.write("pairID,gold_label\n")
        for i, k in enumerate(um_pred):
            f.write(str(i) + "," + index[k] + "\n")

    with open('./sub_m.csv', 'w+') as f:
        index = ['entailment', 'contradiction', 'neutral']
        f.write("pairID,gold_label\n")
        for j, k in enumerate(m_pred):
            f.write(str(j + 9847) + "," + index[k] + "\n")
Project: ParlAI    Author: facebookresearch
def __init__(self, opt, shared=None):
        opt['cuda'] = not opt['no_cuda'] and torch.cuda.is_available()
        if opt['cuda']:
            print('[ Using CUDA ]')
            torch.cuda.device(opt['gpu'])

        if not shared:
            self.opt = opt
            self.id = 'MemNN'
            self.dict = DictionaryAgent(opt)
            self.answers = [None] * opt['batchsize']

            self.model = MemNN(opt, len(self.dict))
            self.mem_size = opt['mem_size']
            self.loss_fn = CrossEntropyLoss()

            self.decoder = None
            self.longest_label = 1
            self.END = self.dict.end_token
            self.END_TENSOR = torch.LongTensor(self.dict.parse(self.END))
            self.START = self.dict.start_token
            self.START_TENSOR = torch.LongTensor(self.dict.parse(self.START))
            if opt['output'] == 'generate' or opt['output'] == 'g':
                self.decoder = Decoder(opt['embedding_size'], opt['embedding_size'],
                                        opt['rnn_layers'], opt, self.dict)
            elif opt['output'] != 'rank' and opt['output'] != 'r':
                raise NotImplementedError('Output type not supported.')

            optim_params = [p for p in self.model.parameters() if p.requires_grad]
            lr = opt['learning_rate']
            if opt['optimizer'] == 'sgd':
                self.optimizers = {'memnn': optim.SGD(optim_params, lr=lr)}
                if self.decoder is not None:
                    self.optimizers['decoder'] = optim.SGD(self.decoder.parameters(), lr=lr)
            elif opt['optimizer'] == 'adam':
                self.optimizers = {'memnn': optim.Adam(optim_params, lr=lr)}
                if self.decoder is not None:
                    self.optimizers['decoder'] = optim.Adam(self.decoder.parameters(), lr=lr)
            else:
                raise NotImplementedError('Optimizer not supported.')

            if opt['cuda']:
                self.model.share_memory()
                if self.decoder is not None:
                    self.decoder.cuda()

            if opt.get('model_file') and os.path.isfile(opt['model_file']):
                print('Loading existing model parameters from ' + opt['model_file'])
                self.load(opt['model_file'])
        else:
            self.answers = shared['answers']

        self.episode_done = True
        self.last_cands, self.last_cands_list = None, None
        super().__init__(opt, shared)
Project: sru    Author: taolei87
def train_model(epoch, model, train):
    model.train()
    args = model.args

    unroll_size = args.unroll_size
    batch_size = args.batch_size
    N = (len(train[0])-1)//unroll_size + 1
    lr = args.lr

    total_loss = 0.0
    criterion = nn.CrossEntropyLoss(size_average=False)
    hidden = model.init_hidden(batch_size)
    for i in range(N):
        x = train[0][i*unroll_size:(i+1)*unroll_size]
        y = train[1][i*unroll_size:(i+1)*unroll_size].view(-1)
        x, y =  Variable(x), Variable(y)
        hidden = (Variable(hidden[0].data), Variable(hidden[1].data)) if args.lstm \
            else Variable(hidden.data)

        model.zero_grad()
        output, hidden = model(x, hidden)
        assert x.size(1) == batch_size
        loss = criterion(output, y) / x.size(1)
        loss.backward()

        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip_grad)
        for p in model.parameters():
            if p.requires_grad:
                if args.weight_decay > 0:
                    p.data.mul_(1.0-args.weight_decay)
                p.data.add_(-lr, p.grad.data)

        if math.isnan(loss.data[0]) or math.isinf(loss.data[0]):
            sys.exit(0)
            return

        total_loss += loss.data[0] / x.size(0)
        if i%10 == 0:
            sys.stdout.write("\r{}".format(i))
            sys.stdout.flush()

    return np.exp(total_loss/N)
Project: foolbox    Author: bethgelab
def predictions_and_gradient(self, image, label):
        # lazy import
        import torch
        import torch.nn as nn
        from torch.autograd import Variable

        image = self._process_input(image)
        target = np.array([label])
        target = torch.from_numpy(target)
        if self.cuda:  # pragma: no cover
            target = target.cuda()
        target = Variable(target)

        assert image.ndim == 3
        images = image[np.newaxis]
        images = torch.from_numpy(images)
        if self.cuda:  # pragma: no cover
            images = images.cuda()
        images = Variable(images, requires_grad=True)
        predictions = self._model(images)
        ce = nn.CrossEntropyLoss()
        loss = ce(predictions, target)
        loss.backward()
        grad = images.grad

        predictions = predictions.data
        if self.cuda:  # pragma: no cover
            predictions = predictions.cpu()

        predictions = predictions.numpy()
        predictions = np.squeeze(predictions, axis=0)
        assert predictions.ndim == 1
        assert predictions.shape == (self.num_classes(),)

        grad = grad.data
        if self.cuda:  # pragma: no cover
            grad = grad.cpu()
        grad = grad.numpy()
        grad = self._process_gradient(grad)
        grad = np.squeeze(grad, axis=0)
        assert grad.shape == image.shape

        return predictions, grad
Project: efficient_densenet_pytorch    Author: gpleiss
def train(model, train_set, valid_set, test_set, save, train_size=0, valid_size=5000,
          n_epochs=1, batch_size=64, lr=0.1, wd=0.0001, momentum=0.9, seed=None):

    if seed is not None:
        torch.manual_seed(seed)

    # Make model, criterion, optimizer, data loaders
    train_loader, valid_loader, test_loader = _make_dataloaders(
        train_set=train_set,
        valid_set=valid_set,
        test_set=test_set,
        train_size=train_size,
        valid_size=valid_size,
        batch_size=batch_size,
    )
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, nesterov=True, weight_decay=wd)

    # Wrap model if multiple gpus
    if torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()
    else:
        model_wrapper = model.cuda()

    # Train model
    best_error = 1
    for epoch in range(1, n_epochs + 1):
        _set_lr(optimizer, epoch, n_epochs, lr)
        train_results = run_epoch(
            loader=train_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=True,
        )
        valid_results = run_epoch(
            loader=valid_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=False,
        )

        # Determine if model is the best
        _, _, valid_error = valid_results
        if valid_error[0] < best_error:
            best_error = valid_error[0]
            print('New best error: %.4f' % best_error)
            torch.save(model.state_dict(), os.path.join(save, 'model.t7'))
Project: pytorch-tools    Author: nearai
def main():
    inputs = datasets.snli.ParsedTextField(lower=True)
    transitions = datasets.snli.ShiftReduceField()
    answers = data.Field(sequential=False)

    train, dev, test = datasets.SNLI.splits(inputs, answers, transitions)
    inputs.build_vocab(train, dev, test)
    answers.build_vocab(train)
    # print(dir(inputs))
    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        (train, dev, test), batch_size=args.batch_size, device=0 if args.cuda else -1)
    # for batch in train_iter:
    #     # print(dir(batch))
    #     # print(batch.dataset)
    #     # print(batch.label)
    #     # print(batch.premise_transitions)
    #     for x in batch.dataset:
    #         print(Tree(x, inputs.vocab, answers.vocab).root)
    #         # print(dir(x))
    #         # print(x.label, x.premise, x.premise_transitions)
    #     break

    model = SPINN(3, 100, 1000)
    criterion = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=0.01)

    for epoch in range(10):
        start = time.time()
        iteration = 0
        for batch_idx, batch in enumerate(train_iter):
            opt.zero_grad()

            all_logits, all_labels = [], []
            fold = torchfold.Fold(cuda=args.cuda)
            for example in batch.dataset:
                tree = Tree(example, inputs.vocab, answers.vocab)
                if args.fold:
                    all_logits.append(encode_tree_fold(fold, tree))
                else:
                    all_logits.append(encode_tree_regular(model, tree))
                all_labels.append(tree.label)

            if args.fold:
                res = fold.apply(model, [all_logits, all_labels])
                loss = criterion(res[0], res[1])
            else:
                loss = criterion(torch.cat(all_logits, 0), Variable(torch.LongTensor(all_labels)))
            loss.backward(); opt.step()

            iteration += 1
            if iteration % 10 == 0:
                print("Avg. Time: %fs" % ((time.time() - start) / iteration))
                # iteration = 0
                # start = time.time()