The following 49 code examples, extracted from open-source Python projects, illustrate how to use torch.optim.SGD.
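Before the project examples, here is a minimal, self-contained sketch of the typical usage pattern. The model, data, and hyperparameters below are placeholders for illustration only and are not taken from any of the projects:

import torch
import torch.nn as nn
import torch.optim as optim

# A toy model and toy data, purely for illustration.
model = nn.Linear(10, 2)
criterion = nn.CrossEntropyLoss()
inputs = torch.randn(16, 10)
targets = torch.randint(0, 2, (16,))

# The common SGD setup: learning rate plus optional momentum / weight decay.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

# One training step.
optimizer.zero_grad()
loss = criterion(model(inputs), targets)
loss.backward()
optimizer.step()

The examples collected from the projects follow.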
def test_sgd(self):
    self._test_rosenbrock(
        lambda params: optim.SGD(params, lr=1e-3),
        wrap_old_fn(old_optim.sgd, learningRate=1e-3)
    )
    self._test_rosenbrock(
        lambda params: optim.SGD(params, lr=1e-3, momentum=0.9, dampening=0),
        wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9, dampening=0)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.SGD(
            self._build_params_dict(weight, bias, lr=1e-2),
            lr=1e-3)
    )

def init_optimizer(self, state_dict=None):
    """Initialize an optimizer for the free parameters of the network.

    Args:
        state_dict: network parameters
    """
    if self.args.fix_embeddings:
        for p in self.network.embedding.parameters():
            p.requires_grad = False
    parameters = [p for p in self.network.parameters() if p.requires_grad]
    if self.args.optimizer == 'sgd':
        self.optimizer = optim.SGD(parameters, self.args.learning_rate,
                                   momentum=self.args.momentum,
                                   weight_decay=self.args.weight_decay)
    elif self.args.optimizer == 'adamax':
        self.optimizer = optim.Adamax(parameters,
                                      weight_decay=self.args.weight_decay)
    else:
        raise RuntimeError('Unsupported optimizer: %s' % self.args.optimizer)

# --------------------------------------------------------------------------
# Learning
# --------------------------------------------------------------------------

def __init__(self, model, args, device_id=None, verbose=False):
    self.model = model
    self.args = args
    self.device_id = device_id
    self.verbose = verbose
    self.opt = optim.SGD(
        self.model.parameters(),
        lr=self.args.lr,
        momentum=self.args.momentum,
        nesterov=(self.args.nesterov and self.args.momentum > 0))
    self.crit = Criterion(self.model.word_dict, device_id=device_id)
    self.sel_crit = Criterion(
        self.model.item_dict, device_id=device_id,
        bad_toks=['<disconnect>', '<disagree>'])
    if self.args.visual:
        self.model_plot = vis.ModulePlot(self.model, plot_weight=False, plot_grad=True)
        self.loss_plot = vis.Plot(['train', 'valid', 'valid_select'],
                                  'loss', 'loss', 'epoch', running_n=1)
        self.ppl_plot = vis.Plot(['train', 'valid', 'valid_select'],
                                 'perplexity', 'ppl', 'epoch', running_n=1)

def get_optimizer(args, params):
    if args.dataset == 'mnist':
        if args.model == 'optnet-eq':
            params = list(params)
            A_param = params.pop(0)
            assert(A_param.size() == (args.neq, args.nHidden))
            optimizer = optim.Adam([
                {'params': params, 'lr': 1e-3},
                {'params': [A_param], 'lr': 1e-1}
            ])
        else:
            optimizer = optim.Adam(params)
    elif args.dataset in ('cifar-10', 'cifar-100'):
        if args.opt == 'sgd':
            optimizer = optim.SGD(params, lr=1e-1, momentum=0.9,
                                  weight_decay=args.weightDecay)
        elif args.opt == 'adam':
            optimizer = optim.Adam(params, weight_decay=args.weightDecay)
    else:
        assert(False)
    return optimizer

def test_sgd(self):
    self._test_rosenbrock(
        lambda params: optim.SGD(params, lr=1e-3),
        wrap_old_fn(old_optim.sgd, learningRate=1e-3)
    )
    self._test_rosenbrock(
        lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
                                 dampening=0, weight_decay=1e-4),
        wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
                    dampening=0, weightDecay=1e-4)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.SGD(
            self._build_params_dict(weight, bias, lr=1e-2),
            lr=1e-3)
    )

def train(rank, args, model):
    torch.manual_seed(args.seed + rank)

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, num_workers=1)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, num_workers=1)

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    for epoch in range(1, args.epochs + 1):
        train_epoch(epoch, args, model, train_loader, optimizer)
        test_epoch(model, test_loader)

def initialize(is_gpu, dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker):
    trainloader, testloader, li_class = make_dataloader_custom_file(
        dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    #net = Net().cuda()
    net = Net_gap()
    #t1 = net.cuda()
    criterion = nn.CrossEntropyLoss()
    if is_gpu:
        net.cuda()
        criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    # set up scheduler
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=1, patience=8,
                                  epsilon=0.00001, min_lr=0.000001)
    return trainloader, testloader, net, criterion, optimizer, scheduler, li_class

def test_sgd(self):
    self._test_rosenbrock(
        lambda params: optim.SGD(params, lr=1e-3),
        wrap_old_fn(old_optim.sgd, learningRate=1e-3)
    )
    self._test_rosenbrock(
        lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
                                 dampening=0, weight_decay=1e-4),
        wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
                    dampening=0, weightDecay=1e-4)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.SGD(
            self._build_params_dict(weight, bias, lr=1e-2),
            lr=1e-3)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.SGD(
            self._build_params_dict_single(weight, bias, lr=1e-2),
            lr=1e-3)
    )

def __init__(self, model, criterion, opt, optimState):
    self.model = model
    self.criterion = criterion
    self.optimState = optimState
    if self.optimState is None:
        self.optimState = {
            'learningRate': opt.LR,
            'learningRateDecay': opt.LRDParam,
            'momentum': opt.momentum,
            'nesterov': False,
            'dampening': opt.dampening,
            'weightDecay': opt.weightDecay
        }
    self.opt = opt
    if opt.optimizer == 'SGD':
        self.optimizer = optim.SGD(model.parameters(), lr=opt.LR,
                                   momentum=opt.momentum,
                                   dampening=opt.dampening,
                                   weight_decay=opt.weightDecay)
    elif opt.optimizer == 'Adam':
        self.optimizer = optim.Adam(model.parameters(), lr=opt.LR,
                                    betas=(opt.momentum, 0.999),
                                    eps=1e-8,
                                    weight_decay=opt.weightDecay)
    self.logger = {
        'train': open(os.path.join(opt.resume, 'train.log'), 'a+'),
        'val': open(os.path.join(opt.resume, 'val.log'), 'a+')
    }

def __init__(self, model, criterion, opt, optimState):
    self.model = model
    self.criterion = criterion
    self.optimState = optimState
    if self.optimState is None:
        self.optimState = {
            'learningRate': opt.LR,
            'learningRateDecay': opt.LRDParam,
            'momentum': opt.momentum,
            'nesterov': False,
            'dampening': opt.dampening,
            'weightDecay': opt.weightDecay
        }
    self.opt = opt
    if opt.optimizer == 'SGD':
        self.optimizer = optim.SGD(model.parameters(), lr=opt.LR,
                                   momentum=opt.momentum,
                                   dampening=opt.dampening,
                                   weight_decay=opt.weightDecay)
    elif opt.optimizer == 'Adam':
        self.optimizer = optim.Adam(model.parameters(), lr=opt.LR,
                                    betas=(0.9, 0.999),
                                    eps=1e-8,
                                    weight_decay=opt.weightDecay)
    self.logger = {
        'train': open(os.path.join(opt.resume, 'train.log'), 'a+'),
        'val': open(os.path.join(opt.resume, 'val.log'), 'a+')
    }

def test_predict_after_train_d3_1():
    """ 1 point(s) """
    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)
    feat_extract = SimpleFeatureExtractor()
    word_embed = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM)
    act_chooser = ActionChooserNetwork(TEST_EMBEDDING_DIM * NUM_FEATURES)
    combiner = MLPCombinerNetwork(TEST_EMBEDDING_DIM)
    parser = TransitionParser(feat_extract, word_embed, act_chooser, combiner)

    # Train
    for i in xrange(75):
        train([(test_sent[:-1], gold)], parser,
              optim.SGD(parser.parameters(), lr=0.01), verbose=False)

    # predict
    pred = parser.predict(test_sent[:-1])
    gold_graph = dependency_graph_from_oracle(test_sent[:-1], gold)
    assert pred == gold_graph

def generative_fine_tune(dbn, lr=1e-2, epoch=100, batch_size=50, input_data=None,
                         CD_k=1, optimization_method="Adam", momentum=0,
                         weight_decay=0, test_input=None):
    if optimization_method == "RMSprop":
        optimizer = optim.RMSprop(dbn.parameters(), lr=lr, momentum=momentum,
                                  weight_decay=weight_decay)
    elif optimization_method == "SGD":
        optimizer = optim.SGD(dbn.parameters(), lr=lr, momentum=momentum,
                              weight_decay=weight_decay)
    elif optimization_method == "Adam":
        optimizer = optim.Adam(dbn.parameters(), lr=lr, weight_decay=weight_decay)
    for i in dbn.parameters():
        i.mean().backward()
    train_set = torch.utils.data.dataset.TensorDataset(
        input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                               shuffle=True)
    for i in range(epoch):
        for batch_idx, (data, target) in enumerate(train_loader):
            sleep_wake(dbn=dbn, optimizer=optimizer, lr=lr, CD_k=CD_k, v=data,
                       batch_size=batch_size)
        if not (type(test_input) == type(None)):
            print("fine tune", i, ais_dbn.logp_ais(self, test_input, step=1000,
                                                   M_Z=20, M_IS=100, parallel=True))

def joint_train(dbm, lr=1e-3, epoch=100, batch_size=50, input_data=None,
                weight_decay=0, k_positive=10, k_negative=10,
                alpha=[1e-1, 1e-1, 1]):
    u1 = nn.Parameter(torch.zeros(1))
    u2 = nn.Parameter(torch.zeros(1))
    # optimizer = optim.Adam(dbm.parameters(), lr = lr, weight_decay = weight_decay)
    optimizer = optim.SGD(dbm.parameters(), lr=lr, momentum=0.5)
    train_set = torch.utils.data.dataset.TensorDataset(
        input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                               shuffle=True)
    optimizer_u = optim.Adam([u1, u2], lr=lr / 1000, weight_decay=weight_decay)
    for _ in range(epoch):
        print("training epoch %i with u1 = %.4f, u2 = %.4f"
              % (_, u1.data.numpy()[0], u2.data.numpy()[0]))
        for batch_idx, (data, target) in enumerate(train_loader):
            data = Variable(data)
            positive_phase, negative_phase = dbm(v_input=data,
                                                 k_positive=k_positive,
                                                 k_negative=k_negative,
                                                 greedy=False)
            loss = (energy(dbm=dbm, layer=positive_phase)
                    - energy(dbm=dbm, layer=negative_phase)
                    + alpha[0] * torch.norm(torch.norm(dbm.W[0], 2, 1)
                                            - u1.repeat(dbm.W[0].size()[0], 1)) ** 2
                    + alpha[1] * torch.norm(torch.norm(dbm.W[1], 2, 1)
                                            - u2.repeat(dbm.W[1].size()[0], 1)) ** 2
                    + alpha[2] * (u1 - u2) ** 2)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            optimizer_u.step()
            optimizer_u.zero_grad()

def get_opt(name):
    opts = {
        'SGD': optim.SGD,
        'Adam': optim.Adam,
        'Adagrad': optim.Adagrad,
        'RMSprop': optim.RMSprop,
    }
    return opts[name]

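Note that this factory returns the optimizer class itself, not an instance; a hypothetical caller (the model variable below is a placeholder) would still construct the optimizer with the parameters and hyperparameters it needs:

opt_class = get_opt('SGD')
optimizer = opt_class(model.parameters(), lr=0.1, momentum=0.9)
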
def pretrain(self, x, pt_epochs, verbose=True):
    n = x.data.size()[0]
    num_batches = n / self.batch_size
    t = x

    # Pre-train 1 autoencoder at a time
    for i, ae_re in enumerate(self.autoencoders_ref):
        # Get the current autoencoder
        ae = getattr(self.sequential, ae_re)

        # Getting encoded output from the previous autoencoder
        if i > 0:
            # Set the requires_grad to False so that backprop doesn't
            # travel all the way back to the previous autoencoder
            temp = Variable(torch.FloatTensor(n, ae.d_in), requires_grad=False)
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                prev_ae = getattr(self.sequential, self.autoencoders_ref[i - 1])
                temp.data[start:end] = prev_ae.encode(t[start:end], add_noise=False).data
            t = temp
        optimizer = SGD(ae.parameters(), lr=self.pre_lr)

        # Pre-training
        print("Pre-training Autoencoder:", i)
        for ep in range(pt_epochs):
            agg_cost = 0.
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                bt = t[start:end]
                optimizer.zero_grad()
                z = ae.encode(bt, add_noise=True)
                z = ae.decode(z)
                loss = -torch.sum(bt * torch.log(z) + (1.0 - bt) * torch.log(1.0 - z), 1)
                cost = torch.mean(loss)
                cost.backward()
                optimizer.step()
                agg_cost += cost
            agg_cost /= num_batches
            if verbose:
                print("Pre-training Autoencoder:", i, "Epoch:", ep,
                      "Cost:", agg_cost.data[0])

def finetune(self, train_X, train_y, valid_X, valid_y, valid_actual_size,
             ft_epochs, verbose=True):
    n = train_X.data.size()[0]
    num_batches = n / self.batch_size
    n_v = valid_X.data.size()[0]
    num_batches_v = n_v / self.batch_size
    optimizer = SGD(self.parameters(), lr=self.ft_lr)
    loss = torch.nn.NLLLoss()
    for ef in range(ft_epochs):
        agg_cost = 0
        for k in range(num_batches):
            start, end = k * self.batch_size, (k + 1) * self.batch_size
            bX = train_X[start:end]
            by = train_y[start:end]
            optimizer.zero_grad()
            p = self.forward(bX)
            cost = loss.forward(p, by)
            agg_cost += cost
            cost.backward()
            optimizer.step()
        agg_cost /= num_batches
        preds = np.zeros((n_v, self.d_out))

        # Calculate accuracy on Validation set
        for k in range(num_batches_v):
            start, end = k * self.batch_size, (k + 1) * self.batch_size
            bX = valid_X[start:end]
            p = self.forward(bX).data.numpy()
            preds[start:end] = p
        correct = 0
        for actual, prediction in zip(valid_y[:valid_actual_size],
                                      preds[:valid_actual_size]):
            ind = np.argmax(prediction)
            actual = actual.data.numpy()
            if ind == actual:
                correct += 1
        if verbose:
            print("Fine-tuning Epoch:", ef, "Cost:", agg_cost.data[0],
                  "Validation Accuracy:",
                  "{0:.4f}".format(correct / float(valid_actual_size)))

def train(model, db, args, bsz=32, eph=1, use_cuda=False):
    print("Training...")
    trainloader = data_utils.DataLoader(dataset=db, batch_size=bsz, shuffle=True)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
    best_loss = 100000

    for epoch in range(eph):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0
            if last_loss < best_loss:
                best_loss = last_loss
                acc = evaluate(model, trainloader, use_cuda)
                torch.save(model.state_dict(),
                           os.path.join('saved_model',
                                        'cnnT1_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(
                                            epoch + 1, i, last_loss, acc,
                                            datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))

    acc = evaluate(model, trainloader, use_cuda)
    torch.save(model.state_dict(),
               os.path.join('saved_model', 'cnnT1_all_acc_{}.t7'.format(acc)))
    print("Finished Training!")

def __init__(self, input_file_name, output_file_name, emb_dimension=100,
             batch_size=100, window_size=5, iteration=5, initial_lr=0.025,
             min_count=5, using_hs=False, using_neg=False, context_size=2,
             hidden_size=128, cbow=None, skip_gram=None):
    print("\nInput File loading......\n")
    self.data = InputData(input_file_name, min_count)
    print("\nInput File loaded.\n")
    self.output_file_name = output_file_name
    self.emb_size = len(self.data.word2id)
    self.emb_dimension = emb_dimension
    self.batch_size = batch_size
    self.window_size = window_size
    self.iteration = iteration
    self.initial_lr = initial_lr
    self.context_size = context_size
    self.hidden_size = hidden_size
    self.using_hs = using_hs
    self.using_neg = using_neg
    self.cbow = cbow
    self.skip_gram = skip_gram
    if self.skip_gram is not None and self.skip_gram:
        self.skip_gram_model = SkipGramModel(self.emb_size, self.emb_dimension)
        print("skip_gram_model", self.skip_gram_model)
        self.optimizer = optim.SGD(self.skip_gram_model.parameters(), lr=self.initial_lr)
    if self.cbow is not None and self.cbow:
        self.cbow_model = CBOW(self.emb_size, self.emb_dimension)
        print("CBOW_model", self.cbow_model)
        self.optimizer = optim.SGD(self.cbow_model.parameters(), lr=self.initial_lr)

def __init__(self, opt, word_dict, feature_dict, state_dict=None):
    # Book-keeping.
    self.opt = opt
    self.word_dict = word_dict
    self.feature_dict = feature_dict
    self.updates = 0
    self.train_loss = AverageMeter()

    # Building network.
    self.network = RnnDocReader(opt)
    if state_dict:
        new_state = set(self.network.state_dict().keys())
        for k in list(state_dict['network'].keys()):
            if k not in new_state:
                del state_dict['network'][k]
        self.network.load_state_dict(state_dict['network'])

    # Building optimizer.
    parameters = [p for p in self.network.parameters() if p.requires_grad]
    if opt['optimizer'] == 'sgd':
        self.optimizer = optim.SGD(parameters, opt['learning_rate'],
                                   momentum=opt['momentum'],
                                   weight_decay=opt['weight_decay'])
    elif opt['optimizer'] == 'adamax':
        self.optimizer = optim.Adamax(parameters,
                                      weight_decay=opt['weight_decay'])
    else:
        raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])

def __init__(self, opt, embedding=None, state_dict=None):
    # Book-keeping.
    self.opt = opt
    self.updates = state_dict['updates'] if state_dict else 0
    self.train_loss = AverageMeter()

    # Building network.
    self.network = RnnDocReader(opt, embedding=embedding)
    if state_dict:
        new_state = set(self.network.state_dict().keys())
        for k in list(state_dict['network'].keys()):
            if k not in new_state:
                del state_dict['network'][k]
        self.network.load_state_dict(state_dict['network'])

    # Building optimizer.
    parameters = [p for p in self.network.parameters() if p.requires_grad]
    if opt['optimizer'] == 'sgd':
        self.optimizer = optim.SGD(parameters, opt['learning_rate'],
                                   momentum=opt['momentum'],
                                   weight_decay=opt['weight_decay'])
    elif opt['optimizer'] == 'adamax':
        self.optimizer = optim.Adamax(parameters, opt['learning_rate'],
                                      weight_decay=opt['weight_decay'])
    else:
        raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])
    if state_dict:
        self.optimizer.load_state_dict(state_dict['optimizer'])

    num_params = sum(p.data.numel() for p in parameters
                     if p.data.data_ptr() != self.network.embedding.weight.data.data_ptr())
    print("{} parameters".format(num_params))

def _makeOptimizer(self):
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)

def test(nnName, dataName, CUDA_DEVICE, epsilon, temperature):
    net1 = torch.load("../models/{}.pth".format(nnName))
    optimizer1 = optim.SGD(net1.parameters(), lr=0, momentum=0)
    net1.cuda(CUDA_DEVICE)

    if dataName != "Uniform" and dataName != "Gaussian":
        testsetout = torchvision.datasets.ImageFolder("../data/{}".format(dataName),
                                                      transform=transform)
        testloaderOut = torch.utils.data.DataLoader(testsetout, batch_size=1,
                                                    shuffle=False, num_workers=2)
    if nnName == "densenet10" or nnName == "wideresnet10":
        testset = torchvision.datasets.CIFAR10(root='../data', train=False,
                                               download=True, transform=transform)
        testloaderIn = torch.utils.data.DataLoader(testset, batch_size=1,
                                                   shuffle=False, num_workers=2)
    if nnName == "densenet100" or nnName == "wideresnet100":
        testset = torchvision.datasets.CIFAR100(root='../data', train=False,
                                                download=True, transform=transform)
        testloaderIn = torch.utils.data.DataLoader(testset, batch_size=1,
                                                   shuffle=False, num_workers=2)

    if dataName == "Gaussian":
        d.testGaussian(net1, criterion, CUDA_DEVICE, testloaderIn, testloaderIn,
                       nnName, dataName, epsilon, temperature)
        m.metric(nnName, dataName)
    elif dataName == "Uniform":
        d.testUni(net1, criterion, CUDA_DEVICE, testloaderIn, testloaderIn,
                  nnName, dataName, epsilon, temperature)
        m.metric(nnName, dataName)
    else:
        d.testData(net1, criterion, CUDA_DEVICE, testloaderIn, testloaderOut,
                   nnName, dataName, epsilon, temperature)
        m.metric(nnName, dataName)

def train(self, corpus):
    """Entry point."""
    N = len(corpus.word_dict)
    best_model, best_valid_select_loss = None, 1e100
    lr = self.args.lr
    last_decay_epoch = 0
    self.t = 0

    validdata = corpus.valid_dataset(self.args.bsz, device_id=self.device_id)
    for epoch in range(1, self.args.max_epoch + 1):
        traindata = corpus.train_dataset(self.args.bsz, device_id=self.device_id)
        _, _, valid_select_loss = self.iter(N, epoch, lr, traindata, validdata)

        if valid_select_loss < best_valid_select_loss:
            best_valid_select_loss = valid_select_loss
            best_model = copy.deepcopy(self.model)

    if self.verbose:
        print('| start annealing | best validselectloss %.3f | best validselectppl %.3f' % (
            best_valid_select_loss, np.exp(best_valid_select_loss)))

    self.model = best_model
    for epoch in range(self.args.max_epoch + 1, 100):
        if epoch - last_decay_epoch >= self.args.decay_every:
            last_decay_epoch = epoch
            lr /= self.args.decay_rate
            if lr < self.args.min_lr:
                break
            self.opt = optim.SGD(self.model.parameters(), lr=lr)

        traindata = corpus.train_dataset(self.args.bsz, device_id=self.device_id)
        train_loss, valid_loss, valid_select_loss = self.iter(
            N, epoch, lr, traindata, validdata)

    return train_loss, valid_loss, valid_select_loss

def __init__(self, model, args, name='Alice'):
    super(RlAgent, self).__init__(model, args, name=name)
    self.opt = optim.SGD(
        self.model.parameters(),
        lr=self.args.rl_lr,
        momentum=self.args.momentum,
        nesterov=(self.args.nesterov and self.args.momentum > 0))

    self.all_rewards = []

    if self.args.visual:
        self.model_plot = vis.ModulePlot(self.model, plot_weight=False, plot_grad=True)
        self.reward_plot = vis.Plot(['reward'], 'reward', 'reward')
        self.loss_plot = vis.Plot(['loss'], 'loss', 'loss')

    self.t = 0

def __init__(self, growthRate, depth, nClasses, epochs, t_0, scale_lr=True,
             how_scale='cubic', const_time=False, cfg=cfg['E'], batch_norm=True):
    super(DenseNet, self).__init__()

    self.epochs = epochs
    self.t_0 = t_0
    self.scale_lr = scale_lr
    self.how_scale = how_scale
    self.const_time = const_time
    self.layer_index = 0

    self.features = self.make_layers(cfg, batch_norm)
    self.classifier = nn.Sequential(
        nn.Linear(512, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(512, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(True),
        nn.BatchNorm1d(512),
        nn.Dropout(),
        nn.Linear(512, nClasses),
    )
    self.classifier.layer_index = self.layer_index
    self.classifier.active = True
    self._initialize_weights()

    # Optimizer
    self.optim = optim.SGD(
        [{'params': m.parameters(), 'lr': m.lr, 'layer_index': m.layer_index}
         for m in self.modules() if hasattr(m, 'active')],
        nesterov=True, momentum=0.9, weight_decay=1e-4)

    # Iteration Counter
    self.j = 0

    # A simple dummy variable that indicates we are using an iteration-wise
    # annealing scheme as opposed to epoch-wise.
    self.lr_sched = {'itr': 0}

def set_parameters(self, params):
    self.params = list(params)  # careful: params may be a generator
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)

def test_invalid_param_type(self):
    with self.assertRaises(TypeError):
        optim.SGD(Variable(torch.randn(5, 5)), lr=3)

def sgd(w, lr=0.1, m=0, damp=0, w_decay=0, nesterov=False):
    # torch.nn has no SGD; this wrapper targets torch.optim.SGD
    # (assuming the usual `import torch.optim as optim`).
    return optim.SGD(params=w, lr=lr, momentum=m, dampening=damp,
                     weight_decay=w_decay, nesterov=nesterov)

def initialize(mode, is_gpu, dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker):
    if 'TORCHVISION_MEMORY' == mode:
        trainloader, testloader, li_class = make_dataloader_torchvison_memory(
            dir_data, di_set_transform, n_img_per_batch, n_worker)
    elif 'TORCHVISION_IMAGEFOLDER' == mode:
        trainloader, testloader, li_class = make_dataloader_torchvison_imagefolder(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    elif 'CUSTOM_MEMORY' == mode:
        trainloader, testloader, li_class = make_dataloader_custom_memory(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    elif 'CUSTOM_FILE' == mode:
        trainloader, testloader, li_class = make_dataloader_custom_file(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    else:
        trainloader, testloader, li_class = make_dataloader_custom_tensordataset(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    #net = Net().cuda()
    net = Net()
    #t1 = net.cuda()
    criterion = nn.CrossEntropyLoss()
    if is_gpu:
        net.cuda()
        criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    # set up scheduler
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=1, patience=8,
                                  epsilon=0.00001, min_lr=0.000001)
    return trainloader, testloader, net, criterion, optimizer, scheduler, li_class

def _get_optimizer(self, model):
    if self.opt == 'MomentumSGD':
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    elif self.opt == "Adam":
        optimizer = optim.Adam(model.parameters())
    return optimizer

def __init__(self, deep):
    super(Net, self).__init__()
    self.deep = deep
    if deep:
        self.fc1 = nn.Linear(28*28, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 100)
        self.fc5 = nn.Linear(100, 100)
        self.fc6 = nn.Linear(100, 100)
        self.fc7 = nn.Linear(100, 100)
        self.fc8 = nn.Linear(100, 100)
        self.fc9 = nn.Linear(100, 100)
        self.fc10 = nn.Linear(100, 100)
        self.fc11 = nn.Linear(100, 100)
        self.fc12 = nn.Linear(100, 100)
        self.fc13 = nn.Linear(100, 100)
        self.fc14 = nn.Linear(100, 100)
        self.fc15 = nn.Linear(100, 100)
        self.fc16 = nn.Linear(100, 100)
        self.fc17 = nn.Linear(100, 100)
        self.fc18 = nn.Linear(100, 100)
        self.fc19 = nn.Linear(100, 100)
        self.fc20 = nn.Linear(100, 10)
        self.fcs = [self.fc1, self.fc2, self.fc3, self.fc4, self.fc5,
                    self.fc6, self.fc7, self.fc8, self.fc9, self.fc10,
                    self.fc11, self.fc12, self.fc13, self.fc14, self.fc15,
                    self.fc16, self.fc17, self.fc18, self.fc19, self.fc20]
    else:
        self.fc1 = nn.Linear(28*28, 150)
        self.fc2 = nn.Linear(150, 100)
        self.fc3 = nn.Linear(100, 50)
        self.fc4 = nn.Linear(50, 10)
        self.fc5 = nn.Linear(10, 10)
    self.optimizer = optim.SGD(self.parameters(), lr=args.lr, momentum=args.momentum)
    self.train_acc = []
    self.test_acc = []

def add_layer(self):
    if self.training_c:
        self.training_c.requires_grad = False
        self.frozen_c.append(self.training_c)
    try:
        self.training_c = self.standby_c.pop(0)
        self.training_cf = self.standby_cf.pop(0)
        trainable_params = [{'params': self.training_c.parameters()},
                            {'params': self.training_cf.parameters()}]
        self.optimizer = optim.SGD(trainable_params, lr=args.lr,
                                   momentum=args.momentum)
    except:
        print('No more standby layers!')

def setUp(self):
    self.net = SchedulerTestNet()
    self.opt = SGD(
        [{'params': self.net.conv1.parameters()},
         {'params': self.net.conv2.parameters(), 'lr': 0.5}],
        lr=0.05)

def __init__(self, opt, embedding=None, state_dict=None):
    # Book-keeping.
    self.opt = opt
    self.updates = state_dict['updates'] if state_dict else 0
    self.train_loss = AverageMeter()

    # Building network.
    self.network = RnnDocReader(opt, embedding=embedding)
    if state_dict:
        new_state = set(self.network.state_dict().keys())
        for k in list(state_dict['network'].keys()):
            if k not in new_state:
                del state_dict['network'][k]
        self.network.load_state_dict(state_dict['network'])

    # Building optimizer.
    parameters = [p for p in self.network.parameters() if p.requires_grad]
    if opt['optimizer'] == 'sgd':
        self.optimizer = optim.SGD(parameters, opt['learning_rate'],
                                   momentum=opt['momentum'],
                                   weight_decay=opt['weight_decay'])
    elif opt['optimizer'] == 'adamax':
        self.optimizer = optim.Adamax(parameters,
                                      weight_decay=opt['weight_decay'])
    else:
        raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])
    if state_dict:
        self.optimizer.load_state_dict(state_dict['optimizer'])

def _makeOptimizer(self):
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr, betas=(0.5, 0.999))
    else:
        raise RuntimeError("Invalid optim method: " + self.method)

def main():
    if torch.cuda.is_available():
        use_cuda = True
    else:
        use_cuda = False

    # Dataset
    trainset = datasets.MNIST('../../data', download=True, train=True,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307,), (0.3081,))]))
    train_loader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)

    # Model
    model = Net()

    # NLLLoss
    nllloss = nn.NLLLoss()  # CrossEntropyLoss = log_softmax + NLLLoss
    # CenterLoss
    loss_weight = 1.0
    centerloss = CenterLoss(10, 2, loss_weight)
    if use_cuda:
        nllloss = nllloss.cuda()
        centerloss = centerloss.cuda()
        model = model.cuda()
    criterion = [nllloss, centerloss]

    # optimzer4nn
    optimizer4nn = optim.SGD(model.parameters(), lr=0.001, momentum=0.9,
                             weight_decay=0.0005)
    sheduler = lr_scheduler.StepLR(optimizer4nn, 20, gamma=0.8)

    # optimzer4center
    optimzer4center = optim.SGD(centerloss.parameters(), lr=0.5)

    for epoch in range(50):
        sheduler.step()
        # print optimizer4nn.param_groups[0]['lr']
        train(train_loader, model, criterion, [optimizer4nn, optimzer4center],
              epoch + 1, use_cuda)

def adjust_learning_rate(optimizer):
    """Updates the learning rate given the learning rate decay.

    The routine has been implemented according to the original Lua SGD optimizer.
    """
    for group in optimizer.param_groups:
        if 'step' not in group:
            group['step'] = 0
        group['step'] += 1
        group['lr'] = args.lr / (1 + group['step'] * args.lr_decay)

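The routine mutates each param group in place and keeps its own 'step' counter, so it would typically be invoked once per parameter update. A hypothetical call site (model, criterion, train_loader, and args are placeholders, not taken from the project) might look like:

for inputs, targets in train_loader:
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()
    adjust_learning_rate(optimizer)  # lr decays as the per-group 'step' grows
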
def create_optimizer(model, new_lr):
    # setup optimizer
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=new_lr,
                              momentum=0.9, dampening=0.9,
                              weight_decay=args.wd)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=new_lr,
                               weight_decay=args.wd,
                               betas=(args.beta1, 0.999))
    elif args.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=new_lr,
                                  lr_decay=args.lr_decay,
                                  weight_decay=args.wd)
    return optimizer