The following are 50 code examples, extracted from open-source Python projects, that illustrate how to use torch.optim.Adam().
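All of the examples below share the same core pattern: build the optimizer from the model's parameters, then call zero_grad(), backward(), and step() inside the training loop. As a reference point, here is a minimal, self-contained sketch of that pattern; the toy model, data, and hyperparameter values are placeholders chosen for illustration, not taken from any of the projects quoted below.

import torch
import torch.nn as nn
import torch.optim as optim

# Toy model and data, used only to show the optimizer API.
model = nn.Linear(10, 1)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), weight_decay=0.0)

x = torch.randn(32, 10)
y = torch.randn(32, 1)

for step in range(100):
    optimizer.zero_grad()          # clear gradients from the previous step
    loss = criterion(model(x), y)  # forward pass
    loss.backward()                # compute gradients
    optimizer.step()               # Adam parameter update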
def build_model(self):
    """Builds a generator and a discriminator."""
    self.g12 = G12(self.config, conv_dim=self.g_conv_dim)
    self.g21 = G21(self.config, conv_dim=self.g_conv_dim)
    self.d1 = D1(conv_dim=self.d_conv_dim)
    self.d2 = D2(conv_dim=self.d_conv_dim)

    g_params = list(self.g12.parameters()) + list(self.g21.parameters())
    d_params = list(self.d1.parameters()) + list(self.d2.parameters())

    self.g_optimizer = optim.Adam(g_params, self.lr, [self.beta1, self.beta2])
    self.d_optimizer = optim.Adam(d_params, self.lr, [self.beta1, self.beta2])

    if torch.cuda.is_available():
        self.g12.cuda()
        self.g21.cuda()
        self.d1.cuda()
        self.d2.cuda()
def test_adam(self):
    self._test_rosenbrock(
        lambda params: optim.Adam(params, lr=1e-2),
        wrap_old_fn(old_optim.adam, learningRate=1e-2)
    )
    self._test_rosenbrock(
        lambda params: optim.Adam(params, lr=1e-2, weight_decay=1e-2),
        wrap_old_fn(old_optim.adam, learningRate=1e-2, weightDecay=1e-2)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.Adam([weight, bias], lr=1e-3)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.Adam(
            self._build_params_dict(weight, bias, lr=1e-2),
            lr=1e-3)
    )
def build_model(self):
    """Builds a generator and a discriminator."""
    self.g12 = G12(conv_dim=self.g_conv_dim)
    self.g21 = G21(conv_dim=self.g_conv_dim)
    self.d1 = D1(conv_dim=self.d_conv_dim, use_labels=self.use_labels)
    self.d2 = D2(conv_dim=self.d_conv_dim, use_labels=self.use_labels)

    g_params = list(self.g12.parameters()) + list(self.g21.parameters())
    d_params = list(self.d1.parameters()) + list(self.d2.parameters())

    self.g_optimizer = optim.Adam(g_params, self.lr, [self.beta1, self.beta2])
    self.d_optimizer = optim.Adam(d_params, self.lr, [self.beta1, self.beta2])

    if torch.cuda.is_available():
        self.g12.cuda()
        self.g21.cuda()
        self.d1.cuda()
        self.d2.cuda()
def train(self):
    optimizer = O.Adam(self.model.parameters())
    t = tqdm.tqdm()
    for epoch_id in range(self.epochs):
        for x, y in self.data_generator:
            if self.model.W.weight.is_cuda:
                x = x.cuda()
                y = y.cuda()
            optimizer.zero_grad()
            loss = self.run_loss(x, y)
            loss.backward()
            optimizer.step()
            loss_val = loss.data[0]
            t.set_description("loss: {}".format(loss_val))
            t.update()
def __init__(self, args, mapping):
    super(CharLM, self).__init__()
    self.batch_size = args.batch_size
    self.seq_length = args.seq_length
    self.vocab_size = args.vocab_size
    self.embedding_dim = args.embedding_dim
    self.layer_num = args.layer_num
    self.dropout_prob = args.dropout_prob
    self.lr = args.lr

    self.char_embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
    self.dropout = nn.Dropout(self.dropout_prob)
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.embedding_dim,
                        num_layers=self.layer_num,
                        dropout=self.dropout_prob)
    self.fc = nn.Linear(self.embedding_dim, self.vocab_size)
    self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
    self.mapping = mapping
def __init__(self, args, attr_size, node_size):
    super(TreeLM, self).__init__()
    self.batch_size = args.batch_size
    self.seq_length = args.seq_length
    self.attr_size = attr_size
    self.node_size = node_size
    self.embedding_dim = args.embedding_dim
    self.layer_num = args.layer_num
    self.dropout_prob = args.dropout_prob
    self.lr = args.lr

    self.attr_embedding = nn.Embedding(self.attr_size, self.embedding_dim)
    self.dropout = nn.Dropout(self.dropout_prob)
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.embedding_dim,
                        num_layers=self.layer_num,
                        dropout=self.dropout_prob)
    self.fc = nn.Linear(self.embedding_dim, self.node_size)
    self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
    # self.node_mapping = node_mapping
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
    if not hasattr(self, 'criterion'):
        self.criterion = nn.NLLLoss()
    if not hasattr(self, 'optimizer'):
        self.optimizer = optim.Adam(self.parameters(), lr=self.options['LR'],
                                    betas=(0.9, 0.999), eps=1e-08,
                                    weight_decay=self.options['L2'])

    self.optimizer.zero_grad()
    preds = self.__call__(premise_batch, hypothesis_batch, training=True)
    loss = self.criterion(preds, y_batch)
    loss.backward()
    self.optimizer.step()

    _, pred_labels = torch.max(preds, dim=-1, keepdim=True)
    y_true = self._get_numpy_array_from_variable(y_batch)
    y_pred = self._get_numpy_array_from_variable(pred_labels)
    acc = accuracy_score(y_true, y_pred)

    ret_loss = self._get_numpy_array_from_variable(loss)[0]
    return ret_loss, acc
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
    if not hasattr(self, 'criterion'):
        self.criterion = nn.NLLLoss()
    if not hasattr(self, 'optimizer'):
        self.optimizer = optim.Adam(self.parameters(), lr=self.options['LR'],
                                    betas=(0.9, 0.999), eps=1e-08,
                                    weight_decay=self.options['L2'])

    self.optimizer.zero_grad()
    preds = self.__call__(premise_batch, hypothesis_batch, training=True)
    loss = self.criterion(preds, y_batch)
    loss.backward()
    self.optimizer.step()

    _, pred_labels = torch.max(preds, dim=-1, keepdim=True)
    y_true = self._get_numpy_array_from_variable(y_batch)
    y_pred = self._get_numpy_array_from_variable(pred_labels)
    acc = accuracy_score(y_true, y_pred)

    ret_loss = self._get_numpy_array_from_variable(loss)[0]
    return ret_loss, acc
def run_rmse_net(model, variables, X_train, Y_train):
    opt = optim.Adam(model.parameters(), lr=1e-3)

    for i in range(1000):
        opt.zero_grad()
        model.train()
        train_loss = nn.MSELoss()(
            model(variables['X_train_'])[0], variables['Y_train_'])
        train_loss.backward()
        opt.step()

        model.eval()
        test_loss = nn.MSELoss()(
            model(variables['X_test_'])[0], variables['Y_test_'])

        print(i, train_loss.data[0], test_loss.data[0])

    model.eval()
    model.set_sig(variables['X_train_'], variables['Y_train_'])

    return model

# TODO: minibatching
def get_optimizer(args, params):
    if args.dataset == 'mnist':
        if args.model == 'optnet-eq':
            params = list(params)
            A_param = params.pop(0)
            assert(A_param.size() == (args.neq, args.nHidden))
            optimizer = optim.Adam([
                {'params': params, 'lr': 1e-3},
                {'params': [A_param], 'lr': 1e-1}
            ])
        else:
            optimizer = optim.Adam(params)
    elif args.dataset in ('cifar-10', 'cifar-100'):
        if args.opt == 'sgd':
            optimizer = optim.SGD(params, lr=1e-1, momentum=0.9,
                                  weight_decay=args.weightDecay)
        elif args.opt == 'adam':
            optimizer = optim.Adam(params, weight_decay=args.weightDecay)
    else:
        assert(False)
    return optimizer
def train_epoch(self, X, y, show_bar=True):
    optimizer = optim.Adam(self.parameters())
    if show_bar:
        bar = Progbar(len(X))
    for ix, (elem, tags) in enumerate(zip(X, y)):
        self.zero_grad()
        sentence, feature_vector, sentence_markers = self.get_sentence_feature_vector(elem)
        if self.GPU:
            targets = torch.LongTensor(tags).cuda()
        else:
            targets = torch.LongTensor(tags)
        neg_log_likelihood = self.neg_log_likelihood(sentence, feature_vector, targets)
        neg_log_likelihood.backward()
        optimizer.step()
        if show_bar:
            bar.update(ix + 1)
    if show_bar:
        print ''
        sys.stdout.flush()
def test_kissgp_classification_error():
    model = GPClassificationModel()

    # Find optimal model hyperparameters
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.15)
    optimizer.n_iter = 0
    for i in range(20):
        optimizer.zero_grad()
        output = model.forward(train_x)
        loss = -model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Set back to eval mode
    model.eval()
    test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    assert(mean_abs_error.data.squeeze()[0] < 1e-5)
def test_kissgp_classification_error():
    model = GPClassificationModel()

    # Find optimal model hyperparameters
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.15)
    optimizer.n_iter = 0
    for i in range(200):
        optimizer.zero_grad()
        output = model.forward(train_x)
        loss = -model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Set back to eval mode
    model.eval()
    test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    assert(mean_abs_error.data.squeeze()[0] < 1e-5)
def test_kissgp_classification_error():
    train_x, train_y = train_data()
    model = GPClassificationModel(train_x.data)

    # Find optimal model hyperparameters
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.1)
    optimizer.n_iter = 0
    for i in range(50):
        optimizer.zero_grad()
        output = model.forward(train_x)
        loss = -model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Set back to eval mode
    model.eval()
    test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    assert(mean_abs_error.data.squeeze()[0] < 1e-5)
def test_kissgp_classification_error_cuda():
    if torch.cuda.is_available():
        train_x, train_y = train_data(cuda=True)
        model = GPClassificationModel(train_x.data).cuda()
        model.condition(train_x, train_y)

        # Find optimal model hyperparameters
        model.train()
        optimizer = optim.Adam(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for i in range(50):
            optimizer.zero_grad()
            output = model.forward(train_x)
            loss = -model.marginal_log_likelihood(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Set back to eval mode
        model.eval()
        test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        assert(mean_abs_error.data.squeeze()[0] < 1e-5)
def test_kissgp_classification_error():
    gpytorch.functions.use_toeplitz = False
    model = GPClassificationModel()

    # Find optimal model hyperparameters
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.15)
    optimizer.n_iter = 0
    for i in range(100):
        optimizer.zero_grad()
        output = model.forward(train_x)
        loss = -model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Set back to eval mode
    model.eval()
    test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    gpytorch.functions.use_toeplitz = True
    assert(mean_abs_error.data.squeeze()[0] < 5e-2)
def test_spectral_mixture_gp_mean_abs_error():
    gp_model = SpectralMixtureGPModel()

    # Optimize the model
    gp_model.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    optimizer.n_iter = 0
    gpytorch.functions.fastest = False
    for i in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -gp_model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Test the model
    gp_model.eval()
    gp_model.condition(train_x, train_y)
    test_preds = gp_model(test_x).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))

    # The spectral mixture kernel should be trivially able to extrapolate the sine function.
    assert(mean_abs_error.data.squeeze()[0] < 0.05)
def test_kissgp_gp_mean_abs_error():
    gp_model = GPRegressionModel()

    # Optimize the model
    gp_model.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.2)
    optimizer.n_iter = 0
    for i in range(20):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -gp_model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Test the model
    gp_model.eval()
    gp_model.condition(train_x, train_y)
    test_preds = gp_model(test_x).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
    assert(mean_abs_error.data.squeeze()[0] < 0.1)
def test_kissgp_gp_mean_abs_error_cuda():
    if torch.cuda.is_available():
        train_x, train_y, test_x, test_y = make_data(cuda=True)
        gp_model = GPRegressionModel().cuda()

        # Optimize the model
        gp_model.train()
        optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for i in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -gp_model.marginal_log_likelihood(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Test the model
        gp_model.eval()
        gp_model.condition(train_x, train_y)
        test_preds = gp_model(test_x).mean()
        mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
        assert(mean_abs_error.data.squeeze()[0] < 0.02)
def __init__(self, model, criterion, opt, optimState):
    self.model = model
    self.criterion = criterion
    self.optimState = optimState
    if self.optimState == None:
        self.optimState = {
            'learningRate': opt.LR,
            'learningRateDecay': opt.LRDParam,
            'momentum': opt.momentum,
            'nesterov': False,
            'dampening': opt.dampening,
            'weightDecay': opt.weightDecay
        }
    self.opt = opt

    if opt.optimizer == 'SGD':
        self.optimizer = optim.SGD(model.parameters(), lr=opt.LR,
                                   momentum=opt.momentum, dampening=opt.dampening,
                                   weight_decay=opt.weightDecay)
    elif opt.optimizer == 'Adam':
        self.optimizer = optim.Adam(model.parameters(), lr=opt.LR,
                                    betas=(opt.momentum, 0.999), eps=1e-8,
                                    weight_decay=opt.weightDecay)

    self.logger = {
        'train': open(os.path.join(opt.resume, 'train.log'), 'a+'),
        'val': open(os.path.join(opt.resume, 'val.log'), 'a+')
    }
def __init__(self, model, criterion, opt, optimState):
    self.model = model
    self.criterion = criterion
    self.optimState = optimState
    if self.optimState == None:
        self.optimState = {
            'learningRate': opt.LR,
            'learningRateDecay': opt.LRDParam,
            'momentum': opt.momentum,
            'nesterov': False,
            'dampening': opt.dampening,
            'weightDecay': opt.weightDecay
        }
    self.opt = opt

    if opt.optimizer == 'SGD':
        self.optimizer = optim.SGD(model.parameters(), lr=opt.LR,
                                   momentum=opt.momentum, dampening=opt.dampening,
                                   weight_decay=opt.weightDecay)
    elif opt.optimizer == 'Adam':
        self.optimizer = optim.Adam(model.parameters(), lr=opt.LR,
                                    betas=(0.9, 0.999), eps=1e-8,
                                    weight_decay=opt.weightDecay)

    self.logger = {
        'train': open(os.path.join(opt.resume, 'train.log'), 'a+'),
        'val': open(os.path.join(opt.resume, 'val.log'), 'a+')
    }
def __init__(self):
    super(Generator, self).__init__()
    self.main = nn.Sequential(
        nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
        nn.BatchNorm2d(ngf * 8),
        nn.ReLU(True),
        nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ngf * 4),
        nn.ReLU(True),
        nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ngf * 2),
        nn.ReLU(True),
        nn.ConvTranspose2d(ngf * 2, ngf * 1, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ngf * 1),
        nn.ReLU(True),
        nn.ConvTranspose2d(ngf * 1, nc, 4, 2, 1, bias=False),
        nn.Tanh()
    )
    self.apply(weights_init)
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate, betas=(beta_1, beta_2))
    # self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate, alpha=beta_2)
def __init__(self):
    super(Discriminator, self).__init__()
    self.main = nn.Sequential(
        nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ndf * 2),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ndf * 4),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ndf * 8),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
        nn.Sigmoid()
    )
    self.apply(weights_init)
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate, betas=(beta_1, beta_2))
    # self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate, alpha=beta_2)
def __init__(self):
    super(Discriminator, self).__init__()
    self.conv0 = nn.Conv1d(nc, ndf, 4, 2, 1, bias=False)
    self.conv1 = nn.Conv1d(ndf, ndf * 2, 4, 2, 1, bias=False)
    self.conv2 = nn.Conv1d(ndf * 2, ndf * 4, 4, 2, 1, bias=False)
    self.conv3 = nn.Conv1d(ndf * 4, ndf * 8, 4, 2, 1, bias=False)
    self.fc0_size = 512 * 128
    self.fc0 = nn.Linear(self.fc0_size, 100)
    self.relu = nn.LeakyReLU(0.2, inplace=True)
    self.bn1 = nn.BatchNorm1d(ndf * 2)
    self.bn2 = nn.BatchNorm1d(ndf * 4)
    self.bn3 = nn.BatchNorm1d(ndf * 8)
    self.sigmoid = nn.Sigmoid()
    self.apply(weights_init)
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate, betas=(beta_1, beta_2))
    # self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate, alpha=beta_2)
def train(self, lr, iters, batch_size=256):
    optimizer = optim.Adam(self.parameters(), lr=lr)
    t = trange(iters)
    for i in t:
        optimizer.zero_grad()
        inds = torch.floor(torch.rand(batch_size) * self.M).long().cuda()
        # bug: floor(rand()) sometimes gives 1
        inds[inds >= self.M] = self.M - 1
        inds = Variable(inds)
        loss = self.forward(inds)
        # print loss.data[0]
        t.set_description(str(loss.data[0]))
        loss.backward()
        optimizer.step()
    return self.state_model, self.goal_model
def train(self, lr, iters):
    optimizer = optim.Adam(self.parameters(), lr=lr)
    t = trange(iters)
    for i in t:
        optimizer.zero_grad()
        loss = self.forward(())
        # print loss.data[0]
        t.set_description('%.3f | %.3f | %.3f | %.3f' % (
            self.mse, self.divergence, self.world_mse, self.location_mse))
        loss.backward()
        optimizer.step()

    U, V = self.__lookup()
    recon = torch.mm(U, V.t())
    # print U, V, recon

    U = U.data.cpu().numpy()
    V = V.data.cpu().numpy()
    recon = recon.data.cpu().numpy()
    return U, V, recon
def generative_fine_tune(dbn, lr=1e-2, epoch=100, batch_size=50, input_data=None,
                         CD_k=1, optimization_method="Adam", momentum=0,
                         weight_decay=0, test_input=None):
    if optimization_method == "RMSprop":
        optimizer = optim.RMSprop(dbn.parameters(), lr=lr, momentum=momentum,
                                  weight_decay=weight_decay)
    elif optimization_method == "SGD":
        optimizer = optim.SGD(dbn.parameters(), lr=lr, momentum=momentum,
                              weight_decay=weight_decay)
    elif optimization_method == "Adam":
        optimizer = optim.Adam(dbn.parameters(), lr=lr, weight_decay=weight_decay)
    for i in dbn.parameters():
        i.mean().backward()
    train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    for i in range(epoch):
        for batch_idx, (data, target) in enumerate(train_loader):
            sleep_wake(dbn=dbn, optimizer=optimizer, lr=lr, CD_k=CD_k, v=data,
                       batch_size=batch_size)
        if not (type(test_input) == type(None)):
            print("fine tune", i, ais_dbn.logp_ais(self, test_input, step=1000,
                                                   M_Z=20, M_IS=100, parallel=True))
def joint_train(dbm, lr=1e-3, epoch=100, batch_size=50, input_data=None,
                weight_decay=0, k_positive=10, k_negative=10, alpha=[1e-1, 1e-1, 1]):
    u1 = nn.Parameter(torch.zeros(1))
    u2 = nn.Parameter(torch.zeros(1))
    # optimizer = optim.Adam(dbm.parameters(), lr = lr, weight_decay = weight_decay)
    optimizer = optim.SGD(dbm.parameters(), lr=lr, momentum=0.5)
    train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    optimizer_u = optim.Adam([u1, u2], lr=lr / 1000, weight_decay=weight_decay)
    for _ in range(epoch):
        print("training epoch %i with u1 = %.4f, u2 = %.4f" % (_, u1.data.numpy()[0], u2.data.numpy()[0]))
        for batch_idx, (data, target) in enumerate(train_loader):
            data = Variable(data)
            positive_phase, negative_phase = dbm(v_input=data, k_positive=k_positive,
                                                 k_negative=k_negative, greedy=False)
            loss = energy(dbm=dbm, layer=positive_phase) - energy(dbm=dbm, layer=negative_phase) \
                + alpha[0] * torch.norm(torch.norm(dbm.W[0], 2, 1) - u1.repeat(dbm.W[0].size()[0], 1)) ** 2 \
                + alpha[1] * torch.norm(torch.norm(dbm.W[1], 2, 1) - u2.repeat(dbm.W[1].size()[0], 1)) ** 2 \
                + alpha[2] * (u1 - u2) ** 2
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            optimizer_u.step()
            optimizer_u.zero_grad()
def train(rbm, lr=1e-3, epoch=100, batch_size=50, input_data=None, weight_decay=0,
          L1_penalty=0, test_set=None, CD_k=10):
    train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    optimizer = optim.Adam(rbm.parameters(), lr=lr, weight_decay=weight_decay)
    for i in range(epoch):
        for batch_idx, (data, target) in enumerate(train_loader):
            input_data = Variable(data)
            v, v_ = rbm(input_data, CD_k=CD_k)
            loss = rbm.free_energy(v) - rbm.free_energy(v_.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        if not type(test_set) == type(None):
            print("epoch %i: " % i, reconstruct_error(rbm, Variable(test_set)))
def get_opt(name):
    opts = {
        'SGD': optim.SGD,
        'Adam': optim.Adam,
        'Adagrad': optim.Adagrad,
        'RMSprop': optim.RMSprop,
    }
    return opts[name]
def train_epochs(model, loss_fn, init_lr, model_dir):
    if os.path.exists(model_dir):
        shutil.rmtree(model_dir)
    os.makedirs(model_dir)

    optimizer = optim.Adam(model.parameters(), lr=init_lr)  # setup the optimizer

    learning_rate = init_lr
    max_iter = 5
    start_halfing_iter = 2
    halfing_factor = 0.1

    count = 0
    half_flag = False

    while count < max_iter:
        count += 1

        if count >= start_halfing_iter:
            half_flag = True

        print("Starting epoch", count)

        if half_flag:
            learning_rate *= halfing_factor
            adjust_learning_rate(optimizer, halfing_factor)  # decay learning rate

        model_path = model_dir + '/epoch' + str(count) + '_lr' + str(learning_rate) + '.pkl'

        train_one_epoch(model, loss_fn, optimizer)  # train one epoch
        torch.save(model.state_dict(), model_path)

    print("End training")
def __init__(self, train, valid, test, config):
    # fix seed
    self.seed = config['seed']
    np.random.seed(self.seed)
    torch.manual_seed(self.seed)
    torch.cuda.manual_seed(self.seed)

    self.train = train
    self.valid = valid
    self.test = test

    self.imgdim = len(train['imgfeat'][0])
    self.sentdim = len(train['sentfeat'][0])
    self.projdim = config['projdim']
    self.margin = config['margin']

    self.batch_size = 128
    self.ncontrast = 30
    self.maxepoch = 20
    self.early_stop = True

    config_model = {'imgdim': self.imgdim, 'sentdim': self.sentdim,
                    'projdim': self.projdim}
    self.model = COCOProjNet(config_model).cuda()

    self.loss_fn = PairwiseRankingLoss(margin=self.margin).cuda()

    self.optimizer = optim.Adam(self.model.parameters())
def __init__(self, train, valid, test, devscores, config):
    # fix seed
    np.random.seed(config['seed'])
    torch.manual_seed(config['seed'])
    assert torch.cuda.is_available(), 'torch.cuda required for Relatedness'
    torch.cuda.manual_seed(config['seed'])

    self.train = train
    self.valid = valid
    self.test = test
    self.devscores = devscores

    self.inputdim = train['X'].shape[1]
    self.nclasses = config['nclasses']
    self.seed = config['seed']
    self.l2reg = 0.
    self.batch_size = 64
    self.maxepoch = 1000
    self.early_stop = True

    self.model = nn.Sequential(
        nn.Linear(self.inputdim, self.nclasses),
        nn.Softmax(),
    )
    self.loss_fn = nn.MSELoss()

    if torch.cuda.is_available():
        self.model = self.model.cuda()
        self.loss_fn = self.loss_fn.cuda()

    self.loss_fn.size_average = False
    self.optimizer = optim.Adam(self.model.parameters(), weight_decay=self.l2reg)
def get_optimizer(encoder, decoder, step=None, state=None, lr=0.0001):
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=lr)
    if not state:
        state = load_state(step)
    if state:
        encoder_optimizer.load_state_dict(state['encoder_optim'])
        decoder_optimizer.load_state_dict(state['decoder_optim'])
    return encoder_optimizer, decoder_optimizer
def __init__(self):
    if use_cuda:
        self.encoder = EncoderRNN().cuda()
        self.decoder = DecoderRNN().cuda()
    else:
        self.encoder = EncoderRNN()
        self.decoder = DecoderRNN()

    self.encoder_optimizer = optim.Adam(self.encoder.parameters(), hp.lr)
    self.decoder_optimizer = optim.Adam(self.decoder.parameters(), hp.lr)
    self.eta_step = hp.eta_min
def train(self):
    optimizer = O.Adam([p for p in self.model.parameters() if p.requires_grad])
    step = 0
    t = tqdm.tqdm()
    for epoch in range(self.n_epochs):
        for data in self.data_generator:
            step += 1
            optimizer.zero_grad()
            if step % self.val_period == 0:
                loss_b, loss_s = self.step_val(step, data)
            else:
                loss_b, loss_s = self.step_train(step, data)
            loss_b.backward()
            clip_grad_norm(self.model.parameters(), 10)
            optimizer.step()
            loss_val = loss_s.data[0]
            if step % self.save_period == 0:
                filename = self.ckpt_format.format(
                    epoch="{:02d}".format(epoch),
                    step="{:07d}".format(step),
                    loss="{:.4f}".format(loss_val)
                )
                self.save(filename)
            t.set_description("[{}|{}]: loss={:.4f}".format(
                epoch, step, loss_val
            ))
            t.update()
def build_model(self):
    """Build generator and discriminator."""
    self.generator = Generator(z_dim=self.z_dim,
                               image_size=self.image_size,
                               conv_dim=self.g_conv_dim)
    self.discriminator = Discriminator(image_size=self.image_size,
                                       conv_dim=self.d_conv_dim)
    self.g_optimizer = optim.Adam(self.generator.parameters(),
                                  self.lr, [self.beta1, self.beta2])
    self.d_optimizer = optim.Adam(self.discriminator.parameters(),
                                  self.lr, [self.beta1, self.beta2])

    if torch.cuda.is_available():
        self.generator.cuda()
        self.discriminator.cuda()
def _makeOptimizer(self):
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
def get_optimizer(net, name="Adam"):
    """Get optimizer by name."""
    if name == "Adam":
        return optim.Adam(net.parameters(),
                          lr=params.learning_rate,
                          betas=(params.beta1, params.beta2))
def choose_optimizer(args, model):
    if args.optim == 'adam':
        return optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adagrad':
        # optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
        return optim.Adagrad([
            {'params': model.parameters(), 'lr': args.lr}
        ], lr=args.lr, weight_decay=args.wd)
def __init__(self, args):
    super(RN, self).__init__(args, 'RN')

    self.conv = ConvInputModel()

    ## (number of filters per object + coordinate of object) * 2 + question vector
    self.g_fc1 = nn.Linear((24 + 2) * 2 + 11, 256)
    self.g_fc2 = nn.Linear(256, 256)
    self.g_fc3 = nn.Linear(256, 256)
    self.g_fc4 = nn.Linear(256, 256)

    self.f_fc1 = nn.Linear(256, 256)

    self.coord_oi = torch.FloatTensor(args.batch_size, 2)
    self.coord_oj = torch.FloatTensor(args.batch_size, 2)
    if args.cuda:
        self.coord_oi = self.coord_oi.cuda()
        self.coord_oj = self.coord_oj.cuda()
    self.coord_oi = Variable(self.coord_oi)
    self.coord_oj = Variable(self.coord_oj)

    # prepare coord tensor
    def cvt_coord(i):
        return [(i / 5 - 2) / 2., (i % 5 - 2) / 2.]

    self.coord_tensor = torch.FloatTensor(args.batch_size, 25, 2)
    if args.cuda:
        self.coord_tensor = self.coord_tensor.cuda()
    self.coord_tensor = Variable(self.coord_tensor)
    np_coord_tensor = np.zeros((args.batch_size, 25, 2))
    for i in range(25):
        np_coord_tensor[:, i, :] = np.array(cvt_coord(i))
    self.coord_tensor.data.copy_(torch.from_numpy(np_coord_tensor))

    self.fcout = FCOutputModel()

    self.optimizer = optim.Adam(self.parameters(), lr=args.lr)
def __init__(self, args):
    super(CNN_MLP, self).__init__(args, 'CNNMLP')

    self.conv = ConvInputModel()
    self.fc1 = nn.Linear(5 * 5 * 24 + 11, 256)  # question concatenated to all
    self.fcout = FCOutputModel()

    self.optimizer = optim.Adam(self.parameters(), lr=args.lr)
    # print([a for a in self.parameters()])