The following code examples, extracted from open-source Python projects, illustrate how to use torch.nn.Embedding().
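Before the project examples, here is a minimal sketch of the core nn.Embedding API: construction, index lookup, padding_idx, and the pretrained-weight pattern that recurs in many of the snippets below. The sizes, index values, and the random `pretrained` tensor are arbitrary placeholders for illustration, not taken from any of the projects.

import torch
import torch.nn as nn

# An embedding table with 10 rows (vocabulary size) and 3 columns (embedding dim).
# padding_idx=0 initialises row 0 to zeros and keeps its gradient at zero.
embed = nn.Embedding(num_embeddings=10, embedding_dim=3, padding_idx=0)

# Lookup takes integer (LongTensor) indices of any shape and returns a float
# tensor with a trailing dimension of size embedding_dim.
indices = torch.LongTensor([[1, 2, 0], [4, 5, 0]])  # (batch=2, seq_len=3)
vectors = embed(indices)                            # shape (2, 3, 3)

# A pattern that recurs in the examples below: copy pretrained word vectors
# into the weight matrix and optionally freeze them.
pretrained = torch.randn(10, 3)        # stand-in for real pretrained vectors
embed.weight.data.copy_(pretrained)
embed.weight.requires_grad = False     # exclude from gradient updates

# Recent PyTorch versions also provide a one-step constructor:
# embed = nn.Embedding.from_pretrained(pretrained, freeze=True)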
def __init__(self, config):
    super(SNLIClassifier, self).__init__()
    self.config = config
    self.embed = nn.Embedding(config.n_embed, config.d_embed)
    self.projection = Linear(config.d_embed, config.d_proj)
    self.embed_bn = BatchNorm(config.d_proj)
    self.embed_dropout = nn.Dropout(p=config.embed_dropout)
    self.encoder = SPINN(config) if config.spinn else Encoder(config)
    feat_in_size = config.d_hidden * (
        2 if self.config.birnn and not self.config.spinn else 1)
    self.feature = Feature(feat_in_size, config.mlp_dropout)
    self.mlp_dropout = nn.Dropout(p=config.mlp_dropout)
    self.relu = nn.ReLU()
    mlp_in_size = 4 * feat_in_size
    mlp = [nn.Linear(mlp_in_size, config.d_mlp), self.relu,
           nn.BatchNorm1d(config.d_mlp), self.mlp_dropout]
    for i in range(config.n_mlp_layers - 1):
        mlp.extend([nn.Linear(config.d_mlp, config.d_mlp), self.relu,
                    nn.BatchNorm1d(config.d_mlp), self.mlp_dropout])
    mlp.append(nn.Linear(config.d_mlp, config.d_out))
    self.out = nn.Sequential(*mlp)

def __init__(self, args):
    super(CNN_Text, self).__init__()
    self.args = args

    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    Ci = 1
    Co = args.kernel_num
    Ks = args.kernel_sizes

    self.embed = nn.Embedding(V, D)
    # self.convs1 = [nn.Conv2d(Ci, Co, (K, D)) for K in Ks]
    self.convs1 = nn.ModuleList([nn.Conv2d(Ci, Co, (K, D)) for K in Ks])
    '''
    self.conv13 = nn.Conv2d(Ci, Co, (3, D))
    self.conv14 = nn.Conv2d(Ci, Co, (4, D))
    self.conv15 = nn.Conv2d(Ci, Co, (5, D))
    '''
    self.dropout = nn.Dropout(args.dropout)
    self.fc1 = nn.Linear(len(Ks) * Co, C)

def __init__(
        self, n_src_vocab, n_max_seq, n_layers=6, n_head=8,
        d_k=64, d_v=64, d_word_vec=512, d_model=512,
        d_inner_hid=1024, dropout=0.1):

    super(Encoder, self).__init__()

    n_position = n_max_seq + 1
    self.n_max_seq = n_max_seq
    self.d_model = d_model

    self.position_enc = nn.Embedding(n_position, d_word_vec, padding_idx=Constants.PAD)
    self.position_enc.weight.data = position_encoding_init(n_position, d_word_vec)

    self.src_word_emb = nn.Embedding(n_src_vocab, d_word_vec, padding_idx=Constants.PAD)

    self.layer_stack = nn.ModuleList([
        EncoderLayer(d_model, d_inner_hid, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])

def __init__(
        self, n_tgt_vocab, n_max_seq, n_layers=6, n_head=8,
        d_k=64, d_v=64, d_word_vec=512, d_model=512,
        d_inner_hid=1024, dropout=0.1):

    super(Decoder, self).__init__()

    n_position = n_max_seq + 1
    self.n_max_seq = n_max_seq
    self.d_model = d_model

    self.position_enc = nn.Embedding(
        n_position, d_word_vec, padding_idx=Constants.PAD)
    self.position_enc.weight.data = position_encoding_init(n_position, d_word_vec)

    self.tgt_word_emb = nn.Embedding(
        n_tgt_vocab, d_word_vec, padding_idx=Constants.PAD)
    self.dropout = nn.Dropout(dropout)

    self.layer_stack = nn.ModuleList([
        DecoderLayer(d_model, d_inner_hid, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])

def __init__(self, args, mapping):
    super(CharLM, self).__init__()
    self.batch_size = args.batch_size
    self.seq_length = args.seq_length
    self.vocab_size = args.vocab_size
    self.embedding_dim = args.embedding_dim
    self.layer_num = args.layer_num
    self.dropout_prob = args.dropout_prob
    self.lr = args.lr

    self.char_embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
    self.dropout = nn.Dropout(self.dropout_prob)
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.embedding_dim,
                        num_layers=self.layer_num,
                        dropout=self.dropout_prob)
    self.fc = nn.Linear(self.embedding_dim, self.vocab_size)
    self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
    self.mapping = mapping

def __init__(self, args, attr_size, node_size):
    super(TreeLM, self).__init__()
    self.batch_size = args.batch_size
    self.seq_length = args.seq_length
    self.attr_size = attr_size
    self.node_size = node_size
    self.embedding_dim = args.embedding_dim
    self.layer_num = args.layer_num
    self.dropout_prob = args.dropout_prob
    self.lr = args.lr

    self.attr_embedding = nn.Embedding(self.attr_size, self.embedding_dim)
    self.dropout = nn.Dropout(self.dropout_prob)
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.embedding_dim,
                        num_layers=self.layer_num,
                        dropout=self.dropout_prob)
    self.fc = nn.Linear(self.embedding_dim, self.node_size)
    self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
    # self.node_mapping = node_mapping

def __init__(self, args):
    super(GRU, self).__init__()
    self.args = args
    # print(args)

    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num

    # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
    self.embed = nn.Embedding(V, D)
    # word embedding
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # gru
    self.gru = nn.GRU(D, self.hidden_dim, dropout=args.dropout, num_layers=self.num_layers)
    # linear
    self.hidden2label = nn.Linear(self.hidden_dim, C)
    # hidden
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    # dropout
    self.dropout = nn.Dropout(args.dropout)

def __init__(self, args):
    super(CNN_Text, self).__init__()
    self.args = args

    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    Ci = 1
    Co = args.kernel_num
    Ks = args.kernel_sizes

    self.embed = nn.Embedding(V, D)
    pretrained_weight = np.array(args.pretrained_weight)
    self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # Wrap the convolutions in nn.ModuleList so their parameters are registered
    # with the module (a plain Python list would be invisible to .parameters()).
    self.convs1 = nn.ModuleList([nn.Conv2d(Ci, Co, (K, D)) for K in Ks])
    '''
    self.conv13 = nn.Conv2d(Ci, Co, (3, D))
    self.conv14 = nn.Conv2d(Ci, Co, (4, D))
    self.conv15 = nn.Conv2d(Ci, Co, (5, D))
    '''
    self.dropout = nn.Dropout(args.dropout)
    self.fc1 = nn.Linear(len(Ks) * Co, C)

def __init__(self, args):
    super(BiGRU, self).__init__()
    self.args = args
    # print(args)

    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num

    # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
    self.embed = nn.Embedding(V, D)
    # word embedding
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # gru
    self.bigru = nn.GRU(D, self.hidden_dim, dropout=args.dropout,
                        num_layers=self.num_layers, bidirectional=True)
    # linear
    self.hidden2label = nn.Linear(self.hidden_dim * 2, C)
    # hidden
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    # dropout
    self.dropout = nn.Dropout(args.dropout)

def __init__(self, embeddings, num_layers):
    """
    embeddings should be a torch tensor, of dimension max_idx - 1 x num_hidden;
    we'll derive num_hidden from the second dimension of embeddings
    """
    super().__init__()

    self.num_hidden = embeddings.shape[1]
    self.num_layers = num_layers
    self.embedding = nn.Embedding(
        embeddings.shape[0],
        self.num_hidden
    )
    self.embedding.weight.data = embeddings
    self.embedding.weight.requires_grad = False
    self.lstm = nn.LSTM(
        input_size=self.num_hidden,
        hidden_size=self.num_hidden,
        num_layers=num_layers)
    self.initial_state = None
    self.initial_cell = None
    self.linear = nn.Linear(self.num_hidden, 1)  # modeling as a regression

def __init__(self, embeddings, num_layers, pad_id):
    super().__init__()

    self.num_hidden = embeddings.shape[1]
    self.num_layers = num_layers
    self.embedding = nn.Embedding(
        embeddings.shape[0],
        self.num_hidden
    )
    self.embedding.weight.data = embeddings
    self.embedding.weight.requires_grad = False
    self.lstm = nn.LSTM(
        input_size=self.num_hidden,
        hidden_size=self.num_hidden,
        num_layers=num_layers,
        bidirectional=True)
    self.initial_state = None
    self.initial_cell = None
    # self.rationales = None
    self.linear = nn.Linear(self.num_hidden * 2, 1)
    self.pad_id = pad_id

def __init__(self, dictionary, encoder_embed_dim=512, embed_dim=512,
             out_embed_dim=512, num_layers=1, dropout_in=0.1,
             dropout_out=0.1, attention=True):
    super().__init__()
    self.dictionary = dictionary
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    self.layers = nn.ModuleList([
        LSTMCell(encoder_embed_dim + embed_dim if layer == 0 else embed_dim, embed_dim)
        for layer in range(num_layers)
    ])
    self.attention = AttentionLayer(encoder_embed_dim, embed_dim)
    if embed_dim != out_embed_dim:
        self.additional_fc = Linear(embed_dim, out_embed_dim)
    self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)

def __init__(self, dictionary, embed_dim=512, max_positions=1024,
             convolutions=((512, 3),) * 20, dropout=0.1):
    super().__init__()
    self.dictionary = dictionary
    self.dropout = dropout
    self.num_attention_layers = None

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    self.embed_positions = Embedding(max_positions, embed_dim, padding_idx)

    in_channels = convolutions[0][0]
    self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    for (out_channels, kernel_size) in convolutions:
        # Integer division so the padding stays an int under Python 3.
        pad = (kernel_size - 1) // 2
        self.projections.append(Linear(in_channels, out_channels)
                                if in_channels != out_channels else None)
        self.convolutions.append(
            ConvTBC(in_channels, out_channels * 2, kernel_size, padding=pad,
                    dropout=dropout))
        in_channels = out_channels
    self.fc2 = Linear(in_channels, embed_dim)

def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim):
    super(BiLSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.tag_to_ix = tag_to_ix
    self.tagset_size = len(tag_to_ix)

    self.word_embeds = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2,
                        num_layers=1, bidirectional=True)

    # Maps the output of the LSTM into tag space.
    self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)

    # Matrix of transition parameters. Entry i,j is the score of
    # transitioning *to* i *from* j.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size))

    self.hidden = self.init_hidden()

def __init__(self, vocab_dict, dropout_rate, embed_dim, hidden_dim, bidirectional=True):
    super(AoAReader, self).__init__()
    self.vocab_dict = vocab_dict
    self.hidden_dim = hidden_dim
    self.embed_dim = embed_dim
    self.dropout_rate = dropout_rate

    self.embedding = nn.Embedding(vocab_dict.size(),
                                  self.embed_dim,
                                  padding_idx=Constants.PAD)
    self.embedding.weight.data.uniform_(-0.05, 0.05)

    input_size = self.embed_dim
    self.gru = nn.GRU(input_size, hidden_size=self.hidden_dim, dropout=dropout_rate,
                      bidirectional=bidirectional, batch_first=True)

    # try independent gru
    # self.query_gru = nn.GRU(input_size, hidden_size=self.hidden_dim, dropout=dropout_rate,
    #                         bidirectional=bidirectional, batch_first=True)

    for weight in self.gru.parameters():
        if len(weight.size()) > 1:
            weigth_init.orthogonal(weight.data)

def __init__(self,
             encoder_vocab_size=100,
             decoder_vocab_size=100,
             wordvec_dim=300,
             hidden_dim=256,
             rnn_num_layers=2,
             rnn_dropout=0,
             null_token=0,
             start_token=1,
             end_token=2,
             encoder_embed=None):
    super(Seq2Seq, self).__init__()
    self.encoder_embed = nn.Embedding(encoder_vocab_size, wordvec_dim)
    self.encoder_rnn = nn.LSTM(wordvec_dim, hidden_dim, rnn_num_layers,
                               dropout=rnn_dropout, batch_first=True)
    self.decoder_embed = nn.Embedding(decoder_vocab_size, wordvec_dim)
    self.decoder_rnn = nn.LSTM(wordvec_dim + hidden_dim, hidden_dim, rnn_num_layers,
                               dropout=rnn_dropout, batch_first=True)
    self.decoder_linear = nn.Linear(hidden_dim, decoder_vocab_size)
    self.NULL = null_token
    self.START = start_token
    self.END = end_token
    self.multinomial_outputs = None

def __init__(self, opt, dicts):
    self.layers = opt.layers
    self.num_directions = 2 if opt.brnn else 1
    assert opt.rnn_size % self.num_directions == 0
    self.hidden_size = opt.rnn_size // self.num_directions
    input_size = opt.word_vec_size

    super(Encoder, self).__init__()
    self.word_lut = nn.Embedding(dicts.size(),
                                 opt.word_vec_size,
                                 padding_idx=onmt.Constants.PAD)
    self.rnn = getattr(nn, opt.rnn_type)(
        input_size, self.hidden_size,
        num_layers=opt.layers,
        dropout=opt.dropout,
        bidirectional=opt.brnn)

def __init__(self, opt, dicts):
    self.layers = opt.layers
    self.input_feed = opt.input_feed
    input_size = opt.word_vec_size
    if self.input_feed:
        input_size += opt.rnn_size

    super(Decoder, self).__init__()
    self.word_lut = nn.Embedding(dicts.size(),
                                 opt.word_vec_size,
                                 padding_idx=onmt.Constants.PAD)
    stackedCell = StackedLSTM if opt.rnn_type == "LSTM" else StackedGRU
    self.rnn = stackedCell(opt.layers, input_size, opt.rnn_size, opt.dropout)
    self.attn = onmt.modules.GlobalAttention(opt.rnn_size)
    self.context_gate = None
    if opt.context_gate is not None:
        self.context_gate = ContextGateFactory(
            opt.context_gate, opt.word_vec_size,
            opt.rnn_size, opt.rnn_size,
            opt.rnn_size
        )
    self.dropout = nn.Dropout(opt.dropout)

    self.hidden_size = opt.rnn_size

def __init__(self, opt, dicts):
    self.layers = opt.layers
    self.input_feed = opt.input_feed
    input_size = opt.word_vec_size
    if self.input_feed:
        input_size += opt.rnn_size

    super(Decoder, self).__init__()
    self.word_lut = nn.Embedding(dicts.size(),
                                 opt.word_vec_size,
                                 padding_idx=onmt.Constants.PAD)
    self.rnn = StackedLSTM(opt.layers, input_size, opt.rnn_size, opt.dropout)
    self.attn = onmt.modules.GlobalAttention(opt.rnn_size)
    self.dropout = nn.Dropout(opt.dropout)

    self.hidden_size = opt.rnn_size

def __init__(self, opt):
    self.name = "TextLstm"
    super(TextLSTM, self).__init__()
    self.opt = opt

    self.embedding = nn.Embedding(opt.vocab_size, opt.embed_dim)
    self.lstm = nn.LSTM(input_size=opt.embed_dim,
                        hidden_size=opt.hidden_size,
                        num_layers=1,
                        batch_first=True,
                        bidirectional=False)
    self.linears = nn.Sequential(
        nn.Linear(opt.hidden_size, opt.linear_hidden_size),
        nn.ReLU(),
        nn.Dropout(0.25),
        nn.Linear(opt.linear_hidden_size, opt.num_classes),
        # nn.Softmax()
    )

    if opt.embedding_path:
        self.embedding.weight.data.copy_(torch.from_numpy(np.load(opt.embedding_path)))
        # self.embedding.weight.requires_grad = False

def __init__(self, args):
    super(TextCNN, self).__init__()
    self.args = args

    V = args.vocab_size
    D = args.embed_dim
    C = args.num_classes
    Cin = 1
    Cout = args.kernel_num
    Ks = args.kernel_sizes

    self.embeding = nn.Embedding(V, D)
    self.convs = nn.ModuleList([nn.Conv2d(Cin, Cout, (K, D)) for K in Ks])
    self.dropout = nn.Dropout(args.dropout)
    self.fc = nn.Linear(len(Ks) * Cout, C)

def __init__(self, opt):
    super(CNNTextInception, self).__init__()
    incept_dim = opt.inception_dim
    self.model_name = 'CNNTextInception'
    self.opt = opt
    self.encoder = nn.Embedding(opt.vocab_size, opt.embedding_dim)
    self.title_conv = nn.Sequential(
        # (batch_size, 64, opt.title_seq_len) -> (batch_size, 32, opt.title_seq_len / 2)
        Inception(opt.embedding_dim, incept_dim),
        Inception(incept_dim, incept_dim),
        Inception(incept_dim, incept_dim),
        nn.MaxPool1d(opt.title_seq_len)
    )
    self.content_conv = nn.Sequential(
        # (batch_size, 64, opt.content_seq_len) -> (batch_size, 64, opt.content_seq_len / 2)
        Inception(opt.embedding_dim, incept_dim),
        # Inception(incept_dim, incept_dim),  # -> (batch_size, 32, opt.content_seq_len / 4)
        Inception(incept_dim, incept_dim),
        Inception(incept_dim, incept_dim),
        nn.MaxPool1d(opt.content_seq_len)
    )
    self.fc = nn.Sequential(
        nn.Linear(incept_dim * 2, opt.linear_hidden_size),
        nn.BatchNorm1d(opt.linear_hidden_size),
        nn.ReLU(inplace=True),
        nn.Linear(opt.linear_hidden_size, opt.num_classes)
    )

def __init__(self, opt):
    super(CNNText, self).__init__()
    self.model_name = 'CNNText'
    self.opt = opt
    self.encoder = nn.Embedding(opt.vocab_size, opt.embedding_dim)

    self.title_conv = nn.Sequential(
        nn.Conv1d(in_channels=opt.embedding_dim,
                  out_channels=opt.title_dim,
                  kernel_size=opt.kernel_size),
        nn.ReLU(),
        nn.MaxPool1d(kernel_size=(opt.title_seq_len - opt.kernel_size + 1))
    )

    self.content_conv = nn.Sequential(
        nn.Conv1d(in_channels=opt.embedding_dim,
                  out_channels=opt.content_dim,
                  kernel_size=opt.kernel_size),
        nn.ReLU(),
        nn.MaxPool1d(kernel_size=(opt.content_seq_len - opt.kernel_size + 1))
    )

    self.fc = nn.Linear(opt.title_dim + opt.content_dim, opt.num_classes)

    if opt.embedding_path:
        self.encoder.weight.data.copy_(t.from_numpy(np.load(opt.embedding_path)['vector']))

def __init__(self):
    super(Chunking, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + postag_hn_size * 2

    self.w = nn.Parameter(torch.randn(chunking_nb_layers * 2,
                                      max_sentence_size,
                                      chunking_hn_size))
    self.h = nn.Parameter(torch.randn(chunking_nb_layers * 2,
                                      max_sentence_size,
                                      chunking_hn_size))

    self.embedding = nn.Embedding(nb_postags, chunking_postag_emb_size)

    self.aux_emb = torch.arange(0, nb_postags)
    self.aux_emb = Variable(self.aux_emb).long()

    self.bi_lstm = nn.LSTM(self.input_size,
                           chunking_hn_size,
                           chunking_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(chunking_hn_size * 2, nb_chunktags)

def __init__(self, vocab_size, hidden_size=512, embedding_size=None,
             num_layers=6, num_heads=8, inner_linear=1024,
             mask_symbol=PAD, dropout=0):
    super(TransformerAttentionEncoder, self).__init__()
    embedding_size = embedding_size or hidden_size
    self.hidden_size = hidden_size
    self.batch_first = True
    self.mask_symbol = mask_symbol
    self.embedder = nn.Embedding(
        vocab_size, embedding_size, padding_idx=PAD)
    self.scale_embedding = hidden_size ** 0.5
    self.dropout = nn.Dropout(dropout, inplace=True)
    self.blocks = nn.ModuleList([EncoderBlock(hidden_size, num_heads, inner_linear, dropout)
                                 for _ in range(num_layers)])

def __init__(self, vocab_size, hidden_size=512, embedding_size=None,
             num_layers=6, num_heads=8, dropout=0, inner_linear=1024,
             mask_symbol=PAD, tie_embedding=True):
    super(TransformerAttentionDecoder, self).__init__()
    embedding_size = embedding_size or hidden_size
    self.batch_first = True
    self.mask_symbol = mask_symbol
    self.embedder = nn.Embedding(
        vocab_size, embedding_size, padding_idx=PAD)
    self.scale_embedding = hidden_size ** 0.5
    self.dropout = nn.Dropout(dropout, inplace=True)
    self.blocks = nn.ModuleList([DecoderBlock(hidden_size, num_heads, inner_linear, dropout)
                                 for _ in range(num_layers)])
    self.classifier = nn.Linear(hidden_size, vocab_size)
    if tie_embedding:
        self.embedder.weight = self.classifier.weight

def __init__(self, vocab_size, hidden_size=128, num_layers=1, bias=True,
             batch_first=False, forget_bias=None, dropout=0, mode='LSTM',
             residual=False, tie_embedding=True):
    super(RecurrentDecoder, self).__init__()
    self.layers = num_layers
    self.hidden_size = hidden_size
    self.batch_first = batch_first
    embedding_size = hidden_size
    self.embedder = nn.Embedding(vocab_size,
                                 embedding_size,
                                 sparse=True,
                                 padding_idx=PAD)
    self.rnn = Recurrent(mode, embedding_size, self.hidden_size,
                         num_layers=num_layers, bias=bias,
                         forget_bias=forget_bias,
                         batch_first=batch_first, residual=residual,
                         dropout=dropout, bidirectional=False)
    self.classifier = nn.Linear(hidden_size, vocab_size)
    if tie_embedding:
        self.classifier.weight = self.embedder.weight

def __init__(self, vocab_size, context_size, hidden_size=128,
             embedding_size=None, num_layers=1, bias=True,
             forget_bias=None, batch_first=False, dropout=0,
             tie_embedding=False, residual=False, attention=None,
             concat_attention=True, num_pre_attention_layers=None):
    super(RecurrentAttentionDecoder, self).__init__()
    embedding_size = embedding_size or hidden_size
    attention = attention or {}

    self.layers = num_layers
    self.batch_first = batch_first
    self.embedder = nn.Embedding(vocab_size,
                                 embedding_size,
                                 sparse=True,
                                 padding_idx=PAD)
    self.rnn = RecurrentAttention(hidden_size, context_size, hidden_size,
                                  num_layers=num_layers, bias=bias,
                                  batch_first=batch_first, dropout=dropout,
                                  forget_bias=forget_bias, residual=residual,
                                  attention=attention,
                                  concat_attention=concat_attention,
                                  num_pre_attention_layers=num_pre_attention_layers)
    self.classifier = nn.Linear(hidden_size, vocab_size)
    if tie_embedding:
        self.classifier.weight = self.embedder.weight

    self.hidden_size = hidden_size

def _lstm_loop(self, state, embed, context, mask=None):
    """
    :param state: Current decoder state (batch_size, dec_dim)
    :param embed: Embedding size (batch_size, embed_dim)
    :param context: All the context from encoder (batch_size, source_l, enc_dim)
    :param mask: Mask of size (batch_size, source_l) with 1 if that token is valid
                 in encoder, 0 otherwise.
    :return: out: (batch_size, vocab_size) distribution over labels
             state: (batch_size, dec_dim) next state
             alpha: (batch_size, source_l) distribution over the encoded hidden states,
                    useful for debugging maybe
    """
    c_t, alpha = self.attn(state, context, mask)
    gru_inp = torch.cat((embed, c_t), 1).unsqueeze(0)
    state = self.gru(gru_inp, state.unsqueeze(0))[0].squeeze(0)
    out = self.out(state)
    return out, state, alpha

def test_mini_wlm(self):
    """Exercise null-edge pruning in the tracer."""

    @torch.jit.compile(verify=True)
    class MyModel(nn.Module):
        def __init__(self):
            super(MyModel, self).__init__()
            self.encoder = nn.Embedding(2, 2)

        def forward(self, input, hidden):
            emb = self.encoder(input)
            hidden = hidden.clone()  # simulate some RNN operation
            return emb, hidden

    model = MyModel()
    x = Variable(torch.LongTensor([[0, 1], [1, 0]]))
    y = Variable(torch.FloatTensor([0]))

    z, _ = model(x, y)
    z.sum().backward()

    z, _ = model(x, y)
    z.sum().backward()

def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1,
             max_length=MAX_LENGTH):
    # max_length was referenced but missing from the original signature;
    # it is added here with the same MAX_LENGTH default used in the later example.
    super(BahdanauAttnDecoderRNN, self).__init__()

    # Define parameters
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout_p = dropout_p
    self.max_length = max_length

    # Define layers
    self.embedding = nn.Embedding(output_size, hidden_size)
    self.dropout = nn.Dropout(dropout_p)
    self.attn = Attn('concat', hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout_p)
    self.out = nn.Linear(hidden_size, output_size)

def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout=0.1):
    super(LuongAttnDecoderRNN, self).__init__()

    # Keep for reference
    self.attn_model = attn_model
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout = dropout

    # Define layers
    self.embedding = nn.Embedding(output_size, hidden_size)
    self.embedding_dropout = nn.Dropout(dropout)
    self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout)
    self.concat = nn.Linear(hidden_size * 2, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)

    # Choose attention model
    if attn_model != 'none':
        self.attn = Attn(attn_model, hidden_size)

def create(cls, embeddings, labels, **kwargs):
    finetune = kwargs.get('finetune', True)
    dsz = embeddings.dsz
    model = cls()
    model.pdrop = kwargs.get('dropout', 0.5)
    model.labels = labels
    nc = len(labels)
    model.vocab = embeddings.vocab
    model.lut = nn.Embedding(embeddings.vsz + 1, dsz)
    del model.lut.weight
    model.lut.weight = nn.Parameter(torch.FloatTensor(embeddings.weights),
                                    requires_grad=finetune)
    pool_dim = model._init_pool(dsz, **kwargs)
    stacked_dim = model._init_stacked(pool_dim, **kwargs)
    model._init_output(stacked_dim, nc)
    print(model)
    return model

def __init__(self, vocab_size, max_len, hidden_size,
             sos_id, eos_id,
             n_layers=1, rnn_cell='gru', bidirectional=False,
             input_dropout_p=0, dropout_p=0, use_attention=False):
    super(DecoderRNN, self).__init__(vocab_size, max_len, hidden_size,
                                     input_dropout_p, dropout_p,
                                     n_layers, rnn_cell)

    self.bidirectional_encoder = bidirectional
    self.rnn = self.rnn_cell(hidden_size, hidden_size, n_layers,
                             batch_first=True, dropout=dropout_p)

    self.output_size = vocab_size
    self.max_length = max_len
    self.use_attention = use_attention
    self.eos_id = eos_id
    self.sos_id = sos_id

    self.init_input = None

    self.embedding = nn.Embedding(self.output_size, self.hidden_size)
    if use_attention:
        self.attention = Attention(self.hidden_size)

    self.out = nn.Linear(self.hidden_size, self.output_size)

def __init__(self, opt):
    super(FCModel, self).__init__()
    self.vocab_size = opt.vocab_size
    self.input_encoding_size = opt.input_encoding_size
    self.rnn_type = opt.rnn_type
    self.rnn_size = opt.rnn_size
    self.num_layers = opt.num_layers
    self.drop_prob_lm = opt.drop_prob_lm
    self.seq_length = opt.seq_length
    self.fc_feat_size = opt.fc_feat_size

    self.ss_prob = 0.0  # Schedule sampling probability

    self.img_embed = nn.Linear(self.fc_feat_size, self.input_encoding_size)
    self.core = LSTMCore(opt)
    self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
    self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)

    self.init_weights()

def __init__(self, opt):
    super(OldModel, self).__init__()
    self.vocab_size = opt.vocab_size
    self.input_encoding_size = opt.input_encoding_size
    self.rnn_type = opt.rnn_type
    self.rnn_size = opt.rnn_size
    self.num_layers = opt.num_layers
    self.drop_prob_lm = opt.drop_prob_lm
    self.seq_length = opt.seq_length
    self.fc_feat_size = opt.fc_feat_size
    self.att_feat_size = opt.att_feat_size

    self.ss_prob = 0.0  # Schedule sampling probability

    self.linear = nn.Linear(self.fc_feat_size, self.num_layers * self.rnn_size)  # feature to rnn_size
    self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
    self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)
    self.dropout = nn.Dropout(self.drop_prob_lm)

    self.init_weights()

def __init__(self, opt):
    super(ShowTellModel, self).__init__()
    self.vocab_size = opt.vocab_size
    self.input_encoding_size = opt.input_encoding_size
    self.rnn_type = opt.rnn_type
    self.rnn_size = opt.rnn_size
    self.num_layers = opt.num_layers
    self.drop_prob_lm = opt.drop_prob_lm
    self.seq_length = opt.seq_length
    self.fc_feat_size = opt.fc_feat_size

    self.ss_prob = 0.0  # Schedule sampling probability

    self.img_embed = nn.Linear(self.fc_feat_size, self.input_encoding_size)
    self.core = getattr(nn, self.rnn_type.upper())(self.input_encoding_size, self.rnn_size,
                                                   self.num_layers, bias=False,
                                                   dropout=self.drop_prob_lm)
    self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
    self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)
    self.dropout = nn.Dropout(self.drop_prob_lm)

    self.init_weights()

def __init__(self, opt):
    super(Att2inModel, self).__init__()
    self.vocab_size = opt.vocab_size
    self.input_encoding_size = opt.input_encoding_size
    # self.rnn_type = opt.rnn_type
    self.rnn_size = opt.rnn_size
    self.num_layers = 1
    self.drop_prob_lm = opt.drop_prob_lm
    self.seq_length = opt.seq_length
    self.fc_feat_size = opt.fc_feat_size
    self.att_feat_size = opt.att_feat_size
    self.att_hid_size = opt.att_hid_size

    self.ss_prob = 0.0  # Schedule sampling probability

    self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
    self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)
    self.ctx2att = nn.Linear(self.att_feat_size, self.att_hid_size)
    self.core = Att2inCore(opt)

    self.init_weights()

def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1,
             max_length=MAX_LENGTH):
    super(AttnDecoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout_p = dropout_p
    self.max_length = max_length

    if use_cuda:
        self.embedding = nn.Embedding(self.output_size, self.hidden_size).cuda()
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length).cuda()
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size).cuda()
        self.dropout = nn.Dropout(self.dropout_p).cuda()
        self.gru = nn.GRU(self.hidden_size, self.hidden_size).cuda()
        self.out = nn.Linear(self.hidden_size, self.output_size).cuda()
    else:
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

def __init__(self, cuda, vocab_size, in_dim, mem_dim, sparsity):
    super(ChildSumTreeLSTM, self).__init__()
    self.cudaFlag = cuda
    self.in_dim = in_dim
    self.mem_dim = mem_dim

    self.emb = nn.Embedding(vocab_size, in_dim,
                            padding_idx=Constants.PAD,
                            sparse=sparsity)

    self.ix = nn.Linear(self.in_dim, self.mem_dim)
    self.ih = nn.Linear(self.mem_dim, self.mem_dim)

    self.fx = nn.Linear(self.in_dim, self.mem_dim)
    self.fh = nn.Linear(self.mem_dim, self.mem_dim)

    self.ox = nn.Linear(self.in_dim, self.mem_dim)
    self.oh = nn.Linear(self.mem_dim, self.mem_dim)

    self.ux = nn.Linear(self.in_dim, self.mem_dim)
    self.uh = nn.Linear(self.mem_dim, self.mem_dim)

def __init__(self, config):
    super(SNLIClassifier, self).__init__()
    self.config = config
    self.embed = nn.Embedding(config.n_embed, config.d_embed)
    self.projection = Linear(config.d_embed, config.d_proj)
    self.encoder = Encoder(config)
    self.dropout = nn.Dropout(p=config.dp_ratio)
    self.relu = nn.ReLU()
    seq_in_size = 2 * config.d_hidden
    if self.config.birnn:
        seq_in_size *= 2
    lin_config = [seq_in_size] * 2
    self.out = nn.Sequential(
        Linear(*lin_config),
        self.relu,
        self.dropout,
        Linear(*lin_config),
        self.relu,
        self.dropout,
        Linear(*lin_config),
        self.relu,
        self.dropout,
        Linear(seq_in_size, config.d_out))

def __init__(self, config):
    super(BiLSTM, self).__init__()
    self.drop = nn.Dropout(config['dropout'])
    self.encoder = nn.Embedding(config['ntoken'], config['ninp'])
    self.bilstm = nn.LSTM(config['ninp'], config['nhid'], config['nlayers'],
                          dropout=config['dropout'], bidirectional=True)
    self.nlayers = config['nlayers']
    self.nhid = config['nhid']
    self.pooling = config['pooling']
    self.dictionary = config['dictionary']
    # self.init_weights()
    self.encoder.weight.data[self.dictionary.word2idx['<pad>']] = 0

    if os.path.exists(config['word-vector']):
        print('Loading word vectors from', config['word-vector'])
        vectors = torch.load(config['word-vector'])
        assert vectors[2] >= config['ninp']
        vocab = vectors[0]
        vectors = vectors[1]
        loaded_cnt = 0
        for word in self.dictionary.word2idx:
            if word not in vocab:
                continue
            real_id = self.dictionary.word2idx[word]
            loaded_id = vocab[word]
            self.encoder.weight.data[real_id] = vectors[loaded_id][:config['ninp']]
            loaded_cnt += 1
        print('%d words from external word vectors loaded.' % loaded_cnt)

# note: init_range constrains the value of initial weights

def __init__(self, batch_size, num_tokens, embed_size, word_gru_hidden,
             bidirectional=True, init_range=0.1, use_lstm=False):
    super(AttentionWordRNN, self).__init__()

    self.batch_size = batch_size
    self.num_tokens = num_tokens
    self.embed_size = embed_size
    self.word_gru_hidden = word_gru_hidden
    self.bidirectional = bidirectional
    self.use_lstm = use_lstm

    self.lookup = nn.Embedding(num_tokens, embed_size)
    if bidirectional:
        if use_lstm:
            print("inside using LSTM")
            self.word_gru = nn.LSTM(embed_size, word_gru_hidden, bidirectional=True)
        else:
            self.word_gru = nn.GRU(embed_size, word_gru_hidden, bidirectional=True)
        self.weight_W_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 2 * word_gru_hidden))
        self.bias_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 1))
        self.weight_proj_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 1))
    else:
        if use_lstm:
            self.word_gru = nn.LSTM(embed_size, word_gru_hidden, bidirectional=False)
        else:
            self.word_gru = nn.GRU(embed_size, word_gru_hidden, bidirectional=False)
        self.weight_W_word = nn.Parameter(torch.Tensor(word_gru_hidden, word_gru_hidden))
        self.bias_word = nn.Parameter(torch.Tensor(word_gru_hidden, 1))
        self.weight_proj_word = nn.Parameter(torch.Tensor(word_gru_hidden, 1))

    self.softmax_word = nn.Softmax()
    self.weight_W_word.data.uniform_(-init_range, init_range)
    self.weight_proj_word.data.uniform_(-init_range, init_range)

def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError("""An invalid option for `--model` was supplied,
                             options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
        self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        if nhid != ninp:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers

def __init__(self, num_characters, dim):
    super(RNNEncoder, self).__init__()
    self.embedding = nn.Embedding(num_characters, dim)
    # TODO in the original project: the GRU was left unconfigured (nn.GRU()).
    # Using `dim` for both input and hidden size here is an assumption, chosen
    # to match the embedding dimension so the module can be constructed.
    self.rnn = nn.GRU(dim, dim)

def __init__(self, num_characters, dim):
    super(RNNDecoder, self).__init__()
    self.embedding = nn.Embedding(num_characters, dim)
    self.rnn_cell = nn.GRUCell(dim, dim)

def __init__(self, vocab_size, in_dim, mem_dim, hidden_dim, num_classes, sparsity, freeze):
    super(SimilarityTreeLSTM, self).__init__()
    self.emb = nn.Embedding(vocab_size, in_dim,
                            padding_idx=Constants.PAD,
                            sparse=sparsity)
    if freeze:
        self.emb.weight.requires_grad = False
    self.childsumtreelstm = ChildSumTreeLSTM(in_dim, mem_dim)
    self.similarity = Similarity(mem_dim, hidden_dim, num_classes)

def test_embedding_padding_idx(self):
    embedding = nn.Embedding(10, 20, padding_idx=0)
    input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
    output = embedding(input)
    self.assertEqual(output[0][0].sum().data[0], 0)
    self.assertEqual(output[1][2].sum().data[0], 0)