Python torch.nn module, Embedding() example source code

The following code examples, extracted from open-source Python projects, illustrate how to use torch.nn.Embedding().
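
Before the project excerpts, a minimal, self-contained sketch of the basic API (not taken from any project below; on older PyTorch versions the index tensor must additionally be wrapped in torch.autograd.Variable):

import torch
import torch.nn as nn

# nn.Embedding(num_embeddings, embedding_dim) is a learnable lookup table;
# padding_idx reserves one row that is kept at zero.
embed = nn.Embedding(10, 3, padding_idx=0)

tokens = torch.LongTensor([[1, 2, 4, 0], [4, 3, 2, 0]])  # (batch, seq_len) of indices
vectors = embed(tokens)                                   # (batch, seq_len, 3)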

Project: PyTorchDemystified    Author: hhsecond    | project source | file source
def __init__(self, config):
        super(SNLIClassifier, self).__init__()
        self.config = config
        self.embed = nn.Embedding(config.n_embed, config.d_embed)
        self.projection = Linear(config.d_embed, config.d_proj)
        self.embed_bn = BatchNorm(config.d_proj)
        self.embed_dropout = nn.Dropout(p=config.embed_dropout)
        self.encoder = SPINN(config) if config.spinn else Encoder(config)
        feat_in_size = config.d_hidden * (
            2 if self.config.birnn and not self.config.spinn else 1)
        self.feature = Feature(feat_in_size, config.mlp_dropout)
        self.mlp_dropout = nn.Dropout(p=config.mlp_dropout)
        self.relu = nn.ReLU()
        mlp_in_size = 4 * feat_in_size
        mlp = [nn.Linear(mlp_in_size, config.d_mlp), self.relu,
               nn.BatchNorm1d(config.d_mlp), self.mlp_dropout]
        for i in range(config.n_mlp_layers - 1):
            mlp.extend([nn.Linear(config.d_mlp, config.d_mlp), self.relu,
                        nn.BatchNorm1d(config.d_mlp), self.mlp_dropout])
        mlp.append(nn.Linear(config.d_mlp, config.d_out))
        self.out = nn.Sequential(*mlp)
Project: cnn-text-classification-pytorch    Author: Shawn1993    | project source | file source
def __init__(self, args):
        super(CNN_Text, self).__init__()
        self.args = args

        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        Ci = 1
        Co = args.kernel_num
        Ks = args.kernel_sizes

        self.embed = nn.Embedding(V, D)
        #self.convs1 = [nn.Conv2d(Ci, Co, (K, D)) for K in Ks]
        self.convs1 = nn.ModuleList([nn.Conv2d(Ci, Co, (K, D)) for K in Ks])
        '''
        self.conv13 = nn.Conv2d(Ci, Co, (3, D))
        self.conv14 = nn.Conv2d(Ci, Co, (4, D))
        self.conv15 = nn.Conv2d(Ci, Co, (5, D))
        '''
        self.dropout = nn.Dropout(args.dropout)
        self.fc1 = nn.Linear(len(Ks)*Co, C)
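
A hedged sketch of the forward pass this constructor implies (assuming torch.nn.functional is imported as F; the project's actual forward method may differ in detail):

def forward(self, x):
        x = self.embed(x)                                           # (N, W, D)
        x = x.unsqueeze(1)                                          # (N, Ci=1, W, D)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]    # [(N, Co, W-K+1), ...]
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]      # [(N, Co), ...]
        x = torch.cat(x, 1)                                         # (N, len(Ks)*Co)
        x = self.dropout(x)
        return self.fc1(x)                                          # (N, C)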
Project: attention-is-all-you-need-pytorch    Author: jadore801120    | project source | file source
def __init__(
            self, n_src_vocab, n_max_seq, n_layers=6, n_head=8, d_k=64, d_v=64,
            d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):

        super(Encoder, self).__init__()

        n_position = n_max_seq + 1
        self.n_max_seq = n_max_seq
        self.d_model = d_model

        self.position_enc = nn.Embedding(n_position, d_word_vec, padding_idx=Constants.PAD)
        self.position_enc.weight.data = position_encoding_init(n_position, d_word_vec)

        self.src_word_emb = nn.Embedding(n_src_vocab, d_word_vec, padding_idx=Constants.PAD)

        self.layer_stack = nn.ModuleList([
            EncoderLayer(d_model, d_inner_hid, n_head, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)])
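
position_encoding_init is defined elsewhere in the repository; a hedged sketch consistent with the sinusoidal scheme of Vaswani et al. (2017), which the repository's exact helper may differ from:

import numpy as np
import torch

def position_encoding_init(n_position, d_pos_vec):
    # Row 0 is kept at zero for the PAD position; the remaining rows follow
    # sin/cos at geometrically spaced wavelengths.
    position_enc = np.array([
        [pos / np.power(10000, 2 * (j // 2) / d_pos_vec) for j in range(d_pos_vec)]
        if pos != 0 else np.zeros(d_pos_vec)
        for pos in range(n_position)])
    position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2])  # even dimensions
    position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2])  # odd dimensions
    return torch.from_numpy(position_enc).float()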
Project: attention-is-all-you-need-pytorch    Author: jadore801120    | project source | file source
def __init__(
            self, n_tgt_vocab, n_max_seq, n_layers=6, n_head=8, d_k=64, d_v=64,
            d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):

        super(Decoder, self).__init__()
        n_position = n_max_seq + 1
        self.n_max_seq = n_max_seq
        self.d_model = d_model

        self.position_enc = nn.Embedding(
            n_position, d_word_vec, padding_idx=Constants.PAD)
        self.position_enc.weight.data = position_encoding_init(n_position, d_word_vec)

        self.tgt_word_emb = nn.Embedding(
            n_tgt_vocab, d_word_vec, padding_idx=Constants.PAD)
        self.dropout = nn.Dropout(dropout)

        self.layer_stack = nn.ModuleList([
            DecoderLayer(d_model, d_inner_hid, n_head, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)])
Project: Tree-LSTM-LM    Author: vgene    | project source | file source
def __init__(self, args, mapping):
        super(CharLM, self).__init__()

        self.batch_size = args.batch_size
        self.seq_length = args.seq_length
        self.vocab_size = args.vocab_size
        self.embedding_dim = args.embedding_dim
        self.layer_num = args.layer_num
        self.dropout_prob = args.dropout_prob
        self.lr = args.lr
        self.char_embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.dropout = nn.Dropout(self.dropout_prob)

        self.lstm = nn.LSTM(input_size=self.embedding_dim,
                            hidden_size=self.embedding_dim,
                            num_layers=self.layer_num,
                            dropout=self.dropout_prob)
        self.fc = nn.Linear(self.embedding_dim, self.vocab_size)
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.mapping = mapping
Project: Tree-LSTM-LM    Author: vgene    | project source | file source
def __init__(self, args, attr_size, node_size):
        super(TreeLM, self).__init__()

        self.batch_size = args.batch_size
        self.seq_length = args.seq_length
        self.attr_size = attr_size
        self.node_size = node_size

        self.embedding_dim = args.embedding_dim
        self.layer_num = args.layer_num
        self.dropout_prob = args.dropout_prob
        self.lr = args.lr

        self.attr_embedding = nn.Embedding(self.attr_size, self.embedding_dim)
        self.dropout = nn.Dropout(self.dropout_prob)

        self.lstm = nn.LSTM(input_size=self.embedding_dim,
                            hidden_size=self.embedding_dim,
                            num_layers=self.layer_num,
                            dropout=self.dropout_prob)

        self.fc = nn.Linear(self.embedding_dim, self.node_size)
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        # self.node_mapping = node_mapping
Project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch    Author: bamtercelboo    | project source | file source
def __init__(self, args):
        super(GRU, self).__init__()
        self.args = args
        # print(args)

        self.hidden_dim = args.lstm_hidden_dim
        self.num_layers = args.lstm_num_layers
        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
        self.embed = nn.Embedding(V, D)
        # word embedding
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # gru
        self.gru = nn.GRU(D, self.hidden_dim, dropout=args.dropout, num_layers=self.num_layers)
        # linear
        self.hidden2label = nn.Linear(self.hidden_dim, C)
        # hidden
        self.hidden = self.init_hidden(self.num_layers, args.batch_size)
        # dropout
        self.dropout = nn.Dropout(args.dropout)
Project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch    Author: bamtercelboo    | project source | file source
def __init__(self, args):
        super(CNN_Text, self).__init__()
        self.args = args

        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        Ci = 1
        Co = args.kernel_num
        Ks = args.kernel_sizes

        self.embed = nn.Embedding(V, D)
        # print("aaaaaaaa", self.embed.weight)
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # print("bbbbbbbb", self.embed.weight)

        # Wrapped in nn.ModuleList so the conv layers are registered as
        # submodules; a plain Python list would hide them from .parameters().
        self.convs1 = nn.ModuleList([nn.Conv2d(Ci, Co, (K, D)) for K in Ks])
        '''
        self.conv13 = nn.Conv2d(Ci, Co, (3, D))
        self.conv14 = nn.Conv2d(Ci, Co, (4, D))
        self.conv15 = nn.Conv2d(Ci, Co, (5, D))
        '''
        self.dropout = nn.Dropout(args.dropout)
        self.fc1 = nn.Linear(len(Ks)*Co, C)
Project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch    Author: bamtercelboo    | project source | file source
def __init__(self, args):
        super(BiGRU, self).__init__()
        self.args = args
        # print(args)

        self.hidden_dim = args.lstm_hidden_dim
        self.num_layers = args.lstm_num_layers
        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
        self.embed = nn.Embedding(V, D)
        # word embedding
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # gru
        self.bigru = nn.GRU(D, self.hidden_dim, dropout=args.dropout, num_layers=self.num_layers, bidirectional=True)
        # linear
        self.hidden2label = nn.Linear(self.hidden_dim * 2, C)
        # hidden
        self.hidden = self.init_hidden(self.num_layers, args.batch_size)
        # dropout
        self.dropout = nn.Dropout(args.dropout)
Project: rationalizing-neural-predictions    Author: hughperkins    | project source | file source
def __init__(self, embeddings, num_layers):
        """
        embeddings should be a torch tensor, of dimension
        max_idx - 1 x num_hidden

        we'll derive num_hidden from the second dimension of embeddings
        """
        super().__init__()
        self.num_hidden = embeddings.shape[1]
        self.num_layers = num_layers
        self.embedding = nn.Embedding(
            embeddings.shape[0],
            self.num_hidden
        )
        self.embedding.weight.data = embeddings
        self.embedding.weight.requires_grad = False
        self.lstm = nn.LSTM(
            input_size=self.num_hidden,
            hidden_size=self.num_hidden,
            num_layers=num_layers)
        self.initial_state = None
        self.initial_cell = None
        self.linear = nn.Linear(self.num_hidden, 1)  # modeling as a regression
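
Since requires_grad is set to False on the embedding weight, only the remaining parameters should be handed to the optimizer; older optimizers raise an error if frozen parameters are passed in. A hedged, self-contained sketch of the pattern (names are illustrative, not from the project):

import torch
import torch.nn as nn
import torch.optim as optim

pretrained = torch.randn(100, 16)        # stand-in for real pretrained vectors
embedding = nn.Embedding(100, 16)
embedding.weight.data = pretrained
embedding.weight.requires_grad = False   # frozen, exactly as above

head = nn.Linear(16, 1)
trainable = [p for p in list(embedding.parameters()) + list(head.parameters())
             if p.requires_grad]          # the frozen table is filtered out
optimizer = optim.Adam(trainable, lr=1e-3)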
Project: rationalizing-neural-predictions    Author: hughperkins    | project source | file source
def __init__(self, embeddings, num_layers, pad_id):
        super().__init__()
        self.num_hidden = embeddings.shape[1]
        self.num_layers = num_layers
        self.embedding = nn.Embedding(
            embeddings.shape[0],
            self.num_hidden
        )
        self.embedding.weight.data = embeddings
        self.embedding.weight.requires_grad = False
        self.lstm = nn.LSTM(
            input_size=self.num_hidden,
            hidden_size=self.num_hidden,
            num_layers=num_layers,
            bidirectional=True)
        self.initial_state = None
        self.initial_cell = None
        # self.rationales = None
        self.linear = nn.Linear(self.num_hidden * 2, 1)
        self.pad_id = pad_id
Project: fairseq-py    Author: facebookresearch    | project source | file source
def __init__(self, dictionary, encoder_embed_dim=512, embed_dim=512,
                 out_embed_dim=512, num_layers=1, dropout_in=0.1,
                 dropout_out=0.1, attention=True):
        super().__init__()
        self.dictionary = dictionary
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)

        self.layers = nn.ModuleList([
            LSTMCell(encoder_embed_dim + embed_dim if layer == 0 else embed_dim, embed_dim)
            for layer in range(num_layers)
        ])
        self.attention = AttentionLayer(encoder_embed_dim, embed_dim)
        if embed_dim != out_embed_dim:
            self.additional_fc = Linear(embed_dim, out_embed_dim)
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
Project: fairseq-py    Author: facebookresearch    | project source | file source
def __init__(self, dictionary, embed_dim=512, max_positions=1024,
                 convolutions=((512, 3),) * 20, dropout=0.1):
        super().__init__()
        self.dictionary = dictionary
        self.dropout = dropout
        self.num_attention_layers = None

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        self.embed_positions = Embedding(max_positions, embed_dim, padding_idx)

        in_channels = convolutions[0][0]
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        for (out_channels, kernel_size) in convolutions:
            pad = (kernel_size - 1) // 2  # integer division: '/' would yield a float padding in Python 3
            self.projections.append(Linear(in_channels, out_channels)
                                    if in_channels != out_channels else None)
            self.convolutions.append(
                ConvTBC(in_channels, out_channels * 2, kernel_size, padding=pad,
                        dropout=dropout))
            in_channels = out_channels
        self.fc2 = Linear(in_channels, embed_dim)
Project: RNN-for-tagging    Author: SendongZhao    | project source | file source
def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim):
        super(BiLSTM_CRF, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.tag_to_ix = tag_to_ix
        self.tagset_size = len(tag_to_ix)

        self.word_embeds = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2,
                            num_layers=1, bidirectional=True)

        # Maps the output of the LSTM into tag space.
        self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)

        # Matrix of transition parameters.  Entry i,j is the score of
        # transitioning *to* i *from* j.
        self.transitions = nn.Parameter(
            torch.randn(self.tagset_size, self.tagset_size))

        self.hidden = self.init_hidden()
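
With the transitions[to, from] layout documented in the comment above, the unnormalized score of a tag path accumulates one transition entry per step; a hedged toy sketch (illustrative only, not the project's CRF scoring code):

import torch

tagset_size = 4
transitions = torch.randn(tagset_size, tagset_size)  # entry [i, j]: score of j -> i

tags = [0, 2, 1, 3]                                   # a candidate tag path
score = sum(transitions[tags[t + 1], tags[t]] for t in range(len(tags) - 1))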
Project: AoAReader    Author: kevinkwl    | project source | file source
def __init__(self, vocab_dict, dropout_rate, embed_dim, hidden_dim, bidirectional=True):
        super(AoAReader, self).__init__()
        self.vocab_dict = vocab_dict
        self.hidden_dim = hidden_dim
        self.embed_dim = embed_dim
        self.dropout_rate = dropout_rate

        self.embedding = nn.Embedding(vocab_dict.size(),
                                      self.embed_dim,
                                      padding_idx=Constants.PAD)
        self.embedding.weight.data.uniform_(-0.05, 0.05)

        input_size = self.embed_dim
        self.gru = nn.GRU(input_size, hidden_size=self.hidden_dim, dropout=dropout_rate,
                          bidirectional=bidirectional, batch_first=True)

        # try independent gru
        #self.query_gru = nn.GRU(input_size, hidden_size=self.hidden_dim, dropout=dropout_rate,
        #                 bidirectional=bidirectional, batch_first=True)

        for weight in self.gru.parameters():
            if len(weight.size()) > 1:
                weigth_init.orthogonal(weight.data)
Project: clevr-iep    Author: facebookresearch    | project source | file source
def __init__(self,
    encoder_vocab_size=100,
    decoder_vocab_size=100,
    wordvec_dim=300,
    hidden_dim=256,
    rnn_num_layers=2,
    rnn_dropout=0,
    null_token=0,
    start_token=1,
    end_token=2,
    encoder_embed=None
  ):
    super(Seq2Seq, self).__init__()
    self.encoder_embed = nn.Embedding(encoder_vocab_size, wordvec_dim)
    self.encoder_rnn = nn.LSTM(wordvec_dim, hidden_dim, rnn_num_layers,
                               dropout=rnn_dropout, batch_first=True)
    self.decoder_embed = nn.Embedding(decoder_vocab_size, wordvec_dim)
    self.decoder_rnn = nn.LSTM(wordvec_dim + hidden_dim, hidden_dim, rnn_num_layers,
                               dropout=rnn_dropout, batch_first=True)
    self.decoder_linear = nn.Linear(hidden_dim, decoder_vocab_size)
    self.NULL = null_token
    self.START = start_token
    self.END = end_token
    self.multinomial_outputs = None
Project: NeuralMT    Author: hlt-mt    | project source | file source
def __init__(self, opt, dicts):
        self.layers = opt.layers
        self.num_directions = 2 if opt.brnn else 1
        assert opt.rnn_size % self.num_directions == 0
        self.hidden_size = opt.rnn_size // self.num_directions
        input_size = opt.word_vec_size

        super(Encoder, self).__init__()
        self.word_lut = nn.Embedding(dicts.size(),
                                     opt.word_vec_size,
                                     padding_idx=onmt.Constants.PAD)
        self.rnn = getattr(nn, opt.rnn_type)(
             input_size, self.hidden_size,
             num_layers=opt.layers,
             dropout=opt.dropout,
             bidirectional=opt.brnn)
Project: NeuralMT    Author: hlt-mt    | project source | file source
def __init__(self, opt, dicts):
        self.layers = opt.layers
        self.input_feed = opt.input_feed
        input_size = opt.word_vec_size
        if self.input_feed:
            input_size += opt.rnn_size

        super(Decoder, self).__init__()
        self.word_lut = nn.Embedding(dicts.size(),
                                     opt.word_vec_size,
                                     padding_idx=onmt.Constants.PAD)

        stackedCell = StackedLSTM if opt.rnn_type == "LSTM" else StackedGRU
        self.rnn = stackedCell(opt.layers, input_size,
                               opt.rnn_size, opt.dropout)
        self.attn = onmt.modules.GlobalAttention(opt.rnn_size)
        self.context_gate = None
        if opt.context_gate is not None:
            self.context_gate = ContextGateFactory(
                opt.context_gate, opt.word_vec_size,
                opt.rnn_size, opt.rnn_size, opt.rnn_size
            )
        self.dropout = nn.Dropout(opt.dropout)

        self.hidden_size = opt.rnn_size
Project: alpha-dimt-icmlws    Author: sotetsuk    | project source | file source
def __init__(self, opt, dicts):
        self.layers = opt.layers
        self.input_feed = opt.input_feed
        input_size = opt.word_vec_size
        if self.input_feed:
            input_size += opt.rnn_size

        super(Decoder, self).__init__()
        self.word_lut = nn.Embedding(dicts.size(),
                                     opt.word_vec_size,
                                     padding_idx=onmt.Constants.PAD)
        self.rnn = StackedLSTM(opt.layers, input_size, opt.rnn_size, opt.dropout)
        self.attn = onmt.modules.GlobalAttention(opt.rnn_size)
        self.dropout = nn.Dropout(opt.dropout)

        self.hidden_size = opt.rnn_size
Project: semanaly    Author: zqhZY    | project source | file source
def __init__(self, opt):
        self.name = "TextLstm"
        super(TextLSTM, self).__init__()
        self.opt = opt

        self.embedding = nn.Embedding(opt.vocab_size, opt.embed_dim)

        self.lstm = nn.LSTM(input_size=opt.embed_dim,
                            hidden_size=opt.hidden_size,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=False)

        self.linears = nn.Sequential(
            nn.Linear(opt.hidden_size, opt.linear_hidden_size),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(opt.linear_hidden_size, opt.num_classes),
            # nn.Softmax()
        )

        if opt.embedding_path:
            self.embedding.weight.data.copy_(torch.from_numpy(np.load(opt.embedding_path)))
        #     # self.embedding.weight.requires_grad = False
Project: semanaly    Author: zqhZY    | project source | file source
def __init__(self, args):
        super(TextCNN, self).__init__()

        self.args = args
        V = args.vocab_size
        D = args.embed_dim
        C = args.num_classes
        Cin = 1
        Cout = args.kernel_num
        Ks = args.kernel_sizes


        self.embeding = nn.Embedding(V, D)
        self.convs = nn.ModuleList([nn.Conv2d(Cin, Cout, (K, D)) for K in Ks])

        self.dropout = nn.Dropout(args.dropout)
        self.fc = nn.Linear(len(Ks)*Cout, C)
Project: PyTorchText    Author: chenyuntc    | project source | file source
def __init__(self, opt):
        super(CNNTextInception, self).__init__()
        incept_dim = opt.inception_dim
        self.model_name = 'CNNTextInception'
        self.opt = opt
        self.encoder = nn.Embedding(opt.vocab_size, opt.embedding_dim)
        self.title_conv = nn.Sequential(
            Inception(opt.embedding_dim, incept_dim),  # (batch_size,64,opt.title_seq_len)->(batch_size,32,(opt.title_seq_len)/2)
            Inception(incept_dim, incept_dim),
            Inception(incept_dim, incept_dim),
            nn.MaxPool1d(opt.title_seq_len)
        )
        self.content_conv = nn.Sequential(
            Inception(opt.embedding_dim, incept_dim),  # (batch_size,64,opt.content_seq_len)->(batch_size,64,(opt.content_seq_len)/2)
            # Inception(incept_dim, incept_dim),  # (batch_size,64,opt.content_seq_len/2)->(batch_size,32,(opt.content_seq_len)/4)
            Inception(incept_dim, incept_dim),
            Inception(incept_dim, incept_dim),
            nn.MaxPool1d(opt.content_seq_len)
        )
        self.fc = nn.Sequential(
            nn.Linear(incept_dim * 2, opt.linear_hidden_size),
            nn.BatchNorm1d(opt.linear_hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(opt.linear_hidden_size, opt.num_classes)
        )
Project: PyTorchText    Author: chenyuntc    | project source | file source
def __init__(self, opt):
        super(CNNText, self).__init__()
        self.model_name = 'CNNText'
        self.opt = opt
        self.encoder = nn.Embedding(opt.vocab_size, opt.embedding_dim)

        self.title_conv = nn.Sequential(
            nn.Conv1d(in_channels=opt.embedding_dim,
                      out_channels=opt.title_dim,
                      kernel_size=opt.kernel_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(opt.title_seq_len - opt.kernel_size + 1))
        )

        self.content_conv = nn.Sequential(
            nn.Conv1d(in_channels=opt.embedding_dim,
                      out_channels=opt.content_dim,
                      kernel_size=opt.kernel_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(opt.content_seq_len - opt.kernel_size + 1))
        )

        self.fc = nn.Linear(opt.title_dim + opt.content_dim, opt.num_classes)
        if opt.embedding_path:
            self.encoder.weight.data.copy_(t.from_numpy(np.load(opt.embedding_path)['vector']))
Project: multitask_sentiment_analysis    Author: polaroidz    | project source | file source
def __init__(self):
        super(Chunking, self).__init__()

        self.input_size = embedding_size \
                        + nb_postags \
                        + postag_hn_size * 2

        self.w = nn.Parameter(torch.randn(chunking_nb_layers * 2, 
                                          max_sentence_size, 
                                          chunking_hn_size))
        self.h = nn.Parameter(torch.randn(chunking_nb_layers * 2, 
                                          max_sentence_size,
                                          chunking_hn_size))

        self.embedding = nn.Embedding(nb_postags, chunking_postag_emb_size)

        self.aux_emb = torch.arange(0, nb_postags)
        self.aux_emb = Variable(self.aux_emb).long()

        self.bi_lstm = nn.LSTM(self.input_size, 
                               chunking_hn_size,
                               chunking_nb_layers,
                               bidirectional=True)

        self.fc = nn.Linear(chunking_hn_size * 2, nb_chunktags)
Project: seq2seq.pytorch    Author: eladhoffer    | project source | file source
def __init__(self, vocab_size, hidden_size=512, embedding_size=None,
                 num_layers=6, num_heads=8, inner_linear=1024,
                 mask_symbol=PAD, dropout=0):

        super(TransformerAttentionEncoder, self).__init__()
        embedding_size = embedding_size or hidden_size
        self.hidden_size = hidden_size
        self.batch_first = True
        self.mask_symbol = mask_symbol
        self.embedder = nn.Embedding(
            vocab_size, embedding_size, padding_idx=PAD)
        self.scale_embedding = hidden_size ** 0.5
        self.dropout = nn.Dropout(dropout, inplace=True)
        self.blocks = nn.ModuleList([EncoderBlock(hidden_size, num_heads, inner_linear, dropout)
                                     for _ in range(num_layers)
                                     ])
Project: seq2seq.pytorch    Author: eladhoffer    | project source | file source
def __init__(self, vocab_size, hidden_size=512, embedding_size=None,
                 num_layers=6, num_heads=8, dropout=0, inner_linear=1024,
                 mask_symbol=PAD, tie_embedding=True):

        super(TransformerAttentionDecoder, self).__init__()
        embedding_size = embedding_size or hidden_size
        self.batch_first = True
        self.mask_symbol = mask_symbol
        self.embedder = nn.Embedding(
            vocab_size, embedding_size, padding_idx=PAD)
        self.scale_embedding = hidden_size ** 0.5
        self.dropout = nn.Dropout(dropout, inplace=True)
        self.blocks = nn.ModuleList([DecoderBlock(hidden_size, num_heads, inner_linear, dropout)
                                     for _ in range(num_layers)
                                     ])
        self.classifier = nn.Linear(hidden_size, vocab_size)
        if tie_embedding:
            self.embedder.weight = self.classifier.weight
Project: seq2seq.pytorch    Author: eladhoffer    | project source | file source
def __init__(self, vocab_size, hidden_size=128,
                 num_layers=1, bias=True, batch_first=False, forget_bias=None,
                 dropout=0, mode='LSTM', residual=False, tie_embedding=True):
        super(RecurrentDecoder, self).__init__()
        self.layers = num_layers
        self.hidden_size = hidden_size
        self.batch_first = batch_first
        embedding_size = hidden_size
        self.embedder = nn.Embedding(vocab_size,
                                     embedding_size,
                                     sparse=True,
                                     padding_idx=PAD)
        self.rnn = Recurrent(mode, embedding_size, self.hidden_size,
                             num_layers=num_layers, bias=bias, forget_bias=forget_bias,
                             batch_first=batch_first, residual=residual,
                             dropout=dropout, bidirectional=False)
        self.classifier = nn.Linear(hidden_size, vocab_size)
        if tie_embedding:
            self.classifier.weight = self.embedder.weight
Project: seq2seq.pytorch    Author: eladhoffer    | project source | file source
def __init__(self, vocab_size, context_size, hidden_size=128,
                 embedding_size=None, num_layers=1, bias=True, forget_bias=None,
                 batch_first=False, dropout=0, tie_embedding=False, residual=False,
                 attention=None, concat_attention=True, num_pre_attention_layers=None):
        super(RecurrentAttentionDecoder, self).__init__()
        embedding_size = embedding_size or hidden_size
        attention = attention or {}

        self.layers = num_layers
        self.batch_first = batch_first
        self.embedder = nn.Embedding(vocab_size,
                                     embedding_size,
                                     sparse=True,
                                     padding_idx=PAD)
        self.rnn = RecurrentAttention(hidden_size, context_size, hidden_size, num_layers=num_layers,
                                      bias=bias, batch_first=batch_first, dropout=dropout,
                                      forget_bias=forget_bias, residual=residual,
                                      attention=attention, concat_attention=concat_attention,
                                      num_pre_attention_layers=num_pre_attention_layers)
        self.classifier = nn.Linear(hidden_size, vocab_size)
        if tie_embedding:
            self.classifier.weight = self.embedder.weight

        self.hidden_size = hidden_size
Project: pytorch-seq2seq    Author: rowanz    | project source | file source
def _lstm_loop(self, state, embed, context, mask=None):
        """
        :param state: Current decoder state (batch_size, dec_dim)
        :param embed: Embedded input token (batch_size, embed_dim)
        :param context: All the context from encoder (batch_size, source_l, enc_dim)
        :param mask: Mask of size (batch_size, source_l) with 1 if that token is valid in encoder,
                     0 otherwise.
        :return: out: (batch_size, vocab_size) distribution over labels
                 state: (batch_size, dec_dim) next state
                alpha: (batch_size, source_l) distribution over the encoded hidden states,
                       useful for debugging maybe
        """
        c_t, alpha = self.attn(state, context, mask)
        gru_inp = torch.cat((embed, c_t), 1).unsqueeze(0)

        state = self.gru(gru_inp, state.unsqueeze(0))[0].squeeze(0)
        out = self.out(state)

        return out, state, alpha
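
The Attn module is defined elsewhere in the project; a hedged sketch of a plain dot-product attention with the shapes from the docstring, assuming dec_dim == enc_dim (the project's actual scoring may differ):

import torch
import torch.nn.functional as F

def attend(state, context, mask=None):
    # state: (batch, dec_dim), context: (batch, source_l, enc_dim)
    scores = torch.bmm(context, state.unsqueeze(2)).squeeze(2)   # (batch, source_l)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, float('-inf'))    # hide invalid tokens
    alpha = F.softmax(scores, dim=1)                             # (batch, source_l)
    c_t = torch.bmm(alpha.unsqueeze(1), context).squeeze(1)      # (batch, enc_dim)
    return c_t, alpha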
Project: pytorch    Author: ezyang    | project source | file source
def test_mini_wlm(self):
        """Exercise null-edge pruning in the tracer."""

        @torch.jit.compile(verify=True)
        class MyModel(nn.Module):
            def __init__(self):
                super(MyModel, self).__init__()
                self.encoder = nn.Embedding(2, 2)

            def forward(self, input, hidden):
                emb = self.encoder(input)
                hidden = hidden.clone()  # simulate some RNN operation
                return emb, hidden

        model = MyModel()

        x = Variable(torch.LongTensor([[0, 1], [1, 0]]))
        y = Variable(torch.FloatTensor([0]))

        z, _ = model(x, y)
        z.sum().backward()

        z, _ = model(x, y)
        z.sum().backward()
Project: Seq2Seq-on-Word-Sense-Disambiguition    Author: lbwbowenLi    | project source | file source
def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1):
        super(BahdanauAttnDecoderRNN, self).__init__()

        # Define parameters
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        self.max_length = max_length  # max_length is presumably a module-level constant in the original file

        # Define layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.attn = Attn('concat', hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout_p)
        self.out = nn.Linear(hidden_size, output_size)
Project: Seq2Seq-on-Word-Sense-Disambiguition    Author: lbwbowenLi    | project source | file source
def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(LuongAttnDecoderRNN, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout)
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        # Choose attention model
        if attn_model != 'none':
            self.attn = Attn(attn_model, hidden_size)
Project: baseline    Author: dpressel    | project source | file source
def create(cls, embeddings, labels, **kwargs):
        finetune = kwargs.get('finetune', True)
        dsz = embeddings.dsz
        model = cls()
        model.pdrop = kwargs.get('dropout', 0.5)
        model.labels = labels
        nc = len(labels)
        model.vocab = embeddings.vocab
        model.lut = nn.Embedding(embeddings.vsz + 1, dsz)
        del model.lut.weight
        model.lut.weight = nn.Parameter(torch.FloatTensor(embeddings.weights), requires_grad=finetune)
        pool_dim = model._init_pool(dsz, **kwargs)
        stacked_dim = model._init_stacked(pool_dim, **kwargs)
        model._init_output(stacked_dim, nc)
        print(model)
        return model
Project: pytorch-seq2seq    Author: IBM    | project source | file source
def __init__(self, vocab_size, max_len, hidden_size,
            sos_id, eos_id,
            n_layers=1, rnn_cell='gru', bidirectional=False,
            input_dropout_p=0, dropout_p=0, use_attention=False):
        super(DecoderRNN, self).__init__(vocab_size, max_len, hidden_size,
                input_dropout_p, dropout_p,
                n_layers, rnn_cell)

        self.bidirectional_encoder = bidirectional
        self.rnn = self.rnn_cell(hidden_size, hidden_size, n_layers, batch_first=True, dropout=dropout_p)

        self.output_size = vocab_size
        self.max_length = max_len
        self.use_attention = use_attention
        self.eos_id = eos_id
        self.sos_id = sos_id

        self.init_input = None

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        if use_attention:
            self.attention = Attention(self.hidden_size)

        self.out = nn.Linear(self.hidden_size, self.output_size)
Project: self-critical.pytorch    Author: ruotianluo    | project source | file source
def __init__(self, opt):
        super(FCModel, self).__init__()
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        self.rnn_type = opt.rnn_type
        self.rnn_size = opt.rnn_size
        self.num_layers = opt.num_layers
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.fc_feat_size = opt.fc_feat_size

        self.ss_prob = 0.0  # scheduled sampling probability

        self.img_embed = nn.Linear(self.fc_feat_size, self.input_encoding_size)
        self.core = LSTMCore(opt)
        self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
        self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)

        self.init_weights()
Project: self-critical.pytorch    Author: ruotianluo    | project source | file source
def __init__(self, opt):
        super(OldModel, self).__init__()
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        self.rnn_type = opt.rnn_type
        self.rnn_size = opt.rnn_size
        self.num_layers = opt.num_layers
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.fc_feat_size = opt.fc_feat_size
        self.att_feat_size = opt.att_feat_size

        self.ss_prob = 0.0  # scheduled sampling probability

        self.linear = nn.Linear(self.fc_feat_size, self.num_layers * self.rnn_size) # feature to rnn_size
        self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
        self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)
        self.dropout = nn.Dropout(self.drop_prob_lm)

        self.init_weights()
Project: self-critical.pytorch    Author: ruotianluo    | project source | file source
def __init__(self, opt):
        super(ShowTellModel, self).__init__()
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        self.rnn_type = opt.rnn_type
        self.rnn_size = opt.rnn_size
        self.num_layers = opt.num_layers
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.fc_feat_size = opt.fc_feat_size

        self.ss_prob = 0.0  # scheduled sampling probability

        self.img_embed = nn.Linear(self.fc_feat_size, self.input_encoding_size)
        self.core = getattr(nn, self.rnn_type.upper())(self.input_encoding_size, self.rnn_size, self.num_layers, bias=False, dropout=self.drop_prob_lm)
        self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
        self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)
        self.dropout = nn.Dropout(self.drop_prob_lm)

        self.init_weights()
Project: self-critical.pytorch    Author: ruotianluo    | project source | file source
def __init__(self, opt):
        super(Att2inModel, self).__init__()
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        #self.rnn_type = opt.rnn_type
        self.rnn_size = opt.rnn_size
        self.num_layers = 1
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.fc_feat_size = opt.fc_feat_size
        self.att_feat_size = opt.att_feat_size
        self.att_hid_size = opt.att_hid_size

        self.ss_prob = 0.0  # scheduled sampling probability

        self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
        self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)
        self.ctx2att = nn.Linear(self.att_feat_size, self.att_hid_size)
        self.core = Att2inCore(opt)

        self.init_weights()
Project: Rita    Author: RITct    | project source | file source
def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        self.max_length = max_length
        if use_cuda:
            self.embedding = nn.Embedding(self.output_size, self.hidden_size).cuda()
            self.attn = nn.Linear(self.hidden_size * 2, self.max_length).cuda()
            self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size).cuda()
            self.dropout = nn.Dropout(self.dropout_p).cuda()
            self.gru = nn.GRU(self.hidden_size, self.hidden_size).cuda()
            self.out = nn.Linear(self.hidden_size, self.output_size).cuda()
        else:
            self.embedding = nn.Embedding(self.output_size, self.hidden_size)
            self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
            self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
            self.dropout = nn.Dropout(self.dropout_p)
            self.gru = nn.GRU(self.hidden_size, self.hidden_size)
            self.out = nn.Linear(self.hidden_size, self.output_size)
Project: treelstm-pytorch    Author: pklfz    | project source | file source
def __init__(self, cuda, vocab_size, in_dim, mem_dim, sparsity):
        super(ChildSumTreeLSTM, self).__init__()
        self.cudaFlag = cuda
        self.in_dim = in_dim
        self.mem_dim = mem_dim

        self.emb = nn.Embedding(vocab_size, in_dim,
                                padding_idx=Constants.PAD,
                                sparse=sparsity)

        self.ix = nn.Linear(self.in_dim, self.mem_dim)
        self.ih = nn.Linear(self.mem_dim, self.mem_dim)

        self.fx = nn.Linear(self.in_dim, self.mem_dim)
        self.fh = nn.Linear(self.mem_dim, self.mem_dim)

        self.ox = nn.Linear(self.in_dim, self.mem_dim)
        self.oh = nn.Linear(self.mem_dim, self.mem_dim)

        self.ux = nn.Linear(self.in_dim, self.mem_dim)
        self.uh = nn.Linear(self.mem_dim, self.mem_dim)
Project: examples    Author: pytorch    | project source | file source
def __init__(self, config):
        super(SNLIClassifier, self).__init__()
        self.config = config
        self.embed = nn.Embedding(config.n_embed, config.d_embed)
        self.projection = Linear(config.d_embed, config.d_proj)
        self.encoder = Encoder(config)
        self.dropout = nn.Dropout(p=config.dp_ratio)
        self.relu = nn.ReLU()
        seq_in_size = 2*config.d_hidden
        if self.config.birnn:
            seq_in_size *= 2
        lin_config = [seq_in_size]*2
        self.out = nn.Sequential(
            Linear(*lin_config),
            self.relu,
            self.dropout,
            Linear(*lin_config),
            self.relu,
            self.dropout,
            Linear(*lin_config),
            self.relu,
            self.dropout,
            Linear(seq_in_size, config.d_out))
Project: Structured-Self-Attentive-Sentence-Embedding    Author: ExplorerFreda    | project source | file source
def __init__(self, config):
        super(BiLSTM, self).__init__()
        self.drop = nn.Dropout(config['dropout'])
        self.encoder = nn.Embedding(config['ntoken'], config['ninp'])
        self.bilstm = nn.LSTM(config['ninp'], config['nhid'], config['nlayers'], dropout=config['dropout'],
                              bidirectional=True)
        self.nlayers = config['nlayers']
        self.nhid = config['nhid']
        self.pooling = config['pooling']
        self.dictionary = config['dictionary']
#        self.init_weights()
        self.encoder.weight.data[self.dictionary.word2idx['<pad>']] = 0
        if os.path.exists(config['word-vector']):
            print('Loading word vectors from', config['word-vector'])
            vectors = torch.load(config['word-vector'])
            assert vectors[2] >= config['ninp']
            vocab = vectors[0]
            vectors = vectors[1]
            loaded_cnt = 0
            for word in self.dictionary.word2idx:
                if word not in vocab:
                    continue
                real_id = self.dictionary.word2idx[word]
                loaded_id = vocab[word]
                self.encoder.weight.data[real_id] = vectors[loaded_id][:config['ninp']]
                loaded_cnt += 1
            print('%d words from external word vectors loaded.' % loaded_cnt)

    # note: init_range constrains the value of the initial weights
Project: YellowFin_Pytorch    Author: JianGoForIt    | project source | file source
def __init__(self, batch_size, num_tokens, embed_size, word_gru_hidden, bidirectional=True, init_range=0.1, use_lstm=False):

        super(AttentionWordRNN, self).__init__()

        self.batch_size = batch_size
        self.num_tokens = num_tokens
        self.embed_size = embed_size
        self.word_gru_hidden = word_gru_hidden
        self.bidirectional = bidirectional
        self.use_lstm = use_lstm

        self.lookup = nn.Embedding(num_tokens, embed_size)
        if bidirectional:
            if use_lstm:
                print("inside using LSTM")
                self.word_gru = nn.LSTM(embed_size, word_gru_hidden, bidirectional=True)
            else:
                self.word_gru = nn.GRU(embed_size, word_gru_hidden, bidirectional=True)
            self.weight_W_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 2 * word_gru_hidden))
            self.bias_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 1))
            self.weight_proj_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 1))
        else:
            if use_lstm:
                self.word_gru = nn.LSTM(embed_size, word_gru_hidden, bidirectional=False)
            else:
                self.word_gru = nn.GRU(embed_size, word_gru_hidden, bidirectional=False)
            self.weight_W_word = nn.Parameter(torch.Tensor(word_gru_hidden, word_gru_hidden))
            self.bias_word = nn.Parameter(torch.Tensor(word_gru_hidden, 1))
            self.weight_proj_word = nn.Parameter(torch.Tensor(word_gru_hidden, 1))

        self.softmax_word = nn.Softmax()
        self.weight_W_word.data.uniform_(-init_range, init_range)
        self.weight_proj_word.data.uniform_(-init_range, init_range)
Project: YellowFin_Pytorch    Author: JianGoForIt    | project source | file source
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError( """An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
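
The shape constraint behind the nhid != ninp check: a tied decoder shares the (ntoken, nhid) Linear weight with the (ntoken, ninp) embedding table, which is only possible when the two inner dimensions agree. A hedged standalone sketch:

import torch.nn as nn

ntoken, ninp = 1000, 256
encoder = nn.Embedding(ntoken, ninp)     # weight: (ntoken, ninp)
decoder = nn.Linear(ninp, ntoken)        # weight: (ntoken, ninp)
decoder.weight = encoder.weight          # one shared Parameter
assert decoder.weight is encoder.weight  # input and output embeddings tied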
Project: twin-spelling    Author: dmitriy-serdyuk    | project source | file source
def __init__(self, num_characters, dim):
        super(RNNEncoder, self).__init__()

        self.embedding = nn.Embedding(num_characters, dim)
        # TODO
        self.rnn = nn.GRU()
Project: twin-spelling    Author: dmitriy-serdyuk    | project source | file source
def __init__(self, num_characters, dim):
        super(RNNDecoder, self).__init__()

        self.embedding = nn.Embedding(num_characters, dim)
        self.rnn_cell = nn.GRUCell(dim, dim)
Project: treelstm.pytorch    Author: dasguptar    | project source | file source
def __init__(self, vocab_size, in_dim, mem_dim, hidden_dim, num_classes, sparsity, freeze):
        super(SimilarityTreeLSTM, self).__init__()
        self.emb = nn.Embedding(vocab_size, in_dim, padding_idx=Constants.PAD, sparse=sparsity)
        if freeze:
            self.emb.weight.requires_grad = False
        self.childsumtreelstm = ChildSumTreeLSTM(in_dim, mem_dim)
        self.similarity = Similarity(mem_dim, hidden_dim, num_classes)
Project: pytorch-dist    Author: apaszke    | project source | file source
def test_embedding_padding_idx(self):
        embedding = nn.Embedding(10, 20, padding_idx=0)
        input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
        output = embedding(input)
        self.assertEqual(output[0][0].sum().data[0], 0)
        self.assertEqual(output[1][2].sum().data[0], 0)
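
A hedged follow-up to the same test, written against a recent PyTorch where plain tensors replace Variable: the padding row is also excluded from updates, so its gradient stays zero after backward (documented behavior of nn.Embedding):

import torch
import torch.nn as nn

embedding = nn.Embedding(10, 20, padding_idx=0)
output = embedding(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
output.sum().backward()
assert embedding.weight.grad[0].abs().sum().item() == 0  # row 0 never updates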