word2vec.py


import torch
import torch.nn.functional as F
import numpy as np


class SkipGram(torch.nn.Module):
    def __init__(self, vocab_size, embedding_size):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        # Projects a one-hot word vector to its dense embedding;
        # column i of hidden.weight is word i's embedding vector.
        self.hidden = torch.nn.Linear(self.vocab_size, self.embedding_size)
        # Scores every vocabulary word as a potential context word.
        self.predict = torch.nn.Linear(self.embedding_size, self.vocab_size)

    def forward(self, X):
        hidden = self.hidden(X)
        # Return raw logits: CrossEntropyLoss applies log-softmax internally,
        # so an explicit F.softmax here would apply softmax twice.
        return self.predict(hidden)
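
# A minimal sanity check (a sketch; demo_forward is not part of the original
# script): a one-hot batch through an untrained SkipGram yields one logit per
# vocabulary word, and softmax turns those logits into probabilities.
def demo_forward():
    net = SkipGram(8, 3)              # vocab of 8 words, 3-d embeddings
    x = torch.eye(8)[:2]              # one-hot rows for words 0 and 1
    logits = net(x)                   # shape (2, 8)
    probs = F.softmax(logits, dim=1)  # each row sums to 1
    print(probs.shape)                # torch.Size([2, 8])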

def data_iter(batch_size):
    sentence = 'The quick fox jumps over the lazy dog'
    words = sentence.split()
    w_len = len(words)
    word2id = {words[i]: i for i in range(w_len)}
    one_hot = np.eye(w_len)

    # Collect (center one-hot vector, context word id) training pairs.
    context_size = 1
    pairs = []
    for i in range(w_len):
        # Clamp the left edge at 0 so negative indices don't wrap around.
        prior = words[max(0, i - context_size):i]
        behind = words[i + 1:i + 1 + context_size]
        context = prior + behind
        pairs.extend([(one_hot[i], word2id[c]) for c in context])

    # Yield batches as (inputs, targets) so callers can unpack them directly.
    idx = 0
    while idx < len(pairs):
        xs, ys = zip(*pairs[idx:idx + batch_size])
        yield np.array(xs), np.array(ys)
        idx += batch_size
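
# A quick look at the first batch (demo_batch is illustrative, not part of
# the original script): x stacks two one-hot center-word vectors and y holds
# the matching context-word ids.
def demo_batch():
    x, y = next(data_iter(2))
    print(x.shape)  # (2, 8)
    print(y)        # [1 0]: 'quick' is the context of 'The', and 'The' of 'quick'
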
def test():
    net = SkipGram(8, 3)
    optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
    loss_fun = torch.nn.CrossEntropyLoss()
    losses = []
    for i in range(100):
        count = 0
        for x, y in data_iter(2):
            # CrossEntropyLoss expects float logits first and long class
            # indices second: loss_fun(prediction, target).
            x, y = torch.FloatTensor(x), torch.LongTensor(y)
            pred = net(x)
            loss = loss_fun(pred, y)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if count % 10 == 0:
                print('loss:%.4f' % (sum(losses) / len(losses)))
            count += 1
    return net


if __name__ == '__main__':
    # for x, y in data_iter(2):
    #     print('%s:%s' % (x, y))
    test()
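
# The learned embeddings are the hidden layer's weights: with
# Linear(vocab_size, embedding_size), column i of net.hidden.weight is word
# i's vector. A sketch for comparing two words (demo_embeddings is not part
# of the original script; it assumes a net returned by test()).
def demo_embeddings(net):
    words = 'The quick fox jumps over the lazy dog'.split()
    vectors = net.hidden.weight.detach().t()  # shape (8, 3): one row per word
    quick = vectors[words.index('quick')]
    lazy = vectors[words.index('lazy')]
    print(F.cosine_similarity(quick, lazy, dim=0).item())

# Usage: demo_embeddings(test())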

 
