# DGL学习(五): DGL构建异质图

import dgl
import networkx as nx
import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F

# Build a heterograph from explicit (source ids, destination ids) array pairs.
# Every key is a canonical edge type:
# (source node type, edge type, destination node type).
ratings = dgl.heterograph({
    ('user', '+1', 'movie'): (np.array([0, 0, 1]), np.array([0, 1, 0])),
    ('user', '-1', 'movie'): (np.array([2]), np.array([1])),
})

## Build the graph from SciPy sparse matrices
# The stored entries sit at the same (row, col) positions as the id pairs
# used above; presumably rows index users and columns index movies.
plus1 = sp.coo_matrix(
    ([1, 1, 1], ([0, 0, 1], [0, 1, 0])),
    shape=(3, 2),
)
minus1 = sp.coo_matrix(
    ([1], ([2], [1])),
    shape=(3, 2),
)
ratings = dgl.heterograph({
    ('user', '+1', 'movie'): plus1,
    ('user', '-1', 'movie'): minus1,
})

## Build the graph from a NetworkX graph
# Nodes are tagged with a `bipartite` attribute (0 for the 'u*' nodes,
# 1 for the 'm*' nodes); every edge goes from a 'u*' node to an 'm*' node.
plus1 = nx.DiGraph()
plus1.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
plus1.add_nodes_from(['m0', 'm1'], bipartite=1)
plus1.add_edges_from([('u0', 'm0'), ('u0', 'm1'), ('u1', 'm0')])
# The '+1' relation now comes from NetworkX, while '-1' still uses the
# sparse matrix created above.
ratings = dgl.heterograph({
    ('user', '+1', 'movie'): plus1,
    ('user', '-1', 'movie'): minus1,
})

import os
import tempfile
import urllib.request

import scipy.io

data_url = 'https://data.dgl.ai/dataset/ACM.mat'
# Cache the dataset in the platform's temporary directory rather than a
# hard-coded POSIX path, so the script also runs where '/tmp' does not exist.
data_file_path = os.path.join(tempfile.gettempdir(), 'ACM.mat')

# Download only when no cached copy exists. The file is fetched under a
# temporary name and renamed afterwards, so an interrupted download is never
# mistaken for a complete dataset on the next run.
if not os.path.exists(data_file_path):
    partial_path = data_file_path + '.part'
    urllib.request.urlretrieve(data_url, partial_path)
    os.replace(partial_path, data_file_path)

# Load the MATLAB file and list the variables it contains.
data = scipy.io.loadmat(data_file_path)
print(list(data.keys()))
# 输出:
# ['__header__', '__version__', '__globals__', 'TvsP', 'PvsA', 'PvsV', 'AvsF', 'VvsC', 'PvsL', 'PvsC', 'A', 'C', 'F', 'L', 'P', 'T', 'V', 'PvsT', 'CNormPvsA', 'RNormPvsA', 'CNormPvsC', 'RNormPvsC', 'CNormPvsT', 'RNormPvsT', 'CNormPvsV', 'RNormPvsV', 'CNormVvsC', 'RNormVvsC', 'CNormAvsF', 'RNormAvsF', 'CNormPvsL', 'RNormPvsL', 'stopwords', 'nPvsT', 'nT', 'CNormnPvsT', 'RNormnPvsT', 'nnPvsT', 'nnT', 'CNormnnPvsT', 'RNormnnPvsT', 'PvsP', 'CNormPvsP', 'RNormPvsP']

# A 代表作者，P 代表论文，C 代表会议，L 代表主题代码；边以 SciPy 稀疏矩阵的形式存储在键 XvsY 下，其中 X 和 Y 可以是任意节点类型代码。

# Inspect the paper-author matrix: its type, its two dimensions and the
# number of stored entries.
print(type(data['PvsA']))
print('#Papers:', data['PvsA'].shape[0])
print('#Authors:', data['PvsA'].shape[1])
print('#Links:', data['PvsA'].nnz)

# Build a heterograph with a single relation from that matrix.
pa_g = dgl.heterograph({
    ('paper', 'written-by', 'author'): data['PvsA'],
})
# Equivalent (shorter) API for creating a heterograph with two node types;
# this rebinds pa_g.
pa_g = dgl.bipartite(data['PvsA'], 'paper', 'written-by', 'author')

print('Node types:', pa_g.ntypes)
print('Edge types:', pa_g.etypes)
print('Canonical edge types:', pa_g.canonical_etypes)

# Nodes and edges are identified by zero-based integer IDs, counted
# separately for each type. To tell nodes and edges of different types apart,
# pass the type name as an argument.
print(pa_g.number_of_nodes('paper'))

# When an edge type name is unambiguous on its own, the canonical edge type
# can be shortened to just that name.
print(pa_g.number_of_edges(('paper', 'written-by', 'author')))
print(pa_g.number_of_edges('written-by'))

## Get the authors of paper #1
print(pa_g.successors(1, etype='written-by'))
# 输出:
# Node types: ['paper', 'author']
# Edge types: ['written-by']
# Canonical edge types: [('paper', 'written-by', 'author')]
# 12499
# 37055
# 37055
# tensor([3532, 6421, 8516, 8560])

## Metagraph

## The metagraph (or network schema) is an overview of a heterograph's
## structure. It serves as a template for the heterograph: it describes how
## many kinds of objects exist in the network and which links between them
## are possible.
# Print the heterograph itself: the output recorded for this statement is the
# graph's own summary (node counts, edge counts and the `metagraph=[...]`
# list), not a printout of the metagraph object.
# NOTE(review): G is not defined anywhere in this part of the file; it must
# be constructed before this line runs.
print(G)
# 输出:
# Graph(num_nodes={'author': 17431, 'paper': 12499, 'subject': 73},
# num_edges={('paper', 'written-by', 'author'): 37055, ('author', 'writing', 'paper'): 37055, ('paper', 'citing', 'paper'): 30789, ('paper', 'cited', 'paper'): 30789, ('paper', 'is-about', 'subject'): 12499, ('subject', 'has', 'paper'): 12499},
# metagraph=[('author', 'paper'), ('paper', 'author'), ('paper', 'paper'), ('paper', 'paper'), ('paper', 'subject'), ('subject', 'paper')])

import dgl.function as fn

class HeteroRGCNLayer(nn.Module):
    """One relational graph-convolution layer over a heterograph.

    The layer owns one ``nn.Linear(in_size, out_size)`` per edge type name.
    In ``forward`` each relation transforms the features of its source node
    type, the transformed features are averaged over incoming edges of that
    relation, and the per-relation results are summed per node type.

    Args:
        in_size: Size of each input feature vector.
        out_size: Size of each output feature vector.
        etypes: Iterable of edge type names; one linear map is created for
            each name.
    """

    def __init__(self, in_size, out_size, etypes):
        super().__init__()
        # W_r for each relation
        self.weight = nn.ModuleDict({
            name: nn.Linear(in_size, out_size) for name in etypes
        })

    def forward(self, G, feat_dict):
        """Run one round of typed message passing on ``G``.

        Args:
            G: Graph exposing ``canonical_etypes``, ``ntypes``, ``nodes`` and
                ``multi_update_all``. Its node data is modified: a
                ``'Wh_<etype>'`` entry is stored on every source node type
                and ``'h'`` is read back from every node type afterwards.
            feat_dict: Mapping from node type to that type's input features.

        Returns:
            dict mapping every node type in ``G.ntypes`` to its ``'h'``
            node data.
        """
        # The input is a dictionary of node features for each type
        funcs = {}
        for srctype, etype, _dsttype in G.canonical_etypes:
            # Name of the node-data field holding W_r * h for this relation
            feat_key = 'Wh_%s' % etype
            # Compute W_r * h for this edge type and save it in the graph for
            # message passing
            G.nodes[srctype].data[feat_key] = self.weight[etype](feat_dict[srctype])
            # Message function copy_u: copy the source node feature into 'm';
            # reduce function: store the mean of 'm' in 'h'
            funcs[etype] = (fn.copy_u(feat_key, 'm'), fn.mean('m', 'h'))
        # Trigger message passing of multiple types.
        # The first argument is the message passing functions for each relation.
        # The second one is the type wise reducer, could be "sum", "max",
        # "min", "mean", "stack"
        G.multi_update_all(funcs, 'sum')
        # Return the updated node feature dictionary.
        # NOTE(review): this reads 'h' for every node type, so it presumably
        # relies on each type having been written by some relation — confirm.
        return {ntype: G.nodes[ntype].data['h'] for ntype in G.ntypes}
class HeteroRGCN(nn.Module):
    """Two-layer heterogeneous RGCN that returns features for 'paper' nodes.

    The model has no input features: it learns one embedding matrix per node
    type and feeds those embeddings through two ``HeteroRGCNLayer`` modules
    with a leaky ReLU in between.

    Args:
        G: Graph exposing ``ntypes``, ``etypes`` and ``number_of_nodes``;
            used here only to size the embeddings and create the layers.
        in_size: Width of the learned node embeddings.
        hidden_size: Output width of the first layer.
        out_size: Output width of the second layer.
    """

    def __init__(self, G, in_size, hidden_size, out_size):
        super().__init__()
        # Use trainable node embeddings as featureless inputs.
        # torch.empty leaves the storage uninitialized; every entry is
        # overwritten by the Xavier initialization right below.
        embed_dict = {
            ntype: nn.Parameter(torch.empty(G.number_of_nodes(ntype), in_size))
            for ntype in G.ntypes
        }
        for embed in embed_dict.values():
            nn.init.xavier_uniform_(embed)
        self.embed = nn.ParameterDict(embed_dict)
        # create layers
        self.layer1 = HeteroRGCNLayer(in_size, hidden_size, G.etypes)
        self.layer2 = HeteroRGCNLayer(hidden_size, out_size, G.etypes)

    def forward(self, G):
        """Return the second layer's output for the 'paper' node type."""
        h_dict = self.layer1(G, self.embed)
        h_dict = {k: F.leaky_relu(h) for k, h in h_dict.items()}
        h_dict = self.layer2(G, h_dict)
        # get paper logits
        return h_dict['paper']

### Train and evaluate

# Train for 100 epochs on the full graph and report the test accuracy
# observed at the epoch with the best validation accuracy.
# NOTE(review): G, labels, train_idx, val_idx and test_idx are not defined in
# this part of the file; they must exist before this section runs.
model = HeteroRGCN(G, 10, 10, 3)

opt = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Kept as plain Python floats: the report below must also work when no epoch
# has improved on the initial value yet (calling .item() on the initial
# integer 0 would fail).
best_val_acc = 0.0
best_test_acc = 0.0

for epoch in range(100):
    logits = model(G)
    # The loss is computed only for labeled nodes.
    loss = F.cross_entropy(logits[train_idx], labels[train_idx])

    # Accuracy on each split, converted to Python floats right away.
    pred = logits.argmax(1)
    train_acc = (pred[train_idx] == labels[train_idx]).float().mean().item()
    val_acc = (pred[val_idx] == labels[val_idx]).float().mean().item()
    test_acc = (pred[test_idx] == labels[test_idx]).float().mean().item()

    # Remember the test accuracy seen at the best validation accuracy.
    if best_val_acc < val_acc:
        best_val_acc = val_acc
        best_test_acc = test_acc

    opt.zero_grad()
    loss.backward()
    opt.step()

    if epoch % 5 == 0:
        print('Loss %.4f, Train Acc %.4f, Val Acc %.4f (Best %.4f), Test Acc %.4f (Best %.4f)' % (
            loss.item(),
            train_acc,
            val_acc,
            best_val_acc,
            test_acc,
            best_test_acc,
        ))

# posted @ 2020-07-23 15:29  樱花庄的龙之介大人  阅读(370)  评论(2)  编辑  收藏