数据描述
- Directed graph (each unordered pair of nodes is saved once): web-Google.txt
- Webgraph from the Google programming contest, 2002
- Nodes: 875713 Edges: 5105039
- Line format: FromNodeId ToNodeId (one directed edge per line, whitespace-separated)
代码
import numpy as np
# Fraction of a node's rank that is passed along its out-links each iteration;
# the remaining mass is redistributed evenly in the main loop.
P = 0.8
# Number of distinct nodes in the graph; used for the initial rank 1 / N and
# for spreading the leaked mass.
N = 875713
# Length of every per-node table.  Node ids are used directly as indices, so
# this must be larger than the largest node id in the edge file.
BIG_N = 930000
# For the small test graph (web_test.txt) use N = 4 and BIG_N = 8 instead.
# Not referenced elsewhere in the visible code.
base = (1 / N) * 0.2
# Marker stored in allNodeList for indices that are not real node ids.
init_value = -1
# allNodeList[i] == -1: i is not a valid node id; >= 0: i is a node id.
# Sized from BIG_N (not a repeated literal) so every table stays the same
# length when BIG_N is changed.
allNodeList = [init_value] * BIG_N
# nodeTable[i] is filled by preprocess() as [node_id_or_-1, out_degree, target, ...].
nodeTable = [[] for _ in range(BIG_N)]
# Rank vectors: current ranks, ranks accumulated along edges, and the
# accumulated ranks plus the redistributed share.
my_old_row = np.zeros(BIG_N)
my_new_row = np.zeros(BIG_N)
my_future_row = np.zeros(BIG_N)
# Final mapping node id -> rank, filled after the last iteration.
id2rank = dict()
# Preprocessing: load the edge list and initialise the rank vector
def preprocess(path="web-Google.txt"):
    """Load the edge list at *path* into the module-level graph tables.

    Each data line must hold two whitespace-separated integers,
    ``FromNodeId ToNodeId``.  Blank lines and lines whose first token starts
    with ``#`` (for example a descriptive header at the top of the file) are
    skipped rather than crashing the two-value unpack.

    Side effects on module globals:

    * ``nodeTable[i]`` becomes ``[node_id_or_-1, out_degree, target, ...]``;
      slot 0 stays ``-1`` for indices that never appear as a source.
    * ``allNodeList[i]`` is set to ``i`` for every id seen as source or target.
    * ``my_old_row[i]`` is set to ``1 / N`` for every id seen.

    Args:
        path: Edge-list file to read.  Defaults to the file name that was
            previously hard-coded.

    Raises:
        ValueError: A data line does not consist of exactly two integers.
        IndexError: A node id is not smaller than ``BIG_N``.
    """
    # Every row starts as [-1, 0]: "not seen as a source yet", out-degree 0.
    for i in range(BIG_N):
        nodeTable[i].extend((-1, 0))

    with open(path, "r") as f:
        for line in f:
            fields = line.split()
            # Skip blank lines and '#' comment/header lines.
            if not fields or fields[0].startswith("#"):
                continue
            source_text, target_text = fields
            source = int(source_text)
            target = int(target_text)

            # Both endpoints are real nodes, even if one has no out-links.
            allNodeList[source] = source
            allNodeList[target] = target

            row = nodeTable[source]
            row[0] = source  # mark as a source (idempotent after the first edge)
            row[1] += 1      # out-degree
            row.append(target)

    # Start from a uniform distribution over the nodes that actually occur.
    for i in range(BIG_N):
        if allNodeList[i] >= 0:
            my_old_row[i] = 1 / N
if __name__ == '__main__':
    # Script entry point: load the graph, run ten rounds of a PageRank-style
    # power iteration, then print the 100 highest-ranked node ids.
    preprocess()

    # Boolean mask of the indices that are real node ids; allNodeList is not
    # modified after preprocess(), so it is computed once.
    valid = np.asarray(allNodeList) >= 0

    for m_iter in range(10):
        # Push P * rank / out_degree from every node to each of its targets.
        # Kept as an explicit loop so the accumulation order is unchanged.
        for src in range(BIG_N):
            if allNodeList[src] < 0:
                continue
            row = nodeTable[src]
            targets = row[2:]
            if not targets:
                continue
            share = P * my_old_row[src] / row[1]
            for dst in targets:
                my_new_row[dst] += share

        # Total rank that arrived over edges this round.
        S = my_new_row.sum()
        print("S: ", S)

        # Spread the missing mass (1 - S) evenly over the N nodes, then roll
        # the vectors over for the next round.
        my_future_row[valid] = my_new_row[valid] + (1 - S) / N
        my_old_row[valid] = my_future_row[valid]
        my_new_row[valid] = 0
        print("range ", m_iter, "finish")

    # Record the final rank of every real node, in ascending id order.
    id2rank.update(
        (node, my_old_row[node]) for node in range(BIG_N) if allNodeList[node] >= 0
    )

    # Sort ascending by (rank, id); the last 100 entries read backwards are
    # the top 100 in descending order.
    ascending = sorted(id2rank.items(), key=lambda kv: (kv[1], kv[0]))
    for key, value in ascending[-100:][::-1]:
        print(key, " ", value)