Dataset 数据集处理

无向图->节点重新编号->保存为无向图边表(每条边按两个方向各写一行)

有向图->节点重新编号->保存为有向图边表

import networkx as nx
def undirect_renumber_and_save(input_file, output_file):
    """Renumber the nodes of an undirected edge list and save it symmetrically.

    Reads one edge per line (two integer node ids separated by whitespace),
    builds an undirected graph, relabels its nodes to integers starting at 0,
    and writes a tab-separated edge list in which every edge between two
    distinct nodes appears once in each direction.

    Args:
        input_file: Path of the edge-list text file to read.
        output_file: Path of the renumbered edge-list file to write.

    Raises:
        ValueError: If a data line does not consist of exactly two integer ids.
    """
    G = nx.Graph()
    # Read the original txt file and build the graph.
    with open(input_file, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            # Blank lines and '#' header/comment lines carry no edge; skipping
            # them avoids an unpacking error on otherwise valid files.
            if not line or line.startswith('#'):
                continue
            # Split on any whitespace so tab- and space-separated files both work.
            fields = line.split()
            if len(fields) != 2:
                raise ValueError(
                    f"{input_file}:{line_no}: expected 2 node ids, got {len(fields)}"
                )
            source, target = fields
            G.add_edge(int(source), int(target))
    H = nx.convert_node_labels_to_integers(G, first_label=0, ordering='default', label_attribute=None)
    # Save the renumbered graph to a new txt file.
    with open(output_file, 'w') as new_f:
        for new_source, new_target in H.edges():
            new_f.write(f"{new_source}\t{new_target}\n")
            # An undirected edge is also stored as target -> source. A self-loop
            # is its own mirror, so writing it again would only duplicate the line.
            if new_source != new_target:
                new_f.write(f"{new_target}\t{new_source}\n")

def direct_renumber_and_save(input_file, output_file):
    """Renumber the nodes of a directed edge list and save the result.

    Reads one edge per line (source id and target id, both integers, separated
    by whitespace), builds a directed graph, relabels its nodes to integers
    starting at 0, and writes each edge once as a tab-separated
    ``source<TAB>target`` line.

    Args:
        input_file: Path of the edge-list text file to read.
        output_file: Path of the renumbered edge-list file to write.

    Raises:
        ValueError: If a data line does not consist of exactly two integer ids.
    """
    G = nx.DiGraph()
    # Read the original txt file and build the graph.
    with open(input_file, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            # Blank lines and '#' header/comment lines carry no edge; skipping
            # them avoids an unpacking error on otherwise valid files.
            if not line or line.startswith('#'):
                continue
            # Split on any whitespace so tab- and space-separated files both work.
            fields = line.split()
            if len(fields) != 2:
                raise ValueError(
                    f"{input_file}:{line_no}: expected 2 node ids, got {len(fields)}"
                )
            source, target = fields
            G.add_edge(int(source), int(target))
    H = nx.convert_node_labels_to_integers(G, first_label=0, ordering='default', label_attribute=None)
    # Save the renumbered graph to a new txt file; direction is kept as-is.
    with open(output_file, 'w') as new_f:
        for new_source, new_target in H.edges():
            new_f.write(f"{new_source}\t{new_target}\n")

# HepTh is processed as an undirected graph.
undirect_renumber_and_save('HepTh.txt', 'HepTh_re.txt')
# The remaining datasets are processed as directed graphs, in this order.
for src_path, dst_path in (
    ('wiki-Vote.txt', 'wiki-Vote_re.txt'),
    ('p2p-Gnutella05.txt', 'p2p-Gnutella05_re.txt'),
):
    direct_renumber_and_save(src_path, dst_path)

读入一个CSV->节点重新编号->导出txt

import pandas as pd
# Read an edge list from a CSV file, renumber its nodes, and export it as txt.
input_file = 'lastfm.csv'
output_file = 'lastfm_re.txt'
G_type = 'undirect'  # either 'undirect' or 'direct'
# The CSV is read without a header row; its two columns are the edge endpoints.
df = pd.read_csv(input_file, header=None, names = ['source', 'target'])
# Create an empty graph of the requested kind.
if G_type == 'undirect':
    G = nx.Graph()
elif G_type == 'direct':
    G = nx.DiGraph()
else:
    # Fail here with a clear message rather than with a NameError on G below.
    raise ValueError(f"unknown G_type {G_type!r}; expected 'undirect' or 'direct'")
# Add one edge per DataFrame row; iterating the two columns directly avoids
# building a Series object for every row.
for source, target in zip(df['source'], df['target']):
    G.add_edge(source, target)
H = nx.convert_node_labels_to_integers(G, first_label=0, ordering='default', label_attribute=None)
with open(output_file, 'w') as new_f:
    for new_source, new_target in H.edges():
        new_f.write(f"{new_source}\t{new_target}\n")
        # Only an undirected edge list also stores target -> source. A self-loop
        # is its own mirror, so it is not written a second time.
        if G_type == 'undirect' and new_source != new_target:
            new_f.write(f"{new_target}\t{new_source}\n")
posted @ 2023-08-19 19:58  X1OO  阅读(38)  评论(0)    收藏  举报