# 进程示例 (multiprocessing example)

import csv
import time
import warnings
from multiprocessing import Process

import pandas as pd

warnings.filterwarnings("ignore")
def node():
    """Load the node table from the intopieces entity CSV.

    Reads ``./entity_csv/yinni_kg_intopieces.csv`` (UTF-8) and keeps the
    first field of every row as ``global_id`` and the third field as
    ``order_number``. Every non-blank row is treated as data; no header row
    is skipped. The row count and the first rows are printed.

    Returns:
        pandas.DataFrame: columns ``global_id`` and ``order_number`` holding
        the field values as strings, one row per non-blank input row.

    Raises:
        IndexError: if a non-blank row has fewer than three fields.
    """
    global_id = []
    order_number = []

    # newline="" hands line-ending handling to the csv module, which also
    # copes with quoted fields that contain commas (a plain split(",") would
    # shift the column positions for such rows).
    with open("./entity_csv/yinni_kg_intopieces.csv",
              encoding="utf-8", newline="") as f:
        for row in csv.reader(f):
            # Skip blank lines (e.g. a trailing empty line) instead of
            # failing on the missing third field.
            if not row or (len(row) == 1 and not row[0].strip()):
                continue
            global_id.append(row[0])
            order_number.append(row[2])

    df_node = pd.DataFrame({
        "global_id": global_id,
        "order_number": order_number,
    })

    print(df_node.shape[0], df_node.head())
    return df_node
def edge(df_node,column):
    """Resolve one relation CSV to order-number pairs and write them to disk.

    Reads ``./relations_csv/yinni_kg_intopieces_link_by_<column>.csv``
    (UTF-8), taking the first field of each row as the start node id and the
    third field as the end node id. Both ids are looked up in ``df_node``
    (left join on ``global_id``) to obtain their ``order_number``; ids with
    no match yield a missing value. Duplicate rows are dropped and the
    resulting ``start_order_number`` / ``end_order_number`` pairs are written
    to ``<column>.txt`` in the current directory, ``|``-separated, with a
    header row and without the index.

    Args:
        df_node: DataFrame with ``global_id`` and ``order_number`` columns.
        column: relation name; used in the input file name and as the stem
            of the output file name.

    Returns:
        None. The result is printed (row count and head) and written to disk.

    Raises:
        IndexError: if a non-blank row has fewer than three fields.
    """
    start_id = []
    end_id = []
    source_path = "./relations_csv/yinni_kg_intopieces_link_by_" + column + ".csv"

    # csv.reader handles quoted fields containing commas; newline="" lets it
    # manage line endings itself.
    with open(source_path, encoding="utf-8", newline="") as f:
        for row in csv.reader(f):
            # Skip blank lines instead of failing on the missing third field.
            if not row or (len(row) == 1 and not row[0].strip()):
                continue
            start_id.append(row[0])
            end_id.append(row[2])

    df_links = pd.DataFrame({
        "start_id": start_id,
        "end_id": end_id,
    })

    # Rename the lookup columns up front so each join adds exactly one new
    # column and no suffixed global_id_x / global_id_y leftovers appear.
    lookup = df_node[["global_id", "order_number"]]
    start_lookup = lookup.rename(
        columns={"global_id": "start_id", "order_number": "start_order_number"})
    end_lookup = lookup.rename(
        columns={"global_id": "end_id", "order_number": "end_order_number"})

    df_links = df_links.merge(start_lookup, how="left", on="start_id")
    df_links = df_links.merge(end_lookup, how="left", on="end_id")

    # Remove repeated edges; rows are compared on ids and order numbers.
    df_links = df_links.drop_duplicates()

    df_edge = df_links[["start_order_number", "end_order_number"]]
    print(df_edge.shape[0], df_edge.head())

    output_path = column + ".txt"
    df_edge.to_csv(output_path, encoding="utf-8", index=False, sep="|")
def main():
    """Load the node table once, then process each relation in parallel.

    Calls ``node()`` in the parent process, starts one child process per
    relation label running ``edge(df_node, label)``, and blocks until every
    child has finished.
    """
    df_node = node()

    labelList = ["bank", "link_phone", "person_address"]

    # Create one child process per label; iterating the list directly keeps
    # the process count tied to the labels instead of a separate constant.
    ps = [Process(target=edge, args=(df_node, label)) for label in labelList]

    # Start every process before joining any, so they run concurrently.
    for p in ps:
        p.start()

    # Block until all children have exited.
    for p in ps:
        p.join()

 
if __name__=="__main__":
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # elapsed time; time.time() can jump if the system clock is adjusted.
    start = time.perf_counter()
    main()
    elapsed = time.perf_counter() - start
    # Timings noted by the original author (presumably seconds for runs with
    # 2 and 3 processes): 2 58.53214192390442  3 56.898120164871216
    print(elapsed)

 

# posted on 2020-05-26 17:15  happygril3  阅读(156)  评论(0)    收藏  举报
#
# 导航