python 爬取页面数据,生成词云和网络图

 

简单的爬取页面数据,并生成词云和网络图

需要安装 requests、jieba、wordcloud、networkx、matplotlib 包

代码如下

 

 1 # @Author  :whyCai
 2 # @Time    :2020/10/17 10:35
 3 from time import sleep
 4 
 5 import requests,json,jieba,wordcloud,networkx as nx,jieba.posseg as jp
 6 from matplotlib import pyplot as plt
 7 
 8 """
 9 #爬取页面的数据
10 def getCommText():
11     '''
12     爬取页面的数据
13     :return: 
14     '''
15     text = ''
16     url = 'https://xxxxxxx'
17     headers = {'content-type': 'application/json'}
18 
19     for i in range(0,300):
20         data = {"pageIndex": i+1,"xxxx":1}
21         r = requests.post(url, data=json.dumps(data), headers=headers)
22         res = json.loads(r.text)
23         #获取接口的字段值
24         resContent = res['result']['items']
25         lenComm = len(resContent)
26         # 获取接口的字段值
27         for j in range(0,lenComm):
28             # text = text + resContent[j]['content']+' '
29             print(resContent[j]['content'])
30         sleep(0.2)
31     # print(text)
32     # return text
33 getCommText()
34 """
35 
36 """
37 #生成词云
38 
39 #读取数据
40 f = open('xxx.txt',encoding='utf-8')
41 text = f.read()
42 txtlist = jieba.lcut(text)
43 txtlist = " ".join(txtlist)
44 w = wordcloud.WordCloud(width=1000,height=700,background_color='white',font_path='msyh.ttc')
45 w.generate(txtlist)
46 #生成词云
47 w.to_file('output2-poem.png')
48 """
49 
50 
51 """
52 #生成网络图
53 
54 #text 为 上面词云中的 text = f.read()
55 words = jp.lcut(text)
56 G = nx.MultiDiGraph()  
57 # 添加节点
58 for word in words:
59     G.add_node(word.flag)
60 # 添加边
61 for i in range(len(words) - 1):
62     G.add_edge(words[i].flag, words[i+1].flag)
63 # 绘图
64 nx.draw(G, alpha=0.8, with_labels=True, node_color='lightgreen', font_size=36, node_size=999, width=2)
65 # 展示
66 plt.show()
67 """
View Code

 

 

参考博客:

词云:https://www.cnblogs.com/wkfvawl/p/11585986.html

网络图:https://blog.csdn.net/your_answer/article/details/79189660

posted @ 2020-10-17 14:41  菜小鱼~  阅读(857)  评论(0编辑  收藏  举报