爬虫大作业

 1 f=open("C:/Users/Administrator/PycharmProjects/test/test.txt",'w+',encoding='utf8')
 2 import jieba
 3 import requests
 4 from bs4 import BeautifulSoup
 5  
 6  
 7 def songlist(url):
 8     res = requests.get(url)
 9     res.encoding = 'UTF-8'
10     soup = BeautifulSoup(res.text, 'html.parser')
11     songname=soup.select('.song')
12     for i in songname[1:]:
13         url=i.select('a')[0].attrs['href']
14         songread(url)
15  
16  
def songread(url):
    """Download one song page and append its lyrics to the lyrics file.

    Fetches ``url``, collects the text of every element with the CSS class
    ``lrcItem`` and appends it to ``test.txt``.

    Args:
        url: Address of the page holding the lyrics of a single song.
    """
    # A timeout keeps the script from hanging forever on an unresponsive host.
    res = requests.get(url, timeout=10)
    res.encoding = 'UTF-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    lyric_lines = [item.text for item in soup.select('.lrcItem')]

    # Append mode: opening with 'w+' here truncated the file on every call, so
    # only the last song's lyrics survived. The file is emptied once per run by
    # the module-level open() at the top of the script. ``with`` guarantees the
    # handle is flushed and closed before the file is read back.
    with open("C:/Users/Administrator/PycharmProjects/test/test.txt", 'a', encoding='utf8') as f:
        f.writelines(lyric_lines)
26  
27  
28  
from collections import Counter

# Crawl the lyrics; songread() stores them in test.txt.
songlist('http://www.qq.com')

# Read the collected lyrics back. ``text`` replaces the old name ``str``,
# which shadowed the builtin.
with open("C:/Users/Administrator/PycharmProjects/test/test.txt", 'r', encoding='utf8') as f:
    text = f.read()

# Segment the text into tokens with jieba (one pass; the generator returned by
# jieba.cut is materialized because the tokens are counted afterwards).
wordList = list(jieba.cut(text))

# Token -> number of occurrences, counted in a single pass instead of calling
# list.count() once per distinct token.
wordDic = Counter(wordList)

# (token, count) pairs ordered from most to least frequent.
sort_word = wordDic.most_common()

# Slicing never raises, so texts with fewer than 60 distinct tokens work too.
top_words = sort_word[:60]
for entry in top_words:
    print(entry)

# Write the most frequent tokens, one per line, for the word-cloud tool.
with open("C:/Users/Administrator/PycharmProjects/test/test1.txt", 'w', encoding='utf8') as fo:
    fo.writelines(word + '\n' for word, _count in top_words)

将高频词汇写入 test1.txt 以后，打开 http://www.picdata.cn/ ，用在线词云工具生成图片。

  

 

posted on 2018-04-30 19:20  185程嘉明  阅读(114)  评论(0)  编辑  收藏  举报