# Imports
import requests
from bs4 import BeautifulSoup

# Index page whose chapter links are followed below.
url = 'http://www.shicimingju.com/book/sanguoyanyi.html'

# Headers sent with every request (browser-like User-Agent).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}


def get_content(url, timeout=10):
    """Fetch one chapter page and return the text of its content div.

    Args:
        url: URL of the chapter page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` can block forever on a stalled server.

    Returns:
        The text of the first ``<div class="chapter_content">`` on the page.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
        AttributeError: If the page contains no ``chapter_content`` div
            (``soup.find`` returns ``None`` in that case).
    """
    # Send the request.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Page body as text.
    page_text = response.text

    # Parse the page and pull out the chapter body.
    soup = BeautifulSoup(page_text, 'lxml')
    content = soup.find('div', class_="chapter_content").text
    return content


# Download the index page.
response = requests.get(url=url, headers=headers, timeout=10)

# Page body as text.
page_text = response.text

# Parse the index and collect the chapter anchors.
soup = BeautifulSoup(page_text, 'lxml')
contents = soup.select('.book-mulu > ul > li > a')

# Persist every chapter to a single text file. The ``with`` block makes
# sure the file is flushed and closed even if a download raises midway.
with open('./三国演义.txt', 'w', encoding='utf-8') as fp:
    for num, content in enumerate(contents, start=1):
        # The hrefs are joined onto the site root to form the chapter URL.
        content_url = 'http://www.shicimingju.com' + content['href']
        title = content.string
        print("开始下载第%d章:%s" % (num, title))
        content_detail = get_content(content_url)
        fp.write(title + ':' + content_detail + "\n\n\n")
        print("开始下载第%d章下载完毕" % num)

print("全部数据写入完毕")
注:此代码仅供学习参阅
要有最最遥远的梦想
和最最朴素的生活
即使明日天寒地冻
路遥马亡
浙公网安备 33010602011771号