#coding:utf-8
import requests,os
from bs4 import BeautifulSoup
class downloader():
    """Download a novel from biqugex.com chapter by chapter.

    Workflow: ``Response()`` parses the book index page (read from the
    module-level ``url``) to collect chapter names and links, then
    ``file()`` creates a folder named after the novel and writes one
    ``.txt`` file per chapter. ``Try()`` runs both steps and prints a
    friendly message on any failure.
    """

    def __init__(self):
        self.urls = []  # chapter page links
        self.name = []  # chapter titles (parallel to self.urls)

    def Response(self):
        """Fetch the index page and fill self.name / self.urls.

        Reads the module-level global ``url`` (set in ``__main__``) and
        returns it, preserving the original contract.
        """
        response = requests.get(url)
        response.encoding = 'gbk'  # site serves GBK; prevents mojibake
        self.soup = BeautifulSoup(response.text, 'lxml')
        # The chapter list lives in <div class="listmain">; query it
        # directly instead of round-tripping through str()+re-parse.
        listmain = self.soup.find('div', class_='listmain')
        for anchor in listmain.find_all('a'):
            self.name.append(anchor.string)
            self.urls.append('https://www.biqugex.com%s' % anchor.get('href'))
        return url

    def file(self):
        """Find the novel title, create a same-named folder, download all chapters."""
        h2 = self.soup.select_one('body > div.book > div.info > h2')
        title = h2.string
        # NOTE(review): hard-coded desktop path kept for compatibility —
        # consider parameterizing the output directory.
        folder = os.path.join('C:\\Users\\Administrator\\Desktop', title)
        if not os.path.exists(folder):
            os.mkdir(folder)
        # Download each chapter and write it to <folder>/<chapter>.txt.
        for chapter_name, chapter_url in zip(self.name, self.urls):
            resp = requests.get(url=chapter_url)
            resp.encoding = 'gbk'
            chapter_soup = BeautifulSoup(resp.text, 'lxml')
            # Write the chapter body text directly; the original
            # str(find_all(...)) round-trip leaked the list brackets
            # into the saved file and rewrote the file once per child node.
            content = chapter_soup.find('div', id='content')
            filename = os.path.join(folder, chapter_name + '.txt')
            print(filename)
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(content.text)

    def Try(self):
        """Run the full download on *this* instance; report bad URLs nicely."""
        try:
            # Use self rather than constructing a throwaway downloader;
            # the dead hard-coded local ``url`` is removed (Response reads
            # the module-level global, so it never had any effect).
            self.Response()
            self.file()
        except Exception:
            # Narrowed from a bare except: no longer swallows
            # KeyboardInterrupt / SystemExit.
            print('请输入正确的笔趣阁网址')
if __name__ == '__main__':
    # Entry point: ask for a biqugex.com book index URL (read by
    # downloader.Response via the module-level global), then run the
    # download with error reporting handled inside Try().
    url = input('请输入网址:')
    a = downloader()
    a.Try()