import requests
from bs4 import BeautifulSoup
def get_url(chap):
    """Return the address of the page for chapter *chap*.

    The result is the fixed base address followed by ``str(chap)`` and the
    ``.html`` suffix.

    Args:
        chap: Chapter identifier; converted with ``str()`` before use.

    Returns:
        The assembled URL string.
    """
    base = 'http://www.cnblogs.com/...'
    pieces = (base, str(chap), '.html')
    return ''.join(pieces)
def get_content(url, data=None):
    """Download *url* with an HTTP GET and return the body as text.

    Args:
        url: Address of the page to fetch.
        data: Not used by this function; kept so that existing callers
            which pass it continue to work.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: If the request itself fails (for
            example a connection error or a timeout).
    """
    rep = requests.get(url, timeout=120)
    # Fail loudly on an error status instead of passing an error page on
    # to the parser as if it were real content.
    rep.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response declares.
    rep.encoding = 'utf-8'
    return rep.text
def get_data(htmltext):
    """Extract the text of the first ``<div class="panel-body">`` element.

    Args:
        htmltext: HTML markup of the page, as a string.

    Returns:
        The concatenated text content of the matched ``div``.

    Raises:
        ValueError: If the markup contains no ``div`` with the
            ``panel-body`` class.
    """
    soup = BeautifulSoup(htmltext, "html.parser")
    # html.parser does not add a <body> when the markup lacks one, so
    # soup.body can be None; search the whole document in that case.
    root = soup.body if soup.body is not None else soup
    panel = root.find('div', {'class': 'panel-body'})
    if panel is None:
        # Raise a clear error instead of an AttributeError on None.
        raise ValueError("no <div class='panel-body'> element found in page")
    return panel.get_text()
def mod_data(Num):
    """Fetch the page for chapter *Num* and return its extracted text.

    Builds the URL with ``get_url``, downloads it with ``get_content`` and
    passes the markup to ``get_data``.

    Args:
        Num: Chapter identifier forwarded to ``get_url``.

    Returns:
        Whatever ``get_data`` returns for the downloaded page.
    """
    page_markup = get_content(get_url(Num))
    return get_data(page_markup)
if __name__ == '__main__':
    # range(2, 3) yields only 2, so a single chapter is downloaded.
    for chapter in range(2, 3):
        name = '第' + str(chapter) + '章.txt'
        data_new = mod_data(chapter)
        # Write as UTF-8 explicitly: the platform default encoding may be
        # unable to represent the text, which would raise on write.
        with open(name, "w", encoding="utf-8") as f:
            f.write(data_new)