import requests
from lxml import etree
def _fetch_html(url, timeout):
    """Download *url* and return it parsed as an lxml HTML tree.

    The body is decoded as GB2312 before parsing. Raises
    ``requests.HTTPError`` on a non-2xx response so that an error page is
    never mistaken for real content.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    response.encoding = 'gb2312'
    return etree.HTML(response.text)


def _safe_filename(name):
    """Return *name* with characters that are unsafe in file names replaced by '_'."""
    # The name comes from a scraped attribute, so path separators and other
    # reserved characters must not reach open().
    unsafe = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
    return name.translate(unsafe)


def jianli():
    """Download the resume templates listed on index pages 2-4 of gerenjianli.com.

    For every entry on each listing page, the entry's detail page is fetched,
    the download link is extracted from it, and the document is saved to the
    current directory as ``<title>.doc``. Progress is printed after each file.

    Entries without a title, a detail link, or a download link are skipped.

    Raises:
        requests.RequestException: on connection errors, timeouts, or
            non-2xx HTTP responses.
    """
    timeout = 30  # seconds; without it a stalled connection blocks forever

    for page in range(2, 5):
        listing_url = "http://www.gerenjianli.com/biaoge/index_" + str(page) + ".html"
        listing = _fetch_html(listing_url, timeout)

        for item in listing.xpath('//*[@class="prlist"]/li'):
            titles = item.xpath('.//a/img/@alt')
            links = item.xpath('.//a/@href')
            if not titles or not links:
                # Not a regular entry (no image title or no link): skip it
                # instead of aborting the whole run with an IndexError.
                continue

            title = titles[0].replace("\n", '').replace(" ", '')
            # Remove stray whitespace from the URL as well.
            detail_url = links[0].replace("\n", '').replace(" ", '')

            detail = _fetch_html(detail_url, timeout)
            # The detail page holds the download link in markup like:
            #   <div class="donwurl2"><a href="http://down.gerenjianli.com/bg2018/jianli_biaoge_28.doc" target="_blank">
            #   <img src="http://www.gerenjianli.com/image/anniu.jpg" border="0"></a></div>
            download_links = detail.xpath('//*[@class="donwurl2"]/a/@href')
            if not download_links:
                continue

            reply = requests.get(download_links[0], timeout=timeout)
            reply.raise_for_status()
            # Binary payload: write the raw bytes, no text decoding involved.
            with open('./' + _safe_filename(title) + ".doc", "wb") as out_file:
                out_file.write(reply.content)
            print(title + "下载完毕!")

    print("请到./下查看!")
# Module-level call: the download starts whenever this file is executed or imported.
jianli()
# 部分截图 (partial screenshot) — the image reference that followed is empty:
# ![]()