Python 爬虫简单案例
import urllib.request;
import re;
def getHtmlCode(url):
    """Fetch the page at ``url`` and return its body as text.

    The body is decoded with the charset declared in the response's
    Content-Type header. When the response declares none, UTF-8 is used,
    which was the previously hard-coded encoding.

    Args:
        url: Address to open; passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        urllib.error.URLError: If the address cannot be opened.
        UnicodeDecodeError: If the body is not valid in the chosen charset.
    """
    # The context manager closes the response even when read()/decode() raise.
    with urllib.request.urlopen(url) as page:
        charset = page.headers.get_content_charset() or 'utf-8'
        return page.read().decode(charset)
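# Example: save the fetched page to a local file.
# htmlCon = getHtmlCode("https://tieba.baidu.com/p/1753935195")
# (getHtmlCode already returns decoded text, so no extra .decode('utf-8') is needed)
# pageFile = open("xh.txt", 'w')
# pageFile.write(htmlCon)
# pageFile.close()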
def getImg(htmlCode):
    """Download the ``.jpg`` images referenced in ``htmlCode``.

    Only ``src="..."`` attributes whose value ends in ``.jpg`` and that are
    immediately followed by `` width`` are picked up. Each match is saved
    under a relative file name ``0.jpg``, ``1.jpg``, ... in the order the
    matches appear in the markup.

    Args:
        htmlCode: Page markup as a ``str``.

    Returns:
        None.
    """
    # [^"] keeps each match inside a single src="..." attribute. With the
    # former `.+?`, an earlier non-jpg src (e.g. a .png) would be glued onto
    # the following jpg URL, producing an address that cannot be fetched.
    imgPattern = re.compile(r'src="([^"]+?\.jpg)" width')
    for index, imgUrl in enumerate(imgPattern.findall(htmlCode)):
        urllib.request.urlretrieve(imgUrl, '%s.jpg' % index)
# Example: htmlCode = getHtmlCode("https://tieba.baidu.com/p/1753935195")
# (no further .decode('utf-8') is needed; getHtmlCode already returns text)
# Interactive driver: ask for a page address, fetch it, then download its images.
print(u'---------网页图片抓取------------')
print(u'请输入url:')
url = input()
if not url:
    # Nothing was typed: announce the fallback and use the built-in address.
    print(u'---------没有输入地址,使用默认地址。--------')
    url = "https://tieba.baidu.com/p/1753935195"

print(u'-------正在抓取网页----------')
htmlCode = getHtmlCode(url)

print(u'-------正在下载图片---------')
getImg(htmlCode)

print(u'-------下载图片完成-------')
# Block until the user presses Enter before printing the final line.
input('Press Enter to exit')
print('hello world')
学习来源:https://www.cnblogs.com/Axi8/p/5757270.html 贴吧图片爬取

浙公网安备 33010602011771号