图片爬虫学习1

单张图片的获取

import requests
import os

# --- Demo 1: download a single image by URL ---

url = "http://a.hiphotos.baidu.com/image/pic/item/aec379310a55b319e37d5eb949a98226cefc1771.jpg"
root = "F://pic//"
# split('/') breaks the URL on '/'; the last element is the file name
path = root + url.split('/')[-1]
try:
    if not os.path.exists(root):
        # makedirs (not mkdir) so intermediate directories are created too
        os.makedirs(root)
    if not os.path.exists(path):
        r = requests.get(url)
        r.raise_for_status()  # fail fast on HTTP errors instead of saving an error page
        with open(path, 'wb') as f:
            f.write(r.content)  # 'with' closes the file; no explicit close() needed
        print("文件以保存")
    else:
        print("文件以存在")
except (requests.RequestException, OSError):  # narrow from bare 'except:' so Ctrl-C still works
    print("爬取失败")

#获取整页图片


import requests
import re
import os


def GetHtml(url):
    """Fetch the page at *url* and return a list of .jpg image URLs found in it.

    Returns an empty list when the request fails, so callers can always iterate.
    """
    try:
        resp = requests.get(url)
        resp.raise_for_status()  # raise on HTTP error codes
        resp.encoding = resp.apparent_encoding  # detect encoding from the content
        # Non-greedy '+?' so several URLs on one HTML line are not merged into a
        # single match; raw string avoids the invalid '\.'-escape warning.
        pat = re.compile(r"http://.+?\.jpg")
        return pat.findall(resp.text)
    except requests.RequestException:  # narrow from bare 'except:'
        return []  # a list (the original returned "") — both iterate as empty


def bc(url):
    """Download *url* into F://pic// unless a file with the same name already exists."""
    root = "F://pic//"
    path = root + url.split('/')[-1]  # last URL segment is the file name
    try:
        if not os.path.exists(root):
            os.makedirs(root)  # create the save directory (including parents)
        if not os.path.exists(path):
            r = requests.get(url)
            r.raise_for_status()  # don't save an HTTP error page as an image
            with open(path, 'wb') as f:  # 'with' closes the file automatically
                f.write(r.content)
            print("文件以保存")
        else:
            print("文件以存在")
    except (requests.RequestException, OSError):  # narrow from bare 'except:'
        print("爬取失败")


if __name__ == "__main__":  # guard so importing this module does not hit the network
    url = 'http://www.tooopen.com/img/87_0_1_2.aspx'
    for result in GetHtml(url):
        bc(result)

 

爬取多页图片:在了解了如何获取整页图片之后,我们可以通过一个 for 循环遍历所有页码,对每一页重复上述过程即可。

 

posted @ 2018-03-15 23:34  fdf-python  阅读(270)  评论(0)    收藏  举报