图片爬虫学习1

单张图片的获取

import requests
import os

# --- Demo 1: download a single image by URL ---

url = "http://a.hiphotos.baidu.com/image/pic/item/aec379310a55b319e37d5eb949a98226cefc1771.jpg"
root = "F://pic//"
# split('/') breaks the URL on '/'; the last element is the file name
path = root + url.split('/')[-1]
try:
    if not os.path.exists(root):
        # makedirs (not mkdir) so intermediate directories are created too
        os.makedirs(root)
    if not os.path.exists(path):
        r = requests.get(url)
        r.raise_for_status()  # fail fast on HTTP errors instead of saving an error page
        with open(path, 'wb') as f:
            f.write(r.content)  # 'with' closes the file; no explicit close() needed
        print("文件以保存")
    else:
        print("文件以存在")
except (requests.RequestException, OSError):  # narrow from bare 'except:' so Ctrl-C still works
    print("爬取失败")

#获取整页图片


import requests
import re
import os


def GetHtml(url):
    """Fetch the page at *url* and return a list of .jpg image URLs found in it.

    Returns an empty list when the request fails, so callers can always iterate.
    """
    try:
        resp = requests.get(url)
        resp.raise_for_status()  # raise on HTTP error codes
        resp.encoding = resp.apparent_encoding  # detect encoding from the content
        # Non-greedy '+?' so several URLs on one HTML line are not merged into a
        # single match; raw string avoids the invalid '\.'-escape warning.
        pat = re.compile(r"http://.+?\.jpg")
        return pat.findall(resp.text)
    except requests.RequestException:  # narrow from bare 'except:'
        return []  # a list (the original returned "") — both iterate as empty


def bc(url):
    """Download *url* into F://pic// unless a file with the same name already exists."""
    root = "F://pic//"
    path = root + url.split('/')[-1]  # last URL segment is the file name
    try:
        if not os.path.exists(root):
            os.makedirs(root)  # create the save directory (including parents)
        if not os.path.exists(path):
            r = requests.get(url)
            r.raise_for_status()  # don't save an HTTP error page as an image
            with open(path, 'wb') as f:  # 'with' closes the file automatically
                f.write(r.content)
            print("文件以保存")
        else:
            print("文件以存在")
    except (requests.RequestException, OSError):  # narrow from bare 'except:'
        print("爬取失败")


if __name__ == "__main__":  # guard so importing this module does not hit the network
    url = 'http://www.tooopen.com/img/87_0_1_2.aspx'
    for result in GetHtml(url):
        bc(result)

 

爬取多页图片:在了解了如何获取整页图片之后,我们可以通过一个 for 循环遍历所有页码,对每一页重复上述过程即可。

 

posted @ 2018-03-15 23:34  fdf-python  阅读(270)  评论(0)    收藏  举报