愤怒中的小草

博客园 首页 新随笔 联系 订阅 管理
import requests
import os

def getHTMLText(url):
    """Fetch ``url`` with an HTTP GET request and return the body as text.

    The request uses a 30-second timeout. Before decoding, the response
    encoding is replaced by ``apparent_encoding`` (the encoding detected
    from the content).

    Args:
        url: Address to request.

    Returns:
        The decoded response text, or the literal string "产生异常" when the
        request fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=30)
        # Raises requests.HTTPError when the response carries an error status.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures become the sentinel string; things like
        # KeyboardInterrupt or programming errors are allowed to propagate.
        return "产生异常"

def getHTMLParams(url, params):
    """Fetch ``url`` with query parameters and return the body as text.

    The request uses a 30-second timeout so that an unresponsive server
    cannot block the caller forever. Before decoding, the response encoding
    is replaced by ``apparent_encoding`` (the encoding detected from the
    content).

    Args:
        url: Address to request.
        params: Query-string parameters, passed to ``requests.get`` as its
            ``params`` argument (for example ``{'wd': 'python'}``).

    Returns:
        The decoded response text, or the literal string "产生异常" when the
        request fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, params=params, timeout=30)
        # Raises requests.HTTPError when the response carries an error status.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures become the sentinel string; things like
        # KeyboardInterrupt or programming errors are allowed to propagate.
        return "产生异常"

def getPicture(url, path):
    """Download the resource at ``url`` and store it in the file ``path``.

    Nothing is downloaded when ``path`` already exists. Otherwise the parent
    directory of ``path`` is created if necessary, the resource is fetched
    with a 30-second timeout, and its raw bytes are written to ``path``.
    The outcome is reported on standard output.

    Args:
        url: Address of the picture to download.
        path: Destination file path, including the file name.

    Returns:
        None.
    """
    if os.path.exists(path):
        print('文件已存在')
        return
    try:
        # Derive the target directory from ``path`` itself rather than from
        # a module-level variable, so the function also works when imported.
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        r = requests.get(url, timeout=30)
        # Do not save an HTTP error page as if it were the picture.
        r.raise_for_status()
        # The file is opened only after the download succeeded, so a failed
        # request does not leave an empty file behind.
        with open(path, 'wb') as f:
            f.write(r.content)
        print('文件保存成功')
    except (requests.RequestException, OSError):
        # Network/HTTP failures and file-system errors are reported the same
        # way; other exceptions propagate.
        print('爬取失败')


if __name__ == "__main__":
    # Example: fetch a page as text.
    # url = "http://www.baidu.com"
    # print(getHTMLText(url))

    # Example: request with query parameters.
    # kv = {'wd': 'python'}
    # url = "http://www.baidu.com/s"
    # r = getHTMLParams(url, kv)
    # print(len(r))

    # Simple example of downloading a picture.
    url = "http://pic41.nipic.com/20140508/18609517_112216473140_2.jpg"
    root = "D://pics//"
    # The file name is the last segment of the URL path.
    path = root + url.rsplit('/', 1)[-1]
    getPicture(url, path)

# Notes:
# - robots protocol: for an example, see JD's https://www.jd.com/robots.txt
# - Baidu keyword search endpoint: http://www.baidu.com/s?wd=keyword
# - www.ip138.com: IP location lookup
posted on 2019-08-18 20:15  愤怒中的小草  阅读(166)  评论(0)  编辑  收藏  举报