requests简单爬虫项目实战

requests实战之搜索引擎爬取搜索内容

import requests

# Scrape a Sogou search result page for a user-supplied keyword and save
# the raw HTML locally as "<keyword>.html".
url = 'https://www.sogou.com/web'
kw = input('enter a word: ')
# UA spoofing: pretend to be a normal browser so the site does not block
# the request as an obvious bot.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
}
# The query goes in params so requests handles URL encoding for us.
param = {
    'query': kw
}
# Send the GET request.  timeout keeps the script from hanging forever on a
# stalled connection; raise_for_status surfaces HTTP errors (403/500/...)
# instead of silently saving an error page.
response = requests.get(url=url, params=param, headers=header, timeout=10)
response.raise_for_status()
# Response body as text (requests decodes using the detected charset).
content = response.text
fileName = kw + '.html'
# Persist the page to disk.
with open(fileName, 'w', encoding='utf-8') as fp:
    fp.write(content)
print(fileName, '爬取结束!!!')

requests实战之破解百度翻译

import json
import requests

# Query Baidu Translate's "sug" endpoint for a user-supplied word and save
# the JSON suggestions as "<word>.json".
url = 'https://fanyi.baidu.com/sug'
word = input('请输入想翻译的词语或句子:')
# The endpoint expects the word as form data under the key 'kw'.
data = {
    'kw': word
}
# UA spoofing so the request looks like a normal browser visit.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}
# POST with a timeout so a dead connection cannot hang the script; check
# the HTTP status before trusting the body as JSON.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
response.raise_for_status()
dic_obj = response.json()
print(dic_obj)
filename = word + '.json'
with open(filename, 'w', encoding='utf-8') as fp:
    # ensure_ascii=False keeps Chinese characters readable in the file.
    json.dump(dic_obj, fp=fp, ensure_ascii=False)

print('爬取结束!!!')

requests实战之爬取豆瓣电影榜单

import json
import requests

# Fetch the first 20 entries of a Douban movie chart (type 11) via its
# JSON API and save them to douban.json.
url = 'https://movie.douban.com/j/chart/top_list?'
params = {
    'type': '11',            # chart category id
    'interval_id': '100:90',  # rating percentile band
    'action': '',
    'start': '0',            # offset of the first movie
    'limit': '20',           # number of movies per request
}
# UA spoofing so the request looks like a normal browser visit.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}
# timeout prevents an indefinite hang; raise_for_status catches HTTP
# errors before we try to parse the body as JSON.
response = requests.get(url=url, params=params, headers=headers, timeout=10)
response.raise_for_status()
dic_obj = response.json()
print(dic_obj)
with open('douban.json', 'w', encoding='utf-8') as fp:
    # ensure_ascii=False keeps Chinese titles readable in the file.
    json.dump(dic_obj, fp=fp, ensure_ascii=False)

print('爬取结束!!!')

requests实战之爬取肯德基门店地址

import json
import requests

# Query KFC's store-locator API for a user-supplied place name and save the
# raw response locally.
url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
location = input('请输入你想查询的地点:')
# Form payload expected by the endpoint; pageIndex/pageSize paginate the
# result (first page, 10 stores).
data = {
    'cname': '',
    'pid': '',
    'keyword': location,
    'pageIndex': '1',
    'pageSize': '10',
}
# UA spoofing so the request looks like a normal browser visit.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}
# timeout avoids an indefinite hang; raise_for_status surfaces HTTP errors
# instead of saving an error page.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
response.raise_for_status()
content = response.text
# NOTE(review): the endpoint actually returns JSON text; the .html extension
# is kept for backward compatibility with the original script's output.
with open(location + '.html', 'w', encoding='utf-8') as fp:
    fp.write(content)

print('爬取结束!!!')

requests实战之药监总局相关数据

import json
import requests

# Two-stage crawl of the NMPA (药监总局) cosmetics licence portal:
# 1) page through the list endpoint to collect company IDs;
# 2) fetch each company's detail record by ID and dump everything to
#    information.json.
url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
id_list = []
all_information = []
# UA spoofing so the requests look like normal browser visits.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}
# Stage 1: pages 1..5, 15 records each.
for page in range(1, 6):
    data = {
        'on': 'true',
        'page': str(page),       # the API expects the page number as a string
        'pageSize': '15',
        'productName': '',
        'conditionType': '1',
        'applyname': '',
        'applysn': '',
    }
    # timeout keeps a stalled connection from hanging the whole crawl.
    response = requests.post(url=url, data=data, headers=headers, timeout=10).json()
    for dic in response['list']:
        id_list.append(dic['ID'])
print('爬取商家id结束')
# Stage 2: detail lookup for every collected ID.
url1 = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsById'
for company_id in id_list:  # renamed from `id` to avoid shadowing the builtin
    data = {
        'id': company_id
    }
    detail = requests.post(url=url1, data=data, headers=headers, timeout=10).json()
    all_information.append(detail)
with open('information.json', 'w', encoding='utf-8') as fp:
    # BUG FIX: the original passed ensure_ascii='utf-8' — a truthy string,
    # i.e. ensure_ascii=True — which escaped all Chinese text to \uXXXX
    # despite the utf-8 file.  ensure_ascii=False keeps it readable and
    # matches the other scripts in this post.
    json.dump(all_information, fp=fp, ensure_ascii=False)
print('爬取商家具体信息结束!!!')

参考路飞学社视频

posted @ 2020-09-30 17:41  _Hsiung  阅读(200)  评论(0编辑  收藏  举报