爬虫基础-登录抽屉

登录抽屉,并点赞所有新闻

 

import requests
from bs4 import BeautifulSoup
# Script flow:
#   1. Visit the Chouti hot list to obtain the initial (not yet authorized) cookie.
#   2. Log in while sending that cookie along with the credentials.
#   3. Walk pages 1-10 of the hot list and upvote every news item found.

# Browser-like headers shared by every request, so each call presents the
# same user-agent instead of the default python-requests one.
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}

# Seconds to wait on each request; requests has no default timeout, so a
# stalled connection would otherwise block the script forever.
REQUEST_TIMEOUT = 10

# Step 1: fetch the hot list once just to receive the initial cookie.
ret = requests.get(
    url='https://dig.chouti.com/all/hot/recent/1',
    headers=HEADERS,
    timeout=REQUEST_TIMEOUT,
)
r1_cookie_dict = ret.cookies.get_dict()

# Step 2: log in, sending the credentials together with the initial cookie.
# NOTE(review): the credentials are hard-coded in source; move them to
# configuration or environment variables before sharing this script.
response_login = requests.post(
    url='https://dig.chouti.com/login',
    data={
        'phone': '8613121758648',
        'password': 'woshiniba',
        'oneMonth': 1,
    },
    headers=HEADERS,
    cookies=r1_cookie_dict,
    timeout=REQUEST_TIMEOUT,
)
# response_login.cookies.get_dict() would return the login response's cookies
# as a dict; the vote requests below send the initial cookie instead.

# Step 3: upvote every news item on pages 1-10.
for page_num in range(1, 11):
    response_index = requests.get(
        url='https://dig.chouti.com/all/hot/recent/%d' % page_num,
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    soup = BeautifulSoup(response_index.text, 'html.parser')
    div = soup.find(attrs={'id': 'content-list'})
    if div is None:
        # find() returns None when nothing matches; skip this page instead
        # of crashing with AttributeError on div.find_all.
        print('page %d: content-list not found, skipped' % page_num)
        continue

    for item in div.find_all(attrs={'class': 'item'}):
        tag = item.find(name='div', attrs={'class': 'part2'})
        if not tag:
            continue
        nid = tag.get('share-linkid')
        if not nid:
            # Without an id the vote URL would carry linksId=None.
            continue
        print(nid)

        # Upvote this news item by its id, authenticating with the cookie
        # that was sent along with the login request.
        r1 = requests.post(
            url='https://dig.chouti.com/link/vote?linksId=%s' % nid,
            headers=HEADERS,
            cookies=r1_cookie_dict,
            timeout=REQUEST_TIMEOUT,
        )
        print(r1.text)

 

随堂笔记

requests.post(

  url='https://....',

  data={key:value},

  headers={key:value},

  cookies={}

)

cookie_dict = response.cookies.get_dict()

 

注意:

  --伪造浏览器

  --请求分析

    1,先访问页面,获取最初的(未授权的)cookie

    2,携带最初的cookie登录,之后的请求继续使用这个cookie

 

posted @ 2018-07-04 23:43  风度翩翩睿  阅读(139)  评论(0)    收藏  举报