import scrapy
from scrapy.http.cookies import CookieJar
from scrapy.http import Request
from urllib.parse import urlencode
class ChoutiSpider(scrapy.Spider):
    """Spider that logs in to chouti.com and up-votes every news item on the
    hot/recent listing page.

    Flow: parse (harvest session cookies) -> POST login -> check_login ->
    GET listing -> index -> POST one vote per news id -> check_result.
    """

    name = 'chouti'
    allowed_domains = ['chouti.com']
    start_urls = ['http://chouti.com/']
    # start_urls = ['http://127.0.0.1:80/app01/login/']

    # Session cookies harvested from the first response; reused on every
    # subsequent request.  NOTE: class-level dict is shared across instances,
    # which is the usual single-spider Scrapy pattern.
    cookie_dict = {}

    def parse(self, response):
        """Handle the first response from chouti: extract the session cookies
        from the response headers, then submit the login POST.

        :param response: the response for ``start_urls[0]``
        :return: yields the login ``Request``
        """
        # Extract cookies from the response headers into a CookieJar.
        cookie_jar = CookieJar()
        cookie_jar.extract_cookies(response, response.request)

        # Flatten the jar's internal {domain: {path: {name: Cookie}}} mapping
        # into a plain name -> value dict.
        # NOTE(review): ``_cookies`` is a private attribute of the underlying
        # http.cookiejar.CookieJar — works, but may break across versions.
        for domain_cookies in cookie_jar._cookies.values():
            for path_cookies in domain_cookies.values():
                for name, cookie in path_cookies.items():
                    self.cookie_dict[name] = cookie.value

        # Log in.  BUGFIX: the original was missing the comma after ``body``
        # (a SyntaxError) and sent an empty Content-Type, so the server could
        # not parse the form body.
        yield Request(
            url='https://dig.chouti.com/login',
            method='POST',
            # urlencode renders the dict as 'phone=...&password=...'.
            body=urlencode({'phone': '18669195713', 'password': '123456'}),
            cookies=self.cookie_dict,
            headers={
                # Can also be set once in settings.py instead of per-request.
                'User-Agent': '',
                # Required so the server parses the POST body as form fields.
                'Content-Type': 'application/x-www-form-urlencoded',
            },
            callback=self.check_login,
        )

    def check_login(self, response):
        """After the login POST, fetch the hot/recent listing page.

        :param response: the login response (printed only to eyeball whether
            the login succeeded)
        :return: yields the listing-page ``Request``
        """
        print(response.text)  # manual check that the login succeeded
        yield Request(
            url='https://dig.chouti.com/all/hot/recent/1',
            cookies=self.cookie_dict,
            callback=self.index,
        )

    def index(self, response):
        """On the listing page, collect every news id and POST an up-vote
        for each.

        :param response: the listing-page response
        :return: yields one vote ``Request`` per news item
        """
        # TODO: the XPath expressions are placeholders and must be filled in
        # before this method selects anything.
        news_list = response.xpath('')
        for news in news_list:
            link_id = news.xpath('').extract_first()
            # BUGFIX: the original URL ('/link/vote_linksId=%s') was missing
            # the '?' that starts the query string.
            yield Request(
                url='https://dig.chouti.com/link/vote?linksId=%s' % link_id,
                method='POST',
                cookies=self.cookie_dict,
                callback=self.check_result,
            )

    def check_result(self, response):
        """Print the server's reply to a vote request (success check)."""
        print(response.text)