黑板课Python爬虫第三关(两种解法)
解法一:
# -*- coding: utf-8 -*-
"""Solution 1: brute-force the crawler_ex02 password by driving Firefox.

The script submits the site's login form, then repeatedly opens the exercise
page and submits its form with each candidate password from 0 to 30. The
first candidate whose response page does not contain the "wrong password"
message is printed.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By

username = 'freetime'
password = '123456'

browser = webdriver.Firefox()
try:
    # Log in before touching the exercise page.
    # NOTE(review): presumably the exercise requires an authenticated
    # session -- confirm against the site.
    browser.get('http://www.heibanke.com/accounts/login')
    # find_element(By.X, ...) is used instead of find_element_by_x(), which
    # newer Selenium releases no longer provide.
    browser.find_element(By.ID, 'id_username').send_keys(username)
    browser.find_element(By.ID, 'id_password').send_keys(password)
    browser.find_element(By.ID, 'id_submit').click()

    pwd = 0
    while pwd <= 30:
        # Reload the form for every attempt so each submission starts from
        # a fresh page.
        browser.get('http://www.heibanke.com/lesson/crawler_ex02/')
        browser.find_element(By.NAME, 'username').send_keys('123')
        browser.find_element(By.ID, 'id_password').send_keys(str(pwd))
        browser.find_element(By.ID, 'id_submit').click()

        html = browser.page_source
        if u'输入的密码错误' in html:
            # The page reports a wrong password: try the next candidate.
            pwd = pwd + 1
            continue

        # Parenthesised single-argument print works on Python 2 and 3.
        print(pwd)
        break
    else:
        # The loop ran out of candidates without ever hitting `break`.
        print('No password in the range 0-30 was accepted')
finally:
    # Always shut the browser down so no Firefox/driver process is leaked,
    # even when a lookup or navigation step raises.
    browser.quit()
解法二:
# coding=utf-8
"""Solution 2: brute-force the crawler_ex02 password with requests + lxml.

A single requests session is used for all traffic so cookies persist between
the login and the password attempts. Every form submission first GETs the
page to read its CSRF token and then POSTs the form data together with that
token.
"""
import requests
from lxml import etree

# Author's notes on how Django's CSRF protection works (source walkthrough):
# Django defends against CSRF by adding a token to the request and verifying
# it. When a page containing a form is opened, Django stores the csrftoken in
# a cookie; when the user submits the form the two values are compared, and
# if they match the submission is considered safe. The csrftoken is generated
# like this:
#
#   md5_constructor("%s%s" % (randrange(0, _MAX_CSRF_KEY), settings.SECRET_KEY)).hexdigest()
#
# Setting the csrftoken cookie:
#   response.set_cookie(settings.CSRF_COOKIE_NAME, request.META["CSRF_COOKIE"],
#                       max_age = 60 * 60 * 24 * 7 * 52,
#                       domain=settings.CSRF_COOKIE_DOMAIN)
#
# Comparing the submitted token with the token from the cookie:
#   if not constant_time_compare(request_csrf_token, csrf_token):
# A mismatch results in a 403 error. Note: the form must contain the
# csrf_token tag, otherwise even same-site submissions are rejected, unless
# the view is decorated with @csrf_exempt.
#
# To get past a CSRF token: make sure cookies are kept across HTTP requests,
# GET the page, extract the token, and add it as a key/value pair when
# POSTing.
#
# For example, the page contains:
#   <input type='hidden' name='csrfmiddlewaretoken' value='*************************' />
#
# A quick regular expression would do:
#   /name='csrfmiddlewaretoken' value='([0-9a-z]+)'/
# and the POST then adds csrfmiddlewaretoken='f072a9912fc217e09b03b206a5ad0fdb'.
#
# A requests session object keeps certain parameters across requests, and it
# also keeps cookies between all requests made from the same session
# instance.


def request(s, url, payload):
    """Submit a CSRF-protected form and return the response page.

    GETs ``url`` to read the form's CSRF token, then POSTs ``payload`` plus
    that token back to the same URL.

    Args:
        s: Session-like object providing ``get(url)`` and
            ``post(url, data=...)``; a ``requests`` session in this script.
        url: Address of the page that both serves and receives the form.
        payload: Mapping of form fields to submit. It is not modified.

    Returns:
        The body of the POST response, decoded as UTF-8.

    Raises:
        IndexError: If the fetched page has no ``csrfmiddlewaretoken`` input.
    """
    r = s.get(url)

    html = r.content.decode('utf-8')
    page = etree.HTML(html)
    # Locate the token by field name rather than by its absolute position in
    # the document, so layout changes around the form do not break the lookup.
    tokens = page.xpath('//input[@name="csrfmiddlewaretoken"]/@value')
    if not tokens:
        raise IndexError('no csrfmiddlewaretoken field found at %s' % url)

    # Work on a copy so the caller's dict is left untouched.
    data = dict(payload)
    data['csrfmiddlewaretoken'] = tokens[0]
    html = s.post(url, data=data).content.decode('utf-8')
    return html


# Login page and exercise page.
login_url = 'http://www.heibanke.com/accounts/login'
ex02_url = 'http://www.heibanke.com/lesson/crawler_ex02/'

# Login form fields.
payload_login = {
    'username': 'test',
    'password': 'test123'
}
# Exercise (ex02) form fields; 'password' is overwritten on every attempt.
payload_ex02 = {
    'username': '123',
    'password': 0
}

# Use one session for everything so cookies are kept between requests; the
# context manager closes it when the script finishes or fails.
with requests.Session() as s:
    # Submit the login form first.
    request(s, login_url, payload=payload_login)

    # Then guess the password.
    pwd = 0
    while pwd <= 30:
        payload_ex02['password'] = pwd
        html = request(s, ex02_url, payload_ex02)
        if u'输入的密码错误' in html:
            # The page reports a wrong password: try the next candidate.
            pwd = pwd + 1
            continue

        # Parenthesised single-argument print works on Python 2 and 3.
        print(pwd)
        break
    else:
        # The loop ran out of candidates without ever hitting `break`.
        print('No password in the range 0-30 was accepted')
浙公网安备 33010602011771号