Using cookies to log into the MOOC online teaching center and bulk-post comments

Disclaimer: this post is for learning and reference only.

Our school's online teaching platform is hosted on the MOOC site, so you first log into the MOOC teaching platform and then take the cookie from the logged-in session.

 

 

Note: sleep after every submission, otherwise posting comments too quickly will get noticed by the system.

If requests come in too fast the system catches on easily (it starts demanding a captcha), so no multithreading is used.
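
For illustration only (this helper is not part of either script below, and the 3–6 second range is an arbitrary choice), a randomized delay is a simple way to make the pacing less regular than a fixed sleep:

import random
import time


def polite_sleep(low=3.0, high=6.0):
    # sleep for a random interval so the submissions are not perfectly periodic
    time.sleep(random.uniform(low, high))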

The cookie is obtained manually here; you can also try reading it straight from the browser. Either way it has to be converted into dictionary form.
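
For example, a "Cookie:" header copied from the browser's developer tools maps to a dict like this (a minimal sketch; the cookie names and values below are made up):

raw = 'Cookie: JSESSIONID=ABC123; route=f00; fid=42'  # hypothetical values copied from the browser
cookies = dict(pair.split('=', 1) for pair in raw.replace('Cookie:', '').strip().split('; '))
# -> {'JSESSIONID': 'ABC123', 'route': 'f00', 'fid': '42'}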

Chrome keeps its cookies in an SQLite database at “C:\Users\Garbos\AppData\Local\Google\Chrome\User Data\Default\Cookies”; change the user name in the path to your own.

 

# coding: utf-8
# @Time    : 2017/7/30 16:42
# @Author  : Jingxiao Fu
# @File    : headers_cookies.py
import random
import sqlite3
import win32crypt

# a small pool of user-agent strings to rotate through
header_str = '''Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)'''


def get_headers():
    '''Build request headers with a randomly chosen user-agent.'''
    user_agents = header_str.split('\n')
    headers = {
        'Connection': 'keep-alive',
        'user-agent': random.choice(user_agents),
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept-Language': 'zh-CN,zh;q=0.8'
    }
    return headers


def get_cookie_use_hand():
    '''Manually copied cookie: paste the "Cookie:" header from the browser into `cookie`,
    then convert it into the dict form that requests expects.'''
    cookie = ""  # paste the copied Cookie header here

    cookie = cookie.replace('Cookie:', '')
    cookie = cookie.replace(' ', '')
    cookies_dict = {}
    for pair in cookie.split(';'):
        name, _, value = pair.partition('=')
        cookies_dict[name] = value
    return cookies_dict


def get_cookie_from_chrome():
    '''Read the platform's cookies from Chrome's SQLite cookie database and
    decrypt the values with the Windows DPAPI (win32crypt).'''
    host_url = 'mooc1.xynu.edu.cn'
    cookie_file_path = r"C:\Users\Garbos\AppData\Local\Google\Chrome\User Data\Default\Cookies"

    sql_query = "select host_key, name, encrypted_value, value from cookies WHERE host_key='%s'" % host_url
    with sqlite3.connect(cookie_file_path) as con:
        cu = con.cursor()
        cookies_sql = {name: win32crypt.CryptUnprotectData(encrypted_value)[1].decode()
                       for host_key, name, encrypted_value, value in cu.execute(sql_query).fetchall()}

    return cookies_sql
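
A quick way to check that the recovered cookies still work (a sketch, assuming you are logged into the platform in Chrome; the study-space URL, including its s= token, is the same one the main script below uses and is presumably account-specific):

import requests
from headers_cookies import get_headers, get_cookie_from_chrome

# if the cookies are valid, this returns the study-space page instead of a redirect to the login form
resp = requests.get('http://mooc1.xynu.edu.cn/visit/courses?template=1&s=7c6b45e1ac1448adf0ff862c75c8ab3f',
                    headers=get_headers(), cookies=get_cookie_from_chrome())
print(resp.status_code, len(resp.text))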

Walk through the flow by hand a few times, pick out the key requests, and the rest is straightforward to implement.
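
The key request turns out to be the publish POST. For readability, here are its form fields as a dict (the IDs are hypothetical placeholders; send_msg in the script below builds the same fields into a URL-encoded string by hand):

from urllib.parse import urlencode

course_id, clazz_id = '123456', '654321'  # hypothetical IDs parsed from the course URL
data = {
    'courseId': course_id,
    'clazzid': clazz_id,
    'title': '1',    # the comment title; the script just uses a running counter
    'content': '',
    'type': '4',
    'files': '',
}
body = urlencode(data)  # equivalent to the string send_msg assembles manually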

# coding: utf-8
# @Time    : 2017/8/7 9:19
# @Author  : Yong-life
# @File    : CrawlingNetStudyRoom.py

import re
import time
import requests
from headers_cookies import get_headers
from headers_cookies import get_cookie_from_chrome


def get_response(url):
    '''Wrap the HTTP GET: always send the spoofed headers and the login cookie.'''
    cookie = get_cookie_from_chrome()
    response = requests.get(url, headers=get_headers(), cookies=cookie)
    return response.text


def crawling_classid(html):
    '''Scrape the course list; some courses may be placed inside folders, so recurse into them.'''
    classid_list = []
    pattern_classid = r'httpsClass" >[\s]*<a[\s]*href=\'(.*?)\'  target="_blank"   >'
    classid_list.extend(re.findall(pattern_classid, html))
    # folders carry a hidden fileId; fetch each folder page and recurse
    pattern_file = r'class="Mcon1img">[\s]*<input type="hidden" value="(.*?)" />'
    file_list = re.findall(pattern_file, html)
    for file_id in file_list:
        file_url = 'http://mooc1.xynu.edu.cn/visit/courses/study?isAjax=true&fileId=' + file_id
        classid_list.extend(crawling_classid(get_response(file_url)))
    return classid_list


def send_msg(course_id, clazz_id, count):
    '''Post the comments; the title is just the counter str(i), change it as you like.'''
    url = 'http://mooc1.xynu.edu.cn/bbscircle/grouptopic/publish'
    cookie = get_cookie_from_chrome()
    for i in range(1, count + 1):
        data = ('courseId=' + course_id + '&clazzid=' + clazz_id +
                '&title=' + str(i) + '&content=&type=4&files=').encode('utf-8')
        print('send: ' + course_id + ' ' + str(i))
        requests.post(url, data, cookies=cookie, headers=get_headers())
        # sleep between posts so the system does not flag the account
        time.sleep(3)


def manager_crawling():
    url = 'http://mooc1.xynu.edu.cn/visit/courses?template=1&s=7c6b45e1ac1448adf0ff862c75c8ab3f'
    html = get_response(url)
    print('Entered the study space...')
    MAX_COUNT = 10
    class_url_list = crawling_classid(html)
    print('Scraped the course list from the study space...')
    print('Start sending data...')
    pattern_class = r'courseId=(.*?)&clazzid=(.*?)&enc'
    for class_url in class_url_list:
        regx = re.search(pattern_class, class_url)
        if regx is None:
            continue
        course_id, clazz_id = regx.group(1), regx.group(2)
        send_msg(course_id, clazz_id, MAX_COUNT)

    print('Done!')


if __name__ == '__main__':
    manager_crawling()

The result:

Young friend, still losing sleep over the comment quota the school requires?

posted @ 2017-08-07 10:58  朤尧