Python抓取网易云粉丝 第一种 【三】

import scrapy#一次性写入
import time
import random

import math

from Crypto.Cipher import AES

import codecs

import base64

import requests

from lxml import etree

import json


class niub:
    """Build the encrypted ``params`` / ``encSecKey`` form fields for weapi POSTs.

    The JSON request body is AES-CBC encrypted twice (first with the fixed
    ``self.key``, then with a freshly generated 16-character key) and the
    generated key is itself encrypted by raw modular exponentiation with
    ``self.e`` / ``self.f``.
    """

    # Characters the per-request AES key is drawn from.
    _ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

    # Initialisation vector for AES-CBC (16 bytes once UTF-8 encoded).
    _IV = '0102030405060708'

    def __init__(self):
        # Fixed key for the first AES round.
        self.key = '0CoJUm6Qyw8W8jud'

        # Modulus (hex) used by _RSAencrypt.
        self.f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'

        # Public exponent (hex) used by _RSAencrypt.
        self.e = '010001'

        # User id whose followers are requested by _get_params1.
        self.singer_id = '1411492497'

        # Endpoint URLs; kept for callers, not used inside this class.
        self.post_url1 = 'https://music.163.com/weapi/user/getfolloweds?csrf_token='

        self.post_url2 = 'https://music.163.com/weapi/v1/play/record?csrf_token='

    def _generate_random_strs(self, length):
        """Return a random string of *length* ASCII letters and digits.

        NOTE: this uses the ``random`` module, which is not a
        cryptographically secure generator.
        """
        return "".join(random.choice(self._ALPHABET) for _ in range(length))

    @staticmethod
    def _pkcs7_pad(data, block_size=16):
        """Append PKCS#7-style padding so ``len(result)`` is a multiple of *block_size*.

        A full block of padding is added when *data* is already aligned.
        """
        pad_len = block_size - len(data) % block_size
        return data + bytes([pad_len]) * pad_len

    def _AESencrypt(self, msg, key):
        """AES-CBC encrypt *msg* with *key* and return the Base64 text.

        Args:
            msg: Plain text (``str``) to encrypt.
            key: AES key as ``str``; it is UTF-8 encoded before use.

        Returns:
            The ciphertext, Base64-encoded and decoded to ``str``.
        """
        # Pad the encoded bytes, not the str: a non-ASCII character occupies
        # more than one byte, so padding by character count would leave the
        # plaintext misaligned with the 16-byte block size.
        padded = self._pkcs7_pad(msg.encode('utf-8'))

        cipher = AES.new(key.encode('utf-8'), AES.MODE_CBC, self._IV.encode('utf-8'))

        return base64.b64encode(cipher.encrypt(padded)).decode('utf-8')

    def _RSAencrypt(self, randomstrs, key, f):
        """Return ``m ** key % f`` as a 256-digit, zero-padded hex string.

        Args:
            randomstrs: Text whose *reversed* UTF-8 bytes form the integer ``m``.
            key: Exponent as a hex string.
            f: Modulus as a hex string.
        """
        # The string is reversed before being read as a big-endian integer.
        base = int.from_bytes(randomstrs[::-1].encode('utf-8'), 'big')

        # Three-argument pow reduces modulo f at every step instead of first
        # materialising the full power.
        seckey = pow(base, int(key, 16), int(f, 16))

        return format(seckey, 'x').zfill(256)

    def _get_params1(self, page):
        """Build the encrypted fields for one page of the followers request.

        Args:
            page: 1-based page number; each page covers 20 entries.

        Returns:
            A ``(params, encSecKey)`` tuple of strings.
        """
        offset = (page - 1) * 20

        msg = ('{"userId": "' + self.singer_id + '", "offset":' + str(offset)
               + ', "total": "false", "limit": "20", "csrf_token": ""}')

        # First round: fixed key.
        first_pass = self._AESencrypt(msg, self.key)

        # Second round: fresh 16-character key generated for this request.
        random_key = self._generate_random_strs(16)

        encText = self._AESencrypt(first_pass, random_key)

        # The random key is sent alongside, encrypted with e / f.
        encSecKey = self._RSAencrypt(random_key, self.e, self.f)

        return encText, encSecKey

    def start_requests(self):
        """Yield the ``{'params', 'encSecKey'}`` form dict for each page in range.

        Only page 1 is covered by the current range.
        """
        for page in range(1, 2):
            params, encSecKey = self._get_params1(page)
            yield {'params': params, 'encSecKey': encSecKey}



# Wall-clock reference point for the elapsed-time printout.
start = time.time()
msg = '{"userId": "1411492497", "offset":"1", "total": "false", "limit": "20", "csrf_token": ""}'
key = '0CoJUm6Qyw8W8jud'
x = niub()

# Pre-compute the encrypted fields for pages 1-3 and split the resulting
# (params, encSecKey) pairs into two parallel lists.
heji, heji1 = map(list, zip(*(x._get_params1(page) for page in range(1, 4))))



from bs4 import BeautifulSoup
def lianjie(g, timeout=10):
    """POST one pre-encrypted followers-page request and return the response.

    Args:
        g: Index into the module-level ``heji`` (``params``) and ``heji1``
            (``encSecKey``) lists selecting which payload to send.
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot block the script indefinitely.

    Returns:
        The ``requests.Response`` object of the POST.

    Raises:
        IndexError: If ``g`` is out of range for ``heji`` / ``heji1``.
        requests.RequestException: On connection failure, timeout, TLS
            verification failure, or an HTTP error status.
    """
    # Content-Length is deliberately not set here: requests derives it from
    # the encoded form body.
    # NOTE(review): 'br' in Accept-Encoding can only be decoded when a Brotli
    # package is installed alongside requests/urllib3 - confirm.
    headers = {
        'Host': 'music.163.com',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': '*/*',
        'Origin': 'https://music.163.com',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Referer': 'https://music.163.com/user/fans?id=1411492497',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }

    url = 'https://music.163.com/weapi/user/getfolloweds?csrf_token='

    data = {
        'params': heji[g],
        'encSecKey': heji1[g],
    }

    # Certificate verification is left at the requests default (enabled).
    r = requests.post(url=url, data=data, headers=headers, timeout=timeout)

    # Surface HTTP-level failures here rather than as a JSON error later.
    r.raise_for_status()

    return r

if __name__ == '__main__':
    # Fetch the three prepared pages and append one line per follower to the
    # output file. The file is opened once for the whole run; the ``with``
    # block closes it, so no explicit close() is needed.
    with open('鱿小鱼.txt', 'a', encoding='utf-8') as q:
        for g in range(3):
            for user in lianjie(g).json()['followeds']:
                mingzi = user['nickname']
                guanzhu = user['follows']
                fensi = user['followeds']
                dongtai = user['eventCount']
                shuju = f'名字:{mingzi};动态:{dongtai};关注:{guanzhu};粉丝:{fensi}\n'
                q.write(shuju)
                # Report seconds elapsed since ``start`` after each record.
                end = time.time()
                print(end - start)

这是第一种方法抓取粉丝,最好把User-Agent换掉。这种不能一次性抓取20页以上,容易卡死。

posted @ 2020-10-02 16:16  凹凸曼大人  阅读(383)  评论(0)    收藏  举报