07.Cookie池设计

Cookie池设计思路:

参考前面的IP池设计,Cookie池的设计原理基本相同:

  1.获取Cookie的来源 (可能需要IP池作为支撑)

  2.Cookie程序内管理

  3.应用到requests

程序实现:

使用selenium获取Cookie:

import random
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from queue import Queue
from threading import Lock

from selenium import webdriver
from selenium.common.exceptions import TimeoutException

# Raw string: the value is unchanged, but "\W" and "\I" are no longer parsed
# as (invalid) escape sequences, which newer Python versions warn about.
sys.path.append(r"D:\Work\IPS")
# Must come after the sys.path tweak above, which is what makes the
# project-local redis_cli module importable.
from redis_cli import IPS_

# NOTE: this instantiates the IPS_ imported from redis_cli (its .one() method
# is what get_ip() below calls to obtain a proxy address). It has to stay
# above the local `class IPS_` definition, which rebinds the same name.
ips = IPS_()


class IPS_():
    """Cookie harvester: loads pages through proxied headless Chrome and
    stores the resulting anti-bot cookies in a text file.

    Each stored line has the form ``<cookie>+<proxy ip>--<unix timestamp>``.
    Harvesting runs on a thread pool; ``purge_expired`` / ``delete_cookie``
    remove records older than a given age.
    """

    # Raw strings keep the exact same path values as before while avoiding
    # invalid escape sequences such as "\J" or "\z".
    cookie_file = r'D:\JR\jr\ZKGIT\ZhuGeZhaoFang\Cookie_pool\cookie.txt'
    driver_path = r'D:\zhoukai_workspace\WebDriver\chromedriver.exe'
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/98.0.4758.81 Safari/537.36')
    # pullIP gives up once more than this many attempts have failed.
    max_failures = 15

    def __init__(self):
        # Serialises calls into the shared proxy-pool client.
        self.lock1 = Lock()
        # Serialises every read/append/rewrite of the cookie file.
        self.file_lock = Lock()
        # Pages visited to trigger cookie issuance; one is picked at random
        # for every attempt.
        self.IpUrls = ['https://xm.esfxiaoqu.zhuge.com/1007323/',
                       'https://xm.esfxiaoqu.zhuge.com/1001471/',
                       'https://xm.esfxiaoqu.zhuge.com/1007892/',
                       'https://xm.esfxiaoqu.zhuge.com/1003688/',
                       'https://xm.esfxiaoqu.zhuge.com/1001693/'
                       ]
        self.queue_ip = Queue()
        self.threadPoll = ThreadPoolExecutor(max_workers=8)

    def get_ip(self):
        """Return one proxy address from the module-level ``ips`` client.

        The lock is held through a ``with`` block so it is released even if
        ``ips.one()`` raises.
        """
        with self.lock1:
            return ips.one()

    def thread_PullIP(self):
        """Queue 20 ``pullIP`` jobs on the 8-worker pool.

        ``submit`` does not block, so the caller continues immediately.
        """
        for _ in range(20):
            self.threadPoll.submit(self.pullIP)

    def _build_options(self, ip):
        """Build the ChromeOptions for one headless, proxied browser session."""
        options = webdriver.ChromeOptions()
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        options.add_argument('--headless')
        options.add_argument("--disable-blink-features=AutomationControlled")

        prefs = {
            # Do not load images.
            'profile.managed_default_content_settings.images': 2,
            # Do not show notification pop-ups.
            'profile.default_content_setting_values': {
                'notifications': 2
            }
        }
        options.add_experimental_option('prefs', prefs)

        # Proxy and user agent. The user agent is passed with Chrome's
        # canonical lowercase "--user-agent=" switch spelling.
        options.add_argument(('--proxy-server=http://' + ip))
        options.add_argument('--user-agent={}'.format(self.user_agent))
        return options

    @staticmethod
    def _quit_quietly(driver):
        """Close the browser; a failure to close is reported, not raised."""
        try:
            driver.quit()
        except Exception as ex:
            print(ex)

    def _harvest_once(self, ip):
        """Run one browser session through proxy ``ip``.

        Returns the ``'acw_tc=...; acw_sc__v2=...'`` cookie string, or
        ``None`` when the attempt failed (timeout, browser error, challenge
        page still showing, or cookies missing). The browser is always closed
        before returning.
        """
        driver = None
        try:
            driver = webdriver.Chrome(options=self._build_options(ip),
                                      executable_path=self.driver_path)
            # Hide navigator.webdriver from page scripts.
            driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": """
                                                                Object.defineProperty(navigator, 'webdriver', {
                                                                  get: () => undefined
                                                                })
                                                              """
            })
            driver.implicitly_wait(5)
            driver.set_page_load_timeout(20)
            driver.set_script_timeout(20)
            driver.get(random.choice(self.IpUrls))
            time.sleep(1)

            # NOTE(review): 'arg1=' presumably marks the anti-bot challenge
            # page, i.e. the real page was not served -- confirm.
            if 'arg1=' in driver.page_source:
                return None

            # get_cookie() yields None for a cookie that is not set.
            acw_tc = driver.get_cookie(name='acw_tc')
            acw_sc__v2 = driver.get_cookie(name='acw_sc__v2')
            if not acw_tc or not acw_sc__v2:
                return None
            return 'acw_tc={0}; acw_sc__v2={1}'.format(acw_tc['value'],
                                                       acw_sc__v2['value'])
        except TimeoutException:
            print('关闭drive界面')
            return None
        except Exception as ex:
            print(ex)
            print('关闭drive界面')
            return None
        finally:
            # Only quit a driver that was actually created; previously a
            # failed webdriver.Chrome(...) led to quit() on an unbound or
            # already-closed driver inside the except handlers.
            if driver is not None:
                self._quit_quietly(driver)

    def _append_record(self, record):
        """Append one record line to the cookie file.

        The line and its newline go out in a single write under the file
        lock, so records from concurrent workers cannot interleave.
        """
        with self.file_lock:
            with open(self.cookie_file, 'a') as f:
                f.write(record + '\n')

    def pullIP(self):
        """Harvest cookies in a loop until too many attempts have failed.

        Every successful attempt appends a record to the cookie file and the
        loop continues with the same proxy. Every failed attempt counts
        against the budget and switches to a fresh proxy. The budget is
        checked before each attempt, so repeated timeouts terminate the loop.

        Returns:
            The tuple ``('', ip)`` once more than ``max_failures`` attempts
            have failed, where ``ip`` is the last proxy in use.
        """
        ip = self.get_ip()
        failures = 0
        while failures <= self.max_failures:
            coo = self._harvest_once(ip)
            if coo is None:
                failures += 1
                ip = self.get_ip()
                continue

            cookie = '{}+{}--{}'.format(coo, ip, time.time())
            print(cookie)
            try:
                self._append_record(cookie)
            except OSError as ex:
                # An unwritable cookie file counts as a failed attempt.
                print(ex)
                failures += 1
                ip = self.get_ip()
        return '', ip

    def purge_expired(self, max_age=300, now=None):
        """Remove expired and malformed records from the cookie file once.

        Args:
            max_age: Age in whole seconds above which a record is dropped.
            now: Reference unix time; defaults to ``time.time()``.

        Returns:
            The number of expired records removed. Lines whose trailing
            ``--<timestamp>`` cannot be parsed are dropped as well but are
            not counted. Returns 0 if the cookie file does not exist yet.
        """
        if now is None:
            now = time.time()

        # Hold the lock across read + rewrite so records appended by workers
        # in this process are not lost in between.
        with self.file_lock:
            try:
                with open(self.cookie_file, 'r') as f:
                    records = [line.rstrip('\n') for line in f]
            except FileNotFoundError:
                return 0

            kept = []
            expired = 0
            for record in records:
                try:
                    created = float(record.split('--')[-1])
                except ValueError:
                    # No parsable timestamp: drop the line.
                    continue
                if int(now - created) > max_age:
                    print('{} --- 过期'.format(record))
                    expired += 1
                    continue
                kept.append(record)

            # Skip the rewrite when nothing was dropped.
            if len(kept) != len(records):
                with open(self.cookie_file, 'w') as f:
                    f.writelines(record + '\n' for record in kept)
        return expired

    def delete_cookie(self, max_age=300, interval=5):
        """Purge expired cookie records forever; never returns.

        Args:
            max_age: Passed through to ``purge_expired``.
            interval: Seconds to sleep between passes, so the loop does not
                spin on the file continuously.
        """
        while True:
            self.purge_expired(max_age=max_age)
            time.sleep(interval)

    def run(self):
        """Start harvesting; expiry purging is not started here."""
        self.thread_PullIP()

if __name__ == '__main__':
    # Script entry point: build the harvester and queue its worker jobs.
    cookie_pool = IPS_()
    cookie_pool.run()

 

posted @ 2022-03-04 15:22  锋芒毕露的蜘蛛  阅读(131)  评论(0)    收藏  举报