缺口验证码

# -*-coding:utf-8 -*-
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from urllib.request import urlretrieve
import time, random
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from PIL import ImageChops
import PIL.Image as image
import cv2
from scrapy.http import HtmlResponse


# options = Options()
# options.add_argument('--headless')
# options.add_argument('--no-sandbox')
# options.add_argument('--disable-dev-shm-usage')
# driver = webdriver.Chrome(executable_path="d:\CaiPan\Chrome\chromedriver.exe", chrome_options=options)

class Crack(object):
    """
    Drive Chrome through Selenium to solve a slider ("gap") captcha.

    The working pipeline is :meth:`process`: download the captcha background
    and the slider piece, resize both, find the piece inside the background
    with OpenCV template matching, then drag the slider along a generated
    track and inspect the resulting page.
    """

    # Names the two captcha images are downloaded to (relative to the CWD).
    BG_FILENAME = 'bg.jpg'
    FG_FILENAME = 'fg.jpg'

    # Arguments handed to Chrome, in the order they are added.
    # NOTE(review): apart from the first three, these look like HTTP request
    # headers rather than Chrome command-line switches; whether Chrome honours
    # them has not been verified. They are kept unchanged.
    _CHROME_ARGUMENTS = (
        '--disable-gpu',
        '--no-sandbox',
        'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"',
        'upgrade-insecure-requests="1"',
        'sec-fetch-user="?1"',
        'sec-fetch-site="none"',
        'sec-fetch-mode="navigate"',
        'pragma="no-cache"',
        'cache-control="no-cache"',
        'accept-language="en-US,en;q=0.9"',
        'accept-encoding="gzip, deflate, br"',
        ':scheme="https"',
        ':method="GET"',
        ':authority="www.xiaohongshu.com"',
        'accept="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3"',
    )

    def __init__(self, url, driver_path=r'D:\CaiPan\Chrome\chromedriver.exe'):
        """
        Start a Chrome session configured for the captcha page.

        :param url: address of the captcha page to open
        :param driver_path: location of the chromedriver executable
        """
        self.options = Options()
        for argument in self._CHROME_ARGUMENTS:
            self.options.add_argument(argument)

        self.url = url
        self.browser = webdriver.Chrome(driver_path, chrome_options=self.options)
        self.wait = WebDriverWait(self.browser, 100)
        # Pixel margin subtracted from the gap position in crack().
        self.BORDER = 6
        # Set by close() so that closing twice is harmless.
        self._closed = False

    def open(self):
        """
        Load the captcha page and set a 30 second implicit wait.
        """
        self.browser.get(self.url)
        self.browser.implicitly_wait(30)

    def get_size(self):
        """
        Print the current browser window size.

        :return: the size dictionary reported by the browser
        """
        screenSize = self.browser.get_window_size()
        print(f"当前屏幕尺寸为{screenSize}")
        return screenSize

    def _find_when_present(self, strategy, value, poll_interval=0.5):
        """
        Poll until an element can be located, then return it.

        :param strategy: name of a ``By`` attribute, e.g. ``'CLASS_NAME'``
        :param value: locator value for that strategy
        :param poll_interval: seconds to sleep between attempts
        :return: the located element
        """
        # Imported here so the class does not need a new module-level import.
        from selenium.common.exceptions import NoSuchElementException
        from selenium.webdriver.common.by import By

        by = getattr(By, strategy)
        while True:
            try:
                return self.browser.find_element(by, value)
            except (NoSuchElementException, TimeoutException):
                # Not rendered yet: retry in place instead of recursing.
                time.sleep(poll_interval)

    def _fetch_captcha_image(self, class_name, filename, done_message):
        """
        Download the image shown by the element with the given class.

        Retries until the element exposes a non-empty ``src`` attribute.

        :param class_name: CSS class of the ``img`` element
        :param filename: local file the image is written to
        :param done_message: text printed once the download has finished
        :return: x coordinate at 80% of the element's width
        """
        while True:
            element = self._find_when_present('CLASS_NAME', class_name)
            # Presumably gives the widget time to finish loading the image.
            time.sleep(2)
            location = element.location
            size = element.size
            print(location)
            print(size)
            start_x = location['x'] + int(size['width']) * 0.2
            middle_y = location['y'] + int(size['height']) * 0.5
            end_x = location['x'] + int(size['width']) * 0.8
            print(start_x, middle_y, end_x, middle_y)
            source_url = element.get_attribute("src")
            if source_url:
                print(source_url)
                urlretrieve(url=source_url, filename=filename)
                print(done_message)
                return end_x

    def get_images(self):
        """
        Download the slider piece and the background image of the captcha.

        The files are written to ``fg.jpg`` and ``bg.jpg``.

        :return: horizontal offset between the 80%-width points of the
                 background element and the slider-piece element
        """
        fg_end_x = self._fetch_captcha_image(
            "shumei_captcha_loaded_img_fg", self.FG_FILENAME, '缺口图片下载完成')
        bg_end_x = self._fetch_captcha_image(
            "shumei_captcha_loaded_img_bg", self.BG_FILENAME, '背景图片下载完成')
        distance = bg_end_x - fg_end_x
        print(distance)
        return distance

    def get_gap(self, img1, img2):
        """
        Find the first column, scanning from x = 15, where two images differ.

        :param img1: image without the gap
        :param img2: image with the gap
        :return: x of the first differing column, or 15 when none differs
        """
        left = 15
        width, height = img1.size
        for x in range(left, width):
            for y in range(height):
                if not self.is_pixel_equal(img1, img2, x, y):
                    return x
        return left

    def is_pixel_equal(self, img1, img2, x, y):
        """
        Tell whether two images have (nearly) the same pixel at one position.

        :param img1: first image
        :param img2: second image
        :param x: column of the pixel
        :param y: row of the pixel
        :return: True when each of the first three channels differs by less
                 than the threshold
        """
        pix1 = img1.load()[x, y]
        pix2 = img2.load()[x, y]
        threshold = 60
        # abs() must wrap the difference, not the comparison; otherwise any
        # negative difference is treated as "equal".
        return all(abs(pix1[channel] - pix2[channel]) < threshold
                   for channel in range(3))

    def crack(self):
        """
        Open the page, locate the gap and print the track that would be used.

        Uses the same download / resize / template-matching steps as
        :meth:`process`, but does not move the slider.

        :return: the generated track
        """
        self.open()
        self.get_images()
        self.fixed_size(self.BG_FILENAME, '1bg.jpg', 400, 200)
        self.fixed_size(self.FG_FILENAME, '1fg.png', 60, 200)

        gap = self.FindPic('1bg.jpg', '1fg.png')
        print('缺口位置', gap)

        track = self.get_track(gap - self.BORDER)
        print('滑动滑块')
        print(track)
        return track

    def get_merge_image(self, filename, location_list):
        """
        Reassemble a sliced image into a 400x200 picture and overwrite the file.

        Slices whose ``y`` is -100 are cut from the lower half of the source
        and laid out along the top row; slices whose ``y`` is 0 are cut from
        the upper half and laid out along the bottom row. Each slice is 10
        pixels wide.

        :param filename: image file to read and then overwrite
        :param location_list: dictionaries with ``x`` and ``y`` offsets
        :return: the merged image
        """
        with image.open(filename) as source:
            upper_strips = [
                source.crop((abs(loc['x']), 100, abs(loc['x']) + 10, 200))
                for loc in location_list if loc['y'] == -100
            ]
            lower_strips = [
                source.crop((abs(loc['x']), 0, abs(loc['x']) + 10, 100))
                for loc in location_list if loc['y'] == 0
            ]

        merged = image.new('RGB', (400, 200))
        for row_y, strips in ((0, upper_strips), (100, lower_strips)):
            x_offset = 0
            for strip in strips:
                merged.paste(strip, (x_offset, row_y))
                x_offset += strip.size[0]
        merged.save(filename)
        print(merged)
        return merged

    def fixed_size(self, infile, outfile, width, height):
        """
        Resize an image file to a fixed size and save it under a new name.

        :param infile: image file to read
        :param outfile: file the resized image is written to
        :param width: target width in pixels
        :param height: target height in pixels
        """
        # LANCZOS is the same filter as the ANTIALIAS alias, which newer
        # Pillow releases no longer provide.
        with image.open(infile) as im:
            out = im.resize((width, height), image.LANCZOS)
        out.save(outfile)

    def FindPic(self, target, template):
        """
        Find where the template matches best inside the target image.

        :param target: path of the background image
        :param template: path of the image to look for
        :return: x coordinate of the best match
        :raises OSError: when either image cannot be read
        """
        target_rgb = cv2.imread(target)
        if target_rgb is None:
            raise OSError(f"could not read image {target!r}")
        target_gray = cv2.cvtColor(target_rgb, cv2.COLOR_BGR2GRAY)
        # Flag 0 loads the template directly as grayscale.
        template_gray = cv2.imread(template, 0)
        if template_gray is None:
            raise OSError(f"could not read image {template!r}")
        res = cv2.matchTemplate(target_gray, template_gray, cv2.TM_CCOEFF_NORMED)
        # minMaxLoc gives (min value, max value, min location, max location).
        value = cv2.minMaxLoc(res)
        print('*****')
        print(value)
        return value[-1][0]

    def get_slider(self):
        """
        Wait for the slider button to appear and return it.

        :return: the slider element
        """
        return self._find_when_present(
            'XPATH', "//div[@class='shumei_captcha_slide_btn']")

    def get_track(self, distance):
        """
        Build a list of horizontal steps that together cover ``distance``.

        The motion accelerates over the first four fifths of the distance and
        decelerates afterwards. Steps are whole pixels and add up exactly to
        the rounded distance.

        :param distance: total offset to cover
        :return: list of integer step sizes (empty when distance <= 0)
        """
        print("=" * 10, distance)
        # Point at which the motion switches from speeding up to slowing down.
        mid = distance * 4 / 5
        print(mid)

        target = float(distance)
        t = 0.2  # duration of one step
        v = 0  # current velocity
        current = 0.0  # exact position reached so far
        emitted = 0  # whole pixels already placed in the track
        track = []

        while current < target:
            a = 4 if current < mid else -3.5
            v0 = v
            v = v0 + a * t
            # x = v0 * t + 1/2 * a * t^2
            move = v0 * t + 1 / 2 * a * t * t
            # Clamp so the last step lands on the target instead of past it.
            current = min(current + move, target)
            # Round the cumulative position, not each step, so rounding
            # errors do not accumulate.
            step = int(round(current)) - emitted
            emitted += step
            track.append(step)
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and release it.

        :param slider: slider element
        :param track: horizontal offsets to move by, one per step
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for offset in track:
            ActionChains(self.browser).move_by_offset(xoffset=offset, yoffset=0).perform()

        time.sleep(0.8)
        ActionChains(self.browser).release().perform()

    def result_html(self):
        """
        Check the page after the drag and retry when verification failed.

        :return: the page as an ``HtmlResponse``; after a failure, whatever
                 the retried :meth:`process` returns
        """
        response = HtmlResponse(url=self.browser.current_url, body=self.browser.page_source,
                                encoding='utf-8')
        if '验证失败,请重新再试' in response.text:
            # Retry on this instance rather than on a module-level variable.
            return self.process()
        print(response.text)
        return response

    def close(self):
        """
        Shut the browser session down; calling it again does nothing.
        """
        if getattr(self, '_closed', False):
            return
        self._closed = True
        # quit() also stops the driver process, which close() leaves running.
        self.browser.quit()

    def process(self):
        """
        Run one full attempt: download, locate, drag, check, then close.

        :return: the response produced by :meth:`result_html`
        """
        try:
            self.get_images()
            self.fixed_size(self.BG_FILENAME, '1bg.jpg', 400, 200)
            self.fixed_size(self.FG_FILENAME, '1fg.png', 60, 200)
            x = self.FindPic('1bg.jpg', '1fg.png')
            slider = self.get_slider()
            track = self.get_track(x)
            self.move_to_gap(slider, track)
            time.sleep(2)
            return self.result_html()
        finally:
            # Runs on errors too, so the browser is never left open.
            self.close()


if __name__ == "__main__":
    # Script entry point: open the captcha page, report the window size,
    # then run the solving pipeline once.
    url = (
        'https://www.xiaohongshu.com/web-login/captcha?redirectPath=http%3A%2F%2Fwww.xiaohongshu.com%2Fuser%2Fprofile%2F590d4d5950c4b4281396ea20'
    )
    c = Crack(url)
    c.open()
    c.get_size()
    # range(1, 2) yields a single value, so exactly one attempt is made.
    for i in range(1, 2):
        c.process()

缺口验证码的验证!!

posted on 2019-11-22 10:14  零度风格  阅读(894)  评论(0)  编辑  收藏  举报

导航