# 爬淘宝 (Scrape Taobao)

from selenium import webdriver
import time, re
import csv
import tkinter as tk

# Root window of the shop-collector UI; configured before the event loop starts.
window = tk.Tk()
window.geometry('600x500')  # initial size as "width x height"
window.title('淘宝店铺采集')  # title-bar caption


def request(name):
    """Search Taobao for *name* and return the page count read from the pager.

    Drives the module-level ``browser``: loads the Taobao home page, types
    the keyword into the search box, submits the search, clicks an element
    on the login page, sleeps, and then parses the pager text.

    Args:
        name: Search keyword typed into the search box.

    Returns:
        The first run of digits found in the pager text, as an ``int``.

    Raises:
        ValueError: If the pager text contains no digits.
    """
    # The locator strategy is passed as the plain string "xpath" (the value
    # of selenium's By.XPATH). The find_element_by_* helpers were removed in
    # Selenium 4.3, while find_element(by, value) exists in both 3.x and 4.x.
    browser.get("https://www.taobao.com/")
    browser.find_element("xpath", '//*[@id="q"]').send_keys(name)
    time.sleep(5)
    browser.find_element("xpath", '//*[@id="J_TSearchForm"]/div[1]/button').click()
    browser.implicitly_wait(10)  # implicit wait for subsequent element lookups
    browser.maximize_window()

    # Login handling. NOTE(review): the clicked element presumably switches
    # the login form (e.g. to QR-code login) and the 20 s sleep presumably
    # leaves time to finish logging in by hand -- confirm against the page.
    browser.find_element("xpath", '//*[@id="login"]/div[1]/i').click()
    time.sleep(20)

    pager_text = browser.find_element(
        "xpath", '//*[@id="mainsrp-pager"]/div/div/div/div[1]'
    ).text
    match = re.search(r'\d+', pager_text)
    if match is None:
        # Previously this surfaced as an opaque AttributeError on None.
        raise ValueError(f"no page count found in pager text: {pager_text!r}")
    count = match.group()
    print(count)
    return int(count)


def get_data():
    """Append the shops listed on the current results page to ``data.csv``.

    Reads the page currently loaded in the module-level ``browser``. For
    every child ``div`` of the ``items`` container it takes the first link
    inside the ``row row-3 g-clearfix`` row, writes ``[link text, href]`` as
    one CSV row and echoes ``href`` and text to stdout.
    """
    # find_elements(by, value) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3; "xpath" is the value of By.XPATH.
    shop_list = browser.find_elements("xpath", '//div[@class="items"]/div')

    # Open the output once per page rather than once per row.
    with open('data.csv', 'a', newline="") as filecsv:
        csvwriter = csv.writer(filecsv, delimiter=',')
        for div in shop_list:
            # Look the anchor up once; both the name and the link come from it.
            anchor = div.find_element(
                "xpath", './/div[@class="row row-3 g-clearfix"]/div[1]/a'
            )
            shop_link = anchor.get_attribute('href')
            shop_name = anchor.text
            csvwriter.writerow([shop_name, shop_link])
            print(shop_link, shop_name)


def main():
    """Run one scrape: reset ``data.csv``, ask for a keyword, walk all pages.

    Truncates ``data.csv`` and writes the header row, reads the keyword from
    standard input, calls ``request`` to run the search and obtain the page
    count, scrapes the first page, then loads each remaining page by URL
    (offset ``s`` advances in steps of 44) and scrapes it with ``get_data``.
    """
    from urllib.parse import quote_plus

    with open('data.csv', 'w', newline="") as filecsv:
        csvwriter = csv.writer(filecsv, delimiter=',')
        csvwriter.writerow(['店铺名称', '店铺链接'])

    name = input("请输入关键字:").strip()
    page = request(name)
    get_data()

    # Escape the keyword so characters such as '&', '#' or spaces cannot
    # corrupt the query string.
    query = quote_plus(name)
    # The implicit wait is a session-wide setting; set it once, not per page.
    browser.implicitly_wait(10)
    # range() instead of `while page_num != page`, which never terminated
    # when the reported page count was 0.
    for page_num in range(1, page):
        browser.get(f'https://s.taobao.com/search?q={query}&s={page_num * 44}')
        get_data()



def work():
    """Start Chrome, run the scrape, and always shut the browser down.

    Binds a new ``webdriver.Chrome()`` to the module-level ``browser`` used
    by the other functions, runs ``main()``, and prints a completion message
    after three blank lines. Any exception from ``main()`` propagates, but
    the browser is quit first.
    """
    global browser
    browser = webdriver.Chrome()
    try:
        main()
    finally:
        # Quit even when main() raises so no Chrome process is left behind.
        browser.quit()
    print()
    print()
    print()
    print("抓取完成!")


# Start button: clicking it invokes work().
bt1 = tk.Button(
    window,
    command=work,
    text='开始运行',
    height=2,
    width=15,
)
bt1.place(x=250, y=200)

# Hand control to the Tk event loop so the window is displayed.
window.mainloop()

 

# posted @ 2020-08-29 16:00  凯帅  阅读(89)  评论(0)    收藏  举报