import time

import pymongo
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
def _scroll_to_load_all(driver, start=400, step=500, steps=20, pause=0.1):
    """Scroll the window down step by step so the current page's products load.

    The window is scrolled to ``start``, ``start + step``, ... (``steps``
    positions in total), pausing ``pause`` seconds after each scroll.
    """
    for index in range(steps):
        driver.execute_script('window.scrollTo(0,%s)' % (start + index * step))
        time.sleep(pause)


def _extract_good(good):
    """Return a new dict describing one product element.

    The dict holds the name, price, link, image URL and comment count, read
    from the product element ``good`` via CSS selectors. Line breaks are
    removed from the text fields.
    """

    def text_of(selector):
        return good.find_element(By.CSS_SELECTOR, selector).text.replace("\n", "")

    def attribute_of(selector, attribute):
        return good.find_element(By.CSS_SELECTOR, selector).get_attribute(attribute)

    return {
        '名字': text_of('.p-name em'),
        '价格': text_of('.p-price'),
        '链接': attribute_of('.p-img a', 'href'),
        '图片': attribute_of('.p-img img', 'src'),
        '评论数': text_of('.p-commit'),
    }


def get_goods(driver):
    """Scrape every result page reachable from the current page into MongoDB.

    For each page: scroll down so the products load, read every ``gl-item``
    inside the ``J_goodsList`` container, insert one document per product
    into the ``messages`` collection of the ``jd`` database on
    ``localhost:27017``, then click the "next page" button and repeat.

    Pages are processed in a loop rather than by recursion, so a long crawl
    cannot hit Python's recursion limit, and a single MongoDB client is used
    for the whole crawl and closed when it ends.

    Args:
        driver: A Selenium WebDriver already showing a search result page.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the product
            list container or an expected field of a product is missing.
    """
    # One client for the whole crawl; the context manager closes it even if
    # scraping fails part-way through.
    with pymongo.MongoClient('localhost', 27017) as client:
        collection = client.jd.messages

        while True:
            _scroll_to_load_all(driver)

            # The outer container that holds all products of the page.
            goods_div = driver.find_element(By.ID, 'J_goodsList')
            for good in goods_div.find_elements(By.CLASS_NAME, 'gl-item'):
                # A fresh dict per product: insert_one adds an ``_id`` key to
                # the document it is given, so documents must not be shared.
                collection.insert_one(_extract_good(good))

            # find_elements returns an empty list instead of raising, which
            # lets a missing "next page" button end the crawl cleanly.
            next_tags = driver.find_elements(By.CLASS_NAME, 'pn-next')
            if not next_tags:
                break
            next_tag = next_tags[0]
            # NOTE(review): assumes the last page marks its "next" button with
            # a ``disabled`` class - confirm against the live page markup.
            if 'disabled' in (next_tag.get_attribute('class') or '').split():
                break

            next_tag.click()
            # Give the next result page time to load before scraping it.
            time.sleep(3)
# Start a Chrome browser controlled through WebDriver.
driver = webdriver.Chrome()
try:
    # Configure the implicit wait up front so every element lookup below
    # waits up to 10 seconds for its element to appear.
    driver.implicitly_wait(10)
    # Open the JD home page.
    driver.get('https://www.jd.com/')
    # Locate the search text box and type the search keyword.
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('电脑')
    # Submit the search by clicking the search button (sending Keys.ENTER to
    # the text box is an alternative way to submit).
    button = driver.find_element(By.XPATH, '//*[@id="search"]/div/div[2]/button')
    button.click()
    # Scrape the result pages into MongoDB.
    get_goods(driver)
    # NOTE(review): presumably keeps the browser open for inspection after
    # the crawl - confirm this long pause is still wanted.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session (all windows and the driver
    # process); close() would only close the current window.
    driver.quit()