# Selenium crawler for JD.com product information
import sys
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Text printed once per product. The literal is kept exactly as in the
# original script so the program's output does not change.
GOODS_TEMPLATE = '''
商品名:%s,
图片地址:%s,
价格:%s,
店铺:%s,
'''


def build_image_url(src, lazy_src):
    """Return the image URL for a product thumbnail.

    Args:
        src: Value of the ``src`` attribute; may be empty or ``None`` when the
            page lazy-loads the image.
        lazy_src: Value of the ``data-lazy-img`` attribute, used only when
            ``src`` is empty.

    Returns:
        ``src`` when it is non-empty; otherwise ``lazy_src`` turned into an
        ``https`` URL; an empty string when neither attribute has a value.
    """
    if src:
        return src
    if not lazy_src:
        # Neither attribute is set: report "no image" rather than failing on
        # ``'https://' + None``.
        return ''
    if lazy_src.startswith(('http://', 'https://')):
        return lazy_src
    if lazy_src.startswith('//'):
        # Protocol-relative value: prepending 'https://' would produce
        # 'https:////host/...', so only the scheme is added.
        return 'https:' + lazy_src
    return 'https://' + lazy_src


def extract_goods(goods):
    """Read the displayed fields of one product element.

    Args:
        goods: A WebElement for one ``gl-item`` entry of the result list.

    Returns:
        A dict with the keys ``img``, ``url``, ``price``, ``name`` and
        ``shop``.

    Raises:
        WebDriverException: If one of the expected child elements cannot be
            located (for example ``NoSuchElementException``).
    """
    # 4. Image address. The front end lazy-loads images, so ``src`` can be
    #    empty; in that case the address is read from ``data-lazy-img``.
    img_tag = goods.find_element(By.CSS_SELECTOR, value='.p-img img')
    src = img_tag.get_attribute('src')
    lazy_src = None if src else img_tag.get_attribute('data-lazy-img')

    return {
        'img': build_image_url(src, lazy_src),
        'url': goods.find_element(By.CSS_SELECTOR, value='.p-img a').get_attribute('href'),
        'price': goods.find_element(By.CSS_SELECTOR, value='.p-price i').text,
        # 5. Product name: all text inside the <em> under the .p-name element.
        'name': goods.find_element(By.CSS_SELECTOR, value='.p-name em').text,
        'shop': goods.find_element(By.CSS_SELECTOR, value='.p-shop a').text,
    }


def format_goods(record):
    """Render one product record (see ``extract_goods``) as the printed text."""
    return GOODS_TEMPLATE % (
        record['name'],
        record['img'],
        record['price'],
        record['shop'],
    )


bro = None
try:
    # NOTE(review): ``executable_path`` is deprecated in Selenium 4 and removed
    # in later 4.x releases; switch to ``Service(...)`` when upgrading. Kept
    # as-is because the installed Selenium version is not visible from here.
    bro = webdriver.Chrome(executable_path='chromedriver_mac64')
    bro.get('https://www.jd.com/')
    bro.implicitly_wait(10)
    bro.maximize_window()

    # 1. Locate the search box, type the keyword and press Enter.
    search_btn = bro.find_element(By.ID, value='key')
    search_btn.send_keys('茅台')
    search_btn.send_keys(Keys.ENTER)
    time.sleep(2)

    # 2. Scroll down to 5000px so that more products get loaded.
    bro.execute_script('scrollTo(0,5000)')
    time.sleep(2)

    # 3. Collect every product element in the result list.
    goods_list = bro.find_elements(By.CLASS_NAME, value='gl-item')

    for goods in goods_list:
        try:
            record = extract_goods(goods)
        except WebDriverException as exc:
            # Best effort: an entry missing an expected element is skipped,
            # but the reason is reported instead of being silently dropped.
            print('skipped one product: %s' % exc, file=sys.stderr)
            continue
        print(format_goods(record))
except Exception as e:
    # Top-level boundary of the script: report the failure and fall through
    # to the cleanup below.
    print(e)
finally:
    # Always end the browser session; otherwise the browser and driver
    # processes are left running after the script finishes.
    if bro is not None:
        bro.quit()