# Scrape product information from JD.com (爬取京东商品信息)
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Configure and start the Chrome browser session used by the whole script.
option = ChromeOptions()
option.add_argument('disable-infobars')
# `options=` is the supported keyword; `chrome_options=` is deprecated and
# has been removed from newer Selenium releases.
driver = webdriver.Chrome(options=option)
# Text layout of one saved product record: name, price, link, image URL and
# review count, in that order. The labels are part of the emitted output.
_GOODS_TEMPLATE = (
    '\n'
    '商品名称: %s\n'
    '商品价格: %s\n'
    '商品链接: %s\n'
    '商品图片: %s\n'
    '评价人数: %s\n'
)


def _scroll_through_page(driver):
    """Scroll the current page downwards in 20 steps of 500px, starting at 400px.

    Presumably this gives the page time to render items that are only loaded
    once they come into view -- TODO confirm against the live page.
    """
    offset = 400
    for _ in range(20):
        driver.execute_script('\nwindow.scrollTo(0, %s)\n' % offset)
        offset += 500
        time.sleep(0.2)


def _format_good(good):
    """Return the text record for a single product element."""
    name = good.find_element(By.CSS_SELECTOR, '.p-name em').text.replace('\n', '')
    price = good.find_element(By.CSS_SELECTOR, '.p-price').text.replace('\n', '')
    link = good.find_element(By.CSS_SELECTOR, '.p-img a').get_attribute('href')
    image = good.find_element(By.CSS_SELECTOR, '.p-img img').get_attribute('src')
    # Review-count text; line breaks are turned into spaces to keep it on one line.
    commit = good.find_element(By.CSS_SELECTOR, '.p-commit').text.replace('\n', ' ')
    return _GOODS_TEMPLATE % (name, price, link, image, commit)


def get_goods(driver):
    """Scrape every result page reachable from the currently loaded page.

    For each page: scroll through it, read every ``gl-item`` element inside
    ``#J_goodsList``, print each product record and append it to
    ``京东女士内衣数据爬去.txt``. Then click the ``pn-next`` button and repeat.

    Pages are processed in a loop rather than by recursion, so the call stack
    does not grow with the number of pages. The loop ends when no next-page
    button is found or the button is marked as disabled.

    Args:
        driver: A Selenium WebDriver already showing a search result page.
    """
    while True:
        _scroll_through_page(driver)

        # Container holding all products, then the individual product elements.
        good_div = driver.find_element(By.ID, 'J_goodsList')
        good_list = good_div.find_elements(By.CLASS_NAME, 'gl-item')

        # Open the output file once per page instead of once per product.
        with open('京东女士内衣数据爬去.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                goods = _format_good(good)
                print(goods)
                f.write(goods + '\n')

        # find_elements returns an empty list instead of raising when the
        # button is missing, which gives a clean stop condition.
        next_tags = driver.find_elements(By.CLASS_NAME, 'pn-next')
        if not next_tags:
            return
        next_tag = next_tags[0]
        # NOTE(review): assumes the last page marks the button with a
        # "disabled" class -- confirm against the live markup.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            return

        next_tag.click()
        # Give the next page time to load before scraping it.
        time.sleep(3)
# Entry point: open JD.com, search for the keyword and scrape the results.
try:
    driver.get('https://www.jd.com/')
    # Let element lookups wait up to 10 seconds for elements to appear.
    driver.implicitly_wait(10)
    # Type the search keyword into the search box and submit the search.
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('女士内衣')
    search_button = driver.find_element(By.CLASS_NAME, 'button')
    search_button.click()
    get_goods(driver)
    # Keep the browser open for a while after scraping finishes.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session, including the driver process;
    # close() would only close the current window.
    driver.quit()

# 浙公网安备 33010602011771号