def catch_data(url, FileName):
    """Scrape the goods list rendered at *url* and save it as a spreadsheet.

    The page is opened in a browser obtained from ``commonMethod.get_driver``,
    scrolled down step by step, and the resulting DOM is parsed with pyquery.
    One row per ``li.gl-item`` element found under ``div#J_goodsList`` is
    written to a single-sheet xlwt workbook.

    Args:
        url: Address of the goods-list page to open.
        FileName: Path the workbook is saved to via ``xlwt.Workbook.save``.

    Returns:
        None. Any exception raised while scraping or saving is printed and
        swallowed (best-effort behaviour kept from the original); in that
        case no workbook is written.

    Note:
        The parameter order here is ``(url, FileName)``, the reverse of
        ``catch_category1(FileName, url)``.
    """
    ip = commonMethod.getIP()
    userAgent = commonMethod.get_userAgent()
    driver = commonMethod.get_driver(ip, userAgent, False)
    try:
        driver.get(url)
        time.sleep(5)

        # Scroll from 1000px to 6000px in 500px steps, pausing after each
        # step -- presumably so lazily loaded items get rendered before the
        # DOM snapshot is taken (TODO confirm against the target page).
        for i in range(1, 12):
            wid = 500 * i + 500
            driver.execute_script(
                "var q=document.documentElement.scrollTop=" + str(wid))
            time.sleep(2)

        selenium_html = driver.execute_script(
            "return document.documentElement.outerHTML")
        doc = pq(selenium_html)
        goods = (
            doc("div[class='ml-wrap']")
            .find("div[id='J_goodsList']")
            .find("ul[class='gl-warp clearfix']")
            .find("li[class^='gl-item']")
        )

        headList = ['大图链接', '价格', '商品名称', '评价数', '店铺名称']
        data_list = []
        for good in goods.items():
            wrap = good.find("div[class='gl-i-wrap']")

            # attr() returns None when the <img> has no src; concatenating
            # that with 'https:' used to raise TypeError and abort the whole
            # scrape. Fall back to an empty link instead, and do not
            # double-prefix a URL that already carries a scheme.
            src = wrap.find("div[class='p-img']").find("a").find("img").attr('src')
            if not src:
                picture_url = ''
            elif src.startswith(('http:', 'https:')):
                picture_url = src
            else:
                picture_url = 'https:' + src

            price = wrap.find("div[class='p-price']").text()
            name = (
                wrap.find("div[class='p-name p-name-type-3']")
                .find("a").find("em").text()
            )
            comment_count = (
                wrap.find("div[class='p-commit']")
                .find("strong").find("a").text()
            )
            # May be None when the shop link has no title attribute.
            store = (
                wrap.find("div[class='p-shop']")
                .find("span[class='J_im_icon']")
                .find("a").attr('title')
            )

            print(picture_url, price, name, comment_count, store)
            data_list.append([picture_url, price, name, comment_count, store])

        wbk = xlwt.Workbook()
        sheet1 = wbk.add_sheet('sheet', cell_overwrite_ok=True)
        # Row 0 is the header; the final flag is True only for that row
        # (presumably it selects header styling -- see WriteSheetRow).
        commonMethod.WriteSheetRow(sheet1, headList, 0, True)
        for rowIndex, lst in enumerate(data_list, 1):
            commonMethod.WriteSheetRow(sheet1, lst, rowIndex, False)
        wbk.save(FileName)
        time.sleep(1)
    except Exception as ex:
        print(ex)
    finally:
        # Always release the browser; it was previously left running after
        # every call, whether the scrape succeeded or failed.
        driver.quit()
def catch_category1(FileName, url):
    """Scrape the three-level category menu at *url* into a spreadsheet.

    Every first-level menu item (``li.cate_menu_item`` under ``div#J_cate``)
    is hovered in turn so that its pop-up panel is rendered, then the DOM is
    parsed with pyquery. Each third-level link becomes one row:
    sequence number, level-1 name, level-2 name, level-3 name, level-3 link.

    Args:
        FileName: Path the workbook is saved to via ``xlwt.Workbook.save``.
        url: Address of the page that contains the category menu
            (a commented-out default in the original was
            ``https://www.jd.com/``).

    Returns:
        None. Any exception raised while scraping or saving is printed and
        swallowed (best-effort behaviour kept from the original); in that
        case no workbook is written.

    Note:
        The parameter order here is ``(FileName, url)``, the reverse of
        ``catch_data(url, FileName)``.
    """
    ip = commonMethod.getIP()
    userAgent = commonMethod.get_userAgent()
    driver = commonMethod.get_driver(ip, userAgent, False)
    try:
        driver.get(url)
        time.sleep(10)

        # NOTE(review): find_elements_by_xpath was removed in Selenium 4.3;
        # switch to driver.find_elements("xpath", ...) when upgrading.
        elements = driver.find_elements_by_xpath(
            '//div[@class="fs_col1"]/div[@id="J_cate"]'
            '/ul[@class="JS_navCtn cate_menu"]/li[@class="cate_menu_item"]')

        category_one_list = []
        for element in elements:
            # Read the text once; each access is a round trip to the driver.
            label = element.text
            print(label)
            category_one_list.append(str(label).replace(' / ', '/'))
            # Hover the item so its pop-up panel is present in the DOM
            # snapshot taken below.
            ActionChains(driver).move_to_element(element).perform()
            time.sleep(1)

        selenium_html = driver.execute_script(
            "return document.documentElement.outerHTML")
        doc = pq(selenium_html)
        panels = (
            doc("div[class='fs_col1']")
            .find("div[id='J_cate']")
            .find("div[id='J_popCtn']")
            .find("div[class='cate_part clearfix']")
        )

        headList = ['序号', '一级分类', '二级分类', '三级分类', '三级分类链接']
        data_list = []
        count = 1
        for index, panel in enumerate(panels.items()):
            # Panels are matched to menu items by position. If more panels
            # were parsed than menu items collected, use an empty name
            # rather than raising IndexError and losing every row.
            if index < len(category_one_list):
                category_one = category_one_list[index]
            else:
                category_one = ''

            groups = (
                panel.find("div[class='cate_part_col1']")
                .find("div[class='cate_detail']")
                .find("dl[class^='cate_detail_item cate_detail_item']")
            )
            for group in groups.items():
                category_two = (
                    group.find("dt[class='cate_detail_tit']")
                    .find("a[class='cate_detail_tit_lk']").text()
                )
                links = (
                    group.find("dd[class='cate_detail_con']")
                    .find("a[class='cate_detail_con_lk']")
                )
                for link in links.items():
                    category_three = link.text()

                    # attr() returns None for a link without href; the
                    # original concatenation then raised TypeError. Also
                    # avoid double-prefixing an already absolute URL.
                    href = link.attr('href')
                    if not href:
                        category_three_link = ''
                    elif href.startswith(('http:', 'https:')):
                        category_three_link = href
                    else:
                        category_three_link = 'https:' + href

                    print(category_one, category_two, category_three,
                          category_three_link)
                    data_list.append([
                        count,
                        category_one,
                        category_two,
                        category_three,
                        category_three_link,
                    ])
                    count += 1

        wbk = xlwt.Workbook()
        sheet1 = wbk.add_sheet('sheet', cell_overwrite_ok=True)
        # Row 0 is the header; the final flag is True only for that row
        # (presumably it selects header styling -- see WriteSheetRow).
        commonMethod.WriteSheetRow(sheet1, headList, 0, True)
        for rowIndex, lst in enumerate(data_list, 1):
            commonMethod.WriteSheetRow(sheet1, lst, rowIndex, False)
        wbk.save(FileName)
        time.sleep(1)
    except Exception as ex:
        print(ex)
    finally:
        # Always release the browser; it was previously left running after
        # every call, whether the scrape succeeded or failed.
        driver.quit()