爬虫类_selenium获取相关信息
一、所需库
selenium、xlrd、xlwt
二、代码
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Fore Wang
"""Look up settlement codes in a web system and export customer contacts.

Workflow:
1. Read the settlement codes from column 1 of "Sheet1" in the input workbook.
2. Log in through Chrome, open the order search page in a second window.
3. For each code: search, open the detail page of the first result in a third
   window, read the customer's own data and the contact table, close the
   detail window.
4. Write one row per customer to an .xls workbook and print the codes for
   which nothing could be collected.

Requires selenium, xlrd and xlwt. They are imported inside the functions that
need them so the pure helpers (normalize_code, parse_contacts, flatten_row,
build_header) can be imported and tested without a browser stack installed.
"""
import re
import time

# ---- input / output ---------------------------------------------------------
INPUT_PATH = r"单编号样表.xls"
INPUT_SHEET = "Sheet1"
CODE_COLUMN = 1  # column holding the settlement code; row 0 is the header
# NOTE(review): the doubled backslash after "C:" is kept exactly as in the
# original script; confirm it is intended before normalizing the path.
OUTPUT_PATH = r"C:\\Users\Administrator\Desktop\单编号三方联系方式.xls"
OUTPUT_SHEET = '客户信息'

# ---- site -------------------------------------------------------------------
LOGIN_URL = "http://hsr.huashenghaoche.com/hshcmdm/login"
SEARCH_URL = "http://collection.huashenghaoche.com/hshc-collection-web/commonSearch/gotoPage"
USERNAME = "账户名称"  # placeholder, replace with the real account name
PASSWORD = "账户密码"  # placeholder, replace with the real password
LOGIN_BUTTON_XPATH = "/html/body/div/div/div/form/input"
RESULT_LINK_XPATH = '//*[@id="commonSearchTable"]/tbody/tr/td[9]/a'
CONTACT_TAB_XPATH = '//*[@id="taskDetailTabs"]/li[2]/a'
WINDOW_TIMEOUT = 5  # seconds to wait for a newly opened window to appear

# ---- output layout ----------------------------------------------------------
BASE_HEADER = ["姓名", "手机号码", "门店", "车型"]
CONTACT_HEADER = ["姓名(与本人关系)", "电话"]
MIN_CONTACT_COLUMNS = 5  # the original sheet always had five contact pairs

# ---- contact table parsing --------------------------------------------------
CONTACT_SEPARATOR = "联系人类型"
NAME_RE = re.compile(r'.*\u540d:(.*)\u7535.*')    # 名:......电
PHONE_RE = re.compile(r'.*\u8bdd:(.*)\u4e0e')     # 话:......与
RELATION_RE = re.compile(r'.*\u7cfb:(.*)\u5355')  # 系:....单


def normalize_code(value):
    """Return a spreadsheet cell value as the text to type into the search box.

    xlrd returns numeric cells as floats; a whole-number float is rendered
    without the trailing ".0" so that 12345.0 is searched as "12345".
    ``None`` and blank cells become the empty string.
    """
    if value is None:
        return ""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def read_codes(path=INPUT_PATH, sheet_name=INPUT_SHEET, column=CODE_COLUMN):
    """Read settlement codes from the workbook, stopping at the first blank.

    The number of rows comes from the sheet itself, so short sheets no longer
    raise IndexError and long sheets are no longer truncated.
    """
    import xlrd

    sheet = xlrd.open_workbook(path).sheet_by_name(sheet_name)
    codes = []
    for row in range(1, sheet.nrows):  # skip the header row
        code = normalize_code(sheet.cell_value(row, column))
        if not code:
            # xlrd yields '' (never None) for an empty cell.
            break
        codes.append(code)
    return codes


def _first(pattern, text):
    """Return the first captured group of *pattern* in *text*, or ''."""
    found = pattern.findall(text)
    return found[0] if found else ""


def parse_contacts(table_text):
    """Extract ``["name(relation)", phone]`` pairs from the contact table text.

    The text is split on the contact separator; chunks without a name match
    are skipped. A chunk with a name but no phone or relation match yields
    empty strings for the missing parts instead of raising IndexError.
    """
    contacts = []
    for chunk in table_text.split(CONTACT_SEPARATOR):
        names = NAME_RE.findall(chunk)
        if not names:
            continue
        label = "%s(%s)" % (names[0], _first(RELATION_RE, chunk))
        contacts.append([label, _first(PHONE_RE, chunk)])
    return contacts


def flatten_row(record):
    """Flatten one record (scalars plus nested contact lists) into a flat list."""
    flat = []
    for item in record:
        if isinstance(item, list):
            flat.extend(item)
        else:
            flat.append(item)
    return flat


def build_header(contact_count=MIN_CONTACT_COLUMNS):
    """Return the header row with room for *contact_count* contact pairs.

    At least MIN_CONTACT_COLUMNS pairs are always emitted so the layout of the
    original sheet is preserved; more are added when a record needs them.
    """
    pairs = max(contact_count, MIN_CONTACT_COLUMNS)
    return BASE_HEADER + CONTACT_HEADER * pairs


def write_workbook(records, path=OUTPUT_PATH):
    """Write the header and one flattened row per record, then save to *path*."""
    import xlwt

    most_contacts = max(
        (len(record) - len(BASE_HEADER) for record in records), default=0
    )
    book = xlwt.Workbook()
    sheet = book.add_sheet(OUTPUT_SHEET)
    for col, title in enumerate(build_header(most_contacts)):
        sheet.write(0, col, title)
    for row, record in enumerate(records, start=1):
        flat = flatten_row(record)
        print(flat)
        for col, value in enumerate(flat):
            sheet.write(row, col, value)
    book.save(path)


def log_in(driver):
    """Open the login page and submit the account credentials."""
    from selenium.webdriver.common.by import By

    driver.get(LOGIN_URL)
    driver.find_element(By.ID, "loginId").send_keys(USERNAME)
    driver.find_element(By.ID, "password").send_keys(PASSWORD)
    driver.find_element(By.XPATH, LOGIN_BUTTON_XPATH).click()


def open_in_new_window(driver, url):
    """Open *url* in a new window, switch to it and return its handle.

    The new window is identified by comparing handles before and after the
    call rather than by a fixed index, because the order of
    ``driver.window_handles`` is not something to rely on.
    """
    from selenium.webdriver.support.ui import WebDriverWait

    known = set(driver.window_handles)
    # Pass the URL as an argument instead of formatting it into the script,
    # so quotes inside the URL cannot break the JavaScript.
    driver.execute_script("window.open(arguments[0]);", url)

    def fresh_handle(drv):
        fresh = [handle for handle in drv.window_handles if handle not in known]
        return fresh[0] if fresh else False

    handle = WebDriverWait(driver, WINDOW_TIMEOUT).until(fresh_handle)
    driver.switch_to.window(handle)
    return handle


def scrape_detail(driver):
    """Read one customer's record from the currently focused detail window.

    Returns ``[name, phone, shop, car, [contact, phone], ...]``.
    """
    from selenium.webdriver.common.by import By

    def text_by_id(element_id):
        return driver.find_element(By.ID, element_id).get_attribute("textContent")

    customer_name = text_by_id('baseInfo_name')
    shop_name = text_by_id('baseInfo_shopName')
    customer_car = text_by_id('baseInfo_modelName')
    customer_phone = text_by_id('phoneInfo_cellPhone')

    # Switch to the customer basic-information tab that holds the contacts.
    driver.find_element(By.XPATH, CONTACT_TAB_XPATH).click()
    time.sleep(2)
    contacts = parse_contacts(text_by_id('phoneBookTable'))
    return [customer_name, customer_phone, shop_name, customer_car] + contacts


def scrape(codes):
    """Collect a record for every code; return ``(records, failed_codes)``.

    A code ends up in ``failed_codes`` when the search shows no result link or
    when an expected element is missing on the detail page. The detail window
    is always closed and the browser is always shut down, even on errors.
    """
    from selenium import webdriver
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    records, failed = [], []
    driver = webdriver.Chrome()
    try:
        # implicitly_wait is a session-wide lookup timeout, so set it once.
        driver.implicitly_wait(5)
        log_in(driver)
        search_handle = open_in_new_window(driver, SEARCH_URL)

        total = len(codes)
        for done, code in enumerate(codes, start=1):
            print("第%d次获取数据,剩下%d次》》》》》》》" % (done, total - done))
            time.sleep(2)
            driver.find_element(By.ID, 'settleCode').send_keys(code)
            driver.find_element(By.ID, "search").click()
            time.sleep(1)
            # Clear the form now so the next iteration starts from empty input.
            driver.find_element(By.ID, "reset").click()
            time.sleep(2)

            try:
                link = driver.find_element(By.XPATH, RESULT_LINK_XPATH)
            except NoSuchElementException:
                failed.append(code)
                continue
            detail_url = link.get_attribute('href')
            if not detail_url:
                failed.append(code)
                continue

            open_in_new_window(driver, detail_url)
            try:
                time.sleep(4)
                record = scrape_detail(driver)
            except NoSuchElementException:
                failed.append(code)
            else:
                print(record)
                records.append(record)
            finally:
                driver.close()
                time.sleep(1)
                driver.switch_to.window(search_handle)
    finally:
        driver.quit()
    return records, failed


def main():
    """Run the whole pipeline: read codes, scrape, write the result workbook."""
    codes = read_codes()
    print(len(codes), codes)
    records, failed = scrape(codes)
    write_workbook(records)
    print("异常还款编号:%s" % failed)


if __name__ == "__main__":
    main()
三、心得
1、不熟悉re模块,学习了50分钟才写出提取联系人信息的那3行正则代码(代码中的 re.findall 部分);
2、多用xpath(自己写路径)筛选元素
3、面向对象编程很容易,下次多使用面向对象编程;
4、代码是脚本风格,下次使用软件开发规范;

浙公网安备 33010602011771号