爬虫类_selenium获取相关信息

一、所需库 

selenium、xlrd、xlwt

 

二、代码

  1 #!/usr/bin/env python
  2 # -*- coding:utf-8 -*-
  3 # Author:Fore Wang
  4 from selenium import webdriver
  5 import time
  6 import re
  7 import xlrd
  8 import xlwt
  9 
 10 # 打开文件并读取
 11 path = r"单编号样表.xls"
 12 x1 = xlrd.open_workbook(path)
 13 
 14 # 打开 sheet1
 15 sh1 = x1.sheet_by_name("Sheet1")
 16 person_num = sh1.nrows - 1
 17 
 18 original_li = []
 19 for i in range(1, 349):
 20     hk_num = sh1.cell_value(i, 1)  # 还款编号
 21     if hk_num is None:
 22         break
 23     original_li.append(hk_num)
 24 aa = len(original_li)
 25 print(aa, original_li)
 26 
 27 
 28 # 登陆客户系统==============================================================1
 29 driver = webdriver.Chrome()
 30 driver.get("http://hsr.huashenghaoche.com/hshcmdm/login")
 31 driver.implicitly_wait(3)
 32 
 33 # 输入用户名、密码,登陆
 34 driver.find_element_by_id("loginId").send_keys("账户名称")
 35 # time.sleep(1)
 36 driver.find_element_by_id("password").send_keys("账户密码")
 37 # time.sleep(1)
 38 driver.find_element_by_xpath("/html/body/div/div/div/form/input").click()
 39 driver.implicitly_wait(5)  # 隐式等待
 40 # 主界面句柄
 41 main_handle = driver.current_window_handle
 42 
 43 info_writr_li = []
 44 
 45 
 46 # 订单综合查询页面===============================================================2
 47 URL = "http://collection.huashenghaoche.com/hshc-collection-web/commonSearch/gotoPage"
 48 js = 'window.open("%s");' % URL
 49 driver.execute_script(js)   # js脚本方式打开新窗口
 50 driver.implicitly_wait(5)
 51 handles = driver.window_handles
 52 # 切换至新窗口--总览界面
 53 driver.switch_to.window(handles[1])
 54 
 55 un_hk_list =[]
 56 temp_count = 0  # 执行次数
 57 for i in original_li:
 58     temp_count += 1
 59     print("第%d次获取数据,剩下%d次》》》》》》》"% (temp_count, (aa - temp_count)))
 60     time.sleep(2)
 61     driver.find_element_by_id('settleCode').send_keys(i)
 62     driver.find_element_by_id("search").click()
 63     time.sleep(1)
 64     driver.find_element_by_id("reset").click()
 65 
 66     # 获取信息查询界面
 67     time.sleep(2)
 68     try:
 69         check_hk = driver.find_element_by_xpath('//*[@id="commonSearchTable"]/tbody/tr/td[9]/a').\
 70             get_attribute('textContent')
 71     except:
 72         un_hk_list.append(i)
 73         continue
 74     URL2 = driver.find_element_by_xpath('//*[@id="commonSearchTable"]/tbody/tr/td[9]/a').get_attribute('href')
 75     # print(check_hk, URL2)
 76 
 77     # 进入信息查询界面================================================================================3
 78     js1 = 'window.open("%s");' % URL2
 79     driver.execute_script(js1)
 80     driver.implicitly_wait(5)
 81     handles = driver.window_handles
 82     # 切换至新窗口--总览界面
 83     driver.switch_to.window(handles[2])
 84     time.sleep(4)
 85 
 86     # 获取客户本人信息
 87     # 客户姓名
 88     customer_name = driver.find_element_by_id('baseInfo_name').get_attribute("textContent")
 89     # 门店
 90     shop_name = driver.find_element_by_id('baseInfo_shopName').get_attribute('textContent')
 91     # 车型
 92     customer_car = driver.find_element_by_id('baseInfo_modelName').get_attribute("textContent")
 93     # 手机号码
 94     customer_phone = driver.find_element_by_id('phoneInfo_cellPhone').get_attribute("textContent")
 95 
 96     # 本人信息 =========================================!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 97     person_self_info = [customer_name, customer_phone, shop_name, customer_car]
 98     # print(person_self_info)
 99     driver.implicitly_wait(5)
100 
101     # 客户基本信息界面
102     driver.find_element_by_xpath('//*[@id="taskDetailTabs"]/li[2]/a').click()
103     time.sleep(2)
104     link = driver.find_element_by_id('phoneBookTable')
105     link_info = link.get_attribute('textContent')
106 
107     # re提取联系人信息
108     li = link_info.split("联系人类型")
109     con_info_list = []
110 
111     for j in li:
112         name = re.findall(r'.*\u540d:(.*)\u7535.*', j)  # 名:......电
113         phone = re.findall(r'.*\u8bdd:(.*)\u4e0e', j)  # 话:......与
114         contact = re.findall(r'.*\u7cfb:(.*)\u5355', j)  # 系:....单
115         if len(name) == 0:
116             continue
117         # print(name, phone, contact)
118         temp_ = "%s(%s)" % (name[0], contact[0])
119         info = [temp_, phone[0]]
120         con_info_list.append(info)
121     # print(con_info_list)
122     final_info_list = person_self_info + con_info_list
123     print(final_info_list)
124     info_writr_li.append(final_info_list)
125     driver.close()
126     time.sleep(1)
127     driver.switch_to.window(handles[1])
128 
129 
# ---- Write the collected records to a new workbook ----
wb = xlwt.Workbook()
ws = wb.add_sheet('客户信息')

# Header row: four customer columns followed by five (contact, phone) pairs.
first_row = ["姓名", "手机号码", "门店", "车型"] + ["姓名(与本人关系)", "电话"] * 5
for col_idx, title in enumerate(first_row):
    ws.write(0, col_idx, title)

# Kept from the original script; neither name is read below.
count = len(info_writr_li)
new_li = []

for row_offset, record in enumerate(info_writr_li):
    # Flatten the record: nested [name(relation), phone] lists are spliced
    # in place, plain values are kept as they are.
    tem_li = []
    for item in record:
        if isinstance(item, list):
            tem_li.extend(item)
        else:
            tem_li.append(item)

    print(tem_li)

    # Data rows start at sheet row 1, directly below the header.
    for col_idx, value in enumerate(tem_li):
        ws.write(row_offset + 1, col_idx, value)

wb.save(r"C:\\Users\Administrator\Desktop\单编号三方联系方式.xls")

# Report the repayment numbers for which no search result was found.
print("异常还款编号:%s" % un_hk_list)

 

 

三、心得

1、不熟悉re模块,学习了50分钟才写出提取联系人信息的那三行正则代码;

2、多用xpath(自己写路径)筛选元素

3、面向对象编程很容易,下次多使用面向对象编程;

4、代码是脚本风格,下次使用软件开发规范;

posted @ 2020-03-03 15:10  wangspy  阅读(78)  评论(0)    收藏  举报