浏览器分离

 

from selenium import webdriver
import time, random


def gen_browser(btype='chrome'):
    """Create and return a new Selenium WebDriver instance.

    Args:
        btype: Case-insensitive browser name. Supported values are
            'chrome' (default), 'firefox', 'edge', 'ie' and 'safari'.

    Returns:
        A freshly started WebDriver for the requested browser. The caller
        is responsible for calling ``quit()`` on it.

    Raises:
        ValueError: If ``btype`` is not one of the supported names.
    """
    # Map the public name to the attribute on selenium.webdriver; resolved
    # lazily so that only the requested driver class is looked up.
    driver_attrs = {
        'chrome': 'Chrome',
        'firefox': 'Firefox',
        'edge': 'Edge',
        'ie': 'Ie',
        'safari': 'Safari',
    }
    key = str(btype).strip().lower()
    if key not in driver_attrs:
        raise ValueError(
            'unsupported browser type {!r}; expected one of {}'.format(
                btype, sorted(driver_attrs)))
    return getattr(webdriver, driver_attrs[key])()


# Crawl the Wukong home page, collect every link that carries a `uid`
# query value, and visit each user's detail page in a separate browser to
# read the self-introduction. Results accumulate in `wukong_l` as
# (uid, link_text, self_introduction) tuples.
# NOTE: the find_element(s)_by_* calls are the Selenium 3 API, kept as in
# the original script; they were removed in Selenium 4.3.
browser = webdriver.Chrome()

url = 'https://www.wukong.com/'
href_l = []  # kept for compatibility; never populated by this script
wukong_l = []
try:
    browser.get(url)
    browser.refresh()
    time.sleep(20)

    # Scroll to the bottom repeatedly, with a random pause between scrolls,
    # so that the page loads more entries.
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    for isc in range(20):
        time.sleep(random.randint(1, 10))
        browser.execute_script(js)

    all_a = browser.find_elements_by_tag_name('a')
    for i in all_a:
        try:
            href_t = i.get_attribute('href')
            # Anchors without an href return None; skip them instead of
            # relying on the TypeError being swallowed below.
            if not href_t or 'uid' not in href_t:
                continue
            txt = i.text
            if txt == '':
                continue
            uid = href_t.split('uid=')[-1]
            wukong_detail_url = 'https://www.wukong.com/user/?uid={}'.format(uid)
            browser_tmp = gen_browser()
            try:
                browser_tmp.get(wukong_detail_url)
                time.sleep(2)
                selfd = browser_tmp.find_element_by_class_name('user-title').text
            finally:
                # Always shut the per-user browser down, even when the
                # detail page lacks the expected element.
                browser_tmp.quit()
            wukong_l.append((uid, txt, selfd))
        except Exception as e:
            # Best-effort crawl: report the failure and move on to the
            # next link.
            print(e)
finally:
    # Release the main browser even if the crawl aborts part-way.
    browser.quit()

import pymysql

# MySQL connection settings read by mysql_write(): host, port, user,
# password and database name.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from configuration outside the repository.
h = '192.168.6.20'
pt = 3306
u = 'root'
p = 'n126'
db = 'media'


def mysql_write(sql):
    """Execute a single SQL statement on the configured MySQL server and commit.

    Connection settings come from the module-level names ``h``, ``pt``,
    ``u``, ``p`` and ``db``.

    Args:
        sql: The complete SQL statement to execute.

    Returns:
        0 when the statement was executed and committed, 1 when the
        connection could not be established (the error is printed).

    Raises:
        Exception: Any error raised by pymysql while executing or
            committing the statement propagates to the caller; the cursor
            and connection are closed first.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Close the connection even when execute/commit fails so that a
        # bad statement does not leak a server connection.
        conn.close()
    return 0


# Build one multi-row INSERT from the gathered (uid, nickname,
# self-introduction) tuples, append it to sql.tmp.sql, and run it.
from pymysql.converters import escape_string

sql = 'INSERT INTO toutiao_uid_gathered_wukong (wukong_uid,wukong_nickname,selfintroduction,time_script) VALUES '
if wukong_l:
    # The values are scraped page text, so escape them before embedding
    # them in the statement; a quote in a nickname would otherwise break
    # (or inject into) the SQL.
    sql_rows = [
        '("{}","{}","{}","{}")'.format(
            escape_string(str(uid)),
            escape_string(str(nickname)),
            escape_string(str(intro)),
            int(time.time()),
        )
        for uid, nickname, intro in wukong_l
    ]
    sql = '{}{}'.format(sql, ','.join(sql_rows))

    # Delay kept from the original script; its purpose is not evident here.
    time.sleep(32)
    print(sql)
    with open('sql.tmp.sql', 'a', encoding='utf-8') as fo:
        fo.write(sql)

    mysql_write(sql)
else:
    # Without rows the statement would end at "VALUES " and be rejected by
    # the server, so skip the write entirely.
    print('no wukong users gathered; nothing to insert')



posted @ 2018-01-17 19:39  papering  阅读(263)  评论(0)  编辑  收藏  举报