import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# --- Log in, walk to the listing page, and initialise the page counter. ---
# Locators use find_element(By.<KIND>, value): the find_element_by_* helpers
# were removed in Selenium 4.3, while this form works on old and new releases.
browser = webdriver.Chrome()
wait_time = 1  # seconds to pause after submitting the login form

# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store instead.
USER = 'xl.feng'
PWD = 'fengxiaole'

# Open the login page and locate the form controls.
seed_url = 'http://xx.com/login.jhtml'
browser.get(seed_url)
search_input_user = browser.find_element(By.ID, 'username')
search_input_pwd = browser.find_element(By.ID, 'password')
search_submit = browser.find_element(
    By.XPATH, '//div[@class="x-y"]/input[@type="submit"]'
)

# Fill in the form and submit it, then pause for a fixed interval.
search_input_user.send_keys(USER)
search_input_pwd.send_keys(PWD)
search_submit.click()
time.sleep(wait_time)

# From the index page, click the third sidebar entry and then the element
# with id "clickHome".
new_url = 'http://xx.com/index.jhtml'
browser.get(new_url)
target = browser.find_element(
    By.XPATH, '//ul[@class="x-list y-sidebar-list"]/li[3]'
)
target.click()
target = browser.find_element(By.ID, 'clickHome')
target.click()

# Load the listing page that fun() pages through.
new_url = 'http://xx.com/ask/list.jhtml?nxType=nx'
browser.get(new_url)

# Page counter: read and incremented by fun(), and used in saved file names.
counter_ = 1
def fun():
    """Page through the listing and save each page's HTML to disk.

    Repeats until the module-level ``counter_`` reaches 170000. Each pass:
    waits two seconds, clicks the element with id ``dg_next``, writes the
    browser's current page source to
    ``ml_task_html/<counter>_<yymmdd>.html`` (UTF-8), prints the counter and
    increments it.

    This is a loop rather than self-recursion: one stack frame per page would
    exceed Python's default recursion limit long before 170000 pages. The
    output file is opened in a ``with`` block so every handle is closed.
    """
    global counter_

    wait_time = 2  # seconds; local value, separate from the module-level one
    dir_ = 'ml_task_html/'

    # "<" rather than "==" so a counter already past the limit also stops.
    while counter_ < 170000:
        time.sleep(wait_time)

        # Advance to the next page of results.
        target = browser.find_element(By.ID, 'dg_next')
        target.click()

        page_source = browser.page_source
        localtime_ = time.strftime("%y%m%d", time.localtime())
        filename_ = '%s%s_%s%s' % (dir_, counter_, localtime_, '.html')
        with open(filename_, 'w', encoding='utf-8') as fo:
            fo.write(page_source)

        print(counter_)
        counter_ += 1
# Start the page-saving routine defined above.
fun()
# NOTE(review): `dd` is not referenced anywhere in the visible code —
# presumably a leftover placeholder; confirm before removing.
dd = 0