Scraping stock data from the 同花顺 (10jqka) site with Python and Selenium (Part 1)

       As the saying goes, financial markets ebb and flow, and China's A-share market rises and falls in waves just the same. Picking stocks by hand every time started to feel like too much work, so I decided to write a small program: add a screening algorithm on top, make it convenient with a GUI, visualize the data, and pull the stocks that fit my own decision rules out of the huge pool of listed shares.
The original plan was a static scraper written with the requests package and BeautifulSoup, but it turned out the site is rendered with Ajax, so I switched to automated scraping with Selenium and wrote the test source code shown further below.
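For contrast, here is roughly what the abandoned static attempt looks like. It is only a sketch: since the quote table is filled in by Ajax after the page loads, the HTML that requests receives carries little of the data, and the selector used here is illustrative rather than taken from the real page.

# Sketch of the static approach that was dropped (for contrast only).
import requests
from bs4 import BeautifulSoup

resp = requests.get('http://q.10jqka.com.cn/',
                    headers={'User-Agent': 'Mozilla/5.0'})
soup = BeautifulSoup(resp.text, 'html.parser')
rows = soup.select('table tbody tr')  # illustrative selector, not checked against the live page
print(len(rows))  # per the observation above, most quote rows never appear in this static HTML

The Selenium test code: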


# coding=utf-8
import pandas as pd
import numpy as np
#import matplotlib
from selenium import webdriver
import time
if __name__ == '__main__':
    # url = 'http://www.ots-sec.cn/ots911/vip_doc/18922945_0_0_1.html'  # earlier test target, kept for reference
    url = 'http://q.10jqka.com.cn/'  # 同花顺 official site, price-change ranking page
    driverpath = r'C:\Program Files\Mozilla Firefox\geckodriver.exe'
    browser = webdriver.Firefox(executable_path=driverpath)
    browser.get(url)
    print(browser.current_url)
    print(browser.page_source)
    with open('test.txt', 'w', encoding='utf-8') as f:
        f.write(browser.page_source)
    Monday = browser.find_element_by_xpath('/html/body/div[2]/div[2]')
    print('=======================================')
    print(Monday)
    # The pager buttons are href="javascript:void(0)" links driven by JS,
    # so they can only be followed by clicking them through Selenium.
    Tuesday = browser.find_element_by_xpath(".//*[@id='m-page']/a[1]")
    Tuesday.click()
    print(Tuesday)
    Tuesday = browser.find_element_by_xpath(".//*[@id='m-page']/a[2]")
    Tuesday.click()
    print(Tuesday)

    Wednesday = browser.find_element_by_xpath(".//*[@id='m-page']/a[8]")  # always the "next page" link; it only works once page a[2] is active
    Wednesday.click()
    time.sleep(1)

    Thursday = browser.find_element_by_xpath('/html/body/div[2]/div[2]')  # the whole 个股行情 (per-stock quotes) block
    print(Thursday.text)
    Friday = browser.find_element_by_xpath(".//*[@id='m-page']/span")  # pager text: 首页 上一页 1 2 3 4 5 下一页 尾页, plus the counter '3/186'
    print(Friday.text)
    number = str(Friday.text).split('/')[1]  # total number of pages
    print(number)

    for i in range(1, 50):  # the page count is hard-coded here; it could also use `number` from above
        Wednesday = browser.find_element_by_xpath(".//*[@id='m-page']/a[8]")  # the "next page" link again
        Wednesday.click()
        time.sleep(1)
        Thursday = browser.find_element_by_xpath('/html/body/div[2]/div[2]')  # the whole 个股行情 (per-stock quotes) block
        print(Thursday.text)
        with open('spider_socketest.csv', 'a+', encoding='utf-8') as f:
            f.write(Thursday.text)
        with open('spider_socketest.txt', 'a+', encoding='utf-8') as f:
            f.write(Thursday.text)


    # time.sleep(3)
    print("------------end-----------------")
    browser.close()  # close the browser
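Thursday.text comes back as plain newline-separated text, so the spider_socketest.csv written above is not yet truly comma-separated, and the pandas/numpy imports at the top are still unused. A possible follow-up, sketched here under the assumption that each stock ends up on its own line with whitespace-separated fields (which is how Selenium typically renders a table's .text, but not verified here), is to turn one page of that text into a DataFrame:

# Sketch: convert one page of Thursday.text into a pandas DataFrame.
# page_text_to_frame is a hypothetical helper, not part of the script above.
import pandas as pd

def page_text_to_frame(page_text):
    rows = [line.split() for line in page_text.splitlines() if line.strip()]
    width = max(len(r) for r in rows)  # assume the widest rows are the data rows
    return pd.DataFrame([r for r in rows if len(r) == width])

# frame = page_text_to_frame(Thursday.text)
# frame.to_csv('spider_socketest.csv', index=False, encoding='utf-8')

On the timing side, the fixed time.sleep(1) after every click is the simplest way to let the Ajax refresh finish; Selenium's explicit waits (WebDriverWait with expected_conditions) would be the more robust choice when the site responds slowly.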