#!/usr/bin/env python
# coding=utf-8
from selenium import webdriver # 导入模块
import time
from lxml import etree # lxml库是一个HTML、XML的解析器
import tushare as ts
import requests, json
import threading
import random
def get_init_driver():
    """Start a headless Chrome WebDriver and print how long startup took.

    Returns:
        The ``webdriver.Chrome`` instance that was just created.
    """
    started_at = time.time()
    # ChromeOptions collects the launch arguments handed to the browser.
    chrome_options = webdriver.ChromeOptions()
    # 'headless' makes Chrome run in the background without a visible window.
    chrome_options.add_argument('headless')
    browser = webdriver.Chrome(options=chrome_options)
    elapsed = time.time() - started_at
    print("初始化driver花时:% f 秒" % float(elapsed))
    return browser
# Shut down the driver.
def get_close_driver(driver):
    """Shut down a WebDriver session and release its browser resources.

    ``quit()`` is used instead of ``close()``: ``close()`` only closes the
    currently focused window and can leave the driver process and session
    running, whereas ``quit()`` closes every window and ends the session.

    Args:
        driver: The WebDriver instance to shut down.
    """
    driver.quit()
# Today's top-20 industry boards by capital inflow.
# 5-day / 10-day industry capital inflow: https://data.eastmoney.com/bkzj/hy.html?stat=10
# http://quote.eastmoney.com/center/boardlist.html#industry_board
# https://data.eastmoney.com/bkzj/hy.html
def get_today_industry_hy_html(driver, number):
    """Collect industry names from the Eastmoney industry fund-flow page.

    The page is loaded through ``driver``, the first table under the
    ``dataview-body`` div is parsed, and the text of the second cell of each
    selected row is gathered. The results of two
    ``get_today_industry_hy_api(6)`` calls are appended and the combined
    list is de-duplicated.

    Args:
        driver: WebDriver used to load the page.
        number: Exclusive upper bound on the 1-based row position; only rows
            3 .. ``number - 1`` of the table are read.

    Returns:
        A list of unique strings. The order is unspecified because the
        de-duplication goes through a ``set``.

    Raises:
        IndexError: If the page source contains no matching table.
    """
    started_at = time.time()
    driver.get("https://data.eastmoney.com/bkzj/hy.html")
    document = etree.HTML(driver.page_source)
    table = document.xpath("//div[@class='dataview-body']/table")[0]

    names = []
    for row_no, row in enumerate(table.xpath('.//tr'), start=1):
        # Rows 1 and 2 are always skipped (presumably header rows), as is
        # every row at or beyond position `number`.
        if row_no <= 2 or row_no >= number:
            continue
        for col_no, cell in enumerate(row, start=1):
            # Only the second column is of interest.
            if col_no == 2:
                names.extend(
                    fragment.strip(" ")
                    for fragment in cell.xpath(".//text()")
                )
    # The reported duration covers the page load and parsing only; the API
    # calls below are not included.
    finished_at = time.time()

    # NOTE(review): both calls use the same argument although the original
    # locals were named for 5-day and 10-day data - confirm the intended
    # arguments against get_today_industry_hy_api.
    names.extend(get_today_industry_hy_api(6))
    names.extend(get_today_industry_hy_api(6))

    unique_names = list(set(names))
    print("查询实时行业(6倍数据更新一次)花时:% .3f 秒" % float(finished_at - started_at))
    return unique_names
# Today's top-20 concept boards by capital inflow.
# http://quote.eastmoney.com/center/boardlist.html#concept_board
def get_now_gn_html(driver, number):
    """Collect concept names from the Eastmoney concept fund-flow page.

    The page is loaded through ``driver``, the first table under the
    ``dataview-body`` div is parsed, and the text of the second cell of each
    selected row is gathered. A few alias names are appended when
    ``basic_rule_1.is_string_in_array`` reports a match, the results of two
    ``get_now_gn_api_type(6, 'f3')`` calls are added, the list is passed
    through ``basic_rule_1.is_array_not_in_array`` and finally de-duplicated.

    Args:
        driver: WebDriver used to load the page.
        number: Exclusive upper bound on the 1-based row position; only rows
            3 .. ``number - 1`` of the table are read.

    Returns:
        A list of unique values. The order is unspecified because the
        de-duplication goes through a ``set``.

    Raises:
        IndexError: If the page source contains no matching table.
    """
    # Timestamps are only consumed by a currently disabled elapsed-time
    # report; they are still taken at the same points as before.
    started_at = time.time()
    driver.get("https://data.eastmoney.com/bkzj/gn.html")
    document = etree.HTML(driver.page_source)
    table = document.xpath("//div[@class='dataview-body']/table")[0]

    concepts = []
    for row_no, row in enumerate(table.xpath('.//tr'), start=1):
        # Rows 1 and 2 are always skipped (presumably header rows), as is
        # every row at or beyond position `number`.
        if row_no <= 2 or row_no >= number:
            continue
        for col_no, cell in enumerate(row, start=1):
            # Only the second column is of interest.
            if col_no == 2:
                concepts.extend(
                    fragment.strip(" ")
                    for fragment in cell.xpath(".//text()")
                )

    # Add alternative spellings of some concepts; the checks run in order,
    # so names appended by an earlier check are visible to the later ones.
    if basic_rule_1.is_string_in_array(concepts, "CPO"):
        concepts.extend(["共封装光学(CPO)", "CPO概念"])
    if basic_rule_1.is_string_in_array(concepts, "算力概念"):
        concepts.append("东数西算(算力)")
    if basic_rule_1.is_string_in_array(concepts, "光通信模块"):
        concepts.append("共封装光学(CPO)")
    finished_at = time.time()

    # Merge in the API results. Per the original note on the field codes:
    # 'f62' = current, 'f164' = last 5 days, 'f3' = price change.
    # NOTE(review): both calls pass identical arguments although they were
    # labelled as 5-day and 10-day data - confirm the intended field codes.
    concepts.extend(get_now_gn_api_type(6, 'f3'))
    concepts.extend(get_now_gn_api_type(6, 'f3'))

    # Per the original comment this removes limit-up concept entries; the
    # exact rule lives in basic_rule_1, outside this block.
    concepts = basic_rule_1.is_array_not_in_array(concepts, None)
    # Drop duplicates.
    return list(set(concepts))
def get_now_gn_df_html(driver, ts_code):
    """Return the concept names listed for one stock on Eastmoney F10.

    The core-conception page of the stock is loaded through ``driver`` and
    the 10th ``<p>`` text node of the document is split on spaces into
    concept names. When that yields nothing (no ``summary`` div, fewer than
    ten text nodes, or a blank node) the concepts are taken from
    ``get_stock_gn(ts_code)`` instead. The result is always passed through
    ``basic_rule_1.is_array_not_in_array`` before being returned.

    Args:
        driver: WebDriver used to load the page.
        ts_code: Stock code; characters 0-5 and 7-8 are used, so a value
            shaped like ``"000001.SZ"`` is presumably expected.

    Returns:
        Whatever ``basic_rule_1.is_array_not_in_array`` returns for the
        collected concept list.
    """
    # Reorder the code for the page URL: market prefix first, then the
    # six-character symbol (e.g. "000001.SZ" -> "SZ000001").
    page_code = ts_code[7:9] + ts_code[0:6]
    driver.get(
        "https://emweb.securities.eastmoney.com/PC_HSF10/CoreConception/Index?type=web&code=" + page_code)
    document = etree.HTML(driver.page_source)

    summaries = document.xpath("//div[@class='summary']")
    # NOTE(review): "//p/text()" is an absolute path, so it matches every
    # <p> in the document rather than only those under the summary div.
    # It is kept as-is because the fixed position used below was chosen
    # against that document-wide result.
    # A missing summary div now leads to the fallback instead of IndexError.
    paragraphs = summaries[0].xpath("//p/text()") if summaries else []

    concepts = []
    if len(paragraphs) >= 10:
        # Only the 10th text node carries the concept list.
        raw = paragraphs[9].strip(" ").strip('\n')
        # Drop empty pieces: "".split(" ") is [''], which used to hide a
        # blank result from the fallback check and leak '' into the output.
        concepts = [piece for piece in raw.split(" ") if piece]

    if not concepts:
        print("在东方财富网没有到到概念数据,在Tushare查询")
        concepts = get_stock_gn(ts_code)

    # Per the original comment this removes limit-up concept entries; the
    # exact rule lives in basic_rule_1, outside this block.
    concepts = basic_rule_1.is_array_not_in_array(concepts, None)
    return concepts