import urllib.request

import pandas as pd
from bs4 import BeautifulSoup
from pandas import DataFrame, Series
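# Read a local HTML file of limit-up stock records, build a DataFrame from its
# table, merge the rows into the pipe-delimited extern_user.txt file, and
# write the result back out.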
file_name = "2222.txt"
file_path = "file:///F:/python/untitled1/core/do_data/"
response = urllib.request.urlopen(file_path + file_name)
html = response.read()
soup = BeautifulSoup(html, "html.parser")
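# The first <tr> holds the <th> header cells; use their text as column names.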
trs = soup.find_all("tr")
ths = trs[0].find_all("th")
index_d = []
for th in ths:
    index_d.append(th.getText())
data = DataFrame(columns=index_d)
print(index_d)
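# Collect every data row; header rows contain no <td> cells and are skipped.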
rows = []
for tr in trs:
    tds = tr.find_all("td")
    td_datas = []
    for td in tds:
        td_datas.append(td.getText())
    if len(td_datas) != 0:
        rows.append(Series(td_datas, index=index_d))
# DataFrame.append was removed in pandas 2.0; build the frame with pd.concat.
data = pd.concat([data, DataFrame(rows)], ignore_index=True)
print(len(data))
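# Join the limit-up time and the two reason columns into one "new" text field.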
str2s = []
# Iterate by position over the whole frame; the original indexed a
# nonexistent column name ("股票全码") just to get the row count.
for i in range(len(data)):
    str2 = data["涨停时间"][i] + " " + data["历史涨停原因"][i] + " " + data["涨停选原因"][i]
    str2s.append(str2)
data["new"] = str2s
data = data.drop_duplicates(subset=["股票代码"], keep="last")
# Re-index so the positional data.loc[i, ...] lookups below cannot raise
# KeyError on the gaps left by drop_duplicates.
data = data.reset_index(drop=True)
print(len(data))
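# Load the existing extern_user.txt and keep its first four pipe-delimited columns.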
new_data = pd.read_table("extern_user.txt", header=None, sep="|", encoding="utf-8", dtype=str)
new_data = new_data.iloc[:, 0:4]
new_data = new_data.dropna()
# Rename the columns directly; assigning a list to columns.name raises a
# TypeError (the name must be a hashable scalar).
new_data.columns = ["a", "b", "c", "d"]
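# For each stock, append a row: market flag, stock code, category "31",
# and the combined reason text.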
new_rows = []
for i in range(len(data)):
    d_code = str(data.loc[i, "股票代码"])
    # Codes starting with "6" (Shanghai market) get flag "1", all others "0".
    flag = "1" if d_code[0] == "6" else "0"
    row = [flag, d_code, "31", data.loc[i, "new"]]
    print(row)
    new_rows.append(row)
# DataFrame.append was removed in pandas 2.0; concatenate all new rows at once.
new_data = pd.concat(
    [new_data, DataFrame(new_rows, columns=new_data.columns)],
    ignore_index=True,
)
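# De-duplicate on (code, category), sort, and write the merged file back out.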
new_data = new_data.drop_duplicates(subset=["b", "c"], keep="last")
new_data["c"] = new_data["c"].astype(int)
new_data = new_data.sort_values(by=["c", "b"], ascending=[True, True])
print(new_data.columns)
new_data = new_data.reset_index(drop=True)
new_data["e"] = "0.00"
new_data.to_csv("save/extern_user.txt", sep="|", index=False, header=False)