import urllib.request;
from pandas import DataFrame;
from pandas import Series;
from bs4 import BeautifulSoup;
# Load the saved HTML page from the local filesystem via a file:// URL.
# Use a context manager so the response object is always closed, even on error
# (the original leaked the open response).
with urllib.request.urlopen('file:///F:/python/untitled1/core/do_data/2month.html') as response:
    html = response.read()
soup = BeautifulSoup(html, "html.parser")
# The first <tr> of the table carries the <th> cells whose text becomes the
# DataFrame's column names.
trs = soup.find_all('tr')
index_d = [th.getText() for th in trs[0].find_all('th')]
data = DataFrame(columns=index_d)
print(index_d)
# Collect each data row (<tr> containing <td> cells) as a Series, then build
# the DataFrame in one shot. DataFrame.append was removed in pandas 2.0 and was
# O(n^2) anyway (it copied the whole frame on every row); accumulating rows in
# a list and constructing once is both current-API and linear.
rows = []
for tr in trs:
    cells = [td.getText() for td in tr.find_all('td')]
    # Header rows (or empty rows) have no <td> cells — skip them.
    if cells:
        rows.append(Series(cells, index=index_d))
data = DataFrame(rows, columns=index_d)
print(len(data))
# Build the export string for each row:
#   full stock code with its exchange prefix rewritten ("SZ" -> "0|",
#   "SH" -> "1|"), then "|", then limit-up time and the two reason columns
#   separated by spaces. Iterate the columns in lockstep with zip instead of
#   indexing data[col][i] repeatedly (four lookups per row in the original).
str2s = []
for code, lu_time, hist_reason, sel_reason in zip(
        data["股票全码"], data["涨停时间"], data["历史涨停原因"], data["涨停选原因"]):
    prefixed = str(code).replace("SZ", "0|").replace("SH", "1|")
    str2s.append(prefixed + "|" + lu_time + " " + hist_reason + " " + sel_reason)
data["new"] = str2s
# Keep only the last occurrence of each stock code.
# NOTE(review): dedupe key is '股票代码' while the string above uses '股票全码' —
# presumably both columns exist in the table; verify against the HTML.
data = data.drop_duplicates(subset=['股票代码'], keep='last')
print(len(data))
df2 = data["new"].values
# Write one record per line. Use a context manager so the file is closed even
# on error, and write the joined string with write() — the original used
# writelines() on a single str, which iterates it character by character.
# NOTE(review): explicit UTF-8 instead of the platform default encoding so the
# Chinese text round-trips consistently — confirm downstream consumers expect UTF-8.
with open('data.txt', 'w', encoding='utf-8') as file:
    file.write("\n".join(df2))