# Download the Double Color Ball (双色球) draw history from the 500.com lottery
# website and save the web page as 01.txt before running this script.


def extract_records(lines):
    """Return the draw records found in an iterable of text lines.

    For each line, leading whitespace and surrounding newlines are removed
    and only the text before the first space is kept.  That token is kept
    as a record when it is 19, 20 or 21 characters long.

    NOTE(review): presumably a 19-character record is a 2-digit year, a
    3-digit issue number and seven 2-digit balls -- confirm against the
    saved page.

    Args:
        lines: Iterable of strings, e.g. an open text file.

    Returns:
        A list of the matching tokens, in input order.
    """
    records = []
    for line in lines:
        token = line.lstrip().strip('\n').split(' ')[0]
        if 19 <= len(token) <= 21:
            records.append(token)
    return records


def save_records(records, path='彩票数据.txt'):
    """Write each record to *path* on its own line, replacing the file."""
    # The context manager closes the file even if a write fails.
    with open(path, 'w') as out_file:
        out_file.writelines(record + '\n' for record in records)


if __name__ == '__main__':
    # Both files use the platform default encoding, as the original did.
    with open('01.txt') as file:
        data = extract_records(file)
    save_records(data)
##for k in data:
## print('20%s%s: %s %s %s %s %s %s--%s'%\
## (k[0:2],k[2:5],k[5:7],k[7:9],k[9:11],k[11:13],k[13:15],k[15:17],k[17:19]))
import urllib.request
import re
# History page for a range of draws; the query string selects issues
# 16110 through 16120.
url = 'http://datachart.500.com/ssq/history/newinc/history.php?start=16110&end=16120'

# One draw row: a 5-digit issue number, six "t_cfont2" cells and one
# "t_cfont4" cell, each holding a 2-digit number.  The trailing '<td'
# requires another cell to follow the last captured one.
DRAW_ROW_PATTERN = re.compile(
    r'(\d{5})</td>'
    + r'<td class="t_cfont2">(\d\d)</td>' * 6
    + r'<td class="t_cfont4">(\d\d)</td><td'
)

# Not used below; kept because it is a module-level name of this script.
allData = []


def parse_draws(html):
    """Extract every draw row from the history page markup.

    Args:
        html: Decoded text of the history page.

    Returns:
        A list with one entry per matched row.  Each entry is a
        one-element list holding an 8-tuple of strings: the issue number,
        the six "t_cfont2" numbers and the "t_cfont4" number.  The nesting
        mirrors what the script has always printed.
    """
    return [[groups] for groups in DRAW_ROW_PATTERN.findall(html)]


def fetch_html(page_url, timeout=30):
    """Download *page_url* and return its body decoded as UTF-8.

    Args:
        page_url: Address of the page to download.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the script indefinitely.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(page_url, timeout=timeout) as response:
        return response.read().decode('utf-8')


if __name__ == '__main__':
    html = fetch_html(url)
    tempList = parse_draws(html)
    print(tempList)