import requests
import re
from lxml import etree
'''Obaiwan (obaiwan.com) scraper.'''
# Fetch the results page of the site once per year (2003 through 2015), pull
# the captured cells out of every matching table row, and write them to
# 'history.txt' as one comma-separated line per row.

# Results page; the year to query is sent as POST form data.
url = 'http://www.obaiwan.com/hk49/results/'

# Matches one <tr> of the results table (19 cells, CRLF-separated) and
# captures 9 of them: cells 2-3 (plain text), cells 4-9 (text inside <b>),
# and the last cell (text inside <b>).  The pattern is deliberately a
# non-raw string so that '\r\n' is a literal CR LF; the adjacent literals
# below concatenate to exactly the same pattern text as a single literal.
p = re.compile(
    '<tr >\r\n'
    '<td >.+?</td>\r\n'                          # cell 1: skipped
    '<td >(.+?)</td>\r\n'                        # cell 2: captured
    '<td >(.+?)</td>\r\n'                        # cell 3: captured
    '<td ><b style=".+?">(.+?)</b></td>\r\n'     # cells 4-9: captured
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td >.+?</td>\r\n'                          # cells 10-16: skipped
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'     # cell 17: captured
    '</tr>'
)

# Seconds to wait on the server before a request is abandoned; without a
# timeout a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

# Collect output lines in a list and join once at the end instead of
# growing a string with += inside the nested loop.
lines = []
for year in range(2003, 2016):
    # NOTE(review): the 'submit' value is already percent-encoded, and
    # requests form-encodes values again when sending -- confirm that the
    # server accepts it in this form.  Kept byte-for-byte from the original.
    data = {'qinum': year, 'submit': '%CC%E1%BD%BB%B2%E9%D1%AF'}
    r = requests.post(url, data=data, timeout=REQUEST_TIMEOUT)
    # Force the decoding used for r.text rather than relying on detection.
    r.encoding = 'gb2312'
    lines.extend(','.join(row) + '\n' for row in p.findall(r.text))

res = ''.join(lines)

# The output file is opened only after every request has succeeded, so a
# failed run does not truncate an existing history.txt; the `with` block
# guarantees the handle is closed even if the write raises.
with open('history.txt', 'w') as f:
    f.write(res)