import re
from urllib.request import urlopen
def getPage(url):
    """Fetch *url* and return the response body decoded as GBK text.

    Args:
        url: Address handed to ``urllib.request.urlopen``.

    Returns:
        The body as ``str``. Byte sequences that are not valid GBK are
        dropped (``errors='ignore'``) instead of raising.
    """
    # Close the response deterministically rather than leaving the
    # underlying connection open until garbage collection.
    with urlopen(url) as response:
        return response.read().decode('gbk', errors='ignore')
def parsePage(s):
    """Return the absolute URL of the first detail link found in *s*.

    The pattern looks for a ``<td height="26">`` cell containing a ``<b>``
    and an ``<a ... class="ulink">`` anchor; the anchor's ``href`` is
    appended to the site root.

    Args:
        s: HTML text of a listing page.

    Returns:
        ``"http://www.dytt8.net"`` + the captured href, or ``None`` when
        nothing in *s* matches.
    """
    link_pattern = re.compile(
        r'<td height="26">.*?<b>.*?<a href="(?P<url_name>.*?)" class="ulink">.*?',
        re.S,
    )
    first_hit = link_pattern.search(s)
    if first_hit is None:
        return None
    return "http://www.dytt8.net" + first_hit.group("url_name")
def parsePage1(s):
    """Yield one dict of movie fields per detail-pattern match in *s*.

    The pattern starts at ``<div id="Zoom">`` and captures the text after
    the 译名, 片名, 导演 and 主演 labels plus the ``href`` of the first
    anchor inside a ``<td>`` following the 简介 label.

    Args:
        s: HTML text of a movie detail page.

    Yields:
        Dicts with keys ``yiming``, ``pianming``, ``daoyan``, ``zhuyan``
        and ``xiazaidizhi`` (in that order); every value has all
        ideographic spaces (U+3000) removed.
    """
    detail_pattern = re.compile(
        r'<div id="Zoom">.*?译.*?名(?P<name>.*?)<br />◎片.*?名(?P<pianname>.*?)<br />.*?◎导.*?演(?P<daoyan>.*?)<br />'
        '◎主.*?演(?P<zhuyan>.*?)<br /><br />◎简.*?介.*?<td.*?><a href="(?P<xiazaidizhi>.*?)">',
        re.S,
    )
    # (output key, named group) pairs; tuple order fixes the dict key order.
    field_groups = (
        ("yiming", "name"),
        ("pianming", "pianname"),
        ("daoyan", "daoyan"),
        ("zhuyan", "zhuyan"),
        ("xiazaidizhi", "xiazaidizhi"),
    )
    for hit in detail_pattern.finditer(s):
        yield {
            key: hit.group(group).replace("\u3000", "")
            for key, group in field_groups
        }
def main(num):
    """Scrape listing page *num* and append its movie record(s) to ``move_list``.

    Fetches the listing page, follows the first detail link ``parsePage``
    finds on it, parses the detail page with ``parsePage1``, then prints
    each record and appends ``str(record)`` as one line to the file
    ``move_list`` (opened in append mode, UTF-8).

    Args:
        num: Page number substituted into the listing URL.
    """
    url = "http://www.dytt8.net/html/gndy/dyzz/list_23_%s.html" % num
    listing_html = getPage(url)
    detail_url = parsePage(listing_html)
    if detail_url is None:
        # parsePage found no detail link on this listing page; passing
        # None on to getPage would only raise, so skip the page.
        return
    detail_html = getPage(detail_url)
    # The context manager flushes and closes the file even if a write fails.
    with open("move_list", "a", encoding="utf8") as out:
        for record in parsePage1(detail_html):
            print(record)
            out.write(str(record) + "\n")
# Script driver: process listing pages 1 through 180 in order.
# NOTE: this loop sits at module top level, so it also runs when the
# file is imported, not only when it is executed as a script.
for i in range(1, 181):
    main(i)