#-*- coding: utf-8 -*-
import urllib.request
import re
# URL of the blog category page that is downloaded and scanned for links.
pythontagurl = 'http://www.cnblogs.com/itech/category/170012.html'
# Pattern with two groups: group 1 is the whole <a ...>...</a> element whose
# href points below .../itech/archive, group 2 is the anchor text, which must
# begin with "Python" or "python".
pythonarticleurlregrex = (
    '(<a.*?href="http://www.cnblogs.com/itech/archive.*?>'
    '([Pp]ython.*?)</a>)'
)
# Download the category page and build one string from its lines.
# The response is used as a context manager so the connection is closed even
# when reading fails part-way through.
decoded_lines = []
with urllib.request.urlopen(pythontagurl) as pythontagpage:
    for line in pythontagpage:
        try:
            decoded_lines.append(line.decode('utf-8', 'strict'))
        except UnicodeDecodeError:
            # Best effort: a line that is not valid UTF-8 is dropped.  Only
            # decoding errors are ignored; anything else (including
            # KeyboardInterrupt) propagates instead of being swallowed.
            continue
# Join once instead of growing the string with repeated "+=".
pythontagstr = "".join(decoded_lines)
# Extract every (anchor element, anchor text) pair from the page text.
pythonlinkandtiles = re.findall(pythonarticleurlregrex, pythontagstr)
# Map each title to its full <a> element; when a title occurs more than once
# the last occurrence wins.
d = {title: link for link, title in pythonlinkandtiles}
pythontitles = list(d)
# Title prefixes that decide which section a title is filed under.
bstr1 = "python基础"
bstr2 = "python语法"
estr = "python实例"
lstr = "python类库"
tstr = "python技巧"
ostr = "python其他"

# One list of titles per section.
basic = []
examples = []
libs = []
tips = []
others = []

# Prefixes are tried in this order; both bstr1 and bstr2 feed `basic`.
prefix_buckets = (
    (bstr1, basic),
    (bstr2, basic),
    (estr, examples),
    (lstr, libs),
    (tstr, tips),
)
for title in pythontitles:
    for prefix, bucket in prefix_buckets:
        if title.startswith(prefix):
            bucket.append(title)
            break
    else:
        # No known prefix matched.
        others.append(title)

# Sort the titles of every section in place.
for bucket in (basic, libs, examples, tips, others):
    bucket.sort()
# Markup wrapped around each section heading.
fonts = "<br/><font color=red size = 5>"
fonte = ":</font>"

# Flat output list: for each section, its heading followed by the <a>
# elements of that section's (already sorted) titles.
pythonarticles = []
sections = (
    (bstr1, basic),
    (lstr, libs),
    (estr, examples),
    (tstr, tips),
    (ostr, others),
)
for heading, titles in sections:
    pythonarticles.append(fonts + heading + fonte)
    pythonarticles.extend(d[title] for title in titles)
# Generate pythonindex.html in the current working directory.
# The page is assembled in memory first and written in one call inside a
# "with" block, so the file is closed even if writing raises.
page_parts = [
    "<html>",
    "<head>",
    "<meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>",
    "<title>Python - iTech's Blog</title>",
    "</head>",
    "<body>",
    # Count the article links (one per entry in d) rather than
    # len(pythonarticles), which also contains the five section headings.
    "Total number is :" + str(len(d)) + "<br/>",
]
for pa in pythonarticles:
    page_parts.append(pa)
    # "<br/>" replaces the invalid "</br>" end tag and matches the line
    # break already used in the section heading markup.
    page_parts.append("<br/>")
page_parts.append("</body>")
page_parts.append("</html>")

with open("pythonindex.html", "w", encoding='utf-8') as pythonindex:
    pythonindex.writelines(page_parts)