import requests
import Queue
import urllib
import urllib2
import re
import requests
alreadyImg = set()
s = requests.session()
s.post("http://acm.hrbust.edu.cn/index.php?m=User&a=login"
, data={
"user_name": "1304020306",
"password": "123456"
})
r = s.get("http://acm.hrbust.edu.cn/index.php?m=User&a=userInfo&user_name=1404020214")
print r.text
urllist = Queue.Queue(maxsize = -1)
already = set()
url = "http://acm.hrbust.edu.cn/index.php?m=Ranklist&a=showRatingrank"
urllist.put(url)
reg = r'a href="(.+?)"'
httpre = re.compile(reg)
#reg = r'src="(.+?\.jpg)"'
reimg = r'img class="large_avatar" src="([^>]+?\.(png|jpg))>?"'
imgre = re.compile(reimg)
def putUrl(html):
httplist = re.findall(httpre, html)
for url in httplist:
realurl = url
if 'http' not in url:
realurl = "http://acm.hrbust.edu.cn/"+url
#print realurl
if url not in already:
already.add(url)
urllist.put(realurl)
x = 0;
def getImg(html):
Imglist = re.findall(imgre, html)
global x
for Img in Imglist:
Img = Img[0]
if Img in alreadyImg:
continue
else:
alreadyImg.add(Img)
print Img
if Img[0] != 'h':
Img = "http://acm.hrbust.edu.cn/" + Img
#print "Img == " +Img
try:
urllib.urlretrieve(Img, 'C:/%s.jpg' % x)
except urllib2.URLError, e:
pass
else:
#print "http://acm.hrbust.edu.cn/"+Img
x += 1
while True != urllist.empty():
url = urllist.get(urllist)
print url
try:
r = s.get(url)
html = r.text
if "index.php?m=Ranklist&a=showRatingrank" in url:
putUrl(html)
getImg(html)
except urllib2.URLError, e:
pass
except urllib2.HTTPError, e:
pass
else:
pass
#else:
# print url
#print html
#break