1 # python3
# Download .jpg images from the jandan.net "ooxx" picture board.
3 import urllib
4 import urllib.request as req
5 import os
6 import time
7 import random
8
9
def url_open(url):
    """Fetch *url* directly and return the raw response body.

    The request carries a User-Agent header picked at random from a small
    fixed pool of values.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    user_agents = ('Mozilla/4.0', 'Mozilla/4.1', 'Mozilla/4.5', 'Mozilla/5.1')
    request = urllib.request.Request(
        url, headers={'User-Agent': random.choice(user_agents)})
    # The context manager closes the response even if read() raises, so the
    # underlying connection is not leaked.
    with urllib.request.urlopen(request) as response:
        return response.read()
21
def url_open2(url):
    """Fetch *url* through an HTTP proxy and return the raw response body.

    A proxy address is picked at random from a fixed list and printed, then
    the request is sent with a User-Agent header picked at random from a
    small fixed pool of values.

    Side effect: the proxy-enabled opener is installed as the process-wide
    default, so every later ``urlopen`` call in this process uses it too.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    ip_list = ['117.135.251.136:82']
    ip = random.choice(ip_list)
    print(ip)

    user_agents = ('Mozilla/4.0', 'Mozilla/4.1', 'Mozilla/4.5', 'Mozilla/5.1')
    request = req.Request(
        url, headers={'User-Agent': random.choice(user_agents)})

    # Only plain-http traffic is routed through the proxy.
    proxy = req.ProxyHandler({'http': ip})
    opener = req.build_opener(proxy, req.HTTPHandler)
    # Kept for backward compatibility: installing the opener changes the
    # global default rather than affecting only this call.
    req.install_opener(opener)

    # The context manager closes the response even if read() raises.
    with req.urlopen(request) as conn:
        return conn.read()
40
def get_current_page(url):
    """Return the current comment-page number found on the page at *url*.

    The page is fetched with ``url_open2``, decoded as UTF-8 and scanned for
    the text ``current-comment-page``. The result is the text that starts 23
    characters after the beginning of that marker and ends just before the
    next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        The extracted page number as a ``str`` (not converted to ``int``).

    Raises:
        ValueError: If the marker or the closing ``]`` is not present, so a
            changed page layout fails loudly instead of yielding a random
            slice of the document.
    """
    html = url_open2(url).decode('utf-8')

    marker = 'current-comment-page'
    marker_pos = html.find(marker)
    if marker_pos == -1:
        raise ValueError('%r not found in page %s' % (marker, url))

    # 23 = the 20-character marker plus three more characters; presumably
    # the '">[' that follows it in the site's markup -- verify against the
    # live page if extraction starts failing.
    start = marker_pos + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError('no closing "]" after %r in page %s' % (marker, url))
    return html[start:end]
46
def find_imgs(url):
    """Collect the addresses of ``.jpg`` images referenced on the page at *url*.

    The page is fetched with ``url_open2``, decoded as UTF-8 and scanned for
    every occurrence of ``img src="http``. For each occurrence the address
    runs from ``http`` up to and including the first ``.jpg`` found within
    255 characters of the tag start and before the attribute's closing quote.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of image address strings in document order; empty when the
        page contains no matching tags.
    """
    html = url_open2(url).decode('utf-8')

    tag_prefix = 'img src="http'
    img_addrs = []
    a = html.find(tag_prefix)
    while a != -1:
        url_start = a + 9  # skip 'img src="' so the slice begins at 'http'

        # Do not search past the attribute's closing quote: otherwise a
        # non-jpg image followed closely by a jpg one would yield a single
        # bogus address spanning both tags, and the second tag would be
        # skipped.
        search_end = a + 255
        closing_quote = html.find('"', url_start)
        if closing_quote != -1:
            search_end = min(search_end, closing_quote)

        b = html.find('.jpg', a, search_end)
        if b != -1:
            img_addrs.append(html[url_start:b + 4])
        else:
            # No .jpg in this tag: resume just past the matched prefix.
            b = a + 13
        a = html.find(tag_prefix, b)
    return img_addrs
59
def save_imgs(folder, img_addrs):
    """Download each image address and save it in the current directory.

    Every file is named after the last ``/``-separated component of its
    address and is written relative to the current working directory.

    Args:
        folder: Accepted for interface compatibility but not used here; the
            caller is expected to have changed into the target directory.
        img_addrs: Iterable of image address strings to download.
    """
    for each in img_addrs:
        filename = each.split('/')[-1]
        # Download before opening the file so that a failed request does not
        # leave an empty file behind.
        img = url_open2(each)
        with open(filename, 'wb') as f:
            f.write(img)
66
67
def download_mm(folder='xx', pages=300):
    """Download images from successive older pages of the jandan.net board.

    Creates *folder* if needed, changes the working directory into it, reads
    the current page number from the board's front page and then walks
    backwards one page at a time, saving every ``.jpg`` found.

    Note that the counter is decremented before each fetch, so the first page
    downloaded is the one just before the current page.

    Args:
        folder: Directory to store the images in; created when missing.
        pages: Maximum number of pages to process.
    """
    # exist_ok avoids failing on re-runs, and creating the directory avoids
    # chdir failing on the first run.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = 'http://jandan.net/ooxx/'
    current_page_num = int(get_current_page(url))
    for i in range(pages):
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
              'current_page_num', current_page_num)
        if i % 3 == 0:
            # Pause on every third iteration to throttle the requests.
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                  "sleep 2 seconds...")
            time.sleep(2)
        current_page_num -= 1
        if current_page_num < 1:
            # Ran out of pages before reaching the requested count.
            break
        page_url = url + 'page-' + str(current_page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
83
# Script entry point: run the downloader with its default folder and page
# count when the file is executed directly.
if __name__ == "__main__":
    download_mm()