Python 下载图片

1. 不使用代理

 1 import urllib.request
 2 import os
 3 import getpass
 4 
 5 
 6 def url_open(url):
 7     req=urllib.request.Request(url)
 8     req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/5.0.2.1000 Chrome/47.0.2526.73 Safari/537.36')
 9  
10     response=urllib.request.urlopen(url)
11     html=response.read()
12     return html
13 
14 
def get_page(url):
    """Return the current comment-page number embedded in the page at *url*.

    The page is fetched with ``url_open`` and decoded as UTF-8. The result is
    the text that starts 23 characters after the beginning of the
    ``current-comment-page`` marker and ends just before the next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        The page number as a string (the caller converts it with ``int``).

    Raises:
        ValueError: If the marker or the closing ``]`` is not in the page.
    """
    html = url_open(url).decode('utf-8')

    marker = html.find('current-comment-page')
    if marker == -1:
        # Without this check, find()'s -1 would turn into offset 22 and an
        # unrelated slice of the page would be returned.
        raise ValueError("'current-comment-page' marker not found in " + url)

    start = marker + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError("no closing ']' after the page marker in " + url)

    return html[start:end]
22 
def find_imgs(url):
    """Collect the ``.jpg`` image addresses referenced by the page at *url*.

    The page is fetched with ``url_open`` and decoded as UTF-8. For every
    ``img src=`` occurrence, a ``.jpg`` suffix is looked for within the next
    255 characters; when found, the text from 9 characters after the start of
    ``img src=`` up to and including ``.jpg`` is recorded.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of address strings in page order.
    """
    page = url_open(url).decode('utf-8')
    tag = 'img src='
    found = []

    start = page.find(tag)
    while start >= 0:
        end = page.find('.jpg', start, start + 255)
        if end == -1:
            # No .jpg close by: resume just past this tag.
            resume = start + 9
        else:
            found.append(page[start + 9:end + 4])
            resume = end
        start = page.find(tag, resume)

    return found
37 
def save_imgs(folder, img_addrs):
    """Download each address in *img_addrs* into the current directory.

    Each file is named after the last ``/``-separated component of its
    address.

    Args:
        folder: Not used by this function; files are written relative to the
            current working directory. Kept so existing callers still work.
        img_addrs: Iterable of image address strings. Addresses that do not
            already start with ``http://`` or ``https://`` get ``http:``
            prepended (i.e. they are expected to look like ``//host/path``).
    """
    for each in img_addrs:
        filename = each.split('/')[-1]

        # Only prepend the scheme when the address lacks one; blindly adding
        # 'http:' to an absolute URL would produce an invalid address.
        if each.startswith(('http://', 'https://')):
            img_url = each
        else:
            img_url = 'http:' + each

        # Download first, then open the file, so a failed request does not
        # leave an empty file behind.
        img = url_open(img_url)
        with open(filename, 'wb') as f:
            f.write(img)
44 
45 
46 
47 
def download_mm(folder='OOXX', pages=10):
    """Download images from several jandan.net/ooxx comment pages.

    Changes the working directory to ``<home>/Desktop/<folder>`` (creating
    *folder* if needed), reads the current page number from the site with
    ``get_page``, then walks backwards through *pages* pages, saving every
    image address that ``find_imgs`` returns.

    Args:
        folder: Name of the directory created on the Desktop for the images.
        pages: How many pages to walk through.
    """
    # expanduser('~') resolves the real home directory instead of assuming
    # it is C:\Users\<login name>.
    desktop = os.path.join(os.path.expanduser('~'), 'Desktop')
    os.chdir(desktop)
    # exist_ok lets the script be re-run without crashing on an existing folder.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = "http://jandan.net/ooxx/"
    page_num = int(get_page(url))

    for _ in range(pages):
        # NOTE(review): the decrement happens before the first fetch, so the
        # page number reported by get_page is itself never downloaded -
        # confirm this is intended.
        page_num -= 1
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
61 
# Start the download with default arguments only when run as a script.
if __name__ == '__main__':
    download_mm()
View Code

2. 使用代理(静态)

 1 import urllib.request
 2 import os
 3 import getpass
 4 import random
 5 
 6 def url_open(url):
 7     req=urllib.request.Request(url)
 8     req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/5.0.2.1000 Chrome/47.0.2526.73 Safari/537.36')
 9     #添加代理开始(下载图片有问题)
10     proxies=['58.222.254.11:3128','61.185.219.126:3128','218.247.161.37:80','61.172.249.96:80']
11     proxy=random.choice(proxies)
12 
13     proxy_support=urllib.request.ProxyHandler({'http':proxy})
14     opener=urllib.request.build_opener(proxy_support)
15     urllib.request.install_opener(opener)
16 
17     #添加代理结束
18     response=urllib.request.urlopen(url)
19     html=response.read()
20     return html
21 
22 
def get_page(url):
    """Return the current comment-page number embedded in the page at *url*.

    The page is fetched with ``url_open`` and decoded as UTF-8. The result is
    the text that starts 23 characters after the beginning of the
    ``current-comment-page`` marker and ends just before the next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        The page number as a string (the caller converts it with ``int``).

    Raises:
        ValueError: If the marker or the closing ``]`` is not in the page.
    """
    html = url_open(url).decode('utf-8')

    marker = html.find('current-comment-page')
    if marker == -1:
        # Without this check, find()'s -1 would turn into offset 22 and an
        # unrelated slice of the page would be returned.
        raise ValueError("'current-comment-page' marker not found in " + url)

    start = marker + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError("no closing ']' after the page marker in " + url)

    return html[start:end]
30 
def find_imgs(url):
    """Collect the ``.jpg`` image addresses referenced by the page at *url*.

    The page is fetched with ``url_open`` and decoded as UTF-8. For every
    ``img src=`` occurrence, a ``.jpg`` suffix is looked for within the next
    255 characters; when found, the text from 9 characters after the start of
    ``img src=`` up to and including ``.jpg`` is recorded.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of address strings in page order.
    """
    page = url_open(url).decode('utf-8')
    tag = 'img src='
    found = []

    start = page.find(tag)
    while start >= 0:
        end = page.find('.jpg', start, start + 255)
        if end == -1:
            # No .jpg close by: resume just past this tag.
            resume = start + 9
        else:
            found.append(page[start + 9:end + 4])
            resume = end
        start = page.find(tag, resume)

    return found
45 
def save_imgs(folder, img_addrs):
    """Download each address in *img_addrs* into the current directory.

    Each file is named after the last ``/``-separated component of its
    address.

    Args:
        folder: Not used by this function; files are written relative to the
            current working directory. Kept so existing callers still work.
        img_addrs: Iterable of image address strings. Addresses that do not
            already start with ``http://`` or ``https://`` get ``http:``
            prepended (i.e. they are expected to look like ``//host/path``).
    """
    for each in img_addrs:
        filename = each.split('/')[-1]

        # Only prepend the scheme when the address lacks one; blindly adding
        # 'http:' to an absolute URL would produce an invalid address.
        if each.startswith(('http://', 'https://')):
            img_url = each
        else:
            img_url = 'http:' + each

        # Download first, then open the file, so a failed request does not
        # leave an empty file behind.
        img = url_open(img_url)
        with open(filename, 'wb') as f:
            f.write(img)
52 
53 
54 
55 
def download_mm(folder='OOXX', pages=10):
    """Download images from several jandan.net/ooxx comment pages.

    Changes the working directory to ``<home>/Desktop/<folder>`` (creating
    *folder* if needed), reads the current page number from the site with
    ``get_page``, then walks backwards through *pages* pages, saving every
    image address that ``find_imgs`` returns.

    Args:
        folder: Name of the directory created on the Desktop for the images.
        pages: How many pages to walk through.
    """
    # expanduser('~') resolves the real home directory instead of assuming
    # it is C:\Users\<login name>.
    desktop = os.path.join(os.path.expanduser('~'), 'Desktop')
    os.chdir(desktop)
    # exist_ok lets the script be re-run without crashing on an existing folder.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = "http://jandan.net/ooxx/"
    page_num = int(get_page(url))

    for _ in range(pages):
        # NOTE(review): the decrement happens before the first fetch, so the
        # page number reported by get_page is itself never downloaded -
        # confirm this is intended.
        page_num -= 1
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
69 
# Start the download with default arguments only when run as a script.
if __name__ == '__main__':
    download_mm()
View Code

 

posted @ 2017-03-21 23:36  山的那一边  阅读(106)  评论(0)    收藏  举报