python 下载图片
1,不使用代理
"""Download .jpg images from jandan.net/ooxx (direct connection, no proxy)."""
import getpass
import os
import re
import urllib.request

# Browser-like User-Agent sent with every request.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Maxthon/5.0.2.1000 Chrome/47.0.2526.73 Safari/537.36'
)

# Captures the value of an <img src="..."> attribute up to and including the
# first ".jpg". The character class stops at the closing quote, so a non-jpg
# image can never be glued to a following jpg image.
_IMG_SRC_RE = re.compile(r'''img src=["']([^"'\s>]*?\.jpg)''')

_PAGE_MARKER = 'current-comment-page'


def url_open(url):
    """Fetch *url* and return the raw response body as bytes.

    The request carries the browser-like ``USER_AGENT`` header.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', USER_AGENT)
    # Open the Request object (not the bare URL) so the header is actually
    # sent, and close the response deterministically.
    with urllib.request.urlopen(req) as response:
        return response.read()


def get_page(url):
    """Return the current comment-page number found on *url*, as a string.

    Raises:
        ValueError: if the page does not contain the page-number marker.
    """
    html = url_open(url).decode('utf-8')

    pos = html.find(_PAGE_MARKER)
    if pos == -1:
        raise ValueError('page marker %r not found in %s' % (_PAGE_MARKER, url))

    # Skip the 20-character marker plus the '">[' that follows it.
    start = pos + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError('unterminated page number in %s' % url)
    return html[start:end]


def find_imgs(url):
    """Return the list of .jpg image addresses referenced by <img> tags on *url*.

    Addresses are returned exactly as written in the page (for this site they
    are scheme-relative, e.g. ``//host/path/name.jpg``).
    """
    html = url_open(url).decode('utf-8')
    return _IMG_SRC_RE.findall(html)


def save_imgs(folder, img_addrs):
    """Download every address in *img_addrs* into the current directory.

    *folder* is accepted for interface compatibility but is not used: files
    are written to the current working directory, which ``download_mm``
    switches to the target folder before calling this function.
    """
    for each in img_addrs:
        filename = each.split('/')[-1]
        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        img = url_open('http:' + each)
        with open(filename, 'wb') as f:
            f.write(img)


def download_mm(folder='OOXX', pages=10):
    """Download images from the *pages* pages preceding the current one.

    Images are stored in *folder* on the current user's Windows desktop; the
    process working directory is changed to that folder.
    """
    os.chdir(os.path.join("C:\\Users", getpass.getuser(), "Desktop"))
    # exist_ok lets the script be re-run without failing on the existing folder.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = "http://jandan.net/ooxx/"
    page_num = int(get_page(url))

    for _ in range(pages):
        # Decrement first: the walk starts one page before the current page.
        page_num -= 1
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)


if __name__ == '__main__':
    download_mm()
2,使用代理(静态)
"""Download .jpg images from jandan.net/ooxx through a static HTTP proxy list."""
import getpass
import os
import random
import re
import urllib.request

# Browser-like User-Agent sent with every request.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Maxthon/5.0.2.1000 Chrome/47.0.2526.73 Safari/537.36'
)

# Static HTTP proxies; one is picked at random for each request.
# Original author's note: image downloads are problematic through these proxies.
PROXIES = [
    '58.222.254.11:3128',
    '61.185.219.126:3128',
    '218.247.161.37:80',
    '61.172.249.96:80',
]

# Captures the value of an <img src="..."> attribute up to and including the
# first ".jpg". The character class stops at the closing quote, so a non-jpg
# image can never be glued to a following jpg image.
_IMG_SRC_RE = re.compile(r'''img src=["']([^"'\s>]*?\.jpg)''')

_PAGE_MARKER = 'current-comment-page'


def url_open(url):
    """Fetch *url* via a randomly chosen proxy and return the body as bytes.

    The request carries the browser-like ``USER_AGENT`` header. The proxy is
    applied to ``http`` URLs only.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', USER_AGENT)

    proxy = random.choice(PROXIES)
    proxy_support = urllib.request.ProxyHandler({'http': proxy})
    opener = urllib.request.build_opener(proxy_support)

    # Use the opener directly instead of installing it process-wide on every
    # call, and open the Request object (not the bare URL) so the header is
    # actually sent. The response is closed deterministically.
    with opener.open(req) as response:
        return response.read()


def get_page(url):
    """Return the current comment-page number found on *url*, as a string.

    Raises:
        ValueError: if the page does not contain the page-number marker.
    """
    html = url_open(url).decode('utf-8')

    pos = html.find(_PAGE_MARKER)
    if pos == -1:
        raise ValueError('page marker %r not found in %s' % (_PAGE_MARKER, url))

    # Skip the 20-character marker plus the '">[' that follows it.
    start = pos + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError('unterminated page number in %s' % url)
    return html[start:end]


def find_imgs(url):
    """Return the list of .jpg image addresses referenced by <img> tags on *url*.

    Addresses are returned exactly as written in the page (for this site they
    are scheme-relative, e.g. ``//host/path/name.jpg``).
    """
    html = url_open(url).decode('utf-8')
    return _IMG_SRC_RE.findall(html)


def save_imgs(folder, img_addrs):
    """Download every address in *img_addrs* into the current directory.

    *folder* is accepted for interface compatibility but is not used: files
    are written to the current working directory, which ``download_mm``
    switches to the target folder before calling this function.
    """
    for each in img_addrs:
        filename = each.split('/')[-1]
        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        img = url_open('http:' + each)
        with open(filename, 'wb') as f:
            f.write(img)


def download_mm(folder='OOXX', pages=10):
    """Download images from the *pages* pages preceding the current one.

    Images are stored in *folder* on the current user's Windows desktop; the
    process working directory is changed to that folder.
    """
    os.chdir(os.path.join("C:\\Users", getpass.getuser(), "Desktop"))
    # exist_ok lets the script be re-run without failing on the existing folder.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = "http://jandan.net/ooxx/"
    page_num = int(get_page(url))

    for _ in range(pages):
        # Decrement first: the walk starts one page before the current page.
        page_num -= 1
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)


if __name__ == '__main__':
    download_mm()
------山的那一边

浙公网安备 33010602011771号