1 import os
2 import io
3 import sys
4 import re
5 import urllib.request
6
# Re-wrap stdout so everything printed by this script is encoded as GB2312
# (presumably for a Chinese-locale Windows console -- confirm before changing).
# line_buffering=True restores flush-on-newline, which a bare TextIOWrapper
# does not do, so progress messages show up as they are printed rather than
# when the buffer fills.  errors='replace' keeps a character that GB2312
# cannot represent from aborting the run with UnicodeEncodeError.
sys.stdout = io.TextIOWrapper(
    sys.stdout.buffer,
    encoding='gb2312',
    errors='replace',
    line_buffering=True,
)
8
9 """
10 headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
11 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
12 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
13 'Accept-Encoding': 'none',
14 'Accept-Language': 'en-US,en;q=0.8',
15 'Connection': 'keep-alive'}
16 """
17
# Request headers imitating a desktop Chrome 59 browser on macOS.
# download_page() passes this dict to urllib.request.Request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/59.0.3071.115 Safari/537.36'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4,en-GB;q=0.2',
}
24 """
25 def get_image(url):
26 request = urllib.request.Request(url, headers=headers)
27 # params = urllib.urlencode(post_params)
28 responseurl = urllib.request.urlopen(request)
29 get_img = responseurl.read()
30 with open('001.jpg', 'wb') as fp:
31 fp.write(get_img)
32 print('图片下载完成')
33 return
34
35 url = 'http://image.tianjimedia.com/uploadImages/2016/009/27/FW632S21L801.jpg'
36 get_image(url)
37
38 """
39 # headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
40
41
def download_page(url, timeout=30):
    """Download *url* and return the response body as raw bytes.

    The request is sent with the module-level ``headers`` dict.  Previously
    the prepared ``Request`` was built but the bare URL was opened, so those
    headers never reached the server.

    Because the headers may advertise compressed transfer
    (``Accept-Encoding``), the body is decompressed here whenever the
    response declares a ``Content-Encoding`` of gzip or deflate.

    Args:
        url: Absolute URL to fetch.
        timeout: Seconds to wait on blocking network operations before
            giving up, so a stalled server cannot hang the script forever.

    Returns:
        bytes: The (decompressed) response body.  It is not decoded to text.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails (``HTTPError`` for HTTP error statuses).
    """
    # Local imports: only this function needs them.
    import gzip
    import zlib

    request = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        data = response.read()
        content_encoding = (
            response.headers.get('Content-Encoding') or ''
        ).strip().lower()

    if content_encoding in ('gzip', 'x-gzip'):
        data = gzip.decompress(data)
    elif content_encoding == 'deflate':
        try:
            data = zlib.decompress(data)
        except zlib.error:
            # Some servers send a raw deflate stream without the zlib header.
            data = zlib.decompress(data, -zlib.MAX_WBITS)
    return data
49
50
# Absolute http(s) URLs ending in ".jpg".  Quotes and angle brackets are
# excluded and the match is non-greedy, so several URLs that sit next to each
# other without whitespace are returned separately instead of as one blob.
_JPG_URL_PATTERN = re.compile(r'https?://[^\s"\'<>]+?\.jpg')


def get_image(html, fetch=None, dest_dir='.'):
    """Download every ``.jpg`` URL found in *html* to numbered files.

    Images are saved as ``1.jpg``, ``2.jpg``, ... in *dest_dir*, in the order
    their URLs appear in the page.  A progress line is printed before each
    download, carrying the same number as the file being written.

    Args:
        html: Page content, either ``bytes`` (as returned by
            ``download_page``) or ``str``.
        fetch: Callable taking a URL and returning the image bytes.
            Defaults to this module's ``download_page``.
        dest_dir: Existing directory to write the images into.  Defaults to
            the current working directory.

    Returns:
        None.
    """
    if fetch is None:
        fetch = download_page

    # URLs are ASCII; latin-1 maps every byte to one character and cannot
    # fail, which avoids matching against the escaped repr() of the bytes.
    if isinstance(html, bytes):
        text = html.decode('latin-1')
    else:
        text = str(html)

    for num, img_url in enumerate(_JPG_URL_PATTERN.findall(text), 1):
        print(u'正在下载第%s张图片' % num)
        image = fetch(img_url)
        with open(os.path.join(dest_dir, '%s.jpg' % num), 'wb') as fp:
            fp.write(image)
    return
64
# Script entry: fetch the page below, then save every .jpg URL found in it as
# numbered files (1.jpg, 2.jpg, ...).
# NOTE(review): these statements run at module level, so importing this file
# also performs the network requests and writes the image files.
url = 'http://pic.yesky.com/180/99839180_2.shtml'
html = download_page(url)  # raw response bytes; not decoded to text
get_image(html)