import urllib.request
import os
def url_open(url):
    """Fetch *url* and return the raw response body as bytes.

    The request is sent with a desktop-browser User-Agent header.

    Args:
        url: Address to fetch, as a string.

    Returns:
        The complete response body (undecoded ``bytes``).

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36')
    # Open the prepared Request object, not the bare URL; otherwise the
    # User-Agent header set above is silently dropped.
    with urllib.request.urlopen(req) as response:
        # The context manager closes the connection once the body is read.
        return response.read()
def get_page(url):
    """Return the text that follows the 'current-comment-page' marker.

    The page at *url* is downloaded with ``url_open`` and decoded as GBK.
    The result is the substring starting 23 characters after the start of
    the first 'current-comment-page' occurrence and ending just before the
    next ']'.

    Args:
        url: Address of the page to inspect.

    Returns:
        The extracted substring, or an empty string when either the marker
        or the closing ']' is not present in the page.
    """
    html = url_open(url).decode('gbk')
    marker_pos = html.find('current-comment-page')
    if marker_pos == -1:
        # Without this guard the offset arithmetic would start slicing at
        # index 22 of an unrelated page and return garbage.
        return ''
    # 23 = the 20-character marker plus 3 further characters before the
    # value (presumably the `">[` that follows it in the markup -- confirm
    # against the target page).
    start = marker_pos + 23
    end = html.find(']', start)
    if end == -1:
        # No terminator: slicing with -1 would return nearly the whole page.
        return ''
    return html[start:end]
def find_imgs(url):
    """Collect .jpg image addresses referenced by ``img src=`` in a page.

    The page at *url* is downloaded with ``url_open`` and decoded as GBK.
    Only images written in the ``img src=`` form are picked up.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of strings, each 'http://' followed by the tail of one
        matched address (see the note on the 47-character strip below).
    """
    page = url_open(url).decode('gbk')
    tag = 'img src='
    addresses = []

    tag_pos = page.find(tag)
    while tag_pos != -1:
        # Only accept a '.jpg' that starts within 255 characters of the tag.
        ext_pos = page.find('.jpg', tag_pos, tag_pos + 255)
        if ext_pos == -1:
            # No .jpg close by: resume the search just past this tag.
            resume_at = tag_pos + 9
        else:
            # Skip the 8-character tag plus one more character (presumably
            # the opening quote) and keep everything through '.jpg'.
            raw_address = page[tag_pos + 9:ext_pos + 4]
            # Parse the image address: the first 47 characters are dropped.
            # NOTE(review): 47 is hard-coded, presumably for the layout of
            # one specific page -- confirm before reusing on other URLs.
            addresses.append('http://' + raw_address[47:])
            resume_at = ext_pos
        tag_pos = page.find(tag, resume_at)

    return addresses
def save_imgs(folder, img_addrs):
    """Download every address in *img_addrs* into the current directory.

    Each file is named after the last '/'-separated component of its
    address.

    Args:
        folder: Not used by this function; files land in the process's
            current working directory.
        img_addrs: Iterable of address strings to retrieve.
    """
    for address in img_addrs:
        # Everything after the final '/' becomes the local file name.
        local_name = address.rsplit("/", 1)[-1]
        urllib.request.urlretrieve(address, local_name, None)
def download_picture(folder='WWWW', url=None):
    """Download the .jpg images found on a page into *folder*.

    The folder is created if it does not exist, the process's working
    directory is changed to it, and every address returned by
    ``find_imgs`` is saved there via ``save_imgs``.

    Args:
        folder: Directory that receives the images. Note that the working
            directory stays changed to it after the call.
        url: Page to scan. When omitted, the original hard-coded shop
            page is used.
    """
    if url is None:
        url = "https://gboy.taobao.com/index.htm?ali_trackid=17_cfb4dbeb80eb264e50f77d137e3a83d0&spm=a21bo.7724922.8410.1.HwGQ44"
    # Create the target directory on first run; exist_ok keeps repeat runs
    # from failing the way a plain os.mkdir would.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    img_addrs = find_imgs(url)
    save_imgs(folder, img_addrs)
# Start the download with the default arguments only when this file is
# executed as a script, not when it is imported.
if __name__ == '__main__':
    download_picture()