用 Python requests 库写一个人人网相册爬虫

担心人人网会黄掉,写个爬虫,把我的相册照片都下载下来。代码如下:

# -*- coding: utf-8 -*-
import requests
import json
import os

def mkdir(path):
    """Create the directory *path* (with any missing parents) if it is absent.

    Surrounding whitespace and trailing backslashes are removed from *path*
    before it is used.

    Returns the string "yes" when the directory was created by this call and
    "no" when the path already existed; callers compare against these
    literal strings.
    """
    path = path.strip().rstrip("\\")
    if os.path.exists(path):
        print(path + u' 目录已存在')
        return "no"
    # Create first, report afterwards, so a failing makedirs never leaves a
    # misleading success message behind.
    os.makedirs(path)
    print(path + u' 创建成功')
    return "yes"

def login_renren(s, email='用户名', password='密码抓包获得', rkey='rkey抓包获得'):
    """Log the session *s* in to renren.com through the ajaxLogin endpoint.

    Args:
        s: a requests session (anything with a compatible ``post`` method).
        email: account name sent as the ``email`` form field.
        password: value sent as the ``password`` form field. The default is
            a placeholder; per its text the real value is taken from a
            captured login request.
        rkey: value sent as the ``rkey`` form field, likewise a placeholder
            to be replaced with a captured value.

    Returns:
        The same session object, whether or not the login succeeded; the
        outcome is only reported on stdout.
    """
    login_data = {
        'email': email,
        'domain': 'renren.com',
        'origURL': 'http://www.renren.com/home',
        'key_id': '1',
        'captcha_type': 'web_login',
        'password': password,
        'rkey': rkey,
    }
    # NOTE(review): uniqueTimestamp is a fixed value copied from one captured
    # request -- confirm whether the server requires a fresh one.
    login_url = ("http://www.renren.com/ajaxLogin/login"
                 "?1=1&uniqueTimestamp=2016742045262")
    r = s.post(login_url, data=login_data)
    # r.content is bytes on Python 3, so compare against a bytes literal
    # (a plain str literal on Python 2, hence identical there).
    if b'true' in r.content:
        print(u'登录人人网成功')
    else:
        print(u'登录人人网失败')
    return s
# Renren user id whose albums are crawled when no other id is given.
DEFAULT_USER_ID = '278382090'

# Marker that precedes the embedded data object on both page types.
_PAYLOAD_START = b'nx.data.photo = '


def _extract_payload(content, end_marker, head, tail):
    """Return the text embedded in *content* after ``nx.data.photo = ``.

    The bytes between the start marker and *end_marker* are stripped of
    surrounding whitespace, then *head* leading and *tail* trailing bytes are
    dropped and the remainder is decoded as UTF-8.

    Raises:
        ValueError: if either marker is missing from *content*.
    """
    start = content.find(_PAYLOAD_START)
    end = content.find(end_marker, start) if start != -1 else -1
    if start == -1 or end == -1:
        # Without this check find()'s -1 would be used as a slice index and
        # yield a garbage payload.
        raise ValueError('nx.data.photo payload not found in page')
    raw = content[start + len(_PAYLOAD_START):end].strip()
    return raw[head:len(raw) - tail].decode('utf-8')


def get_albums(s, user_id=DEFAULT_USER_ID):
    """Fetch the album list page of *user_id* and collect the album ids.

    Args:
        s: a logged-in requests session (anything with a compatible ``get``).
        user_id: Renren user id used to build the album list URL.

    Returns:
        A tuple ``(album_ids, s)``: the list of ``albumId`` values found in
        the page, and the session that was passed in.

    Raises:
        ValueError: if the embedded data cannot be located or parsed.
    """
    r = s.get('http://photo.renren.com/photo/%s/albumlist/v7?showAll=1#'
              % user_id)
    # The single trailing byte dropped here is the character that closes the
    # assignment (presumably ';' -- TODO confirm against a live page).
    payload = _extract_payload(r.content, b'nx.data.hasHiddenAlbum =', 0, 1)
    # The page uses single-quoted strings; swap them so json can parse it.
    data = json.loads(payload.replace("'", '"'))
    album_list = data['albumList']
    album_count = album_list['albumCount']
    print(u'一共有' + str(album_count) + u'个相册')
    album_ids = [album['albumId'] for album in album_list['albumList']]
    return album_ids, s


def download_albums(album_ids, s, user_id=DEFAULT_USER_ID, base_dir='d:\\'):
    """Download every photo of each album in *album_ids* below *base_dir*.

    One directory per album (named after the album) is created with
    ``mkdir``. An album whose directory already exists is treated as already
    downloaded and skipped as a whole.

    Args:
        album_ids: album ids as returned by ``get_albums``.
        s: a logged-in requests session, used for album pages and photos.
        user_id: Renren user id used to build the album URLs.
        base_dir: directory that receives the album directories.
    """
    for album_id in album_ids:
        album_url = ('http://photo.renren.com/photo/%s/album-%s/v7'
                     % (user_id, album_id))
        content = s.get(album_url).content
        # Pages without any photoId are skipped silently.
        if b"photoId" not in content:
            continue
        print(u'进入相册成功')
        # NOTE(review): the 13 leading / 2 trailing bytes dropped here strip
        # a wrapper around the album object; the exact wrapper text is not
        # visible in this file -- confirm against a live page.
        payload = _extract_payload(content, b'; define.config', 13, 2)
        data = json.loads(payload.replace("'", '"'))
        photos = data['photoList']
        album_name = data['albumName']
        # Build the album directory path.
        album_path = os.path.join(base_dir, album_name)
        if mkdir(album_path) != 'yes':
            print(u'相册已经下载')
            continue
        for photo in photos:
            # %s formatting keeps this working if the id is numeric.
            image_name = u'%s' % photo['photoId']
            # Reuse the session so connections are pooled across photos.
            resp = s.get(photo['url'])
            image_path = os.path.join(album_path, image_name + '.jpg')
            with open(image_path, 'wb') as f:
                f.write(resp.content)
            print(u'相片' + image_name + u'下载成功')


# Entry point when the file is run as a script.
if __name__ == '__main__':
    # Create the requests session.
    s = requests.Session()
    # Log in to renren.com.
    s = login_renren(s)
    # Fetch the list of album ids.
    album_ids, s = get_albums(s)
    # Download all albums.
    download_albums(album_ids, s)

搞定!运行效果如下:

 

posted @ 2016-08-18 22:52  天外归云  阅读(1249)  评论(0)  编辑  收藏  举报