# 游民星空图片爬取 (Gamersky image scraper)

import os
import re
import requests
import json
import urllib
# Scrape one page of the Gamersky picture index and store every entry's
# 'path' value in a text file, one per line (each line is written with a
# leading newline, as before).

# Picture-index endpoint: page 1, 50 entries per page, sorted by time descending.
url = 'http://pic.gamersky.com/home/getimagesindex?sort=time_desc&pageIndex=1&pageSize=50&nodeId=21086'
headers = {
    # NOTE(review): 'Request' is not a standard HTTP header name; 'Referer'
    # may have been intended. Kept unchanged so the outgoing request is the same.
    'Request': url,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
}

# The original literal listed the 'https' key twice. A dict keeps only the
# last value for a repeated key, so '124.205.155.147:9090' was silently
# dropped; only the effective entry is kept here.
# NOTE(review): requests selects a proxy by URL scheme, so an 'https' entry
# is presumably not applied to the http:// URL above - confirm whether an
# 'http' entry was intended.
proxies = {
    'https': '121.232.199.138:9000',  # high-anonymity proxy
}

output_path = '11.30/test.txt'

# Remove the file from a previous run so old and new paths are not mixed.
if os.path.exists(output_path):
    os.remove(output_path)

# A timeout keeps the script from hanging forever on a dead proxy or server;
# raise_for_status() reports HTTP errors instead of failing later in JSON parsing.
rep = requests.get(url=url, headers=headers, proxies=proxies, timeout=10)
rep.raise_for_status()
cont_str = rep.content.decode('utf-8')

# The original decoded the body twice unconditionally, i.e. it expected a JSON
# string that itself contains JSON. Decode once and only decode again when the
# first pass still yields a string, so a plain JSON object is accepted too.
payload = json.loads(cont_str)
if isinstance(payload, str):
    payload = json.loads(payload)
img_list = (payload.get('body') if isinstance(payload, dict) else None) or []

# Create the output directory on first use, then open the file once for the
# whole loop instead of reopening it for every entry.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'a', encoding='utf-8') as out_file:
    for img1 in img_list:
        # str() keeps the progress line from raising when 'tinyImg' is missing.
        print("\n外部:" + str(img1.get('tinyImg')))
        img_path = img1.get('path')
        if img_path is None:
            # Nothing to record for an entry without a 'path' value.
            continue
        out_file.write("\n" + img_path)

# with open("11.30/test.txt", "r") as f: 
#     thedata = f.read().replace("\n", ",") 
#     array = thedata.split(",") 
#     array = [item.strip() for item in array if item.strip() != ""] 
# print(array[0])




# rek = requests.get(array)
# content = rek.content.decode("utf-8")
# img =json.loads(json.dumps(content)).get("body")
# for img2 in img:
#     print(img.get("tinyImg"))
# (仅供学习参考) For learning and reference only.

 

# posted @ 2021-03-15 15:02  outsider078  阅读(94)  评论(0)    收藏  举报