# _*_ coding=utf-8 _*_
import requests
import time
import math
import os
import pandas as pd
# Session cookie typed/pasted by the operator at start-up; it is sent
# verbatim in the ``Cookie`` header of every request made by this script.
cookies = input('请输入Cookie:')

# HTTP headers shared by all requests.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
    'Cookie': cookies,
    'Host': 'xgop.in.zhihu.com',
    'Referer': '***',
}

# Lookup tables translating numeric codes found in the API payload into the
# display labels written to the report.

# ad templateId -> creative layout label
tempmap = {
    6: '大图',
    7: '文字链',
    8: '小图',
    10: '多图',
    11: '视频',
}

# Zone id -> placement label.
# NOTE(review): this table is not referenced anywhere in the visible code.
zonemap = {
    8: '知乎 APP 信息流',
    152: '知乎极速版首页',
    153: '知乎极速版回答页',
    20: '推荐阅读',
    33: '搜索',
    30: 'App问题页信息流',
}

# Position id -> page label (applied to the ad's ``zoneIds`` when parsing).
positionmap = {
    1: '首页',
    3: '回答页',
    2: '问题页',
}

# targeting os id -> platform label
osmap = {
    1: '安卓',
    2: '苹果',
}

# targeting network id -> network label
networkmap = {
    1: 'wifi',
    2: '2G',
    3: '3G',
    4: '4G',
}

# targeting equipmentPrice id -> device price band label
equipmentPricemap = {
    1: '1500元以下',
    2: '1500-2500元',
    3: '2500-3500元',
    4: '3500元-4500元',
    5: '4500元以上',
}

# targeting mobileOperator id -> carrier label
mobileOperatormap = {
    0: '中国移动',
    1: '中国联通',
    2: '中国电信',
}

# targeting gender id -> gender label
gendermap = {
    0: '女',
    1: '男',
}

# Accumulates one dict per parsed creative across every fetched page; it is
# turned into a DataFrame and exported at the end of the run.
all_data = []
# Errors raised when an expected key/index is absent from the payload or a
# value has an unexpected type (e.g. ``None`` where a list was expected).
_LOOKUP_ERRORS = (KeyError, IndexError, TypeError)


def _join_labels(mapping, ids):
    """Translate ``ids`` through ``mapping`` and join the labels with ', '.

    Ids missing from ``mapping`` are rendered with the placeholder text the
    report has always used; an empty ``ids`` yields '不限'.
    """
    return ', '.join(mapping.get(d, '不限, ') for d in ids) or '不限'


def _fill_creative_row(row, item):
    """Copy the report fields of one creative ``item`` into ``row``.

    ``row`` is filled in place, in report column order, so that the fields
    copied before a failure are still available to the caller. Required
    fields raise on a missing key or unexpected type; optional targeting
    fields fall back to a default label instead.
    """
    row['创意id'] = item['id']
    row['账户id'] = item['userId']
    row['目标类型'] = item['targetType']

    asset = item['asset']
    row['标题'] = asset['title']['value']
    row['描述'] = asset['desc']['value']
    try:
        row['图片url'] = asset['main']['url']
    except _LOOKUP_ERRORS:
        row['图片url'] = ''
    row['cta'] = asset['cta']['value']
    row['状态'] = item['status']
    # The landing-page column ('落地页url', from item['url']) was already
    # disabled in the original implementation and stays out of the report.
    row['创意名称'] = item['name']

    counter = item['counter']
    row['曝光'] = counter['impression']
    row['点击'] = counter['click']
    row['点击率'] = counter['clickRatio']
    # Monetary values are divided by 100 — presumably the API reports them
    # in hundredths of the display unit; TODO confirm.
    row['点击价格'] = counter['clickPrice'] / 100
    row['花费'] = counter['cost'] / 100

    ad = item['ad']
    row['样式'] = tempmap.get(ad['templateId'])  # None for unknown templates
    row['推广开始日期'] = ad['dateBegin']
    row['产品id'] = item['productId']
    row['出价'] = ad['price'] / 100

    targeting = ad['targeting']
    row['投放平台'] = _join_labels(osmap, targeting['os'])
    try:
        row['app行为'] = targeting['appCategory']
    except _LOOKUP_ERRORS:
        row['app行为'] = '无'
    try:
        row['自定义人群'] = targeting['crowd']
    except _LOOKUP_ERRORS:
        row['自定义人群'] = '无'
    row['性别'] = _join_labels(gendermap, targeting['gender'])
    try:
        interest = targeting['interest']
        row['兴趣'] = interest if len(interest) > 0 else '不限'
    except _LOOKUP_ERRORS:
        row['兴趣'] = ''
    row['网络'] = _join_labels(networkmap, targeting['network'])
    try:
        row['运营商'] = _join_labels(mobileOperatormap, targeting['mobileOperator'])
    except _LOOKUP_ERRORS:
        row['运营商'] = '无'
    try:
        row['设备价格'] = _join_labels(equipmentPricemap, targeting['equipmentPrice'])
    except _LOOKUP_ERRORS:
        row['设备价格'] = '无'
    row['关键词'] = ', '.join(targeting['keyword'])
    row['创意展现方式'] = ad['strategy']['creative']
    # The URL template is redacted in this copy; it is formatted with the
    # account id and creative id, in that order.
    row['编辑页面地址'] = '****'.format(row['账户id'], row['创意id'])
    # NOTE(review): zoneIds are translated with ``positionmap`` while the
    # module-level ``zonemap`` is unused — confirm which table is intended.
    row['展现位置'] = ', '.join([positionmap.get(d, '未知') for d in ad['zoneIds']])


def get_single_data(url):
    """Fetch one result page and append a row per creative to ``all_data``.

    The page is requested with the module-level ``headers``. Every entry of
    the payload's ``result`` list is converted to a dict of report columns
    and appended to the module-level ``all_data`` list. Failures are
    reported with ``print`` rather than raised:

    * a failed request or an undecodable/non-object response skips the page;
    * a creative that cannot be fully parsed is still appended with the
      columns collected up to the failure.

    After a page has been processed the running size of ``all_data`` is
    printed as a progress indicator.

    Args:
        url: Fully formatted URL of the page to fetch.

    Returns:
        None. Results are accumulated in ``all_data``.
    """
    try:
        res = requests.get(url, headers=headers)
    except Exception as e:
        # Deliberately broad: any transport/encoding problem on one page
        # must not abort the whole crawl.
        print('异常请求链接--->' + url + str(e))
        return

    try:
        data = res.json().get('result', 0)
    except (ValueError, AttributeError) as e:
        # ValueError: body is not JSON (e.g. an HTML login page);
        # AttributeError: JSON decoded to something other than an object.
        print('异常解析链接--->' + url + str(e))
        return

    if data:
        for item in data:
            row = {}
            try:
                _fill_creative_row(row, item)
            except Exception as e:
                # Keep whatever was parsed so far; the record is still kept.
                print('异常解析链接--->' + url + str(e))
            # NOTE(review): ``>= 0`` also keeps zero-spend and partially
            # parsed rows; confirm whether ``> 0`` was intended.
            if float(row.get('花费', 0)) >= 0:
                all_data.append(row)
    print(len(all_data))
def get_all_urls(userid, start_time, end_time):
    """Build the URL of every result page for one account and date range.

    The first page is requested (with the module-level ``headers``) to read
    ``totalCount`` from the JSON response; the number of pages is that count
    divided by 10, rounded up.

    Args:
        userid: Account id; must be convertible with ``int()``.
        start_time: Start of the reporting range, inserted as ``str()``.
        end_time: End of the reporting range, inserted as ``str()``.

    Returns:
        A list with one URL per page, pages ``1..N`` inclusive. The list is
        empty when the first request fails or its response does not carry a
        usable ``totalCount``; the problem is printed rather than raised.

    Raises:
        ValueError: If ``userid`` is not a valid integer literal.
    """
    # URL template (redacted in this copy). It is formatted with the keyword
    # fields ``page``, ``userid``, ``start_time`` and ``end_time``.
    base_url = '******'
    # NOTE(review): assumes the API serves 10 records per page — confirm.
    page_size = 10

    def page_url(page):
        return base_url.format(page=int(page), userid=int(userid),
                               start_time=str(start_time), end_time=str(end_time))

    first_page_url = page_url(1)
    try:
        res = requests.get(first_page_url, headers=headers)
    except Exception as e:
        # Deliberately broad: one unreachable account must not abort the run.
        print('异常all链接--->' + first_page_url + str(e))
        return []

    try:
        total_page = math.ceil(res.json()['totalCount'] / page_size)
    except (ValueError, KeyError, TypeError) as e:
        # Body is not JSON, lacks ``totalCount``, or the count is not numeric.
        print('异常all链接--->' + first_page_url + str(e))
        return []

    # ``range`` excludes its stop value, so add 1 to include the last page.
    return [page_url(page) for page in range(1, int(total_page) + 1)]
def main():
    """Prompt for account ids and a date range, crawl them, export to Excel.

    Reads a comma-separated list of account ids plus a start and end date
    from standard input. For every id, each page URL returned by
    ``get_all_urls`` is fetched through ``get_single_data`` with a 3-second
    pause before each request. The rows accumulated in the module-level
    ``all_data`` are then written to
    ``<uids>/<uids>-<YYYYmmddHHMM>有消费创意.xlsx`` relative to the current
    working directory, where ``<uids>`` is the id string exactly as entered.
    Nothing is crawled or written when no ids are entered.
    """
    uids = input('请输入uids(格式:111,222,333):')
    start_time = input('请输入开始时间(格式:2018-01-01):')
    end_time = input('请输入结束时间(格式:2018-07-03):')

    if uids.strip():
        for raw_id in uids.split(','):
            userid = raw_id.strip()
            if not userid:
                # Tolerate stray separators such as "111,,222" or "111,".
                continue
            # ``or []`` guards against a ``None`` result when the first page
            # of an account could not be fetched.
            for url in get_all_urls(userid, start_time, end_time) or []:
                time.sleep(3)  # throttle requests to the server
                get_single_data(url)

        df1 = pd.DataFrame(all_data)
        # Create the output directory without a check-then-create race.
        os.makedirs(uids, exist_ok=True)
        file_name = str(uids) + "-" + time.strftime("%Y%m%d%H%M") + '有消费创意' + '.xlsx'
        df1.to_excel(uids + '/' + file_name, index=False)
    print('done')
# Start the interactive crawl only when this file is executed directly.
if __name__ == "__main__":
    main()