import requests
import json
import pandas as pd
import time
import re
# HTTP headers sent with every request made by this script.
# NOTE(review): both values look like placeholders; presumably a real
# User-Agent and a logged-in Cookie must be supplied before running.
headers = {
    'User-Agent': 'XXXX',
    'Cookie': 'XXX',
}
# Matches any HTML tag so markup can be stripped from the post text.
# Compiled once here instead of once per matching card.
_TAG_PATTERN = re.compile(r'<.*?>')

# ``from_cateid`` values that get_ad keeps; every other card is skipped.
_AD_CATEGORIES = ('Brand', 'Sfst', 'FanstopExtend', 'Wax')


def get_ad(page):
    """Fetch one page of the feed and return the cards flagged as ads.

    Args:
        page: Value substituted into the ``since_id`` query parameter; the
            driver passes a zero-based page index.

    Returns:
        A list of dicts, one per card whose ``mblog.from_cateid`` is in
        ``_AD_CATEGORIES``. Each dict holds the user id, screen name,
        1-based position of the card on the page, 1-based page number,
        the post text with HTML tags removed, and the repost / comment /
        like counts. The list is empty when the response carries no cards.

    Raises:
        requests.RequestException: on network failure or timeout.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    url = 'https://m.weibo.cn/api/container/getIndex?containerid=102803&openApp=0&since_id={}'.format(page)
    # A timeout keeps a stalled connection from hanging the whole crawl.
    res = requests.get(url, headers=headers, timeout=10)
    data = json.loads(res.text)

    # Treat a payload without data/cards as "no cards" instead of raising
    # KeyError.
    cards = (data.get('data') or {}).get('cards') or []

    all_ad = []
    for k, card in enumerate(cards):
        # Cards without an 'mblog' entry cannot be ads; skip them rather
        # than crash on the missing key.
        mblog = card.get('mblog')
        if not mblog:
            continue
        if mblog.get('from_cateid') not in _AD_CATEGORIES:
            continue

        user = mblog['user']
        result = {}
        result['uid'] = user['id']
        result['昵称'] = user['screen_name']
        # Rank counts every card on the page, not only the ad cards.
        result['排名'] = k + 1
        result['出现页数'] = page + 1
        result['博文'] = _TAG_PATTERN.sub('', mblog['text'])
        result['转发数'] = mblog['reposts_count']
        result['评论数'] = mblog['comments_count']
        result['点赞数'] = mblog['attitudes_count']
        all_ad.append(result)
    return all_ad
# Driver: crawl 50 pages, collect every ad record, then dump them to a
# timestamped Excel workbook.
all_data = []
for page_index in range(50):
    # Pause one second before each request.
    time.sleep(1)
    # Progress indicator: number of ad records gathered so far.
    print(len(all_data))
    page_ads = get_ad(page_index)
    if not page_ads:
        continue
    all_data.extend(page_ads)

df1 = pd.DataFrame(all_data)
# File name embeds the current local time, e.g. result20240101120000.xlsx.
timestamp = time.strftime("%Y%m%d%H%M%S")
output_name = 'result' + timestamp + '.xlsx'
df1.to_excel(output_name, index=False)
print('done')