使用python获取知乎**话题下的所有回答,并统计后发布。

 

 

 

 

 

 

 

第一步:根据回答总数生成该话题需要请求的全部分页url(每页5条回答,页数向上取整)

 # Queue one API URL per page of 5 answers; math.ceil keeps the final,
 # partially filled page. `limit=5` in the query must match the `idx*5` stride.
 for idx in range(math.ceil(totals / 5)):
        url = f"https://www.zhihu.com/api/v4/questions/29114634/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cattachment%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Cis_labeled%2Cpaid_info%2Cpaid_info_content%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_recognized%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cvip_info%2Cbadge%5B%2A%5D.topics%3Bdata%5B%2A%5D.settings.table_of_content.enabled&limit=5&offset={idx*5}&platform=desktop&sort_by=default"
        url_list.append(url)

 

 第二步:使用多线程,批量请求所有话题内容,获取到【“书籍”】列表

#创建十个个线程作为生产者,请求
    for x in range(10):
        product = threading.Thread(target=get_pic_url)
        product.start()

  

# Producer: request queued URLs and collect every book title found in the answers.
def _extract_titles(html, title_re, link_re):
    """Return the 《》-wrapped titles found in one answer's HTML text."""
    titles = []
    for candidate in title_re.findall(html):
        if len(candidate) < 30:
            # Short enough to be a plain title such as 《活着》.
            titles.append(candidate)
            continue
        # Long matches usually wrap a hyperlink; keep only its visible text.
        link_texts = link_re.findall(candidate)
        if link_texts:
            titles.append("《" + link_texts[0][1:-1] + "》")
        # No link text at all: not a recognisable title, so it is skipped
        # rather than recorded as an empty string.
    return titles


def get_pic_url():
    """Drain the shared ``url_list`` and append found titles to ``contentList``.

    Each loop iteration pops one URL under ``glock``, downloads it, parses the
    JSON body and scans the ``content`` of every entry in its ``data`` list for
    text wrapped in 《》. Matches shorter than 30 characters are kept verbatim;
    longer ones are reduced to the text of the first hyperlink they contain.
    The function returns (``None``) once ``url_list`` is empty.

    Raises:
        urllib.error.URLError: if a request fails.
        KeyError: if the response lacks ``data`` or an entry lacks ``content``.
    """
    # Compile once per worker instead of once per downloaded page.
    title_re = re.compile(r'《.*?.》')
    link_re = re.compile(r'>.*?.<')
    while True:
        with glock:
            if not url_list:
                break
            page_url = url_list.pop()
        request = urllib.request.Request(page_url, headers=headers)
        # The context manager closes the connection even if decoding fails.
        with urllib.request.urlopen(request) as response:
            body = response.read().decode("utf-8")
        answers = json.loads(body)['data']
        # Parse outside the lock so a malformed entry cannot leave it held.
        found = []
        for answer in answers:
            found.extend(_extract_titles(str(answer['content']), title_re, link_re))
        with glock:
            contentList.extend(found)

  第三步:获取到图书后,统计每本书出现的次数

# Count how often each collected book title appears and render the ranking.
def download_picture():
    """Tally the titles in ``contentList`` and return them as ranked HTML.

    Titles are popped from the shared ``contentList`` under ``glock`` and
    counted in the shared ``tongjicishu`` mapping. The loop stops the second
    time it finds ``contentList`` empty; it does not wait for producers, so
    callers should make sure the list is fully populated beforehand.

    Returns:
        One ``<p>title:推荐人数N人</p>`` fragment per title, concatenated in
        descending order of count (ties keep their first-counted order).
    """
    empty_checks = 0
    while True:
        with glock:
            has_title = bool(contentList)
            if has_title:
                title = contentList.pop()
        if not has_title:
            empty_checks += 1
            if empty_checks == 2:
                break
            continue
        tongjicishu[title] = tongjicishu.get(title, 0) + 1
    ranked = sorted(tongjicishu.items(), key=lambda item: item[1], reverse=True)
    return ''.join(
        "<p>" + title + ":推荐人数" + str(count) + "人</p>"
        for title, count in ranked
    )

  第四步:调用download_picture函数,获取到可发布的带标签的content,并发布

# Step 4 driver: build the ranked HTML list, prepend a summary header with the
# answer total and the current time of day, then publish the result.
putcontent = download_picture()
# NOTE(review): json.dumps on a str wraps it in double quotes and escapes its
# contents, so those quotes end up in the published text - confirm intended.
submitPut("".join((
    "<p>本话题汇总,目前", str(totals), "回答</p>",
    "<p>每天", time.strftime('%H:%M:%S'), "更新</p>",
    json.dumps(putcontent, ensure_ascii=False),
)))

  

def submitPut(putcontent):
    """Send *putcontent* to the hard-coded answer endpoint with an HTTP PUT.

    The payload is a JSON object whose ``content`` field carries
    *putcontent*; the remaining fields are fixed publishing settings. The
    request uses the module-level ``headers``.

    Args:
        putcontent: text to place in the ``content`` field of the payload.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    putUrl = "https://www.zhihu.com/api/v4/answers/2342429808?include=is_visible%2Cpaid_info%2Cpaid_info_content%2Cadmin_closed_comment%2Creward_info%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_normal%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Cattachment%2Crelationship.is_authorized%2Cvoting%2Cis_thanked%2Cis_author%2Cis_nothelp%2Cis_recognized%2Cis_labeled%3Bmark_infos%5B*%5D.url%3Bauthor.vip_info%2Cbadge%5B*%5D.topics%3Bsettings.table_of_content.enabled"
    data = {
        "content": putcontent,
        "reshipment_settings": "disallowed",
        "comment_permission": "all",
        "reward_setting": {"can_reward": False, "tagline": ""},
        "disclaimer_status": "close",
        "disclaimer_type": "none",
        "commercial_report_info": {"is_report": False},
        "is_report": False,
        "push_activity": True,
        "table_of_contents_enabled": False,
        "thank_inviter_status": "close",
    }
    datascontent = json.dumps(data).encode('utf8')
    putres = urllib.request.Request(putUrl, data=datascontent, headers=headers, method='PUT')
    # Close the response deterministically instead of leaking the connection.
    with urllib.request.urlopen(putres) as response:
        return response.read().decode("utf-8")

 代码地址>>

posted @ 2022-02-15 16:05  开江鱼gty  阅读(136)  评论(0)  编辑  收藏  举报