抓取搜狗微信相关公众号文章

import requests,re,json
import xmltodict
from lxml import etree
# Request headers sent with every search query. The values below are redacted
# placeholders and must be replaced with real browser values before running.
headers = {
    "User-Agent": 'xxxxxxx',
    "Accept-Language": "xxxxxx",
    "Host": "xxxxxx",
    "Cookie": "xxxxxx"
}

# Search endpoint; the query-string fields are passed via `params` so that
# requests takes care of percent-encoding the non-ASCII keyword.
SEARCH_URL = 'https://weixin.sogou.com/weixin'
# Common prefix of the element ids looked up on each result page; the ids
# have the form "<prefix>_<field>_<index>".
RESULT_ID_PREFIX = 'sogou_vr_11002601'
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def _joined_text(tree, element_id):
    """Return the concatenated text nodes under the element with this id.

    The XPath is document-wide, so the result does not depend on which node
    of the tree it is evaluated from. Returns '' when no element matches.
    """
    return ''.join(tree.xpath('//*[@id="%s"]//text()' % element_id))


# Walk result pages 1..9 and print one record per result slot 0..9.
for c in range(1, 10):
    params = {
        'query': '银川',
        '_sug_type_': '',
        's_from': 'input',
        '_sug_': 'y',
        'type': 2,
        'page': c,
        'ie': 'utf8',
    }
    r = requests.get(
        url=SEARCH_URL,
        params=params,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    tree = etree.HTML(r.text)
    if tree is None:
        # Nothing parseable came back; skip the page instead of printing
        # stale or undefined records.
        continue
    for x in range(0, 10):
        # Build the record directly from the document: the former loop over
        # the root's children re-evaluated the same document-wide XPath and
        # left `f` undefined when the root had no children.
        f = {
            'title': _joined_text(tree, '%s_title_%s' % (RESULT_ID_PREFIX, x)),
            'content': _joined_text(tree, '%s_summary_%s' % (RESULT_ID_PREFIX, x)),
            'channl': _joined_text(tree, '%s_account_%s' % (RESULT_ID_PREFIX, x)),
        }
        print(f)

营销类网站反爬不会很难,隐私性较强的网站也别怕。道高一尺,魔高一丈啊~兄弟

posted @ 2022-12-30 10:29  乐乐乐乐乐乐樂  阅读(38)  评论(0)    收藏  举报
jQuery火箭图标返回顶部代码