Web Scraping Hodgepodge

"""
# 1. Scrape Lagou job listings
import requests

headers = {
    'Accept-Language': "zh-CN,zh;q=0.9",
    'Host': 'www.lagou.com',
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
    'Referer': "https://www.lagou.com/jobs/list_python?city=%E4%B8%8A%E6%B5%B7&cl=false&fromSearch=true&labelWords=&suginput=",
    # Lagou checks the Cookie below; paste a fresh one from a logged-in browser session, since these values expire quickly
    'Cookie': "index_location_city=%E4%B8%8A%E6%B5%B7; user_trace_token=20200303202747-787f5b5e-8819-4d60-a8c0-3920aaf97b87; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22170a05dd252be-062a9d067fa6cc-366b420b-1049088-170a05dd25333f%22%2C%22%24device_id%22%3A%22170a05dd252be-062a9d067fa6cc-366b420b-1049088-170a05dd25333f%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%2C%22%24latest_referrer_host%22%3A%22www.baidu.com%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24os%22%3A%22Windows%22%2C%22%24browser%22%3A%22Chrome%22%2C%22%24browser_version%22%3A%2279.0.3945.130%22%7D%7D; _ga=GA1.2.442852312.1586218701; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1586218701; LGUID=20200407081821-ea0bc584-cc75-43f0-9aa2-3c6fbe25bd10; JSESSIONID=ABAAAECAAFDAAEHA77B0A7162DFBDB833136F9E1BB7A309; WEBTJ-ID=20200407081848-1715200fade24c-0cb3e5dd9dd159-366b420b-1049088-1715200fadf446; _putrc=75D0A37619AD39A0123F89F2B170EADC; login=true; unick=%E5%8D%A0%E4%BA%9A%E5%B3%B0; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; privacyPolicyPopup=false; index_location_city=%E4%B8%8A%E6%B5%B7; TG-TRACK-CODE=search_code; X_HTTP_TOKEN=ed5749058ca1359c4174036851b1e35881c33e2f3e; gate_login_token=7c10fb5f4a047e902fb2a37fe1f50c11a9127b60c1b4a449e8fbaf21a885afc7; _gid=GA1.2.1134112076.1586353092; _gat=1; PRE_UTM=; PRE_HOST=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2Fshanghai; PRE_SITE=https%3A%2F%2Fwww.lagou.com; LGSID=20200408213812-42f9d697-f383-473d-bf8d-0b78af930d27; hasDeliver=24; LGRID=20200408213814-dd89ae8a-ec86-4878-a2fb-863cec451b35; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1586353093",
    'Accept': "application/json, text/javascript, */*; q=0.01",
    'X-Anit-Forge-Code': "0",  # sic: the misspelled 'Anit' matches the header name the site itself uses
    'X-Anit-Forge-Token': None,
    'X-Requested-With': 'XMLHttpRequest'
}
form_data = {
    'first': 'false',  # 'true' only on the first search page
    'pn': 1,           # page number
    'kd': 'python'     # search keyword
}
ret = requests.post('https://www.lagou.com/jobs/positionAjax.json?city=%E4%B8%8A%E6%B5%B7&needAddtionalResult=false',
                    headers=headers, data=form_data)
print(ret.text)
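
# The response is JSON. A minimal sketch of pulling the postings out of it, assuming
# Lagou's historical payload layout (content -> positionResult -> result, with
# positionName/companyFullName/salary fields) -- inspect ret.json() to confirm the keys.
data = ret.json()
positions = data.get('content', {}).get('positionResult', {}).get('result', [])
for p in positions:
    print(p.get('positionName'), p.get('companyFullName'), p.get('salary'))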


# 2. Scrape cnblogs posts
import requests
from bs4 import BeautifulSoup
ret = requests.get('https://www.cnblogs.com/sitehome/p/3')
soup = BeautifulSoup(ret.text, 'lxml')

article_list = soup.find_all(class_='post_item')
for article in article_list:
    title = article.find(class_='titlelnk').text
    href = article.find(class_='titlelnk')['href']
    desc = article.find(class_='post_item_summary').text
    author = article.find(class_='lightblue').text
    print('''
    Title:   %s
    Link:    %s
    Summary: %s
    Author:  %s
    ''' % (title, href, desc, author))
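
# The page number sits in the URL path, so the same parse wraps neatly in a loop.
# A sketch for pages 1-5 (an arbitrary range), with a pause between requests to stay polite:
import time

for page in range(1, 6):
    ret = requests.get('https://www.cnblogs.com/sitehome/p/%d' % page)
    soup = BeautifulSoup(ret.text, 'lxml')
    for article in soup.find_all(class_='post_item'):
        print(article.find(class_='titlelnk').text)
    time.sleep(1)  # throttle between pages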

# 3. Scrape the novel Hongloumeng (Dream of the Red Chamber)
import requests
from bs4 import BeautifulSoup

ret = requests.get('http://www.shicimingju.com/book/hongloumeng.html')
ret.encoding = 'utf-8'  # set the encoding explicitly in case requests misdetects the charset
soup = BeautifulSoup(ret.text, 'lxml')

li_list = soup.find(class_='book-mulu').find_all(name='li')  # table-of-contents entries
with open("hongloumeng.txt", 'w', encoding='utf-8') as f:
    for li in li_list:
        title = li.find(name='a').text
        url = li.find(name='a')['href']
        f.write(title + '\n')
        ret_detail = requests.get('http://www.shicimingju.com' + url)
        ret_detail.encoding = 'utf-8'
        soup2 = BeautifulSoup(ret_detail.text, 'lxml')
        content = soup2.find(class_='chapter_content').text
        f.write(content + '\n')
        print(title, "written")
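
# A hundred-plus sequential chapter downloads is exactly where a transient network
# error will bite. A sketch of a retry wrapper (the 3 attempts, 10s timeout and 2s
# backoff are arbitrary choices, not anything the site requires):
import time

def fetch(url, attempts=3):
    for i in range(attempts):
        try:
            ret = requests.get(url, timeout=10)
            ret.raise_for_status()  # treat HTTP errors as failures too
            return ret
        except requests.RequestException:
            if i == attempts - 1:
                raise
            time.sleep(2)  # brief backoff before retrying

# usage inside the chapter loop above:
# ret_detail = fetch('http://www.shicimingju.com' + url)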


# 4. WeChat bot (wxpy)
from wxpy import *
from pyecharts import Pie  # requires pyecharts < 1.0; the import path changed in 1.x
import webbrowser

bot = Bot(cache_path=True)  # scan the QR code and confirm the login on your phone

friends = bot.friends()  # all friend objects as a list
attr = ['male friends', 'female friends', 'unknown']
value = [0, 0, 0]
for friend in friends:
    if friend.sex == 1:    # 1 means male
        value[0] += 1
    elif friend.sex == 2:  # 2 means female
        value[1] += 1
    else:
        value[2] += 1


pie = Pie("Friend gender ratio")
pie.add("", attr, value, is_label_show=True)  # series name, labels, values; is_label_show displays the labels
pie.render('sex.html')  # write the chart to an HTML page
webbrowser.open("sex.html")  # open it in the default browser
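
# 'from pyecharts import Pie' only works on pyecharts 0.x; the import path and call
# style changed in 1.x. A sketch of the same chart under the 1.x API:
from pyecharts.charts import Pie
from pyecharts import options as opts

pie = (
    Pie()
    .add("", [list(z) for z in zip(attr, value)])  # 1.x takes (name, value) pairs
    .set_global_opts(title_opts=opts.TitleOpts(title="Friend gender ratio"))
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
)
pie.render('sex.html')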
# Auto-reply: register a handler that fires on every incoming message
from wxpy import *

bot = Bot(cache_path=True)

@bot.register()
def recv_send_msg(recv_msg):
    print('Received:', recv_msg.text)  # recv_msg.text holds the message's plain text
    return 'OK'  # the return value is sent back as the reply

embed()  # drop into a Python shell so the bot keeps running
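
# A variant of the hook above: register() with no arguments answers every chat,
# group messages included, but wxpy can narrow it. This sketch (using the Friend
# class and TEXT constant that wxpy exports) replies only to plain-text messages
# from friends -- register it before calling embed():
@bot.register(Friend, TEXT)
def friendly_reply(msg):
    return 'OK'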



# 5. Scrape Qiushibaike jokes
import requests
from bs4 import BeautifulSoup
ret = requests.get('https://www.qiushibaike.com/text/page/2/')
soup = BeautifulSoup(ret.text, 'lxml')

article_list = soup.find_all(class_='article')
for article in article_list:
    content = article.find(class_='content').text
    print(content)
    print('-------')
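
# Qiushibaike, like many sites, tends to reject the default python-requests
# User-Agent. If the request above comes back empty or blocked, sending a browser
# User-Agent is the usual first fix:
headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
}
ret = requests.get('https://www.qiushibaike.com/text/page/2/', headers=headers)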



# 6. Scrape KFC store locations
import requests

header = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
}
data = {
    'cname': '',        # city name (left empty when searching by keyword)
    'pid': 20,
    'keyword': '浦东',   # search keyword: Pudong
    'pageIndex': 1,
    'pageSize': 10
}
ret = requests.post('http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword', data=data, headers=header)
print(ret.text)
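
# The store list comes back as JSON. A sketch of reading it, assuming the response
# keeps its historical shape (a 'Table1' list whose entries carry 'storeName' and
# 'addressDetail') -- check ret.json() if the keys have changed:
stores = ret.json().get('Table1', [])
for store in stores:
    print(store.get('storeName'), store.get('addressDetail'))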

"""
posted @ 2020-04-08 22:30  alen_zhan