1.6学习进度

今天学习1.5小时

继续昨天的爬虫学习

# --- BeautifulSoup basics: fetch a page and explore the parse tree ---
from bs4 import BeautifulSoup
from bs4 import *
import re
import requests
from fake_useragent import UserAgent

url = 'https://www.qiushibaike.com/text/'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62"
}
resp = requests.get(url, headers=headers)
# Fail fast on HTTP errors (4xx/5xx) instead of silently parsing an error page.
resp.raise_for_status()
# print(resp.text)
# Build a bs4 parse tree from the response body using the lxml parser.
soup = BeautifulSoup(resp.text, 'lxml')
# Tag access: soup.<name> returns the first matching tag.
# print(soup.span)
# Attribute access: .attrs dict, .get() (None if absent), and item lookup
# (raises KeyError if absent).
print(soup.div.attrs)
print(soup.div.get('id'))
print(soup.a['href'])
# Text access: .string is the tag's single string child (None if the tag
# has mixed children), while .text joins the text of all descendants.
print(soup.title.string)
print(soup.title.text)

# print(type(soup.div.string))
# #findall()
# m=soup.find_all('div')
# class_ (trailing underscore) avoids clashing with the Python 'class' keyword.
print(soup.find_all(class_='author'))
bs4的使用
from  urllib.request import *
from urllib.parse import urlencode
from fake_useragent import UserAgent
from random import *
from http.cookiejar import MozillaCookieJar
def get_cookie():
    """Log in to sxt.cn and save the session cookies to cookie.txt.

    Side effects: performs a network request and writes 'cookie.txt'
    in the current working directory.
    """
    login_url = "http://www.sxt.cn/index/login/login"
    form_data = {
        "user": "17703181473",
        "password": "123456"
    }
    headers = {
        "User-Agent": UserAgent().random
    }
    # Bug fix: the original built form_data but never attached it, so the
    # request went out as a plain GET with no credentials and the login
    # could not succeed. Encoding the form and passing it as `data`
    # makes urllib issue a POST with the credentials in the body.
    req = Request(login_url, headers=headers,
                  data=urlencode(form_data).encode("utf-8"))

    cookie_jar = MozillaCookieJar()
    handler = HTTPCookieProcessor(cookie_jar)
    opener = build_opener(handler)
    resp = opener.open(req)
    # ignore_discard/ignore_expires keep session cookies that would
    # otherwise be dropped when saving to disk.
    cookie_jar.save('cookie.txt', ignore_discard=True, ignore_expires=True)

def use_cookie():
    """Load cookies from cookie.txt and fetch the logged-in user page.

    Requires get_cookie() to have been run first (reads 'cookie.txt'
    from the current working directory); prints the page body.
    """
    info_url = "http://www.sxt.cn/index/user.html"
    headers = {
        "User-Agent": UserAgent().random
    }
    req = Request(info_url, headers=headers)
    cookie_jar = MozillaCookieJar()
    cookie_jar.load("cookie.txt", ignore_expires=True, ignore_discard=True)
    handler = HTTPCookieProcessor(cookie_jar)
    opener = build_opener(handler)
    resp = opener.open(req)
    # Bug fix: the original printed `resp.read().decode` — the bound
    # method object itself — instead of calling it. Call decode() to
    # actually print the page text.
    print(resp.read().decode())
# Grab fresh login cookies first, then reuse them for the
# authenticated profile request.
if __name__ == '__main__':
    get_cookie()
    use_cookie()
cookie的使用

 

posted @ 2022-01-06 19:35  陈涵  阅读(11)  评论(0编辑  收藏  举报