Python:爬虫第二步(单线程与多线程)

from urllib import request
from bs4 import BeautifulSoup
import os

# Fetch the ringtone listing page; the context manager closes the connection
# as soon as the body has been read.
with request.urlopen("https://www.i4.cn/ring_22_0_1.html") as response:
    data = response.read()

# Make sure the output directory exists (no error if it is already there).
os.makedirs('music', exist_ok=True)

# Parse the page and collect every .audio_play element inside the first .kbox.
bs = BeautifulSoup(data, "html.parser")
kbox = bs.select(".kbox")[0]
audio_play = kbox.select(".audio_play")

# Download the ringtones one after another.
for item in audio_play:
    # The element's data-mp3 attribute holds the audio URL.
    mp3url = item.attrs['data-mp3']
    with request.urlopen(mp3url) as res:
        mp3 = res.read()

    # The .title element under the same parent supplies the file name.
    parent = item.parent
    title = parent.select('.title')[0].attrs['title'] + '.mp3'
    # A path separator inside a scraped title would point outside 'music'
    # (or at a directory that does not exist), so neutralise it.
    for sep in (os.sep, os.altsep):
        if sep:
            title = title.replace(sep, '_')
    fileName = os.path.join('music', title)

    # Persist the downloaded bytes; without this the download is discarded.
    with open(fileName, 'wb') as fp:
        fp.write(mp3)

 

多线程:

from urllib import request
from bs4 import BeautifulSoup
import os
import threading

# Fetch the ringtone listing page; the context manager closes the connection
# as soon as the body has been read.
with request.urlopen("https://www.i4.cn/ring_22_0_1.html") as response:
    data = response.read()

# Make sure the output directory exists (no error if it is already there,
# and no window between an existence check and the creation).
os.makedirs('music', exist_ok=True)

# Parse the page with the built-in HTML parser.
bs = BeautifulSoup(data, "html.parser")
# Take the first element with class "kbox" ...
kbox = bs.select(".kbox")[0]
# ... and collect every element with class "audio_play" inside it.
audio_play = kbox.select(".audio_play")

def load_ring(item, directroy):
    """Download one ringtone and save it as an ``.mp3`` file in ``directroy``.

    Args:
        item: Element whose ``data-mp3`` attribute is the audio URL and whose
            parent contains a ``.title`` element carrying a ``title``
            attribute; that attribute becomes the file name.
        directroy: Existing directory that receives the file (it is not
            created here).

    Errors raised by the download or by the file write are not caught.
    """
    mp3url = item.attrs['data-mp3']

    # Read the audio bytes; the context manager closes the connection.
    with request.urlopen(mp3url) as res:
        mp3 = res.read()

    parent = item.parent
    title = parent.select('.title')[0].attrs['title'] + '.mp3'
    # A path separator inside a scraped title would point outside the target
    # directory (or at a directory that does not exist), so neutralise it.
    for sep in (os.sep, os.altsep):
        if sep:
            title = title.replace(sep, '_')
    fileName = os.path.join(directroy, title)
    print(fileName)

    with open(fileName, 'wb') as fp:
        fp.write(mp3)

# Fan out the downloads: each ringtone element gets its own worker thread,
# which saves the file into the 'music' directory.
for item in audio_play:
    worker_args = (item, 'music')
    thread = threading.Thread(target=load_ring, args=worker_args)
    thread.start()

 

posted @ 2020-07-08 15:32  楚云no1  阅读(226)  评论(0)    收藏  举报