Python:爬虫第二步(单线程与多线程)
# Single-threaded crawler: downloads every ringtone listed on one i4.cn
# page, one request after another, into the local "music" directory.
import os
from urllib import request

from bs4 import BeautifulSoup

# Fetch the ringtone listing page; the with-block closes the connection
# as soon as the body has been read.
with request.urlopen("https://www.i4.cn/ring_22_0_1.html") as response:
    data = response.read()

# Make sure the output directory exists (no error if it is already there).
os.makedirs('music', exist_ok=True)

# Parse the downloaded HTML.
bs = BeautifulSoup(data, "html.parser")

# First element with class "kbox"; raises IndexError if the page has none.
kbox = bs.select(".kbox")[0]

# Every element with class "audio_play" inside that box.
audio_play = kbox.select(".audio_play")

for item in audio_play:
    # The MP3 address is stored in the element's data-mp3 attribute.
    mp3url = item.attrs['data-mp3']

    # Download the MP3 bytes, closing the connection afterwards.
    with request.urlopen(mp3url) as res:
        mp3 = res.read()

    # The file name comes from the title attribute of the ".title" element
    # found under the same parent as the audio_play element.
    parent = item.parent
    title = parent.select('.title')[0].attrs['title'] + '.mp3'
    fileName = os.path.join('music', title)

    # Persist the download; without this the fetched bytes are thrown away.
    with open(fileName, 'wb') as fp:
        fp.write(mp3)
多线程:
# Multi-threaded crawler: downloads every ringtone listed on one i4.cn
# page, using one thread per ringtone, into the local "music" directory.
import os
import threading
from urllib import request

from bs4 import BeautifulSoup

# Fetch the ringtone listing page; the with-block closes the connection
# as soon as the body has been read.
with request.urlopen("https://www.i4.cn/ring_22_0_1.html") as response:
    data = response.read()

# Make sure the output directory exists (no error if it is already there).
os.makedirs('music', exist_ok=True)

# Parse the downloaded HTML.
bs = BeautifulSoup(data, "html.parser")

# First element with class "kbox"; raises IndexError if the page has none.
kbox = bs.select(".kbox")[0]

# Every element with class "audio_play" inside that box.
audio_play = kbox.select(".audio_play")


def load_ring(item, directroy):
    """Download one ringtone and write it to ``<title>.mp3`` in *directroy*.

    Args:
        item: Parsed element whose ``data-mp3`` attribute holds the MP3 URL
            and whose parent contains a ``.title`` element with a ``title``
            attribute.
        directroy: Directory the file is written into. The misspelled
            parameter name is kept so keyword callers keep working.

    Prints the destination path before writing the file.
    """
    mp3url = item.attrs['data-mp3']

    # Download the MP3 bytes, closing the connection afterwards.
    with request.urlopen(mp3url) as res:
        mp3 = res.read()

    parent = item.parent
    title = parent.select('.title')[0].attrs['title'] + '.mp3'
    fileName = os.path.join(directroy, title)
    print(fileName)
    with open(fileName, 'wb') as fp:
        fp.write(mp3)


# Start one download thread per ringtone.
threads = []
for item in audio_play:
    thread = threading.Thread(target=load_ring, args=(item, 'music'))
    thread.start()
    threads.append(thread)

# Wait for every download so the script ends only after all files are written.
for thread in threads:
    thread.join()

浙公网安备 33010602011771号