Python 爬虫 - 视频模板

爬过...

# Template only: the code is incomplete and is provided for reference.
import urllib.request
import os
import re
import time

def url_open(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to request.

    Returns:
        bytes: The undecoded response body.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``HTTPError`` is a subclass raised for HTTP error statuses).
    """
    headers = {
        "User-Agent": "",  # EDIT HERE: fill in a real User-Agent string
    }
    req = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the connection even if read() raises;
    # previously the response object was never closed.
    with urllib.request.urlopen(req) as response:
        return response.read()

def find_videos(url):
    """Collect the download URL of every video linked from a listing page.

    The listing page at *url* is fetched and scanned for detail-page links;
    each detail page is then fetched and scanned for its first download link.

    Args:
        url: Address of the listing page.

    Returns:
        list of str: Full download URLs, in the order the detail links
        appear on the listing page. Detail pages on which no download link
        is found are skipped.
    """
    # EDIT HERE: pattern matching detail-page links on the listing page.
    detail_pattern = re.compile('''<h5 class="text-overflow"><a href="(.*?)" target="_blank">.*?</a></h5>''')
    # EDIT HERE: pattern matching the download link on a detail page.
    download_pattern = re.compile('''<a title='下载地址' href="(.*?)" target=".*?">''')

    # Crawl the listing page.
    listing_html = url_open(url).decode('utf-8')
    detail_paths = detail_pattern.findall(listing_html)

    # Crawl each detail page for its download link.
    video_addrs = []
    for detail_path in detail_paths:
        # EDIT HERE: site prefix for detail pages; drop the prefix if the
        # extracted hrefs are already absolute URLs.
        detail_html = url_open("https://www.xxx.com" + detail_path).decode('utf-8')

        match = download_pattern.search(detail_html)
        if match is None:
            # Previously the first match was indexed unconditionally, so a
            # single page without a link raised IndexError and aborted the
            # whole crawl. Skip such pages instead.
            print("未找到下载地址,跳过:", detail_path)
            continue

        # EDIT HERE: site prefix for video files; drop the prefix if the
        # extracted hrefs are already absolute URLs.
        video_addrs.append("https://www.ccc.com" + match.group(1))

    return video_addrs

def save_videos(floder, video_addrs):
    """Download every URL in *video_addrs* to a file in the working directory.

    Each file is named after the last ``/``-separated segment of its URL.
    Progress is printed for every video, and the loop pauses 0.2 s between
    downloads.

    Args:
        floder: Not used by this function; kept so existing callers keep
            working. Files are written relative to the current working
            directory (``mm`` changes into the target folder before calling).
        video_addrs: Sequence of video URLs to download.
    """
    for index, address in enumerate(video_addrs, start=1):
        # A URL ending in '/' has an empty last segment, and open('') fails;
        # fall back to a name derived from the position in the list.
        filename = address.split('/')[-1] or "video_{}".format(index)

        print("正在下载视频:", index)
        # Download before opening the file so that a failed request does not
        # leave an empty file behind.
        video = url_open(address)
        with open(filename, 'wb') as f:
            f.write(video)  # save the video
        print("视频", index, "下载完毕。。")

        time.sleep(0.2)

def mm(floder='download'):
    """Crawl the configured listing page and download its videos.

    Changes the process working directory to *floder*, looks up the video
    URLs with ``find_videos`` and stores them with ``save_videos``.

    Args:
        floder: Directory the videos are saved into. It is created if it
            does not exist yet.
    """
    # Create the folder on first run; previously chdir raised
    # FileNotFoundError when the directory was missing.
    os.makedirs(floder, exist_ok=True)
    os.chdir(floder)

    url = ""  # EDIT HERE: address of the listing page to crawl

    video_addrs = find_videos(url)  # look up the video addresses

    save_videos(floder, video_addrs)  # save the videos

    print("视频爬取完毕!")

# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    mm()
posted @ 2020-11-29 20:30  jt_coder  阅读(117)  评论(0)    收藏  举报