B站批量下载教学视频集——Java 爬虫教程
1. 说明:下面的代码是在 https://blog.csdn.net/xiuminglee/article/details/106014889 一文代码的基础上二次修改而来的。
2. 请先在本地下载 ffmpeg 并解压,记住解压后的路径(代码中的 FFMPEG_PATH 需要指向其中的 ffmpeg.exe)。
3. 本文使用的依赖如下:
<!-- B站视频下载 依赖 -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.3.0</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
<!-- 阿里JSON解析器 -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
</dependency>
4. 代码如下:
package com.srn.cn.hbase;
import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpResponse;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import com.alibaba.fastjson.JSON;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line utility that downloads every part of a multi-part Bilibili video.
 *
 * <p>For each part it reads the {@code window.__playinfo__} JSON embedded in the video page,
 * downloads the first listed video stream and the first listed audio stream, and merges the
 * two files into a single MP4 by running a local ffmpeg binary.
 *
 * <p>All state is kept in static fields, so the class is not safe for concurrent use.
 *
 * @author 王志亭
 * @since 2020/12/29
 */
public class dowload {
    /** Common prefix of every Bilibili video page URL. */
    private static final String BILIBILI_VIDEO_URL = "https://www.bilibili.com/video/";
    /** BV id of the video collection to download. */
    private static final String CONCRETE_VIDEO_URL = "BV1x54y1Q7mo";
    /** Page-list API; its response supplies the title ("part") of every part. */
    private static final String VIDEO_NAMES_URL = "https://api.bilibili.com/x/player/pagelist?bvid=" + CONCRETE_VIDEO_URL + "&jsonp=jsonp";
    /** Page URL of the collection; also sent as the Referer of every stream request. */
    private static final String VIDEO_URL = BILIBILI_VIDEO_URL + CONCRETE_VIDEO_URL;
    /** Location of the local ffmpeg executable. */
    private static final String FFMPEG_PATH = "D:\\Wangzhiting\\FFmpeg\\ffmpeg-4.3.1-2020-11-19-essentials_build\\bin\\ffmpeg.exe";
    private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36";
    /** Directory that receives the downloaded streams and the merged result. */
    private static final String SAVE_PATH = "H://test/";
    /** Timeout, in milliseconds, for page, API and probe requests. */
    private static final int TIMEOUT_MILLIS = 2000;
    /** Captures the JSON assigned to {@code window.__playinfo__} inside the page's script tag. */
    private static final Pattern PLAY_INFO_PATTERN = Pattern.compile("(?<=<script>window.__playinfo__=).*?(?=</script>)");
    /** Characters that cannot appear in a Windows file name, plus control characters. */
    private static final Pattern ILLEGAL_FILE_NAME_CHARS = Pattern.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");
    /** Scratch record describing the part currently being processed. */
    private static final VideoInfo VIDEO_INFO = new VideoInfo();

    public static void main(String[] args) {
        // htmlParser();
        htmlParserList();
    }

    /**
     * Downloads every part of the collection, one after another.
     *
     * <p>Stops at the first part whose page does not contain the play-info JSON.
     */
    private static void htmlParserList() {
        List<String> videoNames = getVideoNames();
        for (int i = 0; i < videoNames.size(); i++) {
            // Parts are addressed through the 1-based "p" query parameter.
            String url = VIDEO_URL + "?p=" + (i + 1);
            System.out.println("url地址 = " + url);
            String html = HttpRequest.get(url).timeout(TIMEOUT_MILLIS).execute().body();
            String videoName = videoNames.get(i);
            System.out.println("视频名称== " + videoName);
            if (!loadPlayInfo(html, videoName)) {
                return;
            }
            getVideoInfo();
        }
    }

    /**
     * Downloads the single video behind {@link #VIDEO_URL}, naming it after the page title.
     *
     * <p>Falls back to the BV id when the page has no usable {@code <title>} element.
     */
    private static void htmlParser() {
        String html = HttpRequest.get(VIDEO_URL).timeout(TIMEOUT_MILLIS).execute().body();
        Document document = Jsoup.parse(html);
        Element title = document.getElementsByTag("title").first();
        String videoName = (title == null || title.text().isEmpty()) ? CONCRETE_VIDEO_URL : title.text();
        if (!loadPlayInfo(html, videoName)) {
            return;
        }
        getVideoInfo();
    }

    /**
     * Stores the given name and the play-info JSON found in {@code html} into {@link #VIDEO_INFO}.
     *
     * @param html      source of the video page
     * @param videoName name to record for the current part
     * @return {@code true} when the play-info JSON was found; {@code false} (after printing an
     *         error) when the page does not contain it
     */
    private static boolean loadPlayInfo(String html, String videoName) {
        VIDEO_INFO.videoName = videoName;
        Matcher matcher = PLAY_INFO_PATTERN.matcher(html);
        if (!matcher.find()) {
            System.err.println("未匹配到视频信息,退出程序!");
            return false;
        }
        VIDEO_INFO.videoInfo = new JSONObject(matcher.group());
        return true;
    }

    /**
     * Fetches the titles of all parts from the page-list API.
     *
     * @return the "part" value of every entry in the response's "data" array, in response order;
     *         an empty list (after printing an error) when the response has no "data" array
     */
    private static List<String> getVideoNames() {
        String body = HttpRequest.get(VIDEO_NAMES_URL).timeout(TIMEOUT_MILLIS).execute().body();
        com.alibaba.fastjson.JSONObject response = JSON.parseObject(body);
        com.alibaba.fastjson.JSONArray pages = response == null ? null : response.getJSONArray("data");
        List<String> names = new ArrayList<>();
        if (pages == null) {
            System.err.println("获取视频列表失败,接口返回: " + body);
            return names;
        }
        for (int i = 0; i < pages.size(); i++) {
            names.add(pages.getJSONObject(i).getString("part"));
        }
        return names;
    }

    /**
     * Reads the stream URLs and sizes of the current part from its play-info JSON, then
     * starts the download.
     *
     * @throws IllegalStateException if the play-info JSON lacks the expected nodes or a probe
     *                               response carries no usable Content-Range header
     */
    private static void getVideoInfo() {
        JSONObject data = VIDEO_INFO.videoInfo.getJSONObject("data");
        JSONObject dash = data == null ? null : data.getJSONObject("dash");
        if (dash == null) {
            throw new IllegalStateException("播放信息中缺少 data.dash 节点: " + VIDEO_INFO.videoName);
        }

        JSONObject video = firstStream(dash, "video");
        VIDEO_INFO.videoBaseUrl = video.getStr("baseUrl");
        VIDEO_INFO.videoBaseRange = initializationRange(video, "video");
        VIDEO_INFO.videoSize = probeTotalSize(VIDEO_INFO.videoBaseUrl, VIDEO_INFO.videoBaseRange);

        JSONObject audio = firstStream(dash, "audio");
        VIDEO_INFO.audioBaseUrl = audio.getStr("baseUrl");
        VIDEO_INFO.audioBaseRange = initializationRange(audio, "audio");
        VIDEO_INFO.audioSize = probeTotalSize(VIDEO_INFO.audioBaseUrl, VIDEO_INFO.audioBaseRange);

        downloadFile();
    }

    /** Returns the first entry of the {@code kind} ("video" or "audio") array under the dash node. */
    private static JSONObject firstStream(JSONObject dash, String kind) {
        JSONArray streams = dash.getJSONArray(kind);
        if (streams == null || streams.isEmpty()) {
            throw new IllegalStateException("播放信息中没有 " + kind + " 流: " + VIDEO_INFO.videoName);
        }
        return streams.getJSONObject(0);
    }

    /** Returns the stream's {@code SegmentBase.Initialization} value, used as the probe byte range. */
    private static String initializationRange(JSONObject stream, String kind) {
        JSONObject segmentBase = stream.getJSONObject("SegmentBase");
        String range = segmentBase == null ? null : segmentBase.getStr("Initialization");
        if (range == null) {
            throw new IllegalStateException("播放信息中缺少 " + kind + " 流的 SegmentBase.Initialization: " + VIDEO_INFO.videoName);
        }
        return range;
    }

    /**
     * Requests a small byte range of a stream and returns the text after the '/' of the
     * response's Content-Range header.
     *
     * @param baseUrl   stream URL
     * @param initRange byte range to request, without the "bytes=" prefix
     * @return the part of the Content-Range header following the last '/'
     * @throws IllegalStateException if the header is missing or has nothing after the '/'
     */
    private static String probeTotalSize(String baseUrl, String initRange) {
        HttpResponse response = HttpRequest.get(baseUrl)
                .header("Referer", VIDEO_URL)
                .header("Range", "bytes=" + initRange)
                .header("User-Agent", USER_AGENT)
                .timeout(TIMEOUT_MILLIS)
                .execute();
        String contentRange = response.header("Content-Range");
        int slash = contentRange == null ? -1 : contentRange.lastIndexOf('/');
        if (slash < 0 || slash == contentRange.length() - 1) {
            throw new IllegalStateException("响应缺少有效的 Content-Range 头 (HTTP " + response.getStatus() + "): " + baseUrl);
        }
        return contentRange.substring(slash + 1).trim();
    }

    /**
     * Downloads the video and audio streams of the current part into {@link #SAVE_PATH} and
     * merges them. A stream whose target file already exists is not downloaded again.
     *
     * @throws IllegalStateException if the save directory cannot be created or a stream request
     *                               is answered with an unexpected HTTP status
     */
    private static void downloadFile() {
        File saveDir = new File(SAVE_PATH);
        if (!saveDir.mkdirs() && !saveDir.isDirectory()) {
            throw new IllegalStateException("无法创建保存目录: " + saveDir);
        }
        String baseName = sanitizeFileName(VIDEO_INFO.videoName);

        File videoFile = new File(saveDir, baseName + "_video.mp4");
        if (!videoFile.exists()) {
            fetchStream(VIDEO_INFO.videoBaseUrl, VIDEO_INFO.videoSize, videoFile, "视频");
        }

        File audioFile = new File(saveDir, baseName + "_audio.mp4");
        if (!audioFile.exists()) {
            fetchStream(VIDEO_INFO.audioBaseUrl, VIDEO_INFO.audioSize, audioFile, "音频");
        }

        mergeFiles(videoFile, audioFile);
    }

    /**
     * Downloads one stream into {@code target}.
     *
     * @param baseUrl   stream URL
     * @param totalSize value used as the end of the requested byte range
     * @param target    file to write
     * @param label     stream name used in the progress messages
     */
    private static void fetchStream(String baseUrl, String totalSize, File target, String label) {
        System.out.println("--------------开始下载" + label + "文件--------------");
        // NOTE(review): the range end equals the reported total size, i.e. one past the last
        // byte index; presumably the server clamps it to the end of the file - confirm.
        HttpResponse response = HttpRequest.get(baseUrl)
                .header("Referer", VIDEO_URL)
                .header("Range", "bytes=0-" + totalSize)
                .header("User-Agent", USER_AGENT)
                .execute();
        int status = response.getStatus();
        // Refuse to save an error page as media: the file would later be treated as already
        // downloaded and would make the merge fail.
        if (status != 200 && status != 206) {
            throw new IllegalStateException(label + "文件下载失败,HTTP 状态码: " + status);
        }
        response.writeBody(target);
        System.out.println("--------------" + label + "文件下载完成--------------");
    }

    /**
     * Replaces characters that are not allowed in file names with '_'.
     *
     * @param name raw title, may be {@code null}
     * @return a name usable as a file name; the BV id when nothing usable remains
     */
    private static String sanitizeFileName(String name) {
        String cleaned = name == null ? "" : ILLEGAL_FILE_NAME_CHARS.matcher(name).replaceAll("_").trim();
        return cleaned.isEmpty() ? CONCRETE_VIDEO_URL : cleaned;
    }

    /**
     * Merges the downloaded video and audio files into a single MP4 with ffmpeg, copying both
     * streams without re-encoding. The two input files are deleted only after ffmpeg exits
     * with code 0, so a failed merge can be retried without downloading again.
     *
     * @param videoFile downloaded video stream
     * @param audioFile downloaded audio stream
     */
    private static void mergeFiles(File videoFile, File audioFile) {
        System.out.println("--------------开始合并音视频--------------");
        File outFile = new File(SAVE_PATH, sanitizeFileName(VIDEO_INFO.videoName) + ".mp4");
        ProcessBuilder builder = new ProcessBuilder(
                FFMPEG_PATH,
                "-i", videoFile.getAbsolutePath(),
                "-i", audioFile.getAbsolutePath(),
                "-vcodec", "copy",
                "-acodec", "copy",
                outFile.getAbsolutePath());
        try {
            int exitCode = builder.inheritIO().start().waitFor();
            if (exitCode != 0) {
                System.err.println("音视频合并失败!ffmpeg 退出码: " + exitCode);
                return;
            }
            System.out.println("--------------音视频合并完成--------------");
            deleteQuietly(videoFile);
            deleteQuietly(audioFile);
        } catch (InterruptedException e) {
            // Restore the flag so that code further up the stack can see the interruption.
            Thread.currentThread().interrupt();
            System.err.println("音视频合并失败!");
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("音视频合并失败!");
            e.printStackTrace();
        }
    }

    /** Deletes an intermediate file and prints a warning when that fails. */
    private static void deleteQuietly(File file) {
        if (!file.delete()) {
            System.err.println("临时文件删除失败: " + file.getAbsolutePath());
        }
    }
}
/**
 * Mutable holder for everything known about the part currently being downloaded.
 *
 * <p>Fields are public for brevity; direct public fields are not recommended in real projects.
 */
class VideoInfo {
    /** Title of the part; used to build the output file names. */
    public String videoName;
    /** Play-info JSON taken from the video page. */
    public JSONObject videoInfo;

    // Video stream
    /** URL of the video stream. */
    public String videoBaseUrl;
    /** Byte range requested when probing the video stream. */
    public String videoBaseRange;
    /** Size of the video stream as read from the probe response. */
    public String videoSize;

    // Audio stream
    /** URL of the audio stream. */
    public String audioBaseUrl;
    /** Byte range requested when probing the audio stream. */
    public String audioBaseRange;
    /** Size of the audio stream as read from the probe response. */
    public String audioSize;
}
浙公网安备 33010602011771号