|
@@ -1,14 +1,31 @@
|
|
-# 导入数据请求模块
|
|
|
|
|
|
+# @Author : @Cloudmistery
|
|
|
|
+
|
|
|
|
+"""
|
|
|
|
+tips:错误记录
|
|
|
|
+1.headers请求头设置错误导致文件不可读(就是错了)
|
|
|
|
+2.文件名需要合法化:含非法字符的文件名无法创建文件
|
|
|
|
+3.音频和视频需要分开爬取,下载后再合成
|
|
|
|
+"""
|
|
|
|
+
|
|
|
|
+# 请求模块
|
|
import requests
|
|
import requests
|
|
-# 导入正则表达式模块
|
|
|
|
|
|
+# 正则模块
|
|
import re
|
|
import re
|
|
-# 导入json模块
|
|
|
|
|
|
+# json模块
|
|
import json
|
|
import json
|
|
-# 导入合并模块
|
|
|
|
|
|
+# 合并模块
|
|
from moviepy.editor import *
|
|
from moviepy.editor import *
|
|
|
|
|
|
|
|
+# TODO:修改 url 和 cookie 以获取你想要的视频;运行前请先在本目录下创建 Audio、Video、File 三个文件夹
|
|
# 设置
|
|
# 设置
|
|
-url = 'https://www.bilibili.com/video/BV1LxSuYDEBR/?spm_id_from=333.1007.tianma.1-1-1.click&vd_source=f6247aa12dae1ff1bce74ef0af381757'
|
|
|
|
|
|
+url = 'https://www.bilibili.com/video/BV1ki4y1B7LB/'
|
|
|
|
+A_path = 'Audio/'
|
|
|
|
+V_path = 'Video/'
|
|
|
|
+Synth_path = 'File/'
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# 主程序
|
|
|
|
+# headers只需要三个参数
|
|
headers = {
|
|
headers = {
|
|
"Referer": url,
|
|
"Referer": url,
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
|
|
@@ -45,7 +62,7 @@ headers = {
|
|
response = requests.get(url=url, headers=headers)
|
|
response = requests.get(url=url, headers=headers)
|
|
html = response.text
|
|
html = response.text
|
|
|
|
|
|
-# 提取,合法化并显示视频标题
|
|
|
|
|
|
+# 提取视频标题,去除文件名非法字符后显示
|
|
title = re.findall('title="(.*?)"', html)[0]
|
|
title = re.findall('title="(.*?)"', html)[0]
|
|
if title:
|
|
if title:
|
|
illegal_chars = fr'<|>\/:"*?'
|
|
illegal_chars = fr'<|>\/:"*?'
|
|
@@ -54,31 +71,28 @@ if title:
|
|
title_ill = title_ill.replace(char, "")
|
|
title_ill = title_ill.replace(char, "")
|
|
return title_ill
|
|
return title_ill
|
|
title = remove_illegal_chars(title)
|
|
title = remove_illegal_chars(title)
|
|
-print(title)
|
|
|
|
|
|
+print("视频标题:",title)
|
|
|
|
|
|
# 提取视频信息
|
|
# 提取视频信息
|
|
info = re.findall('window.__playinfo__=(.*?)</script>', html)[0]
|
|
info = re.findall('window.__playinfo__=(.*?)</script>', html)[0]
|
|
json_data = json.loads(info)
|
|
json_data = json.loads(info)
|
|
|
|
|
|
# 提取音视频链接
|
|
# 提取音视频链接
|
|
-# TODO:设置数值来声明文件品质
|
|
|
|
-video_url = json_data['data']['dash']['video'][1]['baseUrl']
|
|
|
|
|
|
+# TODO:修改下面的列表索引来选择视频画质,默认 0 为最高画质
|
|
|
|
+video_url = json_data['data']['dash']['video'][0]['baseUrl']
|
|
audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
|
|
audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
|
|
|
|
|
|
# 获取音视频内容
|
|
# 获取音视频内容
|
|
video_content = requests.get(url=video_url, headers=headers).content
|
|
video_content = requests.get(url=video_url, headers=headers).content
|
|
audio_content = requests.get(url=audio_url, headers=headers).content
|
|
audio_content = requests.get(url=audio_url, headers=headers).content
|
|
|
|
|
|
-# 设置保存地址
|
|
|
|
-A_path = 'D:/pyp/Spider/bilibili_pachong/bili_test/audio/'
|
|
|
|
-V_path = 'D:/pyp/Spider/bilibili_pachong/bili_test/video/'
|
|
|
|
-Synth_path = 'D:/pyp/Spider/bilibili_pachong/bili_test/File/'
|
|
|
|
|
|
+# 保存单音频
|
|
|
|
+with open(A_path + title + '.mp3', mode='wb') as a:
|
|
|
|
+ a.write(audio_content)
|
|
|
|
|
|
-# 保存文件数据
|
|
|
|
|
|
+# 保存单视频
|
|
with open(V_path + title + '.mp4', mode='wb') as v:
|
|
with open(V_path + title + '.mp4', mode='wb') as v:
|
|
v.write(video_content)
|
|
v.write(video_content)
|
|
-with open(A_path + title + '.mp3', mode='wb') as a:
|
|
|
|
- a.write(audio_content)
|
|
|
|
|
|
|
|
# 合并视频
|
|
# 合并视频
|
|
audio_path = fr"{A_path}{title}{'.mp3'}"
|
|
audio_path = fr"{A_path}{title}{'.mp3'}"
|