Cloudmistery há 5 meses
pai
commit
b5ab59b965
3 ficheiros alterados com 39 adições e 42 exclusões
  1. 30 16
      Spider.py
  2. 1 15
      utils/GetResponse.py
  3. 8 11
      utils/Save_bili.py

+ 30 - 16
bili_jicheng.py → Spider.py

@@ -1,14 +1,31 @@
-# 导入数据请求模块
+# @Author  : @Cloudmistery
+
+"""
+tips:错误记录
+1.headers请求头设置错误导致文件不可读(就是错了)
+2.文件名合法化,不合法的文件名直接创建不了
+3.音频和视频需要分开爬取及合成
+"""
+
+# 请求模块
 import requests
-# 导入正则表达式模块
+# 正则模块
 import re
-# 导入json模块
+# json模块
 import json
-# 导入合并模块
+# 合并模块
 from moviepy.editor import *
 
+# TODO:修改url和cookie以获取你想要的视频;运行前请先在本目录下创建三个文件夹(Audio/、Video/、File/)
 # 设置
-url = 'https://www.bilibili.com/video/BV1LxSuYDEBR/?spm_id_from=333.1007.tianma.1-1-1.click&vd_source=f6247aa12dae1ff1bce74ef0af381757'
+url = 'https://www.bilibili.com/video/BV1ki4y1B7LB/'
+A_path = 'Audio/'
+V_path = 'Video/'
+Synth_path = 'File/'
+
+
+# 主程序
+# headers只需要三个参数
 headers = {
         "Referer": url,
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
@@ -45,7 +62,7 @@ headers = {
 response = requests.get(url=url, headers=headers)
 html = response.text
 
-# 提取,合法化并显示视频标题
+# 提取,合法化字符并显示 视频标题
 title = re.findall('title="(.*?)"', html)[0]
 if title:
     illegal_chars = fr'<|>\/:"*?'
@@ -54,31 +71,28 @@ if title:
             title_ill = title_ill.replace(char, "")
         return title_ill
     title = remove_illegal_chars(title)
-print(title)
+print("视频标题:",title)
 
 # 提取视频信息
 info = re.findall('window.__playinfo__=(.*?)</script>', html)[0]
 json_data = json.loads(info)
 
 # 提取音视频链接
-# TODO:设置数值来声明文件品质
-video_url = json_data['data']['dash']['video'][1]['baseUrl']
+# TODO:设置数值来声明文件品质,0默认最高
+video_url = json_data['data']['dash']['video'][0]['baseUrl']
 audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
 
 # 获取音视频内容
 video_content = requests.get(url=video_url, headers=headers).content
 audio_content = requests.get(url=audio_url, headers=headers).content
 
-# 设置保存地址
-A_path = 'D:/pyp/Spider/bilibili_pachong/bili_test/audio/'
-V_path = 'D:/pyp/Spider/bilibili_pachong/bili_test/video/'
-Synth_path = 'D:/pyp/Spider/bilibili_pachong/bili_test/File/'
+# 保存单音频
+with open(A_path + title + '.mp3', mode='wb') as a:
+    a.write(audio_content)
 
-# 保存文件数据
+# 保存单视频
 with open(V_path + title + '.mp4', mode='wb') as v:
     v.write(video_content)
-with open(A_path + title + '.mp3', mode='wb') as a:
-    a.write(audio_content)
 
 # 合并视频
 audio_path = fr"{A_path}{title}{'.mp3'}"

+ 1 - 15
utils/GetResponse.py

@@ -7,26 +7,12 @@ from .setting import *
 import requests
 
 # //视频音频请求头//
-headers_bili_av = {
-        "Referer": url,
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
-        'cookie': 'buvid3=D82F9D33-E75C-8D6E-6D2D-D128A352B3EC99573infoc; b_nut=1730271799; _uuid=363DA659-CAA7-AEE3-31EF-105ADAD95EFA299807infoc; enable_web_push=DISABLE; buvid4=A8A4CC10-7B3C-8459-B07C-EB4E11E948D401840-024103007-fmI38SPP%2F5%2BxXWrCTYgty62zOzJos1uEW8lnI07A3XTkLJ6dLJvsmsAJBqLNPrF9; rpdid=0zbfvUnKtc|3XngooMd|T30|3w1T62JV; header_theme_version=CLOSE; DedeUserID=35990046; DedeUserID__ckMd5=bf34a8eab39f0c11; buvid_fp_plain=undefined; LIVE_BUVID=AUTO3317306871682983; CURRENT_QUALITY=80; fingerprint=afd86aa8ba10b52e21767b02b25b933b; buvid_fp=afd86aa8ba10b52e21767b02b25b933b; CURRENT_FNVAL=4048; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MzIyNjg3OTMsImlhdCI6MTczMjAwOTUzMywicGx0IjotMX0.XrtK4fW5yyBnEWTn4gQeSTZAh_8KweejUzSk9tPDC08; bili_ticket_expires=1732268733; PVID=3; bmg_af_switch=1; bmg_src_def_domain=i2.hdslb.com; b_lsid=AEA3F28F_1934D6025DD; SESSDATA=994c56c3%2C1747721973%2C32480%2Ab1CjC6H2WiSR2TdOw7R4ED2MVj0RDd_OeOczPS9dMUYYuj8I9ilx01iewdYO_1V7J5VSoSVkVtaE1tV0hxRHIwb2tndmJraTd3X3pvcXBleHI4NDhRN3o5YnAtOWlVX1lwVXhTc210RS05VDFkUnVaMUQ3MjBiQ0h1dENpdi1VOU9hLUxycHUzR2RBIIEC; bili_jct=4cd051b6642a32eeb57f558eac9f02f7; sid=6w3kthlu; home_feed_column=5; browser_resolution=1528-712; bsource=search_bing; bp_t_offset_35990046=1002173800399241216'
-}
-# //准许请求头//
-headers_bili_ref = {
+headers = {
         "Referer": url,
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
         'cookie': 'buvid3=D82F9D33-E75C-8D6E-6D2D-D128A352B3EC99573infoc; b_nut=1730271799; _uuid=363DA659-CAA7-AEE3-31EF-105ADAD95EFA299807infoc; enable_web_push=DISABLE; buvid4=A8A4CC10-7B3C-8459-B07C-EB4E11E948D401840-024103007-fmI38SPP%2F5%2BxXWrCTYgty62zOzJos1uEW8lnI07A3XTkLJ6dLJvsmsAJBqLNPrF9; rpdid=0zbfvUnKtc|3XngooMd|T30|3w1T62JV; header_theme_version=CLOSE; DedeUserID=35990046; DedeUserID__ckMd5=bf34a8eab39f0c11; buvid_fp_plain=undefined; LIVE_BUVID=AUTO3317306871682983; CURRENT_QUALITY=80; fingerprint=afd86aa8ba10b52e21767b02b25b933b; buvid_fp=afd86aa8ba10b52e21767b02b25b933b; CURRENT_FNVAL=4048; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MzIyNjg3OTMsImlhdCI6MTczMjAwOTUzMywicGx0IjotMX0.XrtK4fW5yyBnEWTn4gQeSTZAh_8KweejUzSk9tPDC08; bili_ticket_expires=1732268733; PVID=3; bmg_af_switch=1; bmg_src_def_domain=i2.hdslb.com; b_lsid=AEA3F28F_1934D6025DD; SESSDATA=994c56c3%2C1747721973%2C32480%2Ab1CjC6H2WiSR2TdOw7R4ED2MVj0RDd_OeOczPS9dMUYYuj8I9ilx01iewdYO_1V7J5VSoSVkVtaE1tV0hxRHIwb2tndmJraTd3X3pvcXBleHI4NDhRN3o5YnAtOWlVX1lwVXhTc210RS05VDFkUnVaMUQ3MjBiQ0h1dENpdi1VOU9hLUxycHUzR2RBIIEC; bili_jct=4cd051b6642a32eeb57f558eac9f02f7; sid=6w3kthlu; home_feed_column=5; browser_resolution=1528-712; bsource=search_bing; bp_t_offset_35990046=1002173800399241216'
 }
 
-headers = {
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-    'Accept-Language': 'zh,en-US;q=0.7,en;q=0.3',
-    'Accept-Encoding': 'gzip, deflate',
-    'Connection': 'keep-alive',
-    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0'
-}
-
 video_json = {
 
 }

+ 8 - 11
utils/Save_bili.py

@@ -8,15 +8,13 @@ import requests
 
 def SaveAV(session: requests.session(),url,title,v_url,a_url):
     # 设置请求头
-    headers_bili_av['Referer'] = url
-    headers_bili_ref['Referer'] = url
-
+    headers['Referer'] = url
     # 获取准许
-    session.options(v_url, headers=headers_bili_ref)
-    session.options(a_url, headers=headers_bili_ref)
+    session.options(v_url, headers=headers)
+    session.options(a_url, headers=headers)
 
-    A_content = session.get(a_url, headers=headers_bili_av).content
-    V_content = session.get(v_url, headers=headers_bili_av).content
+    A_content = session.get(a_url, headers=headers).content
+    V_content = session.get(v_url, headers=headers).content
 
     with open(Audio_path + title + Audio_format, mode='wb') as audio:
         audio.write(A_content)
@@ -25,12 +23,11 @@ def SaveAV(session: requests.session(),url,title,v_url,a_url):
 
 def SaveAudio(session,url,title,a_url):
     # 设置请求头
-    headers_bili_av['Referer'] = url
-    headers_bili_ref['Referer'] = url
+    headers['Referer'] = url
 
     # 获取准许
-    session.options(a_url, headers=headers_bili_ref)
-    A_content = session.get(a_url, headers=headers_bili_av).content
+    session.options(a_url, headers=headers)
+    A_content = session.get(a_url, headers=headers).content
 
     # 下载程序
     print('开始下载音频...')