Spider.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. # @Author : @Cloudmistery
  2. """
  3. tips:错误记录
  4. 1.headers请求头设置错误导致文件不可读(就是错了)
  5. 2.文件名合法化,不合法的文件名直接创建不了
  6. 3.音频和视频需要分开爬取及合成
  7. """
# Standard library: JSON parsing, folder creation, regular expressions
import json
import os
import re

# Third-party: HTTP requests
import requests
# Third-party: audio/video merging
from moviepy.editor import *
  16. # TODO:修改url和cookie获取你想要的视频,先在本目录下先创建三个文件夹
  17. # 设置
  18. url = 'https://www.bilibili.com/video/BV1ki4y1B7LB/'
  19. A_path = 'Audio/'
  20. V_path = 'Video/'
  21. Synth_path = 'File/'
  22. # 主程序
  23. # headers只需要三个参数
  24. headers = {
  25. "Referer": url,
  26. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
  27. 'cookie': 'buvid3=D82F9D33-E75C-8D6E-6D2D-D128A352B3EC99573infoc; '
  28. 'b_nut=1730271799; '
  29. '_uuid=363DA659-CAA7-AEE3-31EF-105ADAD95EFA299807infoc; '
  30. 'enable_web_push=DISABLE; '
  31. 'buvid4=A8A4CC10-7B3C-8459-B07C-EB4E11E948D401840-024103007-fmI38SPP%2F5%2BxXWrCTYgty62zOzJos1uEW8lnI07A3XTkLJ6dLJvsmsAJBqLNPrF9; '
  32. 'rpdid=0zbfvUnKtc|3XngooMd|T30|3w1T62JV; '
  33. 'header_theme_version=CLOSE; '
  34. 'DedeUserID=35990046; '
  35. 'DedeUserID__ckMd5=bf34a8eab39f0c11; '
  36. 'buvid_fp_plain=undefined; '
  37. 'LIVE_BUVID=AUTO3317306871682983; '
  38. 'CURRENT_QUALITY=80; '
  39. 'fingerprint=afd86aa8ba10b52e21767b02b25b933b; '
  40. 'buvid_fp=afd86aa8ba10b52e21767b02b25b933b; '
  41. 'CURRENT_FNVAL=4048; '
  42. 'bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MzIyNjg3OTMsImlhdCI6MTczMjAwOTUzMywicGx0IjotMX0.XrtK4fW5yyBnEWTn4gQeSTZAh_8KweejUzSk9tPDC08; '
  43. 'bili_ticket_expires=1732268733; '
  44. 'PVID=3; bmg_af_switch=1; '
  45. 'bmg_src_def_domain=i2.hdslb.com; '
  46. 'b_lsid=AEA3F28F_1934D6025DD; '
  47. 'SESSDATA=994c56c3%2C1747721973%2C32480%2Ab1CjC6H2WiSR2TdOw7R4ED2MVj0RDd_OeOczPS9dMUYYuj8I9ilx01iewdYO_1V7J5VSoSVkVtaE1tV0hxRHIwb2tndmJraTd3X3pvcXBleHI4NDhRN3o5YnAtOWlVX1lwVXhTc210RS05VDFkUnVaMUQ3MjBiQ0h1dENpdi1VOU9hLUxycHUzR2RBIIEC; '
  48. 'bili_jct=4cd051b6642a32eeb57f558eac9f02f7; '
  49. 'sid=6w3kthlu; '
  50. 'home_feed_column=5; '
  51. 'browser_resolution=1528-712; '
  52. 'bsource=search_bing; '
  53. 'bp_t_offset_35990046=1002173800399241216'
  54. }
  55. # 发送请求
  56. response = requests.get(url=url, headers=headers)
  57. html = response.text
  58. # 提取,合法化字符并显示 视频标题
  59. title = re.findall('title="(.*?)"', html)[0]
  60. if title:
  61. illegal_chars = fr'<|>\/:"*?'
  62. def remove_illegal_chars(title_ill):
  63. for char in illegal_chars:
  64. title_ill = title_ill.replace(char, "")
  65. return title_ill
  66. title = remove_illegal_chars(title)
  67. print("视频标题:",title)
  68. # 提取视频信息
  69. info = re.findall('window.__playinfo__=(.*?)</script>', html)[0]
  70. json_data = json.loads(info)
  71. # 提取音视频链接
  72. # TODO:设置数值来声明文件品质,0默认最高
  73. video_url = json_data['data']['dash']['video'][0]['baseUrl']
  74. audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
  75. # 获取音视频内容
  76. video_content = requests.get(url=video_url, headers=headers).content
  77. audio_content = requests.get(url=audio_url, headers=headers).content
  78. # 保存单音频
  79. with open(A_path + title + '.mp3', mode='wb') as a:
  80. a.write(audio_content)
  81. # 保存单视频
  82. with open(V_path + title + '.mp4', mode='wb') as v:
  83. v.write(video_content)
  84. # 合并视频
  85. audio_path = fr"{A_path}{title}{'.mp3'}"
  86. audio_clip = AudioFileClip(audio_path)
  87. video_path = fr"{V_path}{title}{'.mp4'}"
  88. video_clip = VideoFileClip(video_path)
  89. audio_clip = audio_clip.set_duration(video_clip.duration)
  90. video_clip_with_audio = video_clip.set_audio(audio_clip)
  91. video_clip_with_audio.write_videofile(fr"{Synth_path}{title}{'.mp4'}", codec="libx264", audio_codec="aac")