12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- # -*- coding: utf-8 -*-
- # @Author : ChenZhaoyuchen
- # @Time : 2024/9/13 15:02
- # @File : GetVideoInfo.py
- import json
- import re
- from utils.GetResponse import *
- from pprint import pprint
# Module-level dict filled by GetTitile(): each raw page title is stored
# under itself as the key.
video_json = dict()
def GetVideoInfo(html: "str | None" = None) -> str:
    """Return the ``baseUrl`` of the first DASH video stream on the page.

    The page embeds its stream metadata as JSON inside a
    ``<script>window.__playinfo__=...</script>`` tag. This function parses
    that JSON and reads ``data -> dash -> video[0] -> baseUrl``.

    Args:
        html: Page HTML to parse. When omitted, the page is fetched with
            ``GetResponse_AV()``. Passing HTML that was already downloaded
            avoids issuing another request for the same page.

    Returns:
        The ``baseUrl`` value of the first entry in the DASH video list.

    Raises:
        IndexError: If the page has no ``window.__playinfo__`` script tag,
            or the video list is empty.
        json.JSONDecodeError: If the embedded payload is not valid JSON.
        KeyError: If the payload lacks the ``data``/``dash``/``video``/
            ``baseUrl`` keys.
    """
    if html is None:
        html = GetResponse_AV().text
    found = re.search('<script>window.__playinfo__=(.*?)</script>', html)
    if found is None:
        # IndexError is kept as the exception type so callers that already
        # catch it keep working; the message now says what was missing.
        raise IndexError(
            "window.__playinfo__ script block not found in page HTML"
        )
    playinfo = json.loads(found.group(1))
    return playinfo['data']['dash']['video'][0]['baseUrl']
def GetAudioInfo(html: "str | None" = None) -> str:
    """Return the ``baseUrl`` of the first DASH audio stream on the page.

    The page embeds its stream metadata as JSON inside a
    ``<script>window.__playinfo__=...</script>`` tag. This function parses
    that JSON and reads ``data -> dash -> audio[0] -> baseUrl``.

    Args:
        html: Page HTML to parse. When omitted, the page is fetched with
            ``GetResponse_AV()``. Passing HTML that was already downloaded
            avoids issuing another request for the same page.

    Returns:
        The ``baseUrl`` value of the first entry in the DASH audio list.

    Raises:
        IndexError: If the page has no ``window.__playinfo__`` script tag,
            or the audio list is empty.
        json.JSONDecodeError: If the embedded payload is not valid JSON.
        KeyError: If the payload lacks the ``data``/``dash``/``audio``/
            ``baseUrl`` keys.
    """
    if html is None:
        html = GetResponse_AV().text
    found = re.search('<script>window.__playinfo__=(.*?)</script>', html)
    if found is None:
        # IndexError is kept as the exception type so callers that already
        # catch it keep working; the message now says what was missing.
        raise IndexError(
            "window.__playinfo__ script block not found in page HTML"
        )
    playinfo = json.loads(found.group(1))
    return playinfo['data']['dash']['audio'][0]['baseUrl']
# Characters removed from titles: < | > \ / : " * ?
_TITLE_ILLEGAL_CHARS = str.maketrans('', '', '<|>\\/:"*?')
# Maximum number of characters kept from a cleaned title.
_TITLE_MAX_LENGTH = 30


def GetTitile(html: "str | None" = None) -> str:
    """Return a cleaned-up, length-limited version of the page title.

    The raw title is read from the ``<title data-vue-meta="true">`` tag,
    printed, and recorded in the module-level ``video_json`` dict (stored
    under itself as the key). The returned value is then derived from it:

    1. the characters ``< | > \\ / : " * ?`` are removed (presumably because
       the title is later used as a file name -- confirm with callers);
    2. if nothing is left, the placeholder ``'未知'`` is used;
    3. the result is cut to at most 30 characters.

    Args:
        html: Page HTML to parse. When omitted, the page is fetched with
            ``GetResponse_AV()``. Passing HTML that was already downloaded
            avoids issuing another request for the same page.

    Returns:
        The cleaned title, never empty and never longer than 30 characters.

    Raises:
        IndexError: If the page has no ``<title data-vue-meta="true">`` tag.
    """
    if html is None:
        html = GetResponse_AV().text
    found = re.search('<title data-vue-meta="true">(.*?)</title>', html)
    if found is None:
        # IndexError is kept as the exception type so callers that already
        # catch it keep working; the message now says what was missing.
        raise IndexError(
            '<title data-vue-meta="true"> tag not found in page HTML'
        )
    raw_title = found.group(1)
    print("原名为:", raw_title)
    video_json[raw_title] = raw_title

    # Strip the illegal characters from every title, not only from the
    # placeholder, so real titles are actually cleaned.
    cleaned = raw_title.translate(_TITLE_ILLEGAL_CHARS)
    if not cleaned:
        # Covers both an empty <title> and one made up solely of stripped
        # characters.
        cleaned = '未知'
    # Keep only the first 30 characters of over-long titles.
    return cleaned[:_TITLE_MAX_LENGTH]
# For testing only.
def GetHTML():
    """Return the ``.text`` of the response produced by ``GetResponse_AV()``."""
    return GetResponse_AV().text
# Test code: when run as a script, call each getter in turn and print
# what it returns.
if __name__ == '__main__':
    print(GetAudioInfo())
    print(GetVideoInfo())
    cleaned_title = GetTitile()
    print('修饰过后名字为:', cleaned_title)
    pprint(GetHTML())
|