python 爬取B站原视频的实例代码
这篇文章主要介绍了 Python 爬取B站原视频的实例代码,帮助大家更好地理解和使用 Python 爬虫,感兴趣的朋友可以了解下。
B站原视频爬取,我就不多说直接上代码。直接运行就好。
B站是把视频和音频分开存放的,下载之后要把这2个文件合并起来才能使用,这一点需要分析接口才能看出来。另外,登录这一块是比较难的。
import argparse
import os
import re
import subprocess


class Bilibili():
    """Download every video uploaded by a given Bilibili user.

    Bilibili serves most videos as separate DASH video and audio streams, so
    each video is fetched as two files with ``aria2c`` and then merged with
    ``ffmpeg``. Both tools are expected under ``./tools`` relative to the
    current working directory.
    """

    # Characters replaced by a space when a video title is used as a file
    # name: quotes, path separators, Windows-reserved characters, whitespace.
    _ILLEGAL_TITLE_CHARS = re.compile(r"""[‘'/\\:*?"<>|\s]""")

    # aria2c only accepts 1-16 for -x / --max-connection-per-server.
    _MAX_ARIA2C_CONNECTIONS = 16

    def __init__(self, username, password, **kwargs):
        """Log in and prepare the API endpoints.

        Args:
            username: Bilibili account name used for the login.
            password: Password of that account.
            **kwargs: Accepted for backward compatibility; unused.
        """
        self.username = username
        self.password = password
        self.session = Bilibili.login(username, password)
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
        }
        self.user_info_url = "http://api.bilibili.com/x/space/acc/info"
        self.submit_videos_url = "http://space.bilibili.com/ajax/member/getSubmitVideos"
        self.view_url = "http://api.bilibili.com/x/web-interface/view"
        self.video_player_url = "http://api.bilibili.com/x/player/playurl"

    def run(self):
        """Interactive main loop: ask for a user id, show the profile, download.

        Loops forever; stop it with Ctrl+C.
        """
        # Imported here so the module itself can be imported without the
        # third-party table renderer being installed.
        import prettytable

        while True:
            userid = input("请输入目标用户ID(例:345993405)(我的一个LOL好友凯撒可以关注他一下 谢谢) ——> ")
            user_info = self.__getUserInfo(userid)
            tb = prettytable.PrettyTable()
            tb.field_names = list(user_info.keys())
            tb.add_row(list(user_info.values()))
            print("获取的用户信息如下:")
            print(tb)
            is_download = input("是否下载该用户的所有视频(y/n, 默认: y) ——> ")
            # An empty answer means "yes" (the default).
            if is_download in ("y", "yes") or not is_download:
                self.__downloadVideos(userid)

    def __getUserInfo(self, userid):
        """Return the basic profile of ``userid`` as a dict keyed by display label."""
        params = {"mid": userid, "jsonp": "jsonp"}
        res = self.session.get(self.user_info_url, params=params, headers=self.headers)
        data = res.json()["data"]
        return {
            "用户名": data["name"],
            "性别": data["sex"],
            "个性签名": data["sign"],
            "用户等级": data["level"],
            "生日": data["birthday"],
        }

    def __downloadVideos(self, userid):
        """Download all videos of ``userid`` into a folder named after the id.

        Args:
            userid: Target user id as a string; also used as the folder name.
        """
        os.makedirs(userid, exist_ok=True)
        # Non-premium accounts can only fetch up to plain 1080P, hence [-3].
        quality = [("16", "流畅 360P"),
                   ("32", "清晰 480P"),
                   ("64", "高清 720P"),
                   ("74", "高清 720P60"),
                   ("80", "高清 1080P"),
                   ("112", "高清 1080P+"),
                   ("116", "高清 1080P60")][-3]
        # Only the numeric code goes to the API; the label is informational.
        qn = quality[0]

        # Collect the basic information of every uploaded video.
        videos = []
        for aid in self._fetch_aids(userid):
            title, cid_parts = self._fetch_video_meta(aid)
            videos.append({"aid": aid, "title": title, "cid_parts": cid_parts})
        print("共获取到用户ID<%s>的<%d>个视频..." % (userid, len(videos)))
        for video in videos:
            video["links"], video["is_dash"] = self._fetch_links(
                video["aid"], video["cid_parts"], qn)

        # Start downloading.
        aria2c_path = os.path.join(os.getcwd(), "tools/aria2c")
        ffmpeg_path = os.path.join(os.getcwd(), "tools/ffmpeg")
        for video in videos:
            print("正在下载视频<%s>..." % video["title"])
            if not self._download_video(video, userid, aria2c_path, ffmpeg_path):
                # Keep going: one broken video should not abort the batch.
                print("视频<%s>下载失败, 已跳过..." % video["title"])
        print("所有视频下载完成, 该用户所有视频保存在<%s>文件夹中..." % (userid))

    def _fetch_aids(self, userid):
        """Page through the user's submissions and return all video ids (aids)."""
        aids = []
        params = {"mid": userid, "pagesize": 30, "tid": 0, "page": 1, "order": "pubdate"}
        while True:
            res = self.session.get(self.submit_videos_url, headers=self.headers, params=params)
            data = res.json()["data"]
            vlist = data["vlist"]
            aids.extend(item["aid"] for item in vlist)
            # Also stop on an empty page: if the reported count is never
            # reached, the loop would otherwise request pages forever.
            if not vlist or len(aids) >= int(data["count"]):
                return aids
            params["page"] += 1

    def _fetch_video_meta(self, aid):
        """Return ``(file-name-safe title, [[cid, part], ...])`` for one video."""
        res = self.session.get(self.view_url, headers=self.headers, params={"aid": aid})
        data = res.json()["data"]  # parse the response body once
        cid_parts = [[page["cid"], page["part"]] for page in data["pages"]]
        return self._sanitize_title(data["title"]), cid_parts

    def _fetch_links(self, aid, cid_parts, qn):
        """Return ``(links, is_dash)`` for a video.

        NOTE: as in the original script, only the links of the LAST part of a
        multi-part video are kept; earlier parts are queried but overwritten.
        """
        links, is_dash = [], False
        for cid, _part in cid_parts:
            params = {"avid": aid, "cid": cid, "qn": qn, "otype": "json", "fnver": 0, "fnval": 16}
            res = self.session.get(self.video_player_url, params=params, headers=self.headers)
            links, is_dash = self._extract_links(res.json()["data"])
        return links, is_dash

    @staticmethod
    def _sanitize_title(title):
        """Replace characters unsuitable for a file name with spaces."""
        return Bilibili._ILLEGAL_TITLE_CHARS.sub(" ", title)

    @staticmethod
    def _extract_links(data):
        """Pull download URLs out of the ``data`` object of a playurl response.

        Returns:
            ``([video_urls, audio_urls], True)`` when ``data`` has a ``dash``
            entry, otherwise ``(urls, False)`` built from the last ``durl``
            entry. Each URL list holds the primary URL followed by backups.
        """
        if "dash" in data:
            streams = []
            for kind in ("video", "audio"):
                first = data["dash"][kind][0]
                streams.append([first["baseUrl"]] + list(first.get("backup_url") or []))
            return streams, True
        last = data["durl"][-1]
        return [last["url"]] + list(last.get("backup_url") or []), False

    @staticmethod
    def _aria2c_command(aria2c_path, urls, out_dir, filename, aid):
        """Build the aria2c argument list that downloads ``urls`` to one file."""
        connections = max(1, min(len(urls), Bilibili._MAX_ARIA2C_CONNECTIONS))
        return [
            aria2c_path, "-c", "-k", "1M",
            "-x", str(connections),
            "-d", out_dir,
            "-o", filename,
            "--referer=https://www.bilibili.com/video/av{}".format(aid),
        ] + list(urls)

    @staticmethod
    def _run(command, quiet=False):
        """Run ``command`` (an argument list) and return True on exit code 0.

        An argument list with no shell is used so that titles and URLs coming
        from the remote API can never be interpreted as shell syntax.
        """
        print(" ".join(command))
        # DEVNULL instead of PIPE: an unread pipe can fill up and block the
        # child process forever.
        stderr = subprocess.DEVNULL if quiet else None
        try:
            completed = subprocess.run(command, stderr=stderr)
        except OSError as err:
            print("无法执行<%s>: %s" % (command[0], err))
            return False
        return completed.returncode == 0

    @staticmethod
    def _download_video(video, userid, aria2c_path, ffmpeg_path):
        """Download one video into ``userid``/<title>.mp4; return True on success."""
        title, aid, links = video["title"], video["aid"], video["links"]
        if not links:
            return False
        final_path = os.path.join(userid, title + ".mp4")
        video_path = os.path.join(userid, title + ".flv")

        if not video["is_dash"]:
            # Single muxed stream: download it, then just give it an .mp4 name.
            if not Bilibili._run(Bilibili._aria2c_command(aria2c_path, links, userid, title + ".flv", aid)):
                return False
            os.replace(video_path, final_path)
            return True

        link_v, link_a = links
        audio_path = os.path.join(userid, title + ".aac")
        # -- video stream
        if not Bilibili._run(Bilibili._aria2c_command(aria2c_path, link_v, userid, title + ".flv", aid)):
            return False
        # -- audio stream
        if not Bilibili._run(Bilibili._aria2c_command(aria2c_path, link_a, userid, title + ".aac", aid)):
            return False
        # -- merge both streams without re-encoding
        merge = [ffmpeg_path, "-i", video_path, "-i", audio_path,
                 "-c", "copy", "-f", "mp4", "-y", final_path]
        if not Bilibili._run(merge, quiet=True):
            # Keep the parts so a later run can retry the merge.
            return False
        os.remove(video_path)
        os.remove(audio_path)
        return True

    @staticmethod
    def login(username, password):
        """Log in to Bilibili through the open-source DecryptLogin library.

        Returns:
            The session object produced by DecryptLogin for this account.
        """
        # Imported here so the module can be imported without DecryptLogin.
        from DecryptLogin import login as decrypt_login

        _, session = decrypt_login.Login().bilibili(username, password)
        return session


def main():
    """Parse the command-line credentials and start the interactive downloader."""
    parser = argparse.ArgumentParser(description="下载B站指定用户的所有视频(仅支持Windows下使用)")
    parser.add_argument("--username", dest="username", help="xxx", type=str, required=True)
    parser.add_argument("--password", dest="password", help="xxxx", type=str, required=True)
    # Read the real command line instead of hard-coded placeholder values.
    args = parser.parse_args()
    bili = Bilibili(args.username, args.password)
    bili.run()


if __name__ == "__main__":
    main()