Python 爬取B站原視頻的實例代碼
B站原視頻爬取,我就不多說,直接上代碼,直接運行就好。B站是把視頻和音頻分開的,要把這2個文件合併起來才能使用,這一點需要分析才能看出來。然後就是登錄這塊是比較難的。
import os
import re
import argparse
import subprocess
import prettytable
from DecryptLogin import login


# Characters replaced by a space when a video title is used as a file name:
# typographic/ASCII quotes, path separators, characters Windows forbids in
# file names, and any whitespace.
_INVALID_TITLE_CHARS = re.compile(r"""[‘’/\\:*?"<>|\s']""")


class Bilibili():
    """Download every video submitted by a Bilibili user.

    Bilibili serves most videos as separate DASH video and audio streams;
    both are fetched with ``aria2c`` and merged with ``ffmpeg``. Both tools
    are expected under ``<current working directory>/tools``.
    """

    # (qn code, label) pairs accepted by the playurl API, lowest to highest.
    _QUALITIES = [
        ('16', '流暢 360P'),
        ('32', '清晰 480P'),
        ('64', '高清 720P'),
        ('74', '高清 720P60'),
        ('80', '高清 1080P'),
        ('112', '高清 1080P+'),
        ('116', '高清 1080P60'),
    ]

    def __init__(self, username, password, **kwargs):
        """Log in and set up the API endpoints.

        Args:
            username: Bilibili account name passed to the login helper.
            password: Bilibili account password passed to the login helper.
            **kwargs: Accepted for backward compatibility; unused.
        """
        self.username = username
        self.password = password
        self.session = Bilibili.login(username, password)
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
        }
        self.user_info_url = 'http://api.bilibili.com/x/space/acc/info'
        self.submit_videos_url = 'http://space.bilibili.com/ajax/member/getSubmitVideos'
        self.view_url = 'http://api.bilibili.com/x/web-interface/view'
        self.video_player_url = 'http://api.bilibili.com/x/player/playurl'

    def run(self):
        """Interactive main loop: ask for a user id, show the profile, download.

        Loops forever; interrupt the process (Ctrl-C) to stop.
        """
        while True:
            userid = input('請輸入目標用戶ID(例:345993405)(我的一個LOL好友凱撒可以關注他一下 謝謝) ——> ').strip()
            if not userid:
                # An empty id cannot be used as a directory name or API key.
                continue
            user_info = self.__getUserInfo(userid)
            tb = prettytable.PrettyTable()
            tb.field_names = list(user_info.keys())
            tb.add_row(list(user_info.values()))
            print('獲取的用戶信息如下:')
            print(tb)
            is_download = input('是否下載該用戶的所有視頻(y/n, 默認: y) ——> ')
            # An empty answer counts as "yes" (the advertised default).
            if is_download in ('y', 'yes') or not is_download:
                self.__downloadVideos(userid)

    def __getUserInfo(self, userid):
        """Return basic profile fields of ``userid`` as an ordered dict.

        Args:
            userid: The target user's numeric id (``mid``), as a string.

        Returns:
            dict mapping display labels to the values found under ``data``
            in the user-info API response.
        """
        params = {'mid': userid, 'jsonp': 'jsonp'}
        res = self.session.get(self.user_info_url, params=params, headers=self.headers)
        data = res.json()['data']
        return {
            '用戶名': data['name'],
            '性別': data['sex'],
            '個性簽名': data['sign'],
            '用戶等級': data['level'],
            '生日': data['birthday'],
        }

    def __downloadVideos(self, userid):
        """Download all videos of ``userid`` into a directory named ``userid``.

        Args:
            userid: The target user's numeric id, as a string. Also used as
                the output directory name (relative to the working directory).
        """
        os.makedirs(userid, exist_ok=True)
        # Non-member accounts can download at most 1080P, hence entry [-3].
        quality_code, _ = self._QUALITIES[-3]

        # Gather basic information for every submitted video.
        videos = []
        for aid in self._listVideoIds(userid):
            cid_parts, title = self._getVideoMeta(aid)
            videos.append({'aid': aid, 'cid_parts': cid_parts, 'title': title})
        print('共獲取到用戶ID<%s>的<%d>個視頻...' % (userid, len(videos)))
        for video in videos:
            video['links'], video['is_dash'] = self._getLinks(
                video['aid'], video['cid_parts'], quality_code)

        # Start downloading.
        aria2c_path = os.path.join(os.getcwd(), 'tools/aria2c')
        ffmpeg_path = os.path.join(os.getcwd(), 'tools/ffmpeg')
        for video in videos:
            print('正在下載視頻<%s>...' % video['title'])
            if not self._downloadOne(video, userid, aria2c_path, ffmpeg_path):
                print('視頻<%s>下載失敗, 已跳過...' % video['title'])
        print('所有視頻下載完成, 該用戶所有視頻保存在<%s>文件夾中...' % (userid))

    def _listVideoIds(self, userid):
        """Return the ``aid`` of every video submitted by ``userid``."""
        aids = []
        params = {'mid': userid, 'pagesize': 30, 'tid': 0, 'page': 1, 'order': 'pubdate'}
        while True:
            res = self.session.get(self.submit_videos_url, headers=self.headers, params=params)
            data = res.json()['data']
            vlist = data['vlist'] or []
            aids.extend(item['aid'] for item in vlist)
            # Stop on an empty page too: otherwise a `count` larger than the
            # number of listable videos would make this loop run forever.
            if not vlist or len(aids) >= int(data['count']):
                return aids
            params['page'] += 1

    def _getVideoMeta(self, aid):
        """Return ``(cid_parts, title)`` for one video.

        ``cid_parts`` is a list of ``[cid, part_name]`` pairs; ``title`` has
        every character unsuitable for a file name replaced by a space.
        """
        res = self.session.get(self.view_url, headers=self.headers, params={'aid': aid})
        data = res.json()['data']
        cid_parts = [[page['cid'], page['part']] for page in data['pages']]
        title = _INVALID_TITLE_CHARS.sub(' ', data['title'])
        return cid_parts, title

    def _getLinks(self, aid, cid_parts, quality_code):
        """Resolve download URLs for a video.

        Only the links of the LAST part are kept, matching the original
        behavior (one output file per video).

        Returns:
            ``(links, is_dash)``. When ``is_dash`` is true, ``links`` is
            ``[video_urls, audio_urls]``; otherwise it is a flat list of
            mirror URLs for a single muxed file. ``links`` is empty when the
            video has no parts.
        """
        links, is_dash = [], False
        for cid, _part in cid_parts:
            params = {'avid': aid, 'cid': cid, 'qn': quality_code, 'otype': 'json', 'fnver': 0, 'fnval': 16}
            res = self.session.get(self.video_player_url, params=params, headers=self.headers)
            data = res.json()['data']
            # Keep the flag in sync with the shape of `links`, so the caller
            # never unpacks a flat list as a (video, audio) pair.
            is_dash = 'dash' in data
            if is_dash:
                v, a = data['dash']['video'][0], data['dash']['audio'][0]
                links = [
                    [v['baseUrl']] + list(v.get('backup_url') or []),
                    [a['baseUrl']] + list(a.get('backup_url') or []),
                ]
            else:
                last = data['durl'][-1]
                links = [last['url']] + list(last.get('backup_url') or [])
        return links, is_dash

    @staticmethod
    def _aria2c(aria2c_path, urls, out_dir, filename, aid):
        """Download one file with aria2c; return True on exit status 0.

        ``urls`` are mirrors of the same file. The command is passed as an
        argument list (no shell), so titles cannot break or inject into it.
        """
        command = [
            aria2c_path, '-c', '-k', '1M',
            # aria2c accepts 1..16 connections per server.
            '-x', str(min(max(len(urls), 1), 16)),
            '-d', out_dir,
            '-o', filename,
            '--referer=https://www.bilibili.com/video/av{}'.format(aid),
        ] + list(urls)
        print(' '.join(command))
        return subprocess.run(command).returncode == 0

    def _downloadOne(self, video, userid, aria2c_path, ffmpeg_path):
        """Download (and, for DASH, merge) one video; return True on success."""
        title, aid, links = video['title'], video['aid'], video['links']
        if not links:
            return False
        flv_path = os.path.join(userid, title + '.flv')
        mp4_path = os.path.join(userid, title + '.mp4')

        if not video['is_dash']:
            if not self._aria2c(aria2c_path, links, userid, title + '.flv', aid):
                return False
            # os.replace overwrites an existing target on every platform.
            os.replace(flv_path, mp4_path)
            return True

        link_v, link_a = links
        aac_path = os.path.join(userid, title + '.aac')
        # -- video stream
        if not self._aria2c(aria2c_path, link_v, userid, title + '.flv', aid):
            return False
        # -- audio stream
        if not self._aria2c(aria2c_path, link_a, userid, title + '.aac', aid):
            return False
        # -- merge
        command = [ffmpeg_path, '-i', flv_path, '-i', aac_path,
                   '-c', 'copy', '-f', 'mp4', '-y', mp4_path]
        print(' '.join(command))
        # Discard ffmpeg's chatter via DEVNULL: an unread PIPE can fill up
        # and block the child forever.
        merged = subprocess.run(command, stderr=subprocess.DEVNULL).returncode == 0
        if merged:
            # Keep the separate streams when merging failed so nothing is lost.
            os.remove(flv_path)
            os.remove(aac_path)
        return merged

    @staticmethod
    def login(username, password):
        """Log in to Bilibili via the open-source DecryptLogin library.

        Returns:
            The authenticated session object returned by DecryptLogin.
        """
        # `login` here is the module imported from DecryptLogin, not this
        # method: class-level names are not visible inside method bodies.
        _, session = login.Login().bilibili(username, password)
        return session


def main():
    """Parse ``--username`` / ``--password`` from the command line and run."""
    parser = argparse.ArgumentParser(description='下載B站指定用戶的所有視頻(僅支持Windows下使用)')
    parser.add_argument('--username', dest='username', help='B站登錄用戶名', type=str, required=True)
    parser.add_argument('--password', dest='password', help='B站登錄密碼', type=str, required=True)
    args = parser.parse_args()
    bili = Bilibili(args.username, args.password)
    bili.run()


if __name__ == '__main__':
    main()
把賬號密碼填上就行。這是我根據一個微信公眾號Charles大佬的想法寫的。大家可以去關注他一下。
以上就是python 爬取B站原視頻的實例代碼的詳細內容,更多關于python 爬取B站原視頻的資料請關注好吧啦網其它相關文章!
相關(guān)文章:
1. Java 3D的動畫展示(Part1-使用JMF) 2. IntelliJ Idea 2020.1 正式發布,官方支持中文(必看) 3. Django ORM實現按天獲取數據去重求和例子 4. layui Ajax請求給下拉框賦值的實例 5. Django nginx配置實現過程詳解 6. Python基于jieba, wordcloud庫生成中文詞云 7. Java實現基于http協議的網絡文件下載 8. JavaScript中Object、map、weakmap的區別分析 9. 小技巧處理div內容溢出 10. Django中的AutoField字段使用
