怎么用Python爬取抖音小视频?资深程序员都这样爬取的(附源码)
简介
抖音,是一款可以拍短视频的音乐创意短视频社交软件,该软件于2016年9月上线,是一个专注年轻人的15秒音乐短视频社区。用户可以通过这款软件选择歌曲,拍摄15秒的音乐短视频,形成自己的作品。此APP已在Android各大应用商店和App Store上线。
今天咱们就用Python爬取抖音视频。
准备:
环境:Python3.6+Windows
IDE:你开心就好,喜欢用哪个就用哪个
模块:
1from splinter.driver.webdriver.chrome import Options, Chrome
2from splinter.browser import Browser
3from contextlib import closing
4 import requests, json, time, re, os, sys, time
5from bs4 import BeautifulSoup
获得视频播放地址
参数 user_id:查询的用户ID
返回 video_names:视频名字列表
返回 video_urls:视频链接列表
返回 nickname:用户昵称
def get_video_urls(self, user_id):
    """Collect the file names and share links of one user's videos.

    Parameters:
        user_id: the ID to search for; it is compared against the
            ``unique_id`` field of the first search hit.
    Returns:
        video_names: list of file names, one per video
        video_urls: list of share links, parallel to ``video_names``
        nickname: nickname of the matched user
    """
    video_names = []
    video_urls = []
    unique_id = ''
    # Repeat the search until the first hit is the requested account.
    # NOTE(review): there is no retry limit, so an ID that never comes back
    # as the first hit keeps this loop (and the requests) running forever.
    while unique_id != user_id:
        # NOTE(review): the published listing stripped the scheme and
        # ".com" from this URL and cut it off after "app_name=aweme";
        # scheme/host were restored and the literal closed there. Any
        # further query parameters of the original script are unknown.
        search_url = (
            'https://api.amemv.com/aweme/v1/discover/search/'
            '?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry'
            '&iid=17900846586&device_id=34692364855&ac=wifi'
            '&channel=xiaomi&aid=1128&app_name=aweme'
        ) % user_id
        req = requests.get(url=search_url, verify=False)
        html = json.loads(req.text)
        user_info = html['user_list'][0]['user_info']
        aweme_count = user_info['aweme_count']
        uid = user_info['uid']
        nickname = user_info['nickname']
        unique_id = user_info['unique_id']

    # Ask for all of the user's posts in one page (count = aweme_count).
    # NOTE(review): host restored as www.douyin.com - confirm.
    user_url = (
        'https://www.douyin.com/aweme/v1/aweme/post/'
        '?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
    )
    req = requests.get(url=user_url, verify=False)
    html = json.loads(req.text)

    i = 1
    for each in html['aweme_list']:
        share_desc = each['share_info']['share_desc']
        # Videos that only carry the default description get a running
        # number as their name. The literal uses ordinary CJK characters:
        # the listing's look-alike radicals could never equal server data.
        if share_desc == '抖音-原创音乐短视频社区':
            video_names.append(str(i) + '.mp4')
            i += 1
        else:
            video_names.append(share_desc + '.mp4')
        video_urls.append(each['share_info']['share_url'])

    return video_names, video_urls, nickname
获得带水印的视频播放地址
参数 video_url:带水印的视频播放地址
返回 download_url:带水印的视频下载地址
def get_download_url(self, video_url):
    """Resolve a share-page link to the (watermarked) video file link.

    Parameters:
        video_url: link of the video's share page
    Returns:
        download_url: first entry of the page's ``play_addr.url_list``
    Raises:
        IndexError: if the page has no script tag or the last script
            holds no ``var data = [...];`` assignment.
    """
    req = requests.get(url=video_url, verify=False)
    bf = BeautifulSoup(req.text, 'lxml')
    # The JSON payload is read from the last <script> element of the page.
    script = bf.find_all('script')[-1]
    # Raw string: same pattern, without the invalid-escape warning.
    video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
    video_html = json.loads(video_url_js)
    download_url = video_html['video']['play_addr']['url_list'][0]
    return download_url
视频下载
参数 video_url:带水印的视频地址
参数 video_name:视频名
参数 watermark_flag:是否下载不带水印的视频
def video_downloader(self, video_url, video_name, watermark_flag=True):
    """Download one video to ``video_name`` while printing progress.

    Parameters:
        video_url: share link of the (watermarked) video
        video_name: path of the file to write
        watermark_flag: when true, resolve the link through
            ``remove_watermark``; otherwise through ``get_download_url``
    Returns:
        None. Nothing is written when the response status is not 200.
    """
    if watermark_flag:
        video_url = self.remove_watermark(video_url)
    else:
        video_url = self.get_download_url(video_url)

    size = 0
    chunk_size = 1024
    # closing() releases the streamed connection even if writing fails.
    with closing(requests.get(video_url, stream=True, verify=False)) as response:
        if response.status_code != 200:
            return
        # Read the length only for a successful response; a missing header
        # is treated as "unknown" (0) instead of raising KeyError.
        content_size = int(response.headers.get('content-length', 0))
        sys.stdout.write(' [⽂件⼤⼩]:%0.2f MB\n' % (content_size / chunk_size / 1024))

        with open(video_name, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                size += len(data)
                # Skip the percentage when the total size is unknown to
                # avoid dividing by zero.
                if content_size:
                    sys.stdout.write(' [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
                    sys.stdout.flush()
获得无水印的视频播放地址
def remove_watermark(self, video_url):
    """Look up the watermark-free link for a video via a helper web page.

    Parameters:
        video_url: share link of the watermarked video
    Returns:
        The ``href`` of the first link found in the page's result panel.
    """
    # NOTE(review): the published listing shows only 'douyin.iiilab/';
    # scheme and ".com" were restored - confirm the exact address.
    self.driver.visit('http://douyin.iiilab.com/')
    # Submit the share link through the page's form.
    self.driver.find_by_tag('input').fill(video_url)
    self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
    # Parse the result paragraph and return the link it contains.
    html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html
    bf = BeautifulSoup(html, 'lxml')
    return bf.find('a').get('href')
下载视频
def run(self):
    """Ask for a user ID and download all of that user's videos.

    Videos are saved into a directory named after the user's nickname,
    created in the current working directory when missing.

    Parameters:
        None
    Returns:
        None
    """
    self.hello()
    user_id = input('请输⼊ID(例如40103580):')
    video_names, video_urls, nickname = self.get_video_urls(user_id)
    # exist_ok avoids the check-then-create race of listdir() + mkdir().
    os.makedirs(nickname, exist_ok=True)
    print('视频下载中:共有%d个作品!\n' % len(video_urls))
    for num, (raw_name, url) in enumerate(zip(video_names, video_urls), start=1):
        print(' 解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, url))
        # Strip both kinds of path separator; the former if/elif removed
        # only one kind when a title contained both.
        video_name = raw_name.replace('\\', '').replace('/', '')
        self.video_downloader(url, os.path.join(nickname, video_name))
        print('\n')

    print('下载完成!')
全部代码
1 +# -*- coding:utf-8 -*-
2
3 +Python学习交流:125240963
4 +Python学习交流:125240963
5 +Python学习交流:125240963
6
7 +from splinter.driver.webdriver.chrome import Options, Chrome
8 +from splinter.browser import Browser
9 +from contextlib import closing
10 +import requests, json, time, re, os, sys, time
11 +from bs4 import BeautifulSoup
12 +
class DouYin(object):
    """Download the videos published by one Douyin user.

    The user is looked up through a search endpoint, the share links of
    the user's posts are listed, and each video is saved into a directory
    named after the user's nickname. A headless Chrome session driven
    through splinter is used to obtain watermark-free links.
    """

    def __init__(self, width=500, height=300):
        """Start the headless browser used by ``remove_watermark``.

        Parameters:
            width: kept for backward compatibility; not used here.
            height: kept for backward compatibility; not used here.
        """
        # Headless browser
        chrome_options = Options()
        chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"')
        self.driver = Browser(driver_name='chrome', executable_path='D:/chromedriver', options=chrome_options, headless=True)

    def get_video_urls(self, user_id):
        """Collect the file names and share links of one user's videos.

        Parameters:
            user_id: the ID to search for; it is compared against the
                ``unique_id`` field of the first search hit.
        Returns:
            video_names: list of file names, one per video
            video_urls: list of share links, parallel to ``video_names``
            nickname: nickname of the matched user
        """
        video_names = []
        video_urls = []
        unique_id = ''
        # Repeat the search until the first hit is the requested account.
        # NOTE(review): there is no retry limit, so an ID that never comes
        # back as the first hit keeps this loop running forever.
        while unique_id != user_id:
            # NOTE(review): the published listing stripped the scheme and
            # ".com" from this URL and cut it off after "app_name=aweme";
            # scheme/host were restored and the literal closed there. Any
            # further query parameters of the original script are unknown.
            search_url = (
                'https://api.amemv.com/aweme/v1/discover/search/'
                '?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry'
                '&iid=17900846586&device_id=34692364855&ac=wifi'
                '&channel=xiaomi&aid=1128&app_name=aweme'
            ) % user_id
            req = requests.get(url=search_url, verify=False)
            html = json.loads(req.text)
            user_info = html['user_list'][0]['user_info']
            aweme_count = user_info['aweme_count']
            uid = user_info['uid']
            nickname = user_info['nickname']
            unique_id = user_info['unique_id']

        # Ask for all of the user's posts in one page (count = aweme_count).
        # NOTE(review): host restored as www.douyin.com - confirm.
        user_url = (
            'https://www.douyin.com/aweme/v1/aweme/post/'
            '?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
        )
        req = requests.get(url=user_url, verify=False)
        html = json.loads(req.text)

        i = 1
        for each in html['aweme_list']:
            share_desc = each['share_info']['share_desc']
            # Videos that only carry the default description get a running
            # number as their name. The literal uses ordinary CJK
            # characters: the listing's look-alike radicals could never
            # equal data returned by the server.
            if share_desc == '抖音-原创音乐短视频社区':
                video_names.append(str(i) + '.mp4')
                i += 1
            else:
                video_names.append(share_desc + '.mp4')
            video_urls.append(each['share_info']['share_url'])

        return video_names, video_urls, nickname

    def get_download_url(self, video_url):
        """Resolve a share-page link to the (watermarked) video file link.

        Parameters:
            video_url: link of the video's share page
        Returns:
            download_url: first entry of the page's ``play_addr.url_list``
        Raises:
            IndexError: if the page has no script tag or the last script
                holds no ``var data = [...];`` assignment.
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, 'lxml')
        # The JSON payload is read from the last <script> element.
        script = bf.find_all('script')[-1]
        # Raw string: same pattern, without the invalid-escape warning.
        video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
        video_html = json.loads(video_url_js)
        download_url = video_html['video']['play_addr']['url_list'][0]
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to ``video_name`` while printing progress.

        Parameters:
            video_url: share link of the (watermarked) video
            video_name: path of the file to write
            watermark_flag: when true, resolve the link through
                ``remove_watermark``; otherwise through ``get_download_url``
        Returns:
            None. Nothing is written when the response status is not 200.
        """
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)

        size = 0
        chunk_size = 1024
        # closing() releases the streamed connection even if writing fails.
        with closing(requests.get(video_url, stream=True, verify=False)) as response:
            if response.status_code != 200:
                return
            # Read the length only for a successful response; a missing
            # header is treated as "unknown" (0) instead of raising.
            content_size = int(response.headers.get('content-length', 0))
            sys.stdout.write(' [⽂件⼤⼩]:%0.2f MB\n' % (content_size / chunk_size / 1024))

            with open(video_name, "wb") as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    size += len(data)
                    # Skip the percentage when the total size is unknown
                    # to avoid dividing by zero.
                    if content_size:
                        sys.stdout.write(' [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Look up the watermark-free link for a video via a helper page.

        Parameters:
            video_url: share link of the watermarked video
        Returns:
            The ``href`` of the first link found in the page's result panel.
        """
        # NOTE(review): the published listing shows only 'douyin.iiilab/';
        # scheme and ".com" were restored - confirm the exact address.
        self.driver.visit('http://douyin.iiilab.com/')
        # Submit the share link through the page's form.
        self.driver.find_by_tag('input').fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        # Parse the result paragraph and return the link it contains.
        html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, 'lxml')
        return bf.find('a').get('href')

    @staticmethod
    def _clean_file_name(name):
        """Return ``name`` with every '\\' and '/' removed."""
        return name.replace('\\', '').replace('/', '')

    def run(self):
        """Ask for a user ID and download all of that user's videos.

        Videos are saved into a directory named after the user's nickname,
        created in the current working directory when missing.

        Parameters:
            None
        Returns:
            None
        """
        self.hello()
        user_id = input('请输⼊ID(例如40103580):')
        video_names, video_urls, nickname = self.get_video_urls(user_id)
        # exist_ok avoids the check-then-create race of listdir() + mkdir().
        os.makedirs(nickname, exist_ok=True)
        print('视频下载中:共有%d个作品!\n' % len(video_urls))
        for num, (raw_name, url) in enumerate(zip(video_names, video_urls), start=1):
            print(' 解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, url))
            # Both separators are stripped; the former if/elif removed
            # only one kind when a title contained both.
            video_name = self._clean_file_name(raw_name)
            self.video_downloader(url, os.path.join(nickname, video_name))
            print('\n')

        print('下载完成!')

    def hello(self):
        """Print the welcome banner.

        Parameters:
            None
        Returns:
            None
        """
        print('*' * 100)
        print('\t\t\t\t抖⾳App视频下载⼩助⼿')
        print('\t\t作者:Python学习交流:125240963')
        print('*' * 100)
160 +
161 +
if __name__ == '__main__':
    # Script entry point: build the downloader and start the interactive run.
    douyin = DouYin()
    douyin.run()
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论