本文共 5104 字,大约阅读时间需要 17 分钟。
今天来更新第二章
第二章是爬取网易云新歌热门榜单。提示:在浏览器中可以用 Ctrl + U 快捷键查看网页源代码。
# Snippet 1 (original prose: "as shown in the figure below"):
# fetch the NetEase Cloud Music toplist page and print every song
# title together with its relative link, to confirm the parsing works
# before writing the download logic.
import os
import requests
from bs4 import BeautifulSoup

# # Playlist address (browser form, with the '#' fragment)
# url = 'https://music.163.com/#/discover/toplist?id=3778678'
# # Outer-link endpoint for playable audio
# baser_url = 'http://music.163.com/song/media/outer/url'

# Request the page WITHOUT the '#' fragment — the fragment version only
# returns an empty JS shell, not the song list.
url = 'https://music.163.com/discover/toplist?id=3778678'
# Browser User-Agent so the site serves the real page.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36'
}

# Fixed typo: 'respoen' -> 'response'.
response = requests.get(url=url, headers=headers).text
soup = BeautifulSoup(response, 'lxml')
# The hidden <ul class="f-hide"> element carries the plain-text song list.
song_list = soup.find('ul', class_='f-hide')
# print(song_list)
for music in song_list.find_all('a'):
    # Each <a> holds the song title (text) and '/song?id=...' (href).
    print('{} : {}'.format(music.text, music['href']))
# Original note: "we confirmed the data is correct here, so we keep
# writing the next step below".
# Snippet 2: same page fetch as snippet 1, but now convert each song's
# relative href into a directly playable outer-link MP3 URL.
import os
import requests
from bs4 import BeautifulSoup

# # Playlist address (browser form, with the '#' fragment)
# url = 'https://music.163.com/#/discover/toplist?id=3778678'
# # Outer-link endpoint for playable audio
# baser_url = 'http://music.163.com/song/media/outer/url'

url = 'https://music.163.com/discover/toplist?id=3778678'
# Browser User-Agent so the site serves the real page.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36'
}

# Fixed typo: 'respoen' -> 'response'.
response = requests.get(url=url, headers=headers).text
soup = BeautifulSoup(response, 'lxml')
song_list = soup.find('ul', class_='f-hide')
# print(song_list)
for music in song_list.find_all('a'):
    # print(music)
    # print('{} : {}'.format(music.text, music['href']))
    # href looks like '/song?id=123456'; slicing off the 5-char '/song'
    # prefix leaves '?id=123456', which appended to the outer-link
    # endpoint (plus '.mp3') yields a playable MP3 address.
    music_url = 'http://music.163.com/song/media/outer/url' + music['href'][5:] + '.mp3'
    print(music_url)
# Snippet 3: collect (name, mp3-url) pairs for every song, then download
# each MP3 into ./music/.
#
# Fixes vs. the original:
#  - 'respoen' typo -> 'response'
#  - inner variable 'list' shadowed the builtin -> renamed
#  - ./music/ was opened without being created first -> ensured here
#  - song titles containing '/' etc. crashed open() -> sanitized
#  - bare 'except:' swallowed KeyboardInterrupt -> narrowed
#  - progress message typo '在在下载' -> '正在下载'
import os
import re
import requests
from bs4 import BeautifulSoup

# # Playlist address (browser form, with the '#' fragment)
# url = 'https://music.163.com/#/discover/toplist?id=3778678'
# # Outer-link endpoint for playable audio
# baser_url = 'http://music.163.com/song/media/outer/url'

url = 'https://music.163.com/discover/toplist?id=3778678'
# Browser User-Agent so the site serves the real page.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36'
}

# Ensure the output directory exists before any file is written.
os.makedirs('./music/', exist_ok=True)

response = requests.get(url=url, headers=headers).text
soup = BeautifulSoup(response, 'lxml')
main = soup.find('ul', class_='f-hide')
# print(main)

songs = []
for music in main.find_all('a'):
    # href is '/song?id=123456'; dropping the 5-char '/song' prefix
    # leaves '?id=123456' for the outer-link endpoint.
    music_url = 'http://music.163.com/song/media/outer/url' + music['href'][5:] + '.mp3'
    # print(music_url)
    music_name = music.text
    # print(music_name)
    # One (name, url) pair per song.
    songs.append((music_name, music_url))

# Download each collected song.
for name, song_url in songs:
    try:
        print(f'歌曲{name}正在下载', end='\t\t')
        # Raw MP3 byte stream.
        res = requests.get(song_url, headers=headers)
        # Strip characters that are illegal in file names (e.g. '/').
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', name)
        with open('./music/' + safe_name + '.mp3', 'wb') as file:
            file.write(res.content)
        print(name, '下载成功')
    except (requests.RequestException, OSError):
        # Network or file-system failure for this one song; keep going.
        print('下载失败')
4. 我们来封装一下代码,保持可读性。
# Final version: the scraping/downloading logic wrapped in a function
# for readability.
#
# Fixes vs. the original:
#  - 'respoen' typo -> 'response'
#  - inner variable 'list' shadowed the builtin -> renamed
#  - song titles containing '/' etc. crashed open() -> sanitized
#  - bare 'except:' swallowed KeyboardInterrupt -> narrowed
import os
import re
import requests
from bs4 import BeautifulSoup

# # Playlist address (browser form, with the '#' fragment)
# url = 'https://music.163.com/#/discover/toplist?id=3778678'
# # Outer-link endpoint for playable audio
# baser_url = 'http://music.163.com/song/media/outer/url'

# Browser User-Agent so the site serves the real page.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36'
}

# Create a folder to hold all the MP3 files if it is not there yet.
if not os.path.exists('./music/'):
    os.mkdir('./music/')


def _safe_filename(name):
    """Replace characters that are illegal in file names with '_'."""
    return re.sub(r'[\\/:*?"<>|]', '_', name)


def down(url):
    """Parse the toplist page at *url* and download every song as MP3.

    The hidden <ul class="f-hide"> on the page lists the songs; each
    song's '/song?id=...' href is turned into an outer-link MP3 URL and
    the audio is saved under ./music/<song name>.mp3. Failures for a
    single song are reported and skipped.
    """
    response = requests.get(url=url, headers=headers).text
    soup = BeautifulSoup(response, 'lxml')
    main = soup.find('ul', class_='f-hide')
    # print(main)

    songs = []
    for music in main.find_all('a'):
        # print('{} : {}'.format(music.text, music['href']))
        # href is '/song?id=123456'; dropping the 5-char '/song' prefix
        # leaves '?id=123456' for the outer-link endpoint.
        music_url = 'http://music.163.com/song/media/outer/url' + music['href'][5:] + '.mp3'
        music_name = music.text
        # One (name, url) pair per song.
        songs.append((music_name, music_url))

    for name, song_url in songs:
        try:
            print(f'歌曲{name}正在下载', end='\t\t')
            # Raw MP3 byte stream.
            res = requests.get(song_url, headers=headers)
            with open('./music/' + _safe_filename(name) + '.mp3', 'wb') as file:
                file.write(res.content)
            print(name, '下载成功')
        except (requests.RequestException, OSError):
            # Network or file-system failure for this one song; keep going.
            print('下载失败')


if __name__ == '__main__':
    # Toplist (song list) address.
    url = 'https://music.163.com/discover/toplist?id=3778678'
    down(url)
    # Single-threaded, so it can be slow with many songs; see the
    # multithreading article for a faster version.
总结
网易云的歌曲链接使用的是外链地址,所以根据以上分析结果,把上面红色部分的 ID 数字换成网易云播放页面的 id 即可。
实例:转载地址:http://toywi.baihongyu.com/