抓取百度音乐频道歌曲url
参考了 http://blog.csdn.net/banguijun/article/details/11815263
后写了一个抓取百度 FM 音乐 URL 的模块，可以得到所有频道歌曲的 URL。缺点是百度会变更 URL 地址，地址失效后需要重新抓取。话不多说，上代码：
# -*- coding: UTF-8 -*-
"""Collect the download URLs of the songs on Baidu FM's channels.

Created on 2013-12-3

The script reads the channel list embedded in the Baidu FM home page,
asks the playlist API for the songs of every channel, resolves each song
id to its download link(s) and writes one link per line to ``SAVEPATH``.

NOTE: according to the original author, Baidu changes these URLs over
time, so a saved list goes stale and has to be fetched again.

@author:
"""
import json
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2

# Storage location of the result file; change it to a path of your own.
SAVEPATH = '/home/gfc/musiclistnew.txt'
# Playlist API: append a channel id to get that channel's song list.
getlisturl = "http://fm.baidu.com/dev/api/?tn=playlist&format=json&id="
# Link API: append a song id to get that song's download address(es).
getmusicrealurl = 'http://music.baidu.com/data/music/fmlink?type=mp3&rate=320&songIds='


def fetch_text(url):
    """Download *url* and return the response body decoded as UTF-8."""
    response = urlopen(url)
    try:
        return response.read().decode("utf-8")
    finally:
        # Close the connection even when reading or decoding fails.
        response.close()


def extract_channel_ids(html):
    """Return the channel ids found in the ``rawChannelList`` JSON of *html*.

    The home page embeds the channel list as a JavaScript object literal
    that follows the name ``rawChannelList``.

    Raises:
        ValueError: if the marker or the JSON object after it is missing
            or is not valid JSON.
        KeyError: if the decoded object lacks ``channel_list`` or an entry
            lacks ``channel_id``.
    """
    marker = html.find("rawChannelList")
    if marker == -1:
        raise ValueError("rawChannelList not found in page")
    start = html.find("{", marker)
    if start == -1:
        raise ValueError("no JSON object found after rawChannelList")
    # raw_decode stops at the end of the first JSON value, so whatever
    # JavaScript follows it is ignored and a ';' inside a string value
    # cannot truncate the data (cutting at the first ';' could).
    data, _ = json.JSONDecoder().raw_decode(html[start:])
    return [item['channel_id'] for item in data['channel_list']]


def iter_song_links(channel_id, fetch=fetch_text):
    """Yield the download link of every song on the channel *channel_id*.

    Args:
        channel_id: channel id as taken from the channel list; it is
            appended to ``getlisturl`` unchanged.
        fetch: callable mapping a URL to its response text; defaults to a
            real HTTP download.

    Responses without a playlist, without ``data``/``songList`` or with an
    empty ``songLink`` are skipped instead of aborting the whole run.
    """
    playlist = json.loads(fetch(getlisturl + channel_id)).get('list') or []
    for song in playlist:
        payload = json.loads(fetch(getmusicrealurl + str(song['id'])))
        song_data = payload.get('data') or {}
        for entry in song_data.get('songList') or []:
            link = entry.get('songLink')
            if link:
                yield link


def main():
    """Crawl every channel and write one song URL per line to SAVEPATH."""
    channel_ids = extract_channel_ids(fetch_text("http://fm.baidu.com/"))
    # The with-block closes the file even if a request fails half-way.
    with open(SAVEPATH, 'w') as output:
        for channel_id in channel_ids:
            for link in iter_song_links(channel_id):
                output.write(link)
                output.write('\n')


if __name__ == '__main__':
    main()
浙公网安备 33010602011771号