#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import urllib2
# Charset conversion
def mdcode(str, encoding='utf-8'):
    """Convert text of unknown charset into ``encoding``.

    Byte input is decoded by trying, in order, utf-8, gbk, gb2312, gb18030
    and utf-16; the first charset for which the whole conversion succeeds
    wins.

    Args:
        str: Unicode text or a byte string. The parameter name shadows the
            builtin; it is kept so the signature stays unchanged.
        encoding: Target codec name. The pseudo-name ``'unicode'`` requests
            decoded text instead of encoded bytes.

    Returns:
        Bytes encoded in ``encoding``, or unicode text when ``encoding`` is
        ``'unicode'``.

    Raises:
        ValueError: If byte input cannot be converted using any of the
            candidate charsets.
    """
    want_text = encoding == 'unicode'

    # type(u'') is the text type on both Python 2 (unicode) and Python 3
    # (str); the builtin ``str`` cannot be used because the parameter
    # shadows it.
    if isinstance(str, type(u'')):
        # Already decoded: 'unicode' is not a real codec, so hand the text
        # back instead of calling .encode('unicode').
        return str if want_text else str.encode(encoding)

    for c in ('utf-8', 'gbk', 'gb2312', 'gb18030', 'utf-16'):
        try:
            text = str.decode(c)
            return text if want_text else text.encode(encoding)
        except UnicodeError:
            # Wrong guess (or not representable in the target encoding):
            # fall through to the next candidate charset.
            continue

    # String exceptions are not valid; raise a real exception type.
    raise ValueError('Unknown charset')
# Download an mp3 file; resuming an interrupted download is not supported
def downmp3(url, name):
    """Download ``url`` and save the response body to a local file.

    Args:
        url: Address of the file to download.
        name: Target file name; it is converted to GBK with ``mdcode``
            before being used as the path.

    The body is copied in fixed-size chunks so a large file is never held
    in memory as a whole. Both the response and the output file are closed
    even when the transfer fails part-way (a partial file may then remain).
    """
    res = urllib2.urlopen(url)
    try:
        with open(mdcode(name, 'gbk'), "wb") as out:
            while True:
                chunk = res.read(64 * 1024)
                if not chunk:
                    # An empty read marks the end of the response body.
                    break
                out.write(chunk)
    finally:
        res.close()
# Request an HTML page
def gethtml(url):
    """Fetch ``url`` and return its body converted by ``mdcode``.

    Args:
        url: Address of the page to request.

    Returns:
        The response body passed through ``mdcode`` with its default target
        encoding.
    """
    response = urllib2.urlopen(url)
    try:
        context = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    return mdcode(context)
# Use regular expressions to extract the mp3 download address and the mp3's singer
def feedmp3url(data):
    """Pull the download link and the author out of a download page.

    Args:
        data: HTML text of the page.

    Returns:
        A ``(link, author)`` tuple: the ``downlink`` attribute value of the
        first match of the ``<input ...>`` pattern, and the ``title``
        attribute value of the first ``author_list`` span.

    Raises:
        IndexError: If either pattern has no match in ``data``.
    """
    flags = re.S | re.I
    link_pattern = r'''<input(\s*)(.*?)(\s*)downlink(\s*)=(\s*)([\"\s]*)([^\"\']+?)([\"\s]+)(.*?)id=\"bit128\"(.*?)>'''
    author_pattern = r'''<span class=\"author_list\" title(\s*)=(\s*)([\"\s]*)([^\"\']+?)([\"\s]+)(\s*)>'''

    link_hits = re.findall(link_pattern, data, flags)
    author_hits = re.findall(author_pattern, data, flags)

    # findall yields one tuple of groups per match; only the first match of
    # each pattern is used.
    first_link = link_hits[0]
    first_author = author_hits[0]
    return first_link[6], first_author[3]
# Use a regular expression to extract the mp3 list of each episode
def feedurllist(data):
    """Collect download information for every song linked from a page.

    Every ``<a ... href=... title=...>`` match in ``data`` is treated as a
    song: its ``/download`` page on ting.baidu.com is fetched and parsed
    with ``feedmp3url``, and a ``title-author`` line is printed in GBK.

    Args:
        data: HTML text of the page.

    Returns:
        A list of ``[mp3url, title, author]`` lists, one per matched anchor,
        in page order.
    """
    anchor_pattern = r'''<a(\s*)(.*?)(\s*)href(\s*)=(\s*)([\"\s]*)([^\"\']+?)([\"\s]+)(\s*)title(\s*)=(\s*)([\"\s]*)([^\"\']+?)([\"\s]+)(\s*)>'''
    songs = []
    for groups in re.findall(anchor_pattern, data, re.S | re.I):
        # Group 6 holds the href value, group 12 the title value.
        song_path = groups[6]
        title = groups[12]
        download_page = gethtml('http://ting.baidu.com' + song_path + '/download')
        mp3url, author = feedmp3url(download_page)
        songs.append([mp3url, title, author])
        print(mdcode(title + '-' + author, 'gbk'))
    return songs
# Main function; takes the page URLs of the individual episodes of "The Voice" (好声音)
def main(urls):
    """Gather the song entries of every page in ``urls``.

    Args:
        urls: Iterable of page addresses; each is fetched with ``gethtml``
            and parsed with ``feedurllist``.

    Returns:
        One flat list holding the entries of all pages, in input order.
    """
    collected = []
    for page_url in urls:
        page_songs = feedurllist(gethtml(page_url))
        collected.extend(page_songs)
    return collected
if __name__ == '__main__':
    # Album pages to scrape. (Named album_pages rather than ``all`` so the
    # builtin is not shadowed.)
    album_pages = [
        'http://ting.baidu.com/album/23149328',
        'http://ting.baidu.com/album/23150394',
        'http://ting.baidu.com/album/23150523',
        'http://ting.baidu.com/album/23152500',
        'http://ting.baidu.com/album/23152435',
        'http://ting.baidu.com/album/23151786',
        'http://ting.baidu.com/album/23160050',
        'http://ting.baidu.com/album/23364352',
        'http://ting.baidu.com/album/23528761',
        'http://ting.baidu.com/album/24493381',
    ]

    urls = main(album_pages)
    print("Start down mp3 ...")
    for url in urls:
        # Each entry is [download path, title, author].
        raw_name = url[1] + '-' + url[2] + '.mp3'
        # Convert to GBK only for the console message. downmp3 performs its
        # own GBK conversion of the file name, and converting twice would
        # force charset re-detection on bytes that are already GBK, which
        # can be misread as UTF-8.
        print("%s Down ..." % mdcode(raw_name, 'gbk'))
        downmp3("http://ting.baidu.com" + url[0], raw_name)