[转]Python下载百度新歌100的代码

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn>
# License: GPLv2
# Author: oneleaf <oneleaf AT gmail.com>

import httplib 
import re 
import urllib 
import os 
import locale 

def getdownurl(url):
    """Return the candidate download URLs found on an mp3.baidu.com page.

    ``url`` is the request path sent to host ``mp3.baidu.com``.  The
    response body is scanned for anchors pointing at
    ``http://220.181.27.54/m...``; from every second such anchor the text
    between ``title=`` and ``onclick`` is taken and the first ``http...``
    token inside it is collected.

    Returns a list of URLs.  Each URL is decoded from GBK when possible;
    if decoding fails the raw byte string is kept instead.
    """
    conn = httplib.HTTPConnection('mp3.baidu.com')
    try:
        conn.request("GET", url)
        response = conn.getresponse()
        html = response.read()
    finally:
        # Release the socket even when the request or read fails.
        conn.close()

    urllist = []
    expression = 'http://220.181.27.54/m(.*)</a>'
    listSentence = re.findall(expression, html)
    # The original index loop advanced by 2, i.e. it only looked at every
    # second match.  NOTE(review): presumably the page lists each result
    # twice -- confirm against the actual markup.
    for sentence in listSentence[::2]:
        title = re.search('title=(.*)onclick', sentence)
        if not title:
            continue
        link = re.search(r'http(\S*)', title.group(0))
        if not link:
            continue
        mp3url = link.group(0)
        try:
            mp3url = mp3url.decode('gbk')
        except UnicodeError:
            # Best effort: keep the undecoded bytes rather than drop the URL.
            pass
        urllist.append(mp3url)
    return urllist

def downmp3(url,author,name,filelist): 
    filename
=author+"-"+name; 
    
for i in filelist: 
        name
=unicode(i,locale.getpreferredencoding()) 
        
if name.find(filename) == 0: 
            
print u"文件已经下载,忽略。" 
            
return 1 
    urllists
=getdownurl(url) 
    
for i in urllists:        
        
print u"正在连接",i 
        
        ext
=i[-4:] 
        
try
            urlopen 
= urllib.URLopener() 
            fp
=urlopen.open(i) 
            data 
= fp.read() 
            fp.close() 
            filename
=filename+ext; 
            file
=open(filename,'w+b'
            file.write(data) 
            file.close() 
            
print u"下载成功!" 
            
return 1 
        
except
            
continue 
    
return 0 

if __name__ == "__main__"
    conn 
= httplib.HTTPConnection('list.mp3.baidu.com'
    conn.request(
"GET",'/list/newhits.html?id=1'
    response 
= conn.getresponse() 
    html
=response.read().decode('gbk'
    conn.close() 
    expression
='<a href="http://mp3.baidu.com/m(.*)</a>' 
    listSentence 
= re.findall(expression, html) 
    lineno
=
    
while lineno<len(listSentence): 
       url
=re.search('(.*)target',listSentence[lineno]) 
       url
='/m'+url.group(0)[:-8
       name
=re.search('blank>(.*)',listSentence[lineno]) 
       name
=name.group(0)[6:] 
       author
=re.search('blank>(.*)',listSentence[lineno+1]) 
       author
=author.group(0)[6:] 
       
print u"开始下载",author,name 
       filelist
=os.listdir('.'); 
       
if downmp3(url,author,name,filelist)==0: 
          
print u"下载",author,name,u'失败!' 
       lineno
+=2 
posted @ 2006-07-13 18:17  福娃  阅读(661)  评论(0)  编辑  收藏  举报