1 在命令行下运行 wget -i _baidump3url.txt（把各歌曲列表页下载到当前目录）
2 在同一目录下运行 _baidump3.py > baidump3.txt
3 运行 del *.htm? 删除下载的列表页
完成后，baidump3.txt 中即为 baidu 所有歌曲的名字列表（每行一个歌名）

_baidump3url.txt的内容:

http://list.mp3.baidu.com/song/A.htm
http://list.mp3.baidu.com/song/B.htm
http://list.mp3.baidu.com/song/C.htm
http://list.mp3.baidu.com/song/D.htm
http://list.mp3.baidu.com/song/E.htm
http://list.mp3.baidu.com/song/F.htm
http://list.mp3.baidu.com/song/G.htm
http://list.mp3.baidu.com/song/H.htm
http://list.mp3.baidu.com/song/J.htm
http://list.mp3.baidu.com/song/K.htm
http://list.mp3.baidu.com/song/L.htm
http://list.mp3.baidu.com/song/M.htm
http://list.mp3.baidu.com/song/N.htm
http://list.mp3.baidu.com/song/O.htm
http://list.mp3.baidu.com/song/P.htm
http://list.mp3.baidu.com/song/Q.htm
http://list.mp3.baidu.com/song/R.htm
http://list.mp3.baidu.com/song/S.htm
http://list.mp3.baidu.com/song/T.htm
http://list.mp3.baidu.com/song/W.htm
http://list.mp3.baidu.com/song/X.htm
http://list.mp3.baidu.com/song/Y.htm
http://list.mp3.baidu.com/song/Z.htm

_baidump3.py:

#!/usr/bin/python

import urllib
import string
import re

def GetContent(url):
    """Fetch *url* and return the raw response body.

    Uses the Python 2 ``urllib.urlopen`` API.

    Args:
        url: Address of the resource to download.

    Returns:
        Whatever ``read()`` on the opened resource returns, or ``None`` when
        ``urllib.urlopen`` raises ``IOError`` (a message is printed in that
        case).

    Raises:
        Any exception raised by ``read()`` propagates to the caller; the
        handle is still closed first.
    """
    try:
        url_file = urllib.urlopen(url)
    except IOError:
        # A single pre-formatted argument keeps the emitted text identical to
        # the old comma-separated print statement while also parsing as a
        # function call.
        print("\nCan not retrieve  %s !\nThe connection cannot be made!\n" % (url,))
        return None

    # Close the handle even if read() fails, so it is never leaked.
    try:
        return url_file.read()
    finally:
        url_file.close()


# Common prefix of every URL in _baidump3url.txt.  Replacing it with '.'
# yields the path of the page inside the current directory.
_SONG_URL_PREFIX = 'http://list.mp3.baidu.com/song'

# Captures the link text of a song entry in a downloaded list page.
_TITLE_LINK = re.compile(r'" target=_blank>(.*?)</[aA]></td>')

# Captures everything that follows the first '-' or ' ' in a title.
_AFTER_SEPARATOR = re.compile(r'[- ](.*)')

# Punctuation removed from the final title.
# NOTE(review): assumes this source file is saved as UTF-8 so the bytes of
# this literal match the UTF-8 encoded titles - confirm.
_PUNCTUATION = re.compile(r'《|》|,|\.|·|!')


def _read_page_urls(list_path):
    """Return the non-blank, whitespace-stripped lines of the URL list file."""
    urls = []
    with open(list_path, 'r') as url_list:
        for line in url_list:
            url = line.strip()
            # A blank or trailing line would otherwise turn into open('').
            if url:
                urls.append(url)
    return urls


def _local_page_path(url):
    """Map a list-page URL to the matching file in the current directory."""
    return url.replace(_SONG_URL_PREFIX, '.')


def _collect_titles(page_paths):
    """Return the distinct raw titles found in the pages, in first-seen order."""
    titles = []
    seen = set()  # constant-time duplicate check; `titles` keeps the order
    for path in page_paths:
        with open(path, 'r') as page:
            for line in page:
                # Only the first link on each line is considered.
                # NOTE(review): lines holding several entries would lose all
                # but the first - confirm against the page layout.
                match = _TITLE_LINK.search(line)
                if not match:
                    continue
                title = match.group(1)
                if title not in seen:
                    seen.add(title)
                    titles.append(title)
    return titles


def _clean_title(raw_title):
    """Convert a cp936 title to UTF-8 and strip its prefix and punctuation."""
    # `unicode` is the Python 2 builtin: decode cp936 bytes, re-encode as UTF-8.
    title = unicode(raw_title, 'cp936').encode('utf8')
    match = _AFTER_SEPARATOR.search(title)
    if match:
        # Keep only what follows the first '-' or ' '.
        title = match.group(1)
    return _PUNCTUATION.sub('', title)


if __name__ == "__main__":
    # Reads the pages named in _baidump3url.txt from the current directory and
    # prints one cleaned song title per line; empty results are skipped.
    page_paths = [
        _local_page_path(url) for url in _read_page_urls('_baidump3url.txt')
    ]
    for raw_title in _collect_titles(page_paths):
        cleaned = _clean_title(raw_title)
        if cleaned:
            print(cleaned)