# -*- coding: utf-8 -*-
from BeautifulSoup import BeautifulSoup
import urllib2
import sys
def write1(url,title):
    f = urllib2.urlopen(url)
    soup = BeautifulSoup(f.read().decode('gbk','ignore'))
    str1='\n'+title+'\n'+str(soup.find('dd',id='contents'))
    print title
    str1=str1.replace('<br />\n<br />','\n')
    str1=str1.replace('<br />','\n')
    str1=str1.replace(' ',' ')
    str1=str1.replace('<dd id="contents">','')
    str1=str1.replace('</dd>','')
    #print str1
    return str1

if __name__=='__main__':
    # Fetch the book's index page, download every chapter linked from a
    # table cell, and write the concatenated text to w.txt.
    url = 'http://www.23us.com/html/2/2544/'
    f = urllib2.urlopen(url)
    try:
        index_html = f.read().decode('gbk','ignore')
    finally:
        # Release the connection even when reading the body fails.
        f.close()
    soup = BeautifulSoup(index_html)

    chapters = []
    for cell in soup.findAll('td'):
        link = cell.a
        if link is None:
            # Table cell without a link: nothing to download.
            continue
        href = link.get('href')
        if not href:
            continue
        chapter_url = url+href
        try:
            chapters.append(write1(chapter_url,str(link.string)))
        except Exception as err:
            # Best effort: one failed chapter must not abort the whole
            # download, but report it instead of hiding it. Unlike a bare
            # except, this lets Ctrl-C (KeyboardInterrupt) stop the run.
            sys.stderr.write('skipped %r: %r\n' % (chapter_url, err))

    # Join once instead of growing a string chapter by chapter, and let
    # the with-block close the file even if the write fails.
    with open('w.txt','w') as ff:
        ff.write(''.join(chapters))

  

# Posted 2012-10-16 18:45 by Crazy_yiner -- views (142), comments (0), bookmark, report