Python抓取网页小例子

import urllib.request
import re
from tkinter import *
# Small demo: download the Sohu stock news page, pull the link texts out of
# it with a regular expression and list them in a Tk text widget.

# Main window: 500x300 pixels, placed at screen offset (400, 300).
win = Tk()
win.geometry('500x300+400+300')

# Text widget that receives one matched link text per line.
t = Text(win)
t.pack()

url = 'http://stock.sohu.com/news/'

# The context manager closes the HTTP response even if read() fails; the
# timeout keeps the script from hanging forever on an unresponsive server.
with urllib.request.urlopen(url, timeout=10) as response:
    html = response.read()

# The script decodes the page as GBK. Undecodable bytes are replaced instead
# of aborting the whole run with UnicodeDecodeError.
html = html.decode('GBK', errors='replace')

# Groups 1 and 2 capture the two path components of the article URL, group 3
# the anchor text. Dots are escaped so they only match a literal '.', and
# re.S lets '.' inside the groups span line breaks.
pattern = re.compile(
    r"<a test=a href='http://stock\.sohu\.com/(.*?)/(.*?)\.shtml' target='_blank'>(.*?)</a>",
    re.S,
)
items = pattern.findall(html)
for item in items:
    # item[2] is the anchor text (third capture group).
    t.insert(END, item[2] + '\n')

# Enter the Tk event loop; without it the script ends immediately and the
# window is never displayed when run as a program.
win.mainloop()
    

 

posted @ 2016-09-01 08:33  疯陈演义  阅读(365)  评论(0)  编辑  收藏  举报