取出所有的链接

# 取出所有的链接

# coding: utf-8
import sys, os, random, time, datetime
import urllib, re

# Python 2 idiom: reload sys so that setdefaultencoding() can be called, then
# make UTF-8 the default codec for implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf-8')

# Download the page body; close the handle even if read() raises.
response = urllib.urlopen("http://www.qq.com")
try:
    text = response.read()
finally:
    response.close()

# Two alternatives: an <a> tag with a double-quoted href (URL captured in
# group 2) or with a single-quoted href (URL captured in group 4).
# re.findall() returns one 4-tuple per match; the groups belonging to the
# alternative that did not match are empty strings.
regex = r'''(<a[^>]*?href="([^"]+)"[^>]*?>)|(<a[^>]*?href='([^']+)'[^>]*?>)'''
result_s = re.findall(regex, text)
for result in result_s:
    # Exactly one of the two URL groups is non-empty, so print whichever
    # matched; reading index 1 alone would emit a blank line for every
    # single-quoted href.
    print(result[1] or result[3])

posted @ 2013-10-09 11:24  华腾智算  阅读(223)  评论(0)    收藏  举报
https://damo.alibaba.com/ https://tianchi.aliyun.com/course?spm=5176.21206777.J_3941670930.5.87dc17c9BZNvLL