from lxml import etree
import requests
import re
# Fetch one lesson page, strip its opening <a ...> tags, dump the cleaned
# markup, then print the text that follows each <br> inside the paragraphs
# of the nested layout tables.
url = "http://dec3.jlu.edu.cn/webcourse/t000039/xinshiyeyingyu1/xsyyy1/unit1/a/lp.htm"

# Without an explicit timeout requests waits indefinitely on a stalled server.
html = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently scraping an error page.
html.raise_for_status()

# Matches any tag that opens with a lowercase "a" followed by at least one
# more character (e.g. <a href="...">); closing </a> tags are left in place.
# re.S only changes how "." matches, so it has no effect on this pattern.
dr = re.compile(r'<[a][^>]+>', re.S)
dd = dr.sub('', html.text)
# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(dd)

selector = etree.HTML(dd)
content = selector.xpath('/html/body/table/tr/td/table/tr/td/p')
for each in content:
    brlist = each.xpath('br')
    for br in brlist:
        # .tail is the text directly after this <br>, up to the next tag; it
        # is None when nothing follows, which used to be printed as "None".
        if br.tail is not None:
            print(br.tail)