如何去除 HTML 中的标签(使用 lxml 只保留文本内容)
# Strip every tag from an HTML fragment and print only its text content.
from lxml import etree

# Sample HTML fragment whose markup should be removed.
a = '''
<table class="reference">
  <tbody>
    <tr>
      <td>姓名:</td>
      <td>大湘菜</td>
    </tr>
  </tbody>
</table>
'''

# Parse the markup; etree.HTML returns the root element of the parsed tree.
b = etree.HTML(a)

# XPath string(.) evaluates to the string-value of the context node, i.e. the
# concatenation of all descendant text nodes, so the tags themselves vanish.
# Whitespace between tags is text as well and therefore stays in the result.
c = b.xpath('string(.)')
print(c)
结果为(标签之间的空白字符也会保留在输出中,此处已省略):
姓名:
大湘菜
# Remove all markup from an HTML snippet, leaving just the text it contains.
from lxml import etree

# HTML snippet used as input for the demonstration.
a = '''
<table class="reference">
  <tbody>
    <tr>
      <td>姓名:</td>
      <td>大湘菜</td>
    </tr>
  </tbody>
</table>
'''

# Build an element tree from the markup; etree.HTML hands back its root.
b = etree.HTML(a)

# string(.) is the XPath string-value of the current node: the text of every
# descendant text node joined together, which drops all tags. Whitespace
# between the tags is part of that text and is kept in the output.
c = b.xpath('string(.)')
print(c)
结果为(标签之间的空白字符也会保留在输出中,此处已省略):
姓名:
大湘菜