摘要: from lxml import etree text = ''' first item second item third item fourth item fifth item ''' html = etree.HTML(text) result = html.xpath(... 阅读全文
posted @ 2018-12-25 20:34 青春叛逆者 阅读(164) 评论(0) 推荐(0)
摘要: from lxml import etree html = etree.parse('./test.html', etree.HTMLParser()) # test.html是html文件,etree.HTMLParser()是解析器 # result = html.xpath('//li')#选取所有的li节点,是一个列表的形式 # print(result) # print(resul... 阅读全文
posted @ 2018-12-25 20:28 青春叛逆者 阅读(182) 评论(0) 推荐(0)
摘要: #方法一 import re from lxml import html import requests def myRequest(url): ''' 封装自己爬取exam页面的request :param url: 地址 :return: ''' response = requests.get(url) cookiejar = respo... 阅读全文
posted @ 2018-12-25 19:30 青春叛逆者 阅读(574) 评论(0) 推荐(0)
摘要: from lxml import etree text = ''' first item second item third item fourth item fifth item ''' html = etree.HTML(text)#构造了一个XPath解析对象并对HTML... 阅读全文
posted @ 2018-12-25 18:12 青春叛逆者 阅读(120) 评论(0) 推荐(0)
摘要: import requests import re import lxml.html class Exam_spider: def __init__(self): self.base_url = 'http://datamining.comratings.com/exam' self.s = requests.session() def do... 阅读全文
posted @ 2018-12-25 10:02 青春叛逆者 阅读(349) 评论(0) 推荐(0)
摘要: import re import lxml.html test_data = """ 抓取下面10个ip地址 128 54 38 220 . 107 12 . 99 75 . 79 . . . 82 196 . 74 179 141 . . . 180 162 45 196 . 119 157 188 222 . 37 . 165 25 79 154 . 11 61 . 239 102 ... 阅读全文
posted @ 2018-12-25 09:49 青春叛逆者 阅读(396) 评论(0) 推荐(0)