# Extract Xici (西祠) proxy entries with a regular expression
def getProxy(text):
    """Find proxy table rows in an HTML string, print them, and return them.

    Each matching ``<tr class="...">`` row yields one 5-tuple of strings,
    taken from the capture groups in order:

    1. a dotted-quad IPv4 address (``\\d+.\\d+.\\d+.\\d+``),
    2. an all-digit cell (presumably the port -- confirm against the page),
    3. the text of the ``<a>`` link inside the following cell,
    4. the contents of the second ``<td class="country">`` cell,
    5. an all-uppercase cell (presumably the protocol, e.g. ``HTTP`` --
       confirm against the page).

    Args:
        text: HTML source to scan.

    Returns:
        A list of 5-tuples, one per matched row, in document order; an
        empty list when nothing matches. Every tuple is also printed, as
        the function has always done.
    """
    # Raw strings keep ``\d`` and ``\.`` as regex escapes; in ordinary
    # literals they are invalid string escapes and trigger warnings.
    pattern = re.compile(
        r'<tr class=".*?">.*?'
        r'<td class="country"><img.*?/></td>.*?'
        r'<td>(\d+\.\d+\.\d+\.\d+)</td>.*?'
        r'<td>(\d+)</td>.*?'
        r'<td>.*?'
        r'<a href=".*?">(.*?)</a>.*?'
        r'</td>.*?'
        r'<td class="country">(.*?)</td>.*?'
        r'<td>([A-Z]+)</td>.*?'
        r'</tr>',
        re.S,  # let ``.`` cross newlines, since a row spans several lines
    )
    proxies = pattern.findall(text)
    for proxy in proxies:
        print(proxy)
    return proxies
浙公网安备 33010602011771号