# (13) 爬虫 re -- web-crawler notes, part 13: the `re` module


import  re
# Demo of re.match / re.search / re.findall on two small sample strings.
# The sample sentence is deliberately NOT named ``str`` so the builtin type
# stays usable in the rest of the module.
text = 'I Study python3.7 everyday'
print('--'*50)
# re.match only tries at the start of the string; '.' takes one character.
m1 = re.match(r'.', text)
print(m1.group())
# re.search scans the whole string and returns the first match.
# NOTE: the '.' in this pattern is unescaped, so it matches any single
# character (here it happens to land on the literal dot of "3.7").
m3 = re.search(r'p\w+.\w', text)
print(m3.group())

print('--'*50)
# re.findall returns every non-overlapping match as a list of strings.
f1 = re.findall(r'y', text)
print(f1)
print('--'*50)
# The attribute value uses double quotes inside the single-quoted literal;
# writing '' inside a '...' literal ends the string and relies on implicit
# concatenation, which silently drops the quotes from the HTML.
str2 = '<div><a href="http://www.baidu.com">尚学堂bjsxt</a></div>'
# One CJK character (U+4E00..U+9FA5) followed by one or more word characters.
f2 = re.findall(r'[\u4e00-\u9fa5]\w+', str2)
print(f2)


'''

'''
import requests
import re
# Download the page at ``url``, extract the text of every
# <div class="content"><span>...</span> element with a regex, and write the
# extracted strings to xiaohua.txt (each followed by three newlines).
url = 'https://www.qiushibaike.com/text/'
# Send a browser-like User-Agent header with the request.
header = {
    'User-Agent':'Mozilla/5.0(Windows;U;WindowsNT6.1;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50'
}
# requests has no default timeout: without one, a stalled server would make
# this script hang forever.
response = requests.get(url, headers=header, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing and saving an error page.
response.raise_for_status()
info = response.text
print(info)
# Capture group 1 is the span text; '.' does not cross newlines, so each
# match is limited to a single line of the HTML.
f1 = re.findall(r'<div class="content">\s*<span>\s*(.+)\s*</span>', info)


print('-'*1000)
print(f1)
with open('xiaohua.txt', 'w', encoding='utf-8') as f:
    # One batched write; every entry is followed by three newlines.
    f.writelines(fs + '\n\n\n' for fs in f1)
# posted @ 2020-07-14 18:25  kuanleung  阅读(4)  评论(0)    收藏  举报  来源