python超简单爬虫

import codecs
import requests
from bs4 import BeautifulSoup
import re
from lxml import etree
# Listing page whose symptom links are scraped.
url = 'https://jbk.jiankang.com/zhengzhuang/pinyin-z/?p=5'

# Matches from an opening `<a href="/zhengzhuang/<1-5 digits>` up to (but not
# including) the nearest `</a>`. The quantifier is non-greedy so that several
# anchors sitting on the same line produce one match each instead of a single
# match that runs through to the last `</a>` on that line.
_LINK_PATTERN = re.compile(r'<a href="/zhengzhuang/[0-9]{1,5}.*?(?=</a>)')


def extract_symptom_links(html):
    """Return every symptom-link fragment found in *html*.

    Each returned string starts at ``<a href="/zhengzhuang/<digits>`` and ends
    just before the closing ``</a>`` of that same anchor (the closing tag
    itself is not included). An input without matching anchors yields an
    empty list.
    """
    return _LINK_PATTERN.findall(html)


def main():
    """Download the listing page, print the matched links and append them
    (one per line, UTF-8) to ``症状.txt``.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    # Without a charset in the Content-Type header requests falls back to
    # ISO-8859-1 for text responses, which would garble non-Latin text; only
    # in that case use the encoding detected from the body.
    if 'charset' not in response.headers.get('Content-Type', '').lower():
        response.encoding = response.apparent_encoding

    sym = extract_symptom_links(response.text)
    print(sym)
    for line in sym:
        print(line)
    # Append mode is kept from the original script: repeated runs accumulate.
    with codecs.open('症状.txt', 'a', 'utf8') as f:
        f.writelines(line + '\n' for line in sym)


if __name__ == '__main__':
    main()

 

posted @ 2019-09-18 14:40  依我去  阅读(180)  评论(0)  编辑  收藏  举报