import requests
import re
import pymysql
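# Save to database (disabled): scrape the listing on the 23us.com front page
# and insert one row per match into the MySQL table `dingdian`.
# NOTE: the loop bodies in the commented-out code below are not indented;
# restore the indentation before re-enabling it.
# NOTE(review): the connection line below hard-codes a database password;
# load it from configuration or the environment before re-enabling.
# mysql=pymysql.connect(host='127.0.0.1',user='root',password='wbf980728',database='aaa',charset='utf8')  # connect to the database; charset is utf8
# youbiao=mysql.cursor()  # create a cursor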
# source = requests.get('https://www.23us.com/').content.decode('gbk')  # fetch the page source, decoded as GBK
# a='<li><p class="ul1">\[(.*?)\]《<a class="poptext" href=".*?" target="_blank">(.*?)</a>》</p><p class="ul2"><a href=".*?" target="_blank">(.*?)</a></p><p>(.*?)</p>(.*?)</li>'  # regex with five capture groups for the fields to extract
# demo = re.compile(a)  # compile the pattern string into a regex pattern object
# lists = demo.findall(source)  # list of all matches, one 5-tuple of captured groups per match
# # print(lists)
# for a,b,c,d,e in lists:
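# NOTE(review): this SQL is built with str.format from scraped text; if re-enabled,
# pass the values as parameters to execute() instead.
# sql='insert into dingdian(leixing,shuming,zhangjie,zuozhe,shijian) values("{}","{}","{}","{}","{}")'.format(a,b,c,d,e)  # SQL statement
# youbiao.execute(sql)  # run the INSERT to add the row to the table
# mysql.commit()
# mysql.close()  # close the connection (the commit above is what saves the data)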
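# Save as a text file (disabled): fetch a book's chapter index, then download
# chapter pages and write the title and body text to a local file.
# a=requests.get('https://www.23us.com/html/78/78926/').content.decode('gbk')  # decode the response bytes as GBK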
# b='<a href="(\d+\.html)">.*?</a>'
# c=re.compile(b)
# d=c.findall(a)
# for i in d[0:1]:
# hrefs='https://www.23us.com/html/78/78926/'+i
# a=requests.get(hrefs).content.decode('gbk')
# b='<h1>(.*?)</h1>.*?<dd id="contents".*?>(.*?)</dd>'
# b=b.replace('/<br \/>', "\r")
# c=re.compile(b,re.S)
# d=c.findall(a)
# # print(d)
# for a,b in d:
# b=b.replace(' ',' ').replace('<br /><br />','\n')#替换
# op=open('E:\\text\\1.text','w+')  # open the output file for writing
# op.write(a+'\n'+b)
# op.close()
# # print(a,b)