练习requests/BeautifulSoup选择器/sqlite3的用法
import sqlite3
from contextlib import closing

import requests
from bs4 import BeautifulSoup

# Path of the SQLite database file shared by every helper below.
DB_PATH = 'cuiqingcai.db'

# NOTE(review): this dict is defined but never passed to requests.get().
# Its 'Host' value names pos.baidu.com rather than the site being scraped,
# so it is deliberately left unsent -- confirm the intended values before
# wiring it into gethtml().
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Connection': 'keep-alive',
    'Host': 'pos.baidu.com',
    'Referer': 'https://cuiqingcai.com/tag/python',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:59.0) Gecko/20100101 Firefox/59.0',
}


def createDB():
    """Drop and recreate the ``cuiqingcai`` table (columns: title, url)."""
    # closing() guarantees the connection is released even if a statement
    # raises; sqlite3's own ``with conn`` only manages the transaction.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.execute('drop table IF EXISTS cuiqingcai')
        sql = '''CREATE TABLE cuiqingcai
                 (title text,
                  url text);'''
        conn.execute(sql)
        conn.commit()
    print('数据表创建成功!')


def save_data(data_lst):
    """Insert a sequence of ``(title, url)`` rows into the ``cuiqingcai`` table.

    Any error raised by sqlite3 propagates to the caller; the connection is
    closed either way.
    """
    sql = 'INSERT INTO cuiqingcai VALUES(?,?)'
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.executemany(sql, data_lst)
        conn.commit()


def gethtml(url, encode='utf-8', timeout=10):
    """Fetch *url* and return the response body decoded with *encode*.

    *timeout* (seconds) bounds the request so an unresponsive server cannot
    block the script forever. On any requests error (including HTTP error
    statuses and timeouts) the error is printed and ``None`` is returned.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        r.encoding = encode
        result = r.text
    except requests.RequestException as e:
        print(e)
        return None
    return result


def parse(html):
    """Extract ``(title, href)`` pairs from a listing page.

    Selects every ``.excerpt header h2 a`` anchor and reads its ``title`` and
    ``href`` attributes. Returns a list of tuples, or ``None`` when *html* is
    ``None`` or the page cannot be parsed.
    """
    if html is None:
        # The download failed upstream; there is nothing to parse.
        return None
    try:
        soup = BeautifulSoup(html, 'lxml')
        return [(a['title'], a['href'])
                for a in soup.select('.excerpt header h2 a')]
    except Exception as exc:
        # Best effort per page: report the problem instead of hiding it, and
        # let the caller skip this page. Unlike a bare ``except``, this does
        # not intercept KeyboardInterrupt/SystemExit.
        print(exc)
        return None


def work(page_count=15):
    """Rebuild the table, then scrape and store pages 1..*page_count*."""
    createDB()
    url_base = 'https://cuiqingcai.com/tag/python/page/'
    for page in range(1, page_count + 1):
        rows = parse(gethtml(url_base + str(page)))
        if rows is None:
            print('------Page_%s 解析错误!------' % page)
            continue
        try:
            save_data(rows)
        except sqlite3.Error as exc:
            print(exc)
            print('------Page_%s 解析错误!------' % page)
        else:
            print('------Page_%s 保存成功!------' % page)
    print('======所有工作已经完成!======')


def read_data():
    """Print every row currently stored in the ``cuiqingcai`` table."""
    with closing(sqlite3.connect(DB_PATH)) as conn, \
            closing(conn.cursor()) as cursor:
        cursor.execute('SELECT * FROM cuiqingcai')
        # Prints the cursor object's repr (not the rows); kept from the
        # original script's output.
        print(cursor)
        for item in cursor.fetchall():
            print(item)


def main():
    """Scrape all pages into the database, then dump the stored rows."""
    work()
    read_data()


if __name__ == '__main__':
    main()
浙公网安备 33010602011771号