练习requests/BeautifulSoup选择器/sqlite3的用法

 1 import sqlite3
 2 import requests
 3 from bs4 import BeautifulSoup
 4 
 5 headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
 6            'Accept-Encoding': 'gzip, deflate, br', 
 7            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', 
 8            'Connection': 'keep-alive', 
 9            'Host': 'pos.baidu.com', 
10            'Referer': 'https://cuiqingcai.com/tag/python', 
11            'Upgrade-Insecure-Requests': '1', 
12            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:59.0) Gecko/20100101 Firefox/59.0'
13             }
14 
def createDB():
    """Recreate the ``cuiqingcai`` table in ``cuiqingcai.db``.

    Any existing ``cuiqingcai`` table is dropped first, so rows stored by a
    previous run are discarded. The new table has two text columns,
    ``title`` and ``url``. A confirmation message is printed on success.
    """
    conn = sqlite3.connect('cuiqingcai.db')
    try:
        conn.execute('drop table IF EXISTS cuiqingcai')
        sql = '''CREATE TABLE cuiqingcai
            (title text,
            url text);'''
        conn.execute(sql)
        conn.commit()
    finally:
        # Close even when a statement fails so the connection is not leaked.
        conn.close()
    print('数据表创建成功!')
25 
def save_data(data_lst):
    """Insert rows into the ``cuiqingcai`` table of ``cuiqingcai.db``.

    Args:
        data_lst: Iterable of 2-item sequences; each one is bound to the two
            ``?`` placeholders of the INSERT statement.

    Raises:
        Any error raised by ``sqlite3`` for unusable input (for example
        ``None`` or a row with the wrong number of values) propagates to the
        caller; nothing is committed in that case.
    """
    conn = sqlite3.connect('cuiqingcai.db')
    try:
        conn.executemany('INSERT INTO cuiqingcai VALUES(?,?)', data_lst)
        conn.commit()
    finally:
        # Release the connection whether or not the insert succeeded.
        conn.close()
32 
def gethtml(url, encode='utf-8', timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address to fetch.
        encode: Encoding used to decode the response body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        The decoded body, or ``None`` when the request fails or the server
        answers with an error status (the error is printed).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException as e:
        print(e)
        return None
    r.encoding = encode
    return r.text
43 
def parse(html):
    """Extract ``(title, href)`` pairs from a page of article excerpts.

    Anchors are selected with the CSS selector ``.excerpt header h2 a`` and
    each one contributes its ``title`` and ``href`` attributes.

    Args:
        html: Page markup, or ``None`` when there is nothing to parse.

    Returns:
        A list of ``(title, href)`` tuples, or ``None`` when *html* is
        ``None`` or the page could not be parsed (for example an anchor
        lacking one of the two attributes).
    """
    if html is None:
        return None
    try:
        soup = BeautifulSoup(html, 'lxml')
        return [(a['title'], a['href'])
                for a in soup.select('.excerpt header h2 a')]
    except Exception as e:
        # Report instead of silently discarding the failure; KeyboardInterrupt
        # and SystemExit are no longer swallowed here.
        print('parse error: %r' % (e,))
        return None
55 
def work(pages=15):
    """Scrape the python tag listing pages and store their links.

    The table is recreated first, then pages ``1`` to *pages* of
    ``https://cuiqingcai.com/tag/python/page/`` are downloaded, parsed and
    saved one by one. A status line is printed per page and a final line
    once every page has been attempted.

    Args:
        pages: Number of listing pages to process, starting at page 1.
    """
    createDB()
    url_base = 'https://cuiqingcai.com/tag/python/page/'
    for page in range(1, pages + 1):
        html = gethtml(url_base + str(page))
        try:
            save_data(parse(html))
        except Exception:
            # One bad page must not abort the run. Ctrl-C still propagates
            # because only Exception subclasses are caught.
            print('------Page_%s  解析错误!------' % page)
        else:
            print('------Page_%s  保存成功!------' % page)
    print('======所有工作已经完成!======')
69 
def read_data():
    """Print every row of the ``cuiqingcai`` table, one tuple per line.

    Rows are read from ``cuiqingcai.db`` in the order SQLite returns them.
    Errors from ``sqlite3`` (for example a missing table) propagate to the
    caller.
    """
    conn = sqlite3.connect('cuiqingcai.db')
    try:
        for item in conn.execute('SELECT * FROM cuiqingcai'):
            print(item)
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
80 
def main():
    """Run the scraper, then print everything that was stored."""
    work()
    read_data()


# Only start scraping when executed as a script, so that importing this
# module does not trigger network and database activity.
if __name__ == '__main__':
    main()

 

posted on 2018-04-23 11:03  math98  阅读(115)  评论(0)    收藏  举报