\This article touches on PostgreSQL database usage, file operations, and basic web scraping; it uses the requests and BeautifulSoup modules (plus psycopg2 for database access).
\Install the required modules with pip install requests, pip install beautifulsoup4, and pip install html5lib (psycopg2 is also needed for the database code below).
\Even standing on other people's shoulders, and even though this is simple, it still took me half a day, mainly because I had not fully mastered the database administration tool. If you hit something you don't understand, look up the relevant material right away; when you read an article, read it until you truly understand it. Keep accumulating bit by bit and it will surely pay off!
import psycopg2
import requests
from bs4 import BeautifulSoup   # note: the import comes from the bs4 package


def create_table():   # create the table that will hold the articles
    conn = psycopg2.connect(database="test", user="postgres", password="123", host="127.0.0.1", port="5432")
    sql = "create table if not exists article (article_title text, article_author text, article_content text)"
    cur = conn.cursor()
    try:
        cur.execute(sql)
        conn.commit()
        print("create table success")
    except BaseException as e:
        conn.rollback()
        print(e)
    finally:
        cur.close()
        conn.close()


def get_html_data():   # fetch and parse the page content
    response = requests.get("http://meiriyiwen.com/random")
    soup = BeautifulSoup(response.content, "html5lib")  # BeautifulSoup is quite powerful; html5lib is used here as the parser
    article = soup.find("div", id='article_show')
    article_title = article.h1.string
    article_author = article.find("p", class_="article_author").string
    article_contents = article.find("div", class_="article_text").find_all("p")
    article_content = ""
    for content in article_contents:
        article_content = article_content + str(content)
    insert_table(article_title, article_author, article_content)


def insert_table(article_title, article_author, article_content):    # store the fetched content in the database
    conn = psycopg2.connect(database="test", user="postgres", password="123", host="127.0.0.1", port="5432")
    query_sql = "select * from article WHERE article_title=%s"
    sql = "insert into article (article_title,article_author,article_content) VALUES (%s,%s,%s)"
    cur = conn.cursor()
    try:
        query_value = (article_title,)
        cur.execute(query_sql, query_value)
        results = cur.fetchall()
        if len(results) == 0:   # insert only if an article with this title has not been stored yet
            sql_values = (article_title, article_author, article_content)
            cur.execute(sql, sql_values)
            conn.commit()
            return True
        else:
            return False
    except BaseException as e:
        conn.rollback()
        print(e)
    finally:
        cur.close()
        conn.close()


if __name__ == "__main__":
    create_table()   # only needed on the first run; remove this line once the table has been created
    get_html_data()
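\To quickly check what has been stored, a minimal read-back sketch (assuming the same test database, credentials, and article table as above) could look like this:

import psycopg2

# Minimal sketch: list the titles and authors saved so far.
# Assumes the same connection parameters and table schema used above.
conn = psycopg2.connect(database="test", user="postgres", password="123", host="127.0.0.1", port="5432")
cur = conn.cursor()
cur.execute("select article_title, article_author from article")
for title, author in cur.fetchall():
    print(title, author)
cur.close()
conn.close()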