第三周

第三周学习内容

1、爬取薄荷网的食物热量数据，并将其存入数据库，供大创 app 的热量计算使用；

youmian.py

from youGet import youGet
if __name__ == '__main__':
    # Step 1: download the first listing page, then crawl the remaining
    # pages.  On success parse_date() writes the records to ./data5.json.
    data = youGet()
    data.get_date()
    data.parse_date()

    # Step 2: load the crawled records into MySQL.  The import is deferred on
    # purpose: youdaosql reads ./data5.json at import time, so it must not be
    # imported before the crawl above has produced that file.  Keeping both
    # lines inside the guard means importing this module has no side effects.
    import youdaosql
    youdaosql.du_sql()

View Code

youGet.py

要爬取所有食物，只需更改 url 以及每个 next 链接中的页码数字即可；

import json

import lxml.html
import requests

etree = lxml.html.etree
import time
from requests.adapters import HTTPAdapter

# Module-level accumulator: parse_date() appends one dict per food item here.
heatss = []


class youGet():
    """Crawler for the paginated food list at www.boohee.com/food/view_menu.

    Call ``get_date()`` to save the first listing page to ``htmlyou.txt``,
    then ``parse_date()`` to extract every food item from that page and from
    the following pages, and to write the result as JSON.
    """

    # Listing URL; further pages are addressed as ``LIST_URL + "?page=N"``.
    LIST_URL = "http://www.boohee.com/food/view_menu"

    # Browser-like User-Agent header sent with every request.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/80.0.3987.149 Safari/537.36 '
    }

    # Seconds to wait for the server, so a stalled connection cannot hang
    # the crawl forever.
    TIMEOUT = 10

    def get_date(self):
        """Download the first listing page and save its text to ``htmlyou.txt``.

        Raises:
            requests.exceptions.RequestException: if the request fails or
                times out.
        """
        # The ``with`` block closes the response even if reading it fails.
        with requests.get(self.LIST_URL, headers=self.HEADERS,
                          timeout=self.TIMEOUT) as response:
            page_text = response.text
        with open('htmlyou.txt', 'w', encoding="utf-8") as file:
            file.write(page_text)

    def parse_date(self):
        """Collect the food items of every listing page into ``heatss``.

        Reads the page saved by ``get_date()`` from ``htmlyou.txt``, extracts
        its items, then fetches pages 2 up to the ``limit_page`` value found
        in the pagination block.  Each item is appended to the module-level
        ``heatss`` list as ``{"name": ..., "href": ..., "heat": ...}``.

        On success the whole list is written to ``./data5.json``.  If the
        connection breaks or times out while crawling, the items gathered so
        far are written to ``./data4.json`` and, one JSON object per line, to
        ``./htmlhref1.txt``; the method then returns normally.
        """
        with open('htmlyou.txt', 'r', encoding='utf-8') as file:
            html = etree.HTML(file.read())

        last_page = self._page_limit(html)
        first_page = self._extract_records(html)
        heatss.extend(first_page)

        try:
            # Only crawl further when the first page actually listed food.
            if first_page:
                # Page URLs are built from the page number rather than taken
                # from the "next_page" link, so an empty page can never leave
                # the crawl stuck on the same URL.
                for page_no in range(2, last_page + 1):
                    url = self.LIST_URL + "?page=" + str(page_no)
                    with requests.get(url, headers=self.HEADERS,
                                      timeout=self.TIMEOUT) as response:
                        page_html = etree.HTML(response.text)
                    heatss.extend(self._extract_records(page_html))
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout):
            # Best effort: keep whatever was collected before the failure.
            self._write_json('./data4.json')
            with open('./htmlhref1.txt', 'w', encoding="utf-8") as file:
                for record in heatss:
                    # write() only accepts str, so each record dict is
                    # serialised to one JSON line.
                    file.write(json.dumps(record) + "\n")
            return

        self._write_json('./data5.json')

    def _page_limit(self, html):
        """Return the last page number announced by ``html`` (at least 1)."""
        limits = html.xpath('//div[@class="pagination"]/@limit_page')
        try:
            return max(int(limits[0]), 1)
        except (IndexError, ValueError):
            # No pagination block, or a non-numeric value: single page only.
            return 1

    def _extract_records(self, html):
        """Return one ``{"name", "href", "heat"}`` dict per food item in ``html``."""
        names = html.xpath('//ul[@class="food-list"]/li/div/h4/a/@title')
        hrefs = html.xpath('//ul[@class="food-list"]/li/div/h4/a/@href')
        heats = html.xpath('//ul[@class="food-list"]/li/div/p/text()')
        # zip() stops at the shortest list, so a page with mismatched counts
        # yields fewer records instead of raising IndexError.
        return [
            {"name": name, "href": href, "heat": heat}
            for name, href, heat in zip(names, hrefs, heats)
        ]

    def _write_json(self, path):
        """Write the current content of ``heatss`` to ``path`` as JSON."""
        with open(path, 'w', encoding='utf-8') as file:
            file.write(json.dumps(heatss, sort_keys=True, indent=2))

View Code

youdaosql.py

import mysql.connector
import json
# Load the crawled food records once, at import time; savaDataSql() iterates
# over this module-level ``data`` list.  The crawler writes the file as UTF-8,
# so the encoding is given explicitly instead of using the platform default.
with open('./data5.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

def du_sql():
    """Insert the loaded records into MySQL as a single transaction.

    Connects to the ``test1`` database on localhost, lets ``savaDataSql``
    execute the inserts on a fresh cursor and commits them.  ``"ok"`` is
    printed only after the commit has succeeded.  If an insert or the commit
    fails, the transaction is rolled back and the exception propagates.  The
    cursor and the connection are closed in every case.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration before using this outside a local test database.
    mydb = mysql.connector.connect(
        host="localhost",
        user="root",
        password="password",
        database="test1",
        auth_plugin="mysql_native_password"
    )
    try:
        dbpath = mydb.cursor()
        try:
            savaDataSql(dbpath)
            mydb.commit()
        except Exception:
            # Do not leave a half-written batch pending on the connection.
            mydb.rollback()
            raise
        finally:
            dbpath.close()
    finally:
        mydb.close()
    print("ok")

def savaDataSql(dbpath):
    """Insert every record of the module-level ``data`` list into ``heats``.

    Args:
        dbpath: despite its name, the database cursor whose ``execute``
            method is called once per record with a parameterized INSERT.

    Raises:
        KeyError: if a record lacks the ``name``, ``href`` or ``heat`` key.
    """
    insert_stmt = "INSERT INTO heats (name,href,heat) values (%s,%s,%s)"
    for record in data:
        # Values are passed separately from the SQL text so the driver
        # handles quoting.
        row = (record['name'], record['href'], record['heat'])
        dbpath.execute(insert_stmt, row)

View Code

2、学习oracle的使用

安装oracle19c——地址就是官网就可以下载 https://www.oracle.com/database/technologies/oracle-database-software-downloads.html

安装详细 https://blog.csdn.net/weixin_44841225/article/details/100782269

下载完后需要建立数据库，然后打开sql plus进行oracle数据库操作，具体可视化软件还没用

首次打开 sql plus 时需要登录，账号可以使用 sys as sysdba，密码为自己设置的密码。

登入后可以创建用户 create user 用户名 identified by 密码;

给用户修改密码 alter user 用户名 identified by 新密码;

删除用户 drop user 用户名 [cascade];（cascade 为可选参数，加上后会连同该用户下的所有对象一起删除）

**给用户赋权限**

grant 权限/角色 to 用户名;

其中权限有：

具体的权限使用可以单独查询，

但赋权需要使用管理员账号，可以使用 conn / as sysdba 切换到管理员；

切换回去就是 conn 用户名/密码

创建表、序列、触发器、视图、以及查找、添加、修改、删除操作都和mysql一样；

posted @ 2021-08-02 16:46 韦德·沃兹阅读(43) 评论(0) 收藏举报

刷新页面返回顶部

韦德·沃兹

第三周

公告