mysql 练习基础 10:数据保存方式方法

-- 数据准备
create database taobao;

use taobao;

create table UserBehavior(
    user_id int,
    item_id int,
    item_category int,
    behavior_type varchar(10),
    user_geohash varchar(10),
    times datetime,
    amount decimal(5,2)
);

show variables like '%secure%';-- 查看安全路径,随后将表格粘贴到该安全路径下,
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/UserBehavior.csv"
into table UserBehavior
fields terminated by ','
ignore 1 lines;

select * from UserBehavior limit 10;
select count(*) from UserBehavior;#1048575



-- 数据清洗
-- 缺失值处理
select
count(user_id),
count(item_id),
count(item_category),
count(behavior_type),
count(user_geohash),
count(times),
count(amount)
from UserBehavior;



-- 异常值检查

select min(times),max(times),min(amount),max(amount) from userbehavior;

-- 重复记录处理

select distinct * from userbehavior;

-- 字段处理:根据times字段增加计算字段用户行为日期、周和小时,排除后续分析不需要的user_geohash字段,并将筛选后的结果保存到新表

#数据保存方式方法

#保存方式一:直接导出表格csv文件 select user_id, item_id, item_category, behavior_type, date(times) as 日期, hour(times) as 小时, date_format(times,'%w') as 星期, amount from (select distinct * from userbehavior) as t #从去重的表里面去查询 limit 100; #保存方式二:代码保存(至安全路径) select user_id, item_id, item_category, behavior_type, date(times) as 日期, hour(times) as 小时, date_format(times,'%w') as 星期, amount from (select distinct * from userbehavior) as t limit 100 into outfile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/t2.csv"-- 保存到安全路径并命名为t2 fields terminated by ',' lines terminated by '\r\n'; #保存方式三:视图 #视图:优点 1.简化SQL语句,快速提取,将复杂查询语句进行封装;2.安全,保护数据,设置访问权限 让不同级别用户看到不同的视图;缺点 操作视图比直接操作基础表要慢 create view userbehavior_new as select user_id, item_id, item_category, behavior_type, date(times) as 日期, hour(times) as 小时, date_format(times,'%w') as 星期, amount from (select distinct * from userbehavior) as t; select * from userbehavior_new limit 100; select count(*) from userbehavior_new;

 

posted @ 2021-10-16 11:03  charlly  阅读(411)  评论(0)    收藏  举报