第一周总结——大数据清洗范例

create database iptest;

use iptest;
//首先创建和文件相对应的表结构
create table data(
`ip` string,
`Date` string,
`day` string,
`traffic` string,
`type` string,
`id` string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
//接着将文件内容导入该表(预先将文件放入虚拟机的/export/data目录下)
load data local inpath '/export/data/result.csv' into table data;



create table video_article as
(select
type,
id,
count(*) as times
from data
group by type, id
order by times DESC
limit 10);

select * from video_article;

create table city_test as
(select
CONCAT(split(ip,'\\.')[0],'.',split(ip,'\\.')[1]) as city,
ip,
traffic,
type,
id
from data);

select * from city_test limit 10;

create table city as
select
city,
type,
id,
count(*) as cishu
from city_test
group by city,type,id
order by cishu DESC
limit 10;

select * from city;

create table traffic as
select
type,
id,
sum(traffic) as liulian
from city_test
group by type,id
order by liulian DESC
limit 10;

select * from traffic;
posted @ 2023-09-23 23:00  Joranger  阅读(10)  评论(0)    收藏  举报