Quiz: Big Data Sample Exam

1. MapReduce code

WebSubmitter.java:

package web;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WebSubmitter {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Delete the output directory if it already exists so the job can be rerun.
        if (fs.exists(new Path(args[1]))) {
            fs.delete(new Path(args[1]), true);
        }

        // Create the job with the same Configuration used for the FileSystem check.
        Job job = Job.getInstance(conf);
        job.setJarByClass(WebSubmitter.class);

        // The mapper emits the selected fields as the key, with an empty value.
        job.setMapperClass(WebMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // The reducer collapses duplicate keys.
        job.setReducerClass(WebReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
}

WebReducer.java:

package web;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WebReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text k, Iterable<NullWritable> v, Context context)
            throws IOException, InterruptedException {
        // All identical keys arrive in a single call, so each duplicate record is written once.
        context.write(k, NullWritable.get());
    }
}

WebMapper.java:

package web;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


public class WebMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input line: phone,sex,MAC,IP,URL,access time,age
        String[] line = value.toString().split(",");
        String phone = line[0];
        String sex = line[1];
        String wang = line[4];                // visited URL
        String time = line[line.length - 2];  // access timestamp
        String age = line[line.length - 1];
        // Join the selected fields with tabs and emit them as the key; the value carries nothing.
        String we = phone + "\t" + sex + "\t" + wang + "\t" + time + "\t" + age;
        context.write(new Text(we), NullWritable.get());
    }
}
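
To sanity-check the mapper's field extraction without a cluster, a plain-Java sketch like the one below can be run locally. It uses the first record from the customer.txt data shown next; the class name FieldSplitCheck is made up for illustration.

// FieldSplitCheck.java -- hypothetical local check, not part of the submitted job.
public class FieldSplitCheck {
    public static void main(String[] args) {
        // First record from customer.txt: phone,sex,MAC,IP,URL,datetime,age
        String record = "13726230503,0,00-FD-07-A4-72-B8:CMCC,192.168.33.3,"
                + "http://www.imooc.cn/stu,2017-08-04 15:30:20,20";
        String[] line = record.split(",");
        // Pick out the same fields as WebMapper: phone, sex, URL, time, age.
        String we = line[0] + "\t" + line[1] + "\t" + line[4] + "\t"
                + line[line.length - 2] + "\t" + line[line.length - 1];
        System.out.println(we);
        // Prints: 13726230503	0	http://www.imooc.cn/stu	2017-08-04 15:30:20	20
    }
}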

customer.txt (each line: phone, sex, MAC address, IP, URL, access time, age):

13726230503,0,00-FD-07-A4-72-B8:CMCC,192.168.33.3,http://www.imooc.cn/stu,2017-08-04 15:30:20,20
13826544101,1,5C-0E-8B-C7-F1-E0:CMCC,192.168.33.3,http://www.edu360.cn/teach,2017-08-04 15:35:20,19
13926435656,1,20-10-7A-28-CC-0A:CMCC,192.168.33.4,http://www.csdn.cn/stu,2017-08-04 15:30:20,28
13926251106,0,5C-0E-8B-8B-B1-50:CMCC,192.168.33.4,http://www.edu360.cn/job,2017-08-04 16:30:20,34
18211575961,1,94-71-AC-CD-E6-18:CMCC-EASY,192.168.33.5,http://www.imooc.cn/job,2017-08-04 15:40:20,12
13560439658,1,C4-17-FE-BA-DE-D9:CMCC,192.168.33.3,http://www.edu360.cn/stu,2017-08-05 15:30:20,34
15920133257,0,5C-0E-8B-C7-BA-20:CMCC,192.168.44.3,http://www.edu360.cn/teach,2017-08-05 15:35:20,26
13719199419,1,68-A1-B7-03-07-B1:CMCC-EASY,192.168.33.44,http://www.edu360.cn/stu,2017-08-05 15:30:20,40
15013685858,1,5C-0E-8B-C7-F7-90:CMCC,192.168.33.46,http://www.edu360.cn/job,2017-08-05 16:30:20,45
15989002119,0,E8-99-C4-4E-93-E0:CMCC-EASY,192.168.33.55,http://www.edu360.cn/job,2017-08-05 15:40:20,56
13560439658,0,C4-17-FE-BA-DE-D9:CMCC,192.168.133.3,http://www.csdn.cn/register,2017-08-06 15:30:20,27
13480253104,0,5C-0E-8B-C7-FC-80:CMCC-EASY,192.168.111.3,http://www.edu360.cn/register,2017-08-06 15:35:20,19
13602846565,1,5C-0E-8B-8B-B6-00:CMCC,192.168.34.44,http://www.imooc.cn/pay,2017-08-06 15:30:20,21
13922314466,0,00-FD-07-A2-EC-BA:CMCC,192.168.33.46,http://www.imooc0.cn/excersize,2017-08-06 16:30:20,28
13502468823,1,5C-0A-5B-6A-0B-D4:CMCC-EASY,192.168.33.55,http://www.csdn.cn/job,2017-08-06 15:40:20,35
13726230503,1,84-25-DB-4F-10-1A:CMCC-EASY,192.168.33.46,http://www.edu360.cn/excersize,2017-08-06 16:30:20,38
13925057413,0,00-1F-64-E1-E6-9A:CMCC,192.168.33.25,http://www.edu360.cn/job,2017-08-06 15:40:20,24
13760778710,0,00-FD-07-A4-7B-08:CMCC,192.168.33.36,http://www.csdn.cn/excersize,2017-08-06 16:30:20,42
13726230503,0,C4-17-FE-BA-DE-D9:CMCC,192.168.33.55,http://www.imooc.cn/job,2017-08-06 15:40:20,61

 

2. Run the job on the VM

 - hadoop jar /opt/web.jar web.WebSubmitter /text/customer.txt /jie
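
If the job succeeds, /jie/part-r-00000 holds the selected fields, tab-separated, deduplicated and sorted by key. Based on the input above, the first few lines should look roughly like:

13480253104	0	http://www.edu360.cn/register	2017-08-06 15:35:20	19
13502468823	1	http://www.csdn.cn/job	2017-08-06 15:40:20	35
13560439658	0	http://www.csdn.cn/register	2017-08-06 15:30:20	27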

3. Create the Hive database and table

-create database ren;

-use ren;
-create table cum_backup(phone bigint,sex string,wang string,time string,age int)row format delimited fields terminated by '\t';

4. Load the data

-load data inpath '/jie/part-r-00000' into table cum_backup;

(LOAD DATA INPATH moves the HDFS file into the table's warehouse directory, which is why step 7 exports from /user/hive/warehouse/ren.db/cum_backup/.)

5. Create the MySQL database and table

-create database ren;
-use ren;
-create table cum_backup(phone bigint,sex varchar(47),wang varchar(47),time varchar(47),age int);

6. Hive SQL queries

(1) Use Hive SQL to query users aged between 20 and 40 (I changed some names; use the table name from the code)

select * from cum_backup where age>=20 and age<=40;

(2) Use Hive SQL to count the number of users by sex (male vs. female)

select count(*),sex from cum_backup group by sex;

(3) Use Hive SQL to find the female users who visited http://www.edu360.cn/stu after 2017-08-04

select * from cum_backup where time >= '2017-08-05' and wang = 'http://www.edu360.cn/stu' and sex = '1';

(The timestamp is stored as a string, so the comparison is lexicographic; starting at '2017-08-05' excludes everything from 2017-08-04 itself. Here sex = '1' is taken to mean female.)

7. Export the Hive data into MySQL with Sqoop

sqoop export --connect jdbc:mysql://master:3306/ren --table cum_backup --username root --password 123456 --export-dir '/user/hive/warehouse/ren.db/cum_backup/part-r-00000' --columns phone,sex,wang,time,age --input-fields-terminated-by '\t' -m 1
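
To confirm the export actually landed in MySQL, a quick row count over JDBC is one option. A minimal sketch, assuming the MySQL JDBC driver is on the classpath and reusing the connection details from the sqoop command above; the class name ExportCheck is made up:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// ExportCheck.java -- hypothetical verification helper, not part of the exam answer.
public class ExportCheck {
    public static void main(String[] args) throws Exception {
        // Same host, database, and credentials as the sqoop command.
        String url = "jdbc:mysql://master:3306/ren";
        try (Connection conn = DriverManager.getConnection(url, "root", "123456");
             Statement st = conn.createStatement();
             ResultSet rs = st.executeQuery("select count(*) from cum_backup")) {
            if (rs.next()) {
                System.out.println("rows in cum_backup: " + rs.getLong(1));
            }
        }
    }
}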

(That's all for now. The exam questions are in the file named "大数据样卷".)
