Notes from a successful WordCount run with Hadoop 2.7.3 and Eclipse on a plain Linux system
- Hadoop itself must be installed and working correctly.
- The hadoop-eclipse-plugin must match your Eclipse version. In the "Define Hadoop location" dialog, one master port is 9000 and the other 9001; the DFS Master should be the 9000 one so that it matches fs.defaultFS (the run arguments below use hdfs://localhost:9000). The proxy field underneath can be ignored.
bin/hdfs dfs -mkdir /user          (create the /user directory on HDFS)
bin/hdfs dfs -mkdir /user/input    (create the input directory, if it does not exist yet)
bin/hdfs dfs -put /usr/local/hadoop/README.txt /user/input/    (upload README.txt as the job input; the run arguments below read from /user/input/)
- Upload the input file to HDFS with the commands above, but do not create the output directory yourself: MapReduce creates it, and the job fails if it already exists (a small clean-up sketch follows this list).
- Open up the permissions on /user (for example, bin/hdfs dfs -chmod -R 777 /user) so the job can write its output there.
- If there are log4j warnings, put a log4j.properties file into the project's src directory (the copy shipped with the Hadoop distribution works).
- In the run configuration arguments, the input must point to a specific file, while the output is only a directory path, for example: hdfs://localhost:9000/user/input/README.txt hdfs://localhost:9000/user/output/
- That is all I can think of for now.
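Because the job refuses to start when the output directory already exists, it is handy to delete the stale output before a rerun. The following is only a minimal sketch (it assumes the hdfs://localhost:9000 address and the /user/output path from the arguments above) and relies on the standard org.apache.hadoop.fs.FileSystem API:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: removes a stale WordCount output directory so a rerun does not fail
// because the output path already exists.
public class CleanOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumed NameNode address, matching the run arguments used in this post.
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        Path output = new Path("/user/output/");
        if (fs.exists(output)) {
            fs.delete(output, true); // true = delete the directory recursively
        }
        fs.close();
    }
}

The same few lines can also be dropped into the start of main() in either WordCount version below.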
package WordCount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Or, the same job written against the older org.apache.hadoop.mapred API:
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class WordCount {

    public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("WordCount");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}
(It looks like the two images here no longer display.) The console output from the run:

12:44:34,422 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
12:44:35,109 INFO deprecation:1173 - session.id is deprecated. Instead, use dfs.metrics.session-id
12:44:35,111 INFO JvmMetrics:76 - Initializing JVM Metrics with processName=JobTracker, sessionId=
12:44:35,244 WARN JobResourceUploader:171 - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
12:44:35,273 INFO FileInputFormat:283 - Total input paths to process : 1
12:44:35,378 INFO JobSubmitter:198 - number of splits:1
12:44:35,490 INFO JobSubmitter:287 - Submitting tokens for job: job_local1396121088_0001
12:44:35,673 INFO Job:1294 - The url to track the job: http://localhost:8080/
12:44:35,674 INFO Job:1339 - Running job: job_local1396121088_0001
12:44:35,678 INFO LocalJobRunner:471 - OutputCommitter set in config null
12:44:35,685 INFO FileOutputCommitter:108 - File Output Committer Algorithm version is 1
12:44:35,687 INFO LocalJobRunner:489 - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
12:44:35,799 INFO LocalJobRunner:448 - Waiting for map tasks
12:44:35,799 INFO LocalJobRunner:224 - Starting task: attempt_local1396121088_0001_m_000000_0
12:44:35,848 INFO FileOutputCommitter:108 - File Output Committer Algorithm version is 1
12:44:35,864 INFO Task:612 - Using ResourceCalculatorProcessTree : [ ]
12:44:35,874 INFO MapTask:756 - Processing split: hdfs://localhost:9000/user/input/README.txt:0+1366
12:44:35,940 INFO MapTask:1205 - (EQUATOR) 0 kvi 26214396(104857584)
12:44:35,940 INFO MapTask:998 - mapreduce.task.io.sort.mb: 100
12:44:35,941 INFO MapTask:999 - soft limit at 83886080
12:44:35,941 INFO MapTask:1000 - bufstart = 0; bufvoid = 104857600
12:44:35,941 INFO MapTask:1001 - kvstart = 26214396; length = 6553600
12:44:35,945 INFO MapTask:403 - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
12:44:36,064 INFO LocalJobRunner:591 -
12:44:36,193 INFO MapTask:1460 - Starting flush of map output
12:44:36,193 INFO MapTask:1482 - Spilling map output
12:44:36,193 INFO MapTask:1483 - bufstart = 0; bufend = 2055; bufvoid = 104857600
12:44:36,193 INFO MapTask:1485 - kvstart = 26214396(104857584); kvend = 26213684(104854736); length = 713/6553600
12:44:36,226 INFO MapTask:1667 - Finished spill 0
12:44:36,234 INFO Task:1038 - Task:attempt_local1396121088_0001_m_000000_0 is done. And is in the process of committing
12:44:36,252 INFO LocalJobRunner:591 - map
12:44:36,252 INFO Task:1158 - Task 'attempt_local1396121088_0001_m_000000_0' done.
12:44:36,253 INFO LocalJobRunner:249 - Finishing task: attempt_local1396121088_0001_m_000000_0
12:44:36,253 INFO LocalJobRunner:456 - map task executor complete.
12:44:36,261 INFO LocalJobRunner:448 - Waiting for reduce tasks
12:44:36,263 INFO LocalJobRunner:302 - Starting task: attempt_local1396121088_0001_r_000000_0
12:44:36,274 INFO FileOutputCommitter:108 - File Output Committer Algorithm version is 1
12:44:36,275 INFO Task:612 - Using ResourceCalculatorProcessTree : [ ]
12:44:36,279 INFO ReduceTask:362 - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@10c41eda
12:44:36,326 INFO MergeManagerImpl:197 - MergerManager: memoryLimit=1941805440, maxSingleShuffleLimit=485451360, mergeThreshold=1281591680, ioSortFactor=10, memToMemMergeOutputsThreshold=10
12:44:36,333 INFO EventFetcher:61 - attempt_local1396121088_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
12:44:36,423 INFO LocalFetcher:144 - localfetcher#1 about to shuffle output of map attempt_local1396121088_0001_m_000000_0 decomp: 1832 len: 1836 to MEMORY
12:44:36,431 INFO InMemoryMapOutput:100 - Read 1832 bytes from map-output for attempt_local1396121088_0001_m_000000_0
12:44:36,433 INFO MergeManagerImpl:315 - closeInMemoryFile -> map-output of size: 1832, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->1832
12:44:36,437 INFO EventFetcher:76 - EventFetcher is interrupted.. Returning
12:44:36,439 INFO LocalJobRunner:591 - 1 / 1 copied.
12:44:36,440 INFO MergeManagerImpl:687 - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
12:44:36,456 INFO Merger:606 - Merging 1 sorted segments
12:44:36,457 INFO Merger:705 - Down to the last merge-pass, with 1 segments left of total size: 1823 bytes
12:44:36,460 INFO MergeManagerImpl:754 - Merged 1 segments, 1832 bytes to disk to satisfy reduce memory limit
12:44:36,461 INFO MergeManagerImpl:784 - Merging 1 files, 1836 bytes from disk
12:44:36,461 INFO MergeManagerImpl:799 - Merging 0 segments, 0 bytes from memory into reduce
12:44:36,462 INFO Merger:606 - Merging 1 sorted segments
12:44:36,462 INFO Merger:705 - Down to the last merge-pass, with 1 segments left of total size: 1823 bytes
12:44:36,463 INFO LocalJobRunner:591 - 1 / 1 copied.
12:44:36,530 INFO deprecation:1173 - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
12:44:36,677 INFO Job:1360 - Job job_local1396121088_0001 running in uber mode : false
12:44:36,679 INFO Job:1367 - map 100% reduce 0%
12:44:36,776 INFO Task:1038 - Task:attempt_local1396121088_0001_r_000000_0 is done. And is in the process of committing
12:44:36,782 INFO LocalJobRunner:591 - 1 / 1 copied.
12:44:36,782 INFO Task:1199 - Task attempt_local1396121088_0001_r_000000_0 is allowed to commit now
12:44:36,799 INFO FileOutputCommitter:535 - Saved output of task 'attempt_local1396121088_0001_r_000000_0' to hdfs://localhost:9000/user/output/_temporary/0/task_local1396121088_0001_r_000000
12:44:36,802 INFO LocalJobRunner:591 - reduce > reduce
12:44:36,802 INFO Task:1158 - Task 'attempt_local1396121088_0001_r_000000_0' done.
12:44:36,802 INFO LocalJobRunner:325 - Finishing task: attempt_local1396121088_0001_r_000000_0
12:44:36,803 INFO LocalJobRunner:456 - reduce task executor complete.
12:44:37,680 INFO Job:1367 - map 100% reduce 100%
12:44:37,681 INFO Job:1378 - Job job_local1396121088_0001 completed successfully
12:44:37,708 INFO Job:1385 - Counters: 35
    File System Counters
        FILE: Number of bytes read=4048
        FILE: Number of bytes written=567860
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=2732
        HDFS: Number of bytes written=1306
        HDFS: Number of read operations=13
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=4
    Map-Reduce Framework
        Map input records=31
        Map output records=179
        Map output bytes=2055
        Map output materialized bytes=1836
        Input split bytes=108
        Combine input records=179
        Combine output records=131
        Reduce input groups=131
        Reduce shuffle bytes=1836
        Reduce input records=131
        Reduce output records=131
        Spilled Records=262
        Shuffled Maps =1
        Failed Shuffles=0
        Merged Map outputs=1
        GC time elapsed (ms)=0
        Total committed heap usage (bytes)=571473920
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters
        Bytes Read=1366
    File Output Format Counters
        Bytes Written=1306
hadoop@kali:/usr/local/hadoop$ bin/hdfs dfs -cat /user/output/*
17/02/20 12:51:19 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
(BIS), 1
(ECCN) 1
(TSU) 1
(see 1
5D002.C.1, 1
740.13) 1
<http://www.wassenaar.org/> 1
Administration 1
Apache 1
BEFORE 1
BIS 1
Bureau 1
Commerce, 1
Commodity 1
Control 1
Core 1
Department 1
ENC 1
Exception 1
Export 2
For 1
Foundation 1
Government 1
Hadoop 1
Hadoop, 1
Industry 1
Jetty 1
License 1
Number 1
Regulations, 1
SSL 1
Section 1
Security 1
See 1
Software 2
Technology 1
The 4
This 1
U.S. 1
Unrestricted 1
about 1
algorithms. 1
and 6
and/or 1
another 1
any 1
as 1
asymmetric 1
at: 2
both 1
by 1
check 1
classified 1
code 1
code. 1
concerning 1
country 1
country's 1
country, 1
cryptographic 3
currently 1
details 1
distribution 2
eligible 1
encryption 3
exception 1
export 1
following 1
for 3
form 1
from 1
functions 1
has 1
have 1
http://hadoop.apache.org/core/ 1
http://wiki.apache.org/hadoop/ 1
if 1
import, 2
in 1
included 1
includes 2
information 2
information. 1
is 1
it 1
latest 1
laws, 1
libraries 1
makes 1
manner 1
may 1
more 2
mortbay.org. 1
object 1
of 5
on 2
or 2
our 2
performing 1
permitted. 1
please 2
policies 1
possession, 2
project 1
provides 1
re-export 2
regulations 1
reside 1
restrictions 1
security 1
see 1
software 2
software, 2
software. 2
software: 1
source 1
the 8
this 3
to 2
under 1
use, 2
uses 1
using 2
visit 1
website 1
which 2
wiki, 1
with 1
written 1
you 1
your 1
hadoop@kali:/usr/local/hadoop$
--------------------------------------------------------------------------------------------------------------------------------------------
Author: Honey_Badger
Source: http://tk55.cnblogs.com/
posted on 2017-02-20 13:26 by Honey_Badger