Java8新特性——lambda表达式(案例:词频统计)

需求:读入一个文本文件,确定所有单词的使用频率并从高到低排序,打印出所有单词及其频率的排序列表

先用传统方法解:

 1 package cn._1.wordfrequency;
 2 
 3 import java.util.HashSet;
 4 import java.util.Map;
 5 import java.util.Set;
 6 import java.util.TreeMap;
 7 import java.util.regex.Matcher;
 8 import java.util.regex.Pattern;
 9 
10 /*
11  * Functional Thinking by Neal Ford(O'Reilly).
12  */
/**
 * Counts how often each word occurs in a text, skipping a fixed list of
 * common English stop words.
 *
 * <p>This is the "traditional" (pre-lambda) variant of the word-frequency example.
 */
public class Word {

    /**
     * Lower-case stop words that are excluded from the count.
     * Shared by all instances and never modified after class initialization.
     */
    private static final Set<String> NON_WORDS = new HashSet<>(java.util.Arrays.asList(
            "the", "and", "of", "to", "a",
            "i", "it", "in", "or", "is",
            "as", "so", "but", "be"));

    /** A word is a maximal run of {@code \w} characters; compiled once and reused. */
    private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

    /**
     * Computes the frequency of every non-stop word in {@code words}.
     *
     * <p>Words are lower-cased before counting, so {@code "Cat"} and {@code "cat"}
     * are the same word.
     *
     * @param words the text to analyse; must not be {@code null}
     * @return a map from lower-cased word to its number of occurrences, iterated in
     *         the natural (alphabetical) order of the words; empty if no word qualifies
     */
    public Map<String, Integer> wordFreq(String words) {
        // TreeMap keeps the keys in natural order for the caller's output.
        Map<String, Integer> wordMap = new TreeMap<>();
        Matcher m = WORD_PATTERN.matcher(words);
        while (m.find()) {
            // Locale.ROOT makes case folding independent of the JVM default locale;
            // under e.g. a Turkish locale "I" would otherwise not fold to "i" and
            // would slip past the stop-word filter.
            String word = m.group().toLowerCase(java.util.Locale.ROOT);
            if (NON_WORDS.contains(word)) {
                continue;
            }
            // Single lookup, then store the incremented count.
            Integer count = wordMap.get(word);
            wordMap.put(word, count == null ? 1 : count + 1);
        }
        return wordMap;
    }
}

再使用Java8的新特性解:

 1 package cn._1.wordfrequency;
 2 
 3 import java.util.ArrayList;
 4 import java.util.HashSet;
 5 import java.util.List;
 6 import java.util.Map;
 7 import java.util.Set;
 8 import java.util.TreeMap;
 9 import java.util.regex.Matcher;
10 import java.util.regex.Pattern;
11 
12 /*
13  * Functional Thinking by Neal Ford(O'Reilly).
14  */
/**
 * Counts how often each word occurs in a text, skipping a fixed list of
 * common English stop words.
 *
 * <p>This is the Java 8 variant of the word-frequency example, built on
 * streams and lambda expressions.
 */
public class Word2 {

    /**
     * Lower-case stop words that are excluded from the count.
     * Shared by all instances and never modified after class initialization.
     */
    private static final Set<String> NON_WORDS = new HashSet<>(java.util.Arrays.asList(
            "the", "and", "of", "to", "a",
            "i", "it", "in", "or", "is",
            "as", "so", "but", "be"));

    /** A word is a maximal run of {@code \w} characters; compiled once and reused. */
    private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

    /**
     * Collects every match of {@code pattern} in {@code words}, in order of appearance.
     *
     * @param words   the text to scan
     * @param pattern the compiled regular expression describing one token
     * @return all matched substrings, duplicates included
     */
    private List<String> regexToList(String words, Pattern pattern) {
        List<String> wordList = new ArrayList<>();
        Matcher m = pattern.matcher(words);
        while (m.find()) {
            wordList.add(m.group());
        }
        return wordList;
    }

    /**
     * Computes the frequency of every non-stop word in {@code words}.
     *
     * <p>Words are lower-cased before counting, so {@code "Cat"} and {@code "cat"}
     * are the same word.
     *
     * @param words the text to analyse; must not be {@code null}
     * @return a map from lower-cased word to its number of occurrences, iterated in
     *         the natural (alphabetical) order of the words; empty if no word qualifies
     */
    public Map<String, Integer> wordFreq(String words) {
        return regexToList(words, WORD_PATTERN).stream()
            // Locale.ROOT keeps case folding independent of the JVM default locale,
            // so "I" always folds to the stop word "i".
            .map(w -> w.toLowerCase(java.util.Locale.ROOT))
            // Keep only the words that are not stop words.
            .filter(w -> !NON_WORDS.contains(w))
            // Each word contributes 1; equal words are summed. The TreeMap supplier
            // gives the result its natural key order without mutating outer state.
            .collect(java.util.stream.Collectors.toMap(w -> w, w -> 1, Integer::sum, TreeMap::new));
    }
}

测试类:

 1 package cn._1.wordfrequency;
 2 
 3 import java.io.FileInputStream;
 4 import java.io.IOException;
 5 import java.util.ArrayList;
 6 import java.util.Collections;
 7 import java.util.Comparator;
 8 import java.util.List;
 9 import java.util.Map;
10 import java.util.Map.Entry;
11 
12 public class Mmain {
13 
14     public static void main(String[] args) throws IOException {
15         String str = readText("/home/yanshaochen/workspace/Functional_Thinking_Examples/mflie/sucai.txt");
16         //调用老方法
17         /*Map<String, Integer> map = new Word().wordFreq(str);*/
18         //调用新方法:
19         Map<String, Integer> map = new Word2().wordFreq(str);
20         //自然排序:
21         for (Entry<String, Integer> item : map.entrySet()) {
22             System.out.println(item.getKey()+","+item.getValue());
23         }
24         //按照value进行排序(摘自网络):
25         /*List<Map.Entry<String, Integer>> infoIds = new ArrayList<>(map.entrySet());
26         Collections.sort(infoIds, new Comparator<Map.Entry<String, Integer>>() {
27             public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {      
28                 return (o2.getValue() - o1.getValue()); 
29                 //return (o1.getKey()).toString().compareTo(o2.getKey());
30                 }
31             }); 
32         for (Entry<String, Integer> item : infoIds) {
33             System.out.println(item.getKey()+","+item.getValue());
34         }*/
35     }
36 
37     /*
38      * IO流
39      */
40     private static String readText(String path) throws IOException {
41         FileInputStream fis = new FileInputStream(path);
42         byte[] bytes = new byte[1024];
43         int data;
44         String str ="";
45         while((data = fis.read(bytes))!=-1){
46             str += new String(bytes, 0, data);
47         }
48         fis.close();
49         return str;
50     }
51 }

 

posted @ 2017-06-22 23:53  Tomas曼  Views(1697)  Comments(0)  Edit  收藏  举报