单词统计
package word;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.*;
public class wordcount {
public static void main(String [] args) throws Exception {
BufferedReader br = new BufferedReader(new FileReader("D:/学习/大二选修java/fly.txt"));
StringBuffer sb = new StringBuffer();
String text =null;
while ((text=br.readLine())!= null){
sb.append(text);// 将读取出的字符追加到stringbuffer中
}
br.close(); // 关闭读入流
String str = sb.toString().toLowerCase(); // 将stringBuffer转为字符并转换为小写
String[] words = str.split("[^(a-zA-Z)]+"); // 非单词的字符来分割,得到所有单词
Map<String ,Integer> map = new HashMap<String, Integer>() ;
for(String word :words){
if(map.get(word)==null){ // 若不存在说明是第一次,则加入到map,出现次数为1
map.put(word,1);
}else{
map.put(word,map.get(word)+1); // 若存在,次数累加1
}
}
// 排序
List<Map.Entry<String ,Integer>> list = new ArrayList<Map.Entry<String,Integer>>(map.entrySet());
Comparator<Map.Entry<String,Integer>> comparator = new Comparator<Map.Entry<String, Integer>>() {
public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
return (left.getValue().compareTo(right.getValue()));
}
};
// 集合默认升序升序
Collections.sort(list,comparator);
float sum=0;
float a;
float b;
for(int i=0;i<list.size();i++) {
if(list.get(list.size()-i-1).getKey().equals("a")||list.get(list.size()-i-1).getKey().equals("the")||list.get(list.size()-i-1).getKey().equals("and")) {
}else {
a=list.get(list.size()-i-1).getValue();
b=sum;
sum=a+b;
}
}
System.out.println("可用单词总和为:"+sum);
System.out.println("可用单词出现次数及所占百分比降序排列如下:");
for(int i=0;i<list.size();i++){// 由高到低输出
if(list.get(list.size()-i-1).getKey().equals("a")||list.get(list.size()-i-1).getKey().equals("the")||list.get(list.size()-i-1).getKey().equals("and")) {}
else{
System.out.println(list.get(list.size()-i-1).getKey() +":出现"+list.get(list.size()-i-1).getValue()+"次 占比"+String.format("%.2f",(list.get(list.size()-i-1).getValue()/sum)*100));
}
}
}
}


浙公网安备 33010602011771号