单词统计

       这次课堂测试总的来说收获还是蛮大的,用到了读取文件。虽然文件类型的测试也做了不少了,但确实还不够熟练,对文件这一块了解太少,其中一些基本的函数都不知道,所以还是要多学习这方面的知识。

       这次实验大概用了4个小时,花费的时间与其他同学相比有点多,主要原因是总想从网上找模板,自己不亲自动手。但是课下自己亲手实践之后,确实从中学到了东西,所以花费的时间还是值得的。

       下面是这次练习的代码:

       

package xxx;
/* yr
 * 2019/4/28
 */
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Pattern;
/**
 * Simple text statistics for plain-text files.
 *
 * <p>Provides the relative frequency of the 26 English letters
 * ({@link #check1(String)}), a ranking of the most frequent words
 * ({@link #check2(String)}), and a recursive directory walk that runs both
 * reports on every regular file it finds ({@link #test(String)}).
 *
 * <p>Files are decoded with the platform default charset. All reports are
 * written to {@code System.out}; I/O failures are reported on
 * {@code System.err} and the affected file is skipped.
 */
public class Word {

    /** Number of letters in the English alphabet. */
    private static final int ALPHABET_SIZE = 26;

    /** Separator between words: any run of characters other than lower-case ASCII letters. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^a-z]+");

    /**
     * Single shared reader for {@code System.in}. A Scanner buffers ahead, so
     * creating a new one per prompt can silently lose input that an earlier
     * instance already consumed. Created lazily on first use.
     */
    private static Scanner console;

    /** Returns the shared console scanner, creating it on first use. */
    private static Scanner console() {
        if (console == null) {
            console = new Scanner(System.in);
        }
        return console;
    }

    /**
     * Prints how often each English letter occurs in a file, case-insensitively,
     * as a percentage of all letters, ordered from most to least frequent.
     * Letters with equal (rounded) percentages stay in alphabetical order.
     *
     * @param string path of the file to analyse
     */
    public void check1(String string) {
        long[] counts;
        try {
            counts = countLetters(string);
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        long total = 0;
        for (long count : counts) {
            total += count;
        }

        System.out.println("短文中各字母出现情况统计如下:");

        // Percentages rounded to two decimals; a file without letters yields 0.0 for all.
        final double[] percent = new double[ALPHABET_SIZE];
        List<Integer> order = new ArrayList<Integer>(ALPHABET_SIZE);
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            percent[i] = total == 0 ? 0.0 : Math.round((double) counts[i] / total * 10000) / 100.0;
            order.add(i);
        }

        // Collections.sort is stable, so ties keep their alphabetical order.
        Collections.sort(order, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                return Double.compare(percent[right], percent[left]);
            }
        });

        for (int letter : order) {
            System.out.println((char) ('a' + letter) + "的频率为:" + percent[letter] + "%");
        }
    }

    /**
     * Counts the occurrences of each ASCII letter in a file, folding case.
     * The file is streamed character by character, so its size is not limited.
     *
     * @param path file to read
     * @return 26 counters, index 0 for 'a'/'A' through index 25 for 'z'/'Z'
     * @throws IOException if the file cannot be opened or read
     */
    private static long[] countLetters(String path) throws IOException {
        long[] counts = new long[ALPHABET_SIZE];
        try (BufferedReader in = new BufferedReader(new FileReader(path))) {
            int ch;
            while ((ch = in.read()) != -1) {
                if (ch >= 'a' && ch <= 'z') {
                    counts[ch - 'a']++;
                } else if (ch >= 'A' && ch <= 'Z') {
                    counts[ch - 'A']++;
                }
            }
        }
        return counts;
    }

    /**
     * Counts every word in a file (case-insensitively), asks on the console
     * how many of the most frequent words to show, and prints them as
     * {@code word:count}, most frequent first. Words with the same count are
     * listed alphabetically.
     *
     * <p>The requested number is clamped to the number of distinct words. If
     * the console input is not an integer, all words are printed.
     *
     * @param string path of the file to analyse
     */
    public void check2(String string) {
        Map<String, Integer> occurrences;
        try {
            occurrences = countWords(string);
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        List<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(occurrences.entrySet());
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = Integer.compare(right.getValue(), left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });

        int total = ranked.size();
        System.out.println("请输入你要统计前多少个单词(<" + total + ")");
        int shown = Math.max(0, Math.min(readRequestedCount(total), total));
        for (int i = 0; i < shown; i++) {
            Map.Entry<String, Integer> entry = ranked.get(i);
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    }

    /**
     * Builds a word-to-occurrences map for a file. A word is a maximal run of
     * ASCII letters, lower-cased. Lines are tokenised one at a time so that
     * words on adjacent lines are never glued together.
     *
     * @param path file to read
     * @return occurrences per distinct word; empty if the file has no words
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> countWords(String path) throws IOException {
        Map<String, Integer> occurrences = new HashMap<String, Integer>();
        try (BufferedReader in = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = in.readLine()) != null) {
                // Locale.ROOT keeps 'I' -> 'i' regardless of the user's locale.
                for (String word : NON_LETTERS.split(line.toLowerCase(Locale.ROOT))) {
                    if (word.isEmpty()) {
                        continue; // produced when the line starts with a separator
                    }
                    Integer seen = occurrences.get(word);
                    occurrences.put(word, seen == null ? 1 : seen + 1);
                }
            }
        }
        return occurrences;
    }

    /**
     * Reads the number of words the user wants to see from the console.
     *
     * @param fallback value returned when the next token is missing or not an integer
     * @return the integer entered, or {@code fallback}
     */
    private static int readRequestedCount(int fallback) {
        Scanner in = console();
        if (in.hasNextInt()) {
            return in.nextInt();
        }
        if (in.hasNext()) {
            in.next(); // discard the malformed token so it is not re-read next time
        }
        return fallback;
    }

    /**
     * Runs both reports on a file, or on every regular file below a directory.
     *
     * <p>For a directory, each sub-directory's absolute path is printed and the
     * sub-directory is processed recursively as it is encountered; the regular
     * files directly inside the directory are processed afterwards. A path that
     * is neither a readable directory nor a regular file is ignored.
     *
     * @param fileDir path of a directory to walk, or of a single file to analyse
     */
    public void test(String fileDir) {
        File root = new File(fileDir);
        if (root.isFile()) {
            analyze(root);
            return;
        }

        File[] children = root.listFiles();
        if (children == null) { // not a directory, or it could not be listed
            return;
        }

        List<File> regularFiles = new ArrayList<File>();
        for (File child : children) {
            if (child.isFile()) {
                regularFiles.add(child);
            } else if (child.isDirectory()) {
                System.out.println(child.getAbsolutePath());
                test(child.getAbsolutePath());
            }
        }
        for (File regular : regularFiles) {
            analyze(regular);
        }
    }

    /** Prints the file's path, then its letter-frequency and word-count reports. */
    private void analyze(File file) {
        // File.getPath() joins parent and name with the platform separator.
        String path = file.getPath();
        System.out.println(path);
        check1(path);
        check2(path);
    }

    /**
     * Prompts for a file or directory path on the console and analyses it.
     * The whole input line is used, so paths may contain spaces.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("请输入要查询的文件目录如(D:\\\\大二下\\\\软件工程\\\\wendang\\\\word.txt)");
        Scanner in = console();
        if (!in.hasNextLine()) {
            return;
        }
        String str = in.nextLine().trim();
        System.out.println(str);
        new Word().test(str);
    }
}
 

 

posted on 2019-04-28 21:31  打篮球的Curry  阅读(156)  评论(0)  编辑  收藏  举报