【C++自学练习(一)】找出几个文本文件中单词、数字、错误最多的文件

题目:D:\ 盘下有三个文本文件 test1.txt、test2.txt、test3.txt,请分别找出其中单词最多、数字最多、错误最多的文件,并打印出文件名。文件内容均以空白字符(空格、换行等)分隔:全部由字母组成的串视为单词(单个字母也算单词);全部由 0-9 组成的串视为数字(全 0 也算数字);其余的串——包含字母和数字以外的字符,或者字母与数字混杂——一律视为错误。

 

实现:

 1 #include "CountFormTextFiles.h"
 2 
 3 int main()
 4 {
 5     CountFormTextFiles test;
 6     const std::string fileWantSearch = "test*.txt";
 7     const std::string path = "D:\\Documents\\Visual Studio 2010\\Projects\\CountFormTextFiles\\testfiles\\";
 8     test.countUnderDir(path,fileWantSearch);
 9     system("pause");
10     return 0;
11 }
 1 #include <iostream>
 2 #include <string>
 3 #include <fstream>
 4 
 5 enum Type
 6 {
 7     WORD,
 8     NUMBER,
 9     ERROR,
10 };
11 
12 class CountFormTextFiles
13 {
14 public:
15     CountFormTextFiles();
16     ~CountFormTextFiles();
17     bool judgeType(const std::string& srcStr, const Type type);
18     int countUnderDir(const std::string& path, const std::string& filesWantSearch);
19     void countInFile(const std::string& fileName, size_t& words, size_t& numbers, size_t& errors);
20 };
#include <io.h>
#include <map>
#include "CountFormTextFiles.h"

// Default constructor; performs no initialisation work.
CountFormTextFiles::CountFormTextFiles() {}

// Destructor; performs no cleanup work.
CountFormTextFiles::~CountFormTextFiles() {}

/*===========================================================
函数名:judgeType
函数功能:判断srcStr是否为type(WORD,NUMBER,ERROR)
入参: srcStr 传入的字符串, type 要判断的类型
出参:无
返回值:srcStr是type :true    srcStr不是type :false
===========================================================*/
bool CountFormTextFiles::judgeType(const std::string& srcStr, const Type type)
{
    if(srcStr.empty())
    {
        std::cout << "CountFormTextFiles::judgeType srcStr is Null." << std::endl;
        return false;
    }

    bool ret = true;
    switch(type)
    {
    case WORD:
        {
            for(int i=0; i < (int)srcStr.size(); i++)
            {
                if(srcStr[i]<'A' || srcStr[i] > 'z' || (srcStr[i] < 'a' && srcStr[i] > 'Z'))
                {
                    ret = false;
                    break;
                }
            }

            break;
        }

    case NUMBER:
        {
            for(int i=0; i < (int)srcStr.size(); i++)
            {
               if(srcStr[i]<'0' || srcStr[i] > '9')
                {
                    ret = false;
                    break;
                }
            }

            break;
        }
    case ERROR:
        {
            bool allLetter = true;
            bool allNumber = true;
            for(int i=0; i < (int)srcStr.size(); i++)
            {
                bool notLetter = (srcStr[i]<'A' || srcStr[i] > 'z' || (srcStr[i] < 'a' && srcStr[i] > 'Z'));
                bool notNumber = (srcStr[i]<'0' || srcStr[i] > '9');
                if(notLetter)
                {
                    allLetter = false;
                }

                if(notNumber)
                {
                    allNumber = false;
                }
            }

            if( allLetter || allNumber )
            {
                ret = false;
            }

            break;
        }

    }

    return ret;
}

/*===========================================================
Function: countInFile
Purpose:  Count the words, numbers and errors contained in one file.
          The file is read as whitespace-separated tokens; each token
          is classified with judgeType as a WORD, else a NUMBER, and
          anything else is counted as an error.
Input:    fileName - file to read (including its path)
Output:   words, numbers, errors - the counts found in fileName are
          ADDED to the values passed in, so callers that want the
          plain per-file counts must pass zero-initialised variables.
Returns:  none. If the file cannot be opened a diagnostic line is
          printed and the three counters are left untouched.
===========================================================*/
void CountFormTextFiles::countInFile(const std::string& fileName, size_t& words, size_t& numbers, size_t& errors)
{
    // Open strictly for reading. std::fstream's default mode is in|out, which
    // fails on read-only files even though the file is only ever read here.
    std::ifstream input(fileName.c_str());
    if (!input)
    {
        std::cout << "CountFormTextFiles::countInFile failed to open " << fileName << std::endl;
        return;
    }

    // Only the totals are needed, so plain counters replace per-token maps.
    size_t wordTotal = 0;
    size_t numberTotal = 0;
    size_t errorTotal = 0;

    std::string token;
    while (input >> token)
    {
        if (judgeType(token, WORD))
        {
            ++wordTotal;
        }
        else if (judgeType(token, NUMBER))
        {
            ++numberTotal;
        }
        else
        {
            ++errorTotal;
        }
    }

    // The out-parameters are updated once, after the whole file has been read.
    words += wordTotal;
    numbers += numberTotal;
    errors += errorTotal;

    // `input` is closed automatically when it goes out of scope.
}

/*===========================================================
Function: countUnderDir
Purpose:  Among the files under `path` whose names match
          `filesWantSearch`, find the file with the most words, the
          file with the most numbers and the file with the most
          errors, and print each file name with its count.
Input:    path            - directory to search; it is concatenated
                            directly with the file name, so it must end
                            with a path separator
          filesWantSearch - file name pattern (wildcards supported)
Output:   none
Returns:  0 on success, -1 when no file matches the pattern.
Notes:    When several files tie for a maximum, the first one
          returned by the search is reported.
===========================================================*/
int CountFormTextFiles::countUnderDir(const std::string& path, const std::string& filesWantSearch)
{
    // The pattern string owns its buffer, so no manual copy has to be
    // allocated (and freed) just to pass a C string to _findfirst.
    const std::string searchPattern = path + filesWantSearch;

    struct _finddata_t fileinfo;    // receives the data of each matching file
    // _findfirst returns intptr_t; storing it in `long` truncates the handle
    // on 64-bit Windows.
    const intptr_t handle = _findfirst(searchPattern.c_str(), &fileinfo);
    if (-1 == handle)
    {
        std::cout << "CountFormTextFiles::countUnderDir search failed." << std::endl;
        return -1;
    }

    size_t maxWordsCount = 0;
    size_t maxNumbersCount = 0;
    size_t maxErrorsCount = 0;
    std::string maxWordsFileName;
    std::string maxNumbersFileName;
    std::string maxErrorsFileName;

    // The first match always becomes the initial maximum for all three
    // categories, even when its counts are zero.
    bool isFirstMatch = true;
    do
    {
        size_t wordsCount = 0;
        size_t numbersCount = 0;
        size_t errorsCount = 0;
        countInFile(path + fileinfo.name, wordsCount, numbersCount, errorsCount);

        if (isFirstMatch || wordsCount > maxWordsCount)
        {
            maxWordsCount = wordsCount;
            maxWordsFileName = fileinfo.name;
        }

        if (isFirstMatch || numbersCount > maxNumbersCount)
        {
            maxNumbersCount = numbersCount;
            maxNumbersFileName = fileinfo.name;
        }

        if (isFirstMatch || errorsCount > maxErrorsCount)
        {
            maxErrorsCount = errorsCount;
            maxErrorsFileName = fileinfo.name;
        }

        isFirstMatch = false;
    } while (0 == _findnext(handle, &fileinfo));    // continue until no further match is found

    _findclose(handle);

    std::cout << "the most words file is: " << maxWordsFileName << ". it has " << maxWordsCount << " words." << std::endl;
    std::cout << "the most numbers file is: " << maxNumbersFileName << ". it has " << maxNumbersCount << " numbers." << std::endl;
    std::cout << "the most errors file is: " << maxErrorsFileName << ". it has " << maxErrorsCount << " errors." << std::endl;

    return 0;
}

 

posted on 2018-04-24 00:41  草丛里的蚂蚱  阅读(623)  评论(0)    收藏  举报

导航