01_统计单词个数

目标是统计文件 b.txt 中的单词数,可以把该任务看作一个状态机

1.字符无非就两种状态,要么是分隔符,要么是单词中的字母

如果当前是分隔符,记录状态为 OUT
否则就是单词,记录状态为 IN,只需要记录从 OUT->IN 的次数,因为单词的个数其实就是每个单词的首字符的出现个数

2.fopen:

FILE *fopen(const char *pathname, const char *mode);
Upon successful completion fopen(), fdopen(), and freopen() return a FILE pointer.  
Otherwise, NULL is returned and errno is set to indicate the error.

3.fgetc:

int fgetc(FILE *stream);
return the character read as an unsigned char cast to an int or EOF on end of file or error.

Code

#include <stdio.h>
#define IN      1
#define OUT     0

// int bool_split(char c) {
//     if(c == ' ' || c == ',' || c == '.' || c == '!' || c == ';' ||
//             c == '\'' || c == '\"' || c == '-' ||
//             c == '\n' || c == '\t' || c == '+' ||
//             c == '{' || c == '}' || c == '(' || 
//             c == ')' || c == '[' || c == ']')
//        return 1;
//     else 
//         return 0;
// }

// Classifies one character for the word-counting state machine.
// Returns 0 when c is an ASCII letter (a-z or A-Z), i.e. part of a word,
// and 1 for every other character, which is treated as a separator.
int bool_split(char c) {
    int is_lower = ('a' <= c && c <= 'z');
    int is_upper = ('A' <= c && c <= 'Z');
    return (is_lower || is_upper) ? 0 : 1;
}
// Counts the words in the file named by `filename`.
//
// The file is read one character at a time and treated as a two-state
// machine: OUT while on a separator, IN while inside a word. Every OUT -> IN
// transition marks the first letter of a word, so counting those transitions
// counts the words.
//
// Returns the number of words, or -1 if the file cannot be opened.
int count_word(char *filename) {
    FILE *fp = fopen(filename, "r");
    if(fp == NULL) return -1; // could not open the file

    int res = 0;
    int status = OUT; // start outside of any word

    // getc() returns an int so that EOF is distinguishable from every valid
    // byte. Storing it in a char would either never compare equal to EOF
    // (unsigned char) or mistake the byte 0xFF for EOF (signed char).
    int c;
    while((c = getc(fp)) != EOF) {
        if(bool_split((char)c)) {
            status = OUT;
        }
        else {
            if(status == OUT) res ++; // first letter of a new word
            status = IN;
        }
    }

    fclose(fp); // close the stream so the handle is not leaked
    return res;
}
// Program entry point. Expects the path of the file to analyse as argv[1]
// and prints its word count to stdout.
//
// Returns 0 on success, or -1 when the file name is missing or the file
// cannot be opened (a diagnostic is written to stderr in both cases).
int main(int argc, char *argv[]) {
    if(argc < 2) {
        // No file name given: explain the expected invocation instead of exiting silently.
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "count_word");
        return -1;
    }

    int res = count_word(argv[1]);
    if(res < 0) {
        // count_word() returns -1 right after a failed fopen(), so errno
        // still describes why the file could not be opened.
        perror(argv[1]);
        return -1;
    }

    printf("word: %d\n", res);
    return 0;
}

作业:实现统计每个单词出现的次数

考虑使用map来作为计数器

/*
作业:实现统计单词个数
b.txt含有中文引号

*/
#include <stdio.h>
#include <iostream>
#include <string>
#include <map>
#define IN      1
#define OUT     0

using namespace std;

// Tells the word counter whether c separates words.
// Returns 0 for an ASCII letter (a-z, A-Z) and 1 for any other character.
int bool_split(char c) {
    const bool is_letter = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
    return is_letter ? 0 : 1;
}
// Counts the words in the file named by `filename` and prints how many times
// each distinct word occurs.
//
// The file is read one character at a time as a two-state machine: OUT while
// on a separator, IN while inside a word. Letters are accumulated into the
// current word; when a separator ends the word, its counter in the map is
// incremented. Words are compared case-sensitively.
//
// The per-word table is written to stdout in the map's (sorted) key order.
// Returns the total number of words, or -1 if the file cannot be opened.
int count_word(char *filename) {
    FILE *fp = fopen(filename, "r");
    if(fp == NULL) return -1; // could not open the file

    int res = 0;
    int status = OUT; // start outside of any word
    std::map<std::string, int> mp; // word -> number of occurrences
    std::string str;               // letters of the word currently being read

    // getc() returns an int so that EOF is distinguishable from every valid
    // byte. Storing it in a char would either never compare equal to EOF
    // (unsigned char) or mistake the byte 0xFF for EOF (signed char).
    int c;
    while((c = getc(fp)) != EOF) {
        const char ch = static_cast<char>(c);
        if(bool_split(ch)) {
            if(status == IN) {
                // Leaving a word: status is IN only after a letter was
                // appended, so str is guaranteed to be non-empty here.
                mp[str] ++;
                str.clear();
            }
            status = OUT;
        }
        else {
            if(status == OUT) res ++; // first letter of a new word
            str.push_back(ch);
            status = IN;
        }
    }
    fclose(fp); // close the stream so the handle is not leaked

    // The file may end in the middle of a word; record that last word too.
    if(!str.empty()) mp[str] ++;

    printf("各单词出现的次数\n");
    for (std::map<std::string, int>::const_iterator it = mp.begin(); it != mp.end(); ++it) {
        std::cout<<it->first<<' '<<it->second<<'\n';
    }
    return res;
}
// Program entry point. Expects the path of the file to analyse as argv[1];
// count_word() prints the per-word table and this function prints the total.
//
// Returns 0 on success, or -1 when the file name is missing or the file
// cannot be opened (a diagnostic is written to stderr in both cases).
int main(int argc, char *argv[]) {
    if(argc < 2) {
        // No file name given: explain the expected invocation instead of exiting silently.
        fprintf(stderr, "用法: %s <文件名>\n", argc > 0 ? argv[0] : "count_word");
        return -1;
    }

    int res = count_word(argv[1]);
    if(res < 0) {
        // count_word() returns -1 right after a failed fopen(), so errno
        // still describes why the file could not be opened.
        perror(argv[1]);
        return -1;
    }

    printf("单词总个数: %d\n", res);
    return 0;
}
posted @ 2025-10-10 11:14  Xiaomostream  阅读(15)  评论(0)    收藏  举报