统计字符串中单词个数的算法优化


要求：输入一个字符串，统计每个单词的个数。单词间用空格隔开，可多个空格，写出自己认为高效的算法。

例如：输入：I love love China
输出为：
I： 1
love： 2
China： 1

首先想到的还是模拟的方法，就是用struct把出现过的单词缓存起来，然后在输入文本中遍历到新单词的时候，遍历一次struct，看这个单词是不是已经存在，做相关处理。
如果输入文本中有 n 个单词，其中不重复的单词为 m 个，则每遇到一个单词都要在缓存中查找一次，总的比较次数为 O(nm)：最好情况是 m = 1，最差情况是 m = n（此时为 O(n^2)）。其实现代码如下：

 #include <stdio.h>
 #include <string.h>

 /*
  * Word-frequency counter, brute-force version.
  *
  * Reads one line from stdin, splits it on spaces (runs of spaces count as
  * a single separator), then folds duplicate words together by comparing
  * every word against every later word.
  */
 enum {
         LINE_CAP  = 100,          /* size of the input line buffer */
         MAX_WORDS = LINE_CAP / 2  /* most words a line of LINE_CAP-1 chars can hold */
 };

 /* One word taken from the input and how often it occurred. */
 struct struct_words{
         char word[LINE_CAP];      /* large enough for the longest possible word */
         int count;                /* 1 after extraction; 0 once folded into an earlier entry */
 };

 /*
  * Program entry point: prompt, read a line, count the words, print the result.
  * Returns 0 in all cases; a failed read is treated as an empty line.
  */
 int main(void)
 {
         char string[LINE_CAP];
         struct struct_words words[MAX_WORDS];
         int total = 0;            /* number of words extracted so far */
         size_t pos = 0;
         int i, k;

         puts("please input words.");
         if (fgets(string, sizeof string, stdin) == NULL)
                 string[0] = '\0';
         /* fgets keeps the line terminator; drop it so it is not part of a word. */
         string[strcspn(string, "\r\n")] = '\0';
         puts("=============开始取词================");

         while (total < MAX_WORDS) {
                 size_t len = 0;

                 /* Skip leading/repeated separators so no empty word is stored. */
                 while (string[pos] == ' ')
                         pos++;
                 if (string[pos] == '\0')
                         break;

                 while (string[pos] != ' ' && string[pos] != '\0')
                         words[total].word[len++] = string[pos++];
                 words[total].word[len] = '\0';
                 words[total].count = 1;
                 total++;
         }

         puts("=========== 合并相同的单词 ==============");
         for (i = 0; i < total; i++) {
                 puts(words[i].word);
                 if (words[i].count == 0)
                         continue;         /* already folded into an earlier entry */
                 for (k = i + 1; k < total; k++) {
                         if (words[k].count == 1
                             && strcmp(words[i].word, words[k].word) == 0) {
                                 words[k].count = 0;
                                 words[i].count++;
                         }
                 }
         }

         puts("=============== End ==============");
         for (i = 0; i < total; i++) {
                 if (words[i].count != 0)
                         printf("%s:\t\t%d\n", words[i].word, words[i].count);
         }
         return 0;
 }

然后呢，做一下优化，思路是：遍历用户的输入文本是必须的，但是，单词的缓存和出现次数的统计是可以使用hash算法来优化的，借用hash算法的特性，使复杂度立刻就降低到了 O(n)，实现代码如下：


#include <stdio.h>
#include <string.h>
#define N 100

/* One table entry: a word and the number of times it has been seen. */
struct struct_words {
    char word[100];  /* NUL-terminated text of the word */
    int  count;      /* occurrence count; main() treats 0 as "entry unused" */
};

/*
 * PJW/ELF-style string hash reduced to a bucket index.
 *
 * key: NUL-terminated string to hash (must not be NULL).
 * Returns a value in the range [0, N-1].
 */
int hash(const char *key)
{
    unsigned long h = 0;

    while (*key) {
        /* Go through unsigned char so bytes >= 0x80 are not added as negatives. */
        h = (h << 4) + (unsigned char)*key++;

        /* Fold the top nibble (bits 28-31) back into the low bits, then clear it. */
        unsigned long high = h & 0xF0000000UL;
        if (high)
            h ^= high >> 24;
        h &= ~high;
    }

    /* Reduce with a modulo: a bitwise AND with N would act as a bit-mask
     * and leave only a handful of reachable bucket values. */
    return (int)(h % (unsigned long)N);
}
/*
 * Word-frequency counter, hash-table version.
 *
 * Reads one line from stdin, splits it on spaces (runs of spaces count as a
 * single separator) and counts each distinct word in an open-addressing
 * table indexed by hash(). Different words that hash to the same slot are
 * kept apart by comparing the stored text and probing linearly.
 * Prints the number of distinct words followed by each word and its count
 * in first-seen order. Returns 0 in all cases.
 */
int main(void)
{
    enum {
        LINE_CAP   = 1000,  /* size of the input line buffer */
        SLOT_COUNT = 200    /* number of entries in the word table */
    };
    char string[LINE_CAP];
    struct struct_words words[SLOT_COUNT];
    char current_word[sizeof words[0].word];
    int keys[SLOT_COUNT];   /* occupied slots, in first-seen order */
    int k = 0;              /* number of distinct words stored */
    size_t pos = 0;
    int i;

    for (i = 0; i < SLOT_COUNT; i++) {
        words[i].word[0] = '\0';
        words[i].count = 0;
    }

    puts("=============输入一些单词，用空格隔开================");
    if (fgets(string, sizeof string, stdin) == NULL)
        string[0] = '\0';
    /* fgets keeps the line terminator; drop it so it is not part of a word. */
    string[strcspn(string, "\r\n")] = '\0';

    for (;;) {
        size_t start, len, slot, probes;

        /* Skip separators; this also prevents an empty "word" from being
         * counted for leading/trailing spaces or an empty line. */
        while (string[pos] == ' ')
            pos++;
        if (string[pos] == '\0')
            break;

        start = pos;
        while (string[pos] != ' ' && string[pos] != '\0')
            pos++;
        len = pos - start;

        if (len >= sizeof current_word) {
            fprintf(stderr, "word longer than %zu characters skipped\n",
                    sizeof current_word - 1);
            continue;
        }
        memcpy(current_word, string + start, len);
        current_word[len] = '\0';

        /* Linear probing: stop at the first free slot or at the slot that
         * already holds this exact word. */
        slot = (size_t)(unsigned)hash(current_word) % SLOT_COUNT;
        for (probes = 0; probes < SLOT_COUNT; probes++) {
            if (words[slot].count == 0
                || strcmp(words[slot].word, current_word) == 0)
                break;
            slot = (slot + 1) % SLOT_COUNT;
        }
        if (probes == SLOT_COUNT) {
            fprintf(stderr, "word table full, \"%s\" skipped\n", current_word);
            continue;
        }

        if (words[slot].count == 0) {
            strcpy(words[slot].word, current_word);
            keys[k] = (int)slot;
            k++;
        }
        words[slot].count++;
    }

    printf("%d" ,k);
    puts("===============打印結果 ==============");
    for (i = 0; i < k; i++) {
        printf("%s:\t\t%d\n", words[keys[i]].word, words[keys[i]].count);
    }
    puts("=============== End ==============");
    return 0;
}

呵呵，弄了近三个小时，发现Linux下gdb不熟太痛苦了，加油！

posted @ 2009-12-05 05:18 玉米疯收阅读(2891) 评论(3) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

玉米疯收

统计字符串中单词个数的算法优化

公告