句子分析器

初步实现了一个接口:

  1 #include<stdio.h>
  2 #include <stdlib.h>
  3 #include <string.h>
  4 #include "lyPublic/lyCodeConvert.h"
  5 #define X_LONGSEN  500
  6 #define Y_LONGWORD 100
  7 struct node 
  8 {
  9     char MWord[Y_LONGWORD]; //对应的最高权权值
 10     int order; //权值、并作为标记是否有词
 11     struct node *next[16];
 12 };
 13 
 14 typedef struct node node;
 15 char z_Str[Y_LONGWORD];
 16 
 17 void insertTree(char *str, node *T, char *MaxWord, int num_max);
 18 void findStr(char *str, node *T);
 19 int findNum(char *str, node *T);
 20 int SentenceTransform(char *FromWord,char *Tostr);
 21 
 22 /*
 23 函数功能:
 24     将一句话里面的部分词 转化为 权值最高的词
 25 变量说明:
 26     FromWord 原句子
 27     Tostr 转化后的句子
 28 */
 29 
 30 int SentenceTransform(char *FromWord,char *Tostr)
 31 {
 32     //FILE *fp;  
 33     FILE *fq;
 34     char GetSentence[X_LONGSEN] = ""; // 读取权值文档的句子
 35     int lenGetSen, leWord, leSen; 
 36     char GetWord[Y_LONGWORD] = "" ;
 37     char ToWord[Y_LONGWORD] = "";
 38     char strhan[Y_LONGWORD] = "";
 39     char MaxWord[Y_LONGWORD] = ""; //每一句的权值最高词
 40     char hanMax[Y_LONGWORD] = "" ;
 41     node *T;
 42     int i, j, k, len, s, num, max_num, f;
 43     int from, to, at;
 44     int num_max;
 45     int lenTostr = 0 , lenZ_Str;
 46 
 47     T = (node *)malloc(sizeof(node));
 48         //初始化节点
 49     memset (T->MWord, 0 ,sizeof(T->MWord));
 50     T->order = -1;
 51     for(i = 0; i < 16; i++)
 52         T->next[i] = NULL;
 53 
 54 
 55 /*
 56     打开权值文档,文档格式:
 57         平凡28&平淡--62 平庸--5 平凡--82 平常--38]
 58         平常83&平常--38 寻常--31]
 59         贫困24&困顿--0 贫困--42 窘迫--4]
 60         贫困24&贫困--42 贫寒--0 清贫--31 贫穷--7 穷苦--1]
 61     说明:
 62         第一个是一句话权值最高的词,紧跟着的就是权值
 63         & 是分隔符
 64         后面的是 近义词 的词和气权值大小
 65 */
 66 
 67 
 68 
 69 //  读取权值文档,建立字典树
 70     fq = fopen ("1.txt","r++");
 71 //    fp = fopen ("jieguo.txt","w+r");
 72     while (fgets (GetSentence, 500, fq) != NULL) //读取权值文档,建立各个词对应的最高权值
 73     {
 74         lenGetSen = strlen(GetSentence);
 75         leSen = 0;
 76         memset (MaxWord, 0, sizeof(MaxWord));
 77         leWord = 0;
 78         leWord = 0;
 79         while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9'))
 80             MaxWord[leWord++] = GetSentence[leSen++];
 81         //取最高权词的权值 
 82         num_max = 0; 
 83         while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9')
 84             num_max = num_max*10 + GetSentence[leSen++] - '0';
 85         leSen++;
 86         while (GetSentence[leSen] != ']' && leSen < lenGetSen)
 87         {
 88             memset (GetWord, 0, sizeof(GetWord));
 89             memset (ToWord, 0, sizeof(ToWord));
 90             leWord = 0;
 91             while (GetSentence[leSen] != '-') 
 92             {
 93                 GetWord[leWord++] = GetSentence[leSen++];
 94             }
 95             HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord));
 96             insertTree (ToWord, T, MaxWord, num_max);
 97             while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-')
 98                 leSen++;
 99         }
100     }
101 
102 
103     //转化句子
104     len = strlen(FromWord);
105     at = 0;
106     for (i = 0; i < len;)
107     {
108         max_num = -1;
109         memset(strhan, 0, sizeof(strhan));
110         for (j = i; j <= len; j+=2)
111         {
112             memset (GetWord, 0, sizeof(GetWord));
113             s = 0;
114             num = -10;
115             //记录汉字
116             for (k = i; k < j; k++)
117                 strhan[s++] = FromWord [k];
118 
119                 //转码
120             HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord));
121 
122             if(strlen(strhan) != 0)
123                 num = findNum(GetWord, T);
124             else
125                 continue;
126             if (num > max_num)
127             {
128                 max_num = num;
129                 memset(hanMax, 0, sizeof(hanMax));
130                 strcpy(hanMax, strhan);
131                 from = i;
132                 to =j;
133             }
134         }
135         if(max_num != -1)
136         {
137             while(at < from)
138             {
139                 //fputc(FromWord [at], fp);
140                 Tostr[lenTostr++] = FromWord[at++];
141             }
142             memset (GetWord, 0, sizeof(GetWord));
143             memset(z_Str, 0, sizeof(z_Str));
144             HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord));
145             findStr(GetWord, T);
146             lenZ_Str = strlen(z_Str);
147             for(f = 0; f < lenZ_Str ;f++)
148                 Tostr[lenTostr++] = z_Str[f];
149             at = to;
150             i = to;
151         }
152         else
153         {
154             for(j = at; j < at+2; j++)
155                 Tostr[lenTostr++] = FromWord[j];
156             //    fputc(FromWord[j], fp);
157             at += 2;
158             i += 2;
159         }
160     }
161     return 0;
162 }
163 
164 void insertTree(char *str, node *T, char *MaxWord, int num_max)
165 {
166     int len, i, j, flag=0, id;
167     node *p, *q;
168     p = T;
169     len = strlen(str);
170     for (i = 0; i < len; i++)
171     {
172         if(str[i]>= 'a' && str[i] <= 'f')//当时abcdef时 转化为数字
173             id = str[i]- 'a' + 10;
174         else
175             id = str[i] - '0';
176         if( p ->next[id] == NULL)//扩展节点
177         {
178             flag = 1;
179             q = (node *)malloc(sizeof(node));
180             memset(q->MWord, 0, sizeof(q->MWord));
181             q->order = -1;
182             for(j = 0;j < 16 ;j++)
183                 q ->next[j] = NULL;
184             p->next[id] = q;
185         }
186         p = p->next[id];
187     }
188     if(flag)
189     {
190         strcpy(p->MWord, MaxWord);
191         p->order = num_max;
192     }
193     else
194     {
195         if( p -> order == -1)
196         {
197             strcpy(p->MWord, MaxWord);
198             p->order = num_max ;
199         }
200     }
201 }
202 
203 void findStr(char *str, node *T)
204 {
205     int len , i, id;
206     node *p;
207     p = T;
208     len = strlen(str);
209     for (i=0; i< len ; ++i)
210     {
211         if(str[i]>= 'a' && str[i] <= 'f')
212             id = str[i]- 'a' + 10;
213         else
214             id = str[i] - '0';
215         if(p->next[id] == NULL)
216             return;
217         p = p->next[id];
218     }
219     strcpy(z_Str, p->MWord);
220 }
221 
222 
223 int findNum(char *str, node *T)
224 {
225     int len, i, id;
226     node *p;
227     p = T;
228     len = strlen(str);
229     for(i = 0; i < len; i++)
230     {
231         if(str[i]>= 'a' && str[i] <= 'f')
232             id = str[i]- 'a' + 10;
233         else
234             id = str[i] - '0';
235         if(p->next[id] == NULL)
236             return  -1;
237         p = p->next[id];
238     }
239     return p->order;
240 }
  1 #include<stdio.h>
  2 #include <stdlib.h>
  3 #include <string.h>
  4 #include "lyPublic/lyCodeConvert.h"
  5 #define X_LONGSEN  500
  6 #define Y_LONGWORD 100
  7 struct node 
  8 {
  9     char MWord[Y_LONGWORD]; //对应的最高权权值
 10     int order; //权值、并作为标记是否有词
 11     struct node *next[16];
 12 };
 13 
 14 typedef struct node node;
 15 char z_Str[Y_LONGWORD];
 16 
 17 void insertTree(char *str, node *T, char *MaxWord, int num_max);
 18 void findStr(char *str, node *T);
 19 int findNum(char *str, node *T);
 20 int SentenceTransform(char *FromWord,char *Tostr);
 21 
 22 /*
 23 函数功能:
 24     将一句话里面的部分词 转化为 权值最高的词
 25 变量说明:
 26     FromWord 原句子
 27     Tostr 转化后的句子
 28 */
 29 
 30 int SentenceTransform(char *FromWord,char *Tostr)
 31 {
 32     //FILE *fp;  
 33     FILE *fq;
 34     char GetSentence[X_LONGSEN] = ""; // 读取权值文档的句子
 35     int lenGetSen, leWord, leSen; 
 36     char GetWord[Y_LONGWORD] = "" ;
 37     char ToWord[Y_LONGWORD] = "";
 38     char strhan[Y_LONGWORD] = "";
 39     char MaxWord[Y_LONGWORD] = ""; //每一句的权值最高词
 40     char hanMax[Y_LONGWORD] = "" ;
 41     node *T;
 42     int i, j, k, len, s, num, max_num, f;
 43     int from, to, at;
 44     int num_max;
 45     int lenTostr = 0 , lenZ_Str;
 46 
 47     T = (node *)malloc(sizeof(node));
 48         //初始化节点
 49     memset (T->MWord, 0 ,sizeof(T->MWord));
 50     T->order = -1;
 51     for(i = 0; i < 16; i++)
 52         T->next[i] = NULL;
 53 
 54 
 55 /*
 56     打开权值文档,文档格式:
 57         平凡28&平淡--62 平庸--5 平凡--82 平常--38]
 58         平常83&平常--38 寻常--31]
 59         贫困24&困顿--0 贫困--42 窘迫--4]
 60         贫困24&贫困--42 贫寒--0 清贫--31 贫穷--7 穷苦--1]
 61     说明:
 62         第一个是一句话权值最高的词,紧跟着的就是权值
 63         & 是分隔符
 64         后面的是 近义词 的词和气权值大小
 65 */
 66 
 67 
 68 
 69 //  读取权值文档,建立字典树
 70     fq = fopen ("1.txt","r++");
 71 //    fp = fopen ("jieguo.txt","w+r");
 72     while (fgets (GetSentence, 500, fq) != NULL) //读取权值文档,建立各个词对应的最高权值
 73     {
 74         lenGetSen = strlen(GetSentence);
 75         leSen = 0;
 76         memset (MaxWord, 0, sizeof(MaxWord));
 77         leWord = 0;
 78         leWord = 0;
 79         while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9'))
 80             MaxWord[leWord++] = GetSentence[leSen++];
 81         //取最高权词的权值 
 82         num_max = 0; 
 83         while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9')
 84             num_max = num_max*10 + GetSentence[leSen++] - '0';
 85         leSen++;
 86         while (GetSentence[leSen] != ']' && leSen < lenGetSen)
 87         {
 88             memset (GetWord, 0, sizeof(GetWord));
 89             memset (ToWord, 0, sizeof(ToWord));
 90             leWord = 0;
 91             while (GetSentence[leSen] != '-') 
 92             {
 93                 GetWord[leWord++] = GetSentence[leSen++];
 94             }
 95             HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord));
 96             insertTree (ToWord, T, MaxWord, num_max);
 97             while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-')
 98                 leSen++;
 99         }
100     }
101 
102 
103     //转化句子
104     len = strlen(FromWord);
105     at = 0;
106     for (i = 0; i < len;)
107     {
108         max_num = -1;
109         memset(strhan, 0, sizeof(strhan));
110         for (j = i; j <= len; j+=2)
111         {
112             memset (GetWord, 0, sizeof(GetWord));
113             s = 0;
114             num = -10;
115             //记录汉字
116             for (k = i; k < j; k++)
117                 strhan[s++] = FromWord [k];
118 
119                 //转码
120             HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord));
121 
122             if(strlen(strhan) != 0)
123                 num = findNum(GetWord, T);
124             else
125                 continue;
126             if (num > max_num)
127             {
128                 max_num = num;
129                 memset(hanMax, 0, sizeof(hanMax));
130                 strcpy(hanMax, strhan);
131                 from = i;
132                 to =j;
133             }
134         }
135         if(max_num != -1)
136         {
137             while(at < from)
138             {
139                 //fputc(FromWord [at], fp);
140                 Tostr[lenTostr++] = FromWord[at++];
141             }
142             memset (GetWord, 0, sizeof(GetWord));
143             memset(z_Str, 0, sizeof(z_Str));
144             HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord));
145             findStr(GetWord, T);
146             lenZ_Str = strlen(z_Str);
147             for(f = 0; f < lenZ_Str ;f++)
148                 Tostr[lenTostr++] = z_Str[f];
149             at = to;
150             i = to;
151         }
152         else
153         {
154             for(j = at; j < at+2; j++)
155                 Tostr[lenTostr++] = FromWord[j];
156             //    fputc(FromWord[j], fp);
157             at += 2;
158             i += 2;
159         }
160     }
161     return 0;
162 }
163 
164 void insertTree(char *str, node *T, char *MaxWord, int num_max)
165 {
166     int len, i, j, flag=0, id;
167     node *p, *q;
168     p = T;
169     len = strlen(str);
170     for (i = 0; i < len; i++)
171     {
172         if(str[i]>= 'a' && str[i] <= 'f')//当时abcdef时 转化为数字
173             id = str[i]- 'a' + 10;
174         else
175             id = str[i] - '0';
176         if( p ->next[id] == NULL)//扩展节点
177         {
178             flag = 1;
179             q = (node *)malloc(sizeof(node));
180             memset(q->MWord, 0, sizeof(q->MWord));
181             q->order = -1;
182             for(j = 0;j < 16 ;j++)
183                 q ->next[j] = NULL;
184             p->next[id] = q;
185         }
186         p = p->next[id];
187     }
188     if(flag)
189     {
190         strcpy(p->MWord, MaxWord);
191         p->order = num_max;
192     }
193     else
194     {
195         if( p -> order == -1)
196         {
197             strcpy(p->MWord, MaxWord);
198             p->order = num_max ;
199         }
200     }
201 }
202 
203 void findStr(char *str, node *T)
204 {
205     int len , i, id;
206     node *p;
207     p = T;
208     len = strlen(str);
209     for (i=0; i< len ; ++i)
210     {
211         if(str[i]>= 'a' && str[i] <= 'f')
212             id = str[i]- 'a' + 10;
213         else
214             id = str[i] - '0';
215         if(p->next[id] == NULL)
216             return;
217         p = p->next[id];
218     }
219     strcpy(z_Str, p->MWord);
220 }
221 
222 
223 int findNum(char *str, node *T)
224 {
225     int len, i, id;
226     node *p;
227     p = T;
228     len = strlen(str);
229     for(i = 0; i < len; i++)
230     {
231         if(str[i]>= 'a' && str[i] <= 'f')
232             id = str[i]- 'a' + 10;
233         else
234             id = str[i] - '0';
235         if(p->next[id] == NULL)
236             return  -1;
237         p = p->next[id];
238     }
239     return p->order;
240 }

主函数:

#include<stdio.h>
#include<string.h>
#include "AnalysisWord.h"

/*
 * Read sentences from standard input, one per line, and print each one
 * after it has been passed through SentenceTransform().
 *
 * Lines longer than the input buffer are processed in several pieces.
 */
int main(void)
{
    char strGetFromWeb[500] = "";
    char strGetToWeb[500] = "";

    /* fgets() is bounded by the buffer size; gets() was not and was removed in C11 */
    while (fgets(strGetFromWeb, sizeof(strGetFromWeb), stdin) != NULL)
    {
        /* gets() dropped the newline, fgets() keeps it: strip it here */
        strGetFromWeb[strcspn(strGetFromWeb, "\n")] = '\0';

        SentenceTransform(strGetFromWeb,strGetToWeb);
        puts(strGetToWeb);
        memset(strGetToWeb, 0, sizeof(strGetToWeb));
    }
    return 0;
}

 

 

posted @ 2013-07-26 13:24  煮人为乐  阅读(3990)  评论(0编辑  收藏  举报