Crtforwardidx

 1 //./ForwardDocIdx Tianwang.raw.2559638448.seg > moon.fdx
 2 #include <iostream>
 3 #include <fstream>
 4 
 5 using namespace std;
 6 
 7 const string SEPARATOR("/  ");         //词间分割符
 8 int main(int argc, char* argv[])
 9 {
10     ifstream ifsImgInfo(argv[1]);//打开输入流
11     if (!ifsImgInfo) {
12         cerr << "Cannot open " << argv[1] << " for input\n";
13         return -1;
14     }
15 
16     string strLine,strDocNum;
17     int cnt = 0;
18     while (getline(ifsImgInfo, strLine)) {
19         string::size_type idx;
20 
21         cnt++;
22         if (cnt%2 == 1){//奇数行是文档编号
23             strDocNum = strLine.substr(0,strLine.size());
24             continue;
25         }
26 
27 
28 
29 
30         if (strLine[0]=='\0' || strLine[0]=='#' || strLine[0]=='\n'){
31             continue;
32         }
33 
34 
35         //偶数行是文档分词的结果
36         while ( (idx = strLine.find(SEPARATOR)) != string::npos ) {
37             string tmp1 = strLine.substr(0,idx);
38             cout << tmp1 << "\t" << strDocNum << endl;
39             strLine = strLine.substr(idx + SEPARATOR.size());
40         }
41 
42         //if (cnt==100) break;
43     }
44 
45     return 0;
46 }

posted on 2012-07-15 14:50  kakamilan  阅读(139)  评论(0)  编辑  收藏  举报

导航