// Usage: ./ForwardDocIdx Tianwang.raw.2559638448.seg > moon.fdx
2 #include <iostream>
3 #include <fstream>
4
5 using namespace std;
6
// Separator that terminates each word in a segmented line.
const std::string SEPARATOR("/ ");

// Writes one "<word>\t<docNum>\n" record to `out` for every word in `line`
// that is terminated by SEPARATOR. Any text after the last separator is
// ignored, matching the input format where every word ends with SEPARATOR.
static void emitPostings(const std::string& line, const std::string& docNum, std::ostream& out)
{
    std::string::size_type start = 0;
    std::string::size_type end = 0;
    // Scan forward with an offset instead of repeatedly copying the tail of
    // the line, so a line is processed in linear time.
    while ((end = line.find(SEPARATOR, start)) != std::string::npos) {
        // '\n' rather than std::endl: avoids flushing stdout once per word.
        out << line.substr(start, end - start) << '\t' << docNum << '\n';
        start = end + SEPARATOR.size();
    }
}

// Reads the file named by argv[1] as alternating lines: odd lines hold a
// document number, even lines hold that document's segmented text. Prints a
// "<word>\t<docNum>" record to stdout for every word.
//
// Returns 0 on success, -1 if the argument is missing or the file cannot be
// opened.
int main(int argc, char* argv[])
{
    if (argc < 2) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "ForwardDocIdx")
                  << " <segmented-file>\n";
        return -1;
    }

    std::ifstream ifsImgInfo(argv[1]); // open the input stream
    if (!ifsImgInfo) {
        std::cerr << "Cannot open " << argv[1] << " for input\n";
        return -1;
    }

    std::string strLine;
    std::string strDocNum;
    bool isDocNumLine = false; // toggled per line; true on odd (1st, 3rd, ...) lines
    while (std::getline(ifsImgInfo, strLine)) {
        isDocNumLine = !isDocNumLine;
        if (isDocNumLine) { // odd lines carry the document number
            strDocNum = strLine;
            continue;
        }

        // Skip empty lines and lines starting with NUL or '#'.
        const char first = strLine.empty() ? '\0' : strLine[0];
        if (first == '\0' || first == '#') {
            continue;
        }

        // Even lines carry the segmented words of the document.
        emitPostings(strLine, strDocNum, std::cout);
    }

    return 0;
}