// Usage: ./CrtInvertedIdx moon.fidx.sort > sun.iidx
#include <fstream>
#include <iostream>
#include <string>

using namespace std;

/// Merges consecutive "<term>\t<posting>" lines that share a term into one
/// record per term.
///
/// Reads `in` line by line and writes to `out`. Only adjacent lines with an
/// identical term are merged, so the input must already be sorted by term for
/// every term to appear exactly once in the output.
///
/// Output record format (unchanged from the original program):
///   <term> '\t' ' ' <posting> [' ' <posting>]... '\n'
/// i.e. every posting, including the first one, is preceded by a single space.
///
/// Lines are skipped when they contain no tab (there is no posting to record)
/// or when the term is shorter than 2 bytes or longer than 8 bytes.
/// Nothing is written if no line survives the filtering.
static void writeInvertedIndex(std::istream& in, std::ostream& out)
{
    // A term shorter than 2 bytes cannot form a Chinese word, and a term
    // longer than 8 bytes exceeds the maximum matching unit.
    const std::string::size_type kMinTermBytes = 2;
    const std::string::size_type kMaxTermBytes = 8;

    std::string line;
    std::string currentTerm;   // term whose postings are being accumulated
    std::string postings;      // " p1 p2 ..." collected for currentTerm
    bool haveTerm = false;     // false until the first valid line is seen

    while (std::getline(in, line)) {
        const std::string::size_type tab = line.find('\t');

        // Without a tab, `tab + 1` would wrap around to 0 and the whole line
        // would be recorded as its own posting, so reject the line instead.
        if (tab == std::string::npos) continue;

        // The term is line[0, tab), so its byte length equals `tab`.
        if (tab < kMinTermBytes || tab > kMaxTermBytes) continue;

        if (!haveTerm || line.compare(0, tab, currentTerm) != 0) {
            // A new term starts: flush the finished record first.
            if (haveTerm) out << currentTerm << '\t' << postings << '\n';
            currentTerm.assign(line, 0, tab);
            postings.clear();
            haveTerm = true;
        }

        // Append in place; rebuilding the string each time is quadratic.
        postings += ' ';
        postings.append(line, tab + 1, std::string::npos);
    }

    // Flush the last record, but only if at least one valid line was read.
    if (haveTerm) out << currentTerm << '\t' << postings << '\n';
}

/// Entry point: builds the inverted index for the file named by argv[1] and
/// prints it to standard output.
///
/// @return 0 on success, -1 if the argument is missing or the file cannot be
///         opened.
int main(int argc, char* argv[])
{
    if (argc < 2) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "CrtInvertedIdx")
                  << " <sorted-forward-index>\n";
        return -1;
    }

    std::ifstream ifsImgInfo(argv[1]);  // open the input stream
    if (!ifsImgInfo) {
        std::cerr << "Cannot open " << argv[1] << " for input\n";
        return -1;
    }

    writeInvertedIndex(ifsImgInfo, std::cout);
    return 0;
}