CrtInvertedIdx

 1 // ./CrtInvertedIdx moon.fidx.sort > sun.iidx
 2 #include <iostream>
 3 #include <fstream>
 4 
 5 using namespace std;
 6 
 7 int main(int argc, char* argv[])
 8 {
 9     ifstream ifsImgInfo(argv[1]);//打开输入流
10     if (!ifsImgInfo) {
11         cerr << "Cannot open " << argv[1] << " for input\n";
12         return -1;
13     }
14 
15     string strLine,strDocNum,tmp1="";
16     int cnt = 0;
17     while (getline(ifsImgInfo, strLine)) {
18         string::size_type idx;
19         string tmp;
20 
21 
22         idx = strLine.find("\t");
23         tmp = strLine.substr(0,idx);
24         //得到索引词
25         if (tmp.size()<2 || tmp.size() > 8) continue;//不可能构成中文或者大于最大匹配单元
26 
27         if (tmp1.empty()) tmp1=tmp;
28 
29         if (tmp == tmp1) {//如果是相同的索引词,则将文档编号追加到strDocNum中
30             strDocNum = strDocNum + " " + strLine.substr(idx+1);//索引词相同,则加起来
31         }
32         else {//索引词不同
33             if ( strDocNum.empty() )
34                 strDocNum = strDocNum + " " + strLine.substr(idx+1);
35 
36             cout << tmp1 << "\t" << strDocNum << endl;//输出
37             tmp1 = tmp;
38             strDocNum.clear();
39             strDocNum = strDocNum + " " + strLine.substr(idx+1);
40         }
41 
42         cnt++;
43         //if (cnt==100) break;
44     }
45     cout << tmp1 << "\t" << strDocNum << endl;
46 
47     return 0;
48 }

posted on 2012-07-15 15:08  kakamilan  阅读(173)  评论(0)  编辑  收藏  举报

导航