字符串相似度的计算（百度笔试题回忆）

粗略解法

//字符串相似度的计算,模板实现，可用于其他容器
#include <string>
#include <iostream>
using namespace std;

// Computes the edit distance between the sequences [pAbegin, pAend) and
// [pBbegin, pBend): the minimum number of single-element insertions,
// deletions and substitutions needed to turn one sequence into the other.
//
// Both ranges are half-open, so container.begin()/container.end() can be
// passed directly. Iterator must support ==, unary *, "+ 1" and iterator
// subtraction (random-access iterators, as in the original version).
//
// This is the plain recursive formulation without memoization; its running
// time grows exponentially with the input length, so use it on short inputs.
template<typename Iterator>
int caculateDistance(Iterator pAbegin, Iterator pAend, Iterator pBbegin, Iterator pBend)
{
    // A is exhausted: every element left in B costs one edit.
    if (pAbegin == pAend)
        return static_cast<int>(pBend - pBbegin);

    // B is exhausted: every element left in A costs one edit.
    if (pBbegin == pBend)
        return static_cast<int>(pAend - pAbegin);

    // Equal heads cost nothing; compare the tails.
    if (*pAbegin == *pBbegin)
        return caculateDistance(pAbegin + 1, pAend, pBbegin + 1, pBend);

    // Heads differ: try the three possible single edits. Each step advances
    // by exactly one element, so no iterator ever moves past its end.
    const int skipB = caculateDistance(pAbegin, pAend, pBbegin + 1, pBend);      // drop B's head
    const int skipA = caculateDistance(pAbegin + 1, pAend, pBbegin, pBend);      // drop A's head
    const int replace = caculateDistance(pAbegin + 1, pAend, pBbegin + 1, pBend); // substitute

    int minValue = skipB < skipA ? skipB : skipA;
    if (replace < minValue)
        minValue = replace;
    return minValue + 1;
}

// Demo driver: computes the distance between two sample strings and prints
// it on its own line.
int main()
{
    std::string first("abcdefghijklmn");
    std::string second("cdefghijklmn");

    const int result = caculateDistance(first.begin(), first.end(),
                                        second.begin(), second.end());
    std::cout << result << std::endl;
    return 0;
}

解法改进

历史记录结构体

// One memoization record: the four iterators that identify a sub-problem
// (a range of A and a range of B) together with the distance computed for it.
template<typename Iterator>
struct DataForHistory
{
    // Builds a record for the given sub-problem; result starts at 0 and is
    // expected to be filled in by the caller once the distance is known.
    DataForHistory(Iterator iAbegin, Iterator iAend, Iterator iBbegin, Iterator iBend)
        : pAbegin(iAbegin), pAend(iAend), pBbegin(iBbegin), pBend(iBend), result(0)
    {
    }

    // Two records are equal when they describe the same sub-problem, i.e.
    // all four iterators match. The stored result is deliberately ignored.
    // Declared const so that const records and const references can be
    // compared as well.
    bool operator==(const DataForHistory& rh) const
    {
        return pAbegin == rh.pAbegin
            && pAend == rh.pAend
            && pBbegin == rh.pBbegin
            && pBend == rh.pBend;
    }

    // Re-targets the record at another sub-problem. result is left untouched.
    void set(Iterator iAbegin, Iterator iAend, Iterator iBbegin, Iterator iBend)
    {
        pAbegin = iAbegin;
        pAend = iAend;
        pBbegin = iBbegin;
        pBend = iBend;
    }

    Iterator pAbegin;
    Iterator pAend;
    Iterator pBbegin;
    Iterator pBend;
    int result; // the computed distance for this sub-problem
};

自定义查找历史记录函数

// Looks for a record equal to data in history (linear scan, using the
// record's operator==). Returns the index of the first match, or -1 when
// no record matches.
template<typename Iterator>
int find(vector<DataForHistory<Iterator> >& history, DataForHistory<Iterator>& data)
{
    for (int index = 0; index < history.size(); ++index)
    {
        if (data == history[index])
            return index;
    }
    return -1;
}

递归决策函数

// Returns the distance for the sub-problem described by the four iterators.
// If history already holds a record for it, the stored result is returned;
// otherwise the distance is computed with caculateDistance, appended to
// history, and returned.
// (To measure the effect of the cache, force the "miss" branch below so that
// every request is recomputed.)
template<typename Iterator>
int findOrCaculate(vector<DataForHistory<Iterator> >& history, Iterator pAbegin, Iterator pAend, Iterator pBbegin, Iterator pBend)
{
    DataForHistory<Iterator> key(pAbegin, pAend, pBbegin, pBend);
    const int slot = find(history, key);

    if (slot == -1)
    {
        // Miss: compute, remember, and hand back the fresh value.
        key.result = caculateDistance(pAbegin, pAend, pBbegin, pBend);
        history.push_back(key);
        return key.result;
    }

    // Hit: reuse the stored value.
    return history[slot].result;
}

PS

// First attempt at replacing the hand-written lookup with the three-argument
// find(first, last, value) call (presumably std::find from <algorithm>).
// NOTE: this draft is shown on purpose in its NON-compiling form; the
// compiler output quoted right after it reports the two errors marked below.
template<typename Iterator>
int findOrCaculate(vector<DataForHistory<Iterator> >& history, Iterator pAbegin, Iterator pAend, Iterator pBbegin, Iterator pBend)
{
DataForHistory<Iterator> data(pAbegin,pAend,pBbegin,pBend);
// Error 1: the iterator type depends on the template parameter, so the
// declaration needs a leading 'typename'.
vector<DataForHistory<Iterator> >::iterator it = find(history.begin(),history.end(),data);
// Error 2: a vector iterator cannot be converted to bool; "not found" has to
// be tested with it != history.end().
if (it)
{
return it->result;
}
else
{
// Not cached yet: compute the value and remember it.
data.result = caculateDistance(pAbegin,pAend,pBbegin,pBend);
history.push_back(data);
return data.result;
}
}

like.cpp:69:2: error: need 'typename' before 'std::vector<DataForHistory<Iterator> >::iterator' because 'std::vector<DataForHistory<Iterator> >' is a dependent scope

like.cpp:70:2: error: could not convert 'it' from 'std::vector<DataForHistory<__gnu_cxx::__normal_iterator<char*, std::basic_string<char> > >, std::allocator<DataForHistory<__gnu_cxx::__normal_iterator<char*, std::basic_string<char> > > > >::iterator {aka __gnu_cxx::__normal_iterator<DataForHistory<__gnu_cxx::__normal_iterator<char*, std::basic_string<char> > >*, std::vector<DataForHistory<__gnu_cxx::__normal_iterator<char*, std::basic_string<char> > >, std::allocator<DataForHistory<__gnu_cxx::__normal_iterator<char*, std::basic_string<char> > > > > >}' to 'bool'

if (it != history.end())

// Decides whether a recursive computation is needed. If a record for this
// sub-problem already exists in history, its stored result is returned
// without recursing. Otherwise the value is computed recursively via
// caculateDistance, pushed onto history, and returned.
template<typename Iterator>
int findOrCaculate(vector<DataForHistory<Iterator> >& history, Iterator pAbegin, Iterator pAend, Iterator pBbegin, Iterator pBend)
{
    typedef typename vector<DataForHistory<Iterator> >::iterator HistoryIterator;

    DataForHistory<Iterator> key(pAbegin, pAend, pBbegin, pBend);
    const HistoryIterator hit = find(history.begin(), history.end(), key);

    if (hit == history.end())
    {
        // Miss: compute, remember, and hand back the fresh value.
        key.result = caculateDistance(pAbegin, pAend, pBbegin, pBend);
        history.push_back(key);
        return key.result;
    }

    // Hit: reuse the stored value.
    return hit->result;
}

递归函数

// Memoized edit distance between the sequences [pAbegin, pAend) and
// [pBbegin, pBend). Sub-problems are delegated to findOrCaculate, which
// consults the shared history before recursing back into this function.
//
// Both ranges are half-open, so container.begin()/container.end() can be
// passed directly. Iterator must support ==, unary *, "+ 1" and iterator
// subtraction.
//
// NOTE(review): history is a function-local static keyed by iterator values,
// so records survive between top-level calls. A later call whose iterators
// compare equal to old ones but refer to different contents could pick up a
// stale result — confirm callers use it on one pair of sequences only.
template<typename Iterator>
int caculateDistance(Iterator pAbegin, Iterator pAend, Iterator pBbegin, Iterator pBend)
{
    static vector<DataForHistory<Iterator> > history;
    // Trace output: number of records cached so far.
    cout << "call caculateDistance() times:"<< history.size() << endl;

    // A is exhausted: every element left in B costs one edit.
    if (pAbegin == pAend)
        return static_cast<int>(pBend - pBbegin);

    // B is exhausted: every element left in A costs one edit.
    if (pBbegin == pBend)
        return static_cast<int>(pAend - pAbegin);

    // Equal heads cost nothing. The result must be returned here; without
    // the return the function would fall off its end.
    if (*pAbegin == *pBbegin)
        return findOrCaculate(history, pAbegin + 1, pAend, pBbegin + 1, pBend);

    // Heads differ: try the three possible single edits, each advancing by
    // exactly one element so no iterator moves past its end.
    const int t1 = findOrCaculate(history, pAbegin, pAend, pBbegin + 1, pBend);     // drop B's head
    const int t2 = findOrCaculate(history, pAbegin + 1, pAend, pBbegin, pBend);     // drop A's head
    const int t3 = findOrCaculate(history, pAbegin + 1, pAend, pBbegin + 1, pBend); // substitute

    int minValue = t1 > t2 ? t2 : t1;
    minValue = minValue > t3 ? t3 : minValue;
    return minValue + 1;
}

更好的解法

C++代码实现：

//Levenshtein算法计算两字符串的编辑距离

#include <algorithm>
#include <string>
#include <iostream>
#include <fstream>
#include <vector>
using namespace std;

// Computes the Levenshtein (edit) distance between s and t: the minimum
// number of single-character insertions, deletions and substitutions needed
// to turn one string into the other.
//
// Uses the two-row dynamic-programming scheme: O(|s| * |t|) time and
// O(|t|) extra memory. The strings are only read, so they are taken by
// const reference (const strings and temporaries are accepted).
int Levenshtein(const std::string& s, const std::string& t)
{
    // Step 1: trivial cases — the distance to an empty string is the length
    // of the other string.
    const int n = static_cast<int>(s.size());
    const int m = static_cast<int>(t.size());
    if (n == 0) return m;
    if (m == 0) return n;

    std::vector<int> previous(m + 1); // row i-1 of the DP table
    std::vector<int> current(m + 1);  // row i of the DP table

    // Step 2: row 0 — turning "" into t[0..j) takes j insertions.
    for (int j = 0; j <= m; ++j)
        previous[j] = j;

    // Steps 3-4: fill the table row by row.
    for (int i = 1; i <= n; ++i)
    {
        // Column 0 — turning s[0..i) into "" takes i deletions.
        current[0] = i;
        for (int j = 1; j <= m; ++j)
        {
            // Step 5: substitution is free when the characters match.
            const int cost = (s[i - 1] == t[j - 1]) ? 0 : 1;

            // Step 6: cheapest of delete / insert / substitute.
            const int deletion = previous[j] + 1;
            const int insertion = current[j - 1] + 1;
            const int substitution = previous[j - 1] + cost;
            current[j] = std::min(deletion, std::min(insertion, substitution));
        }
        // The finished row becomes the "previous" row; swapping avoids
        // copying m+1 elements on every iteration.
        previous.swap(current);
    }

    // Step 7: after the last swap the final row lives in previous.
    return previous[m];
}

// Driver: loads the files "t.c" and "tt.c", echoes their contents, and
// prints the Levenshtein distance between them.
//
// Each file is read up to the first '\1' byte or end of file, whichever
// comes first. If a file cannot be opened, its string stays empty.
int main()
{
    std::string s;
    std::string t;

    // Read-only streams: opening for input only also works on files that
    // are not writable. A separate stream object per file means the second
    // read never sees the end-of-file state left behind by the first one
    // (before C++11, reopening a stream did not reset its state flags).
    {
        std::ifstream first("t.c");
        std::getline(first, s, '\1');
    }
    {
        std::ifstream second("tt.c");
        std::getline(second, t, '\1');
    }

    std::cout << s << std::endl;
    std::cout << t << std::endl;
    std::cout << Levenshtein(t, s) << std::endl;
    return 0;
}
posted @ 2012-10-16 15:31  涵曦  阅读(4198)  评论(3)  编辑  收藏  举报