# 从优化到再优化，最长公共子串

## 暴力解法 – 所得即所求

1 int longestCommonSubstring_n3(const string& str1, const string& str2)
2 {
3     size_t size1 = str1.size();
4     size_t size2 = str2.size();
5     if (size1 == 0 || size2 == 0) return 0;
6
7     // the start position of substring in original string
8     int start1 = -1;
9     int start2 = -1;
10     // the longest length of common substring
11     int longest = 0;
12
13     // record how many comparisons the solution did;
14     // it can be used to know which algorithm is better
15     int comparisons = 0;
16
17     for (int i = 0; i < size1; ++i)
18     {
19         for (int j = 0; j < size2; ++j)
20         {
21             // find longest length of prefix
22             int length = 0;
23             int m = i;
24             int n = j;
25             while(m < size1 && n < size2)
26             {
27                 ++comparisons;
28                 if (str1[m] != str2[n]) break;
29
30                 ++length;
31                 ++m;
32                 ++n;
33             }
34
35             if (longest < length)
36             {
37                 longest = length;
38                 start1 = i;
39                 start2 = j;
40             }
41         }
42     }
43 #ifdef IDER_DEBUG
44     cout<< "(first, second, comparisions) = ("
45         << start1 << ", " << start2 << ", " << comparisons
46         << ")" << endl;
47 #endif
48     return longest;
49 }

## 动态规划法 – 空间换时间

1 int longestCommonSubstring_n2_n2(const string& str1, const string& str2)
2 {
3     size_t size1 = str1.size();
4     size_t size2 = str2.size();
5     if (size1 == 0 || size2 == 0) return 0;
6
7     vector<vector<int> > table(size1, vector<int>(size2, 0));
8     // the start position of substring in original string
9     int start1 = -1;
10     int start2 = -1;
11     // the longest length of common substring
12     int longest = 0;
13
14     // record how many comparisons the solution did;
15     // it can be used to know which algorithm is better
16     int comparisons = 0;
17     for (int j = 0; j < size2; ++j)
18     {
19         ++comparisons;
20         table[0][j] = (str1[0] == str2[j] ? 1 :0);
21     }
22
23     for (int i = 1; i < size1; ++i)
24     {
25         ++comparisons;
26         table[i][0] = (str1[i] == str2[0] ? 1 :0);
27
28         for (int j = 1; j < size2; ++j)
29         {
30             ++comparisons;
31             if (str1[i] == str2[j])
32             {
33                 table[i][j] = table[i-1][j-1]+1;
34             }
35         }
36     }
37
38     for (int i = 0; i < size1; ++i)
39     {
40         for (int j = 0; j < size2; ++j)
41         {
42             if (longest < table[i][j])
43             {
44                 longest = table[i][j];
45                 start1 = i-longest+1;
46                 start2 = j-longest+1;
47             }
48         }
49     }
50 #ifdef IDER_DEBUG
51     cout<< "(first, second, comparisions) = ("
52         << start1 << ", " << start2 << ", " << comparisons
53         << ")" << endl;
54 #endif
55
56     return longest;
57 }

## 动态规划法优化 – 能省一点是一点

int longestCommonSubstring_n2_2n(const string& str1, const string& str2)
{
size_t size1 = str1.size();
size_t size2 = str2.size();
if (size1 == 0 || size2 == 0) return 0;

vector<vector<int> > table(2, vector<int>(size2, 0));

// the start position of substring in original string
int start1 = -1;
int start2 = -1;
// the longest length of common substring
int longest = 0;

// record how many comparisons the solution did;
// it can be used to know which algorithm is better
int comparisons = 0;
for (int j = 0; j < size2; ++j)
{
++comparisons;
if (str1[0] == str2[j])
{
table[0][j] = 1;
if (longest == 0)
{
longest = 1;
start1 = 0;
start2 = j;
}
}
}

for (int i = 1; i < size1; ++i)
{
++comparisons;
// with odd/even to swith working row
int cur = ((i&1) == 1); //index for current working row
int pre = ((i&1) == 0); //index for previous working row
table[cur][0] = 0;
if (str1[i] == str2[0])
{
table[cur][0] = 1;
if (longest == 0)
{
longest = 1;
start1 = i;
start2 = 0;
}
}

for (int j = 1; j < size2; ++j)
{
++comparisons;
if (str1[i] == str2[j])
{
table[cur][j] = table[pre][j-1]+1;
if (longest < table[cur][j])
{
longest = table[cur][j];
start1 = i-longest+1;
start2 = j-longest+1;
}
}
else
{
table[cur][j] = 0;
}
}
}

#ifdef IDER_DEBUG
cout<< "(first, second, comparisions) = ("
<< start1 << ", " << start2 << ", " << comparisons
<< ")" << endl;
#endif

return longest;
}

## 动态规划法再优化 – 能用一点就只用一点

1 int longestCommonSubstring_n2_1(const string& str1, const string& str2)
2 {
3     size_t size1 = str1.size();
4     size_t size2 = str2.size();
5     if (size1 == 0 || size2 == 0) return 0;
6
7     // the start position of substring in original string
8     int start1 = -1;
9     int start2 = -1;
10     // the longest length of common substring
11     int longest = 0;
12
13     // record how many comparisons the solution did;
14     // it can be used to know which algorithm is better
15     int comparisons = 0;
16
17     for (int i = 0; i < size1; ++i)
18     {
19         int m = i;
20         int n = 0;
21         int length = 0;
22         while(m < size1 && n < size2)
23         {
24             ++comparisons;
25             if (str1[m] != str2[n])
26             {
27                 length = 0;
28             }
29             else
30             {
31                 ++length;
32                 if (longest < length)
33                 {
34                     longest = length;
35                     start1 = m-longest+1;
36                     start2 = n-longest+1;
37                 }
38             }
39
40             ++m;
41             ++n;
42         }
43     }
44
45     // shift string2 to find the longest common substring
46     for (int j = 1; j < size2; ++j)
47     {
48         int m = 0;
49         int n = j;
50         int length = 0;
51         while(m < size1 && n < size2)
52         {
53             ++comparisons;
54             if (str1[m] != str2[n])
55             {
56                 length = 0;
57             }
58             else
59             {
60                 ++length;
61                 if (longest < length)
62                 {
63                     longest = length;
64                     start1 = m-longest+1;
65                     start2 = n-longest+1;
66                 }
67             }
68
69             ++m;
70             ++n;
71         }
72     }
73
74 #ifdef IDER_DEBUG
75     cout<< "(first, second, comparisions) = ("
76         << start1 << ", " << start2 << ", " << comparisons
77         << ")" << endl;
78 #endif
79
80     return longest;
81 }

1 int longestCommonSubstring_n2_1(const string& str1, const string& str2)
2 {
3     size_t size1 = str1.size();
4     size_t size2 = str2.size();
5     if (size1 == 0 || size2 == 0) return 0;
6
7     // the start position of substring in original string
8     int start1 = -1;
9     int start2 = -1;
10     // the longest length of common substring
11     int longest = 0;
12
13     // record how many comparisons the solution did;
14     // it can be used to know which algorithm is better
15     int comparisons = 0;
16
17     int indices[] = {0, 0};
18     int sizes[] = {size1, size2};
19
20     // shift strings to find the longest common substring
21     for (int index = 0; index < 2; ++index)
22     {
23         indices[0] = 0;
24         indices[1] = 0;
25
26         // i is reference to the value in array
27         int &i = indices[index];
28         int size = sizes[index];
29
30         // this is tricky to skip comparing strings both start with 0 for second loop
31         i = index;
32         for (; i < size; ++i)
33         {
34             int m = indices[0];
35             int n = indices[1];
36             int length = 0;
37
38             // with following check to reduce some more comparisons
39             if (size1-m <= longest || size2-n <= longest)
40                 break;
41
42             while(m < size1 && n < size2)
43             {
44                 ++comparisons;
45                 if (str1[m] != str2[n])
46                 {
47                     length = 0;
48                 }
49                 else
50                 {
51                     ++length;
52                     if (longest < length)
53                     {
54                         longest = length;
55                         start1 = m-longest+1;
56                         start2 = n-longest+1;
57                     }
58                 }
59
60                 ++m;
61                 ++n;
62             }
63         }
64     }
65
66 #ifdef IDER_DEBUG
67     cout<< "(first, second, comparisions) = ("
68         << start1 << ", " << start2 << ", " << comparisons
69         << ")" << endl;
70 #endif
71
72     return longest;
73 }

## 测试案例及结果

YXXXXXY (7)
YXYXXYYYYXXYYYYXYYXXYYXXYXYYYYYYXYXYYXYXYYYXXXXXX (49)
(first, second, comparisions) = (0, 42, 537)
6
(first, second, comparisions) = (0, 42, 343)
6
(first, second, comparisions) = (0, 42, 343)
6
(first, second, comparisions) = (0, 42, 316)
6

XXYXYYYXXYXYYYYXYXYYYXYYYYYXYX (30)
XYY (3)
(first, second, comparisions) = (3, 0, 127)
3
(first, second, comparisions) = (3, 0, 90)
3
(first, second, comparisions) = (3, 0, 90)
3
(first, second, comparisions) = (3, 0, 12)
3

XXYXXYYYXYXYYXXYYYYYXXYXXXYXXYXYXXXXYXXYYYXYYXYXYXXXYYXXXYYXYYXYXYXYXXXXXXXXXYXXXX (82)
YYYYYXYYYXYYXXXYYYXXYYXXYXXXYYYYYYYYXXYXYYYYXYXYYXYX (52)
(first, second, comparisions) = (5, 41, 7911)
9
(first, second, comparisions) = (5, 41, 4264)
9
(first, second, comparisions) = (5, 41, 4264)
9
(first, second, comparisions) = (18, 20, 4183)
9

X (1)
XXYYYYYYXYYXYXXXYYXXXYYYYYYXYYYXYYXXYYYYXXXYXXXXXXXYXYXYXYYYYYYYYXYXYXXX (72)
(first, second, comparisions) = (0, 0, 72)
1
(first, second, comparisions) = (0, 0, 72)
1
(first, second, comparisions) = (0, 0, 72)
1
(first, second, comparisions) = (0, 0, 1)
1

(0)
XYXXYYYXXXYYXXYYYYXXYYYXYYYXXXXXYYXXYXYXXXYY (44)
0
0
0
0

References:

posted @ 2013-07-18 10:27  Ider  阅读(72263)  评论(19编辑  收藏  举报