Dottext关键字控制
最近使用dottext的关键字功能,设置了中文的关键字,发现不起作用,但是在关键字前后加上空格就起作用了。这个设定对于英语国家来说是为了防止关键字误匹配到其他单词的内部:比如设置了eat关键字,如果不加单词位置判断,就会把heat中的eat也当作关键字转换了,这显然是个错误。但是对于我们中文用户,词语之间是不加空格的,所以对于中文blog这个设定就变得累赘了。为此我分析了.text中设置关键字的这段代码。
/// <summary>
/// Walks <paramref name="source"/> character by character and replaces
/// occurrences of <paramref name="oldValue"/> with <paramref name="newValue"/>.
/// Replacement is only attempted while the scanner is in the
/// <c>ScanState.Replace</c> state; it is suspended while inside a tag
/// (<c>ScanState.InTag</c>) or an anchor (<c>ScanState.InAnchor</c>).
/// </summary>
/// <param name="source">Text to scan. A single space is appended internally as a sentinel.</param>
/// <param name="oldValue">Keyword to look for. When null or empty, <paramref name="source"/> is returned unchanged.</param>
/// <param name="newValue">Replacement text, or a composite format string when <paramref name="isFormat"/> is true.</param>
/// <param name="isFormat">When true, <paramref name="newValue"/> is used as a format string with <paramref name="oldValue"/> as argument {0}.</param>
/// <param name="onlyFirstMatch">When true, the method returns right after the first replacement, with the rest of the text appended as-is.</param>
/// <param name="CaseSensitive">When false, the keyword comparison ignores case.</param>
/// <returns>
/// The processed text. On the full-scan path the result is trimmed; on the
/// <paramref name="onlyFirstMatch"/> early-return path it is not.
/// </returns>
private static string Scan(string source, string oldValue, string newValue, bool isFormat, bool onlyFirstMatch, bool CaseSensitive)
{
    const char tagOpen = '<';
    const char tagClose = '>';
    const string anchorOpen = "<a ";
    const string anchorClose = "</a";

    // Nothing to search for. A null keyword would throw below, and an empty
    // keyword would "match" a zero-length substring and then step i backwards
    // (i += oldValue.Length - 1), so the loop would not advance.
    if (oldValue == null || oldValue.Length == 0)
        return source;

    // Sentinel so there is always one character after a match at the very end.
    source += " ";

    // Set when the current iteration emitted a replacement, so that the
    // current character is not appended to the output as well.
    bool lastIterMatched = false;

    ScanState state = ScanState.Replace;
    StringBuilder outputBuffer = new StringBuilder(source.Length);

    // Holds the most recent characters, sized to the longer of the two anchor
    // markers, so the tail of the input can be compared against them.
    CharQueue tagstack =
        new CharQueue(anchorOpen.Length >= anchorClose.Length ? anchorOpen.Length : anchorClose.Length);

    for (int i = 0; i < source.Length; i++)
    {
        char nextChar = source[i];
        tagstack.Enqueue(nextChar);

        switch (state)
        {
            case ScanState.Replace:
                if (anchorOpen == tagstack.ToString(anchorOpen.Length))
                {
                    state = ScanState.InAnchor;
                    break;
                }
                else
                {
                    if (tagOpen == nextChar)
                    {
                        state = ScanState.InTag;
                        break;
                    }
                    else
                    {
                        string matchTarget;
                        if (source.Length - (i + tagstack.Length + oldValue.Length) > 0)
                        {
                            // Peek ahead at the next keyword-length chunk.
                            matchTarget = source.Substring(i + tagstack.Length, oldValue.Length);

                            // Case-insensitive comparison when CaseSensitive is false.
                            if (string.Compare(matchTarget, oldValue, !CaseSensitive) == 0)
                            {
                                int index = tagstack.Length - i;
                                if (index != 0) // Skip if we are at the start of the block
                                {
                                    char prevBeforeMatch = source[(i + tagstack.Length) - 1];

                                    // The original condition also had
                                    // "&& !Char.IsWhiteSpace(prevBeforeMatch)"; that clause is
                                    // intentionally disabled here.
                                    // NOTE(review): as written, a match is rejected unless the
                                    // preceding character is '>' or '"' - confirm this is the
                                    // intended behavior for unspaced (e.g. Chinese) text.
                                    if (prevBeforeMatch != '>' && prevBeforeMatch != '"')
                                    {
                                        break;
                                    }
                                }

                                // The trailing word-boundary check is intentionally disabled so
                                // keywords can match inside text that has no spaces between
                                // words. The disabled check was:
                                //   char nextAfterMatch = source[i + tagstack.Length + oldValue.Length];
                                //   if (!CharIsWordBoundary(nextAfterMatch)) break;

                                // Format old with specifier, else it's a straight replace.
                                if (isFormat)
                                    outputBuffer.AppendFormat(newValue, oldValue);
                                else
                                    outputBuffer.Append(newValue);

                                // If we're onlyFirstMatch, tack on the remainder of source
                                // (minus the sentinel space) and return.
                                if (onlyFirstMatch)
                                {
                                    // Append, not AppendFormat: the remainder is literal text,
                                    // and any '{' or '}' in it must not be parsed as a
                                    // format placeholder.
                                    outputBuffer.Append(source.Substring(i + oldValue.Length,
                                        source.Length - (i + oldValue.Length + 1)));
                                    return outputBuffer.ToString();
                                }
                                else // pop index ahead to end of match and continue
                                    i += oldValue.Length - 1;

                                lastIterMatched = true;
                                break;
                            }
                        }
                    }
                }

                break;

            case ScanState.InAnchor:
                // Stay in the anchor until the closing marker has been read.
                if (anchorClose == tagstack.ToString(anchorClose.Length))
                    state = ScanState.Replace;
                break;

            case ScanState.InTag:
                // A tag that turns out to be "<a " becomes an anchor; any other
                // tag ends at the next '>'.
                if (anchorOpen == tagstack.ToString(anchorOpen.Length))
                    state = ScanState.InAnchor;
                else if (tagClose == nextChar)
                    state = ScanState.Replace;
                break;

            default:
                break;
        }

        if (!lastIterMatched)
        {
            outputBuffer.Append(nextChar);
        }
        else
            lastIterMatched = false;
    }

    return outputBuffer.ToString().Trim();
}
这个是在Dottext.Framework.Util下的Scan函数,刚才说的行为就是由匹配位置前后的两处判断设定的:Char.IsWhiteSpace(prevBeforeMatch)是判断match关键字前面的空格,CharIsWordBoundary(nextAfterMatch)是判断match关键字后面的空格。当我把这2个条件break的代码注释掉以后,关键字的功能就能正常地在中文文章上起效了。为了避免英文的问题,我考虑可以在后台增加条件,用来判断用户设定的关键字是否要求判断英文的单词位置隔离,这样就可以更好地解决中文与英文的关键字问题了。
浙公网安备 33010602011771号