C#中查询字符串中是否包含指定字符/字符串,使用IndexOf还是Contains?
2010-03-28 18:27 uonun 阅读(6228) 评论(4) 收藏 举报
C#中查询字符串中是否包含指定字符/字符串,使用IndexOf还是Contains?这是一个很常见的问题,以前也没有注意过,今天QQ群里有人提起,于是就做了下试验,代码如下:
using System;
using System.Diagnostics;

namespace ConsoleApplication1
{
    // Console micro-benchmark: times N calls of string.Contains(string) against
    // N calls of string.IndexOf(string) on the same source string, for three
    // different single-character targets, and prints the elapsed milliseconds.
    class Program
    {
        // Number of calls executed per measurement.
        private const int N = 10000000;

        // One stopwatch shared by every measurement; it is reset before each run.
        private static Stopwatch watch = new Stopwatch();

        static void Main(string[] args)
        {
            string source = "abcdefghijklmnopqrstuvwxyz0123456789C#"
                + "中查询字符串中是否包含指定字符/串,使用IndexOf还是Contains?.uonun";

            // Case 1: the target is the very first character of source.
            Console.WriteLine("目标为第一个字符时:");
            TestContains(source, "a");
            TestIndexOf(source, "a");
            Console.WriteLine();

            // Case 2: the target first occurs right after "C#" (index 38).
            Console.WriteLine("目标为中部某个字符时:");
            TestContains(source, "中");
            TestIndexOf(source, "中");
            Console.WriteLine();

            // Case 3: labelled "last character".
            // NOTE(review): "u" also occurs at index 20 of source (inside the
            // alphabet run), so both searches stop there instead of near the tail.
            Console.WriteLine("目标为最后一个字符时:");
            TestContains(source, "u");
            TestIndexOf(source, "u");

            Console.WriteLine("执行完毕,按任意键退出...");
            Console.ReadKey();
        }

        // Times N calls of source.Contains(target) and prints the elapsed milliseconds.
        private static void TestContains(string source, string target)
        {
            watch.Reset();
            watch.Start();
            for (int pass = 0; pass < N; pass++)
            {
                // Result intentionally discarded; only the call cost is measured.
                source.Contains(target);
            }
            watch.Stop();

            string elapsed = watch.ElapsedMilliseconds.ToString();
            Console.WriteLine("Contains: " + elapsed + "ms");
        }

        // Times N calls of source.IndexOf(target) and prints the elapsed milliseconds.
        private static void TestIndexOf(string source, string target)
        {
            watch.Reset();
            watch.Start();
            for (int pass = 0; pass < N; pass++)
            {
                // Result intentionally discarded; only the call cost is measured.
                source.IndexOf(target);
            }
            watch.Stop();

            string elapsed = watch.ElapsedMilliseconds.ToString();
            Console.WriteLine("IndexOf: " + elapsed + "ms");
        }
    }
}
得到的结果是:
目标为第一个字符时:
Contains: 973ms
IndexOf: 1343ms
目标为中部某个字符时:
Contains: 1813ms
IndexOf: 8602ms
目标为最后一个字符时:
Contains: 1433ms
IndexOf: 5094ms
执行完毕,按任意键退出...
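可以看出,使用Contains方法的效率比IndexOf的效率高很多。(需要注意:测试代码里"最后一个字符"一组的目标 "u" 其实在 source 开头的字母表部分就已经出现了(下标 20),比"中部"一组的目标 "中"(下标 38)还靠前,所以这一组并没有真正查找到字符串末尾,这也是它的耗时反而比"中部"一组更短的原因。)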
疑问:Contains 调用了 IndexOf,反倒比 IndexOf 还快?
正如评论中@Vampire_D 所说,Contains 调用了 IndexOf,如果 Contains 比 IndexOf 还快,简直不科学啊!不过仔细一看一对比你就会知道,正是 IndexOf 的第二个参数在捣鬼。
.NET 中 Contains 和 IndexOf 分别是这样实现的:
// Type: System.String
// Assembly: mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
// MVID: 00788FED-9041-4DB2-AB5B-0952A06515B1
// Assembly location: C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorlib.dll
// .NET 中字符串的 Contains 和 IndexOf 方法的实现:
// Reports whether value occurs in this string.
// Forwards to IndexOf(value, StringComparison.Ordinal) and returns true
// when that call yields a non-negative index.
public bool Contains(string value)
{
return this.IndexOf(value, StringComparison.Ordinal) >= 0;
}
// Single-argument overload: hands the search to the CompareInfo of
// CultureInfo.CurrentCulture, without passing any explicit comparison options.
public int IndexOf(string value)
{
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value);
}
// Same as IndexOf(string), but the search begins at startIndex.
// Also delegates to the CompareInfo of CultureInfo.CurrentCulture.
public int IndexOf(string value, int startIndex)
{
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex);
}
// Searches count characters starting at startIndex, using the CompareInfo of
// CultureInfo.CurrentCulture with CompareOptions.None.
// Throws ArgumentOutOfRangeException when startIndex is outside [0, Length],
// or when count is negative or larger than Length - startIndex.
// Note: value is not null-checked in this method itself.
public int IndexOf(string value, int startIndex, int count)
{
if (startIndex < 0 || startIndex > this.Length)
throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_Index"));
if (count < 0 || count > this.Length - startIndex)
throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_Count"));
else
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.None);
}
// Searches the whole string (start 0, count Length) with the given comparison type
// by forwarding to the four-argument overload. This is the overload Contains calls.
public int IndexOf(string value, StringComparison comparisonType)
{
return this.IndexOf(value, 0, this.Length, comparisonType);
}
// Searches from startIndex to the end of the string (count = Length - startIndex)
// with the given comparison type, by forwarding to the four-argument overload.
public int IndexOf(string value, int startIndex, StringComparison comparisonType)
{
return this.IndexOf(value, startIndex, this.Length - startIndex, comparisonType);
}
// Core overload: validates the arguments, then dispatches on comparisonType to the
// matching CompareInfo/TextInfo search routine.
// Throws ArgumentNullException when value is null, ArgumentOutOfRangeException when
// startIndex or count fall outside the string, and ArgumentException for a
// comparisonType that is not one of the six handled values.
public int IndexOf(string value, int startIndex, int count, StringComparison comparisonType)
{
if (value == null)
throw new ArgumentNullException("value");
if (startIndex < 0 || startIndex > this.Length)
throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_Index"));
// Equivalent to "count > Length - startIndex", written with the terms rearranged.
if (count < 0 || startIndex > this.Length - count)
throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_Count"));
switch (comparisonType)
{
// Culture-based comparisons: current or invariant culture, with or without IgnoreCase.
case StringComparison.CurrentCulture:
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.None);
case StringComparison.CurrentCultureIgnoreCase:
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase);
case StringComparison.InvariantCulture:
return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.None);
case StringComparison.InvariantCultureIgnoreCase:
return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase);
// Ordinal (the path taken by Contains): routed through the invariant culture's
// CompareInfo, but with CompareOptions.Ordinal rather than CompareOptions.None.
case StringComparison.Ordinal:
return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.Ordinal);
// Ordinal ignore-case is the only case handled by TextInfo instead of CompareInfo.
case StringComparison.OrdinalIgnoreCase:
return TextInfo.IndexOfStringOrdinalIgnoreCase(this, value, startIndex, count);
default:
throw new ArgumentException(Environment.GetResourceString("NotSupported_StringComparison"), "comparisonType");
}
}
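可以看出,Contains 固定使用 StringComparison.Ordinal(序号比较,即按字符的编码值逐个比较;在上面的实现中,它最终调用的是 CultureInfo.InvariantCulture.CompareInfo.IndexOf 并传入 CompareOptions.Ordinal),而 IndexOf 在不带第二个参数时,默认使用 CultureInfo.CurrentCulture 做区分区域性的语言比较(CompareOptions.None),这直接导致了比较过程的复杂性,从而影响了两个方法的执行效率(以及结果——我没有去验证什么情况下结果会不一致;按官方文档的说明,区分区域性的查找会忽略软连字符 U+00AD 之类的"可忽略字符",而序号比较不会,这类输入下两者的结果就可能不同)。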
进一步地,我们来看看使用这两个不同的 CultureInfo 到底有什么不同?
CultureInfo.InvariantCulture:它是 CultureInfo 中的一个静态变量,直接在 CultureInfo 的静态构造函数里实例化,并且只需要实例化一次。
// Type: System.Globalization.CultureInfo
// Assembly: mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
// MVID: 00788FED-9041-4DB2-AB5B-0952A06515B1
// Assembly location: C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorlib.dll
//------------CultureInfo.InvariantCulture----------
// Backing field for the InvariantCulture property; assigned in the static constructor below.
private static CultureInfo m_InvariantCultureInfo;
// Static constructor: creates the invariant culture instance if it does not exist yet,
// then initializes the user-default culture fields.
static CultureInfo()
{
if (CultureInfo.m_InvariantCultureInfo == null)
// Built with culture id sbyte.MaxValue (127) and marked read-only.
// NOTE(review): the meaning of the second argument (false) is not visible here — confirm.
CultureInfo.m_InvariantCultureInfo = new CultureInfo((int) sbyte.MaxValue, false)
{
m_isReadOnly = true
};
// Both user-default fields are first pointed at the invariant culture, then replaced by
// the Init* results; presumably so they are non-null while Init* runs — TODO confirm.
CultureInfo.m_userDefaultCulture = CultureInfo.m_userDefaultUICulture = CultureInfo.m_InvariantCultureInfo;
CultureInfo.m_userDefaultCulture = CultureInfo.InitUserDefaultCulture();
CultureInfo.m_userDefaultUICulture = CultureInfo.InitUserDefaultUICulture();
}
// Returns the cached invariant culture instance; the getter is a plain static field read.
public static CultureInfo InvariantCulture
{
get
{
return CultureInfo.m_InvariantCultureInfo;
}
}
//-----------CultureInfo.CurrentCulture--------
// Returns the culture of the calling thread by reading Thread.CurrentThread.CurrentCulture
// on every access; nothing is cached in this property itself.
public static CultureInfo CurrentCulture
{
get
{
return Thread.CurrentThread.CurrentCulture;
}
}
// other code...
再看 CultureInfo.CurrentCulture:它是当前线程的 CultureInfo,要获取这个属性的值会有一系列的操作,请见下面的代码,Thread.CurrentThread.CurrentCulture:
// Type: System.Threading.Thread
// Assembly: mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
// MVID: 00788FED-9041-4DB2-AB5B-0952A06515B1
// Assembly location: C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorlib.dll
// Gets or sets the culture associated with this thread.
// Getter: falls back to CultureInfo.UserDefaultCulture when no culture has been stored
// on the thread, or when the native lookup fails or yields null.
// Setter: rejects null with ArgumentNullException and requires the ControlThread
// security permission.
public CultureInfo CurrentCulture
{
get
{
// No culture stored on this thread: use the user default.
if (this.m_CurrentCulture == null)
return CultureInfo.UserDefaultCulture;
CultureInfo safeCulture = (CultureInfo) null;
// Ask the runtime for a culture object for the current domain id; the stored field is
// not returned directly. NOTE(review): presumably for cross-AppDomain safety — confirm.
if (!Thread.nativeGetSafeCulture(this, Thread.GetDomainID(), false, ref safeCulture) || safeCulture == null)
return CultureInfo.UserDefaultCulture;
else
return safeCulture;
}
[SecurityPermission(SecurityAction.Demand, ControlThread = true)] set
{
if (value == null)
throw new ArgumentNullException("value");
// Validate the culture, push its LCID to the native thread locale, start cross-domain
// tracking on it, and only then store it on the thread.
CultureInfo.CheckNeutral(value);
CultureInfo.nativeSetThreadLocale(value.LCID);
value.StartCrossDomainTracking();
this.m_CurrentCulture = value;
}
}
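因此在使用两个不同的 CultureInfo 进行字符/字符串比较时,所花的代价就很不同了:除了获取 CultureInfo.CurrentCulture 本身多出的开销之外,更主要的是 IndexOf(string) 做的是区分区域性的语言比较(CompareOptions.None),而 Contains(string) 做的是简单的序号比较(CompareOptions.Ordinal),因此执行效率上就有了较大差别。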
结论:不特殊考虑 CultureInfo 时,Contains(string) 的执行效率比 IndexOf(string) 高;如果还需要知道匹配的位置,可以改用 IndexOf(value, StringComparison.Ordinal),它正是 Contains 内部调用的重载。
浙公网安备 33010602011771号