代码改变世界

C#中查询字符串中是否包含指定字符/字符串,使用IndexOf还是Contains?

2010-03-28 18:27  uonun  阅读(6084)  评论(4)  编辑  收藏  举报

C#中查询字符串中是否包含指定字符/串,使用IndexOf还是Contains?这是一个很常见的命题,以前也没有注意,今天QQ群里有人提起,于是就做了下试验,代码如下:

using System;
using System.Diagnostics;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console micro-benchmark that times <c>string.Contains(string)</c> against
    /// <c>string.IndexOf(string)</c> for three different search targets.
    /// </summary>
    class Program
    {
        // Number of calls made inside each timed loop.
        private const int N = 10000000;

        // Single stopwatch shared by both timing helpers; each helper resets it before use.
        private static Stopwatch watch = new Stopwatch();

        /// <summary>
        /// Runs the three scenarios in order, printing one "Contains" and one "IndexOf"
        /// timing per scenario, then waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string source = "abcdefghijklmnopqrstuvwxyz0123456789C#"
                          + "中查询字符串中是否包含指定字符/串,使用IndexOf还是Contains?.uonun";

            // Scenario 1: "a" is the very first character of source.
            RunScenario("目标为第一个字符时:", source, "a");
            Console.WriteLine();

            // Scenario 2: "中" first occurs right after the 38 leading ASCII characters.
            RunScenario("目标为中部某个字符时:", source, "中");
            Console.WriteLine();

            // Scenario 3 is labelled "last character", but "u" first occurs at index 20
            // (inside the leading alphabet), i.e. before the "中" used in scenario 2.
            // NOTE(review): the input is kept as-is so published timings stay reproducible.
            RunScenario("目标为最后一个字符时:", source, "u");

            Console.WriteLine("执行完毕,按任意键退出...");
            Console.ReadKey();
        }

        /// <summary>
        /// Prints the scenario heading, then times Contains followed by IndexOf
        /// for the same source/target pair.
        /// </summary>
        private static void RunScenario(string heading, string source, string target)
        {
            Console.WriteLine(heading);
            TestContains(source, target);
            TestIndexOf(source, target);
        }

        /// <summary>
        /// Calls <c>source.IndexOf(target)</c> N times and prints the elapsed milliseconds.
        /// The search result is intentionally discarded; only the elapsed time matters.
        /// </summary>
        private static void TestIndexOf(string source, string target)
        {
            watch.Reset();
            watch.Start();

            int i = 0;
            while (i < N)
            {
                source.IndexOf(target);
                i++;
            }

            watch.Stop();
            Console.WriteLine("IndexOf: {0}ms", watch.ElapsedMilliseconds);
        }

        /// <summary>
        /// Calls <c>source.Contains(target)</c> N times and prints the elapsed milliseconds.
        /// The search result is intentionally discarded; only the elapsed time matters.
        /// </summary>
        private static void TestContains(string source, string target)
        {
            watch.Reset();
            watch.Start();

            int i = 0;
            while (i < N)
            {
                source.Contains(target);
                i++;
            }

            watch.Stop();
            Console.WriteLine("Contains: {0}ms", watch.ElapsedMilliseconds);
        }
    }
}

得到的结果是:

目标为第一个字符时:
Contains: 973ms
IndexOf: 1343ms

目标为中部某个字符时:
Contains: 1813ms
IndexOf: 8602ms

目标为最后一个字符时:
Contains: 1433ms
IndexOf: 5094ms
执行完毕,按任意键退出...

可以看出,使用Contains方法的效率比IndexOf的效率高很多。

疑问:Contains 调用了 IndexOf,反倒比 IndexOf 还快?

正如评论中@Vampire_D 所说,Contains 调用了 IndexOf,如果 Contains 比 IndexOf 还快,简直不科学啊!不过仔细一看一对比你就会知道,正是 IndexOf 的第二个参数在捣鬼。

.NET 中 Contains 和 IndexOf 分别是这样实现的:

// Type: System.String
// Assembly: mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
// MVID: 00788FED-9041-4DB2-AB5B-0952A06515B1
// Assembly location: C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorlib.dll

// .NET 中字符串的 Contains 和 IndexOf 方法的实现:

    // Returns true when value is found in this string.
    // The search is delegated to IndexOf with StringComparison.Ordinal, so it is
    // never culture-sensitive; a non-negative index means "found".
    public bool Contains(string value)
    {
      return this.IndexOf(value, StringComparison.Ordinal) >= 0;
    }

    // Returns the index of value in this string.
    // No StringComparison is involved: the search goes straight to the CompareInfo
    // of CultureInfo.CurrentCulture, which is looked up on every call.
    public int IndexOf(string value)
    {
      return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value);
    }

    // Same as IndexOf(string), but the search is handed to the current culture's
    // CompareInfo together with startIndex. No argument validation happens here.
    public int IndexOf(string value, int startIndex)
    {
      return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex);
    }

    // Current-culture search restricted to count characters starting at startIndex.
    // Throws ArgumentOutOfRangeException when startIndex is outside [0, Length]
    // or count is outside [0, Length - startIndex].
    public int IndexOf(string value, int startIndex, int count)
    {
      if (startIndex < 0 || startIndex > this.Length)
        throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_Index"));
      if (count < 0 || count > this.Length - startIndex)
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_Count"));
      else
        // Culture-sensitive comparison with default options (CompareOptions.None).
        return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.None);
    }

    // Searches the whole string (startIndex 0, count Length) using the given
    // comparison type. This is the overload that Contains calls.
    public int IndexOf(string value, StringComparison comparisonType)
    {
      return this.IndexOf(value, 0, this.Length, comparisonType);
    }

    // Searches from startIndex to the end of the string (count = Length - startIndex)
    // using the given comparison type.
    public int IndexOf(string value, int startIndex, StringComparison comparisonType)
    {
      return this.IndexOf(value, startIndex, this.Length - startIndex, comparisonType);
    }

    // Core overload: validates the arguments, then dispatches on comparisonType.
    // Throws ArgumentNullException for a null value, ArgumentOutOfRangeException for
    // a bad startIndex/count, and ArgumentException for an unknown comparisonType.
    public int IndexOf(string value, int startIndex, int count, StringComparison comparisonType)
    {
      if (value == null)
        throw new ArgumentNullException("value");
      if (startIndex < 0 || startIndex > this.Length)
        throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_Index"));
      // Rejects a negative count or a range running past the end of the string.
      if (count < 0 || startIndex > this.Length - count)
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_Count"));
      switch (comparisonType)
      {
        // Culture-sensitive searches: current culture, with or without case folding.
        case StringComparison.CurrentCulture:
          return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.None);
        case StringComparison.CurrentCultureIgnoreCase:
          return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase);
        // Same, but using the invariant culture.
        case StringComparison.InvariantCulture:
          return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.None);
        case StringComparison.InvariantCultureIgnoreCase:
          return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase);
        // Ordinal search (the path Contains takes): routed through the invariant
        // culture's CompareInfo, but with CompareOptions.Ordinal.
        case StringComparison.Ordinal:
          return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.Ordinal);
        // Ordinal ignore-case does not go through CompareInfo at all.
        case StringComparison.OrdinalIgnoreCase:
          return TextInfo.IndexOfStringOrdinalIgnoreCase(this, value, startIndex, count);
        default:
          throw new ArgumentException(Environment.GetResourceString("NotSupported_StringComparison"), "comparisonType");
      }
    }

可以看出,Contains 使用的是 StringComparison.Ordinal(序号比较):它最终通过 CultureInfo.InvariantCulture.CompareInfo 并指定 CompareOptions.Ordinal 来查找;而 IndexOf 在不带第二个参数时,默认使用 CultureInfo.CurrentCulture 按当前区域性来比较字符/字符串。这直接导致了两者比较过程的复杂性不同,从而影响了两个方法的执行效率(以及结果——我没有去验证什么情况下结果会不一致)。

进一步地,我们来看看使用这两个不同的 CultureInfo 到底有什么不同?

CultureInfo.InvariantCulture:它是 CultureInfo 中的一个静态属性,其背后的静态字段直接在 CultureInfo 的静态构造函数里实例化,并且只需要实例化一次。

// Type: System.Globalization.CultureInfo
// Assembly: mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
// MVID: 00788FED-9041-4DB2-AB5B-0952A06515B1
// Assembly location: C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorlib.dll

//------------CultureInfo.InvariantCulture----------

    // Backing field for the InvariantCulture property; assigned once in the static constructor.
    private static CultureInfo m_InvariantCultureInfo;

    // Type initializer: creates the invariant culture instance if it does not exist
    // yet, then initializes the user default culture and UI culture.
    static CultureInfo()
    {
      if (CultureInfo.m_InvariantCultureInfo == null)
        // sbyte.MaxValue is 127 — presumably the invariant-culture LCID (0x7F); confirm.
        CultureInfo.m_InvariantCultureInfo = new CultureInfo((int) sbyte.MaxValue, false)
        {
          m_isReadOnly = true
        };
      // Both user-default fields are first pointed at the invariant culture and then
      // overwritten by the Init* calls below.
      // NOTE(review): presumably a placeholder so the fields are non-null while the
      // Init* methods run — confirm.
      CultureInfo.m_userDefaultCulture = CultureInfo.m_userDefaultUICulture = CultureInfo.m_InvariantCultureInfo;
      CultureInfo.m_userDefaultCulture = CultureInfo.InitUserDefaultCulture();
      CultureInfo.m_userDefaultUICulture = CultureInfo.InitUserDefaultUICulture();
    }

    // Returns the static invariant culture instance; the getter is a plain field
    // read with no per-call work.
    public static CultureInfo InvariantCulture
    {
      get
      {
        return CultureInfo.m_InvariantCultureInfo;
      }
    }

//-----------CultureInfo.CurrentCulture--------
    // Returns the culture of the calling thread by forwarding to
    // Thread.CurrentThread.CurrentCulture on every access.
    public static CultureInfo CurrentCulture
    {
      get
      {
        return Thread.CurrentThread.CurrentCulture;
      }
    }

// other code...

再看 CultureInfo.CurrentCulture:它是当前线程的 CultureInfo,要获取这个属性的值会有一系列的操作,请见下面的代码,Thread.CurrentThread.CurrentCulture:

// Type: System.Threading.Thread
// Assembly: mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
// MVID: 00788FED-9041-4DB2-AB5B-0952A06515B1
// Assembly location: C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorlib.dll

    // Gets or sets the culture associated with this thread.
    // The getter falls back to CultureInfo.UserDefaultCulture when no culture has
    // been set on the thread or the native lookup yields nothing.
    // The setter throws ArgumentNullException for a null value.
    public CultureInfo CurrentCulture
    {
      get
      {
        // No explicit culture stored on this thread: use the user default.
        if (this.m_CurrentCulture == null)
          return CultureInfo.UserDefaultCulture;
        CultureInfo safeCulture = (CultureInfo) null;
        // A culture was set: ask the runtime for the culture object, passing the
        // current domain ID; fall back to the user default when the call fails or
        // leaves safeCulture null.
        if (!Thread.nativeGetSafeCulture(this, Thread.GetDomainID(), false, ref safeCulture) || safeCulture == null)
          return CultureInfo.UserDefaultCulture;
        else
          return safeCulture;
      }
      [SecurityPermission(SecurityAction.Demand, ControlThread = true)] set
      {
        if (value == null)
          throw new ArgumentNullException("value");
        // Validation and native/bookkeeping calls run before the field is stored.
        CultureInfo.CheckNeutral(value);
        CultureInfo.nativeSetThreadLocale(value.LCID);
        value.StartCrossDomainTracking();
        this.m_CurrentCulture = value;
      }
    }

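因此,使用两个不同的 CultureInfo 进行字符/字符串比较时,所花的代价很不同:除了获取 CultureInfo 本身的开销之外,两条路径传入的 CompareOptions 也不同(Contains 最终传入 CompareOptions.Ordinal,而按当前区域性比较的重载传入的是 CompareOptions.None),所以执行效率上就有了较大差别。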

结论:不特殊考虑 CultureInfo 时,Contains(string) 的执行效率比 IndexOf(string) 高