判断重复字符存在:更有意义一点

   /// <summary>
   /// Collection of small string algorithms: several ways to check whether a
   /// string is free of duplicate characters, plus helpers that find the most
   /// frequently used character(s). The table-based methods only accept
   /// characters whose code is in the range 0-255.
   /// </summary>
   class Program
   {
       /// <summary>Number of distinct character codes (0-255) the lookup tables cover.</summary>
       private const int CharsetSize = 256;

       /// <summary>Number of flag bits stored in one <c>int</c> of the bitmap.</summary>
       private const int BitsPerInt = 32;

       /// <summary>
       /// Demo entry point: computes the most frequently used characters of a
       /// sample string, then waits for a key press.
       /// </summary>
       static void Main(string[] args)
       {
           Program p = new Program();
           List<char> result = p.FindMostFrequentlyUsedChars("hjkluuiuikjk");

           Console.ReadKey();
       }

       /// <summary>
       /// Rejects characters that do not fit in the 256-entry lookup tables.
       /// </summary>
       /// <param name="c">Character to validate.</param>
       /// <exception cref="ArgumentException">The character code is above 255.</exception>
       private static void EnsureSupportedChar(char c)
       {
           if (c >= CharsetSize)
           {
               throw new ArgumentException("string contains UNICODE char");
           }
       }

       /// <summary>
       /// Checks that no character occurs more than once, using one boolean flag
       /// per possible character code. Single pass over the string; the flag
       /// table has a fixed size of 256 entries.
       /// </summary>
       /// <param name="s">String to inspect; every character code must be 0-255.</param>
       /// <returns><c>true</c> when all characters are distinct, <c>false</c> on the first repeat.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character code above 255 was met before any repeat.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation1(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           bool[] seen = new bool[CharsetSize];
           foreach (char c in s)
           {
               EnsureSupportedChar(c);

               if (seen[c])
               {
                   return false;
               }
               seen[c] = true;
           }
           return true;
       }

       /// <summary>
       /// Same contract as <see cref="IsStringWithNoDuplicatedChar_Implemenation1"/>,
       /// but stores the flags as a bitmap (one bit per character code) so the
       /// table needs only eight ints instead of 256 booleans.
       /// </summary>
       /// <param name="s">String to inspect; every character code must be 0-255.</param>
       /// <returns><c>true</c> when all characters are distinct, <c>false</c> on the first repeat.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character code above 255 was met before any repeat.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation2(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           // int is always 32 bits in C#, so 256 / 32 = 8 ints hold all flags.
           int[] bitmap = new int[CharsetSize / BitsPerInt];
           foreach (char c in s)
           {
               // The bitmap only has room for codes 0-255, so the check is still required.
               EnsureSupportedChar(c);

               int slot = c / BitsPerInt;
               int mask = 1 << (c % BitsPerInt);

               // Compare with "!= 0": for bit 31 the mask is int.MinValue (negative),
               // so a "> 0" test would never see that bit as set.
               if ((bitmap[slot] & mask) != 0)
               {
                   return false;
               }
               bitmap[slot] |= mask;
           }
           return true;
       }

       /// <summary>
       /// Variant for strings made only of the lowercase letters a-z: the 26
       /// possible letters are tracked in the bits of a single 32-bit int.
       /// </summary>
       /// <param name="s">String to inspect; every character must be between 'a' and 'z'.</param>
       /// <returns><c>true</c> when all letters are distinct, <c>false</c> on the first repeat.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character outside a-z was met before any repeat.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation3(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           int bits = 0;
           foreach (char c in s)
           {
               // Out of range means below 'a' OR above 'z'; without this guard the
               // shift count below could be negative or larger than 31.
               if (c < 'a' || c > 'z')
               {
                   throw new ArgumentException("string contains char out of a-z");
               }

               int mask = 1 << (c - 'a');
               if ((bits & mask) != 0)
               {
                   return false;
               }
               bits |= mask;
           }
           return true;
       }

       /// <summary>
       /// Adds a length shortcut in front of
       /// <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/>: a string longer
       /// than the 256-value character set either repeats a character or contains
       /// an unsupported one, so it is reported as "not duplicate-free" without
       /// scanning it.
       /// </summary>
       /// <param name="s">String to inspect.</param>
       /// <returns>
       /// <c>false</c> when the string is longer than 256 characters or contains a
       /// repeated character; <c>true</c> otherwise.
       /// </returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">
       /// The string has at most 256 characters and a character code above 255
       /// was met before any repeat.
       /// </exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation4(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           if (s.Length > CharsetSize)
           {
               return false;
           }
           return IsStringWithNoDuplicatedChar_Implemenation2(s);
       }

       /// <summary>
       /// Finds the character that occurs most often, counting occurrences in a
       /// 256-entry table during a single pass. When several characters share the
       /// highest count, the one that reached that count first is returned.
       /// </summary>
       /// <param name="s">String to inspect; every character code must be 0-255.</param>
       /// <returns>The most frequent character, or <c>null</c> for an empty string.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">The string contains a character code above 255.</exception>
       public char? FindMostFrequentlyUsedChar(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           int[] counters = new int[CharsetSize];
           int highestCount = 0;
           char? winner = null;
           foreach (char c in s)
           {
               EnsureSupportedChar(c);

               counters[c]++;

               // Strictly greater: a later character that merely ties does not replace the winner.
               if (counters[c] > highestCount)
               {
                   highestCount = counters[c];
                   winner = c;
               }
           }
           return winner;
       }

       /// <summary>
       /// Like <see cref="FindMostFrequentlyUsedChar"/>, but returns every
       /// character that shares the highest count. Still a single pass over the
       /// string.
       /// </summary>
       /// <param name="s">String to inspect; every character code must be 0-255.</param>
       /// <returns>
       /// The characters tied for the highest count, in the order in which each
       /// reached that count; an empty list for an empty string.
       /// </returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">The string contains a character code above 255.</exception>
       public List<char> FindMostFrequentlyUsedChars(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           List<char> leaders = new List<char>();
           int[] counters = new int[CharsetSize];
           int highestCount = 0;
           foreach (char c in s)
           {
               EnsureSupportedChar(c);

               counters[c]++;

               if (counters[c] > highestCount)
               {
                   // New record: everything collected so far is now below the maximum.
                   leaders.Clear();
                   highestCount = counters[c];
                   leaders.Add(c);
               }
               else if (counters[c] == highestCount)
               {
                   // This character has just caught up with the current maximum.
                   leaders.Add(c);
               }
           }
           return leaders;
       }
   }
posted @ 2013-12-24 20:34  Dance With Automation  Views(230)  Comments(0)  Edit  收藏  举报