C#里面滥用String造成的性能问题

前两天给我们的json写一个解析函数, 之前用的是正宗的json parser, 支持完整的json特性. 但是实际上我们用到的特性, 只有key-value的映射, value的类型只有数字和字符串两种. 由于parse的速度比较慢, 所以我打算自己用字符串解析一遍. 第一个能工作的原型出来的时候, 速度和json解析差不多. 做了profile之后发现, 绝大部分时间都浪费在构造String和检索IndexOf上面.

下了coreclr的源码研究了一下, 发现String.Split在实现的时候, 先扫描一遍分隔符, 计算有多少个元素, 然后分配一个Array, 然后再去做Split操作. Split操作里面还会再new一个新的String出来, 顺便做一下拷贝. 看到这里我就惊呆了, 本来String在C#和Java这两个托管语言里面都是不可变的, 那么为什么他们不用一个Slice去构造一个SubString呢?

网上搜了一下, 也没发现有人写的StringSlice或者类似的东西, 我就顺手撸了一个StringView, 一个只读的StringSlice.

  1 using System.Collections.Generic;
  2 
  /// <summary>
  /// A read-only window over a range of characters of an existing string.
  /// Creating or slicing a view copies no characters: the view stores a reference to
  /// the source string plus an offset and a length. Only <see cref="ToString"/>
  /// allocates a new string.
  /// </summary>
  public unsafe struct StringView : System.IEquatable<StringView>
  {
      /// <summary>A zero-length view over the empty string.</summary>
      public static readonly StringView Empty = new StringView("");

      /// <summary>Creates a view that covers all of <paramref name="str"/>.</summary>
      /// <param name="str">The source string; must not be null.</param>
      /// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
      public StringView(string str) : this(str, 0, str == null ? 0 : str.Length) { }

      /// <summary>
      /// Creates a view over <paramref name="length"/> characters of <paramref name="str"/>
      /// starting at <paramref name="begin"/>.
      /// </summary>
      /// <param name="str">The source string; must not be null.</param>
      /// <param name="begin">Zero-based offset of the first character inside <paramref name="str"/>.</param>
      /// <param name="length">Number of characters in the view; zero is allowed.</param>
      /// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
      /// <exception cref="System.ArgumentOutOfRangeException">
      /// The requested range is negative or does not fit inside <paramref name="str"/>.
      /// </exception>
      public StringView(string str, int begin, int length)
      {
          if (str == null)
          {
              throw new System.ArgumentNullException("str");
          }

          // Written as "begin > str.Length - length" so the comparison cannot overflow.
          // An empty range positioned at the very end (begin == str.Length) is valid.
          if (begin < 0 || length < 0 || begin > str.Length - length)
          {
              throw new System.ArgumentOutOfRangeException("begin", "StringView's Constructor OutOfBound");
          }

          this.str = str;
          this.begin = begin;
          this.length = length;
      }

      /// <summary>Finds the first occurrence of <paramref name="c"/> at or after <paramref name="start"/>.</summary>
      /// <param name="c">The character to look for.</param>
      /// <param name="start">View-relative index at which the search begins.</param>
      /// <returns>The view-relative index of the match, or -1 when there is none.</returns>
      /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="start"/> is negative.</exception>
      public int IndexOf(char c, int start = 0)
      {
          if (start < 0)
          {
              throw new System.ArgumentOutOfRangeException("start");
          }

          fixed (char* p = this.str)
          {
              // For a default(StringView) p is null, but length is 0 so it is never dereferenced.
              char* window = p + this.begin;
              for (int i = start; i < this.length; ++i)
              {
                  if (window[i] == c)
                  {
                      return i;
                  }
              }
          }

          return -1;
      }

      // Returns true when c is one of the first count characters at set.
      private static bool Contains(char* set, int count, char c)
      {
          for (int i = 0; i < count; ++i)
          {
              if (set[i] == c)
              {
                  return true;
              }
          }

          return false;
      }

      /// <summary>
      /// Finds the first character at or after <paramref name="start"/> that is equal to
      /// any element of <paramref name="array"/>.
      /// </summary>
      /// <param name="array">The set of characters to look for; an empty set never matches.</param>
      /// <param name="start">View-relative index at which the search begins.</param>
      /// <returns>The view-relative index of the match, or -1 when there is none.</returns>
      /// <exception cref="System.ArgumentNullException"><paramref name="array"/> is null.</exception>
      /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="start"/> is negative.</exception>
      public int IndexOf(char[] array, int start = 0)
      {
          if (array == null)
          {
              throw new System.ArgumentNullException("array");
          }

          if (array.Length == 1)
          {
              return this.IndexOf(array[0], start);
          }

          if (start < 0)
          {
              throw new System.ArgumentOutOfRangeException("start");
          }

          int setLength = array.Length;

          // Pin both buffers once for the whole scan instead of once per character.
          fixed (char* p = this.str)
          fixed (char* set = array)
          {
              char* window = p + this.begin;
              for (int i = start; i < this.length; ++i)
              {
                  if (Contains(set, setLength, window[i]))
                  {
                      return i;
                  }
              }
          }

          return -1;
      }

      /// <summary>
      /// Finds the first occurrence of <paramref name="s"/> that lies completely inside
      /// this view, starting the search at <paramref name="start"/>. Comparison is ordinal.
      /// </summary>
      /// <param name="s">The text to look for.</param>
      /// <param name="start">View-relative index at which the search begins.</param>
      /// <returns>
      /// The view-relative index of the match, or -1 when there is none. An empty
      /// <paramref name="s"/> matches at <paramref name="start"/> as long as
      /// <paramref name="start"/> does not exceed <see cref="Length"/>.
      /// </returns>
      /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
      /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="start"/> is negative.</exception>
      public int IndexOf(string s, int start = 0)
      {
          if (s == null)
          {
              throw new System.ArgumentNullException("s");
          }

          if (start < 0)
          {
              throw new System.ArgumentOutOfRangeException("start");
          }

          int needleLength = s.Length;
          if (needleLength == 0)
          {
              return start <= this.length ? start : -1;
          }

          // Last view-relative index at which a full match still fits inside the view
          // (not merely inside the underlying string).
          int lastCandidate = this.length - needleLength;

          fixed (char* p = this.str)
          fixed (char* needle = s)
          {
              char* window = p + this.begin;
              int index = this.IndexOf(needle[0], start);
              while (index >= 0 && index <= lastCandidate)
              {
                  bool match = true;
                  for (int i = 1; i < needleLength; ++i)
                  {
                      if (window[index + i] != needle[i])
                      {
                          match = false;
                          break;
                      }
                  }

                  if (match)
                  {
                      return index;
                  }

                  index = this.IndexOf(needle[0], index + 1);
              }
          }

          return -1;
      }

      /// <summary>Gets the character at the view-relative position <paramref name="index"/>.</summary>
      /// <exception cref="System.IndexOutOfRangeException">
      /// <paramref name="index"/> is negative or not less than <see cref="Length"/>.
      /// </exception>
      public char this[int index]
      {
          get
          {
              if (index < 0 || index >= this.length)
              {
                  throw new System.IndexOutOfRangeException("StringView's Index OutOfBound");
              }

              fixed (char* p = this.str)
              {
                  return p[this.begin + index];
              }
          }
      }

      /// <summary>Returns the part of this view from <paramref name="begin"/> to its end.</summary>
      /// <param name="begin">View-relative start index; may equal <see cref="Length"/>.</param>
      /// <exception cref="System.ArgumentOutOfRangeException">The range does not fit inside this view.</exception>
      public StringView SubString(int begin)
      {
          return this.SubString(begin, this.length - begin);
      }

      /// <summary>
      /// Returns a view over <paramref name="length"/> characters of this view starting at
      /// <paramref name="begin"/>. No characters are copied.
      /// </summary>
      /// <param name="begin">View-relative start index.</param>
      /// <param name="length">Number of characters in the result.</param>
      /// <exception cref="System.ArgumentOutOfRangeException">The range does not fit inside this view.</exception>
      public StringView SubString(int begin, int length)
      {
          // Validate against this view's own bounds so a slice can never escape its parent.
          if (begin < 0 || length < 0 || begin > this.length - length)
          {
              throw new System.ArgumentOutOfRangeException("begin", "StringView's SubString OutOfBound");
          }

          // Copy-and-adjust instead of calling the constructor: the range is already
          // validated, and this also works for default(StringView) whose source is null.
          StringView slice = this;
          slice.begin += begin;
          slice.length = length;
          return slice;
      }

      /// <summary>
      /// Splits this view at every occurrence of <paramref name="split"/> and stores the
      /// pieces in <paramref name="array"/>, which is cleared first.
      /// </summary>
      /// <param name="split">The separator character.</param>
      /// <param name="array">
      /// A caller-supplied list that receives the pieces, so that it can be reused across
      /// calls. When null, a new list is created.
      /// </param>
      /// <returns>
      /// The list that received the pieces. Empty pieces before or between separators are
      /// kept; an empty piece after the final separator is omitted, and an empty view
      /// yields no pieces.
      /// </returns>
      public List<StringView> Split(char split, List<StringView> array)
      {
          if (array == null)
          {
              array = new List<StringView>();
          }

          array.Clear();

          int tokenStart = 0;
          int separator = this.IndexOf(split);

          // ">= 0" (not "> 0"): a separator at index 0 is a real match and must be split on.
          while (separator >= 0)
          {
              array.Add(this.SubString(tokenStart, separator - tokenStart));
              tokenStart = separator + 1;
              separator = this.IndexOf(split, tokenStart);
          }

          if (tokenStart != this.length)
          {
              array.Add(this.SubString(tokenStart, this.length - tokenStart));
          }

          return array;
      }

      /// <summary>Returns true when <paramref name="obj"/> is a <see cref="StringView"/> with the same characters.</summary>
      public override bool Equals(object obj)
      {
          if (obj is StringView)
          {
              return this.Equals((StringView)obj);
          }

          return false;
      }

      /// <summary>
      /// Returns true when both views have the same length and the same characters
      /// (ordinal comparison), regardless of which source string they refer to.
      /// </summary>
      public bool Equals(StringView v)
      {
          if (v.length != this.length)
          {
              return false;
          }

          // Same window over the same string instance: no need to compare characters.
          if (object.ReferenceEquals(this.str, v.str) && this.begin == v.begin)
          {
              return true;
          }

          fixed (char* pa = this.str)
          fixed (char* pb = v.str)
          {
              char* a = pa + this.begin;
              char* b = pb + v.begin;
              for (int i = 0; i < this.length; ++i)
              {
                  if (a[i] != b[i])
                  {
                      return false;
                  }
              }
          }

          return true;
      }

      /// <summary>Character-wise equality; see <see cref="Equals(StringView)"/>.</summary>
      public static bool operator ==(StringView left, StringView right)
      {
          return left.Equals(right);
      }

      /// <summary>Character-wise inequality; see <see cref="Equals(StringView)"/>.</summary>
      public static bool operator !=(StringView left, StringView right)
      {
          return !left.Equals(right);
      }

      /// <summary>Folds <paramref name="h2"/> into the running hash <paramref name="h1"/> as (h1 * 33) ^ h2.</summary>
      internal static int CombineHashCodes(int h1, int h2)
      {
          // The arithmetic is meant to wrap; stay correct even when compiled with /checked.
          unchecked
          {
              return (((h1 << 5) + h1) ^ h2);
          }
      }

      /// <summary>
      /// Computes a hash from the characters of the view, so views that are equal
      /// according to <see cref="Equals(StringView)"/> produce the same hash code.
      /// </summary>
      public override int GetHashCode()
      {
          int hashCode = 0;
          fixed (char* p = this.str)
          {
              char* window = p + this.begin;
              for (int i = 0; i < this.length; ++i)
              {
                  hashCode = CombineHashCodes(hashCode, window[i].GetHashCode());
              }
          }

          return hashCode;
      }

      /// <summary>Number of characters in the view.</summary>
      public int Length { get { return this.length; } }

      /// <summary>Copies the viewed characters into a new string (empty for a default view).</summary>
      public override string ToString()
      {
          if (this.str == null)
          {
              return string.Empty;
          }

          return this.str.Substring(this.begin, this.length);
      }

      /// <summary>Returns the whole source string this view refers to, not just the viewed range.</summary>
      public string GetRawString() { return this.str; }

      /// <summary>Returns the offset of the view's first character inside the source string.</summary>
      public int GetBegin() { return this.begin; }

      // Source string; null only for default(StringView).
      private string str;
      // Offset of the first viewed character inside str.
      private int begin;
      // Number of viewed characters.
      private int length;
  }

为了方便替换String, 很多接口都保持了一致. 目前这个版本只是满足我自己的需求, 以后可以考虑继续完善添加String的函数进来.

之前说的IndexOf也比较耗时, 因为String的索引器会带有边界检测, 而IndexOf一直在用索引器, 所以个人感觉是不太合适的, 所以我的StringView一直在用指针….

PS: 修改之后的纯text parse, 速度比json parse的速度快一倍以上, 性能还不错, 实际上还有提升的空间

PS: 现在比较完整的StringView已经上传至github, https://github.com/egmkang/StringView 添加了ToInt64, StringBuilder.Append支持

posted @ 2017-08-29 18:03 egmkang 阅读(...) 评论(...) 编辑 收藏