判断文件是否为UTF-8编码(以前收集的)
1
/// <summary>
/// Reports whether the contents of a file look like UTF-8 encoded text.
/// </summary>
/// <param name="strFileName">Path of the file to inspect.</param>
/// <returns>
/// <c>true</c> when <c>utf8_probability</c> gives the file's bytes a score
/// greater than zero; otherwise <c>false</c>. Because that scorer returns 0
/// for input consisting only of 7-bit ASCII bytes, a pure-ASCII (or empty)
/// file yields <c>false</c>.
/// </returns>
private bool CheckEncoding(string strFileName)
{
    // File.ReadAllBytes opens the file for read access only and returns the
    // complete contents. Opening with FileStream(path, FileMode.Open) asks
    // for read/write access, which fails on read-only files, and a single
    // Stream.Read call is not guaranteed to fill the whole buffer.
    byte[] bs = File.ReadAllBytes(strFileName);

    return utf8_probability(bs) > 0;
}
55
56
57
/// <summary>
/// Scores how strongly a byte buffer resembles UTF-8 encoded text.
/// </summary>
/// <remarks>
/// Bytes in the 7-bit ASCII range are counted separately and excluded from
/// the score. Every other byte is expected to start a two-byte sequence
/// (lead 0xC0-0xDF plus one continuation byte 0x80-0xBF) or a three-byte
/// sequence (lead 0xE0-0xEF plus two continuation bytes). Bytes belonging to
/// such sequences are "good"; the score is the percentage of non-ASCII bytes
/// that are good. Four-byte sequences are not recognized by this check.
/// </remarks>
/// <param name="rawtext">The bytes to examine.</param>
/// <returns>
/// The score (0-100) when it is above 98, or above 95 with more than 30 good
/// bytes; otherwise 0. Also 0 when the buffer is empty or contains only
/// ASCII bytes.
/// </returns>
private int utf8_probability(byte[] rawtext)
{
    int rawtextlen = rawtext.Length;
    int goodbytes = 0;
    int asciibytes = 0;

    for (int i = 0; i < rawtextlen; i++)
    {
        int lead = rawtext[i];

        if (lead <= 0x7F)
        {
            // Plain ASCII is ignored by the score; it would throw off the count.
            asciibytes++;
            continue;
        }

        // Check the bounds BEFORE indexing: a lead byte in the last one or two
        // positions of the buffer has no room for its continuation bytes, and
        // reading them unconditionally would throw IndexOutOfRangeException.
        bool oneContinuation = i + 1 < rawtextlen
            && (rawtext[i + 1] & 0xC0) == 0x80;
        bool twoContinuations = oneContinuation
            && i + 2 < rawtextlen
            && (rawtext[i + 2] & 0xC0) == 0x80;

        if (lead >= 0xC0 && lead <= 0xDF && oneContinuation)
        {
            // Two-byte sequence.
            goodbytes += 2;
            i++;
        }
        else if (lead >= 0xE0 && lead <= 0xEF && twoContinuations)
        {
            // Three-byte sequence.
            goodbytes += 3;
            i += 2;
        }
        // Anything else is a malformed byte: it is neither ASCII nor good,
        // so it lowers the score.
    }

    if (asciibytes == rawtextlen)
    {
        // Nothing but ASCII (or an empty buffer): no evidence of UTF-8, and
        // this also avoids dividing by zero below.
        return 0;
    }

    int score = (int)(100 * ((float)goodbytes / (float)(rawtextlen - asciibytes)));

    // Scores that are not near-perfect are reduced to zero to prevent
    // coincidental matches, while still tolerating a few malformed sequences
    // in buffers with a substantial number of good bytes.
    if (score > 98)
    {
        return score;
    }

    if (score > 95 && goodbytes > 30)
    {
        return score;
    }

    return 0;
}
![](/Images/OutliningIndicators/None.gif)
2
![](/Images/OutliningIndicators/ExpandedBlockStart.gif)
3
![](/Images/OutliningIndicators/InBlock.gif)
4
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
5
![](/Images/OutliningIndicators/InBlock.gif)
6
![](/Images/OutliningIndicators/InBlock.gif)
7
![](/Images/OutliningIndicators/InBlock.gif)
8
![](/Images/OutliningIndicators/InBlock.gif)
9
![](/Images/OutliningIndicators/InBlock.gif)
10
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
11
![](/Images/OutliningIndicators/InBlock.gif)
12
![](/Images/OutliningIndicators/InBlock.gif)
13
![](/Images/OutliningIndicators/InBlock.gif)
14
![](/Images/OutliningIndicators/InBlock.gif)
15
![](/Images/OutliningIndicators/InBlock.gif)
16
![](/Images/OutliningIndicators/InBlock.gif)
17
![](/Images/OutliningIndicators/InBlock.gif)
18
![](/Images/OutliningIndicators/InBlock.gif)
19
![](/Images/OutliningIndicators/InBlock.gif)
20
![](/Images/OutliningIndicators/InBlock.gif)
21
![](/Images/OutliningIndicators/InBlock.gif)
22
![](/Images/OutliningIndicators/InBlock.gif)
23
![](/Images/OutliningIndicators/InBlock.gif)
24
![](/Images/OutliningIndicators/InBlock.gif)
25
![](/Images/OutliningIndicators/InBlock.gif)
26
![](/Images/OutliningIndicators/InBlock.gif)
27
![](/Images/OutliningIndicators/InBlock.gif)
28
![](/Images/OutliningIndicators/InBlock.gif)
29
![](/Images/OutliningIndicators/InBlock.gif)
30
![](/Images/OutliningIndicators/InBlock.gif)
31
![](/Images/OutliningIndicators/InBlock.gif)
32
![](/Images/OutliningIndicators/InBlock.gif)
33
![](/Images/OutliningIndicators/InBlock.gif)
34
![](/Images/OutliningIndicators/InBlock.gif)
35
![](/Images/OutliningIndicators/InBlock.gif)
36
![](/Images/OutliningIndicators/InBlock.gif)
37
![](/Images/OutliningIndicators/InBlock.gif)
38
![](/Images/OutliningIndicators/InBlock.gif)
39
![](/Images/OutliningIndicators/InBlock.gif)
40
![](/Images/OutliningIndicators/InBlock.gif)
41
![](/Images/OutliningIndicators/InBlock.gif)
42
![](/Images/OutliningIndicators/InBlock.gif)
43
![](/Images/OutliningIndicators/InBlock.gif)
44
![](/Images/OutliningIndicators/InBlock.gif)
45
![](/Images/OutliningIndicators/InBlock.gif)
46
![](/Images/OutliningIndicators/InBlock.gif)
47
![](/Images/OutliningIndicators/InBlock.gif)
48
![](/Images/OutliningIndicators/InBlock.gif)
49
![](/Images/OutliningIndicators/InBlock.gif)
50
![](/Images/OutliningIndicators/InBlock.gif)
51
![](/Images/OutliningIndicators/InBlock.gif)
52
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
53
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
54
![](/Images/OutliningIndicators/ExpandedBlockEnd.gif)
55
![](/Images/OutliningIndicators/None.gif)
56
![](/Images/OutliningIndicators/None.gif)
57
![](/Images/OutliningIndicators/None.gif)
58
![](/Images/OutliningIndicators/ExpandedBlockStart.gif)
59
![](/Images/OutliningIndicators/InBlock.gif)
60
![](/Images/OutliningIndicators/InBlock.gif)
61
![](/Images/OutliningIndicators/InBlock.gif)
62
![](/Images/OutliningIndicators/InBlock.gif)
63
![](/Images/OutliningIndicators/InBlock.gif)
64
![](/Images/OutliningIndicators/InBlock.gif)
65
![](/Images/OutliningIndicators/InBlock.gif)
66
![](/Images/OutliningIndicators/InBlock.gif)
67
![](/Images/OutliningIndicators/InBlock.gif)
68
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
69
![](/Images/OutliningIndicators/InBlock.gif)
70
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
71
![](/Images/OutliningIndicators/InBlock.gif)
72
![](/Images/OutliningIndicators/InBlock.gif)
73
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
74
![](/Images/OutliningIndicators/InBlock.gif)
75
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
76
![](/Images/OutliningIndicators/InBlock.gif)
77
![](/Images/OutliningIndicators/InBlock.gif)
78
![](/Images/OutliningIndicators/InBlock.gif)
79
![](/Images/OutliningIndicators/InBlock.gif)
80
![](/Images/OutliningIndicators/InBlock.gif)
81
![](/Images/OutliningIndicators/InBlock.gif)
82
![](/Images/OutliningIndicators/InBlock.gif)
83
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
84
![](/Images/OutliningIndicators/InBlock.gif)
85
![](/Images/OutliningIndicators/InBlock.gif)
86
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
87
![](/Images/OutliningIndicators/InBlock.gif)
88
![](/Images/OutliningIndicators/InBlock.gif)
89
![](/Images/OutliningIndicators/InBlock.gif)
90
![](/Images/OutliningIndicators/InBlock.gif)
91
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
92
![](/Images/OutliningIndicators/InBlock.gif)
93
![](/Images/OutliningIndicators/InBlock.gif)
94
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
95
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
96
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
97
![](/Images/OutliningIndicators/InBlock.gif)
98
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
99
![](/Images/OutliningIndicators/InBlock.gif)
100
![](/Images/OutliningIndicators/InBlock.gif)
101
![](/Images/OutliningIndicators/InBlock.gif)
102
![](/Images/OutliningIndicators/InBlock.gif)
103
![](/Images/OutliningIndicators/InBlock.gif)
104
![](/Images/OutliningIndicators/InBlock.gif)
105
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
106
![](/Images/OutliningIndicators/InBlock.gif)
107
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
108
![](/Images/OutliningIndicators/InBlock.gif)
109
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
110
![](/Images/OutliningIndicators/InBlock.gif)
111
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
112
![](/Images/OutliningIndicators/InBlock.gif)
113
![](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
114
![](/Images/OutliningIndicators/InBlock.gif)
115
![](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
116
![](/Images/OutliningIndicators/InBlock.gif)
117
![](/Images/OutliningIndicators/ExpandedBlockEnd.gif)