LumiSoft.Net邮件接收乱码问题解决

今天遇到用LumiSoft.Net这个组件收取邮件中含有类似于=?utf-8?B?5rWL6K+V6YKu5Lu2?= ,=?gbk?Q?=C6=BD=B0=B2=D6=A4=C8=AF*=C3=BF=D6=DC=B1=A8?=这两种格式的乱码,随后Google了下,原因是邮件本身的编码,跟传输过程采用的编码不一致。=?utf-8?B?5rWL6K+V6YKu5Lu2?= 这个表示邮件编码是utf-8,传输采用base64编码格式;第二个中的Q表示传输格式为Quoted-Printable。
对于这种格式的字符,Google大神帮我搜到了相关的处理代码,然后综合项目,整理出来了,留个记号。

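        /// <summary>
        /// Decodes the MIME encoded-words ("=?charset?B?...?=" and "=?charset?Q?...?=")
        /// contained in a mail header value such as the subject.
        /// </summary>
        /// <param name="input">Raw header text that may contain encoded-words.</param>
        /// <returns>
        /// The text with encoded-words replaced by their decoded form; anything that
        /// could not be decoded is left in place.
        /// </returns>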
        private string GetMailSubject(string input)
        {
            // Replaces every RFC 2047 encoded-word ("=?charset?B?...?=" or
            // "=?charset?Q?...?=") in input with its decoded text. A word that cannot
            // be decoded is left untouched, and this method never throws.
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            string decoded = input;
            try
            {
                bool found = false;
                decoded = EncodedWordPattern.Replace(input, delegate(Match mc)
                {
                    found = true;
                    return DecodeEncodedWord(mc);
                });

                if (found)
                {
                    // Log the untouched header next to the decoded one.
                    FileLogHelper.WriteInfo(string.Format("原邮件标题:[{0}]\r\n解析后标题:[{1}]", input, decoded));
                }
                return decoded;
            }
            catch (Exception)
            {
                // Best effort: a logging or regex failure must not lose the subject.
                return decoded;
            }
        }

        // "[BQ]" (not "[B|Q]", which would also accept a literal '|'); IgnoreCase lets
        // lowercase "b"/"q" through. Charset and body cannot contain '?', so excluding
        // it keeps a match from running into a neighbouring encoded-word.
        private static readonly Regex EncodedWordPattern = new Regex(
            @"=\?(?<encode>[^?]+)\?(?<type>[BQ])\?(?<body>[^?]*)\?=",
            RegexOptions.IgnoreCase);

        // A Q body this method is willing to decode: single-byte characters, "=XX"
        // hex escapes, and "=CRLF" soft breaks (which QuotedPrintable.Decode strips).
        private static readonly Regex WellFormedQBody = new Regex(
            @"^(?:[^=\u0100-\uFFFF]|=[0-9A-Fa-f]{2}|=\r\n)*\z");

        /// <summary>
        /// Decodes a single encoded-word match produced by EncodedWordPattern.
        /// </summary>
        /// <param name="mc">Match exposing the "encode", "type" and "body" groups.</param>
        /// <returns>The decoded text, or the original token when it cannot be decoded.</returns>
        private static string DecodeEncodedWord(Match mc)
        {
            try
            {
                Encoding charset = ResolveCharset(mc.Groups["encode"].Value);
                string body = mc.Groups["body"].Value;

                if (string.Equals(mc.Groups["type"].Value, "Q", StringComparison.OrdinalIgnoreCase))
                {
                    // In the RFC 2047 "Q" encoding an underscore stands for a space.
                    string quoted = body.Replace("_", "=20");
                    if (!WellFormedQBody.IsMatch(quoted))
                    {
                        return mc.Value;
                    }
                    return QuotedPrintable.Decode(quoted, charset);
                }

                return charset.GetString(Convert.FromBase64String(body));
            }
            catch (FormatException)
            {
                // Body is not valid Base64.
                return mc.Value;
            }
            catch (ArgumentException)
            {
                // Charset name is not known to Encoding.GetEncoding.
                return mc.Value;
            }
            catch (NotSupportedException)
            {
                // Charset is known but not available on this platform.
                return mc.Value;
            }
        }

        /// <summary>
        /// Maps the charset label of an encoded-word to an Encoding.
        /// </summary>
        /// <param name="charset">Charset label, optionally followed by "*language" (RFC 2231).</param>
        /// <returns>The matching encoding.</returns>
        /// <exception cref="ArgumentException">The label is not a known encoding name.</exception>
        private static Encoding ResolveCharset(string charset)
        {
            string name = charset.Trim().ToLowerInvariant();

            // Drop an RFC 2231 language suffix such as "utf-8*zh".
            int languageTag = name.IndexOf('*');
            if (languageTag >= 0)
            {
                name = name.Substring(0, languageTag);
            }

            if (name.Contains("utf8") || name.Contains("utf-8"))
            {
                return Encoding.UTF8;
            }

            // GBK and the EUC-CN aliases are looked up under the name "gb2312".
            if (name.Contains("gbk") || name.Contains("euccn") || name.Contains("euc-cn"))
            {
                return Encoding.GetEncoding("gb2312");
            }

            return Encoding.GetEncoding(name);
        }

 针对两种不同类型的字符做了处理,这里调用了一个Quoted-Printable编码解码类,代码如下:

using System;
using System.Collections;
using System.Text;
namespace Wind.MailRobot.BLL
{
    /// <summary>
    /// Quoted-Printable encoder and decoder working on the bytes of a caller-supplied
    /// text encoding.
    /// </summary>
    public class QuotedPrintable
    {
        private const byte EQUALS = 61;
        private const byte CR = 13;
        private const byte LF = 10;
        private const byte SPACE = 32;

        // An encoded line may hold at most 76 characters, counting the '=' of a
        // soft line break but not the CRLF that follows it.
        private const int MaxLineLength = 76;

        /// <summary>
        /// Encodes a string to Quoted-Printable.
        /// </summary>
        /// <param name="_ToEncode">String to encode.</param>
        /// <param name="encoding">Encoding used to turn the string into bytes.</param>
        /// <returns>
        /// The encoded text. CR and LF bytes are copied through unchanged; soft line
        /// breaks ("=" followed by CRLF) keep every output line within 76 characters.
        /// </returns>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        public static string Encode(string _ToEncode, Encoding encoding)
        {
            if (_ToEncode == null)
            {
                throw new ArgumentNullException("_ToEncode");
            }
            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }

            byte[] bytes = encoding.GetBytes(_ToEncode);
            StringBuilder encoded = new StringBuilder(bytes.Length);
            int lineLength = 0; // characters already written to the current output line

            for (int i = 0; i < bytes.Length; i++)
            {
                byte current = bytes[i];

                // Hard line breaks pass through and start a new output line.
                if (current == CR || current == LF)
                {
                    encoded.Append((char)current);
                    lineLength = 0;
                    continue;
                }

                // Control bytes (TAB included), 8-bit bytes and '=' are always escaped.
                bool escape = current < 33 || current > 126 || current == EQUALS;
                if (current == SPACE)
                {
                    // A space stays literal unless it ends a line or the whole data,
                    // where trailing whitespace could be stripped in transit.
                    escape = i + 1 == bytes.Length || bytes[i + 1] == CR || bytes[i + 1] == LF;
                }

                int width = escape ? 3 : 1;

                // Leave room for the '=' of the soft break itself, and never split
                // an "=XX" escape across two lines.
                if (lineLength + width > MaxLineLength - 1)
                {
                    encoded.Append("=\r\n");
                    lineLength = 0;
                }

                if (escape)
                {
                    encoded.Append('=').Append(current.ToString("X2"));
                }
                else
                {
                    encoded.Append((char)current);
                }
                lineLength += width;
            }

            return encoded.ToString();
        }

        /// <summary>
        /// Decodes a Quoted-Printable encoded string.
        /// </summary>
        /// <param name="_ToDecode">The encoded string to decode.</param>
        /// <param name="encoding">Encoding used to turn the decoded bytes into text.</param>
        /// <returns>
        /// The decoded string. When the input is malformed or cannot be decoded, the
        /// input is returned with its soft line breaks removed instead of throwing.
        /// </returns>
        public static string Decode(string _ToDecode, Encoding encoding)
        {
            if (string.IsNullOrEmpty(_ToDecode))
            {
                return _ToDecode;
            }

            try
            {
                // Remove soft line breaks first, whether they end in CRLF or a bare LF.
                _ToDecode = _ToDecode.Replace("=\r\n", "").Replace("=\n", "");

                byte[] bytes = new byte[_ToDecode.Length];
                int bytesCount = 0;

                for (int i = 0; i < _ToDecode.Length; i++)
                {
                    char current = _ToDecode[i];

                    if (current == '=')
                    {
                        // An escape needs two hex digits after the '='.
                        if (i + 2 >= _ToDecode.Length)
                        {
                            return _ToDecode;
                        }

                        int high = HexValue(_ToDecode[i + 1]);
                        int low = HexValue(_ToDecode[i + 2]);
                        if (high < 0 || low < 0)
                        {
                            return _ToDecode;
                        }

                        bytes[bytesCount++] = (byte)((high << 4) | low);
                        i += 2;
                    }
                    else if (current > 255)
                    {
                        // Not representable as a single byte: this is not QP text.
                        return _ToDecode;
                    }
                    else
                    {
                        bytes[bytesCount++] = (byte)current;
                    }
                }

                return encoding.GetString(bytes, 0, bytesCount);
            }
            catch (Exception)
            {
                // Deliberate best effort (covers a null encoding as well): hand back
                // the text rather than fail the caller.
                return _ToDecode;
            }
        }

        /// <summary>
        /// Returns the value of a hexadecimal digit, or -1 when the character is not one.
        /// </summary>
        private static int HexValue(char digit)
        {
            if (digit >= '0' && digit <= '9')
            {
                return digit - '0';
            }
            if (digit >= 'A' && digit <= 'F')
            {
                return digit - 'A' + 10;
            }
            if (digit >= 'a' && digit <= 'f')
            {
                return digit - 'a' + 10;
            }
            return -1;
        }
    }
}

 

posted @ 2013-05-27 14:10  AlanCoder  阅读(798)  评论(0编辑  收藏  举报
View Code