代码改变世界

基于Poco的UTF8、UTF16、GBK、Hex之间的转换

2014-02-17 09:41  DVwei  阅读(3305)  评论(0)  编辑  收藏  举报
/******Encoding.h*******/
#include "Poco/UnicodeConverter.h"
#include "Poco/Exception.h"
#include "Poco/DigestEngine.h"

// Export/visibility macro used for this library's declarations; it simply
// forwards to the Foundation_API macro.
#define MyLib_API Foundation_API

// NOTE(review): a using-directive at header scope makes every Poco name visible
// unqualified in each file that includes this header. Code in this file relies
// on it (the unqualified Exception base passed to the macro call below).
using namespace Poco;

// Declares the EncodeException type through Poco's exception-declaration macro,
// naming Exception as its base class.
POCO_DECLARE_EXCEPTION(MyLib_API, EncodeException, Exception)

/// Static helpers for converting text between GBK (code page 936), UTF-8 and
/// UTF-16, and for hex-encoding / hex-decoding byte strings and UTF-16 strings.
///
/// The six conversion functions report failure by throwing EncodeException.
/// Dynamic exception specifications (`throw(EncodeException)`) are intentionally
/// not used: they were removed from the language in C++17 and the out-of-class
/// definitions never carried them, so the contract is documented with @throws
/// instead.
class Encoding
{
public:
    /// Result of the host byte-order probe performed by GetCurrentByteOrder().
    enum ByteOrderType
    {
        BIG_ENDIAN_BYTE_ORDER,      ///< Most significant byte is stored first.
        LITTLE_ENDIAN_BYTE_ORDER,   ///< Least significant byte is stored first.
        UNKNOW                      ///< The probe matched neither layout.
    };

    /// Converts GBK-encoded bytes to UTF-16.
    /// @param gbkString    GBK input.
    /// @param utf16String  Receives the converted text.
    /// @throws EncodeException if the conversion fails.
    static void GBKToUTF16(const std::string& gbkString, std::wstring& utf16String);

    /// Converts UTF-16 text to GBK-encoded bytes.
    /// @param utf16String  UTF-16 input.
    /// @param gbkString    Receives the converted bytes.
    /// @throws EncodeException if the conversion fails.
    static void UTF16ToGBK(const std::wstring& utf16String, std::string& gbkString);

    /// Converts UTF-8 bytes to UTF-16 using Poco::UnicodeConverter.
    /// @throws EncodeException if the converter raises a Poco::Exception.
    static void UTF8ToUTF16(const std::string& utf8String, std::wstring& utf16String);

    /// Converts UTF-16 text to UTF-8 using Poco::UnicodeConverter.
    /// @throws EncodeException if the converter raises a Poco::Exception.
    static void UTF16ToUTF8(const std::wstring& utf16String, std::string& utf8String);

    /// Converts UTF-8 bytes to GBK by going through UTF-16.
    /// @throws EncodeException if either conversion step fails.
    static void UTF8ToGBK(const std::string& utf8String, std::string& gbkString);

    /// Converts GBK bytes to UTF-8 by going through UTF-16.
    /// @throws EncodeException if either conversion step fails.
    static void GBKToUTF8(const std::string& gbkString, std::string& utf8String);

    /// Writes the hexadecimal text form of every byte of `bytes` to `hexString`.
    static void EncodeHexString(const std::string& bytes, std::string& hexString);

    /// Parses `hexString` two characters at a time and appends each parsed value
    /// to `bytes` (which is cleared first). Chunks that do not parse as
    /// hexadecimal are skipped.
    static void DecodeHexString(const std::string& hexString, std::string& bytes);

    /// Writes the hexadecimal text form of a UTF-16 string to `hexString`,
    /// emitting two bytes per element with the more significant byte first.
    static void EncodeHexString(const std::wstring& bytes, std::string& hexString);

    /// Parses `hexString` four characters at a time and appends each parsed value
    /// to `bytes` (which is cleared first). Chunks that do not parse as
    /// hexadecimal are skipped.
    static void DecodeHexString(const std::string& hexString, std::wstring& bytes);

    /// Returns the byte order of the machine the code is running on; the probe
    /// runs on the first call and the result is cached.
    static ByteOrderType GetCurrentByteOrder();

private:
    static Poco::UnicodeConverter _unicodeConverter;   ///< Converter used for the UTF-8 <-> UTF-16 steps.
    static ByteOrderType _currentByteOrder;            ///< Cached result of GetCurrentByteOrder().
};
/********Encoding.cpp********/#include "Encoding.h"
#include "Poco/NumberParser.h"

// Definition of the converter object used by the UTF-8 <-> UTF-16 functions.
Poco::UnicodeConverter Encoding::_unicodeConverter;
// Definition of the cached byte order; it is written by GetCurrentByteOrder().
Encoding::ByteOrderType Encoding::_currentByteOrder;

// Provides the out-of-line definitions for EncodeException through Poco's
// exception-implementation macro; "Encoding error" is the text passed as the
// exception's name.
POCO_IMPLEMENT_EXCEPTION(EncodeException, Poco::Exception, "Encoding error")

/// Converts a GBK (Windows code page 936) encoded string to UTF-16.
///
/// The whole of `gbkString` is converted, including any embedded NUL bytes, and
/// the result contains no terminating NUL character.
///
/// @param gbkString    GBK-encoded input bytes.
/// @param utf16String  Receives the converted text. It is replaced only when the
///                     conversion succeeds.
/// @throws EncodeException if MultiByteToWideChar reports failure; the exception
///         code is the value returned by GetLastError().
void Encoding::GBKToUTF16(const std::string& gbkString, std::wstring& utf16String)
{
    // MultiByteToWideChar treats a zero-length input as an error, so the empty
    // string is handled up front.
    if(gbkString.empty())
    {
        utf16String.clear();
        return;
    }

    // Pass the real byte count rather than -1. With -1 the API also counts and
    // writes the terminating NUL, which then became part of the result string.
    const int inputLength = static_cast<int>(gbkString.size());

    // First call: query the number of wide characters required.
    const int required = MultiByteToWideChar(936, 0, gbkString.data(), inputLength, NULL, 0);
    if(required <= 0)
        throw EncodeException("GBK convert to UTF16 failed", GetLastError());

    // Second call: perform the conversion into a buffer of exactly that size.
    std::vector<wchar_t> buff(required);
    const int written = MultiByteToWideChar(936, 0, gbkString.data(), inputLength, buff.data(), required);
    if(written <= 0)
        throw EncodeException("GBK convert to UTF16 failed", GetLastError());

    utf16String.assign(buff.data(), static_cast<std::wstring::size_type>(written));
}


/// Converts UTF-16 text to a GBK (Windows code page 936) encoded string.
///
/// The whole of `utf16String` is converted, including any embedded NUL
/// characters, and the result contains no terminating NUL byte.
///
/// @param utf16String  UTF-16 input text.
/// @param gbkString    Receives the converted bytes. It is replaced only when
///                     the conversion succeeds.
/// @throws EncodeException if WideCharToMultiByte reports failure; the exception
///         code is the value returned by GetLastError().
void Encoding::UTF16ToGBK(const std::wstring& utf16String, std::string& gbkString)
{
    // WideCharToMultiByte treats a zero-length input as an error, so the empty
    // string is handled up front.
    if(utf16String.empty())
    {
        gbkString.clear();
        return;
    }

    // Pass the real character count rather than -1. With -1 the API also counts
    // and writes the terminating NUL, which then became part of the result.
    const int inputLength = static_cast<int>(utf16String.size());

    // First call: query the number of bytes required.
    const int required = WideCharToMultiByte(936, 0, utf16String.data(), inputLength, NULL, 0, NULL, NULL);
    if(required <= 0)
        throw EncodeException("UTF16 convert to GBK failed", GetLastError());

    // Second call: perform the conversion into a buffer of exactly that size.
    std::vector<char> buff(required);
    const int written = WideCharToMultiByte(936, 0, utf16String.data(), inputLength, buff.data(), required, NULL, NULL);
    if(written <= 0)
        throw EncodeException("UTF16 convert to GBK failed", GetLastError());

    gbkString.assign(buff.data(), static_cast<std::string::size_type>(written));
}

void Encoding::UTF8ToUTF16(const std::string& utf8String, std::wstring& utf16String)
{
    std::string errorMessage;

    try
    {
        _unicodeConverter.toUTF16(utf8String, utf16String);
    }
    catch(Poco::Exception &e)
    {
        errorMessage.append("UTF8 convert to UTF16 failed, ");
        errorMessage.append(e.message());
    }
    if(!errorMessage.empty())
        throw EncodeException(errorMessage, GetLastError());
}

void Encoding::UTF16ToUTF8(const std::wstring& utf16String, std::string& utf8String)
{
    std::string errorMessage;

    try
    {
        _unicodeConverter.toUTF8(utf16String, utf8String);
    }
    catch(Poco::Exception &e)
    {
        errorMessage.append("UTF8 convert to UTF16 failed, ");
        errorMessage.append(e.message());
    }
    if(!errorMessage.empty())
        throw EncodeException(errorMessage, GetLastError());
}

void Encoding::UTF8ToGBK(const std::string& utf8String, std::string& gbkString)
{
    std::wstring utf16String;
    std::string errorMessage;

    try
    {
        _unicodeConverter.toUTF16(utf8String, utf16String);
        UTF16ToGBK(utf16String, gbkString);
    }
    catch(EncodeException)
    {
        errorMessage = "UTF8 convert to GBK failed";
    }
    catch(Poco::Exception &e)
    {
        errorMessage.append("UTF8 convert to UTF16 failed, ");
        errorMessage.append(e.message());
    }
    if(!errorMessage.empty())
        throw EncodeException(errorMessage, GetLastError());
}

void Encoding::GBKToUTF8(const std::string& gbkString, std::string& utf8String)
{
    std::wstring utf16String;
    std::string errorMessage;

    try
    {
        GBKToUTF16(gbkString, utf16String);
        _unicodeConverter.toUTF8(utf16String, utf8String);
    }
    catch(EncodeException)
    {
        errorMessage = "GBK convert to UTF8 failed";
    }
    catch(Poco::Exception &e)
    {
        errorMessage.append("UTF8 convert to UTF16 failed, ");
        errorMessage.append(e.message());
    }
    if(!errorMessage.empty())
        throw EncodeException(errorMessage, GetLastError());
}

void Encoding::EncodeHexString(const std::string& bytes, std::string& hexString)
{
    if(!hexString.empty())
        hexString.clear();

    Poco::DigestEngine::Digest digest(bytes.begin(), bytes.end());
    hexString = Poco::DigestEngine::digestToHex(digest);
}

void Encoding::DecodeHexString(const std::string& hexString, std::string& bytes)
{
    unsigned int _value;
    if(!bytes.empty())
        bytes.clear();

    for(std::string::size_type i = 0, j = 0; i <  hexString.length(); i+=2)
    {
        if(NumberParser::tryParseHex(hexString.substr(i, 2), _value))
            bytes.push_back(_value);
    }
}

/// Produces the hexadecimal text form of a UTF-16 string.
///
/// Every element contributes two bytes, the more significant byte first, so
/// the output is the same on little-endian and big-endian hosts. Only the low
/// 16 bits of each element are used.
///
/// @param utf16String  UTF-16 text to encode.
/// @param hexString    Receives the result of Poco::DigestEngine::digestToHex
///                     applied to those bytes; any previous content is discarded.
void Encoding::EncodeHexString(const std::wstring& utf16String, std::string& hexString)
{
    hexString.clear();

    Poco::DigestEngine::Digest digest;
    digest.reserve(utf16String.size() * 2);

    for(const wchar_t codeUnit : utf16String)
    {
        // Split the value arithmetically instead of reading its in-memory bytes:
        // this needs no byte-order probe and no pointer cast, and it is also
        // correct where wchar_t is wider than 16 bits.
        const unsigned int unit = static_cast<unsigned int>(codeUnit);
        digest.push_back(static_cast<unsigned char>((unit >> 8) & 0xFFu));
        digest.push_back(static_cast<unsigned char>(unit & 0xFFu));
    }

    hexString = Poco::DigestEngine::digestToHex(digest);
}

void Encoding::DecodeHexString(const std::string& hexString, std::wstring& utf16String)
{
    unsigned int _value;
    if(!utf16String.empty())
        utf16String.clear();

    for(std::string::size_type i = 0, j = 0; i <  hexString.length(); i+=4)
    {
        if(NumberParser::tryParseHex(hexString.substr(i, 4), _value))
            utf16String.push_back(_value);
    }
}

/// Reports the byte order of the machine the code is running on.
///
/// The probe stores 0x0102 in a 16-bit value and inspects its two bytes in
/// memory order. It runs once, on the first call; the result is kept in a
/// function-local static (and mirrored into _currentByteOrder) and returned
/// directly on every later call.
///
/// @return BIG_ENDIAN_BYTE_ORDER when the bytes read 01 02,
///         LITTLE_ENDIAN_BYTE_ORDER when they read 02 01, otherwise UNKNOW.
Encoding::ByteOrderType Encoding::GetCurrentByteOrder()
{
    // A function-local static is initialized exactly once by the language, which
    // replaces the hand-written "already computed" flag.
    static const ByteOrderType detected = []() -> ByteOrderType
    {
        const char16_t probe = 0x0102;
        // Inspecting an object through unsigned char is well defined, unlike
        // reading the inactive member of a union.
        const unsigned char* raw = reinterpret_cast<const unsigned char*>(&probe);

        ByteOrderType order = UNKNOW;
        if(raw[0] == 0x01 && raw[1] == 0x02)
            order = BIG_ENDIAN_BYTE_ORDER;
        else if(raw[0] == 0x02 && raw[1] == 0x01)
            order = LITTLE_ENDIAN_BYTE_ORDER;

        _currentByteOrder = order;
        return order;
    }();

    return detected;
}