URL编码解码类
URL编码分为ANSI和UTF-8两种,这两种又各自区分是否把空格编码为“+”等情况;为了方便使用,封装了此类。
头文件:
// The guard avoids a leading underscore followed by an uppercase letter,
// because such identifiers are reserved for the implementation.
#ifndef URLENCODE_H_
#define URLENCODE_H_
#include <string>

// URL (percent) encoding and decoding helpers.
//
// Buffer API (functions taking `source`, `dest`, `max`):
//   source - NUL-terminated input string.
//   dest   - output buffer; receives a NUL-terminated result.
//   max    - capacity of `dest` in bytes. When it is 0 nothing is written
//            and 0 is returned.
//   return - number of characters stored in `dest`, not counting the NUL.
//
// "UTF8" variants: the decoders treat the decoded bytes as UTF-8 and convert
// them to the system ANSI code page; the encoders treat `source` as text in
// the system ANSI code page and convert it to UTF-8 before escaping.
//
// The decoders replace a '%' that is not followed by two hex digits with '?'.

// Decode all encoded characters. Also decode + as space.
int UrlDecode(const char *source, char *dest, unsigned max);
int UrlDecodeUTF8(const char *source, char *dest, unsigned max);

// Decode all encoded characters. A '+' is copied through unchanged.
int UrlDecodeWithoutEncodingSpaceAsPlus(const char *source, char *dest,
                                        unsigned max);
int UrlDecodeWithoutEncodingSpaceAsPlusUTF8(const char *source, char *dest,
                                            unsigned max);

// Encode all characters except alphas, numbers, and -_.!~*'()
// Also encode space as +.
int UrlEncode(const char *source, char *dest, unsigned max);
int UrlEncodeUTF8(const char *source, char *dest, unsigned max);

// Encode all characters except alphas, numbers, and -_.!~*'()
// A space is escaped like any other character (%20).
int UrlEncodeWithoutEncodingSpaceAsPlus(const char *source, char *dest,
                                        unsigned max);
int UrlEncodeWithoutEncodingSpaceAsPlusUTF8(const char *source, char *dest,
                                            unsigned max);

// Encode only unsafe chars, including \ "^&`<>[]{}
// Also encode space as %20, instead of +
int UrlEncodeOnlyUnsafeChars(const char *source, char *dest, unsigned max);
int UrlEncodeOnlyUnsafeCharsUTF8(const char *source, char *dest, unsigned max);

// std::string convenience wrappers around the buffer API above. Each one
// allocates its own working buffer and returns the converted text.
std::string UrlDecodeString(const std::string & encoded);
std::string UrlDecodeStringUTF8(const std::string & encoded);
std::string UrlDecodeStringWithoutEncodingSpaceAsPlus(
    const std::string & encoded);
std::string UrlDecodeStringWithoutEncodingSpaceAsPlusUTF8(
    const std::string & encoded);
std::string UrlEncodeString(const std::string & decoded);
std::string UrlEncodeStringUTF8(const std::string & decoded);
std::string UrlEncodeStringWithoutEncodingSpaceAsPlus(
    const std::string & decoded);
std::string UrlEncodeStringWithoutEncodingSpaceAsPlusUTF8(
    const std::string & decoded);
std::string UrlEncodeStringForOnlyUnsafeChars(const std::string & decoded);
std::string UrlEncodeStringForOnlyUnsafeCharsUTF8(const std::string & decoded);
#endif  // URLENCODE_H_
源文件:
#include "urlencode.h"
#include <assert.h>
#include <Windows.h>
//#define STACK_ARRAY(TYPE, LEN) static_cast<TYPE*>(::alloca((LEN)*sizeof(TYPE)))
// Converts the bytes of |mb|, interpreted in |code_page|, to a wide string
// using MultiByteToWideChar. Returns an empty string when |mb| is empty or
// when the sizing call reports zero characters.
// Keep this function free of asserts (the original note says the assertion
// code relies on it).
std::wstring SysMultiByteToWide(const std::string& mb, unsigned int code_page) {
  std::wstring result;
  if (!mb.empty()) {
    const int source_bytes = static_cast<int>(mb.length());
    // First pass: a NULL output buffer makes the API report the needed size.
    const int wide_chars = MultiByteToWideChar(code_page, 0, mb.data(),
                                               source_bytes, NULL, 0);
    if (wide_chars != 0) {
      result.resize(wide_chars);
      // Second pass: convert straight into the string's storage.
      MultiByteToWideChar(code_page, 0, mb.data(), source_bytes,
                          &result[0], wide_chars);
    }
  }
  return result;
}
// Converts |wide| to a multi-byte string in |code_page| using
// WideCharToMultiByte. Returns an empty string when |wide| is empty or when
// the sizing call reports zero bytes.
// Keep this function free of asserts (the original note says the assertion
// code relies on it).
std::string SysWideToMultiByte(const std::wstring& wide, unsigned int code_page) {
  std::string result;
  const int source_chars = static_cast<int>(wide.length());
  if (source_chars != 0) {
    // First pass: a NULL output buffer makes the API report the needed size.
    const int byte_count = WideCharToMultiByte(code_page, 0, wide.data(),
                                               source_chars, NULL, 0,
                                               NULL, NULL);
    if (byte_count != 0) {
      result.resize(byte_count);
      // Second pass: convert straight into the string's storage.
      WideCharToMultiByte(code_page, 0, wide.data(), source_chars,
                          &result[0], byte_count, NULL, NULL);
    }
  }
  return result;
}
// Wide string -> UTF-8 bytes (SysWideToMultiByte with CP_UTF8).
// Keep this function free of asserts, like the helper it forwards to.
std::string SysWideToUTF8(const std::wstring& wide) {
  const unsigned int target_code_page = CP_UTF8;
  return SysWideToMultiByte(wide, target_code_page);
}
// UTF-8 bytes -> wide string (SysMultiByteToWide with CP_UTF8).
// Keep this function free of asserts, like the helper it forwards to.
std::wstring SysUTF8ToWide(const std::string& utf8) {
  const unsigned int source_code_page = CP_UTF8;
  return SysMultiByteToWide(utf8, source_code_page);
}
// Wide string -> bytes in the system ANSI code page (CP_ACP).
std::string SysWideToNativeMB(const std::wstring& wide) {
  const unsigned int target_code_page = CP_ACP;
  return SysWideToMultiByte(wide, target_code_page);
}
// Bytes in the system ANSI code page (CP_ACP) -> wide string.
std::wstring SysNativeMBToWide(const std::string& native_mb) {
  const unsigned int source_code_page = CP_ACP;
  return SysMultiByteToWide(native_mb, source_code_page);
}
// Parses the two hexadecimal digits code[0] and code[1] (either case).
// Returns their combined value (0..255), or -1 as soon as a character that is
// not a hex digit is seen; code[1] is not read when code[0] is rejected.
static int HexPairValue(const char * code) {
  int result = 0;
  for (int index = 0; index < 2; ++index) {
    const int c = code[index];
    int nibble;
    if (c >= '0' && c <= '9') {
      nibble = c - '0';
    } else if (c >= 'A' && c <= 'F') {
      nibble = c - 'A' + 10;
    } else if (c >= 'a' && c <= 'f') {
      nibble = c - 'a' + 10;
    } else {
      return -1;
    }
    // The first digit ends up in the high nibble, the second in the low one.
    result = (result << 4) | nibble;
  }
  return result;
}
// Percent-decodes the NUL-terminated |source| into |dest|.
//
//   source               - NUL-terminated input.
//   dest                 - output buffer of at least |max| bytes.
//   max                  - capacity of |dest| including the terminating NUL;
//                          when 0, nothing is written and 0 is returned.
//   encode_space_as_plus - when true a '+' in the input is decoded to ' ';
//                          otherwise '+' is copied through.
//   utf8                 - when true the decoded bytes are treated as UTF-8
//                          and converted to the system ANSI code page.
//
// A '%' that is not followed by two hex digits is replaced by '?'.
// Output that does not fit is truncated; the result is always NUL-terminated
// inside the first |max| bytes. Returns the number of characters stored, not
// counting the NUL.
int InternalUrlDecode(const char *source, char *dest, unsigned max,
                      bool encode_space_as_plus,
                      bool utf8) {
  if (max == 0) {
    return 0;
  }
  char *start = dest;
  // Reserve one byte for the terminator. The previous loop bound allowed
  // |max| characters and then wrote the NUL at dest[max], one past the end.
  const unsigned capacity = max - 1;
  while (static_cast<unsigned>(dest - start) < capacity && *source) {
    switch (*source) {
      case '+':
        *dest++ = encode_space_as_plus ? ' ' : '+';
        break;
      case '%': {
        // Only look at the hex pair when both characters exist, so we never
        // read past the terminating NUL.
        int value = -1;
        if (source[1] && source[2]) {
          value = HexPairValue(source + 1);
        }
        if (value >= 0) {
          *dest++ = static_cast<char>(value);
          source += 2;  // Skip the two hex digits; '%' is skipped below.
        } else {
          *dest++ = '?';
        }
        break;
      }
      default:
        *dest++ = *source;
    }
    source++;
  }
  *dest = 0;
  int size = static_cast<int>(dest - start);
  if (utf8 && size > 0) {
    std::wstring wide = SysUTF8ToWide(start);
    std::string native = SysWideToNativeMB(wide);
    // Copy back no more than the buffer can hold. Truncation may split a
    // multi-byte character, but it never writes outside |dest|.
    size_t native_length = strlen(native.c_str());
    if (native_length > capacity) {
      native_length = capacity;
    }
    memcpy(start, native.c_str(), native_length);
    start[native_length] = 0;
    size = static_cast<int>(native_length);
  }
  return size;
}
// Decodes %XX escapes and turns '+' into a space; no UTF-8 conversion.
int UrlDecode(const char *source, char *dest, unsigned max) {
  const bool plus_is_space = true;
  const bool from_utf8 = false;
  return InternalUrlDecode(source, dest, max, plus_is_space, from_utf8);
}
// Decodes %XX escapes, turns '+' into a space, then converts the result from
// UTF-8 to the system ANSI code page.
int UrlDecodeUTF8(const char *source, char *dest, unsigned max) {
  const bool plus_is_space = true;
  const bool from_utf8 = true;
  return InternalUrlDecode(source, dest, max, plus_is_space, from_utf8);
}
// Decodes %XX escapes; '+' is copied through unchanged. No UTF-8 conversion.
int UrlDecodeWithoutEncodingSpaceAsPlus(const char *source, char *dest,
                                        unsigned max) {
  const bool plus_is_space = false;
  const bool from_utf8 = false;
  return InternalUrlDecode(source, dest, max, plus_is_space, from_utf8);
}
// Decodes %XX escapes ('+' is copied through unchanged), then converts the
// result from UTF-8 to the system ANSI code page.
int UrlDecodeWithoutEncodingSpaceAsPlusUTF8(const char *source, char *dest,
                                            unsigned max) {
  const bool plus_is_space = false;
  const bool from_utf8 = true;
  return InternalUrlDecode(source, dest, max, plus_is_space, from_utf8);
}
// Reports whether |ch| may be written to an encoded URL without escaping.
//
//   unsafe_only == true : every byte is allowed except NUL, control
//                         characters and space (<= ' '), bytes >= 0x80, and
//                         the characters \ " ^ & ` < > [ ] { }.
//   unsafe_only == false: only ASCII letters/digits (per isalnum) and
//                         - _ . ! ~ * ' ( ) are allowed.
//
// NUL is never valid. It is rejected up front because strchr() also matches
// the terminator of the character set, which used to make '\0' look valid in
// the non-unsafe mode.
bool IsValidUrlChar(char ch, bool unsafe_only) {
  // Work on the unsigned value so the range checks do not depend on whether
  // plain char is signed on the target platform.
  const unsigned char byte = static_cast<unsigned char>(ch);
  if (byte == 0) {
    return false;
  }
  if (unsafe_only) {
    if (byte <= ' ' || byte >= 0x80) {
      return false;
    }
    return strchr("\\\"^&`<>[]{}", ch) == NULL;
  }
  return isalnum(byte) != 0 || strchr("-_.!~*'()", ch) != NULL;
}
// Percent-encodes the NUL-terminated |source| into |dest|.
//
//   source               - NUL-terminated input.
//   dest                 - output buffer of at least |max| bytes.
//   max                  - capacity of |dest| including the terminating NUL;
//                          when 0, nothing is written and 0 is returned.
//   encode_space_as_plus - write ' ' as '+' (ignored when |unsafe_only|).
//   unsafe_only          - escape only what IsValidUrlChar() rejects in its
//                          unsafe-only mode.
//   utf8                 - treat |source| as text in the system ANSI code
//                          page and convert it to UTF-8 before escaping.
//
// Encoding stops early, without emitting a partial %XX escape, once the next
// piece of output would not fit. The result is always NUL-terminated inside
// the first |max| bytes. Returns the number of characters stored, not
// counting the NUL.
int InternalUrlEncode(const char *source, char *dest, unsigned int max,
                      bool encode_space_as_plus, bool unsafe_only, bool utf8) {
  static const char *digits = "0123456789ABCDEF";
  if (max == 0) {
    return 0;
  }
  std::string utf8_source;
  if (utf8) {
    std::wstring wide = SysNativeMBToWide(source);
    utf8_source = SysWideToUTF8(wide);
    source = utf8_source.c_str();
  }
  char *start = dest;
  // Reserve one byte for the terminator. The previous loop bound let a plain
  // character land at index max-1, pushing the NUL to dest[max].
  const unsigned int capacity = max - 1;
  while (*source) {
    const unsigned int used = static_cast<unsigned int>(dest - start);
    if (used >= capacity) {
      break;
    }
    const unsigned char ch = static_cast<unsigned char>(*source);
    if (ch == ' ' && encode_space_as_plus && !unsafe_only) {
      *dest++ = '+';
    } else if (IsValidUrlChar(ch, unsafe_only)) {
      *dest++ = *source;
    } else {
      // A %XX escape needs three bytes; never emit a partial one.
      if (capacity - used < 3) {
        break;
      }
      *dest++ = '%';
      *dest++ = digits[(ch >> 4) & 0x0F];
      *dest++ = digits[ch & 0x0F];
    }
    source++;
  }
  assert(static_cast<unsigned int>(dest - start) < max);
  *dest = 0;
  return static_cast<int>(dest - start);
}
// Escapes everything except alphanumerics and -_.!~*'() ; a space becomes '+'.
int UrlEncode(const char *source, char *dest, unsigned max) {
  const bool space_as_plus = true;
  const bool only_unsafe = false;
  const bool to_utf8 = false;
  return InternalUrlEncode(source, dest, max, space_as_plus, only_unsafe,
                           to_utf8);
}
// Like UrlEncode, but the input is first converted from the system ANSI code
// page to UTF-8.
int UrlEncodeUTF8(const char *source, char *dest, unsigned max) {
  const bool space_as_plus = true;
  const bool only_unsafe = false;
  const bool to_utf8 = true;
  return InternalUrlEncode(source, dest, max, space_as_plus, only_unsafe,
                           to_utf8);
}
// Escapes everything except alphanumerics and -_.!~*'() ; a space is escaped
// as %20 rather than written as '+'.
int UrlEncodeWithoutEncodingSpaceAsPlus(const char *source, char *dest,
                                        unsigned max) {
  const bool space_as_plus = false;
  const bool only_unsafe = false;
  const bool to_utf8 = false;
  return InternalUrlEncode(source, dest, max, space_as_plus, only_unsafe,
                           to_utf8);
}
// Like UrlEncodeWithoutEncodingSpaceAsPlus, but the input is first converted
// from the system ANSI code page to UTF-8.
int UrlEncodeWithoutEncodingSpaceAsPlusUTF8(const char *source, char *dest,
                                            unsigned max) {
  const bool space_as_plus = false;
  const bool only_unsafe = false;
  const bool to_utf8 = true;
  return InternalUrlEncode(source, dest, max, space_as_plus, only_unsafe,
                           to_utf8);
}
// Escapes only the characters that IsValidUrlChar() rejects in unsafe-only
// mode; a space is escaped as %20.
int UrlEncodeOnlyUnsafeChars(const char *source, char *dest, unsigned max) {
  const bool space_as_plus = false;
  const bool only_unsafe = true;
  const bool to_utf8 = false;
  return InternalUrlEncode(source, dest, max, space_as_plus, only_unsafe,
                           to_utf8);
}
// Like UrlEncodeOnlyUnsafeChars, but the input is first converted from the
// system ANSI code page to UTF-8.
int UrlEncodeOnlyUnsafeCharsUTF8(const char *source, char *dest, unsigned max) {
  const bool space_as_plus = false;
  const bool only_unsafe = true;
  const bool to_utf8 = true;
  return InternalUrlEncode(source, dest, max, space_as_plus, only_unsafe,
                           to_utf8);
}
// std::string front end for InternalUrlDecode().
//
//   encoded              - percent-encoded input (read up to its first NUL).
//   encode_space_as_plus - forwarded: decode '+' as a space when true.
//   utf8                 - forwarded: convert the decoded bytes from UTF-8 to
//                          the system ANSI code page when true.
//
// Returns the decoded text. The result is trimmed to the number of
// characters actually produced; previously it kept the full working-buffer
// length, so size() was wrong and the string carried trailing NUL bytes.
std::string
InternalUrlDecodeString(const std::string & encoded,
                        bool encode_space_as_plus,
                        bool utf8) {
  // Percent-decoding never produces more characters than it consumes, so the
  // input length plus a terminator is enough for the decoding pass.
  const size_t needed_length = encoded.length() + 1;
  std::string buf;
  buf.resize(needed_length);
  // Write through &buf[0]; casting away const on c_str() is not a supported
  // way to modify the string.
  const int written = InternalUrlDecode(encoded.c_str(), &buf[0],
                                        static_cast<unsigned>(needed_length),
                                        encode_space_as_plus, utf8);
  buf.resize(written > 0 ? static_cast<size_t>(written) : 0);
  return buf;
}
// String form of UrlDecode: '+' becomes a space, no UTF-8 conversion.
std::string
UrlDecodeString(const std::string & encoded) {
  const bool plus_is_space = true;
  const bool from_utf8 = false;
  return InternalUrlDecodeString(encoded, plus_is_space, from_utf8);
}
// String form of UrlDecodeUTF8: '+' becomes a space and the result is
// converted from UTF-8 to the system ANSI code page.
std::string
UrlDecodeStringUTF8(const std::string & encoded) {
  const bool plus_is_space = true;
  const bool from_utf8 = true;
  return InternalUrlDecodeString(encoded, plus_is_space, from_utf8);
}
// String form of UrlDecodeWithoutEncodingSpaceAsPlus: '+' is copied through
// unchanged, no UTF-8 conversion.
std::string
UrlDecodeStringWithoutEncodingSpaceAsPlus(const std::string & encoded) {
  const bool plus_is_space = false;
  const bool from_utf8 = false;
  return InternalUrlDecodeString(encoded, plus_is_space, from_utf8);
}
// String form of UrlDecodeWithoutEncodingSpaceAsPlusUTF8: '+' is copied
// through unchanged and the result is converted from UTF-8 to the system
// ANSI code page.
std::string
UrlDecodeStringWithoutEncodingSpaceAsPlusUTF8(const std::string & encoded) {
  const bool plus_is_space = false;
  const bool from_utf8 = true;
  return InternalUrlDecodeString(encoded, plus_is_space, from_utf8);
}
// std::string front end for InternalUrlEncode().
//
//   decoded              - text to encode (read up to its first NUL).
//   encode_space_as_plus - forwarded: write ' ' as '+'.
//   unsafe_only          - forwarded: escape only the unsafe characters.
//   utf8                 - forwarded: convert the input from the system ANSI
//                          code page to UTF-8 before escaping.
//
// Returns the encoded text, trimmed to the number of characters actually
// produced. Previously the result kept the full working-buffer length, so
// size() was wrong and the string carried trailing NUL bytes.
std::string
InternalUrlEncodeString(const std::string & decoded,
                        bool encode_space_as_plus,
                        bool unsafe_only,
                        bool utf8) {
  // Worst case per input byte: every byte becomes a three-character %XX
  // escape. With |utf8| the input is re-encoded first, and one ANSI byte can
  // become up to three UTF-8 bytes (for example a single-byte character that
  // maps to U+20AC), i.e. nine output characters. Sizing for only three made
  // the UTF-8 variants silently truncate their output.
  const size_t chars_per_byte = utf8 ? 9 : 3;
  const size_t needed_length = decoded.length() * chars_per_byte + 1;
  std::string buf;
  buf.resize(needed_length);
  // Write through &buf[0]; casting away const on c_str() is not a supported
  // way to modify the string.
  const int written = InternalUrlEncode(decoded.c_str(), &buf[0],
                                        static_cast<unsigned int>(needed_length),
                                        encode_space_as_plus, unsafe_only, utf8);
  buf.resize(written > 0 ? static_cast<size_t>(written) : 0);
  return buf;
}
// String form of UrlEncode: a space becomes '+'.
std::string
UrlEncodeString(const std::string & decoded) {
  const bool space_as_plus = true;
  const bool only_unsafe = false;
  const bool to_utf8 = false;
  return InternalUrlEncodeString(decoded, space_as_plus, only_unsafe, to_utf8);
}
// String form of UrlEncodeUTF8: the input is converted to UTF-8 first and a
// space becomes '+'.
std::string
UrlEncodeStringUTF8(const std::string & decoded) {
  const bool space_as_plus = true;
  const bool only_unsafe = false;
  const bool to_utf8 = true;
  return InternalUrlEncodeString(decoded, space_as_plus, only_unsafe, to_utf8);
}
// String form of UrlEncodeWithoutEncodingSpaceAsPlus: a space is escaped as
// %20 instead of being written as '+'.
std::string
UrlEncodeStringWithoutEncodingSpaceAsPlus(const std::string & decoded) {
  const bool space_as_plus = false;
  const bool only_unsafe = false;
  const bool to_utf8 = false;
  return InternalUrlEncodeString(decoded, space_as_plus, only_unsafe, to_utf8);
}
// String form of UrlEncodeWithoutEncodingSpaceAsPlusUTF8: the input is
// converted to UTF-8 first and a space is escaped as %20.
std::string
UrlEncodeStringWithoutEncodingSpaceAsPlusUTF8(const std::string & decoded) {
  const bool space_as_plus = false;
  const bool only_unsafe = false;
  const bool to_utf8 = true;
  return InternalUrlEncodeString(decoded, space_as_plus, only_unsafe, to_utf8);
}
// String form of UrlEncodeOnlyUnsafeChars: only unsafe characters are
// escaped.
std::string
UrlEncodeStringForOnlyUnsafeChars(const std::string & decoded) {
  const bool space_as_plus = false;
  const bool only_unsafe = true;
  const bool to_utf8 = false;
  return InternalUrlEncodeString(decoded, space_as_plus, only_unsafe, to_utf8);
}
// String form of UrlEncodeOnlyUnsafeCharsUTF8: the input is converted to
// UTF-8 first and only unsafe characters are escaped.
std::string
UrlEncodeStringForOnlyUnsafeCharsUTF8(const std::string & decoded) {
  const bool space_as_plus = false;
  const bool only_unsafe = true;
  const bool to_utf8 = true;
  return InternalUrlEncodeString(decoded, space_as_plus, only_unsafe, to_utf8);
}
测试源码:
#include "urlencode.h"
// Number of elements in the built-in array |x|, as an int. The argument is
// parenthesized so that any array-valued expression expands correctly.
// Do not pass a pointer: the result would be meaningless.
#define ARRAY_SIZE(x) (static_cast<int>(sizeof(x) / sizeof((x)[0])))
// Calls UrlEncode with an input far larger than the destination: first with
// a one-byte buffer, then with a reported capacity of zero.
void SourceTooLong() {
  char long_input[] = "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
                      "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^";
  char out[1];
  int written = UrlEncode(long_input, out, ARRAY_SIZE(out));
  // Expected: out[0] == '\0' (only the terminator fits).
  out[0] = 'a';
  written = UrlEncode(long_input, out, 0);
  // Expected: out[0] is still 'a' (a zero-capacity buffer is left untouched).
}
// Encodes a single character that needs escaping into a buffer that is
// exactly large enough for the escape plus the terminator.
void OneCharacterConversion() {
  char input[] = "^";
  char out[4];
  int written = UrlEncode(input, out, ARRAY_SIZE(out));  // expected: 3
  // Expected: out holds "%5E".
}
// The destination is too small to hold a %XX escape but large enough for the
// given text, which needs no escaping.
void ShortDestinationNoEncoding() {
  char input[] = "aa";
  char out[3];
  int written = UrlEncode(input, out, ARRAY_SIZE(out));  // expected: 2
  // Expected: out holds "aa".
}
// The destination is too small to hold the %XX escape the input requires.
void ShortDestinationEncoding() {
  char input[] = "&";
  char out[3];
  int written = UrlEncode(input, out, ARRAY_SIZE(out));  // expected: 0
  // Expected: out[0] == '\0' (nothing could be written).
}
// Encodes a mixed ASCII/Chinese string through both the buffer API and the
// std::string API using the UTF-8 variants.
void Encoding1() {
  // A string literal is const; binding it to a plain char* is not valid in
  // C++11 and later.
  const char* source = "[session=]#中国#china sad_sad@163.com";
  char dest[180];
  int i = UrlEncodeUTF8(source, dest, ARRAY_SIZE(dest));
  std::string dests = UrlEncodeStringWithoutEncodingSpaceAsPlusUTF8(source);
  // NOTE(review): the expectations that used to be noted here (length 5,
  // "A%5E+") do not correspond to this input, so no expected value is stated.
}
// Encodes text containing an unsafe character and a trailing space with the
// variant that escapes the space instead of writing '+'.
void Encoding2() {
  char input[] = "A^ ";
  char out[8];
  const int out_capacity = ARRAY_SIZE(out);
  int written = UrlEncodeWithoutEncodingSpaceAsPlus(input, out, out_capacity);
  // Expected: out holds "A%5E%20".
}
// Decodes an ed2k link whose file name contains percent-encoded UTF-8,
// through both the buffer API and the std::string API.
void Decoding1() {
  char link[] = "ed2k://|file|%E5%B0%8F%E5%8E%9F%E5%88%AB%E5%93%AD.Nakuna.Hara-chan.Ep06.Chi_Jap.HDTVrip.1024X576-YYeTs%E4%BA%BA%E4%BA%BA%E5%BD%B1%E8%A7%86.mkv|575452738|a9be9c1cf178805985002437b5782548|h=iqipca7ddjb2osas7s26u5in5ejooaae|/";
  char out[800];
  int written = UrlDecodeUTF8(link, out, ARRAY_SIZE(out));
  std::string decoded = UrlDecodeStringUTF8(link);
  // NOTE(review): the original carried the notes "//3" and
  // ASSERT_STREQ("A^ ", dest) here; they do not appear to match this input.
}
// Decodes with the variant that leaves '+' untouched.
void Decoding2() {
  char input[] = "A%5E+";
  char out[8];
  int written = UrlDecodeWithoutEncodingSpaceAsPlus(input, out,
                                                    ARRAY_SIZE(out));  // expected: 3
  // Expected: out holds "A^+".
}
// Sample inputs for the smoke test in main().
// s1: short non-ASCII text; main() both encodes and decodes it.
char s1[] = "女孩";
// s2: long mixed ASCII/Chinese file name; not referenced by main().
char s2[] = "BBC.单口喜剧的艺术.第二集.BBC.Imagine.2011.The.Art.of.Stand-up.Chi_Eng.HR-HDTV.AAC.1024X576.x264-YYeTs人人影视.mkv";
// s3: file name with percent-encoded bytes; main() decodes it.
// NOTE(review): the escaped bytes look like GBK rather than UTF-8 - confirm.
char s3[] = "%5B%C9%ED%D4%DA%C6%E4%D6%D0%5D.Inside.2007.x264.720P.DTS.BDRiP-CHD.mkv";
char s4[] = "璀︾晫閲戠.Golden.Boy.S01E01.HDTVrip.x264.鍙岃瀛楀箷-娣卞奖瀛楀箷缁mp4";
char s5[] = "閭伓宕囨嫓.Cult.S01E01.Chi_Eng.HDTVrip.1024X576-YYeTs浜轰汉褰辫.mkv";
int main() {
std::string st1 = UrlEncodeStringWithoutEncodingSpaceAsPlusUTF8(s1);
//bool f = IsStringASCII(st1);
std::string st2 = UrlDecodeString(s1);
//f = IsStringASCII(st2);
std::string st3 = UrlDecodeString(s3);
//f = IsStringASCII(st3);
//SourceTooLong();
//OneCharacterConversion();
//ShortDestinationNoEncoding();
//ShortDestinationEncoding();
//Encoding1();
//Encoding2();
//Decoding1();
//Decoding2();
}
浙公网安备 33010602011771号