#include <cstring>
#include <new>
#include <string>

#if (defined _WIN32 || defined _WIN64)
# include <windows.h>
# include <stdio.h>
# include <ctype.h>
#elif defined(__linux__)
# include <iconv.h>
# include <wctype.h>
# include <wchar.h>
# include <errno.h>
#endif
11
using namespace std;

// Code page identifiers accepted by the conversion routines in this file.
#define CP_GBK 936
#define CP_UTF8 65001

// Capacity of the temporary buffer used by the iconv-based conversion loops:
// a byte count for the char buffers, an element count for the wchar_t buffer
// in s2ws. Testing showed that OUT_LEN = 10 converts three Chinese characters
// per pass.
static const int OUT_LEN = 200;

// Forward declarations of the string <-> wide string converters defined below.
std::wstring s2ws(std::string str, int code_page);
std::string ws2s(std::wstring wstr, int code_page);
24
25 /** @fn wstring s2ws(const string str, int code_page)
26 * @brief 从多字节字符串转为宽字符串
27 * @param str 源字符串
28 * @param code_page 要使用的代码页
29 * @return 成功返回宽字符串,失败返回空字符串
30 */
31 wstring s2ws(const string str, int code_page)
32 {
33 wstring wstr_dest;
34 if (str.size() == 0)
35 {
36 return wstr_dest;
37 }
38 wchar_t* wcs = NULL;
39 #ifdef _MSC_VER
40 //要转换的多字节字符串
41 int size = MultiByteToWideChar(code_page, 0, str.c_str(), -1, NULL, 0);
42 wcs = new(std::nothrow)wchar_t[size];
43 if (wcs == NULL)
44 {
45 return wstr_dest;
46 }
47 if (MultiByteToWideChar(code_page, 0, str.c_str(), -1, wcs, size) == 0)
48 {
49 wstr_dest.clear();
50 }
51 else
52 {
53 wstr_dest += wcs;
54 }
55 delete[] wcs;
56
57 #elif defined __linux
58 //申请临时缓冲区,用于保存转换后的字符串
59 wcs = new(std::nothrow)wchar_t[OUT_LEN];
60 if (wcs == NULL)
61 {
62 return wstr_dest;
63 }
64 iconv_t handle = (void*)-1;
65 switch (code_page)
66 {
67 case CP_GBK:
68 handle = iconv_open("UCS-4", "GBK");
69 break;
70 case CP_UTF8:
71 handle = iconv_open("UCS-4", "UTF-8");
72 break;
73 default:
74 //不支持
75 break;
76 }
77 if (handle == (void*)-1)
78 {
79 delete[] wcs;
80 return wstr_dest;
81 }
82
83 size_t nsrc = str.size()*sizeof(char);
84 char* src = (char*)str.c_str();
85 wchar_t* tmp = wcs;
86 size_t ndst = OUT_LEN * sizeof(wchar_t);
87 //需多次转换,直到转换完毕
88 while (nsrc>0)
89 {
90 memset(wcs, 0, OUT_LEN*sizeof(wchar_t));
91 tmp = wcs;
92 ndst = OUT_LEN * sizeof(wchar_t);
93 if (iconv(handle, (char**)&src, &nsrc, (char**)&tmp, &ndst) ==(size_t)-1 && errno != E2BIG)
94 {
95 wstr_dest.clear();
96 break;
97 }
98 wstr_dest += wstring(wcs, OUT_LEN - ndst/sizeof(wchar_t));
99 }
100 iconv_close(handle);
101 //释放临时缓冲区
102 delete[] wcs;
103
104 #endif
105 return wstr_dest;
106 }
107
108 /** @fn string ws2s(const wstring wstr, int code_page)
109 * @brief 从宽字符串转为多字节字符串
110 * @param wstr 源字符串
111 * @param code_page 要使用的代码页
112 * @return 成功返回多字节字符串,失败返回空字符串
113 */
114 string ws2s(const wstring wstr, int code_page)
115 {
116 string str_dest;
117 if (wstr.size() == 0)
118 {
119 return str_dest;
120 }
121 char *mbs = NULL;
122 #ifdef _MSC_VER
123 int size = WideCharToMultiByte(code_page, 0, wstr.c_str(), -1, NULL, 0, NULL, NULL);
124 mbs = new(std::nothrow) char[size];
125 if (NULL == mbs)
126 {
127 return str_dest;
128 }
129 if (0 == WideCharToMultiByte(code_page, 0, wstr.c_str(), -1, mbs, size, NULL, NULL))
130 {
131 str_dest.clear();
132 }
133 else
134 {
135 str_dest += mbs;
136 }
137 delete[] mbs;
138 #elif defined __linux
139 //申请临时缓冲区,用于保存转换后的字符串
140 mbs = new(std::nothrow)char[OUT_LEN];
141 if (NULL == mbs)
142 {
143 return str_dest;
144 }
145 iconv_t handle = (void*)-1;
146 switch (code_page)
147 {
148 case CP_GBK:
149 handle = iconv_open("GBK", "UCS-4");
150 break;
151 case CP_UTF8:
152 handle = iconv_open("UTF-8", "UCS-4");
153 break;
154 default:
155 //不支持
156 break;
157 }
158 if (handle == (void*)-1)
159 {
160 delete[] mbs;
161 return str_dest;
162 }
163
164 size_t nsrc = wstr.size() * sizeof(wchar_t);
165 wchar_t* src = (wchar_t*)wstr.c_str();
166 char* tmp = NULL;
167 size_t ndst = OUT_LEN;
168 //需多次转换,直到转换完毕
169 while (nsrc>0)
170 {
171 memset(mbs, 0, OUT_LEN);
172 tmp = mbs;
173 ndst = OUT_LEN;
174 if (iconv(handle, (char**)&src, &nsrc, (char**)&tmp, &ndst) ==(size_t)-1 && errno != E2BIG)
175 {
176 str_dest.clear();
177 break;
178 }
179 str_dest += string(mbs, OUT_LEN - ndst);
180 }
181 iconv_close(handle);
182 //释放临时缓冲区
183 delete[] mbs;
184
185 #endif
186 return str_dest;
187 }
188
189 /** @fn string utf82gbk(const string str_utf8)
190 * @brief 从UTF-8字符串转为GBK字符串
191 * @param str_utf8 源字符串
192 * @return 成功返回GBK字符串,失败返回空字符串
193 */
194 string utf82gbk(const string str_utf8)
195 {
196 string str_gbk;
197 #ifdef _MSC_VER
198 wstring wstr = s2ws(str_utf8, CP_UTF8);
199 str_gbk = ws2s(wstr, CP_GBK);
200 #elif defined __linux
201 //申请临时缓冲区,用于保存转换后的字符串
202 char* gbk = new(std::nothrow)char[OUT_LEN];
203 if (NULL == gbk)
204 {
205 return str_gbk;
206 }
207 iconv_t handle = iconv_open("GBK", "UTF-8");
208 if (handle == (void*)-1)
209 {
210 delete[] gbk;
211 return str_gbk;
212 }
213 size_t nsrc = str_utf8.size();
214 char* src = (char*)str_utf8.c_str();
215 char* tmp = NULL;
216 size_t ndst = OUT_LEN;
217 //需多次转换,直到转换完毕
218 while (nsrc > 0)
219 {
220 memset(gbk, 0, OUT_LEN);
221 tmp = gbk;
222 ndst = OUT_LEN;
223 if (iconv(handle, (char**)&src, &nsrc, (char**)&tmp, &ndst) ==(size_t)-1 && errno != E2BIG)
224 {
225 str_gbk.clear();
226 break;
227 }
228 str_gbk += string(gbk, OUT_LEN - ndst);
229 }
230 iconv_close(handle);
231 //释放临时缓冲区
232 delete[] gbk;
233 #endif
234 return str_gbk;
235 }
236
237 /** @fn string gbk2utf8(const string str_gbk)
238 * @brief 从GBK字符串转为UTF-8字符串
239 * @param str_gbk 源字符串指针
240 * @return 成功返回UTF-8字符串,失败返回空字符串
241 */
242 string gbk2utf8(const string str_gbk)
243 {
244 string str_utf8;
245 #ifdef _MSC_VER
246 wstring wstr = s2ws(str_gbk, CP_GBK);
247 str_utf8 = ws2s(wstr, CP_UTF8);
248 #elif defined __linux
249 //申请临时缓冲区,用于保存转换后的字符串
250 char* utf8 = new(std::nothrow)char[OUT_LEN];
251 if (NULL == utf8)
252 {
253 return str_utf8;
254 }
255 iconv_t handle = iconv_open("UTF-8", "GBK");
256 if (handle == (void*)-1)
257 {
258 delete[] utf8;
259 return str_utf8;
260 }
261 size_t nsrc = str_gbk.size();
262 char* src = (char*)str_gbk.c_str();
263 char* tmp = NULL;
264 size_t ndst = OUT_LEN;
265 //需多次转换,直到转换完毕
266 while (nsrc > 0)
267 {
268 memset(utf8, 0, OUT_LEN);
269 tmp = utf8;
270 ndst = OUT_LEN;
271 if (iconv(handle, (char**)&src, &nsrc, (char**)&tmp, &ndst) ==(size_t)-1 && errno != E2BIG)
272 {
273 str_utf8.clear();
274 break;
275 }
276 str_utf8 += string(utf8, OUT_LEN - ndst);
277 }
278 iconv_close(handle);
279 //释放临时缓冲区
280 delete[] utf8;
281 #endif
282 return str_utf8;
283 }
284
285
/**
 * @brief Convert a NUL-terminated wide string to UTF-8.
 * @param a_szSrc      NUL-terminated source wide string.
 * @param a_szDest     Destination buffer that receives the UTF-8 bytes,
 *                     including the terminating NUL.
 * @param a_nDestSize  Capacity of @p a_szDest in bytes.
 * @return Number of bytes written to @p a_szDest, including the terminating
 *         NUL, or 0 on failure (conversion error or buffer too small). The
 *         Linux path also returns 0 for a NULL pointer or a non-positive
 *         @p a_nDestSize.
 */
int Wchar2Utf8Convert( const wchar_t* a_szSrc, char* a_szDest, int a_nDestSize )
{
#if (defined _WIN32 || defined _WIN64)
    return WideCharToMultiByte( CP_UTF8, 0, a_szSrc, -1, a_szDest, a_nDestSize, NULL, NULL );
#elif defined(__linux__)
    // A negative size would wrap to a huge size_t and let iconv overrun the buffer.
    if (a_szSrc == NULL || a_szDest == NULL || a_nDestSize <= 0)
    {
        return 0;
    }

    iconv_t env = iconv_open("UTF-8", "WCHAR_T");
    if (env == (iconv_t)-1)
    {
        return 0;
    }

    // iconv never writes through the input pointer, so dropping const is safe.
    char* src = reinterpret_cast<char*>(const_cast<wchar_t*>(a_szSrc));
    size_t src_bytes = (wcslen(a_szSrc) + 1) * sizeof(wchar_t);   // +1 converts the terminator too
    char* dst = a_szDest;
    size_t dst_left = static_cast<size_t>(a_nDestSize);

    const size_t rc = iconv(env, &src, &src_bytes, &dst, &dst_left);
    // Close on every path so the descriptor is not leaked when iconv() fails.
    iconv_close(env);
    if (rc == (size_t)-1)
    {
        return 0;
    }

    // Report bytes produced (like the Windows path), not iconv's count of
    // irreversible conversions, which is 0 on success.
    return static_cast<int>(static_cast<size_t>(a_nDestSize) - dst_left);
#else
    // No conversion backend on this platform.
    (void)a_szSrc;
    (void)a_szDest;
    (void)a_nDestSize;
    return 0;
#endif
}
312
/**
 * @brief Convert a NUL-terminated UTF-8 string to a wide string.
 * @param a_szSrc      NUL-terminated source UTF-8 string.
 * @param a_szDest     Destination buffer that receives the wide characters,
 *                     including the terminating NUL.
 * @param a_nDestSize  Capacity of @p a_szDest in wide characters (not bytes).
 * @return Number of wide characters written to @p a_szDest, including the
 *         terminating NUL, or 0 on failure (invalid UTF-8, conversion error,
 *         or buffer too small). The Linux path also returns 0 for a NULL
 *         pointer or a non-positive @p a_nDestSize.
 */
int Utf82WcharConvert( const char* a_szSrc, wchar_t* a_szDest, int a_nDestSize )
{
#if (defined _WIN32 || defined _WIN64)
    return MultiByteToWideChar( CP_UTF8, 0, a_szSrc, -1, a_szDest, a_nDestSize );
#elif defined(__linux__)
    // A negative size would wrap to a huge size_t and let iconv overrun the buffer.
    if (a_szSrc == NULL || a_szDest == NULL || a_nDestSize <= 0)
    {
        return 0;
    }

    iconv_t env = iconv_open("WCHAR_T", "UTF-8");
    if (env == (iconv_t)-1)
    {
        return 0;
    }

    char* src = const_cast<char*>(a_szSrc);   // iconv never writes through the input pointer
    size_t src_bytes = strlen(a_szSrc) + 1;   // +1 converts the terminator too
    char* dst = reinterpret_cast<char*>(a_szDest);
    const size_t dst_total = static_cast<size_t>(a_nDestSize) * sizeof(wchar_t);
    size_t dst_left = dst_total;

    const size_t rc = iconv(env, &src, &src_bytes, &dst, &dst_left);
    // Close on every path so the descriptor is not leaked when iconv() fails.
    iconv_close(env);
    if (rc == (size_t)-1)
    {
        return 0;
    }

    // Report wide characters produced (like the Windows path), not iconv's
    // count of irreversible conversions, which is 0 on success.
    return static_cast<int>((dst_total - dst_left) / sizeof(wchar_t));
#else
    // No conversion backend on this platform.
    (void)a_szSrc;
    (void)a_szDest;
    (void)a_nDestSize;
    return 0;
#endif
}