charconv.hpp
source: wtcpp/folder98/folder02/folder1/file08.md
#pragma once
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#ifdef _MSC_VER
#include <windows.h>
#else
#include <iconv.h>
#endif
//
// Converts a UTF-8 encoded C string to the local multi-byte encoding:
// the ANSI code page (CP_ACP) on MSVC builds, "gbk" via iconv elsewhere.
//
// - A null input yields a null result.
// - Empty and pure-ASCII inputs are NOT copied: the object merely borrows the
//   caller's pointer, so the source string must outlive this object.
// - Any other input is converted into storage owned by this object. If the
//   conversion cannot be performed (e.g. the converter is unavailable) the
//   result is an empty string; if it stops part-way, the bytes produced so
//   far are kept.
//
class UTF8toChar
{
public :
    UTF8toChar(const char *utf8_string)
        : t_string(0)
    {
        init(utf8_string);
    }
    UTF8toChar(const std::string& utf8_string)
        : t_string(0)
    {
        init(utf8_string.c_str());
    }
    // (Re)initialises the object from utf8_string. Any previously converted
    // text is replaced.
    void init(const char *utf8_string)
    {
        if (0 == utf8_string)
        {
            t_string = 0;
        }
        else if (0 == *utf8_string)
        {
            t_string = "";
        }
        else if (isPureAscii(utf8_string))
        {
            // ASCII bytes are identical in both encodings: borrow the input.
            t_string = utf8_string;
        }
        else
        {
            // Build the result in a temporary first so the old buffer stays
            // intact while the input is still being read.
            std::string fresh = convert(utf8_string);
            converted.swap(fresh);
            t_string = converted.c_str();
        }
    }
    operator const char*() const
    {
        return t_string;
    }
    const char* c_str() const
    {
        return t_string;
    }
private :
    const char *t_string;   // result: null, borrowed from the caller, or converted.c_str()
    std::string converted;  // owns the bytes when a real conversion took place
    //
    // helper utility to test if a string contains only ASCII characters
    //
    static bool isPureAscii(const char *s)
    {
        while (*s != 0) { if (*(s++) & 0x80) return false; }
        return true;
    }
    //
    // Performs the actual UTF-8 -> local encoding conversion and returns the
    // converted bytes (empty when the conversion could not be performed).
    //
    static std::string convert(const char *utf8_string)
    {
        const std::size_t string_len = strlen(utf8_string);
        // Output buffer size: two bytes per input byte plus two spare bytes.
        std::string out(string_len * 2 + 2, '\0');
#ifdef _MSC_VER
        // UTF-8 -> UTF-16 -> ANSI code page. With a source length of -1 both
        // calls include the terminating NUL in the count they return and
        // return 0 on failure.
        std::wstring wide(string_len + 1, L'\0');
        const int wide_len = MultiByteToWideChar(CP_UTF8, 0, utf8_string, -1,
                                                 &wide[0], static_cast<int>(wide.size()));
        int written = 0;
        if (wide_len > 0)
            written = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1,
                                          &out[0], static_cast<int>(out.size()), 0, 0);
        out.resize(written > 0 ? static_cast<std::size_t>(written - 1) : 0);
#else
        std::size_t produced = 0;
        // iconv_open reports failure with (iconv_t)-1, not with 0.
        const iconv_t cd = iconv_open("gbk", "utf8");
        if (cd != (iconv_t)(-1))
        {
            // iconv advances the pointers and decrements the counters it is
            // given, so it must only ever see scratch copies.
            char *in_ptr = const_cast<char *>(utf8_string);
            char *out_ptr = &out[0];
            std::size_t in_left = string_len;
            std::size_t out_left = out.size();
            iconv(cd, &in_ptr, &in_left, &out_ptr, &out_left);
            iconv_close(cd);
            produced = out.size() - out_left;
        }
        out.resize(produced);
#endif
        return out;
    }
    //disable copying: t_string may point into this object's own storage
    UTF8toChar(const UTF8toChar &rhs);
    UTF8toChar &operator=(const UTF8toChar &rhs);
};
//
// Converts a C string in the local multi-byte encoding (the ANSI code page,
// CP_ACP, on MSVC builds; "gbk" via iconv elsewhere) to UTF-8.
//
// - A null input yields a null result.
// - Empty and pure-ASCII inputs are NOT copied: the object merely borrows the
//   caller's pointer, so the source string must outlive this object.
// - Any other input is converted into storage owned by this object. If the
//   conversion cannot be performed (e.g. the converter is unavailable) the
//   result is an empty string; if it stops part-way, the bytes produced so
//   far are kept.
//
class ChartoUTF8
{
public :
    ChartoUTF8(const std::string& str)
        : utf8_string(0)
    {
        init(str.c_str());
    }
    ChartoUTF8(const char *t_string)
        : utf8_string(0)
    {
        init(t_string);
    }
    // (Re)initialises the object from t_string. Any previously converted
    // text is replaced.
    void init(const char *t_string)
    {
        if (0 == t_string)
        {
            utf8_string = 0;
        }
        else if (0 == *t_string)
        {
            utf8_string = "";
        }
        else if (isPureAscii(t_string))
        {
            // ASCII bytes are identical in both encodings: borrow the input.
            utf8_string = t_string;
        }
        else
        {
            // Build the result in a temporary first so the old buffer stays
            // intact while the input is still being read.
            std::string fresh = convert(t_string);
            converted.swap(fresh);
            utf8_string = converted.c_str();
        }
    }
    operator const char*() const
    {
        return utf8_string;
    }
    const char* c_str() const
    {
        return utf8_string;
    }
private :
    const char *utf8_string; // result: null, borrowed from the caller, or converted.c_str()
    std::string converted;   // owns the bytes when a real conversion took place
    //
    // helper utility to test if a string contains only ASCII characters
    //
    static bool isPureAscii(const char *s)
    {
        while (*s != 0) { if (*(s++) & 0x80) return false; }
        return true;
    }
    //
    // Performs the actual local encoding -> UTF-8 conversion and returns the
    // converted bytes (empty when the conversion could not be performed).
    //
    static std::string convert(const char *t_string)
    {
        const std::size_t string_len = strlen(t_string);
        // Output buffer size: three bytes per input byte plus one spare byte.
        std::string out(string_len * 3 + 1, '\0');
#ifdef _MSC_VER
        // ANSI code page -> UTF-16 -> UTF-8. With a source length of -1 both
        // calls include the terminating NUL in the count they return and
        // return 0 on failure.
        std::wstring wide(string_len + 1, L'\0');
        const int wide_len = MultiByteToWideChar(CP_ACP, 0, t_string, -1,
                                                 &wide[0], static_cast<int>(wide.size()));
        int written = 0;
        if (wide_len > 0)
            written = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1,
                                          &out[0], static_cast<int>(out.size()), 0, 0);
        out.resize(written > 0 ? static_cast<std::size_t>(written - 1) : 0);
#else
        std::size_t produced = 0;
        // iconv_open reports failure with (iconv_t)-1, not with 0.
        const iconv_t cd = iconv_open("utf8", "gbk");
        if (cd != (iconv_t)(-1))
        {
            // iconv advances the pointers and decrements the counters it is
            // given, so it must only ever see scratch copies.
            char *in_ptr = const_cast<char *>(t_string);
            char *out_ptr = &out[0];
            std::size_t in_left = string_len;
            std::size_t out_left = out.size();
            iconv(cd, &in_ptr, &in_left, &out_ptr, &out_left);
            iconv_close(cd);
            produced = out.size() - out_left;
        }
        out.resize(produced);
#endif
        return out;
    }
    //disable copying: utf8_string may point into this object's own storage
    ChartoUTF8(const ChartoUTF8 &rhs);
    ChartoUTF8 &operator=(const ChartoUTF8 &rhs);
};
//
// Minimal percent-encoder for a byte string.
//
// - A space becomes "%20".
// - Every other byte below 0x80 is copied unchanged (reserved characters such
//   as '%' or '&' are NOT escaped).
// - Every byte with the high bit set becomes "%XX" with upper-case hex digits.
// - A null src produces an empty result.
//
class URLEncode
{
public:
    URLEncode(const char* src)
    {
        static const char hex[] = "0123456789ABCDEF";
        if (0 == src)
            return;
        // Measure once; calling strlen in the loop condition rescans the
        // whole input on every iteration.
        const std::size_t length = strlen(src);
        encoded_string.reserve(length);
        for (std::size_t i = 0; i < length; ++i)
        {
            // Classify the byte itself. Handing the address of a single char
            // to a NUL-terminated-string scanner would read past that char.
            const unsigned char c = static_cast<unsigned char>(src[i]);
            if (c == ' ')
            {
                encoded_string += "%20";
            }
            else if (c < 0x80)
            {
                encoded_string += src[i];
            }
            else
            {
                encoded_string += '%';
                encoded_string += hex[c / 16];
                encoded_string += hex[c % 16];
            }
        }
    }
    // The returned pointer stays valid for the lifetime of this object.
    operator const char*() const {return encoded_string.c_str();}
private:
    std::string encoded_string;
};
//
// Partial percent-decoder for a byte string.
//
// - '+' becomes a space.
// - "%XX" (two hex digits) is replaced by the byte it encodes, UNLESS that
//   byte is a character that may appear in a URL unencoded (see
//   staysEncoded); in that case the escape is left exactly as written.
// - A '%' not followed by two hex digits is copied unchanged.
// - Every other byte is copied unchanged.
// - A null src produces an empty result.
//
class URLDecode
{
public:
    URLDecode(const char* src)
    {
        if (0 == src)
            return;
        // Measure once; calling strlen in the loop condition rescans the
        // whole input on every iteration.
        const std::size_t length = strlen(src);
        decoded_string.reserve(length);
        for (std::size_t i = 0; i < length; ++i)
        {
            const char current = src[i];
            if (current == '+')
            {
                decoded_string += ' ';
            }
            else if (current == '%' && isHexPair(src + i + 1))
            {
                const char digits[3] = { src[i + 1], src[i + 2], '\0' };
                const int code = static_cast<int>(strtol(digits, 0, 16));
                if (staysEncoded(code))
                {
                    // Emit only the '%'; the two hex digits are copied
                    // verbatim by the following iterations.
                    decoded_string += '%';
                }
                else
                {
                    decoded_string += char(code);
                    i += 2;
                }
            }
            else
            {
                decoded_string += current;
            }
        }
    }
    // The returned pointer stays valid for the lifetime of this object.
    operator const char*() const {return decoded_string.c_str();}
private:
    // True when p[0] and p[1] are both hex digits. p[0] is at worst the
    // terminating NUL, and p[1] is only read once p[0] is known to be a digit,
    // so this never reads past the end of the string. The casts matter:
    // passing a negative char to isxdigit is undefined behaviour.
    static bool isHexPair(const char *p)
    {
        return isxdigit(static_cast<unsigned char>(p[0])) &&
               isxdigit(static_cast<unsigned char>(p[1]));
    }
    // Letters and digits [0-9a-zA-Z], some special symbols [$-_.+!*'(),] and
    // certain reserved characters [$&+,/:;=?@] can be used in a URL directly
    // without encoding; escapes for these codes are deliberately not decoded.
    static bool staysEncoded(int code)
    {
        if ((code >= 48 && code <= 57) ||   //0-9
            (code >= 97 && code <= 122) ||  //a-z
            (code >= 65 && code <= 90))     //A-Z
            return true;
        switch (code)
        {
        case 0x21: case 0x24: case 0x26: case 0x27: case 0x28: case 0x29: // ! $ & ' ( )
        case 0x2a: case 0x2b: case 0x2c: case 0x2d: case 0x2e: case 0x2f: // * + , - . /
        case 0x3a: case 0x3b: case 0x3d: case 0x3f: case 0x40: case 0x5f: // : ; = ? @ _
            return true;
        default:
            return false;
        }
    }
private:
    std::string decoded_string;
};