00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00034 #include "blocxx/IConv.hpp"
00035
00036 #ifdef BLOCXX_HAVE_ICONV_H
00037 #include "blocxx/Assertion.hpp"
00038 #include "blocxx/Format.hpp"
00039 #include "blocxx/Exec.hpp"
00040
00041 #include <cwchar>
00042 #include <cwctype>
00043
00044 #include <errno.h>
00045
00046 namespace BLOCXX_NAMESPACE
00047 {
00048
00049
00050 IConv_t::IConv_t()
00051 : m_iconv(iconv_t(-1))
00052 {
00053 }
00054
00055
00056
00057 IConv_t::IConv_t(const String &fromEncoding, const String &toEncoding)
00058 {
00059 m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00060 if( m_iconv == iconv_t(-1))
00061 {
00062 BLOCXX_THROW(StringConversionException,
00063 Format("Unable to convert from \"%1\" to \"%2\"",
00064 fromEncoding, toEncoding).c_str());
00065 }
00066 }
00067
00068
00069
00070 IConv_t::~IConv_t()
00071 {
00072 close();
00073 }
00074
00075
00076
00077 bool
00078 IConv_t::open(const String &fromEncoding, const String &toEncoding)
00079 {
00080 close();
00081 m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00082 return ( m_iconv != iconv_t(-1));
00083 }
00084
00085
00086
00087 size_t
00088 IConv_t::convert(char **istr, size_t *ibytesleft,
00089 char **ostr, size_t *obytesleft)
00090 {
00091 return ::iconv(m_iconv, istr, ibytesleft, ostr, obytesleft);
00092 }
00093
00094
00095
00096 bool
00097 IConv_t::close()
00098 {
00099 bool ret = true;
00100 int err = errno;
00101
00102 if( m_iconv != iconv_t(-1))
00103 {
00104 if( ::iconv_close(m_iconv) == -1)
00105 ret = false;
00106 m_iconv = iconv_t(-1);
00107 }
00108
00109 errno = err;
00110 return ret;
00111 }
00112
00113
00114
00115 namespace IConv
00116 {
00117
00118
00119 static inline void
00120 mayThrowStringConversionException()
00121 {
00122 switch( errno)
00123 {
00124 case E2BIG:
00125 break;
00126
00127 case EILSEQ:
00128 BLOCXX_THROW(StringConversionException,
00129 "Invalid character or multibyte sequence in the input");
00130 break;
00131
00132 case EINVAL:
00133 default:
00134 BLOCXX_THROW(StringConversionException,
00135 "Incomplete multibyte sequence in the input");
00136 break;
00137 }
00138 }
00139
00140
00141 String
00142 fromByteString(const String &enc, const char *str, size_t len)
00143 {
00144 if( !str || len == 0)
00145 return String();
00146
00147 IConv_t iconv(enc, "UTF-8");
00148 String out;
00149 char obuf[4097];
00150 char *optr;
00151 size_t olen;
00152
00153 char *sptr = (char *)str;
00154 size_t slen = len;
00155
00156 while( slen > 0)
00157 {
00158 obuf[0] = '\0';
00159 optr = (char *)obuf;
00160 olen = sizeof(obuf) - sizeof(obuf[0]);
00161
00162 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00163 if( ret == size_t(-1))
00164 {
00165 mayThrowStringConversionException();
00166 }
00167 *optr = '\0';
00168 out += obuf;
00169 }
00170
00171 return out;
00172 }
00173
00174
00175
00176 String
00177 fromByteString(const String &enc, const std::string &str)
00178 {
00179 return fromByteString(enc, str.c_str(), str.length());
00180 }
00181
00182
00183
00184 String
00185 fromWideString(const String &enc, const std::wstring &str)
00186 {
00187 if( str.empty())
00188 return String();
00189
00190 IConv_t iconv(enc, "UTF-8");
00191 String out;
00192 char obuf[4097];
00193 char *optr;
00194 size_t olen;
00195
00196 char *sptr = (char *)str.c_str();
00197 size_t slen = str.length() * sizeof(wchar_t);
00198
00199 while( slen > 0)
00200 {
00201 obuf[0] = '\0';
00202 optr = (char *)obuf;
00203 olen = sizeof(obuf) - sizeof(obuf[0]);
00204
00205 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00206 if( ret == size_t(-1))
00207 {
00208 mayThrowStringConversionException();
00209 }
00210 *optr = '\0';
00211 out += obuf;
00212 }
00213
00214 return out;
00215 }
00216
00217
00218 std::string
00219 toByteString(const String &enc, const String &utf8)
00220 {
00221 if( utf8.empty())
00222 return std::string();
00223
00224 IConv_t iconv("UTF-8", enc);
00225 std::string out;
00226 char obuf[4097];
00227 char *optr;
00228 size_t olen;
00229
00230 char *sptr = (char *)utf8.c_str();
00231 size_t slen = utf8.length();
00232
00233 while( slen > 0)
00234 {
00235 obuf[0] = '\0';
00236 optr = (char *)obuf;
00237 olen = sizeof(obuf) - sizeof(obuf[0]);
00238
00239 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00240 if( ret == size_t(-1))
00241 {
00242 mayThrowStringConversionException();
00243 }
00244 *optr = '\0';
00245 out += obuf;
00246 }
00247
00248 return out;
00249 }
00250
00251
00252 std::wstring
00253 toWideString(const String &enc, const String &utf8)
00254 {
00255 if( utf8.empty())
00256 return std::wstring();
00257
00258 IConv_t iconv("UTF-8", enc);
00259 std::wstring out;
00260 wchar_t obuf[1025];
00261 char *optr;
00262 size_t olen;
00263
00264 char *sptr = (char *)utf8.c_str();
00265 size_t slen = utf8.length();
00266
00267 while( slen > 0)
00268 {
00269 obuf[0] = L'\0';
00270 optr = (char *)obuf;
00271 olen = sizeof(obuf) - sizeof(obuf[0]);
00272
00273 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00274 if( ret == size_t(-1))
00275 {
00276 mayThrowStringConversionException();
00277 }
00278 *((wchar_t *)optr) = L'\0';
00279 out += obuf;
00280 }
00281
00282 return out;
00283 }
00284
00285
00286 #if 0
00287
00288 StringArray
00289 encodings()
00290 {
00291 StringArray command;
00292 String output;
00293 int status = -1;
00294
00295 command.push_back("/usr/bin/iconv");
00296 command.push_back("--list");
00297
00298 try
00299 {
00300 Exec::executeProcessAndGatherOutput(command, output, status);
00301 }
00302 catch(...)
00303 {
00304 }
00305
00306 if(status == 0)
00307 {
00308 return output.tokenize("\r\n");
00309 }
00310 return StringArray();
00311 }
00312 #endif
00313
00314
00315 }
00316 }
00317
00318 #endif // BLOCXX_HAVE_ICONV_H
00319
00320
00321