Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

IConv.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002 * Copyright (C) 2005 Novell, Inc. All rights reserved.
00003 *
00004 * Redistribution and use in source and binary forms, with or without
00005 * modification, are permitted provided that the following conditions are met:
00006 *
00007 *  - Redistributions of source code must retain the above copyright notice,
00008 *    this list of conditions and the following disclaimer.
00009 *
00010 *  - Redistributions in binary form must reproduce the above copyright notice,
00011 *    this list of conditions and the following disclaimer in the documentation
00012 *    and/or other materials provided with the distribution.
00013 *
00014 *  - Neither the name of Novell, Inc., nor the names of its
00015 *    contributors may be used to endorse or promote products derived from this
00016 *    software without specific prior written permission.
00017 *
00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
00019 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 * ARE DISCLAIMED. IN NO EVENT SHALL Novell, Inc., OR THE 
00022 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
00026 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
00027 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
00028 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 *******************************************************************************/
00034 #include "blocxx/IConv.hpp"
00035 
00036 #ifdef BLOCXX_HAVE_ICONV_H
00037 #include "blocxx/Assertion.hpp"
00038 #include "blocxx/Format.hpp"
00039 #include "blocxx/Exec.hpp"
00040 
00041 #include <cwchar>
00042 #include <cwctype>
00043 
00044 #include <errno.h>
00045 
00046 namespace BLOCXX_NAMESPACE
00047 {
00048 
00049 // -------------------------------------------------------------------
00050 IConv_t::IConv_t()
00051    : m_iconv(iconv_t(-1))
00052 {
00053 }
00054 
00055 
00056 // -------------------------------------------------------------------
00057 IConv_t::IConv_t(const String &fromEncoding, const String &toEncoding)
00058 {
00059    m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00060    if( m_iconv == iconv_t(-1))
00061    {
00062       BLOCXX_THROW(StringConversionException,
00063                    Format("Unable to convert from \"%1\" to \"%2\"",
00064                           fromEncoding, toEncoding).c_str());
00065    }
00066 }
00067 
00068 
00069 // -------------------------------------------------------------------
00070 IConv_t::~IConv_t()
00071 {
00072    close();
00073 }
00074 
00075 
00076 // -------------------------------------------------------------------
00077 bool
00078 IConv_t::open(const String &fromEncoding, const String &toEncoding)
00079 {
00080    close();
00081    m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00082    return ( m_iconv != iconv_t(-1));
00083 }
00084 
00085 
00086 // -------------------------------------------------------------------
00087 size_t
00088 IConv_t::convert(char **istr, size_t *ibytesleft,
00089                char **ostr, size_t *obytesleft)
00090 {
00091    return ::iconv(m_iconv, istr, ibytesleft, ostr, obytesleft);
00092 }
00093 
00094 
00095 // -------------------------------------------------------------------
00096 bool
00097 IConv_t::close()
00098 {
00099    bool ret = true;
00100    int  err = errno;
00101 
00102    if( m_iconv != iconv_t(-1))
00103    {
00104       if( ::iconv_close(m_iconv) == -1)
00105          ret = false;
00106       m_iconv = iconv_t(-1);
00107    }
00108 
00109    errno = err;
00110    return ret;
00111 }
00112 
00113 
00114 // *******************************************************************
00115 namespace IConv
00116 {
00117 
00118 // -------------------------------------------------------------------
00119 static inline void
00120 mayThrowStringConversionException()
00121 {
00122    switch( errno)
00123    {
00124       case E2BIG:
00125       break;
00126 
00127       case EILSEQ:
00128          BLOCXX_THROW(StringConversionException,
00129          "Invalid character or multibyte sequence in the input");
00130       break;
00131 
00132       case EINVAL:
00133       default:
00134          BLOCXX_THROW(StringConversionException,
00135          "Incomplete multibyte sequence in the input");
00136       break;
00137    }
00138 }
00139 
00140 // -------------------------------------------------------------------
00141 String
00142 fromByteString(const String &enc, const char *str, size_t len)
00143 {
00144    if( !str || len == 0)
00145       return String();
00146 
00147    IConv_t      iconv(enc, "UTF-8"); // throws error
00148    String       out;
00149    char         obuf[4097];
00150    char        *optr;
00151    size_t       olen;
00152 
00153    char        *sptr = (char *)str;
00154    size_t       slen = len;
00155 
00156    while( slen > 0)
00157    {
00158       obuf[0] = '\0';
00159       optr = (char *)obuf;
00160       olen = sizeof(obuf) - sizeof(obuf[0]);
00161 
00162       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00163       if( ret == size_t(-1))
00164       {
00165          mayThrowStringConversionException();
00166       }
00167       *optr = '\0';
00168       out  += obuf;
00169    }
00170 
00171    return out;
00172 }
00173 
00174 
00175 // -------------------------------------------------------------------
00176 String
00177 fromByteString(const String &enc, const std::string  &str)
00178 {
00179    return fromByteString(enc, str.c_str(), str.length());
00180 }
00181 
00182 
00183 // -------------------------------------------------------------------
00184 String
00185 fromWideString(const String &enc, const std::wstring &str)
00186 {
00187    if( str.empty())
00188       return String();
00189 
00190    IConv_t      iconv(enc, "UTF-8"); // throws error
00191    String       out;
00192    char         obuf[4097];
00193    char        *optr;
00194    size_t       olen;
00195 
00196    char        *sptr = (char *)str.c_str();
00197    size_t       slen = str.length() * sizeof(wchar_t);
00198 
00199    while( slen > 0)
00200    {
00201       obuf[0] = '\0';
00202       optr = (char *)obuf;
00203       olen = sizeof(obuf) - sizeof(obuf[0]);
00204 
00205       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00206       if( ret == size_t(-1))
00207       {
00208          mayThrowStringConversionException();
00209       }
00210       *optr = '\0';
00211       out  += obuf;
00212    }
00213 
00214    return out;
00215 }
00216 
00217 // -------------------------------------------------------------------
00218 std::string
00219 toByteString(const String &enc, const String &utf8)
00220 {
00221    if( utf8.empty())
00222       return std::string();
00223 
00224    IConv_t      iconv("UTF-8", enc); // throws error
00225    std::string  out;
00226    char         obuf[4097];
00227    char        *optr;
00228    size_t       olen;
00229 
00230    char        *sptr = (char *)utf8.c_str();
00231    size_t       slen = utf8.length();
00232 
00233    while( slen > 0)
00234    {
00235       obuf[0] = '\0';
00236       optr = (char *)obuf;
00237       olen = sizeof(obuf) - sizeof(obuf[0]);
00238 
00239       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00240       if( ret == size_t(-1))
00241       {
00242          mayThrowStringConversionException();
00243       }
00244       *optr = '\0';
00245       out  += obuf;
00246    }
00247 
00248    return out;
00249 }
00250 
00251 // -------------------------------------------------------------------
00252 std::wstring
00253 toWideString(const String &enc, const String &utf8)
00254 {
00255    if( utf8.empty())
00256       return std::wstring();
00257 
00258    IConv_t      iconv("UTF-8", enc); // throws error
00259    std::wstring out;
00260    wchar_t      obuf[1025];
00261    char        *optr;
00262    size_t       olen;
00263 
00264    char        *sptr = (char *)utf8.c_str();
00265    size_t       slen = utf8.length();
00266 
00267    while( slen > 0)
00268    {
00269       obuf[0] = L'\0';
00270       optr = (char *)obuf;
00271       olen = sizeof(obuf) - sizeof(obuf[0]);
00272 
00273       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00274       if( ret == size_t(-1))
00275       {
00276          mayThrowStringConversionException();
00277       }
00278       *((wchar_t *)optr) = L'\0';
00279       out += obuf;
00280    }
00281 
00282    return out;
00283 }
00284 
00285 
00286 #if 0
00287 // -------------------------------------------------------------------
00288 StringArray
00289 encodings()
00290 {
00291    StringArray   command;
00292    String        output;
00293    int           status = -1;
00294 
00295    command.push_back("/usr/bin/iconv");
00296    command.push_back("--list");
00297 
00298    try
00299    {
00300       Exec::executeProcessAndGatherOutput(command, output, status);
00301    }
00302    catch(...)
00303    {
00304    }
00305 
00306    if(status == 0)
00307    {
00308       return output.tokenize("\r\n");
00309    }
00310    return StringArray();
00311 }
00312 #endif
00313 
00314 
00315 }  // End of IConv namespace
00316 }  // End of BLOCXX_NAMESPACE
00317 
00318 #endif // BLOCXX_HAVE_ICONV_H
00319 
00320 /* vim: set ts=8 sts=8 sw=8 ai noet: */
00321 

Generated on Mon Sep 12 23:56:34 2005 for blocxx by  doxygen 1.4.4