kdecore Library API Documentation

kcharsets.cpp

00001 /* This file is part of the KDE libraries
00002     Copyright (C) 1999 Lars Knoll (knoll@kde.org)
00003 
00004     This library is free software; you can redistribute it and/or
00005     modify it under the terms of the GNU Library General Public
00006     License as published by the Free Software Foundation; either
00007     version 2 of the License, or (at your option) any later version.
00008 
00009     This library is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012     Library General Public License for more details.
00013 
00014     You should have received a copy of the GNU Library General Public License
00015     along with this library; see the file COPYING.LIB.  If not, write to
00016     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00017     Boston, MA 02111-1307, USA.
00018 */
00019 #include "kcharsets.h"
00020 
00021 #include "kentities.c"
00022 
00023 #include <kapplication.h>
00024 #include <kglobal.h>
00025 #include <klocale.h>
00026 #include <kconfig.h>
00027 
00028 #include <qfontinfo.h>
00029 #include <qstrlist.h>
00030 #include <qfontdatabase.h>
00031 #include <kdebug.h>
00032 
00033 #include <qtextcodec.h>
00034 #include <qmap.h>
00035 #include <qcstring.h>
00036 
00037 #include <assert.h>
00038 
// NOTE(review): CHARSETS_COUNT is not referenced anywhere in the visible part of this
// file, and what exactly it counts cannot be told from here -- confirm before relying on it.
00039 #define CHARSETS_COUNT 33
00040 
// Untranslated display names of the language/script groups.
// KCharsets::languageForEncoding() indexes this array with the `data` value found in
// language_for_encoding[] and passes the selected string to i18n(), so the position of
// every entry must stay in sync with the numbers used in that table.
// Index 0 ("Other") is also what an unknown encoding resolves to, because the table
// search returns 0 when nothing matches.
00041 static const char * const language_names[] = {
00042     I18N_NOOP( "Other" ),               // 0
00043     I18N_NOOP( "Arabic" ),              // 1
00044     I18N_NOOP( "Baltic" ),              // 2
00045     I18N_NOOP( "Central European" ),    // 3
00046     I18N_NOOP( "Chinese Simplified" ),  // 4
00047     I18N_NOOP( "Chinese Traditional" ), // 5
00048     I18N_NOOP( "Cyrillic" ),            // 6
00049     I18N_NOOP( "Greek" ),               // 7
00050     I18N_NOOP( "Hebrew" ),              // 8
00051     I18N_NOOP( "Japanese" ),            // 9
00052     I18N_NOOP( "Korean" ),              // 10
00053     I18N_NOOP( "Thai" ),                // 11
00054     I18N_NOOP( "Turkish" ),             // 12
00055     I18N_NOOP( "Western European" ),    // 13
00056     I18N_NOOP( "Tamil" ),               // 14
00057     I18N_NOOP( "Unicode" ),             // 15
00058     I18N_NOOP( "Northern Saami" )       // 16
00059 };
00060 
// Maps an encoding to the charsets that can be used to display a file given in that
// encoding.
//
// The table is one flat sequence of rows, each of the form
//     <encoding>, <charset>, <charset>, ..., 0
// The left side is the name returned by the codec used, the right side lists the names
// of the charsets as used in kcharsets.cpp, in order of preference.
// 'unicode' will always be chosen as last resort, so it only needs to be added to a row
// if it should get a higher priority.
// Every row must end with 0, and one extra 0 terminates the whole table.
// KCharsets::availableEncodingNames() walks these rows and reports the left-hand name
// of each row.
//
// NOTE(review): a few charset names are spelled differently from the rest of the table
// ("iso-8859-5", "iso-8859-2", "cp1252") -- confirm whether that is intentional.

static const char* const charsets_for_encoding[] = {
    "koi8-r",                "koi8-r","cp 1251","koi8-u","iso-8859-5", 0,
    "koi8-u",                "koi8-u","cp 1251","iso-8859-5","koi8-r", 0,
    "iso 8859-1",            "iso8859-1","iso8859-15", 0,
    "iso 8859-2",            "iso8859-2","unicode","iso8859-1", 0,
    "iso 8859-3",            "iso8859-3","unicode","iso8859-1", 0,
    "iso 8859-4",            "iso8859-4","unicode","iso8859-13", "iso8859-1", 0,
    "iso 8859-5",            "iso8859-5","koi8-u","koi8-r", 0,
    "iso 8859-6",            "unicode","iso8859-6", 0,
    "iso 8859-7",            "iso8859-7", 0,
    "iso 8859-8",            "iso8859-8", 0,
    "iso 8859-8-i",          "iso8859-8", 0,
    "iso 8859-9",            "iso8859-9","unicode","iso8859-1", 0,
    "iso 8859-11",           "iso8859-11", 0,
    "iso 8859-13",           "iso8859-13","unicode","iso8859-4", "iso8859-1", 0,
    "iso 8859-15",           "iso8859-15","unicode","iso8859-1", 0,
    "utf8",                  "unicode","iso8859-1", 0,
    "utf16",                 "unicode","iso8859-1", 0,
    "iso-10646-ucs-2",       "unicode","iso8859-1", 0,
    "cp 1250",               "iso8859-2", 0,
    "cp 1251",               "cp 1251","koi8-u","koi8-r","iso8859-5", 0,
    "cp 1252",               "iso8859-1", 0,
    "cp 1253",               "iso8859-7", 0,
    "cp 1254",               "iso8859-9", 0,
    "cp 1255",               "iso8859-8", 0,
    "cp 1256",               "unicode","iso8859-6", 0,
    "cp 1257",               "iso8859-13", "iso8859-4", 0,
    "ibm850",                "ibm850","unicode","iso8859-1", 0,
    "ibm852",                "unicode","iso-8859-2", 0,
    "ibm866",                "ibm866","cp 1251","koi8-u","koi8-r","iso8859-5", 0,
    "tis620",                "iso8859-11", 0,
    "eucjp",                 "eucjp","unicode","iso8859-1", 0,
    "sjis",                  "eucjp","unicode","iso8859-1", 0,
    "jis7",                  "eucjp","unicode","iso8859-1", 0,
    "big5",                  "big5","unicode","iso8859-1", 0,
    "gbk",                   "gb2312.1980-0","gbk-0","unicode","iso8859-1", "gb_2312-80", 0,
    // was "gb2313.1980-0": a typo for the GB 2312 charset name used in the rows around it
    "gb18030",               "gb18030.2000-1", "gb18030.2000-0", "unicode", "gbk-0", "gb2312.1980-0", "iso8859-1", 0,
    "gb2312",                "gb2312.1980-0","unicode","iso8859-1", 0,
    "euckr",                 "euckr","unicode","iso8859-1", 0,
    "tscii",                 "tscii", 0,
    "pt 154",                "pt 154","cp 1251","koi8-u","koi8-r","iso8859-5", 0,
    "winsami2",              "winsami2", "cp1252", "unicode", 0,
    0 }; // extra 0 for end
00112 
// Maps an encoding name (index) to a position in language_names[] (data).
// KCharsets::languageForEncoding() searches this table with the encoding name it is
// given; an encoding that is not listed resolves to 0, i.e. "Other".
//
// Meaning of the numbers (must match the order of language_names[]):
//  0 Other
//  1 Arabic
//  2 Baltic
//  3 Central European
//  4 Chinese Simplified
//  5 Chinese Traditional
//  6 Cyrillic
//  7 Greek
//  8 Hebrew
//  9 Japanese
// 10 Korean
// 11 Thai
// 12 Turkish
// 13 Western European
// 14 Tamil
// 15 Unicode
// 16 Northern Saami
// ### FIXME KDE4: the name of the encodings should mostly be uppercase
static struct LanguageForEncoding
    {
    const char* index;
    int data;
    } const language_for_encoding[] = {
    { "iso 8859-1", 13 },
    { "iso 8859-15", 13 },
    { "cp 1252", 13 },
    { "ibm850", 13 },
    { "iso 8859-2", 3 },
    { "iso 8859-3", 3 },
    { "iso 8859-4", 2 },
    { "iso 8859-13", 2 },
    { "cp 1250", 3 },
    { "cp 1254", 12 },
    { "cp 1257", 2 },
    { "ibm852", 3 },
    { "koi8-r", 6 },
    { "iso 8859-5", 6 },
    { "cp 1251", 6 },
    { "koi8-u", 6 },
    { "pt 154", 6 },
    { "ibm866", 6 },
    { "big5", 5 },
    { "gb18030", 4 },
    { "gbk", 4 },
    { "gb2312", 4 },
    { "euckr", 10 },
    { "sjis", 9 },
    { "jis7", 9 },
    { "eucjp", 9 },
    { "iso 8859-7", 7 },
    { "cp 1253", 7 },
    { "iso 8859-6", 1 },
    { "cp 1256", 1 },
    { "iso 8859-8", 8 },
    { "iso 8859-8-i", 8 },
    { "cp 1255", 8 },
    { "iso 8859-9", 12 },
    { "tis620", 11 },
    { "iso 8859-11", 11 },
    // "tscii" is advertised by charsets_for_encoding[]; without this entry it was
    // reported as "Other" and the "Tamil" language name was unreachable.
    { "tscii", 14 },
    { "utf8", 15 },
    { "utf16", 15 },
    { "utf7", 15 }, // ### FIXME: UTF-7 is not in Qt
    { "ucs2", 15 },
    { "iso-10646-ucs-2", 15 },
    { "winsami2", 16},
    { 0, 0 } };
00179 
// Defines some different names for codecs that are built into Qt.
// KCharsets::codecForName() consults this table when QTextCodec::codecForName() did not
// recognize the (lower-cased) name it was given: `index` is the alternative spelling,
// `data` is the name that is then handed to QTextCodec::codecForName().
// The first matching entry wins; the { 0, 0 } entry terminates the table.
static struct Builtin
    {
    const char* index;
    const char* data;
    } const builtin[] = {
    { "iso-ir-111", "koi8-r" },
    { "koi8-ru", "koi8-u" },
    { "koi8r", "koi8-r" },
    { "koi8u", "koi8-u" },
    { "koi unified", "koi8-r" },
    { "us-ascii", "iso 8859-1" },
    { "usascii", "iso 8859-1" },
    { "x-utf-8", "utf-8" },
    { "x-utf-7", "utf-7" }, // ### FIXME: UTF-7 is not in Qt
    { "unicode-1-1-utf-7", "utf-7" }, // ### FIXME: UTF-7 is not in Qt
    { "utf-16", "iso-10646-ucs-2" },
    { "utf16", "iso-10646-ucs-2" },
    { "ucs2", "iso-10646-ucs-2" },
    { "iso10646-1", "iso-10646-ucs-2" },
    { "gb18030.2000-1", "gb18030" },
    { "gb18030.2000-0", "gb18030" },
    { "gbk-0", "gbk" },
    { "gb2312", "gbk" },
    { "gb2312.1980-0", "gbk" },
    { "gb_2312-80", "gbk" },/* this one is not official, but MS is using it :/ */
    { "big5-0", "big5" },
    { "euc-kr", "euckr" },
    { "x-euc-kr", "euckr" },
    { "euc-jp", "eucjp" },
    { "x-euc-jp", "eucjp" },
    { "jisx0201.1976-0", "eucjp" },
    { "jisx0208.1983-0", "eucjp" },
    { "jisx0208.1990-0", "eucjp" },
    { "jisx0208.1997-0", "eucjp" },
    { "jisx0212.1990-0", "eucjp" },
    { "jisx0213.2000-1", "eucjp" },
    { "jisx0213.2000-2", "eucjp" },
    { "shift_jis", "sjis" },
    { "shift-jis", "sjis" },
    { "x-sjis", "sjis" },
    { "iso-2022-jp", "jis7" },
    { "windows850", "ibm850" },
    { "windows866", "ibm866" },
    // "windows1250" was the only code page missing from this run although every
    // other spelling of it (windows-1250, x-windows-1250, cp-1250, x-cp-1250) is listed.
    { "windows1250", "cp 1250" },
    { "windows1251", "cp 1251" },
    { "windows1252", "cp 1252" },
    { "windows1253", "cp 1253" },
    { "windows1254", "cp 1254" },
    { "windows1255", "cp 1255" },
    { "windows1256", "cp 1256" },
    { "windows1257", "cp 1257" },
    { "windows-850", "ibm850" },
    { "windows-866", "ibm866" },
    { "windows-1250", "cp 1250" },
    { "windows-1251", "cp 1251" },
    { "windows-1252", "cp 1252" },
    { "windows-1253", "cp 1253" },
    { "windows-1254", "cp 1254" },
    { "windows-1255", "cp 1255" },
    { "windows-1256", "cp 1256" },
    { "windows-1257", "cp 1257" },
    { "x-windows-850", "ibm850" },
    { "x-windows-866", "ibm866" },
    { "x-windows-1250", "cp 1250" },
    { "x-windows-1251", "cp 1251" },
    { "x-windows-1252", "cp 1252" },
    { "x-windows-1253", "cp 1253" },
    { "x-windows-1254", "cp 1254" },
    { "x-windows-1255", "cp 1255" },
    { "x-windows-1256", "cp 1256" },
    { "x-windows-1257", "cp 1257" },
    { "cp850", "ibm850" },
    { "cp866", "ibm866" },
    { "cp-850", "ibm850" },
    { "cp-866", "ibm866" },
    { "cp-1250", "cp 1250" },
    { "cp-1251", "cp 1251" },
    { "cp-1252", "cp 1252" },
    { "cp-1253", "cp 1253" },
    { "cp-1254", "cp 1254" },
    { "cp-1255", "cp 1255" },
    { "cp-1256", "cp 1256" },
    { "cp-1257", "cp 1257" },
    { "cp-10000", "apple roman" },
    { "x-cp-850", "ibm850" },
    { "x-cp-866", "ibm866" },
    { "x-cp-1250", "cp 1250" },
    { "x-cp-1251", "cp 1251" },
    { "x-cp-1252", "cp 1252" },
    { "x-cp-1253", "cp 1253" },
    { "x-cp-1254", "cp 1254" },
    { "x-cp-1255", "cp 1255" },
    { "x-cp-1256", "cp 1256" },
    { "x-cp-1257", "cp 1257" },
    { "x-cp-10000", "apple roman" },
    { "tis620", "iso 8859-11" },
    { "tis-620", "iso 8859-11" },
    { "thai-tis620", "iso 8859-11" },
    { "windows-874", "iso 8859-11" },
    { "windows874", "iso 8859-11" },
    { "x-windows-874", "iso 8859-11" },
    { "cp874", "iso 8859-11" },
    { "cp-874", "iso 8859-11" },
    { "x-cp-874", "iso 8859-11" },
    { "ksc5601.1987-0", "euckr" },
    { "ks_c_5601-1987", "euckr" },
    { "iso-8859-1", "iso 8859-1" },
    { "iso-8859-2", "iso 8859-2" },
    { "iso-8859-3", "iso 8859-3" },
    { "iso-8859-4", "iso 8859-4" },
    { "iso-8859-5", "iso 8859-5" },
    { "iso-8859-6", "iso 8859-6" },
    { "iso-8859-7", "iso 8859-7" },
    { "iso-8859-8", "iso 8859-8" },
    { "iso-8859-9", "iso 8859-9" },
    { "iso-8859-10", "iso 8859-10" },
    { "iso-8859-11", "iso 8859-11" },
    { "iso-8859-12", "iso 8859-12" },
    { "iso-8859-13", "iso 8859-13" },
    { "iso-8859-14", "iso 8859-14" },
    { "iso-8859-15", "iso 8859-15" },
    { "tscii", "tscii" },
    { "paratype-154", "pt 154" },
    { "pt-154", "pt 154" },
    { "x-winsami2", "winsami2" },
    { "x-mac-roman", "apple roman" },
    { "macintosh", "apple roman" },
    { "mac", "apple roman" },
    { 0, 0 }};
00309 
00310 // Some different names for the encodings defined in the charmaps files.
00311 // Even though the charmap file names are all uppercase, the names are all lowercase here.
// KCharsets::codecForName() looks a name up in this table only after both the Qt codec
// lookup and the builtin[] table have failed; the name found here (or the unchanged name
// when there is no match) is upper-cased and used as the charmap file name to load.
// index: alternative spelling, data: charmap name. The { 0, 0 } entry terminates the table.
00312 static struct Aliases
00313     {
00314     const char* index;
00315     const char* data;
00316     } const aliases[] = {
00317     { "cp852", "ibm852" },
00318     { "cp-852", "ibm852" },
00319     { "x-cp-852", "ibm852" },
00320     { "windows852", "ibm852" },
00321     { "windows-852", "ibm852" },
00322     { "x-windows-852", "ibm852" },
00323     { 0, 0 }};
00324 
00325 // Some last-resort hints in case the charmap file couldn't be found. This gives at least a partial conversion
00326 // and helps to keep things readable.
00327 // The name used as input here is already converted to the more canonical name as defined in the aliases array.
// KCharsets::codecForName() consults this table last, with the lower-cased name it just
// failed to load as a charmap file.
// index: encoding that could not be provided, data: name of the codec to try instead.
// The { 0, 0 } entry terminates the table.
00328 static struct ConversionHints
00329     {
00330     const char* index;
00331     const char* data;
00332     } const conversion_hints[] = {
00333     { "cp1250", "iso-8859-2" },
00334     { "koi8-r", "iso-8859-5" },
00335     { "koi8-u", "koi8-r" },
00336     { 0, 0 }};
00337 
00338 
// Linear search through a table of { index, data } records.
//
// T is the record type; it must provide a `const char* index` member and a `data`
// member convertible to Data. The table must be terminated by a record whose index is 0.
// Returns the data of the first record whose index compares equal to `entry`
// (using qstrcmp), or 0 converted to Data when no record matches.
template< typename T, typename Data >
static Data kcharsets_array_search( const T* start, const char* entry )
{
    const T* item = start;
    while( item->index != 0 ) {
        if( qstrcmp( item->index, entry ) == 0 )
            return item->data;
        ++item;
    }
    return 0;
}
00351 
00352 
00353 class KCharsetsPrivate
00354 {
00355 public:
00356     KCharsetsPrivate(KCharsets* _kc)
00357         : codecForNameDict(43, false) // case insensitive
00358     {
00359         db = 0;
00360         kc = _kc;
00361     }
00362     ~KCharsetsPrivate()
00363     {
00364         delete db;
00365     }
00366     QFontDatabase *db;
00367     QAsciiDict<QTextCodec> codecForNameDict;
00368     KCharsets* kc;
00369 };
00370 
00371 // --------------------------------------------------------------------------
00372 
// Constructs the object and allocates its private data, handing the private
// object a pointer back to this instance.
00373 KCharsets::KCharsets()
00374 {
00375     d = new KCharsetsPrivate(this);
00376 }
00377 
// Destroys the object and deletes the private data held in d.
00378 KCharsets::~KCharsets()
00379 {
00380     delete d;
00381 }
00382 
00383 QChar KCharsets::fromEntity(const QString &str)
00384 {
00385     QChar res = QChar::null;
00386 
00387     int pos = 0;
00388     if(str[pos] == '&') pos++;
00389 
00390     // Check for '&#000' or '&#x0000' sequence
00391     if (str[pos] == '#' && str.length()-pos > 1) {
00392         bool ok;
00393         pos++;
00394         if (str[pos] == 'x' || str[pos] == 'X') {
00395             pos++;
00396             // '&#x0000', hexadeciaml character reference
00397             QString tmp(str.unicode()+pos, str.length()-pos);
00398             res = tmp.toInt(&ok, 16);
00399         } else {
00400             //  '&#0000', deciaml character reference
00401             QString tmp(str.unicode()+pos, str.length()-pos);
00402             res = tmp.toInt(&ok, 10);
00403         }
00404         return res;
00405     }
00406 
00407     const entity *e = kde_findEntity(str.ascii(), str.length());
00408 
00409     if(!e)
00410     {
00411         //kdDebug( 0 ) << "unknown entity " << str <<", len = " << str.length() << endl;
00412         return QChar::null;
00413     }
00414     //kdDebug() << "got entity " << str << " = " << e->code << endl;
00415 
00416     return QChar(e->code);
00417 }
00418 
00419 QChar KCharsets::fromEntity(const QString &str, int &len)
00420 {
00421     // entities are never longer than 8 chars... we start from
00422     // that length and work backwards...
00423     len = 8;
00424     while(len > 0)
00425     {
00426         QString tmp = str.left(len);
00427         QChar res = fromEntity(tmp);
00428         if( res != QChar::null ) return res;
00429         len--;
00430     }
00431     return QChar::null;
00432 }
00433 
00434 
00435 QString KCharsets::toEntity(const QChar &ch)
00436 {
00437     QString ent;
00438     ent.sprintf("&#0x%x;", ch.unicode());
00439     return ent;
00440 }
00441 
00442 QString KCharsets::resolveEntities( const QString &input )
00443 {
00444     QString text = input;
00445     const QChar *p = text.unicode();
00446     const QChar *end = p + text.length();
00447     const QChar *ampersand = 0;
00448     bool scanForSemicolon = false;
00449 
00450     for ( ; p < end; ++p ) {
00451         const QChar ch = *p;
00452 
00453         if ( ch == '&' ) {
00454             ampersand = p;
00455             scanForSemicolon = true;
00456             continue;
00457         }
00458 
00459         if ( ch != ';' || scanForSemicolon == false )
00460             continue;
00461 
00462         assert( ampersand );
00463 
00464         scanForSemicolon = false;
00465 
00466         const QChar *entityBegin = ampersand + 1;
00467 
00468         const uint entityLength = p - entityBegin;
00469         if ( entityLength == 0 )
00470             continue;
00471 
00472         const QChar entityValue = KCharsets::fromEntity( QConstString( entityBegin, entityLength ).string() );
00473         if ( entityValue.isNull() )
00474             continue;
00475 
00476         const uint ampersandPos = ampersand - text.unicode();
00477 
00478         text[ (int)ampersandPos ] = entityValue;
00479         text.remove( ampersandPos + 1, entityLength + 1 );
00480         p = text.unicode() + ampersandPos;
00481         end = text.unicode() + text.length();
00482         ampersand = 0;
00483     }
00484 
00485     return text;
00486 }
00487 
00488 QStringList KCharsets::availableEncodingNames()
00489 {
00490     QStringList available;
00491 
00492     const char* const* pos = charsets_for_encoding;
00493     while( *pos != 0 ) {
00494         //kdDebug(0) << "key = " << *pos << endl;
00495 
00496 
00497         // iterate thorugh the list and find the first charset that is available
00498         for( const char* const* charsets = pos + 1;
00499              *charsets != 0;
00500              ++charsets ) {
00501             //kdDebug(0) << "checking for " << *charsets << endl;
00502 #ifdef __GNUC__
00503 #warning FIXME?
00504 #endif
00505             if( true ) {
00506                 //kdDebug(0) << *charsets << " available" << endl;
00507                 available.append( QString::fromLatin1( *pos ));
00508                 break;
00509             }
00510         }
00511         while( *pos != 0 ) // find end of line
00512             ++pos;
00513         ++pos; // move to the next line
00514     }
00515     return available;
00516 }
00517 
00518 QString KCharsets::languageForEncoding( const QString &encoding )
00519 {
00520     int lang = kcharsets_array_search< LanguageForEncoding, int >
00521         ( language_for_encoding, encoding.latin1());
00522     return i18n( language_names[lang] );
00523 }
00524 
00525 QString KCharsets::encodingForName( const QString &descriptiveName )
00526 {
00527     const int left = descriptiveName.findRev( '(' );
00528     
00529     if (left<0) // No parenthesis, so assume it is a normal encoding name
00530     return descriptiveName.stripWhiteSpace();
00531     
00532     QString name(descriptiveName.mid(left+1));
00533     
00534     const int right = name.findRev( ')' );
00535     
00536     if (right<0) 
00537         return name;
00538 
00539     return name.left(right).stripWhiteSpace();
00540 }
00541 
00542 QStringList KCharsets::descriptiveEncodingNames()
00543 {
00544   QStringList encodings = availableEncodingNames();
00545   QStringList::Iterator it;
00546   for( it = encodings.begin(); it != encodings.end(); ++it ) {
00547       QString lang = KGlobal::charsets()->languageForEncoding( *it );
00548       *it = i18n("Descriptive Encoding Name", "%1 ( %2 )") .arg(lang) .arg(*it);
00549   }
00550   encodings.sort();
00551   return encodings;
00552 }
00553 
00554 QTextCodec *KCharsets::codecForName(const QString &n) const
00555 {
00556     bool b;
00557     return codecForName( n, b );
00558 }
00559 
00560 QTextCodec *KCharsets::codecForName(const QString &n, bool &ok) const
00561 {
00562     ok = true;
00563 
00564     QTextCodec* codec = 0;
00565     // dict lookup is case insensitive anyway
00566     if((codec = d->codecForNameDict[n.isEmpty() ? "->locale<-" : n.latin1()]))
00567         return codec; // cache hit, return
00568 
00569     if (n.isEmpty()) {
00570         codec = KGlobal::locale()->codecForEncoding();
00571         d->codecForNameDict.replace("->locale<-", codec);
00572         return codec;
00573     }
00574 
00575     QCString name = n.lower().latin1();
00576     QCString key = name;
00577     if (name.right(8) == "_charset")
00578        name.truncate(name.length()-8);
00579 
00580     if (name.isEmpty()) {
00581       ok = false;
00582       return QTextCodec::codecForName("iso8859-1");
00583     }
00584 
00585     codec = QTextCodec::codecForName(name);
00586 
00587     if(codec) {
00588         d->codecForNameDict.replace(key, codec);
00589         return codec;
00590     }
00591 
00592     // these codecs are built into Qt, but the name given for the codec is different,
00593     // so QTextCodec did not recognize it.
00594     QCString cname = kcharsets_array_search< Builtin, const char* >( builtin, name.data());
00595 
00596     if(!cname.isEmpty())
00597         codec = QTextCodec::codecForName(cname);
00598 
00599     if(codec)
00600     {
00601         d->codecForNameDict.replace(key, codec);
00602         return codec;
00603     }
00604 
00605     // ### TODO: charmaps have changed a little since this code was written. The default dir should be changed and KFilterDev should be used for reading gzipped files.
00606     QString dir;
00607     {
00608     KConfigGroupSaver cfgsav( KGlobal::config(), "i18n" );
00609     dir = KGlobal::config()->readPathEntry("i18ndir", QString::fromLatin1("/usr/share/i18n/charmaps"));
00610     dir += "/";
00611     }
00612 
00613     // these are codecs not included in Qt. They can be build up if the corresponding charmap
00614     // is available in the charmap directory.
00615     cname = kcharsets_array_search< Aliases, const char* >( aliases, name.data());
00616 
00617     if(cname.isEmpty())
00618         cname = name;
00619     cname = cname.upper();
00620 
00621     codec = QTextCodec::loadCharmapFile((QString)(dir + cname.data()));
00622 
00623     if(codec) {
00624         d->codecForNameDict.replace(key, codec);
00625         return codec;
00626     }
00627 
00628     // this also failed, the last resort is now to take some compatibility charmap
00629 
00630     cname = cname.lower();
00631     cname = kcharsets_array_search< ConversionHints, const char* >( conversion_hints, (const char*)cname );
00632 
00633     if(!cname.isEmpty())
00634         codec = QTextCodec::codecForName(cname);
00635 
00636     if(codec) {
00637         d->codecForNameDict.replace(key, codec);
00638         return codec;
00639     }
00640 
00641     // could not assign a codec, let's return Latin1
00642     ok = false;
00643     return QTextCodec::codecForName("iso8859-1");
00644 }
KDE Logo
This file is part of the documentation for kdecore Library Version 3.4.0.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Tue Mar 22 19:46:38 2005 by doxygen 1.4.1 written by Dimitri van Heesch, © 1997-2003