#! /bin/bash

# gnome1desktopiconv
# Converts GNOME1 style desktop and directory files to UTF-8.
# Author: Stanislav Brabec <sbrabec@suse.cz>

# Legacy charset table.
#
# For every locale tag listed below a shell variable CHARSET_<tag> is
# defined, holding the iconv name of the charset that GNOME1 translations
# for that locale are assumed to use.  In a tag, "__" stands for "@" and
# "___" stands for "." of the original locale name (e.g. sr__Latn is
# sr@Latn, zh_TW___Big5 is zh_TW.Big5).
#
# Warning: the default for "ru" would be ISO-8859-5, but Russian GNOME1
# translators have used KOI8-R without notice, so KOI8-R is listed here.

# Start from a clean slate: drop any CHARSET_* inherited from the environment.
unset -v ${!CHARSET_*}

while read -r g1_tag g1_charset ; do
    printf -v "CHARSET_${g1_tag}" '%s' "$g1_charset"
done <<'EOF'
aa ISO-8859-1
af ISO-8859-1
am UTF-8
an ISO-8859-15
ar ISO-8859-6
az UTF-8
be CP1251
bg CP1251
bn UTF-8
br ISO-8859-1
bs ISO-8859-2
byn UTF-8
ca ISO-8859-1
cs ISO-8859-2
cy ISO-8859-14
da ISO-8859-1
de ISO-8859-1
el ISO-8859-7
en ISO-8859-1
es ISO-8859-1
et ISO-8859-1
eu ISO-8859-1
fa UTF-8
fi ISO-8859-1
fo ISO-8859-1
fr ISO-8859-1
ga ISO-8859-1
gd ISO-8859-15
gez UTF-8
gl ISO-8859-1
gu UTF-8
gv ISO-8859-1
he ISO-8859-8
hi UTF-8
hr ISO-8859-2
hu ISO-8859-2
id ISO-8859-1
is ISO-8859-1
it ISO-8859-1
iw ISO-8859-8
ja eucjp
ka GEORGIAN-PS
kl ISO-8859-1
kn UTF-8
ko euckr
kw ISO-8859-1
lg ISO-8859-10
lo UTF-8
lt ISO-8859-13
lv ISO-8859-13
mi ISO-8859-13
mk ISO-8859-5
ml UTF-8
mn UTF-8
mr UTF-8
ms ISO-8859-1
mt ISO-8859-3
nb ISO-8859-1
ne UTF-8
nl ISO-8859-1
nn ISO-8859-1
no ISO-8859-1
oc ISO-8859-1
om ISO-8859-1
pa UTF-8
pl ISO-8859-2
pt ISO-8859-1
ru KOI8-R
ru_UA KOI8-U
ro ISO-8859-2
se UTF-8
sh ISO-8859-2
sid UTF-8
sk ISO-8859-2
sl ISO-8859-2
so ISO-8859-1
sq ISO-8859-1
sr ISO-8859-2
sr__Latn ISO-8859-2
st ISO-8859-1
sv ISO-8859-1
ta UTF-8
te UTF-8
tg KOI8-T
th TIS-620
tig UTF-8
ti UTF-8
tl ISO-8859-1
tr ISO-8859-9
uk KOI8-U
ur UTF-8
uz ISO-8859-1
vi UTF-8
wa ISO-8859-1
xh ISO-8859-1
yi CP1255
zu ISO-8859-1
zh GB2312
zh_SG GB2312
zh_HK BIG5-HKSCS
zh_TW BIG5
zh_TW___Big5 BIG5
EOF

# The loop helpers are not part of the table; do not leave them behind.
unset -v g1_tag g1_charset

# Conversion driver.
#
# Usage: gnome1desktopiconv FILE
#
# FILE is rewritten in place as UTF-8 when it either declares
# "Encoding=Legacy-Mixed" or declares no Encoding at all.  A file that
# declares any other Encoding is left untouched.  Only the localized keys
# Name[..], GenericName[..] and Comment[..] are converted; every other line
# is copied verbatim.  Diagnostics go to stderr.

# Print the legacy charset for locale tag $1 (e.g. "cs", "sr@Latn",
# "zh_TW.Big5") using the CHARSET_* variables.  If the full tag is unknown,
# the part before the first "_" is tried; if that fails too, a warning is
# printed and UTF-8 is assumed.
charset_for_locale() {
    local LC_ALL=C   # keep the A-Z/a-z/0-9 ranges below plain ASCII
    local tag=$1 var charset=

    tag=${tag//@/__}
    tag=${tag//./___}
    # The tag comes from the file being converted.  It is only used as part
    # of a variable name after it has been checked to be a plain identifier
    # suffix, so file content can never be evaluated as shell code.
    if [[ $tag =~ ^[A-Za-z0-9_]+$ ]]; then
        var=CHARSET_$tag
        charset=${!var}
    fi
    if [[ -z $charset ]]; then
        tag=${tag%%_*}
        if [[ $tag =~ ^[A-Za-z0-9_]+$ ]]; then
            var=CHARSET_$tag
            charset=${!var}
        fi
    fi
    if [[ -z $charset ]]; then
        echo >&2 "$0 warning: Unknown encoding for $tag. Assuming UTF-8."
        charset=UTF-8
    fi
    printf '%s\n' "$charset"
}

# Print line $1 as UTF-8, assuming it is encoded in charset $2 unless it
# already parses as UTF-8.
convert_line() {
    local line=$1 charset=$2 converted

    if [[ $charset == UTF-8 ]]; then
        printf '%s\n' "$line"
        return
    fi

    # Try whether it is parsable as UTF-8 (iconv noise is expected here).
    if iconv -f UTF-8 -t UTF-8 <<<"$line" >/dev/null 2>&1; then
        # Valid UTF-8 that is not plain ASCII: somebody already converted it.
        if ! iconv -f UTF-8 -t ASCII <<<"$line" >/dev/null 2>&1; then
            echo >&2 "$0 warning: Following line seems to be already in UTF-8 instead of Legacy."
            echo >&2 "$line"
        fi
        printf '%s\n' "$line"
        return
    fi

    if converted=$(iconv -f "$charset" -t UTF-8 <<<"$line"); then
        printf '%s\n' "$converted"
    else
        echo >&2 "$0 error: Cannot convert following line."
        echo >&2 "$line"
        printf '%s\n' "$line"
    fi
}

# Filter stdin to stdout: drop "Encoding=Legacy-Mixed", add "Encoding=UTF-8"
# after "[Desktop Entry]", convert localized Name/GenericName/Comment lines
# and copy everything else unchanged.  Returns non-zero if a write fails.
convert_stream() {
    local line stripped locale charset

    # IFS= and -r keep whitespace and backslashes intact; the extra test
    # keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n $line ]]; do
        # Keys are matched on a copy without surrounding blanks, the line
        # itself is written out untouched.
        stripped=${line#"${line%%[![:blank:]]*}"}
        stripped=${stripped%"${stripped##*[![:blank:]]}"}

        case $stripped in
            Name\[*\]=* | GenericName\[*\]=* | Comment\[*\]=*)
                locale=${stripped%%]=*}
                locale=${locale#*\[}
                charset=$(charset_for_locale "$locale")
                convert_line "$line" "$charset"
                ;;
            Encoding=Legacy-Mixed)
                ;;
            '[Desktop Entry]')
                printf '%s\n' "$line" "Encoding=UTF-8"
                ;;
            *)
                printf '%s\n' "$line"
                ;;
        esac || return 1
    done
}

# Entry point: convert the file named by $1 (further arguments are ignored).
# Returns 0 on success or when nothing had to be done, 2 on usage error and
# non-zero on any other failure.
main() {
    if [[ $# -lt 1 || -z $1 ]]; then
        printf >&2 'Usage: %s FILE\n' "$0"
        return 2
    fi

    local file=$1
    local tmp=$file.utf8

    if [[ ! -r $file ]]; then
        echo >&2 "$0 error: Cannot read file $file."
        return 1
    fi

    # An explicit encoding other than Legacy-Mixed means there is nothing to do.
    if ! grep -q '^Encoding=Legacy-Mixed' -- "$file" &&
        grep -q '^Encoding=' -- "$file"; then
        return 0
    fi

    echo >&2 "$0 warning: File $file is probably in deprecated Legacy-Mixed encoding. Converting to UTF-8."

    if ! convert_stream <"$file" >"$tmp"; then
        echo >&2 "$0 error: Cannot write $tmp."
        rm -f -- "$tmp"
        return 1
    fi

    mv -- "$tmp" "$file"
}

main "$@"
