| SJISTOU8 | = | '-Swm0' | ||
| U8TOSJIS | = | '-Wsm0' | ||
| EUCTOU8 | = | '-Ewm0' | ||
| U8TOEUC | = | '-Wem0' | ||
| UTF_8 | = | 'UTF-8' | Native, default format is UTF-8, so it is declared here rather than in an encodings/ definition. | |
| UTF_16 | = | 'UTF-16' | ||
| UNILE | = | 'UNILE' |
| encoding | [R] | ID —> Encoding name |
# File lib/rexml/encoding.rb, line 8
# Looks up the callable stored under +enc+ in @encoding_methods and
# invokes it with +obj+ as its single argument.
#
# obj:: the object handed to the registered callable
# enc:: key under which the callable was registered
#
# Returns whatever the callable returns. Fails with NoMethodError when
# nothing is stored under +enc+ (the lookup yields nil).
def self.apply(obj, enc)
  installer = @encoding_methods[enc]
  installer.call(obj)
end
# File lib/rexml/encoding.rb, line 11
# Returns the entry stored under +enc+ in @encoding_methods, or whatever
# that table yields for an unknown key (nil for a plain Hash).
def self.encoding_method(enc)
  registered = @encoding_methods[enc]
  registered
end
# File lib/rexml/encoding.rb, line 5
# Stores the given block in @encoding_methods under the key +enc+,
# replacing any previous entry, and returns the stored block.
def self.register(enc, &block)
  @encoding_methods.store(enc, block)
end
# File lib/rexml/encoding.rb, line 57
# Guesses the encoding of an XML document from its first bytes.
#
# str:: the raw document text
#
# Returns UTF_16 when the text starts with the bytes FE FF, UNILE when it
# starts with FF FE, the upper-cased value of the encoding pseudo-attribute
# when an XML declaration carries one, and UTF_8 otherwise.
def check_encoding(str)
  # We have to recognize UTF-16, LSB UTF-16, and UTF-8.
  # Compare raw byte values so the check does not depend on how the
  # string is tagged.
  leading_bytes = str.unpack('C2')
  return UTF_16 if leading_bytes == [0xFE, 0xFF]
  return UNILE if leading_bytes == [0xFF, 0xFE]

  # Group 1 is the quote around the version value, group 2 the quote around
  # the encoding value, group 3 the encoding name itself. The '?' of
  # '<?xml' must be escaped, otherwise it makes the '<' optional.
  str =~ /^\s*<\?xml\s*version\s*=\s*(['"]).*?\1\s*encoding\s*=\s*(["'])(.*?)\2/um
  return $3.upcase if $3
  return UTF_8
end
Convert to UTF-8
# File lib/rexml/encodings/US-ASCII.rb, line 19
# Re-packs every byte of +str+ as a UTF-8 encoded code point of the same
# numeric value and returns the resulting string.
def decode_ascii(str)
  byte_values = str.unpack('C*')
  byte_values.pack('U*')
end
# File lib/rexml/encodings/EUC-JP.rb, line 19
# Hands +str+ to NKF with the EUCTOU8 option string and returns NKF's
# output (the EUC-JP to UTF-8 direction, going by the constant's name).
def decode_eucjp(str)
  nkf_options = EUCTOU8
  NKF.nkf(nkf_options, str)
end
# File lib/rexml/encodings/ICONV.rb, line 6
# Converts +str+ from the encoding named by @encoding into UTF_8 via
# Iconv.conv and returns the converted text.
def decode_iconv(str)
  source_encoding = @encoding
  Iconv.conv(UTF_8, source_encoding, str)
end
# File lib/rexml/encodings/SHIFT-JIS.rb, line 6
# Uconv-backed variant: passes +content+ to Uconv.sjistou8 and returns
# its result.
def decode_sjis(content)
  Uconv.sjistou8(content)
end
# File lib/rexml/encodings/SHIFT-JIS.rb, line 19
# NKF-backed variant: hands +str+ to NKF with the SJISTOU8 option string
# and returns NKF's output.
def decode_sjis(str)
  nkf_options = SJISTOU8
  NKF.nkf(nkf_options, str)
end
# File lib/rexml/encodings/UNILE.rb, line 18
# Decodes little-endian 16-bit code units into a UTF-8 string.
#
# str:: byte string holding pairs of (low byte, high byte)
#
# Each complete pair becomes one code point. A trailing unpaired byte is
# ignored rather than raising. No byte-order mark is removed and surrogate
# pairs are not combined; each unit is packed as-is.
def decode_unile(str)
  # 'v' reads unsigned 16-bit little-endian values, i.e. low + high * 0x100.
  code_units = str.unpack('v*')
  code_units.pack('U*')
end
# File lib/rexml/encodings/UTF-16.rb, line 18
# Decodes big-endian 16-bit code units into a UTF-8 string.
#
# str:: byte string holding pairs of (high byte, low byte)
#
# A byte-order mark (FE FF) is dropped only when it is the very first
# unit of the input. A trailing unpaired byte is ignored rather than
# raising. Surrogate pairs are not combined; each unit is packed as-is.
def decode_utf16(str)
  # 'n' reads unsigned 16-bit big-endian values, i.e. high * 0x100 + low.
  code_units = str.unpack('n*')
  code_units.shift if code_units.first == 0xFEFF
  code_units.pack('U*')
end
Convert from UTF-8
# File lib/rexml/encodings/US-ASCII.rb, line 4
# Turns UTF-8 text into a 7-bit byte string.
#
# content:: UTF-8 text
#
# Code points up to 0x7F are emitted as a single byte; every other code
# point is written as a decimal numeric character reference (&#nnnn;).
def encode_ascii(content)
  out_bytes = content.unpack('U*').inject([]) do |acc, code_point|
    if code_point > 0x7F
      # Not representable in 7 bits: spell it out as a numeric entity.
      acc.concat("&\##{code_point};".unpack('C*'))
    else
      acc.push(code_point)
    end
  end
  out_bytes.pack('C*')
end
# File lib/rexml/encodings/EUC-JP.rb, line 23
# NKF-backed variant: hands +content+ to NKF with the U8TOEUC option
# string and returns NKF's output.
def encode_eucjp(content)
  nkf_options = U8TOEUC
  NKF.nkf(nkf_options, content)
end
# File lib/rexml/encodings/EUC-JP.rb, line 10
# Uconv-backed variant: passes +content+ to Uconv.u8toeuc and returns
# its result.
def encode_eucjp(content)
  Uconv.u8toeuc(content)
end
# File lib/rexml/encodings/ICONV.rb, line 10
# Converts +content+ from UTF_8 into the encoding named by @encoding via
# Iconv.conv and returns the converted text.
def encode_iconv(content)
  target_encoding = @encoding
  Iconv.conv(target_encoding, UTF_8, content)
end
# File lib/rexml/encodings/SHIFT-JIS.rb, line 10
# Uconv-backed variant: passes +str+ to Uconv.u8tosjis and returns its
# result.
def encode_sjis(str)
  Uconv.u8tosjis(str)
end
# File lib/rexml/encodings/SHIFT-JIS.rb, line 23
# NKF-backed variant: hands +content+ to NKF with the U8TOSJIS option
# string and returns NKF's output.
def encode_sjis(content)
  nkf_options = U8TOSJIS
  NKF.nkf(nkf_options, content)
end
# File lib/rexml/encodings/UNILE.rb, line 3
# Encodes UTF-8 text as little-endian 16-bit code units.
#
# content:: UTF-8 text
#
# Every code point that fits in 16 bits is written low byte first, then
# high byte. Code points above 0xFFFF are replaced by a question mark
# (bytes 3F 00). No byte-order mark is written.
def encode_unile(content)
  code_units = content.unpack('U*').map do |code_point|
    # 0x3F is '?'. An integer is used because the ?? character literal is
    # a String, not a number, on Ruby 1.9 and later.
    code_point > 0xFFFF ? 0x3F : code_point
  end
  # 'v' writes unsigned 16-bit little-endian values.
  code_units.pack('v*')
end
# File lib/rexml/encodings/UTF-16.rb, line 3
# Encodes UTF-8 text as big-endian 16-bit code units.
#
# content:: UTF-8 text
#
# Every code point that fits in 16 bits is written high byte first, then
# low byte. Code points above 0xFFFF are replaced by a question mark
# (bytes 00 3F). No byte-order mark is written.
def encode_utf16(content)
  code_units = content.unpack('U*').map do |code_point|
    # 0x3F is '?'. An integer is used because the ?? character literal is
    # a String, not a number, on Ruby 1.9 and later.
    code_point > 0xFFFF ? 0x3F : code_point
  end
  # 'n' writes unsigned 16-bit big-endian values.
  code_units.pack('n*')
end
# File lib/rexml/encoding.rb, line 23
# Switches this object to the named encoding.
#
# enc:: encoding name (upper-cased before use), or nil for UTF-8
#
# Returns false when +enc+ equals the encoding already set, true after a
# successful switch.
#
# Raises ArgumentError when the name contains anything other than word
# characters and hyphens, or when neither the ICONV support file nor an
# encoding-specific file under rexml/encodings/ can be loaded.
#
# $VERBOSE is set to false for the duration of the call and restored
# afterwards.
def encoding=(enc)
  old_verbosity = $VERBOSE
  begin
    $VERBOSE = false
    enc = enc.nil? ? nil : enc.upcase
    return false if defined?(@encoding) && enc == @encoding
    if enc && enc != UTF_8
      # Validate before storing: the name is interpolated into a require
      # path below. \A and \z anchor the whole string; ^ and $ would accept
      # any multi-line value whose first line looks harmless. Rejecting
      # before assignment also keeps a bad name from being remembered and
      # then short-circuited as "unchanged" on the next call.
      unless enc =~ /\A[\w-]+\z/
        raise ArgumentError, "Bad encoding name #{enc}"
      end
      @encoding = enc
      # String#untaint does not exist on newer Rubies.
      @encoding.untaint if @encoding.respond_to?(:untaint)
      begin
        require 'rexml/encodings/ICONV.rb'
        Encoding.apply(self, "ICONV")
      rescue LoadError, StandardError
        # ICONV support unavailable or unusable: fall back to a dedicated
        # file for this encoding. Interrupts and exits are not swallowed.
        begin
          enc_file = File.join("rexml", "encodings", "#@encoding.rb")
          require enc_file
          Encoding.apply(self, @encoding)
        rescue LoadError => err
          puts err.message
          raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
        end
      end
    else
      @encoding = UTF_8
      require 'rexml/encodings/UTF-8.rb'
      Encoding.apply(self, @encoding)
    end
  ensure
    $VERBOSE = old_verbosity
  end
  true
end