| Class | REXML::Text |
| In: |
lib/rexml/text.rb
|
| Parent: | Child |
Represents text nodes in an XML document
| SPECIALS | = | [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ] | The order in which the substitutions occur | |
| SUBSTITUTES | = | ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;'] | ||
| SLAICEPS | = | [ '<', '>', '"', "'", '&' ] | Characters which are substituted in written strings | |
| SETUTITSBUS | = | [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ] | ||
| ILLEGAL | = | /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um | ||
| NUMERICENTITY | = | /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ | ||
| REFERENCE | = | /#{Entity::REFERENCE}/ | ||
| EREFERENCE | = | /&(?!#{Entity::NAME};)/ |
Constructor arg if a String, the content is set to the String. If a Text, the object is shallowly cloned.
respect_whitespace (boolean, false) if true, whitespace is respected
parent (nil) if this is a Parent object, the parent will be set to this.
raw (nil) This argument can be given three values. If true, then the value used to construct this object is expected to contain no unescaped XML markup, and REXML will not change the text. If this value is false, the string may contain any characters, and REXML will escape any and all defined entities whose values are contained in the text. If this value is nil (the default), then the raw value of the parent will be used as the raw value for this node. If there is no raw value for the parent, and no value is supplied, the default is false. Use this field if you have entities defined for some text, and you don't want REXML to escape that text in output.
Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
Text.new( "<&", false, nil, true ) #-> Parse exception
Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
# Assume that the entity "s" is defined to be "sean"
# and that the entity "r" is defined to be "russell"
Text.new( "sean russell" ) #-> "&s; &r;"
Text.new( "sean russell", false, nil, true ) #-> "sean russell"
entity_filter (nil) This can be an array of entities to match in the supplied text. This argument is only useful if raw is set to false.
Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell" Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
In the last example, the entity_filter argument is ignored.
pattern INTERNAL USE ONLY
# File lib/rexml/text.rb, line 59
# Builds a text node from +arg+.
#
# arg::                a String (copied) or another Text (its +to_s+ value
#                      and +raw+ flag are copied).
# respect_whitespace:: when false, runs of the same space, newline or tab
#                      character in a String +arg+ are squeezed to one.
# parent::             when given, it is passed to +super+ and its +raw+
#                      setting becomes this node's default.
# raw::                overrides the inherited raw setting unless nil.
# entity_filter::      stored as-is for later use when normalizing.
# illegal::            pattern checked against the text when the node is raw.
#
# Raises RuntimeError for an unsupported +arg+ type, or when the node is raw
# and the text matches +illegal+.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
               entity_filter=nil, illegal=ILLEGAL)
  @raw = false

  if parent
    super(parent)
    @raw = parent.raw
  else
    @parent = nil
  end

  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    # dup (not clone) so that a frozen argument can still be edited below.
    @string = arg.dup
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    # to_s may hand back the other node's own buffer; copy it so the
    # in-place newline conversion below cannot alter that node.
    @string = arg.to_s.dup
    @raw = arg.raw
  else
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  @string.gsub!(/\r\n?/, "\n")

  # check for illegal characters
  if @raw && @string =~ illegal
    raise "Illegal character '#{$1}' in raw string \"#{@string}\""
  end
end
Escapes all possible entities
# File lib/rexml/text.rb, line 286
# Escapes +input+ and returns the result as a new String; +input+ itself is
# not modified.
#
# Every ampersand matched by EREFERENCE is replaced with "&amp;". Then each
# occurrence of an entity's value is replaced by a reference to that entity
# ("&name;"): the entities of +doctype+ when one is given, otherwise
# DocType::DEFAULT_ENTITIES.
#
# entity_filter:: optional collection; doctype entities listed in it (by
#                 name, or as the entity object itself) are left
#                 unsubstituted. It is not consulted when +doctype+ is nil.
def self.normalize(input, doctype=nil, entity_filter=nil)
  # Escape ampersands first so the "&" of the references inserted below is
  # not escaped a second time.
  copy = input.gsub(EREFERENCE, '&amp;')

  if doctype
    doctype.entities.each_value do |entity|
      next unless entity.value
      # The filter holds entity names, so compare by name; matching the
      # entity object as well keeps any caller that passed objects working.
      next if entity_filter &&
              (entity_filter.include?(entity.name) || entity_filter.include?(entity))
      copy = copy.gsub(entity.value, "&#{entity.name};")
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub(entity.value, "&#{entity.name};")
    end
  end
  copy
end
Reads text, substituting entities
# File lib/rexml/text.rb, line 260
# Returns a copy of +input+ with line endings converted to "\n" and, when the
# text contains an ampersand, the patterns in SETUTITSBUS replaced by the
# corresponding SLAICEPS entries and numeric character references
# (decimal "&#NN;" or hexadecimal "&#xHH;") replaced by the character they
# denote.
#
# illegal:: optional pattern; when given and it matches the text, a
#           ParseException is raised before any substitution takes place.
def self.read_with_substitution(input, illegal=nil)
  # dup rather than clone: a frozen input must still yield an editable copy.
  copy = input.dup

  if illegal && copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  copy.gsub!(/\r\n?/, "\n")
  if copy.include?('&')
    # Same order as the tables; the bare ampersand comes last in SLAICEPS.
    SETUTITSBUS.zip(SLAICEPS).each do |pattern, replacement|
      copy.gsub!(pattern, replacement)
    end
    # Leading zeros are skipped by the pattern; hex digits of either case
    # are accepted.
    copy.gsub!(/&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/) {
      digits = $1
      codepoint = digits[0, 1] == 'x' ? digits[1..-1].hex : digits.to_i
      [codepoint].pack('U*')
    }
  end
  copy
end
Unescapes all possible entities
# File lib/rexml/text.rb, line 308
# Returns a copy of +string+ with line endings converted to "\n" and entity
# references expanded.
#
# Numeric character references (NUMERICENTITY) are replaced by the character
# they denote. Named references found by REFERENCE are replaced by the value
# +doctype.entity(name)+ returns, or, when no +doctype+ is given, by the value
# of the matching DocType::DEFAULT_ENTITIES entry; names with no value are
# left as they are. "&amp;" is expanded last.
#
# filter::  optional collection of entity names that must not be expanded.
# illegal:: accepted for interface compatibility; not used here.
def self.unnormalize(string, doctype=nil, filter=nil, illegal=nil)
  # dup rather than clone: a frozen input must still yield an editable copy.
  rv = string.dup
  rv.gsub!(/\r\n?/, "\n")
  matches = rv.scan(REFERENCE)
  return rv if matches.empty?

  rv.gsub!(NUMERICENTITY) {
    digits = $1
    codepoint = digits[0, 1] == 'x' ? digits[1..-1].hex : digits.to_i
    [codepoint].pack('U*')
  }

  # The first capture of REFERENCE is used as the entity name; matches
  # without one (nil) are dropped.
  names = matches.map { |captures| captures[0] }.compact.uniq
  unless names.empty?
    names.each do |name|
      next if filter && filter.include?(name)
      replacement =
        if doctype
          doctype.entity(name)
        else
          default = DocType::DEFAULT_ENTITIES[name]
          default && default.value
        end
      # A literal pattern and a block keep regexp metacharacters in the name
      # and backslashes in the value from being interpreted.
      rv.gsub!("&#{name};") { replacement } if replacement
    end
    # Done last so an escaped ampersand cannot form a new reference that
    # would then be expanded above.
    rv.gsub!(/&amp;/, '&')
  end
  rv
end
Appends text to this text node. The text is appended in the raw mode of this text node.
# File lib/rexml/text.rb, line 111
# Appends +to_append+ to this node's text after converting its "\r\n" and
# "\r" line endings to "\n". Returns the node's internal string.
def <<(to_append)
  @string << to_append.gsub(/\r\n?/, "\n")
  # The memoized escaped/unescaped forms no longer match the text.
  @normalized = @unnormalized = nil
  @string
end
# File lib/rexml/text.rb, line 201
# Prefixes every line of +string+ with +style+ repeated +level+ times and
# returns the result as a new String.
#
# Trailing whitespace is removed from each line, and that includes the line
# terminator, so the indented lines are joined without a separator. When
# +indentfirstline+ is false the result is additionally stripped at both
# ends. A negative +level+ returns +string+ unchanged.
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0
  indent_string = style * level
  # each_line rather than String#each, which no longer exists.
  new_string = string.each_line.map { |line|
    (indent_string + line).sub(/[\s]+$/, '')
  }.join
  new_string = new_string.strip unless indentfirstline
  new_string
end
Returns the string value of this text node. This string is always escaped, meaning that it is a valid XML text node string, and all entities that can be escaped, have been inserted. This method respects the entity filter set in the constructor.
# Assume that the entity "s" is defined to be "sean", and that the
# entity "r" is defined to be "russell"
t = Text.new( "< & sean russell", false, nil, false, ['s'] )
t.to_s #-> "&lt; &amp; &s; russell"
t = Text.new( "< & &s; russell", false, nil, false )
t.to_s #-> "&lt; &amp; &s; russell"
u = Text.new( "sean russell", false, nil, true )
u.to_s #-> "sean russell"
# File lib/rexml/text.rb, line 136
# Returns the node's text in its written (escaped) form.
#
# A raw node returns its stored string untouched. Otherwise the result of
# Text::normalize is computed once, using the doctype of the parent's
# document when there is one together with the node's entity filter, and
# memoized in @normalized.
def to_s
  if @raw
    @string
  elsif @normalized
    @normalized
  else
    document = @parent && @parent.document
    doctype = document ? document.doctype : nil
    @normalized = Text::normalize( @string, doctype, @entity_filter )
  end
end
Returns the string value of this text. This is the text without entities, as it might be used programmatically, or printed to the console. This ignores the ‘raw’ attribute setting, and any entity_filter.
# Assume that the entity "s" is defined to be "sean", and that the # entity "r" is defined to be "russell" t = Text.new( "< & sean russell", false, nil, false, ['s'] ) t.value #-> "< & sean russell" t = Text.new( "< & &s; russell", false, nil, false ) t.value #-> "< & sean russell" u = Text.new( "sean russell", false, nil, true ) u.value #-> "sean russell"
# File lib/rexml/text.rb, line 166
# Returns the node's text with entity references expanded, as produced by
# Text::unnormalize using the doctype of the parent's document when there is
# one. The result is memoized in @unnormalized and reused on later calls.
def value
  # Without the explicit return the memoized value was never used.
  return @unnormalized if @unnormalized
  doctype = nil
  if @parent
    doc = @parent.document
    doctype = doc.doctype if doc
  end
  @unnormalized = Text::unnormalize( @string, doctype )
end
Sets the contents of this text node. This expects the text to be unnormalized. It returns self.
e = Element.new( "a" )
e.add_text( "foo" ) # <a>foo</a>
e[0].value = "bar" # <a>bar</a>
e[0].value = "<a>" # <a>&lt;a&gt;</a>
# File lib/rexml/text.rb, line 194
# Replaces the node's text with +val+, converting "\r\n" and "\r" line
# endings to "\n". Both memoized forms are discarded and the node is marked
# as not raw.
def value=(val)
  converted = val.gsub( /\r\n?/, "\n" )
  @string = converted
  @normalized = @unnormalized = nil
  @raw = false
end
# File lib/rexml/text.rb, line 176
# Recursively wraps +string+ so that lines break at a space at or before
# +width+, replacing that space with "\n".
#
# When there is no space within +width+, the line is broken at the next
# space instead; when no space is left at all, the remainder is returned
# unbroken. +addnewline+ prepends "\n" to the result, but only when at least
# one break was made; it is not passed on to the recursive calls.
def wrap(string, width, addnewline=false)
  return string if string.length <= width

  # Last space at or before the cutoff, else the first one after it.
  place = string.rindex(' ', width) || string.index(' ', width)
  return string unless place

  wrapped = string[0, place] + "\n" + wrap(string[place+1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
# File lib/rexml/text.rb, line 213
# Appends the node's written form (+to_s+) to +writer+ with << and returns
# the result of that call.
#
# When the node has a parent whose +whitespace+ setting is false, the text is
# first wrapped at 60 columns if the parent's context has
# <tt>:wordwrap => :all</tt>, then indented with the context's
# <tt>:indentstyle</tt> if one is set, +indent+ is positive and the text
# spans several lines, and finally has its repeated spaces, newlines and
# tabs squeezed.
#
# +transitive+ and +ie_hack+ are accepted but not used here.
def write(writer, indent=-1, transitive=false, ie_hack=false)
  s = to_s
  if @parent && !@parent.whitespace
    context = @parent.context
    s = wrap(s, 60, false) if context[:wordwrap] == :all
    style = context[:indentstyle]
    if !style.nil? && indent > 0 && s.count("\n") > 0
      s = indent_text(s, indent, style, false)
    end
    # Non-destructive squeeze: s may still be the node's own stored or
    # memoized string, which writing must not alter.
    s = s.squeeze(" \n\t")
  end
  writer << s
end
Writes out text, substituting special characters beforehand. out: a String, IO, or any other object supporting <<( String ). input: the text to substitute and then write out.
z=utf8.unpack("U*")
ascOut=""
z.each{|r|
if r < 0x100
ascOut.concat(r.chr)
else
ascOut.concat(sprintf("&#x%x;", r))
end
}
puts ascOut
# File lib/rexml/text.rb, line 247
# Appends +input+ to +out+ (with <<) after replacing each pattern in SPECIALS
# with the SUBSTITUTES entry at the same index. +input+ itself is not
# modified. Returns the result of <tt>out << copy</tt>.
def write_with_substitution(out, input)
  # dup rather than clone: a frozen input must still yield an editable copy.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
  copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
  copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
  copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
  copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
  copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
  out << copy
end