Object
A recursive-descent parser for RDoc markup.
The parser tokenizes an input string then parses the tokens into a Document. Documents can be converted into output formats by writing a visitor like RDoc::Markup::ToHTML.
The parser only handles the block-level constructs Paragraph, List, ListItem, Heading, Verbatim, BlankLine and Rule. Inline markup such as +blah+ is handled separately by RDoc::Markup::AttributeManager.
To see what markup the Parser implements read RDoc. To see how to use RDoc markup to format text in your program read RDoc::Markup.
List token types
Parses str into a Document
# File lib/rdoc/markup/parser.rb, line 57
#
# Convenience constructor: tokenizes +str+ and parses the resulting
# token stream into a new RDoc::Markup::Document.
def self.parse str
  parser = new
  # parser.debug = true  # enable for token-level tracing
  tokenized = parser.tokenize str
  RDoc::Markup::Document.new(*tokenized.parse)
end
Builds a Heading of the given level from the remaining text on the line
# File lib/rdoc/markup/parser.rb, line 88
#
# Builds a Heading of the given +level+.  The heading text is gathered
# from the remaining tokens on the line via #text, then the trailing
# NEWLINE token is consumed.
def build_heading level
  result = RDoc::Markup::Heading.new(level, text)

  skip :NEWLINE

  result
end
Builds a List flush to margin
# File lib/rdoc/markup/parser.rb, line 98
#
# Builds a List flush to +margin+.  Repeatedly consumes list-marker tokens
# (BULLET, LABEL, LALPHA, NOTE, NUMBER, UALPHA) and the item bodies that
# follow them.  Stops -- pushing the offending token back -- when the
# stream ends, a marker sits left of +margin+, the marker kind changes,
# or a non-marker token appears.  Returns the List, or nil if no items
# were collected.
98: def build_list margin
99: p :list_start => margin if @debug
100:
101: list = RDoc::Markup::List.new
102:
103: until @tokens.empty? do
104: type, data, column, = get
105:
106: case type
107: when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
108: list_type = type
109:
# A marker left of our margin belongs to an enclosing scope.
110: if column < margin then
111: unget
112: break
113: end
114:
# A different marker kind (e.g. bullet after number) starts a new list.
115: if list.type and list.type != list_type then
116: unget
117: break
118: end
119:
120: list.type = list_type
121:
122: case type
# NOTE/LABEL keep their text (+data+) as the item label.  The following
# SPACE token supplies the indent; when the label sits on its own line,
# peek past the NEWLINE for an INDENT that overrides the body indent.
123: when :NOTE, :LABEL then
124: _, indent, = get # SPACE
125: if :NEWLINE == peek_token.first then
126: get
127: peek_type, new_indent, peek_column, = peek_token
128: indent = new_indent if
129: peek_type == :INDENT and peek_column >= column
130: unget
131: end
132: else
# Other markers carry no label; their SPACE token supplies the indent.
133: data = nil
134: _, indent, = get
135: end
136:
137: list_item = build_list_item(margin + indent, data)
138:
139: list << list_item if list_item
140: else
# Not a list marker: the list is finished.
141: unget
142: break
143: end
144: end
145:
146: p :list_end => margin if @debug
147:
148: return nil if list.empty?
149:
150: list
151: end
Builds a ListItem that is flush to indent with type item_type
# File lib/rdoc/markup/parser.rb, line 156
#
# Builds a ListItem flush to +indent+ with label +item_type+ (the label
# text for LABEL/NOTE items, nil otherwise).  Collects paragraphs,
# headings, blank lines and nested lists until a token falls left of
# +indent+.  Returns nil for an empty item.
156: def build_list_item indent, item_type = nil
157: p :list_item_start => [indent, item_type] if @debug
158:
159: list_item = RDoc::Markup::ListItem.new item_type
160:
161: until @tokens.empty? do
162: type, data, column = get
163:
# Anything left of the item's indent ends the item -- except NEWLINEs
# (blank lines belong to the item) and INDENTs at least +indent+ deep.
164: if column < indent and
165: not type == :NEWLINE and
166: (type != :INDENT or data < indent) then
167: unget
168: break
169: end
170:
171: case type
# Deeper indentation: recurse via #parse for nested/verbatim content.
172: when :INDENT then
173: unget
174: list_item.push(*parse(indent))
175: when :TEXT then
176: unget
177: list_item << build_paragraph(indent)
178: when :HEADER then
179: list_item << build_heading(data)
180: when :NEWLINE then
181: list_item << RDoc::Markup::BlankLine.new
# A marker token starts a nested list at its own column.
182: when *LIST_TOKENS then
183: unget
184: list_item << build_list(column)
185: else
186: raise ParseError, "Unhandled token #{@current_token.inspect}"
187: end
188: end
189:
190: p :list_item_end => [indent, item_type] if @debug
191:
192: return nil if list_item.empty?
193:
# Drop a leading blank line when the item has other content.
194: list_item.parts.shift if
195: RDoc::Markup::BlankLine === list_item.parts.first and
196: list_item.length > 1
197:
198: list_item
199: end
Builds a Paragraph that is flush to margin
# File lib/rdoc/markup/parser.rb, line 204
#
# Builds a Paragraph from consecutive TEXT tokens whose column equals
# +margin+.  INDENT tokens of exactly +margin+ spaces followed by more
# text are swallowed; any other token (or a TEXT at a different column)
# ends the paragraph and is pushed back.
204: def build_paragraph margin
205: p :paragraph_start => margin if @debug
206:
207: paragraph = RDoc::Markup::Paragraph.new
208:
209: until @tokens.empty? do
210: type, data, column, = get
211:
212: case type
213: when :INDENT then
# Same-margin indentation ahead of more text continues the paragraph.
214: next if data == margin and peek_token[0] == :TEXT
215:
216: unget
217: break
218: when :TEXT then
219: if column != margin then
220: unget
221: break
222: end
223:
224: paragraph << data
225: skip :NEWLINE
226: else
227: unget
228: break
229: end
230: end
231:
232: p :paragraph_end => margin if @debug
233:
234: paragraph
235: end
Builds a Verbatim that is flush to margin
# File lib/rdoc/markup/parser.rb, line 240
#
# Builds a Verbatim from everything indented past +margin+.  HEADER, RULE
# and list-marker tokens inside the block are re-expanded into the literal
# text they were scanned from (padded out to the column of the next
# token).  The block ends at a token at or left of +margin+, or after a
# NEWLINE that is not followed by another INDENT or NEWLINE.
240: def build_verbatim margin
241: p :verbatim_begin => margin if @debug
242: verbatim = RDoc::Markup::Verbatim.new
243:
244: until @tokens.empty? do
245: type, data, column, = get
246:
247: case type
248: when :INDENT then
249: if margin >= data then
250: unget
251: break
252: end
253:
# Preserve indentation beyond the margin inside the verbatim text.
254: indent = data - margin
255:
256: verbatim << ' ' * indent
# Re-expand a scanned header into its literal '=' run plus padding.
257: when :HEADER then
258: verbatim << '=' * data
259:
260: _, _, peek_column, = peek_token
261: peek_column ||= column + data
262: verbatim << ' ' * (peek_column - column - data)
# Re-expand a scanned rule; +data+ is the dash count beyond two.
263: when :RULE then
264: width = 2 + data
265: verbatim << '-' * width
266:
267: _, _, peek_column, = peek_token
268: peek_column ||= column + data + 2
269: verbatim << ' ' * (peek_column - column - width)
270: when :TEXT then
271: verbatim << data
# List markers inside a verbatim are literal text, not lists.
272: when *LIST_TOKENS then
273: if column <= margin then
274: unget
275: break
276: end
277:
278: list_marker = case type
279: when :BULLET then data
280: when :LABEL then "[#{data}]"
281: when :LALPHA, :NUMBER, :UALPHA then "#{data}."
282: when :NOTE then "#{data}::"
283: end
284:
285: verbatim << list_marker
286:
# The marker's SPACE token restores the gap that followed it.
287: _, data, = get
288:
289: verbatim << ' ' * (data - list_marker.length)
290: when :NEWLINE then
291: verbatim << data
# A blank or further-indented next line continues the verbatim block.
292: break unless [:INDENT, :NEWLINE].include? peek_token[0]
293: else
294: unget
295: break
296: end
297: end
298:
299: verbatim.normalize
300:
301: p :verbatim_end => margin if @debug
302:
303: verbatim
304: end
Pulls the next token from the stream.
# File lib/rdoc/markup/parser.rb, line 309
#
# Pulls the next token off the stream, remembers it as the current token
# (for #unget and error reporting) and returns it; nil at end of stream.
def get
  token = @tokens.shift
  p :get => token if @debug
  @current_token = token
end
Parses the tokens into a Document
# File lib/rdoc/markup/parser.rb, line 318
#
# Parses the token stream into an Array of block-level nodes (the
# contents of a Document).  +indent+ is the left margin for this level;
# non-INDENT tokens left of it end the level.  Recursion into nested
# levels happens through the build_* helpers.
318: def parse indent = 0
319: p :parse_start => indent if @debug
320:
321: document = []
322:
323: until @tokens.empty? do
324: type, data, column, = get
325:
# A non-INDENT token left of our margin belongs to an outer level.
326: if type != :INDENT and column < indent then
327: unget
328: break
329: end
330:
331: case type
332: when :HEADER then
333: document << build_heading(data)
334: when :INDENT then
335: if indent > data then
336: unget
337: break
338: elsif indent == data then
# Indentation that merely matches the margin carries no structure.
339: next
340: end
341:
# Deeper indentation starts a verbatim section.
342: unget
343: document << build_verbatim(indent)
344: when :NEWLINE then
345: document << RDoc::Markup::BlankLine.new
346: skip :NEWLINE, false
347: when :RULE then
348: document << RDoc::Markup::Rule.new(data)
349: skip :NEWLINE
350: when :TEXT then
351: unget
352: document << build_paragraph(indent)
353:
354: # we're done with this paragraph (indent mismatch)
355: break if peek_token[0] == :TEXT
356: when *LIST_TOKENS then
357: unget
358:
359: list = build_list(indent)
360:
361: document << list if list
362:
363: # we're done with this list (indent mismatch)
364: break if LIST_TOKENS.include? peek_token.first and indent > 0
365: else
366: type, data, column, line = @current_token
367: raise ParseError,
368: "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
369: end
370: end
371:
372: p :parse_end => indent if @debug
373:
374: document
375: end
Returns the next token on the stream without modifying the stream
# File lib/rdoc/markup/parser.rb, line 380
#
# Returns the next token without consuming it.  An empty Array stands in
# for end-of-stream so callers can index the result safely.
def peek_token
  upcoming = @tokens.first
  upcoming = [] if upcoming.nil?
  p :peek => upcoming if @debug
  upcoming
end
Skips a token of token_type, optionally raising an error.
# File lib/rdoc/markup/parser.rb, line 389
#
# Skips the next token when it is of +token_type+ and returns it.
# Returns nil at end of stream.  On a mismatch the token is pushed back
# and, when +error+ is true, a ParseError is raised.
def skip token_type, error = true
  type, = get

  return unless type # end of stream

  return @current_token if type == token_type

  unget

  raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
end
Consumes tokens until NEWLINE and turns them back into text
# File lib/rdoc/markup/parser.rb, line 405
#
# Consumes tokens up to the next NEWLINE and reassembles them into the
# plain text they were scanned from; marker tokens are re-expanded, with
# their SPACE tokens restoring the original gaps.
# NOTE(review): the BULLET branch always emits '*', so a '-' bullet is
# re-expanded as '*' here.
405: def text
406: text = ''
407:
408: loop do
409: type, data, = get
410:
411: text << case type
412: when :BULLET then
413: _, space, = get # SPACE
414: "*#{' ' * (space - 1)}"
415: when :LABEL then
416: _, space, = get # SPACE
417: "[#{data}]#{' ' * (space - data.length - 2)}"
418: when :LALPHA, :NUMBER, :UALPHA then
419: _, space, = get # SPACE
420: "#{data}.#{' ' * (space - 2)}"
421: when :NOTE then
422: _, space = get # SPACE
423: "#{data}::#{' ' * (space - data.length - 2)}"
424: when :TEXT then
425: data
426: when :NEWLINE then
# The NEWLINE ends the text run but stays in the stream.
427: unget
428: break
429: when nil then
# End of stream.
430: break
431: else
432: raise ParseError, "unhandled token #{@current_token.inspect}"
433: end
434: end
435:
436: text
437: end
Calculates the column and line of the current token based on offset.
# File lib/rdoc/markup/parser.rb, line 442
#
# Translates a scanner byte +offset+ into a [column, line] pair using the
# position bookkeeping (@line_pos, @line) maintained by #tokenize.
def token_pos offset
  column = offset - @line_pos

  [column, @line]
end
Turns text input into a stream of tokens
# File lib/rdoc/markup/parser.rb, line 449
#
# Turns +input+ into the flat token stream in @tokens.  Each token is
# [type, data, column, line].  HEADER pushes a companion TEXT token, and
# BULLET, LALPHA/UALPHA/NUMBER, LABEL and NOTE each push a companion
# SPACE token, so the parser can recover the original layout.
# Returns self.
449: def tokenize input
450: s = StringScanner.new input
451:
452: @line = 0
453: @line_pos = 0
454:
455: until s.eos? do
456: pos = s.pos
457:
458: @tokens << case
459: when s.scan(/\r?\n/) then
460: token = [:NEWLINE, s.matched, *token_pos(pos)]
# Record where the new line starts so token_pos yields columns.
461: @line_pos = s.pos
462: @line += 1
463: token
464: when s.scan(/ +/) then
465: [:INDENT, s.matched_size, *token_pos(pos)]
466: when s.scan(/(=+)\s*/) then
467: level = s[1].length
# Heading depth is capped at 6.
468: level = 6 if level > 6
469: @tokens << [:HEADER, level, *token_pos(pos)]
470:
471: pos = s.pos
472: s.scan(/.*/)
473: [:TEXT, s.matched, *token_pos(pos)]
474: when s.scan(/^(-{3,}) *$/) then
# RULE data is the dash count beyond the minimum of two.
475: [:RULE, s[1].length - 2, *token_pos(pos)]
476: when s.scan(/([*-])\s+/) then
477: @tokens << [:BULLET, s[1], *token_pos(pos)]
478: [:SPACE, s.matched_size, *token_pos(pos)]
479: when s.scan(/([a-z]|\d+)\.[ \t]+\S/) then
480: list_label = s[1]
481: width = s.matched_size - 1
482:
483: s.pos -= 1 # unget \S
484:
# NOTE(review): as shown, the scan pattern matches only lowercase letters
# and digits, yet :UALPHA expects uppercase -- the scan is likely
# case-insensitive (/i) in the original source; confirm upstream.
485: list_type = case list_label
486: when /[a-z]/ then :LALPHA
487: when /[A-Z]/ then :UALPHA
488: when /\d/ then :NUMBER
489: else
490: raise ParseError, "BUG token #{list_label}"
491: end
492:
493: @tokens << [list_type, list_label, *token_pos(pos)]
494: [:SPACE, width, *token_pos(pos)]
495: when s.scan(/\[(.*?)\]( +|$)/) then
496: @tokens << [:LABEL, s[1], *token_pos(pos)]
497: [:SPACE, s.matched_size, *token_pos(pos)]
498: when s.scan(/(.*?)::( +|$)/) then
499: @tokens << [:NOTE, s[1], *token_pos(pos)]
500: [:SPACE, s.matched_size, *token_pos(pos)]
501: else s.scan(/.*/)
502: [:TEXT, s.matched, *token_pos(pos)]
503: end
504: end
505:
506: self
507: end
Returns the given token (defaulting to the current token) to the front of the token stream
# File lib/rdoc/markup/parser.rb, line 512
#
# Pushes +token+ (the current token by default) back onto the front of
# the token stream.  Raises Error when the same token is already at the
# front, which would indicate a double unget.
def unget token = @current_token
  p :unget => token if @debug
  duplicate = token == @tokens.first
  raise Error, 'too many #ungets' if duplicate
  @tokens.unshift token if token
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.