Parent

Included Modules

RDoc::Markup::Parser

A recursive-descent parser for RDoc markup.

The parser tokenizes an input string then parses the tokens into a Document. Documents can be converted into output formats by writing a visitor like RDoc::Markup::ToHTML.

The parser only handles the block-level constructs Paragraph, List, ListItem, Heading, Verbatim, BlankLine and Rule. Inline markup such as +blah+ is handled separately by RDoc::Markup::AttributeManager.

To see what markup the Parser implements read RDoc. To see how to use RDoc markup to format text in your program read RDoc::Markup.

Constants

LIST_TOKENS

List token types

Attributes

debug[RW]

Enables display of debugging information

tokens[R]

Token accessor

Public Class Methods

new() click to toggle source

Creates a new Parser. See also ::parse

    # File lib/rdoc/markup/parser.rb, line 76
# Sets up a parser with an empty token stream, no current token and
# debugging output switched off. The line counter and line-start offset
# that #token_pos reads both start at zero.
def initialize
  @debug = false

  @tokens = []
  @current_token = nil

  @line = 0
  @line_pos = 0
end
parse(str) click to toggle source

Parses str into a Document

    # File lib/rdoc/markup/parser.rb, line 57
# Convenience entry point: tokenizes +str+ with a fresh parser, parses the
# resulting tokens and splats the parsed parts into a new
# RDoc::Markup::Document.
def self.parse(str)
  parser = new
  # parser.debug = true
  parser.tokenize str

  parts = parser.parse
  RDoc::Markup::Document.new(*parts)
end
tokenize(str) click to toggle source

Returns a token stream for str, for testing

    # File lib/rdoc/markup/parser.rb, line 67
# Tokenizes +str+ with a fresh parser and returns that parser's token
# list without parsing it.
def self.tokenize(str)
  new.tap { |parser| parser.tokenize str }.tokens
end

Public Instance Methods

build_heading(level) click to toggle source

Builds a Heading of level

    # File lib/rdoc/markup/parser.rb, line 88
# Builds a Heading of the given +level+ whose content is whatever #text
# collects from the token stream, then skips the NEWLINE that follows.
def build_heading(level)
  content = text
  heading = RDoc::Markup::Heading.new(level, content)

  skip :NEWLINE
  heading
end
build_list(margin) click to toggle source

Builds a List flush to margin

     # File lib/rdoc/markup/parser.rb, line 98
 # Builds a List out of consecutive list-marker tokens that are not left of
 # +margin+ and that all share the same marker type.
 #
 # The first token that is not a list marker, sits left of +margin+, or has
 # a different marker type than the list is put back and ends the list.
 # Returns nil when no item could be built.
 def build_list(margin)
   p :list_start => margin if @debug

   list = RDoc::Markup::List.new

   until @tokens.empty?
     type, data, column, = get

     # Only list markers may continue a list.
     unless [:BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA].include?(type)
       unget
       break
     end

     # A marker left of the margin, or of another kind than the list
     # already holds, is handed back to the caller.
     if column < margin || (list.type && list.type != type)
       unget
       break
     end

     list.type = type

     if type == :NOTE || type == :LABEL
       _, indent, = get # SPACE

       # When the label is directly followed by a NEWLINE, take the item
       # indent from the INDENT token on the next line, provided that token
       # is not left of the label's own column.
       if peek_token.first == :NEWLINE
         get
         peek_type, new_indent, peek_column, = peek_token
         indent = new_indent if peek_type == :INDENT && peek_column >= column
         unget
       end
     else
       # Only NOTE and LABEL items carry their marker text.
       data = nil
       _, indent, = get
     end

     list_item = build_list_item(margin + indent, data)
     list << list_item if list_item
   end

   p :list_end => margin if @debug

   list.empty? ? nil : list
 end
build_list_item(indent, item_type = nil) click to toggle source

Builds a ListItem that is flush to indent with type item_type

     # File lib/rdoc/markup/parser.rb, line 156
# Builds a ListItem whose content is flush to +indent+; +item_type+ is
# passed straight to RDoc::Markup::ListItem.new.
#
# Tokens are consumed until one is found left of +indent+ that is neither
# a NEWLINE nor an INDENT reaching at least +indent+; that token is put
# back for the caller. Returns nil when nothing was added to the item.
#
# Raises ParseError for a token type this method does not handle.
def build_list_item(indent, item_type = nil)
  p :list_item_start => [indent, item_type] if @debug

  list_item = RDoc::Markup::ListItem.new item_type

  until @tokens.empty?
    type, data, column = get

    if column < indent && type != :NEWLINE &&
       (type != :INDENT || data < indent)
      unget
      break
    end

    case type
    when :INDENT
      unget
      list_item.push(*parse(indent))
    when :TEXT
      unget
      list_item << build_paragraph(indent)
    when :HEADER
      list_item << build_heading(data)
    when :NEWLINE
      list_item << RDoc::Markup::BlankLine.new
    when *LIST_TOKENS
      unget

      # build_list returns nil for a list without items; storing that nil
      # would make the item look non-empty, so skip it (as #parse does).
      nested_list = build_list(column)
      list_item << nested_list if nested_list
    else
      raise ParseError, "Unhandled token #{@current_token.inspect}"
    end
  end

  p :list_item_end => [indent, item_type] if @debug

  return nil if list_item.empty?

  # Drop a leading blank line unless it is the item's only part.
  list_item.parts.shift if
    RDoc::Markup::BlankLine === list_item.parts.first &&
    list_item.length > 1

  list_item
end
build_paragraph(margin) click to toggle source

Builds a Paragraph that is flush to margin

     # File lib/rdoc/markup/parser.rb, line 204
# Builds a Paragraph from consecutive TEXT tokens whose column equals
# +margin+, skipping the NEWLINE after each one.
#
# An INDENT token equal to +margin+ that is followed by TEXT is consumed
# silently; any other token is put back and ends the paragraph.
def build_paragraph(margin)
  p :paragraph_start => margin if @debug

  paragraph = RDoc::Markup::Paragraph.new

  until @tokens.empty?
    type, data, column, = get

    # Indentation matching the margin just leads into the next text line.
    next if type == :INDENT && data == margin && peek_token[0] == :TEXT

    unless type == :TEXT && column == margin
      unget
      break
    end

    paragraph << data
    skip :NEWLINE
  end

  p :paragraph_end => margin if @debug

  paragraph
end
build_verbatim(margin) click to toggle source

Builds a Verbatim that is flush to margin

     # File lib/rdoc/markup/parser.rb, line 240
# Builds a Verbatim block from tokens indented deeper than +margin+.
#
# HEADER, RULE and list-marker tokens are written back out as literal
# text ('=' runs, '-' runs, the marker itself) followed by padding spaces.
# An INDENT that does not exceed +margin+, a list marker at or left of
# +margin+, or an unknown token type is put back and ends the block; so
# does a NEWLINE that is not followed by INDENT or another NEWLINE.
def build_verbatim(margin)
  p :verbatim_begin => margin if @debug
  verbatim = RDoc::Markup::Verbatim.new

  until @tokens.empty?
    type, data, column, = get

    case type
    when :INDENT
      unless data > margin
        unget
        break
      end

      # Keep only the indentation beyond the margin.
      verbatim << ' ' * (data - margin)
    when :HEADER
      verbatim << '=' * data

      # Pad up to the next token's column; no padding without a next token.
      next_column = peek_token[2] || (column + data)
      verbatim << ' ' * (next_column - column - data)
    when :RULE
      width = data + 2
      verbatim << '-' * width

      next_column = peek_token[2] || (column + width)
      verbatim << ' ' * (next_column - column - width)
    when :TEXT
      verbatim << data
    when *LIST_TOKENS
      if column <= margin
        unget
        break
      end

      list_marker = case type
                    when :BULLET then data
                    when :LABEL then "[#{data}]"
                    when :NOTE then "#{data}::"
                    when :LALPHA, :NUMBER, :UALPHA then "#{data}."
                    end

      verbatim << list_marker

      _, marker_width, = get # the token following the marker

      verbatim << ' ' * (marker_width - list_marker.length)
    when :NEWLINE
      verbatim << data
      break unless [:INDENT, :NEWLINE].include? peek_token[0]
    else
      unget
      break
    end
  end

  verbatim.normalize

  p :verbatim_end => margin if @debug

  verbatim
end
get() click to toggle source

Pulls the next token from the stream.

     # File lib/rdoc/markup/parser.rb, line 309
# Removes the first token from the stream, remembers it as the current
# token and returns it (nil when the stream is empty).
def get
  token = @tokens.shift
  @current_token = token

  p(:get => token) if @debug

  token
end
parse(indent = 0) click to toggle source

Parses the tokens into a Document

     # File lib/rdoc/markup/parser.rb, line 318
# Parses the token stream into an Array of block-level parts, dispatching
# on each token's type to the matching build_* method.
#
# Stops and puts the token back when a non-INDENT token sits left of
# +indent+ or an INDENT is smaller than +indent+. Raises ParseError for a
# token type it does not know.
def parse(indent = 0)
  p :parse_start => indent if @debug

  document = []

  until @tokens.empty?
    type, data, column, = get

    if type != :INDENT && column < indent
      unget
      break
    end

    case type
    when :HEADER
      document << build_heading(data)
    when :INDENT
      # Indentation equal to the current level carries no information.
      next if indent == data

      unget
      break if indent > data

      # Deeper indentation starts a verbatim block.
      document << build_verbatim(indent)
    when :NEWLINE
      document << RDoc::Markup::BlankLine.new
      skip :NEWLINE, false
    when :RULE
      document << RDoc::Markup::Rule.new(data)
      skip :NEWLINE
    when :TEXT
      unget
      document << build_paragraph(indent)

      # we're done with this paragraph (indent mismatch)
      break if peek_token[0] == :TEXT
    when *LIST_TOKENS
      unget

      list = build_list(indent)
      document << list if list

      # we're done with this list (indent mismatch)
      break if LIST_TOKENS.include?(peek_token.first) && indent > 0
    else
      type, data, column, line = @current_token
      raise ParseError,
            "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
    end
  end

  p :parse_end => indent if @debug

  document
end
peek_token() click to toggle source

Returns the next token on the stream without modifying the stream

     # File lib/rdoc/markup/parser.rb, line 380
# Returns the token at the head of the stream without removing it, or an
# empty Array when there is none.
def peek_token
  upcoming = @tokens.first
  upcoming = [] unless upcoming

  p(:peek => upcoming) if @debug

  upcoming
end
skip(token_type, error = true) click to toggle source

Skips a token of token_type, optionally raising an error.

     # File lib/rdoc/markup/parser.rb, line 389
# Consumes the next token when its type is +token_type+ and returns it.
#
# Returns nil at the end of the stream. On a type mismatch the token is
# put back; a ParseError is then raised unless +error+ is false, in which
# case nil is returned.
def skip(token_type, error = true)
  type, = get

  return unless type # end of stream

  if token_type == type
    @current_token
  else
    unget

    raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if
      error
  end
end
text() click to toggle source

Consumes tokens until NEWLINE and turns them back into text

     # File lib/rdoc/markup/parser.rb, line 405
# Consumes tokens up to (but not including) the next NEWLINE, or to the
# end of the stream, and turns them back into the text they came from.
#
# List markers are re-emitted literally, followed by the padding recorded
# in the token that follows each marker. That token's width counts the
# marker itself, so the padding is the width minus the literal marker
# length. Using the marker's real text keeps a '-' bullet as '-' and
# avoids an extra space after multi-character labels such as "10.".
#
# Raises ParseError for a token type that cannot be turned back into text.
def text
  buffer = ''

  loop do
    type, data, = get

    break if type.nil? # end of stream

    if type == :NEWLINE
      unget # leave the NEWLINE in the stream for the caller
      break
    end

    buffer << case type
              when :TEXT
                data
              when :BULLET
                _, space, = get # SPACE
                "#{data}#{' ' * (space - data.length)}"
              when :LABEL
                _, space, = get # SPACE
                "[#{data}]#{' ' * (space - data.length - 2)}"
              when :LALPHA, :NUMBER, :UALPHA
                _, space, = get # SPACE
                "#{data}.#{' ' * (space - data.length - 1)}"
              when :NOTE
                _, space, = get # SPACE
                "#{data}::#{' ' * (space - data.length - 2)}"
              else
                raise ParseError, "unhandled token #{@current_token.inspect}"
              end
  end

  buffer
end
token_pos(offset) click to toggle source

Calculates the column and line of the current token based on offset.

     # File lib/rdoc/markup/parser.rb, line 442
# Converts a scanner +offset+ into a [column, line] pair, using the offset
# at which the current line starts and the current line number.
def token_pos(offset)
  column = offset - @line_pos

  [column, @line]
end
tokenize(input) click to toggle source

Turns text input into a stream of tokens

     # File lib/rdoc/markup/parser.rb, line 449
# Turns text +input+ into a stream of tokens appended to @tokens and
# returns self.
#
# Every token is an Array of [type, value, column, line]; the position
# comes from #token_pos. HEADER and list-marker tokens are each followed
# by a second token pushed in the same pass: the remaining line as TEXT
# after a HEADER, and a SPACE token after a list marker. The SPACE value
# is the width of the marker plus its trailing whitespace.
#
# Raises ParseError if an ordered-list label cannot be classified.
def tokenize(input)
  s = StringScanner.new input

  @line = 0
  @line_pos = 0

  until s.eos?
    pos = s.pos

    # Branches that produce two tokens push the first one themselves; the
    # value of the case expression is always the last token to append.
    @tokens << case
               when s.scan(/\r?\n/)
                 token = [:NEWLINE, s.matched, *token_pos(pos)]
                 @line_pos = s.pos
                 @line += 1
                 token
               when s.scan(/ +/)
                 [:INDENT, s.matched_size, *token_pos(pos)]
               when s.scan(/(=+)\s*/)
                 level = s[1].length
                 level = 6 if level > 6 # heading levels are capped at 6
                 @tokens << [:HEADER, level, *token_pos(pos)]

                 pos = s.pos
                 s.scan(/.*/)
                 [:TEXT, s.matched, *token_pos(pos)]
               when s.scan(/^(-{3,}) *$/)
                 [:RULE, s[1].length - 2, *token_pos(pos)]
               when s.scan(/([*-])\s+/)
                 @tokens << [:BULLET, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/([a-z]|\d+)\.[ \t]+\S/i)
                 # The /i flag lets upper-case labels ("A.") match too, so
                 # the :UALPHA classification below can be reached.
                 list_label = s[1]
                 width      = s.matched_size - 1

                 s.pos -= 1 # unget \S

                 list_type = case list_label
                             when /[a-z]/ then :LALPHA
                             when /[A-Z]/ then :UALPHA
                             when /\d/    then :NUMBER
                             else
                               raise ParseError, "BUG token #{list_label}"
                             end

                 @tokens << [list_type, list_label, *token_pos(pos)]
                 [:SPACE, width, *token_pos(pos)]
               when s.scan(/\[(.*?)\]( +|$)/)
                 @tokens << [:LABEL, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/(.*?)::( +|$)/)
                 @tokens << [:NOTE, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               else
                 # Anything else is plain text up to the end of the line.
                 s.scan(/.*/)
                 [:TEXT, s.matched, *token_pos(pos)]
               end
  end

  self
end
unget(token = @current_token) click to toggle source

Returns the current token, or the given token, to the front of the token stream

     # File lib/rdoc/markup/parser.rb, line 512
# Puts +token+ (the current token by default) back at the front of the
# stream. Raises Error when +token+ equals the token already at the front;
# a nil +token+ is otherwise ignored.
def unget(token = @current_token)
  p(:unget => token) if @debug

  if token == @tokens.first
    raise Error, 'too many #ungets'
  end

  return unless token

  @tokens.unshift token
end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.