eaiovnaovbqoebvqoeavibavo usr/share/ruby/racc/parser.rb000064400000043565147634553320012222 0ustar00#-- # $originalId: parser.rb,v 1.8 2006/07/06 11:42:07 aamine Exp $ # # Copyright (c) 1999-2006 Minero Aoki # # This program is free software. # You can distribute/modify this program under the same terms of ruby. # # As a special exception, when this code is copied by Racc # into a Racc output file, you may use that output file # without restriction. #++ module Racc class ParseError < StandardError; end end unless defined?(::ParseError) ParseError = Racc::ParseError end # Racc is a LALR(1) parser generator. # It is written in Ruby itself, and generates Ruby programs. # # == Command-line Reference # # racc [-ofilename] [--output-file=filename] # [-erubypath] [--embedded=rubypath] # [-v] [--verbose] # [-Ofilename] [--log-file=filename] # [-g] [--debug] # [-E] [--embedded] # [-l] [--no-line-convert] # [-c] [--line-convert-all] # [-a] [--no-omit-actions] # [-C] [--check-only] # [-S] [--output-status] # [--version] [--copyright] [--help] grammarfile # # [+filename+] # Racc grammar file. Any extention is permitted. # [-o+outfile+, --output-file=+outfile+] # A filename for output. default is <+filename+>.tab.rb # [-O+filename+, --log-file=+filename+] # Place logging output in file +filename+. # Default log file name is <+filename+>.output. # [-e+rubypath+, --executable=+rubypath+] # output executable file(mode 755). where +path+ is the ruby interpreter. # [-v, --verbose] # verbose mode. create +filename+.output file, like yacc's y.output file. # [-g, --debug] # add debug code to parser class. To display debuggin information, # use this '-g' option and set @yydebug true in parser class. # [-E, --embedded] # Output parser which doesn't need runtime files (racc/parser.rb). # [-C, --check-only] # Check syntax of racc grammer file and quit. # [-S, --output-status] # Print messages time to time while compiling. # [-l, --no-line-convert] # turns off line number converting. 
# [-c, --line-convert-all] # Convert line number of actions, inner, header and footer. # [-a, --no-omit-actions] # Call all actions, even if an action is empty. # [--version] # print Racc version and quit. # [--copyright] # Print copyright and quit. # [--help] # Print usage and quit. # # == Generating Parser Using Racc # # To compile Racc grammar file, simply type: # # $ racc parse.y # # This creates ruby script file "parse.tab.rb". The -o option can change the output filename. # # == Writing A Racc Grammar File # # If you want your own parser, you have to write a grammar file. # A grammar file contains the name of your parser class, grammar for the parser, # user code, and anything else. # When writing a grammar file, yacc's knowledge is helpful. # If you have not used yacc before, Racc is not too difficult. # # Here's an example Racc grammar file. # # class Calcparser # rule # target: exp { print val[0] } # # exp: exp '+' exp # | exp '*' exp # | '(' exp ')' # | NUMBER # end # # Racc grammar files resemble yacc files. # But (of course), this is Ruby code. # yacc's $$ is the 'result', $0, $1... is # an array called 'val', and $-1, $-2... is an array called '_values'. # # See the {Grammar File Reference}[rdoc-ref:lib/racc/rdoc/grammar.en.rdoc] for # more information on grammar files. # # == Parser # # Then you must prepare the parse entry method. There are two types of # parse methods in Racc, Racc::Parser#do_parse and Racc::Parser#yyparse # # Racc::Parser#do_parse is simple. # # It's yyparse() of yacc, and Racc::Parser#next_token is yylex(). # This method must return an array like [TOKENSYMBOL, ITS_VALUE]. # EOF is [false, false]. # (TOKENSYMBOL is a Ruby symbol (taken from String#intern) by default. # If you want to change this, see the grammar reference.) # # Racc::Parser#yyparse is a little complicated, but useful. # It does not use Racc::Parser#next_token, instead it gets tokens from any iterator.
# # For example, yyparse(obj, :scan) causes # calling +obj#scan+, and you can return tokens by yielding them from +obj#scan+. # # == Debugging # # When debugging, "-v" or/and the "-g" option is helpful. # # "-v" creates verbose log file (.output). # "-g" creates a "Verbose Parser". # Verbose Parser prints the internal status when parsing. # But it's _not_ automatic. # You must use -g option and set +@yydebug+ to +true+ in order to get output. # -g option only creates the verbose parser. # # === Racc reported syntax error. # # Aren't there too many "end"s? # The grammar of racc files changed in v0.10. # # Racc does not use '%' mark, while yacc uses huge number of '%' marks. # # === Racc reported "XXXX conflicts". # # Try "racc -v xxxx.y". # It causes producing racc's internal log file, xxxx.output. # # === Generated parsers do not work correctly # # Try "racc -g xxxx.y". # This command lets racc generate "debugging parser". # Then set @yydebug=true in your parser. # It produces a working log of your parser. # # == Re-distributing Racc runtime # # A parser, which is created by Racc, requires the Racc runtime module; # racc/parser.rb. # # Ruby 1.8.x comes with Racc runtime module, # you need NOT distribute Racc runtime files. # # If you want to include the Racc runtime module with your parser, # this can be done by using the '-E' option: # # $ racc -E -omyparser.rb myparser.y # # This command creates myparser.rb which `includes' Racc runtime. # All you must do is distribute your parser file (myparser.rb). # # Note: parser.rb is LGPL, but your parser is not. # Your own parser is completely yours.
module Racc

  # Racc_No_Extentions is only given a default (false) here; a value defined
  # before this point is left untouched and, when truthy, makes the block
  # below fall back to the Ruby runtime core.
  unless defined?(Racc_No_Extentions)
    Racc_No_Extentions = false # :nodoc:
  end

  class Parser

    Racc_Runtime_Version = '1.4.6'
    Racc_Runtime_Revision = %w$originalRevision: 1.8 $[1]

    Racc_Runtime_Core_Version_R = '1.4.6'
    Racc_Runtime_Core_Revision_R = %w$originalRevision: 1.8 $[1]

    # Select the runtime core.  The C core is tried first; every LoadError
    # raised inside this block (failed require, missing _racc_do_parse_c, or
    # Racc_No_Extentions being truthy) selects the Ruby core in the rescue.
    begin
      require 'racc/cparse'
    # Racc_Runtime_Core_Version_C  = (defined in extention)
      Racc_Runtime_Core_Revision_C = Racc_Runtime_Core_Id_C.split[2]
      unless new.respond_to?(:_racc_do_parse_c, true)
        raise LoadError, 'old cparse.so'
      end
      if Racc_No_Extentions
        raise LoadError, 'selecting ruby version of racc runtime core'
      end

      Racc_Main_Parsing_Routine    = :_racc_do_parse_c # :nodoc:
      Racc_YY_Parse_Method         = :_racc_yyparse_c # :nodoc:
      Racc_Runtime_Core_Version    = Racc_Runtime_Core_Version_C # :nodoc:
      Racc_Runtime_Core_Revision   = Racc_Runtime_Core_Revision_C # :nodoc:
      Racc_Runtime_Type            = 'c' # :nodoc:
    rescue LoadError
      Racc_Main_Parsing_Routine    = :_racc_do_parse_rb
      Racc_YY_Parse_Method         = :_racc_yyparse_rb
      Racc_Runtime_Core_Version    = Racc_Runtime_Core_Version_R
      Racc_Runtime_Core_Revision   = Racc_Runtime_Core_Revision_R
      Racc_Runtime_Type            = 'ruby'
    end

    # Returns Racc_Runtime_Type ('c' or 'ruby', as chosen above).
    def Parser.racc_runtime_type # :nodoc:
      Racc_Runtime_Type
    end

    # Prepares debug settings and returns the parser class's Racc_arg table.
    #
    # @yydebug is forced to false unless the class constant
    # Racc_debug_parser is truthy; when debugging is on, @racc_debug_out
    # defaults to $stderr.  A Racc_arg shorter than 14 entries gets index 13
    # (read as use_result by _racc_do_reduce) set to true.
    def _racc_setup
      @yydebug = false unless self.class::Racc_debug_parser
      @yydebug = false unless defined?(@yydebug)
      if @yydebug
        @racc_debug_out = $stderr unless defined?(@racc_debug_out)
        @racc_debug_out ||= $stderr
      end
      arg = self.class::Racc_arg
      arg[13] = true if arg.size < 14
      arg
    end

    # Resets the per-parse state: state stack (starting in state 0), token
    # and value stacks, current token/value and the error-recovery flags.
    def _racc_init_sysvars
      @racc_state  = [0]
      @racc_tstack = []
      @racc_vstack = []

      @racc_t        = nil
      @racc_val      = nil

      @racc_read_next = true

      @racc_user_yyerror = false
      @racc_error_status = 0
    end

    # The entry point of the parser. This method is used with #next_token.
    # If Racc wants to get token (and its value), calls next_token.
    #
    # Example:
    #     def parse
    #       @q = [[1,1],
    #             [2,2],
    #             [3,3],
    #             [false, '$']]
    #       do_parse
    #     end
    #
    #     def next_token
    #       @q.shift
    #     end
    def do_parse
      __send__(Racc_Main_Parsing_Routine, _racc_setup(), false)
    end

    # The method to fetch next token.
    # If you use #do_parse method, you must implement #next_token.
    #
    # The format of return value is [TOKEN_SYMBOL, VALUE].
    # +token-symbol+ is represented by Ruby's symbol by default, e.g. :IDENT
    # for 'IDENT'.  ";" (String) for ';'.
    #
    # The final symbol (End of file) must be false.
    def next_token
      raise NotImplementedError, "#{self.class}\#next_token is not defined"
    end

    # Ruby implementation of the #do_parse main loop.
    #
    # +arg+ is the table array returned by _racc_setup; +in_debug+ is not
    # used by this implementation.  The loop runs until something throws
    # :racc_end_parse (see _racc_evalact), whose value becomes the result.
    def _racc_do_parse_rb(arg, in_debug)
      action_table, action_check, action_default, action_pointer,
      _,            _,            _,              _,
      _,            _,            token_table,    _,
      _,            _,            * = arg

      _racc_init_sysvars
      tok = act = i = nil

      catch(:racc_end_parse) {
        while true
          if i = action_pointer[@racc_state[-1]]
            # The current state needs a lookahead token.
            if @racc_read_next
              if @racc_t != 0   # not EOF
                tok, @racc_val = next_token()
                unless tok      # EOF
                  @racc_t = 0
                else
                  @racc_t = (token_table[tok] or 1)   # error token
                end
                racc_read_token(@racc_t, tok, @racc_val) if @yydebug
                @racc_read_next = false
              end
            end
            i += @racc_t
            # Fall back to the state's default action when the table slot is
            # out of range, empty, or owned by a different state.
            unless i >= 0 and
                   act = action_table[i] and
                   action_check[i] == @racc_state[-1]
              act = action_default[@racc_state[-1]]
            end
          else
            act = action_default[@racc_state[-1]]
          end
          # _racc_evalact returns a follow-up action or nil when done.
          while act = _racc_evalact(act, arg)
            ;
          end
        end
      }
    end

    # Another entry point for the parser.
    # If you use this method, you must implement RECEIVER#METHOD_ID method.
    #
    # RECEIVER#METHOD_ID is a method to get next token.
    # It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE].
def yyparse(recv, mid)
  __send__(Racc_YY_Parse_Method, recv, mid, _racc_setup(), true)
end

# Ruby implementation of #yyparse.
#
# Tokens are obtained by calling +mid+ on +recv+ with a block; each yielded
# [token, value] pair is fed to the automaton.  +arg+ is the table array
# returned by _racc_setup; +c_debug+ is not used by this implementation.
# The result is the value thrown with :racc_end_parse (see _racc_evalact).
def _racc_yyparse_rb(recv, mid, arg, c_debug)
  action_table, action_check, action_default, action_pointer,
  _,            _,            _,              _,
  _,            _,            token_table,    _,
  _,            _,            * = arg

  _racc_init_sysvars
  act = nil
  i = nil

  catch(:racc_end_parse) {
    # Run default actions until the current state needs a lookahead token.
    until i = action_pointer[@racc_state[-1]]
      while act = _racc_evalact(action_default[@racc_state[-1]], arg)
        ;
      end
    end
    recv.__send__(mid) do |tok, val|
      unless tok
        @racc_t = 0
      else
        @racc_t = (token_table[tok] or 1)   # error token
      end
      @racc_val = val
      @racc_read_next = false

      i += @racc_t
      unless i >= 0 and
             act = action_table[i] and
             action_check[i] == @racc_state[-1]
        act = action_default[@racc_state[-1]]
      end
      while act = _racc_evalact(act, arg)
        ;
      end

      # Keep acting without reading while the state needs no lookahead, the
      # current token has not been consumed yet, or the input is at EOF.
      while not(i = action_pointer[@racc_state[-1]]) or
            not @racc_read_next or
            @racc_t == 0   # $
        unless i and i += @racc_t and
               i >= 0 and
               act = action_table[i] and
               action_check[i] == @racc_state[-1]
          act = action_default[@racc_state[-1]]
        end
        while act = _racc_evalact(act, arg)
          ;
        end
      end
    end
  }
end

###
### common
###

# Executes one parser action.
#
# +act+ is interpreted against shift_n / reduce_n taken from +arg+:
# 0 < act < shift_n shifts, -reduce_n < act < 0 reduces, shift_n accepts
# and -reduce_n is the error action.  Returns a follow-up action to run
# next, or nil.  Accepting and unrecoverable errors throw :racc_end_parse.
def _racc_evalact(act, arg)
  action_table, action_check, _, action_pointer,
  _,            _,            _, _,
  _,            _,            _, shift_n,
  reduce_n,     _,            _, * = arg
  nerr = 0   # tmp

  if act > 0 and act < shift_n
    #
    # shift
    #
    if @racc_error_status > 0
      @racc_error_status -= 1 unless @racc_t == 1   # error token
    end
    @racc_vstack.push @racc_val
    @racc_state.push act
    @racc_read_next = true
    if @yydebug
      @racc_tstack.push @racc_t
      racc_shift @racc_t, @racc_tstack, @racc_vstack
    end

  elsif act < 0 and act > -reduce_n
    #
    # reduce
    #
    # A semantic action may leave early via throw :racc_jump (yyerror
    # throws 1, yyaccept throws 2); a normal reduce yields false.
    code = catch(:racc_jump) {
      @racc_state.push _racc_do_reduce(arg, act)
      false
    }
    if code
      case code
      when 1 # yyerror
        @racc_user_yyerror = true   # user_yyerror
        return -reduce_n
      when 2 # yyaccept
        return shift_n
      else
        raise '[Racc Bug] unknown jump code'
      end
    end

  elsif act == shift_n
    #
    # accept
    #
    racc_accept if @yydebug
    throw :racc_end_parse, @racc_vstack[0]

  elsif act == -reduce_n
    #
    # error
    #
    case @racc_error_status
    when 0
      # Consult the flag set by the yyerror branch above.  The previous
      # test read arg[21], a slot this method never fills in, so #on_error
      # was invoked even for user-requested errors although #yyerror is
      # documented as not calling #on_error.
      unless @racc_user_yyerror
        nerr += 1
        on_error @racc_t, @racc_val, @racc_vstack
      end
    when 3
      if @racc_t == 0   # is $
        throw :racc_end_parse, nil
      end
      @racc_read_next = true
    end
    @racc_user_yyerror = false
    @racc_error_status = 3
    # Pop states until one can act on the error token (token id 1).
    while true
      if i = action_pointer[@racc_state[-1]]
        i += 1   # error token
        if  i >= 0 and
            (act = action_table[i]) and
            action_check[i] == @racc_state[-1]
          break
        end
      end
      throw :racc_end_parse, nil if @racc_state.size <= 1
      @racc_state.pop
      @racc_vstack.pop
      if @yydebug
        @racc_tstack.pop
        racc_e_pop @racc_state, @racc_tstack, @racc_vstack
      end
    end
    return act

  else
    raise "[Racc Bug] unknown action #{act.inspect}"
  end

  racc_next_state(@racc_state[-1], @racc_state) if @yydebug

  nil
end

# Performs the reduction encoded by the negative action +act+.
#
# reduce_table holds three entries per rule: number of symbols to pop, the
# resulting nonterminal, and the name of the action method.  The action's
# return value is pushed on the value stack; the method returns the state
# to go to (from goto_table, or goto_default when no entry matches).
def _racc_do_reduce(arg, act)
  _,          _,            _,            _,
  goto_table, goto_check,   goto_default, goto_pointer,
  nt_base,    reduce_table, _,            _,
  _,          use_result,   * = arg
  state = @racc_state
  vstack = @racc_vstack
  tstack = @racc_tstack

  i = act * -3
  len       = reduce_table[i]
  reduce_to = reduce_table[i+1]
  method_id = reduce_table[i+2]
  void_array = []

  # The token stack is only kept in sync while debugging.
  tmp_t = tstack[-len, len] if @yydebug
  tmp_v = vstack[-len, len]
  tstack[-len, len] = void_array if @yydebug
  vstack[-len, len] = void_array
  state[-len, len]  = void_array

  # tstack must be updated AFTER method call
  if use_result
    vstack.push __send__(method_id, tmp_v, vstack, tmp_v[0])
  else
    vstack.push __send__(method_id, tmp_v, vstack)
  end
  tstack.push reduce_to

  racc_reduce(tmp_t, reduce_to, tstack, vstack) if @yydebug

  k1 = reduce_to - nt_base
  if i = goto_pointer[k1]
    i += state[-1]
    if i >= 0 and (curstate = goto_table[i]) and goto_check[i] == k1
      return curstate
    end
  end
  goto_default[k1]
end

# This method is called when a parse error is found.
#
# ERROR_TOKEN_ID is an internal ID of token which caused error.
# You can get string representation of this ID by calling
# #token_to_str.
#
# ERROR_VALUE is a value of error token.
#
# value_stack is a stack of symbol values.
# DO NOT MODIFY this object.
#
# This method raises ParseError by default.
#
# If this method returns, parsers enter "error recovering mode".
def on_error(t, val, vstack)
  shown_value = val.inspect
  shown_token = token_to_str(t) || '?'
  raise ParseError, sprintf("\nparse error on value %s (%s)", shown_value, shown_token)
end

# Enter error recovering mode.
# This method does not call #on_error.
def yyerror
  throw :racc_jump, 1
end

# Exit parser.
# Return value is Symbol_Value_Stack[0].
def yyaccept
  throw :racc_jump, 2
end

# Leave error recovering mode.
def yyerrok
  @racc_error_status = 0
end

# For debugging output
#
# The helpers below write a trace of parser activity to @racc_debug_out.

# Traces a token that was just read: raw token, its name and its value.
def racc_read_token(t, tok, val)
  sink = @racc_debug_out
  sink.print 'read '
  sink.print tok.inspect, '(', racc_token2str(t), ') '
  sink.puts val.inspect
  sink.puts
end

# Traces a shift of token +tok+ followed by the stack contents.
def racc_shift(tok, tstack, vstack)
  @racc_debug_out.puts "shift #{racc_token2str tok}"
  racc_print_stacks tstack, vstack
  @racc_debug_out.puts
end

# Traces a reduction of the tokens +toks+ to the symbol +sim+.
def racc_reduce(toks, sim, tstack, vstack)
  sink = @racc_debug_out
  sink.print 'reduce '
  if toks.empty?
    sink.print ' '
  else
    toks.each do |tok_id|
      sink.print ' ', racc_token2str(tok_id)
    end
  end
  sink.puts " --> #{racc_token2str(sim)}"
  racc_print_stacks tstack, vstack
  @racc_debug_out.puts
end

# Traces acceptance of the input.
def racc_accept
  sink = @racc_debug_out
  sink.puts 'accept'
  sink.puts
end

# Traces a state popped during error recovery.
def racc_e_pop(state, tstack, vstack)
  @racc_debug_out.puts 'error recovering mode: pop token'
  racc_print_states state
  racc_print_stacks tstack, vstack
  @racc_debug_out.puts
end

# Traces a transition to +curstate+ followed by the state stack.
def racc_next_state(curstate, state)
  @racc_debug_out.puts "goto #{curstate}"
  racc_print_states state
  @racc_debug_out.puts
end

# Prints the token stack +t+ paired with the value stack +v+.
def racc_print_stacks(t, v)
  sink = @racc_debug_out
  sink.print ' ['
  t.each_with_index do |tok_id, idx|
    sink.print ' (', racc_token2str(tok_id), ' ', v[idx].inspect, ')'
  end
  sink.puts ' ]'
end

# Prints the state stack +s+.
def racc_print_states(s)
  sink = @racc_debug_out
  sink.print ' ['
  s.each do |state_id|
    sink.print ' ', state_id
  end
  sink.puts ' ]'
end

# Looks up the name of token +tok+ in the class's Racc_token_to_s_table and
# raises when the table has no entry for it.
def racc_token2str(tok)
  name = self.class::Racc_token_to_s_table[tok]
  raise "[Racc Bug] can't convert token #{tok} to string" unless name
  name
end

# Convert internal ID of token symbol to the string.
def token_to_str(t)
  self.class::Racc_token_to_s_table[t]
end

end   # class Parser

end   # module Racc
# NOTE(review): the next line is tar-archive header residue marking the start
# of rdoc/parser.rb; it is not Ruby code.
usr/share/gems/gems/rdoc-4.0.0/lib/rdoc/parser.rb000064400000017516147635623610015276 0ustar00
# -*- coding: us-ascii -*-

##
# A parser is simply a class that subclasses RDoc::Parser and implements #scan
# to fill in an RDoc::TopLevel with parsed data.
#
# The initialize method takes an RDoc::TopLevel to fill with parsed content,
# the name of the file to be parsed, the content of the file, an RDoc::Options
# object and an RDoc::Stats object to inform the user of parsed items.  The
# scan method is then called to parse the file and must return the
# RDoc::TopLevel object.  By calling super these items will be set for you.
#
# In order to be used by RDoc the parser needs to register the file extensions
# it can parse.  Use ::parse_files_matching to register extensions.
#
#   require 'rdoc'
#
#   class RDoc::Parser::Xyz < RDoc::Parser
#     parse_files_matching /\.xyz$/
#
#     def initialize top_level, file_name, content, options, stats
#       super
#
#       # extra initialization if needed
#     end
#
#     def scan
#       # parse file and fill in @top_level
#     end
#   end

class RDoc::Parser

  @parsers = []

  class << self

    ##
    # An Array of arrays that maps file extension (or name) regular
    # expressions to parser classes that will parse matching filenames.
    #
    # Use parse_files_matching to register a parser's file extensions.

    attr_reader :parsers

  end

  ##
  # The name of the file being parsed

  attr_reader :file_name

  ##
  # Alias an extension to another extension. After this call, files ending
  # "new_ext" will be parsed using the same parser as "old_ext"
  #
  # A leading dot on either extension is ignored.  Returns false when no
  # parser is found for +old_ext+, true otherwise.

  def self.alias_extension(old_ext, new_ext)
    old_ext = old_ext.sub(/^\.(.*)/, '\1')
    new_ext = new_ext.sub(/^\.(.*)/, '\1')

    # Look the parser up through a dummy file name carrying the old extension.
    parser = can_parse_by_name "xxx.#{old_ext}"
    return false unless parser

    # unshift puts the alias ahead of the already registered entries.
    RDoc::Parser.parsers.unshift [/\.#{new_ext}$/, parser]

    true
  end

  ##
  # Determines if the file is a "binary" file which basically means it has
  # content that an RDoc parser shouldn't try to consume.
def self.binary?(file)
  return false if file =~ /\.(rdoc|txt)$/

  # Only the first 1024 bytes are inspected; an empty file reads as nil.
  s = File.read(file, 1024) or return false

  have_encoding = s.respond_to? :encoding

  # Marshal header or any NUL byte means binary.
  return true if s[0, 2] == Marshal.dump('')[0, 2] or s.index("\x00")

  if have_encoding then
    mode = "r"
    s.sub!(/\A#!.*\n/, '') # assume shebang line isn't longer than 1024.
    encoding = s[/^\s*\#\s*(?:-\*-\s*)?(?:en)?coding:\s*([^\s;]+?)(?:-\*-|[\s;])/, 1]
    mode = "r:#{encoding}" if encoding
    # Re-read with the declared encoding and check the bytes are valid in it.
    s = File.open(file, mode) {|f| f.gets(nil, 1024)}

    not s.valid_encoding?
  else
    # No String#encoding: treat as binary when more than 30% of the sampled
    # bytes are counted as non-printable.
    if 0.respond_to? :fdiv then
      s.count("\x00-\x7F", "^ -~\t\r\n").fdiv(s.size) > 0.3
    else # HACK 1.8.6
      (s.count("\x00-\x7F", "^ -~\t\r\n").to_f / s.size) > 0.3
    end
  end
end

##
# Processes common directives for CodeObjects for the C and Ruby parsers.
#
# Applies +directive+'s +value+ to +code_object+, if appropriate

def self.process_directive code_object, directive, value
  warn "RDoc::Parser::process_directive is deprecated and wil be removed in RDoc 4. Use RDoc::Markup::PreProcess#handle_directive instead" if $-w

  case directive
  when 'nodoc' then
    code_object.document_self = nil # notify nodoc
    code_object.document_children = value.downcase != 'all'
  when 'doc' then
    code_object.document_self = true
    code_object.force_documentation = true
  when 'yield', 'yields' then
    # remove parameter &block
    code_object.params.sub!(/,?\s*&\w+/, '') if code_object.params

    code_object.block_params = value
  when 'arg', 'args' then
    code_object.params = value
  end
end

##
# Checks if +file+ is a zip file in disguise.  Signatures from
# http://www.garykessler.net/library/file_sigs.html
#
# Any error while reading +file+ is treated as "not a zip file".

def self.zip? file
  zip_signature = File.read file, 4

  zip_signature == "PK\x03\x04" or
    zip_signature == "PK\x05\x06" or
    zip_signature == "PK\x07\x08"
rescue
  false
end

##
# Return a parser that can handle a particular extension

def self.can_parse file_name
  parser = can_parse_by_name file_name

  # HACK Selenium hides a jar file using a .txt extension
  return if parser == RDoc::Parser::Simple and zip? file_name

  parser
end

##
# Returns a parser that can handle the extension for +file_name+.  This does
# not depend upon the file being readable.
#
# Returns nil when no parser applies or the modeline check hits
# Errno::EACCES.

def self.can_parse_by_name file_name
  _, parser = RDoc::Parser.parsers.find { |regexp,| regexp =~ file_name }

  # The default parser must not parse binary files
  ext_name = File.extname file_name
  return parser if ext_name.empty?

  if parser == RDoc::Parser::Simple and ext_name !~ /txt|rdoc/ then
    case check_modeline file_name
    when nil, 'rdoc' then # continue
    else return nil
    end
  end

  parser
rescue Errno::EACCES
end

##
# Returns the file type from the modeline in +file_name+
#
# Only the first line is examined.  Returns the lower-cased type, or nil
# when there is no modeline, it names no mode, or it only declares a coding.

def self.check_modeline file_name
  # File.open, not Kernel#open: Kernel#open treats a name starting with "|"
  # as a command to run, and file names here come from the file system.
  line = File.open(file_name) { |io| io.gets }

  /-\*-\s*(.*?\S)\s*-\*-/ =~ line

  return nil unless type = $1

  if /;/ =~ type then
    return nil unless /(?:\s|\A)mode:\s*([^\s;]+)/i =~ type
    type = $1
  end

  return nil if /coding:/i =~ type

  type.downcase
rescue ArgumentError # invalid byte sequence, etc.
end

##
# Finds and instantiates the correct parser for the given +file_name+ and
# +content+.
#
# Returns nil for binary files, when no parser matches, or when a
# SystemCallError is raised along the way.

def self.for top_level, file_name, content, options, stats
  return if binary? file_name

  parser = use_markup content

  unless parser then
    parse_name = file_name

    # If no extension, look for shebang
    if file_name !~ /\.\w+$/ && content =~ %r{\A#!(.+)} then
      shebang = $1
      case shebang
      when %r{env\s+ruby}, %r{/ruby}
        parse_name = 'dummy.rb'
      end
    end

    parser = can_parse parse_name
  end

  return unless parser

  parser.new top_level, file_name, content, options, stats
rescue SystemCallError
  nil
end

##
# Record which file types this parser can understand.
#
# It is ok to call this multiple times.

def self.parse_files_matching(regexp)
  RDoc::Parser.parsers.unshift [regexp, self]
end

##
# If there is a <tt>markup: parser_name</tt> comment at the front of the
# file, use it to determine the parser.  For example:
#
#   # markup: rdoc
#   # Class comment can go here
#
#   class C
#   end
#
# The comment should appear as the first line of the +content+.
# # If the content contains a shebang or editor modeline the comment may # appear on the second or third line. # # Any comment style may be used to hide the markup comment. def self.use_markup content markup = content.lines.first(3).grep(/markup:\s+(\w+)/) { $1 }.first return unless markup # TODO Ruby should be returned only when the filename is correct return RDoc::Parser::Ruby if %w[tomdoc markdown].include? markup markup = Regexp.escape markup RDoc::Parser.parsers.find do |_, parser| /^#{markup}$/i =~ parser.name.sub(/.*:/, '') end.last end ## # Creates a new Parser storing +top_level+, +file_name+, +content+, # +options+ and +stats+ in instance variables. In +@preprocess+ an # RDoc::Markup::PreProcess object is created which allows processing of # directives. def initialize top_level, file_name, content, options, stats @top_level = top_level @top_level.parser = self.class @store = @top_level.store @file_name = file_name @content = content @options = options @stats = stats @preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include @preprocess.options = @options end autoload :RubyTools, 'rdoc/parser/ruby_tools' autoload :Text, 'rdoc/parser/text' end # simple must come first in order to show up last in the parsers list require 'rdoc/parser/simple' require 'rdoc/parser/c' require 'rdoc/parser/changelog' require 'rdoc/parser/markdown' require 'rdoc/parser/rd' require 'rdoc/parser/ruby' usr/share/gems/gems/rdoc-4.0.0/lib/rdoc/markup/parser.rb000064400000034034147636132600016562 0ustar00require 'strscan' ## # A recursive-descent parser for RDoc markup. # # The parser tokenizes an input string then parses the tokens into a Document. # Documents can be converted into output formats by writing a visitor like # RDoc::Markup::ToHTML. # # The parser only handles the block-level constructs Paragraph, List, # ListItem, Heading, Verbatim, BlankLine and Rule. Inline markup such as # \+blah\+ is handled separately by RDoc::Markup::AttributeManager. 
#
# To see what markup the Parser implements read RDoc.  To see how to use
# RDoc markup to format text in your program read RDoc::Markup.

class RDoc::Markup::Parser

  include RDoc::Text

  ##
  # List token types

  LIST_TOKENS = [
    :BULLET,
    :LABEL,
    :LALPHA,
    :NOTE,
    :NUMBER,
    :UALPHA,
  ]

  ##
  # Parser error subclass

  class Error < RuntimeError; end

  ##
  # Raised when the parser is unable to handle the given markup

  class ParseError < Error; end

  ##
  # Enables display of debugging information

  attr_accessor :debug

  ##
  # Token accessor

  attr_reader :tokens

  ##
  # Parses +str+ into a Document.
  #
  # Use RDoc::Markup#parse instead of this method.

  def self.parse str
    parser = new
    parser.tokenize str
    doc = RDoc::Markup::Document.new
    parser.parse doc
  end

  ##
  # Returns a token stream for +str+, for testing

  def self.tokenize str
    parser = new
    parser.tokenize str
    parser.tokens
  end

  ##
  # Creates a new Parser.  See also ::parse

  def initialize
    @binary_input   = nil
    @current_token  = nil
    @debug          = false
    @have_encoding  = Object.const_defined? :Encoding
    @have_byteslice = ''.respond_to? :byteslice
    @input          = nil
    @input_encoding = nil
    @line           = 0
    @line_pos       = 0
    @s              = nil
    @tokens         = []
  end

  ##
  # Builds a Heading of +level+
  #
  # The heading text is the following :TEXT token (its trailing :NEWLINE is
  # consumed); any other token is put back and the text is empty.

  def build_heading level
    type, text, = get

    text = case type
           when :TEXT then
             skip :NEWLINE
             text
           else
             unget
             ''
           end

    RDoc::Markup::Heading.new level, text
  end

  ##
  # Builds a List flush to +margin+
  #
  # Returns nil when no item could be built and there is no pending label.

  def build_list margin
    p :list_start => margin if @debug

    list = RDoc::Markup::List.new
    label = nil

    until @tokens.empty? do
      type, data, column, = get

      case type
      when *LIST_TOKENS then
        # A marker left of the margin or of a different type ends this list.
        if column < margin || (list.type && list.type != type) then
          unget
          break
        end

        list.type = type
        peek_type, _, column, = peek_token

        case type
        when :NOTE, :LABEL then
          label = [] unless label

          if peek_type == :NEWLINE then
            # description not on the same line as LABEL/NOTE
            # skip the trailing newline & any blank lines below
            while peek_type == :NEWLINE
              get
              peek_type, _, column, = peek_token
            end

            # we may be:
            #   - at end of stream
            #   - at a column < margin:
            #         [text]
            #       blah blah blah
            #   - at the same column, but with a different type of list item
            #       [text]
            #       * blah blah
            #   - at the same column, with the same type of list item
            #       [one]
            #       [two]
            # In all cases, we have an empty description.
            # In the last case only, we continue.
            if peek_type.nil? || column < margin then
              empty = true
            elsif column == margin then
              case peek_type
              when type
                empty = :continue
              when *LIST_TOKENS
                empty = true
              else
                empty = false
              end
            else
              empty = false
            end

            if empty then
              label << data
              next if empty == :continue
              break
            end
          end
        else
          data = nil
        end

        if label then
          data = label << data
          label = nil
        end

        list_item = RDoc::Markup::ListItem.new data
        parse list_item, column
        list << list_item

      else
        unget
        break
      end
    end

    p :list_end => margin if @debug

    if list.empty? then
      return nil unless label
      return nil unless [:LABEL, :NOTE].include? list.type

      # Labels without any description still produce one (blank) item.
      list_item = RDoc::Markup::ListItem.new label, RDoc::Markup::BlankLine.new
      list << list_item
    end

    list
  end

  ##
  # Builds a Paragraph that is flush to +margin+

  def build_paragraph margin
    p :paragraph_start => margin if @debug

    paragraph = RDoc::Markup::Paragraph.new

    until @tokens.empty? do
      type, data, column, = get

      if type == :TEXT and column == margin then
        paragraph << data

        break if peek_token.first == :BREAK

        # Lines of one paragraph are joined with a single space.
        data << ' ' if skip :NEWLINE
      else
        unget
        break
      end
    end

    paragraph.parts.last.sub!(/ \z/, '') # cleanup

    p :paragraph_end => margin if @debug

    paragraph
  end

  ##
  # Builds a Verbatim that is indented from +margin+.
  #
  # The verbatim block is shifted left (the least indented lines start in
  # column 0).  Each part of the verbatim is one line of text, always
  # terminated by a newline.  Blank lines always consist of a single newline
  # character, and there is never a single newline at the end of the verbatim.

  def build_verbatim margin
    p :verbatim_begin => margin if @debug
    verbatim = RDoc::Markup::Verbatim.new

    min_indent = nil
    generate_leading_spaces = true
    line = ''

    until @tokens.empty? do
      type, data, column, = get

      if type == :NEWLINE then
        line << data
        verbatim << line
        line = ''
        generate_leading_spaces = true
        next
      end

      if column <= margin
        unget
        break
      end

      if generate_leading_spaces then
        indent = column - margin
        line << ' ' * indent
        min_indent = indent if min_indent.nil? || indent < min_indent
        generate_leading_spaces = false
      end

      # Tokens are turned back into their source text; spacing up to the
      # next token is rebuilt from the column difference.
      case type
      when :HEADER then
        line << '=' * data
        _, _, peek_column, = peek_token
        peek_column ||= column + data
        indent = peek_column - column - data
        line << ' ' * indent
      when :RULE then
        width = 2 + data
        line << '-' * width
        _, _, peek_column, = peek_token
        peek_column ||= column + width
        indent = peek_column - column - width
        line << ' ' * indent
      when :BREAK, :TEXT then
        line << data
      else # *LIST_TOKENS
        list_marker = case type
                      when :BULLET then data
                      when :LABEL  then "[#{data}]"
                      when :NOTE   then "#{data}::"
                      else # :LALPHA, :NUMBER, :UALPHA
                        "#{data}."
                      end
        line << list_marker
        peek_type, _, peek_column = peek_token
        unless peek_type == :NEWLINE then
          peek_column ||= column + list_marker.length
          indent = peek_column - column - list_marker.length
          line << ' ' * indent
        end
      end

    end

    verbatim << line << "\n" unless line.empty?
    verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
    verbatim.normalize

    p :verbatim_end => margin if @debug

    verbatim
  end

  ##
  # The character offset for the input string at the given +byte_offset+

  def char_pos byte_offset
    if @have_byteslice then
      @input.byteslice(0, byte_offset).length
    elsif @have_encoding then
      matched = @binary_input[0, byte_offset]
      matched.force_encoding @input_encoding
      matched.length
    else
      byte_offset
    end
  end

  ##
  # Pulls the next token from the stream.

  def get
    @current_token = @tokens.shift
    p :get => @current_token if @debug
    @current_token
  end

  ##
  # Parses the tokens into an array of RDoc::Markup::XXX objects,
  # and appends them to the passed +parent+ RDoc::Markup::YYY object.
  #
  # Exits at the end of the token stream, or when it encounters a token
  # in a column less than +indent+ (unless it is a NEWLINE).
  #
  # Returns +parent+.

  def parse parent, indent = 0
    p :parse_start => indent if @debug

    until @tokens.empty? do
      type, data, column, = get

      case type
      when :BREAK then
        parent << RDoc::Markup::BlankLine.new
        skip :NEWLINE, false
        next
      when :NEWLINE then
        # trailing newlines are skipped below, so this is a blank line
        parent << RDoc::Markup::BlankLine.new
        skip :NEWLINE, false
        next
      end

      # indentation change: break or verbatim
      if column < indent then
        unget
        break
      elsif column > indent then
        unget
        parent << build_verbatim(indent)
        next
      end

      # indentation is the same
      case type
      when :HEADER then
        parent << build_heading(data)
      when :RULE then
        parent << RDoc::Markup::Rule.new(data)
        skip :NEWLINE
      when :TEXT then
        unget
        parent << build_paragraph(indent)
      when *LIST_TOKENS then
        unget
        # build_list may return nil; never append nil to the parent.
        list = build_list(indent)
        parent << list if list
      else
        type, data, column, line = @current_token
        raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
      end
    end

    p :parse_end => indent if @debug

    parent

  end

  ##
  # Returns the next token on the stream without modifying the stream

  def peek_token
    token = @tokens.first || []
    p :peek => token if @debug
    token
  end

  ##
  # Creates the StringScanner

  def setup_scanner input
    @line     = 0
    @line_pos = 0
    @input    = input.dup

    # Without String#byteslice, char_pos needs a binary view of the input.
    if @have_encoding and not @have_byteslice then
      @input_encoding = @input.encoding
      @binary_input   = @input.force_encoding Encoding::BINARY
    end

    @s = StringScanner.new input
  end

  ##
  # Skips the next token if its type is +token_type+.
  #
  # Optionally raises an error if the next token is not of the expected type.

  def skip token_type, error = true
    type, = get
    return unless type # end of stream
    return @current_token if token_type == type
    unget
    raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
  end

  ##
  # Turns text +input+ into a stream of tokens
  #
  # Each token is [type, data, column, line].  Returns self.

  def tokenize input
    setup_scanner input

    until @s.eos? do
      pos = @s.pos

      # leading spaces will be reflected by the column of the next token
      # the only thing we loose are trailing spaces at the end of the file
      next if @s.scan(/ +/)

      # note: after BULLET, LABEL, etc.,
      # indent will be the column of the next non-newline token

      @tokens << case
                 # [CR]LF => :NEWLINE
                 when @s.scan(/\r?\n/) then
                   token = [:NEWLINE, @s.matched, *token_pos(pos)]
                   @line_pos = char_pos @s.pos
                   @line += 1
                   token
                 # === text => :HEADER then :TEXT
                 when @s.scan(/(=+)(\s*)/) then
                   level = @s[1].length
                   header = [:HEADER, level, *token_pos(pos)]

                   if @s[2] =~ /^\r?\n/ then
                     @s.pos -= @s[2].length
                     header
                   else
                     pos = @s.pos
                     @s.scan(/.*/)
                     @tokens << header
                     [:TEXT, @s.matched.sub(/\r$/, ''), *token_pos(pos)]
                   end
                 # --- (at least 3) and nothing else on the line => :RULE
                 when @s.scan(/(-{3,}) *\r?$/) then
                   [:RULE, @s[1].length - 2, *token_pos(pos)]
                 # * or - followed by white space and text => :BULLET
                 when @s.scan(/([*-]) +(\S)/) then
                   @s.pos -= @s[2].bytesize # unget \S
                   [:BULLET, @s[1], *token_pos(pos)]
                 # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
                 when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
                   # FIXME if tab(s), the column will be wrong
                   # either support tabs everywhere by first expanding them to
                   # spaces, or assume that they will have been replaced
                   # before (and provide a check for that at least in debug
                   # mode)
                   list_label = @s[1]
                   @s.pos -= @s[2].bytesize # unget \S
                   list_type =
                     case list_label
                     when /[a-z]/ then :LALPHA
                     when /[A-Z]/ then :UALPHA
                     when /\d/    then :NUMBER
                     else
                       raise ParseError, "BUG token #{list_label}"
                     end
                   [list_type, list_label, *token_pos(pos)]
                 # [text] followed by spaces or end of line => :LABEL
                 when @s.scan(/\[(.*?)\]( +|\r?$)/) then
                   [:LABEL, @s[1], *token_pos(pos)]
                 # text:: followed by spaces or end of line => :NOTE
                 when @s.scan(/(.*?)::( +|\r?$)/) then
                   [:NOTE, @s[1], *token_pos(pos)]
                 # anything else: :TEXT
                 else
                   # NOTE(review): whitespace runs in this file appear to have
                   # been collapsed; confirm against upstream how many trailing
                   # spaces the optional group is meant to match.
                   @s.scan(/(.*?)( )?\r?$/)
                   token = [:TEXT, @s[1], *token_pos(pos)]

                   if @s[2] then
                     @tokens << token
                     # pos is a byte offset, so advance it by the byte size of
                     # the text; its character length is too small after
                     # multibyte text and yields a wrong column.
                     [:BREAK, @s[2], *token_pos(pos + @s[1].bytesize)]
                   else
                     token
                   end
                 end
    end

    self
  end

  ##
  # Calculates the column (by character) and line of the current token from
  # +scanner+ based on +byte_offset+.

  def token_pos byte_offset
    offset = char_pos byte_offset

    [offset - @line_pos, @line]
  end

  ##
  # Returns the current token to the token stream

  def unget
    token = @current_token
    p :unget => token if @debug
    raise Error, 'too many #ungets' if token == @tokens.first
    @tokens.unshift token if token
  end

end