eaiovnaovbqoebvqoeavibavo
usr/share/ruby/racc/parser.rb 0000644 00000043565 14763455332 0012222 0 ustar 00 #--
# $originalId: parser.rb,v 1.8 2006/07/06 11:42:07 aamine Exp $
#
# Copyright (c) 1999-2006 Minero Aoki
#
# This program is free software.
# You can distribute/modify this program under the same terms of ruby.
#
# As a special exception, when this code is copied by Racc
# into a Racc output file, you may use that output file
# without restriction.
#++
module Racc
class ParseError < StandardError; end
end
unless defined?(::ParseError)
ParseError = Racc::ParseError
end
# Racc is a LALR(1) parser generator.
# It is written in Ruby itself, and generates Ruby programs.
#
# == Command-line Reference
#
# racc [-ofilename] [--output-file=filename]
# [-erubypath] [--embedded=rubypath]
# [-v] [--verbose]
# [-Ofilename] [--log-file=filename]
# [-g] [--debug]
# [-E] [--embedded]
# [-l] [--no-line-convert]
# [-c] [--line-convert-all]
# [-a] [--no-omit-actions]
# [-C] [--check-only]
# [-S] [--output-status]
# [--version] [--copyright] [--help] grammarfile
#
# [+filename+]
# Racc grammar file. Any extention is permitted.
# [-o+outfile+, --output-file=+outfile+]
# A filename for output. default is <+filename+>.tab.rb
# [-O+filename+, --log-file=+filename+]
# Place logging output in file +filename+.
# Default log file name is <+filename+>.output.
# [-e+rubypath+, --executable=+rubypath+]
# output executable file(mode 755). where +path+ is the ruby interpreter.
# [-v, --verbose]
# verbose mode. create +filename+.output file, like yacc's y.output file.
# [-g, --debug]
# add debug code to parser class. To display debuggin information,
# use this '-g' option and set @yydebug true in parser class.
# [-E, --embedded]
# Output parser which doesn't need runtime files (racc/parser.rb).
# [-C, --check-only]
# Check syntax of racc grammer file and quit.
# [-S, --output-status]
# Print messages time to time while compiling.
# [-l, --no-line-convert]
# turns off line number converting.
# [-c, --line-convert-all]
# Convert line number of actions, inner, header and footer.
# [-a, --no-omit-actions]
# Call all actions, even if an action is empty.
# [--version]
# print Racc version and quit.
# [--copyright]
# Print copyright and quit.
# [--help]
# Print usage and quit.
#
# == Generating Parser Using Racc
#
# To compile Racc grammar file, simply type:
#
# $ racc parse.y
#
# This creates ruby script file "parse.tab.y". The -o option can change the output filename.
#
# == Writing A Racc Grammar File
#
# If you want your own parser, you have to write a grammar file.
# A grammar file contains the name of your parser class, grammar for the parser,
# user code, and anything else.
# When writing a grammar file, yacc's knowledge is helpful.
# If you have not used yacc before, Racc is not too difficult.
#
# Here's an example Racc grammar file.
#
# class Calcparser
# rule
# target: exp { print val[0] }
#
# exp: exp '+' exp
# | exp '*' exp
# | '(' exp ')'
# | NUMBER
# end
#
# Racc grammar files resemble yacc files.
# But (of course), this is Ruby code.
# yacc's $$ is the 'result', $0, $1... is
# an array called 'val', and $-1, $-2... is an array called '_values'.
#
# See the {Grammar File Reference}[rdoc-ref:lib/racc/rdoc/grammar.en.rdoc] for
# more information on grammar files.
#
# == Parser
#
# Then you must prepare the parse entry method. There are two types of
# parse methods in Racc, Racc::Parser#do_parse and Racc::Parser#yyparse
#
# Racc::Parser#do_parse is simple.
#
# It's yyparse() of yacc, and Racc::Parser#next_token is yylex().
# This method must returns an array like [TOKENSYMBOL, ITS_VALUE].
# EOF is [false, false].
# (TOKENSYMBOL is a Ruby symbol (taken from String#intern) by default.
# If you want to change this, see the grammar reference.
#
# Racc::Parser#yyparse is little complicated, but useful.
# It does not use Racc::Parser#next_token, instead it gets tokens from any iterator.
#
# For example, yyparse(obj, :scan)
causes
# calling +obj#scan+, and you can return tokens by yielding them from +obj#scan+.
#
# == Debugging
#
# When debugging, "-v" or/and the "-g" option is helpful.
#
# "-v" creates verbose log file (.output).
# "-g" creates a "Verbose Parser".
# Verbose Parser prints the internal status when parsing.
# But it's _not_ automatic.
# You must use -g option and set +@yydebug+ to +true+ in order to get output.
# -g option only creates the verbose parser.
#
# === Racc reported syntax error.
#
# Isn't there too many "end"?
# grammar of racc file is changed in v0.10.
#
# Racc does not use '%' mark, while yacc uses huge number of '%' marks..
#
# === Racc reported "XXXX conflicts".
#
# Try "racc -v xxxx.y".
# It causes producing racc's internal log file, xxxx.output.
#
# === Generated parsers does not work correctly
#
# Try "racc -g xxxx.y".
# This command let racc generate "debugging parser".
# Then set @yydebug=true in your parser.
# It produces a working log of your parser.
#
# == Re-distributing Racc runtime
#
# A parser, which is created by Racc, requires the Racc runtime module;
# racc/parser.rb.
#
# Ruby 1.8.x comes with Racc runtime module,
# you need NOT distribute Racc runtime files.
#
# If you want to include the Racc runtime module with your parser.
# This can be done by using '-E' option:
#
# $ racc -E -omyparser.rb myparser.y
#
# This command creates myparser.rb which `includes' Racc runtime.
# Only you must do is to distribute your parser file (myparser.rb).
#
# Note: parser.rb is LGPL, but your parser is not.
# Your own parser is completely yours.
module Racc
unless defined?(Racc_No_Extentions)
Racc_No_Extentions = false # :nodoc:
end
class Parser
Racc_Runtime_Version = '1.4.6'
Racc_Runtime_Revision = %w$originalRevision: 1.8 $[1]
Racc_Runtime_Core_Version_R = '1.4.6'
Racc_Runtime_Core_Revision_R = %w$originalRevision: 1.8 $[1]
begin
require 'racc/cparse'
# Racc_Runtime_Core_Version_C = (defined in extention)
Racc_Runtime_Core_Revision_C = Racc_Runtime_Core_Id_C.split[2]
unless new.respond_to?(:_racc_do_parse_c, true)
raise LoadError, 'old cparse.so'
end
if Racc_No_Extentions
raise LoadError, 'selecting ruby version of racc runtime core'
end
Racc_Main_Parsing_Routine = :_racc_do_parse_c # :nodoc:
Racc_YY_Parse_Method = :_racc_yyparse_c # :nodoc:
Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_C # :nodoc:
Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_C # :nodoc:
Racc_Runtime_Type = 'c' # :nodoc:
rescue LoadError
Racc_Main_Parsing_Routine = :_racc_do_parse_rb
Racc_YY_Parse_Method = :_racc_yyparse_rb
Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_R
Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_R
Racc_Runtime_Type = 'ruby'
end
def Parser.racc_runtime_type # :nodoc:
Racc_Runtime_Type
end
def _racc_setup
@yydebug = false unless self.class::Racc_debug_parser
@yydebug = false unless defined?(@yydebug)
if @yydebug
@racc_debug_out = $stderr unless defined?(@racc_debug_out)
@racc_debug_out ||= $stderr
end
arg = self.class::Racc_arg
arg[13] = true if arg.size < 14
arg
end
def _racc_init_sysvars
@racc_state = [0]
@racc_tstack = []
@racc_vstack = []
@racc_t = nil
@racc_val = nil
@racc_read_next = true
@racc_user_yyerror = false
@racc_error_status = 0
end
# The entry point of the parser. This method is used with #next_token.
# If Racc wants to get token (and its value), calls next_token.
#
# Example:
# def parse
# @q = [[1,1],
# [2,2],
# [3,3],
# [false, '$']]
# do_parse
# end
#
# def next_token
# @q.shift
# end
def do_parse
__send__(Racc_Main_Parsing_Routine, _racc_setup(), false)
end
# The method to fetch next token.
# If you use #do_parse method, you must implement #next_token.
#
# The format of return value is [TOKEN_SYMBOL, VALUE].
# +token-symbol+ is represented by Ruby's symbol by default, e.g. :IDENT
# for 'IDENT'. ";" (String) for ';'.
#
# The final symbol (End of file) must be false.
def next_token
raise NotImplementedError, "#{self.class}\#next_token is not defined"
end
def _racc_do_parse_rb(arg, in_debug)
action_table, action_check, action_default, action_pointer,
_, _, _, _,
_, _, token_table, _,
_, _, * = arg
_racc_init_sysvars
tok = act = i = nil
catch(:racc_end_parse) {
while true
if i = action_pointer[@racc_state[-1]]
if @racc_read_next
if @racc_t != 0 # not EOF
tok, @racc_val = next_token()
unless tok # EOF
@racc_t = 0
else
@racc_t = (token_table[tok] or 1) # error token
end
racc_read_token(@racc_t, tok, @racc_val) if @yydebug
@racc_read_next = false
end
end
i += @racc_t
unless i >= 0 and
act = action_table[i] and
action_check[i] == @racc_state[-1]
act = action_default[@racc_state[-1]]
end
else
act = action_default[@racc_state[-1]]
end
while act = _racc_evalact(act, arg)
;
end
end
}
end
# Another entry point for the parser.
# If you use this method, you must implement RECEIVER#METHOD_ID method.
#
# RECEIVER#METHOD_ID is a method to get next token.
# It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE].
def yyparse(recv, mid)
__send__(Racc_YY_Parse_Method, recv, mid, _racc_setup(), true)
end
def _racc_yyparse_rb(recv, mid, arg, c_debug)
action_table, action_check, action_default, action_pointer,
_, _, _, _,
_, _, token_table, _,
_, _, * = arg
_racc_init_sysvars
act = nil
i = nil
catch(:racc_end_parse) {
until i = action_pointer[@racc_state[-1]]
while act = _racc_evalact(action_default[@racc_state[-1]], arg)
;
end
end
recv.__send__(mid) do |tok, val|
unless tok
@racc_t = 0
else
@racc_t = (token_table[tok] or 1) # error token
end
@racc_val = val
@racc_read_next = false
i += @racc_t
unless i >= 0 and
act = action_table[i] and
action_check[i] == @racc_state[-1]
act = action_default[@racc_state[-1]]
end
while act = _racc_evalact(act, arg)
;
end
while not(i = action_pointer[@racc_state[-1]]) or
not @racc_read_next or
@racc_t == 0 # $
unless i and i += @racc_t and
i >= 0 and
act = action_table[i] and
action_check[i] == @racc_state[-1]
act = action_default[@racc_state[-1]]
end
while act = _racc_evalact(act, arg)
;
end
end
end
}
end
###
### common
###
def _racc_evalact(act, arg)
action_table, action_check, _, action_pointer,
_, _, _, _,
_, _, _, shift_n, reduce_n,
_, _, * = arg
nerr = 0 # tmp
if act > 0 and act < shift_n
#
# shift
#
if @racc_error_status > 0
@racc_error_status -= 1 unless @racc_t == 1 # error token
end
@racc_vstack.push @racc_val
@racc_state.push act
@racc_read_next = true
if @yydebug
@racc_tstack.push @racc_t
racc_shift @racc_t, @racc_tstack, @racc_vstack
end
elsif act < 0 and act > -reduce_n
#
# reduce
#
code = catch(:racc_jump) {
@racc_state.push _racc_do_reduce(arg, act)
false
}
if code
case code
when 1 # yyerror
@racc_user_yyerror = true # user_yyerror
return -reduce_n
when 2 # yyaccept
return shift_n
else
raise '[Racc Bug] unknown jump code'
end
end
elsif act == shift_n
#
# accept
#
racc_accept if @yydebug
throw :racc_end_parse, @racc_vstack[0]
elsif act == -reduce_n
#
# error
#
case @racc_error_status
when 0
unless arg[21] # user_yyerror
nerr += 1
on_error @racc_t, @racc_val, @racc_vstack
end
when 3
if @racc_t == 0 # is $
throw :racc_end_parse, nil
end
@racc_read_next = true
end
@racc_user_yyerror = false
@racc_error_status = 3
while true
if i = action_pointer[@racc_state[-1]]
i += 1 # error token
if i >= 0 and
(act = action_table[i]) and
action_check[i] == @racc_state[-1]
break
end
end
throw :racc_end_parse, nil if @racc_state.size <= 1
@racc_state.pop
@racc_vstack.pop
if @yydebug
@racc_tstack.pop
racc_e_pop @racc_state, @racc_tstack, @racc_vstack
end
end
return act
else
raise "[Racc Bug] unknown action #{act.inspect}"
end
racc_next_state(@racc_state[-1], @racc_state) if @yydebug
nil
end
def _racc_do_reduce(arg, act)
_, _, _, _,
goto_table, goto_check, goto_default, goto_pointer,
nt_base, reduce_table, _, _,
_, use_result, * = arg
state = @racc_state
vstack = @racc_vstack
tstack = @racc_tstack
i = act * -3
len = reduce_table[i]
reduce_to = reduce_table[i+1]
method_id = reduce_table[i+2]
void_array = []
tmp_t = tstack[-len, len] if @yydebug
tmp_v = vstack[-len, len]
tstack[-len, len] = void_array if @yydebug
vstack[-len, len] = void_array
state[-len, len] = void_array
# tstack must be updated AFTER method call
if use_result
vstack.push __send__(method_id, tmp_v, vstack, tmp_v[0])
else
vstack.push __send__(method_id, tmp_v, vstack)
end
tstack.push reduce_to
racc_reduce(tmp_t, reduce_to, tstack, vstack) if @yydebug
k1 = reduce_to - nt_base
if i = goto_pointer[k1]
i += state[-1]
if i >= 0 and (curstate = goto_table[i]) and goto_check[i] == k1
return curstate
end
end
goto_default[k1]
end
# This method is called when a parse error is found.
#
# ERROR_TOKEN_ID is an internal ID of token which caused error.
# You can get string representation of this ID by calling
# #token_to_str.
#
# ERROR_VALUE is a value of error token.
#
# value_stack is a stack of symbol values.
# DO NOT MODIFY this object.
#
# This method raises ParseError by default.
#
# If this method returns, parsers enter "error recovering mode".
def on_error(t, val, vstack)
raise ParseError, sprintf("\nparse error on value %s (%s)",
val.inspect, token_to_str(t) || '?')
end
# Enter error recovering mode.
# This method does not call #on_error.
def yyerror
throw :racc_jump, 1
end
# Exit parser.
# Return value is Symbol_Value_Stack[0].
def yyaccept
throw :racc_jump, 2
end
# Leave error recovering mode.
def yyerrok
@racc_error_status = 0
end
# For debugging output
def racc_read_token(t, tok, val)
@racc_debug_out.print 'read '
@racc_debug_out.print tok.inspect, '(', racc_token2str(t), ') '
@racc_debug_out.puts val.inspect
@racc_debug_out.puts
end
def racc_shift(tok, tstack, vstack)
@racc_debug_out.puts "shift #{racc_token2str tok}"
racc_print_stacks tstack, vstack
@racc_debug_out.puts
end
def racc_reduce(toks, sim, tstack, vstack)
out = @racc_debug_out
out.print 'reduce '
if toks.empty?
out.print ' '
else
toks.each {|t| out.print ' ', racc_token2str(t) }
end
out.puts " --> #{racc_token2str(sim)}"
racc_print_stacks tstack, vstack
@racc_debug_out.puts
end
def racc_accept
@racc_debug_out.puts 'accept'
@racc_debug_out.puts
end
def racc_e_pop(state, tstack, vstack)
@racc_debug_out.puts 'error recovering mode: pop token'
racc_print_states state
racc_print_stacks tstack, vstack
@racc_debug_out.puts
end
def racc_next_state(curstate, state)
@racc_debug_out.puts "goto #{curstate}"
racc_print_states state
@racc_debug_out.puts
end
def racc_print_stacks(t, v)
out = @racc_debug_out
out.print ' ['
t.each_index do |i|
out.print ' (', racc_token2str(t[i]), ' ', v[i].inspect, ')'
end
out.puts ' ]'
end
def racc_print_states(s)
out = @racc_debug_out
out.print ' ['
s.each {|st| out.print ' ', st }
out.puts ' ]'
end
def racc_token2str(tok)
self.class::Racc_token_to_s_table[tok] or
raise "[Racc Bug] can't convert token #{tok} to string"
end
# Convert internal ID of token symbol to the string.
def token_to_str(t)
self.class::Racc_token_to_s_table[t]
end
end
end
usr/share/gems/gems/rdoc-4.0.0/lib/rdoc/parser.rb 0000644 00000017516 14763562361 0015276 0 ustar 00 # -*- coding: us-ascii -*-
##
# A parser is simple a class that subclasses RDoc::Parser and implements #scan
# to fill in an RDoc::TopLevel with parsed data.
#
# The initialize method takes an RDoc::TopLevel to fill with parsed content,
# the name of the file to be parsed, the content of the file, an RDoc::Options
# object and an RDoc::Stats object to inform the user of parsed items. The
# scan method is then called to parse the file and must return the
# RDoc::TopLevel object. By calling super these items will be set for you.
#
# In order to be used by RDoc the parser needs to register the file extensions
# it can parse. Use ::parse_files_matching to register extensions.
#
# require 'rdoc'
#
# class RDoc::Parser::Xyz < RDoc::Parser
# parse_files_matching /\.xyz$/
#
# def initialize top_level, file_name, content, options, stats
# super
#
# # extra initialization if needed
# end
#
# def scan
# # parse file and fill in @top_level
# end
# end
class RDoc::Parser
@parsers = []
class << self
##
# An Array of arrays that maps file extension (or name) regular
# expressions to parser classes that will parse matching filenames.
#
# Use parse_files_matching to register a parser's file extensions.
attr_reader :parsers
end
##
# The name of the file being parsed
attr_reader :file_name
##
# Alias an extension to another extension. After this call, files ending
# "new_ext" will be parsed using the same parser as "old_ext"
def self.alias_extension(old_ext, new_ext)
old_ext = old_ext.sub(/^\.(.*)/, '\1')
new_ext = new_ext.sub(/^\.(.*)/, '\1')
parser = can_parse_by_name "xxx.#{old_ext}"
return false unless parser
RDoc::Parser.parsers.unshift [/\.#{new_ext}$/, parser]
true
end
##
# Determines if the file is a "binary" file which basically means it has
# content that an RDoc parser shouldn't try to consume.
def self.binary?(file)
return false if file =~ /\.(rdoc|txt)$/
s = File.read(file, 1024) or return false
have_encoding = s.respond_to? :encoding
return true if s[0, 2] == Marshal.dump('')[0, 2] or s.index("\x00")
if have_encoding then
mode = "r"
s.sub!(/\A#!.*\n/, '') # assume shebang line isn't longer than 1024.
encoding = s[/^\s*\#\s*(?:-\*-\s*)?(?:en)?coding:\s*([^\s;]+?)(?:-\*-|[\s;])/, 1]
mode = "r:#{encoding}" if encoding
s = File.open(file, mode) {|f| f.gets(nil, 1024)}
not s.valid_encoding?
else
if 0.respond_to? :fdiv then
s.count("\x00-\x7F", "^ -~\t\r\n").fdiv(s.size) > 0.3
else # HACK 1.8.6
(s.count("\x00-\x7F", "^ -~\t\r\n").to_f / s.size) > 0.3
end
end
end
##
# Processes common directives for CodeObjects for the C and Ruby parsers.
#
# Applies +directive+'s +value+ to +code_object+, if appropriate
def self.process_directive code_object, directive, value
warn "RDoc::Parser::process_directive is deprecated and wil be removed in RDoc 4. Use RDoc::Markup::PreProcess#handle_directive instead" if $-w
case directive
when 'nodoc' then
code_object.document_self = nil # notify nodoc
code_object.document_children = value.downcase != 'all'
when 'doc' then
code_object.document_self = true
code_object.force_documentation = true
when 'yield', 'yields' then
# remove parameter &block
code_object.params.sub!(/,?\s*&\w+/, '') if code_object.params
code_object.block_params = value
when 'arg', 'args' then
code_object.params = value
end
end
##
# Checks if +file+ is a zip file in disguise. Signatures from
# http://www.garykessler.net/library/file_sigs.html
def self.zip? file
zip_signature = File.read file, 4
zip_signature == "PK\x03\x04" or
zip_signature == "PK\x05\x06" or
zip_signature == "PK\x07\x08"
rescue
false
end
##
# Return a parser that can handle a particular extension
def self.can_parse file_name
parser = can_parse_by_name file_name
# HACK Selenium hides a jar file using a .txt extension
return if parser == RDoc::Parser::Simple and zip? file_name
parser
end
##
# Returns a parser that can handle the extension for +file_name+. This does
# not depend upon the file being readable.
def self.can_parse_by_name file_name
_, parser = RDoc::Parser.parsers.find { |regexp,| regexp =~ file_name }
# The default parser must not parse binary files
ext_name = File.extname file_name
return parser if ext_name.empty?
if parser == RDoc::Parser::Simple and ext_name !~ /txt|rdoc/ then
case check_modeline file_name
when nil, 'rdoc' then # continue
else return nil
end
end
parser
rescue Errno::EACCES
end
##
# Returns the file type from the modeline in +file_name+
def self.check_modeline file_name
line = open file_name do |io|
io.gets
end
/-\*-\s*(.*?\S)\s*-\*-/ =~ line
return nil unless type = $1
if /;/ =~ type then
return nil unless /(?:\s|\A)mode:\s*([^\s;]+)/i =~ type
type = $1
end
return nil if /coding:/i =~ type
type.downcase
rescue ArgumentError # invalid byte sequence, etc.
end
##
# Finds and instantiates the correct parser for the given +file_name+ and
# +content+.
def self.for top_level, file_name, content, options, stats
return if binary? file_name
parser = use_markup content
unless parser then
parse_name = file_name
# If no extension, look for shebang
if file_name !~ /\.\w+$/ && content =~ %r{\A#!(.+)} then
shebang = $1
case shebang
when %r{env\s+ruby}, %r{/ruby}
parse_name = 'dummy.rb'
end
end
parser = can_parse parse_name
end
return unless parser
parser.new top_level, file_name, content, options, stats
rescue SystemCallError
nil
end
##
# Record which file types this parser can understand.
#
# It is ok to call this multiple times.
def self.parse_files_matching(regexp)
RDoc::Parser.parsers.unshift [regexp, self]
end
##
# If there is a markup: parser_name comment at the front of the
# file, use it to determine the parser. For example:
#
# # markup: rdoc
# # Class comment can go here
#
# class C
# end
#
# The comment should appear as the first line of the +content+.
#
# If the content contains a shebang or editor modeline the comment may
# appear on the second or third line.
#
# Any comment style may be used to hide the markup comment.
def self.use_markup content
markup = content.lines.first(3).grep(/markup:\s+(\w+)/) { $1 }.first
return unless markup
# TODO Ruby should be returned only when the filename is correct
return RDoc::Parser::Ruby if %w[tomdoc markdown].include? markup
markup = Regexp.escape markup
RDoc::Parser.parsers.find do |_, parser|
/^#{markup}$/i =~ parser.name.sub(/.*:/, '')
end.last
end
##
# Creates a new Parser storing +top_level+, +file_name+, +content+,
# +options+ and +stats+ in instance variables. In +@preprocess+ an
# RDoc::Markup::PreProcess object is created which allows processing of
# directives.
def initialize top_level, file_name, content, options, stats
@top_level = top_level
@top_level.parser = self.class
@store = @top_level.store
@file_name = file_name
@content = content
@options = options
@stats = stats
@preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include
@preprocess.options = @options
end
autoload :RubyTools, 'rdoc/parser/ruby_tools'
autoload :Text, 'rdoc/parser/text'
end
# simple must come first in order to show up last in the parsers list
require 'rdoc/parser/simple'
require 'rdoc/parser/c'
require 'rdoc/parser/changelog'
require 'rdoc/parser/markdown'
require 'rdoc/parser/rd'
require 'rdoc/parser/ruby'
usr/share/gems/gems/rdoc-4.0.0/lib/rdoc/markup/parser.rb 0000644 00000034034 14763613260 0016562 0 ustar 00 require 'strscan'
##
# A recursive-descent parser for RDoc markup.
#
# The parser tokenizes an input string then parses the tokens into a Document.
# Documents can be converted into output formats by writing a visitor like
# RDoc::Markup::ToHTML.
#
# The parser only handles the block-level constructs Paragraph, List,
# ListItem, Heading, Verbatim, BlankLine and Rule. Inline markup such as
# \+blah\+ is handled separately by RDoc::Markup::AttributeManager.
#
# To see what markup the Parser implements read RDoc. To see how to use
# RDoc markup to format text in your program read RDoc::Markup.
class RDoc::Markup::Parser
include RDoc::Text
##
# List token types
LIST_TOKENS = [
:BULLET,
:LABEL,
:LALPHA,
:NOTE,
:NUMBER,
:UALPHA,
]
##
# Parser error subclass
class Error < RuntimeError; end
##
# Raised when the parser is unable to handle the given markup
class ParseError < Error; end
##
# Enables display of debugging information
attr_accessor :debug
##
# Token accessor
attr_reader :tokens
##
# Parses +str+ into a Document.
#
# Use RDoc::Markup#parse instead of this method.
def self.parse str
parser = new
parser.tokenize str
doc = RDoc::Markup::Document.new
parser.parse doc
end
##
# Returns a token stream for +str+, for testing
def self.tokenize str
parser = new
parser.tokenize str
parser.tokens
end
##
# Creates a new Parser. See also ::parse
def initialize
@binary_input = nil
@current_token = nil
@debug = false
@have_encoding = Object.const_defined? :Encoding
@have_byteslice = ''.respond_to? :byteslice
@input = nil
@input_encoding = nil
@line = 0
@line_pos = 0
@s = nil
@tokens = []
end
##
# Builds a Heading of +level+
def build_heading level
type, text, = get
text = case type
when :TEXT then
skip :NEWLINE
text
else
unget
''
end
RDoc::Markup::Heading.new level, text
end
##
# Builds a List flush to +margin+
def build_list margin
p :list_start => margin if @debug
list = RDoc::Markup::List.new
label = nil
until @tokens.empty? do
type, data, column, = get
case type
when *LIST_TOKENS then
if column < margin || (list.type && list.type != type) then
unget
break
end
list.type = type
peek_type, _, column, = peek_token
case type
when :NOTE, :LABEL then
label = [] unless label
if peek_type == :NEWLINE then
# description not on the same line as LABEL/NOTE
# skip the trailing newline & any blank lines below
while peek_type == :NEWLINE
get
peek_type, _, column, = peek_token
end
# we may be:
# - at end of stream
# - at a column < margin:
# [text]
# blah blah blah
# - at the same column, but with a different type of list item
# [text]
# * blah blah
# - at the same column, with the same type of list item
# [one]
# [two]
# In all cases, we have an empty description.
# In the last case only, we continue.
if peek_type.nil? || column < margin then
empty = true
elsif column == margin then
case peek_type
when type
empty = :continue
when *LIST_TOKENS
empty = true
else
empty = false
end
else
empty = false
end
if empty then
label << data
next if empty == :continue
break
end
end
else
data = nil
end
if label then
data = label << data
label = nil
end
list_item = RDoc::Markup::ListItem.new data
parse list_item, column
list << list_item
else
unget
break
end
end
p :list_end => margin if @debug
if list.empty? then
return nil unless label
return nil unless [:LABEL, :NOTE].include? list.type
list_item = RDoc::Markup::ListItem.new label, RDoc::Markup::BlankLine.new
list << list_item
end
list
end
##
# Builds a Paragraph that is flush to +margin+
def build_paragraph margin
p :paragraph_start => margin if @debug
paragraph = RDoc::Markup::Paragraph.new
until @tokens.empty? do
type, data, column, = get
if type == :TEXT and column == margin then
paragraph << data
break if peek_token.first == :BREAK
data << ' ' if skip :NEWLINE
else
unget
break
end
end
paragraph.parts.last.sub!(/ \z/, '') # cleanup
p :paragraph_end => margin if @debug
paragraph
end
##
# Builds a Verbatim that is indented from +margin+.
#
# The verbatim block is shifted left (the least indented lines start in
# column 0). Each part of the verbatim is one line of text, always
# terminated by a newline. Blank lines always consist of a single newline
# character, and there is never a single newline at the end of the verbatim.
def build_verbatim margin
p :verbatim_begin => margin if @debug
verbatim = RDoc::Markup::Verbatim.new
min_indent = nil
generate_leading_spaces = true
line = ''
until @tokens.empty? do
type, data, column, = get
if type == :NEWLINE then
line << data
verbatim << line
line = ''
generate_leading_spaces = true
next
end
if column <= margin
unget
break
end
if generate_leading_spaces then
indent = column - margin
line << ' ' * indent
min_indent = indent if min_indent.nil? || indent < min_indent
generate_leading_spaces = false
end
case type
when :HEADER then
line << '=' * data
_, _, peek_column, = peek_token
peek_column ||= column + data
indent = peek_column - column - data
line << ' ' * indent
when :RULE then
width = 2 + data
line << '-' * width
_, _, peek_column, = peek_token
peek_column ||= column + width
indent = peek_column - column - width
line << ' ' * indent
when :BREAK, :TEXT then
line << data
else # *LIST_TOKENS
list_marker = case type
when :BULLET then data
when :LABEL then "[#{data}]"
when :NOTE then "#{data}::"
else # :LALPHA, :NUMBER, :UALPHA
"#{data}."
end
line << list_marker
peek_type, _, peek_column = peek_token
unless peek_type == :NEWLINE then
peek_column ||= column + list_marker.length
indent = peek_column - column - list_marker.length
line << ' ' * indent
end
end
end
verbatim << line << "\n" unless line.empty?
verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
verbatim.normalize
p :verbatim_end => margin if @debug
verbatim
end
##
# The character offset for the input string at the given +byte_offset+
def char_pos byte_offset
if @have_byteslice then
@input.byteslice(0, byte_offset).length
elsif @have_encoding then
matched = @binary_input[0, byte_offset]
matched.force_encoding @input_encoding
matched.length
else
byte_offset
end
end
##
# Pulls the next token from the stream.
def get
@current_token = @tokens.shift
p :get => @current_token if @debug
@current_token
end
##
# Parses the tokens into an array of RDoc::Markup::XXX objects,
# and appends them to the passed +parent+ RDoc::Markup::YYY object.
#
# Exits at the end of the token stream, or when it encounters a token
# in a column less than +indent+ (unless it is a NEWLINE).
#
# Returns +parent+.
def parse parent, indent = 0
p :parse_start => indent if @debug
until @tokens.empty? do
type, data, column, = get
case type
when :BREAK then
parent << RDoc::Markup::BlankLine.new
skip :NEWLINE, false
next
when :NEWLINE then
# trailing newlines are skipped below, so this is a blank line
parent << RDoc::Markup::BlankLine.new
skip :NEWLINE, false
next
end
# indentation change: break or verbatim
if column < indent then
unget
break
elsif column > indent then
unget
parent << build_verbatim(indent)
next
end
# indentation is the same
case type
when :HEADER then
parent << build_heading(data)
when :RULE then
parent << RDoc::Markup::Rule.new(data)
skip :NEWLINE
when :TEXT then
unget
parent << build_paragraph(indent)
when *LIST_TOKENS then
unget
parent << build_list(indent)
else
type, data, column, line = @current_token
raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
end
end
p :parse_end => indent if @debug
parent
end
##
# Returns the next token on the stream without modifying the stream
def peek_token
token = @tokens.first || []
p :peek => token if @debug
token
end
##
# Creates the StringScanner
def setup_scanner input
@line = 0
@line_pos = 0
@input = input.dup
if @have_encoding and not @have_byteslice then
@input_encoding = @input.encoding
@binary_input = @input.force_encoding Encoding::BINARY
end
@s = StringScanner.new input
end
##
# Skips the next token if its type is +token_type+.
#
# Optionally raises an error if the next token is not of the expected type.
def skip token_type, error = true
type, = get
return unless type # end of stream
return @current_token if token_type == type
unget
raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
end
##
# Turns text +input+ into a stream of tokens
def tokenize input
setup_scanner input
until @s.eos? do
pos = @s.pos
# leading spaces will be reflected by the column of the next token
# the only thing we loose are trailing spaces at the end of the file
next if @s.scan(/ +/)
# note: after BULLET, LABEL, etc.,
# indent will be the column of the next non-newline token
@tokens << case
# [CR]LF => :NEWLINE
when @s.scan(/\r?\n/) then
token = [:NEWLINE, @s.matched, *token_pos(pos)]
@line_pos = char_pos @s.pos
@line += 1
token
# === text => :HEADER then :TEXT
when @s.scan(/(=+)(\s*)/) then
level = @s[1].length
header = [:HEADER, level, *token_pos(pos)]
if @s[2] =~ /^\r?\n/ then
@s.pos -= @s[2].length
header
else
pos = @s.pos
@s.scan(/.*/)
@tokens << header
[:TEXT, @s.matched.sub(/\r$/, ''), *token_pos(pos)]
end
# --- (at least 3) and nothing else on the line => :RULE
when @s.scan(/(-{3,}) *\r?$/) then
[:RULE, @s[1].length - 2, *token_pos(pos)]
# * or - followed by white space and text => :BULLET
when @s.scan(/([*-]) +(\S)/) then
@s.pos -= @s[2].bytesize # unget \S
[:BULLET, @s[1], *token_pos(pos)]
# A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
# FIXME if tab(s), the column will be wrong
# either support tabs everywhere by first expanding them to
# spaces, or assume that they will have been replaced
# before (and provide a check for that at least in debug
# mode)
list_label = @s[1]
@s.pos -= @s[2].bytesize # unget \S
list_type =
case list_label
when /[a-z]/ then :LALPHA
when /[A-Z]/ then :UALPHA
when /\d/ then :NUMBER
else
raise ParseError, "BUG token #{list_label}"
end
[list_type, list_label, *token_pos(pos)]
# [text] followed by spaces or end of line => :LABEL
when @s.scan(/\[(.*?)\]( +|\r?$)/) then
[:LABEL, @s[1], *token_pos(pos)]
# text:: followed by spaces or end of line => :NOTE
when @s.scan(/(.*?)::( +|\r?$)/) then
[:NOTE, @s[1], *token_pos(pos)]
# anything else: :TEXT
else @s.scan(/(.*?)( )?\r?$/)
token = [:TEXT, @s[1], *token_pos(pos)]
if @s[2] then
@tokens << token
[:BREAK, @s[2], *token_pos(pos + @s[1].length)]
else
token
end
end
end
self
end
##
# Calculates the column (by character) and line of the current token from
# +scanner+ based on +byte_offset+.
def token_pos byte_offset
offset = char_pos byte_offset
[offset - @line_pos, @line]
end
##
# Returns the current token to the token stream
def unget
token = @current_token
p :unget => token if @debug
raise Error, 'too many #ungets' if token == @tokens.first
@tokens.unshift token if token
end
end