eaiovnaovbqoebvqoeavibavo CC.pm000064400000006046147635103360005405 0ustar00package Regexp::Common::CC; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::_support qw /luhn/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my @cards = ( # Name Prefix Length mod 10 [Mastercard => '5[1-5]', 16, 1], [Visa => '4', [13, 16], 1], [Amex => '3[47]', 15, 1], # Carte Blanche ['Diners Club' => '3(?:0[0-5]|[68])', 14, 1], [Discover => '6011', 16, 1], [enRoute => '2(?:014|149)', 15, 0], [JCB => [['3', 16, 1], ['2131|1800', 15, 1]]], ); foreach my $card (@cards) { my ($name, $prefix, $length, $mod) = @$card; # Skip the harder ones for now. next if ref $prefix || ref $length; next unless $mod; my $times = $length + $mod; pattern name => [CC => $name], version => 5.006, create => sub { use re 'eval'; qr <((?=($prefix))[0-9]{$length}) (?(?{Regexp::Common::_support::luhn $1})|(?!))>x } ; } 1; __END__ =pod =head1 NAME Regexp::Common::CC -- provide patterns for credit card numbers. =head1 SYNOPSIS use Regexp::Common qw /CC/; while (<>) { /^$RE{CC}{Mastercard}$/ and print "Mastercard card number\n"; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. This module offers patterns for credit card numbers of several major credit card types. Currently, the supported cards are: I, I, I, and I. =head1 SEE ALSO L for a general description of how to use this interface. =over 4 =item L Credit Card Validation - Check Digits =item L Everything you ever wanted to know about CC's =item L Luhn formula =back =head1 AUTHORS Damian Conway S<(I)> and Abigail S<(I)>. =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. Send them in to S>. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut lingua.pm000064400000004627147635103360006402 0ustar00package Regexp::Common::lingua; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; pattern name => [qw /lingua palindrome -chars=[A-Za-z]/], create => sub { use re 'eval'; my $keep = exists $_ [1] -> {-keep}; my $ch = $_ [1] -> {-chars}; my $idx = $keep ? "1:$ch" : "0:$ch"; my $r = "(??{\$Regexp::Common::lingua::pd{'" . $idx . "'}})"; $Regexp::Common::lingua::pd {$idx} = $keep ? qr /($ch|($ch)($r)?\2)/ : qr /$ch|($ch)($r)?\1/; # print "[$ch]: ", $Regexp::Common::lingua::pd {$idx}, "\n"; # $Regexp::Common::lingua::pd {$idx}; }, version => 5.006 ; 1; __END__ =pod =head1 NAME Regexp::Common::lingua -- provide regexes for language related stuff. =head1 SYNOPSIS use Regexp::Common qw /lingua/; while (<>) { /^$RE{lingua}{palindrome}$/ and print "is a palindrome\n"; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. =head2 C<$RE{lingua}{palindrome}> Returns a pattern that recognizes a palindrome, a string that is the same if you reverse it. By default, it only matches strings consisting of letters, but this can be changed using the C<{-chars}> option. This option takes a character class (default is C<[A-Za-z]>) as argument. If C<{-keep}> is used, only C<$1> will be set, and set to the entire match. This pattern requires at least perl 5.6.0. =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Many regexes are missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut number.pm000064400000032633147635103360006411 0ustar00package Regexp::Common::number; use Config; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2013031101'; sub _croak { require Carp; goto &Carp::croak; } my $digits = join ("", 0 .. 9, "A" .. "Z"); sub int_creator { my $flags = $_ [1]; my ($sep, $group, $base, $places, $sign) = @{$flags} {qw /-sep -group -base -places -sign/}; # Deal with the bases. _croak "Base must be between 1 and 36" unless $base >= 1 && $base <= 36; my $chars = substr $digits, 0, $base; $sep = ',' if exists $flags -> {-sep} && !defined $flags -> {-sep}; my $max = $group; $max = $2 if $group =~ /^\s*(\d+)\s*,\s*(\d+)\s*$/; my $quant = $places ? "{$places}" : "+"; return $sep ? qq {(?k:(?k:$sign)(?k:[$chars]{1,$max}} . qq {(?:$sep} . qq {[$chars]{$group})*))} : qq {(?k:(?k:$sign)(?k:[$chars]$quant))} } sub real_creator { my ($base, $places, $radix, $sep, $group, $expon, $sign) = @{$_[1]}{-base, -places, -radix, -sep, -group, -expon, -sign}; _croak "Base must be between 1 and 36" unless $base >= 1 && $base <= 36; $sep = ',' if exists $_[1]->{-sep} && !defined $_[1]->{-sep}; if ($base > 14 && $expon =~ /^[Ee]$/) {$expon = 'G'} foreach ($radix, $sep, $expon) {$_ = "[$_]" if 1 == length} my $chars = substr $digits, 0, $base; return $sep ? qq {(?k:(?i)(?k:$sign)(?k:(?=$radix?[$chars])} . qq {(?k:[$chars]{1,$group}(?:(?:$sep)[$chars]{$group})*)} . qq {(?:(?k:$radix)(?k:[$chars]{$places}))?)} . qq {(?:(?k:$expon)(?k:(?k:$sign)(?k:[$chars]+))|))} : qq {(?k:(?i)(?k:$sign)(?k:(?=$radix?[$chars])} . qq {(?k:[$chars]*)(?:(?k:$radix)(?k:[$chars]{$places}))?)} . qq {(?:(?k:$expon)(?k:(?k:$sign)(?k:[$chars]+))|))}; } sub decimal_creator { my ($base, $places, $radix, $sep, $group, $sign) = @{$_[1]}{-base, -places, -radix, -sep, -group, -sign}; _croak "Base must be between 1 and 36" unless $base >= 1 && $base <= 36; $sep = ',' if exists $_[1]->{-sep} && !defined $_[1]->{-sep}; foreach ($radix, $sep) {$_ = "[$_]" if 1 == length} my $chars = substr $digits, 0, $base; return $sep ? qq {(?k:(?i)(?k:$sign)(?k:(?=$radix?[$chars])} . qq {(?k:[$chars]{1,$group}(?:(?:$sep)[$chars]{$group})*)} . qq {(?:(?k:$radix)(?k:[$chars]{$places}))?))} : qq {(?k:(?i)(?k:$sign)(?k:(?=$radix?[$chars])} . qq {(?k:[$chars]*)(?:(?k:$radix)(?k:[$chars]{$places}))?))} } pattern name => [qw (num int -sep= -base=10 -group=3 -sign=[-+]?)], create => \&int_creator, ; pattern name => [qw (num real -base=10), '-places=0,', qw (-radix=[.] -sep= -group=3 -expon=E -sign=[-+]?)], create => \&real_creator, ; pattern name => [qw (num decimal -base=10), '-places=0,', qw (-radix=[.] -sep= -group=3 -sign=[-+]?)], create => \&decimal_creator, ; sub real_synonym { my ($name, $base) = @_; pattern name => ['num', $name, '-places=0,', '-radix=[.]', '-sep=', '-group=3', '-expon=E', '-sign=[-+]?'], create => sub {my %flags = (%{$_[1]}, -base => $base); real_creator (undef, \%flags); } ; } real_synonym (hex => 16); real_synonym (dec => 10); real_synonym (oct => 8); real_synonym (bin => 2); # 2147483647 pattern name => [qw (num square)], create => sub { use re 'eval'; my $sixty_four_bits = $Config {use64bitint}; # # CPAN testers claim it fails on 5.8.8 and darwin 9.0. # $sixty_four_bits = 0 if $Config {osname} eq 'darwin' && $Config {osvers} eq '9.0' && $] == 5.008008; my $num = $sixty_four_bits ? '0*[1-8]?[0-9]{1,15}' : '0*(?:2(?:[0-0][0-9]{8}' . '|1(?:[0-3][0-9]{7}' . '|4(?:[0-6][0-9]{6}' . '|7(?:[0-3][0-9]{5}' . '|4(?:[0-7][0-9]{4}' . '|8(?:[0-2][0-9]{3}' . '|3(?:[0-5][0-9]{2}' . '|6(?:[0-3][0-9]{1}' . '|4[0-7])))))))))|1?[0-9]{1,9}'; qr {($num)(?(?{sqrt ($^N) == int sqrt ($^N)})|(?!))} }, version => 5.008; ; pattern name => [qw (num roman)], create => '(?xi)(?=[MDCLXVI]) (?k:M{0,3} (D?C{0,3}|CD|CM)? (L?X{0,3}|XL|XC)? (V?I{0,3}|IV|IX)?)' ; 1; __END__ =pod =head1 NAME Regexp::Common::number -- provide regexes for numbers =head1 SYNOPSIS use Regexp::Common qw /number/; while (<>) { /^$RE{num}{int}$/ and print "Integer\n"; /^$RE{num}{real}$/ and print "Real\n"; /^$RE{num}{real}{-base => 16}$/ and print "Hexadecimal real\n"; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. =head2 C<$RE{num}{int}{-base}{-sep}{-group}{-places}{-sign}> Returns a pattern that matches an integer. If C<< -base => I >> is specified, the integer is in base I, with C<< 2 <= I <= 36 >>. For bases larger than 10, upper case letters are used. The default base is 10. If C<< -sep => I

>> is specified, the pattern I

is required as a grouping marker within the number. If this option is not given, no grouping marker is used. If C<< -group => I >> is specified, digits between grouping markers must be grouped in sequences of exactly I digits. The default value of I is 3. If C<< -group => I >> is specified, digits between grouping markers must be grouped in sequences of at least I digits, and at most I digits. This option is ignored unless the C<< -sep >> option is used. If C<< -places => I >> is specified, the integer recognized must be exactly I digits wide. If C<< -places => I >> is specified, the integer must be at least I wide, and at most I characters. There is no default, which means that integers are unlimited in size. This option is ignored if the C<< -sep >> option is used. If C<< -sign => I

>> is used, it's a pattern the leading sign has to match. This defaults to C<< [-+]? >>, which means the number is optionally preceded by a minus or a plus. If you want to match unsigned integers, use C<< $RE{num}{int}{-sign => ''} >>. For example: $RE{num}{int} # match 1234567 $RE{num}{int}{-sep=>','} # match 1,234,567 $RE{num}{int}{-sep=>',?'} # match 1234567 or 1,234,567 $RE{num}{int}{-sep=>'.'}{-group=>4} # match 1.2345.6789 Under C<-keep> (see L): =over 4 =item $1 captures the entire number =item $2 captures the optional sign of the number =item $3 captures the complete set of digits =back =head2 C<$RE{num}{real}{-base}{-radix}{-places}{-sep}{-group}{-expon}> Returns a pattern that matches a floating-point number. If C<-base=I> is specified, the number is assumed to be in that base (with A..Z representing the digits for 11..36). By default, the base is 10. If C<-radix=I

> is specified, the pattern I

is used as the radix point for the number (i.e. the "decimal point" in base 10). The default is C. If C<-places=I> is specified, the number is assumed to have exactly I places after the radix point. If C<-places=I> is specified, the number is assumed to have between I and I places after the radix point. By default, the number of places is unrestricted. If C<-sep=I

> specified, the pattern I

is required as a grouping marker within the pre-radix section of the number. By default, no separator is allowed. If C<-group=I> is specified, digits between grouping separators must be grouped in sequences of exactly I characters. The default value of I is 3. If C<-expon=I

> is specified, the pattern I

is used as the exponential marker. The default value of I

is C. If C<-sign=I

> is specified, the pattern I

is used to match the leading sign (and the sign of the exponent). This defaults to C<< [-+]? >>, means means that an optional plus or minus sign can be used. For example: $RE{num}{real} # matches 123.456 or -0.1234567 $RE{num}{real}{-places=>2} # matches 123.45 or -0.12 $RE{num}{real}{-places=>'0,3'} # matches 123.456 or 0 or 9.8 $RE{num}{real}{-sep=>'[,.]?'} # matches 123,456 or 123.456 $RE{num}{real}{-base=>3'} # matches 121.102 Under C<-keep>: =over 4 =item $1 captures the entire match =item $2 captures the optional sign of the number =item $3 captures the complete mantissa =item $4 captures the whole number portion of the mantissa =item $5 captures the radix point =item $6 captures the fractional portion of the mantissa =item $7 captures the optional exponent marker =item $8 captures the entire exponent value =item $9 captures the optional sign of the exponent =item $10 captures the digits of the exponent =back =head2 C<$RE{num}{dec}{-radix}{-places}{-sep}{-group}{-expon}> A synonym for C<< $RE{num}{real}{-base=>10}{...} >> =head2 C<$RE{num}{oct}{-radix}{-places}{-sep}{-group}{-expon}> A synonym for C<< $RE{num}{real}{-base=>8}{...} >> =head2 C<$RE{num}{bin}{-radix}{-places}{-sep}{-group}{-expon}> A synonym for C<< $RE{num}{real}{-base=>2}{...} >> =head2 C<$RE{num}{hex}{-radix}{-places}{-sep}{-group}{-expon}> A synonym for C<< $RE{num}{real}{-base=>16}{...} >> =head2 C<$RE{num}{decimal}{-base}{-radix}{-places}{-sep}{-group}> The same as C<$RE{num}{real}>, except that an exponent isn't allowed. Hence, this returns a pattern matching I numbers. If C<-base=I> is specified, the number is assumed to be in that base (with A..Z representing the digits for 11..36). By default, the base is 10. If C<-radix=I

> is specified, the pattern I

is used as the radix point for the number (i.e. the "decimal point" in base 10). The default is C. If C<-places=I> is specified, the number is assumed to have exactly I places after the radix point. If C<-places=I> is specified, the number is assumed to have between I and I places after the radix point. By default, the number of places is unrestricted. If C<-sep=I

> specified, the pattern I

is required as a grouping marker within the pre-radix section of the number. By default, no separator is allowed. If C<-group=I> is specified, digits between grouping separators must be grouped in sequences of exactly I characters. The default value of I is 3. For example: $RE{num}{decimal} # matches 123.456 or -0.1234567 $RE{num}{decimal}{-places=>2} # matches 123.45 or -0.12 $RE{num}{decimal}{-places=>'0,3'} # matches 123.456 or 0 or 9.8 $RE{num}{decimal}{-sep=>'[,.]?'} # matches 123,456 or 123.456 $RE{num}{decimal}{-base=>3'} # matches 121.102 Under C<-keep>: =over 4 =item $1 captures the entire match =item $2 captures the optional sign of the number =item $3 captures the complete mantissa =item $4 captures the whole number portion of the mantissa =item $5 captures the radix point =item $6 captures the fractional portion of the mantissa =back =head2 C<$RE{num}{square}> Returns a pattern that matches a (decimal) square. Because Perl's arithmetic is lossy when using integers over about 53 bits, this pattern only recognizes numbers less than 9000000000000000, if one uses a Perl that is configured to use 64 bit integers. Otherwise, the limit is 2147483647. These restrictions were introduced in versions 2.116 and 2.117 of Regexp::Common. Regardless whether C<-keep> was set, the matched number will be returned in C<$1>. This pattern is available for version 5.008 and up. =head2 C<$RE{num}{roman}> Returns a pattern that matches an integer written in Roman numbers. Case doesn't matter. Only the more modern style, that is, no more than three repetitions of a letter, is recognized. The largest number matched is I, or 3999. Larger numbers cannot be expressed using ASCII characters. A future version will be able to deal with the Unicode symbols to match larger Roman numbers. Under C<-keep>, the number will be captured in $1. =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2013, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut zip.pm000064400000052354147635103360005725 0ustar00package Regexp::Common::zip; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; # # Prefer '[0-9]' over \d, because the latter may include more # in Unicode string. # my %code = ( Australia => [qw /AUS? AU AUS/], Belgium => [qw /BE? BE B/], Denmark => [qw /DK DK DK/], France => [qw /FR? FR F/], Germany => [qw /DE? DE D/], Greenland => [qw /DK DK DK/], Italy => [qw /IT? IT I/], Netherlands => [qw /NL NL NL/], Norway => [qw /NO? NO N/], Spain => [qw /ES? ES E/], USA => [qw /USA? US USA/], ); # Returns the empty string if the argument is undefined, the argument otherwise. sub __ {defined $_ [0] ? $_ [0] : ""} # Used for allowable options. If the value starts with 'y', the option is # required ("{1,1}" is returned, if the value starts with 'n', the option # is disallowed ("{0,0}" is returned), otherwise, the option is allowed, # but not required ("{0,1}" is returned). sub _t { if (defined $_ [0]) { if ($_ [0] =~ /^y/i) {return "{1,1}"} if ($_ [0] =~ /^n/i) {return "{0,0}"} } "{0,1}" } # Returns the (sub)pattern for the country named '$name', and the # -country option '$country'. sub _c { my ($name, $country) = @_; if (defined $country && $country ne "") { if ($country eq 'iso') {return $code {$name} [1]} if ($country eq 'cept') {return $code {$name} [2]} return $country; } $code {$name} [0] } my %zip = ( Australia => "(?k:(?k:[1-8][0-9]|9[0-7]|0?[28]|0?9(?=09))(?k:[0-9]{2}))", # Postal codes of the form 'DDDD', with the first # two digits 02, 08 or 20-97. Leading 0 may be omitted. # 909 and 0909 are valid as well - but no other postal # codes starting with 9 or 09. Belgium => "(?k:(?k:[1-9])(?k:[0-9]{3}))", # Postal codes of the form: 'DDDD', with the first # digit representing the province; the others # distribution sectors. Postal codes do not start # with a zero. Denmark => "(?k:(?k:[1-9])(?k:[0-9])(?k:[0-9]{2}))", # Postal codes of the form: 'DDDD', with the first # digit representing the distribution region, the # second digit the distribution district. Postal # codes do not start with a zero. Postal codes # starting with '39' are in Greenland. France => "(?k:(?k:[0-8][0-9]|9[0-8])(?k:[0-9]{3}))", # Postal codes of the form: 'DDDDD'. All digits are used. # First two digits indicate the department, and range # from 01 to 98, or 00 for army. Germany => "(?k:(?k:[0-9])(?k:[0-9])(?k:[0-9]{3}))", # Postal codes of the form: 'DDDDD'. All digits are used. # First digit is the distribution zone, second a # distribution region. Other digits indicate the # distribution district and postal town. Greenland => "(?k:(?k:39)(?k:[0-9]{2}))", # Postal codes of Greenland are part of the Danish # system. Codes in Greenland start with 39. Italy => "(?k:(?k:[0-9])(?k:[0-9])(?k:[0-9])(?k:[0-9])(?k:[0-9]))", # First digit: region. # Second digit: province. # Third digit: capital/province (odd for capital). # Fourth digit: route. # Fifth digit: place on route (0 for small places) Norway => "(?k:[0-9]{4})", # Four digits, no significance (??). Spain => "(?k:(?k:0[1-9]|[1-4][0-9]|5[0-2])(?k:[0-9])(?k:[0-9]{2}))", # Five digits, first two indicate the province. # Third digit: large town, main delivery rounds. # Last 2 digits: delivery area, secondary delivery route # or link to rural areas. Switzerland => "(?k:[1-9][0-9]{3})", # Four digits, first is district, second is area, # third is route, fourth is post office number. ); my %alternatives = ( Australia => [qw /Australian/], France => [qw /French/], Germany => [qw /German/], ); while (my ($country, $zip) = each %zip) { my @names = ($country); push @names => @{$alternatives {$country}} if $alternatives {$country}; foreach my $name (@names) { my $pat_name = $name eq "Denmark" && $] < 5.00503 ? [zip => $name, qw /-country=/] : [zip => $name, qw /-prefix= -country=/]; pattern name => $pat_name, create => sub { my $pt = _t $_ [1] {-prefix}; my $cn = _c $country => $_ [1] {-country}; my $pfx = "(?:(?k:$cn)-)"; "(?k:$pfx$pt$zip)"; }, ; } } # Postal codes of the form 'DDDD LL', with F, I, O, Q, U and Y not # used, SA, SD and SS unused combinations, and the first digit # cannot be 0. No specific meaning to the letters or digits. foreach my $country (qw /Netherlands Dutch/) { pattern name => ['zip', $country => qw /-prefix= -country=/, "-sep= "], create => sub { my $pt = _t $_ [1] {-prefix}; # Unused letters: F, I, O, Q, U, Y. # Unused combinations: SA, SD, SS. my $num = '[1-9][0-9]{3}'; my $let = '[A-EGHJ-NPRTVWXZ][A-EGHJ-NPRSTVWXZ]|' . 'S[BCEGHJ-NPRTVWXZ]'; my $sep = __ $_ [1] {-sep}; my $cn = _c Netherlands => $_ [1] {-country}; my $pfx = "(?:(?k:$cn)-)"; "(?k:$pfx$pt(?k:(?k:$num)(?k:$sep)(?k:$let)))"; }, ; } # Postal codes of the form 'DDDDD' or 'DDDDD-DDDD'. All digits are used, # none carry any specific meaning. pattern name => [qw /zip US -prefix= -country= -extended= -sep=-/], create => sub { my $pt = _t $_ [1] {-prefix}; my $et = _t $_ [1] {-extended}; my $sep = __ $_ [1] {-sep}; my $cn = _c USA => $_ [1] {-country}; my $pfx = "(?:(?k:$cn)-)"; # my $zip = "(?k:[0-9]{5})"; # my $ext = "(?:(?k:$sep)(?k:[0-9]{4}))"; my $zip = "(?k:(?k:[0-9]{3})(?k:[0-9]{2}))"; my $ext = "(?:(?k:$sep)(?k:(?k:[0-9]{2})(?k:[0-9]{2})))"; "(?k:$pfx$pt(?k:$zip$ext$et))"; }, version => 5.00503, ; # pattern name => [qw /zip British/, "-sep= "], # create => sub { # my $sep = $_ [1] -> {-sep}; # # my $london = '(?:EC[1-4]|WC[12]|S?W1)[A-Z]'; # my $single = '[BGLMS][0-9]{1,2}'; # my $double = '[A-Z]{2}[0-9]{1,2}'; # # my $left = "(?:$london|$single|$double)"; # my $right = '[0-9][ABD-HJLNP-UW-Z]{2}'; # # "(?k:(?k:$left)(?k:$sep)(?k:$right))"; # }, # ; # # pattern name => [qw /zip Canadian/, "-sep= "], # create => sub { # my $sep = $_ [1] -> {-sep}; # # my $left = '[A-Z][0-9][A-Z]'; # my $right = '[0-9][A-Z][0-9]'; # # "(?k:(?k:$left)(?k:$sep)(?k:$right))"; # }, # ; 1; __END__ =pod =head1 NAME Regexp::Common::zip -- provide regexes for postal codes. =head1 SYNOPSIS use Regexp::Common qw /zip/; while (<>) { /^$RE{zip}{Netherlands}$/ and print "Dutch postal code\n"; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. This module offers patterns for zip or postal codes of many different countries. They all have the form C<$RE{zip}{Country}[{options}]>. The following common options are used: =head2 C<{-prefix=[yes|no|allow]}> and C<{-country=PAT}>. Postal codes can be prefixed with a country abbreviation. That is, a dutch postal code of B<1234 AB> can also be written as B. By default, all the patterns will allow the prefixes. But this can be changed with the C<-prefix> option. With C<-prefix=yes>, the returned pattern requires a country prefix, while C<-prefix=no> disallows a prefix. Any argument that doesn't start with a C or a C allows a country prefix, but doesn't require them. The prefixes used are, unfortunally, not always the same. Officially, ISO country codes need to be used, but the usage of I codes (the same ones as used on cars) is common too. By default, each postal code will recognize a country prefix that's either the ISO standard or the CEPT code. That is, German postal codes may prefixed with either C or C. The recognized prefix can be changed with the C<-country> option, which takes a (sub)pattern as argument. The arguments C and C are special, and indicate the language prefix should be the ISO country code, or the CEPT code. Examples: /$RE{zip}{Netherlands}/; # Matches '1234 AB' and 'NL-1234 AB'. /$RE{zip}{Netherlands}{-prefix => 'no'}/; # Matches '1234 AB' but not 'NL-1234 AB'. /$RE{zip}{Netherlands}{-prefix => 'yes'}/; # Matches 'NL-1234 AB' but not '1234 AB'. /$RE{zip}{Germany}/; # Matches 'DE-12345' and 'D-12345'. /$RE{zip}{Germany}{-country => 'iso'}/; # Matches 'DE-12345' but not 'D-12345'. /$RE{zip}{Germany}{-country => 'cept'}/; # Matches 'D-12345' but not 'DE-12345'. /$RE{zip}{Germany}{-country => 'GER'}/; # Matches 'GER-12345'. =head2 C<{-sep=PAT}> Some countries have postal codes that consist of two parts. Typically there is an official way of separating those parts; but in practise people tend to use different separators. For instance, if the official way to separate parts is to use a space, it happens that the space is left off. The C<-sep> option can be given a pattern as argument which indicates what to use as a separator between the parts. Examples: /$RE{zip}{Netherlands}/; # Matches '1234 AB' but not '1234AB'. /$RE{zip}{Netherlands}{-sep => '\s*'}/; # Matches '1234 AB' and '1234AB'. =head2 C<$RE{zip}{Australia}> Returns a pattern that recognizes Australian postal codes. Australian postal codes consist of four digits; the first two digits, which range from '10' to '97', indicate the state. Territories use '02' or '08' as starting digits; the leading zero is optional. '0909' is the only postal code starting with '09' (the leading zero is optional here as well) - this is the postal code for the Nothern Territory University). The (optional) country prefixes are I (ISO country code) and I (CEPT code). Regexp::Common 2.107 and before used C<$RE{zip}{Australia}>. This is still supported. If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 The state or territory. =item $5 The last two digits. =back =head2 C<$RE{zip}{Belgium}> Returns a pattern than recognizes Belgian postal codes. Belgian postal codes consist of 4 digits, of which the first indicates the province. The (optional) country prefixes are I (ISO country code) and I (CEPT code). If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 The digit indicating the province. =item $5 The last three digits of the postal code. =back =head2 C<$RE{zip}{Denmark}> Returns a pattern that recognizes Danish postal codes. Danish postal codes consist of four numbers; the first digit (which cannot be 0), indicates the distribution region, the second the distribution district. The (optional) country prefix is I, which is both the ISO country code and the CEPT code. If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 The digit indicating the distribution region. =item $5 The digit indicating the distribution district. =item $6 The last two digits of the postal code. =back =head2 C<$RE{zip}{France}> Returns a pattern that recognizes French postal codes. French postal codes consist of five numbers; the first two numbers, which range from '01' to '98', indicate the department. The (optional) country prefixes are I (ISO country code) and I (CEPT code). Regexp::Common 2.107 and before used C<$RE{zip}{French}>. This is still supported. If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 The department. =item $5 The last three digits. =back =head2 C<$RE{zip}{Germany}> Returns a pattern that recognizes German postal codes. German postal codes consist of five numbers; the first number indicating the distribution zone, the second the distribution region, while the latter three indicate the distribution district and the postal town. The (optional) country prefixes are I (ISO country code) and I (CEPT code). Regexp::Common 2.107 and before used C<$RE{zip}{German}>. This is still supported. If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 The distribution zone. =item $5 The distribution region. =item $6 The distribution district and postal town. =back =head2 C<$RE{zip}{Greenland}> Returns a pattern that recognizes postal codes from Greenland. Greenland, being part of Denmark, uses Danish postal codes. All postal codes of Greenland start with 39. The (optional) country prefix is I, which is both the ISO country code and the CEPT code. If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 39, being the distribution region and distribution district for Greenland. =item $5 The last two digits of the postal code. =back =head2 C<$RE{zip}{Italy}> Returns a pattern recognizing Italian postal codes. Italian postal codes consist of 5 digits. The first digit indicates the region, the second the province. The third digit is odd for province capitals, and even for the province itself. The fourth digit indicates the route, and the fifth a place on the route (0 for small places, alphabetically for the rest). The country prefix is either I (the ISO country code), or I (the CEPT code). If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 The region. =item $5 The province. =item $6 Capital or province. =item $7 The route. =item $8 The place on the route. =back =head2 C<$RE{zip}{Netherlands}> Returns a pattern that recognizes Dutch postal codes. Dutch postal codes consist of 4 digits and 2 letters, separated by a space. The separator can be changed using the C<{-sep}> option, as discussed above. The (optional) country prefix is I, which is both the ISO country code and the CEPT code. Regexp::Common 2.107 and earlier used C<$RE{zip}{Dutch}>. This is still supported. If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 The digits part of the postal code. =item $5 The separator between the digits and the letters. =item $6 The letters part of the postal code. =back =head2 C<< $RE{zip}{Norway} >> Returns a pattern that recognizes Norwegian postal codes. Norwegian postal codes consist of four digits. The country prefix is either I (the ISO country code), or I (the CEPT code). If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =back =head2 C<< $RE{zip}{Spain} >> Returns a pattern that recognizes Spanish postal codes. Spanish postal codes consist of 5 digits. The first 2 indicate one of Spains fifties provinces (in alphabetical order), starting with C<00>. The third digit indicates a main city or the main delivery rounds. The last two digits are the delivery area, secondary delivery route or a link to rural areas. The country prefix is either I (the ISO country code), or I (the CEPT code). If C<{-keep}> is used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 The two digits indicating the province. =item $5 The digit indicating the main city or main delivery route. =item $6 The digits indicating the delivery area, secondary delivery route or a link to rural areas. =back =head2 C<< $RE{zip}{Switzerland} >> Returns a pattern that recognizes Swiss postal codes. Swiss postal codes consist of 4 digits. The first indicates the district, starting with 1. The second indicates the area, the third, the route, and the fourth the post office number. =head2 C<< $RE{zip}{US}{-extended => [yes|no|allow]} >> Returns a pattern that recognizes US zip codes. US zip codes consist of 5 digits, with an optional 4 digit extension. By default, extensions are allowed, but not required. This can be influenced by the C<-extended> option. If its argument starts with a C, extensions are required; if the argument starts with a C, extensions will not be recognized. If an extension is used, a dash is used to separate the main part from the extension, but this can be changed with the C<-sep> option. The country prefix is either I (the ISO country code), or I (the CEPT code). If C<{-keep}> is being used, the following variables will be set: =over 4 =item $1 The entire postal code. =item $2 The country code prefix. =item $3 The postal code without the country prefix. =item $4 The first 5 digits of the postal code. =item $5 The first three digits of the postal code, indicating a sectional center or a large city. New in Regexp::Common 2.119. =item $6 The last 2 digits of the 5 digit part of the postal code, indicating a post office facility or delivery area. New in Regexp::Common 2.119. =item $7 The separator between the 5 digit part and the 4 digit part. Up to Regexp::Common 2.118, this used to be $5. =item $8 The 4 digit part of the postal code (if any). Up to Regexp::Common 2.118, this used to be $6. =item $9 The first two digits of the 4 digit part of the postal code, indicating a sector, or several blocks. New in Regexp::Common 2.119. =item $10 The last two digits of the 4 digit part of the postal code, indicating a segment or one side of a street. New in Regexp::Common 2.119. =back You need at least version 5.005_03 to be able to use US postal codes. Older versions contain a bug that let the pattern match invalid US postal codes. =head3 Questions =over 4 =item Can the 5 digit part of the zip code (in theory) start with 000? =item Can the 5 digit part of the zip code (in theory) end with 00? =item Can the 4 digit part of the zip code (in theory) start with 00? =item Can the 4 digit part of the zip code (in theory) end with 00? =back =head1 SEE ALSO L for a general description of how to use this interface. =over 4 =item L Frank's compulsive guide to postal addresses. =item L Postal addressing systems. =item L Postal code information. =item L Links to Postcode Pages. =item L Information about Australian postal codes. =item L Information about US postal codes. =item L =back =head1 AUTHORS Damian Conway S<(I)> and Abigail S<(I)>. =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Zip codes for most countries are missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut balanced.pm000064400000012427147635103360006651 0ustar00package Regexp::Common::balanced; { use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2013030901'; my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' ); my %cache; sub nested { my ($start, $finish) = @_; return $cache {$start} {$finish} if exists $cache {$start} {$finish}; my @starts = map {s/\\(.)/$1/g; $_} grep {length} $start =~ /([^|\\]+|\\.)+/gs; my @finishes = map {s/\\(.)/$1/g; $_} grep {length} $finish =~ /([^|\\]+|\\.)+/gs; push @finishes => ($finishes [-1]) x (@starts - @finishes); my @re; local $" = "|"; foreach my $begin (@starts) { my $end = shift @finishes; my $qb = quotemeta $begin; my $qe = quotemeta $end; my $fb = quotemeta substr $begin => 0, 1; my $fe = quotemeta substr $end => 0, 1; my $tb = quotemeta substr $begin => 1; my $te = quotemeta substr $end => 1; my $add; if ($fb eq $fe) { push @re => qq /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|(?-1))*$qe)/; } else { my @clauses = "(?>[^$fb$fe]+)"; push @clauses => "$fb(?!$tb)" if length $tb; push @clauses => "$fe(?!$te)" if length $te; push @clauses => "(?-1)"; push @re => qq /(?:$qb(?:@clauses)*$qe)/; } } $cache {$start} {$finish} = qr /(@re)/; } pattern name => [qw /balanced -parens=() -begin= -end=/], create => sub { my $flag = $_[1]; unless (defined $flag -> {-begin} && length $flag -> {-begin} && defined $flag -> {-end} && length $flag -> {-end}) { my @open = grep {index ($flag->{-parens}, $_) >= 0} ('[','(','{','<'); my @close = map {$closer {$_}} @open; $flag -> {-begin} = join "|" => @open; $flag -> {-end} = join "|" => @close; } return nested @$flag {qw /-begin -end/}; }, version => 5.010, ; } 1; __END__ =pod =head1 NAME Regexp::Common::balanced -- provide regexes for strings with balanced parenthesized delimiters or arbitrary delimiters. =head1 SYNOPSIS use Regexp::Common qw /balanced/; while (<>) { /$RE{balanced}{-parens=>'()'}/ and print q{balanced parentheses\n}; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. =head2 C<$RE{balanced}{-parens}> Returns a pattern that matches a string that starts with the nominated opening parenthesis or bracket, contains characters and properly nested parenthesized subsequences, and ends in the matching parenthesis. More than one type of parenthesis can be specified: $RE{balanced}{-parens=>'(){}'} in which case all specified parenthesis types must be correctly balanced within the string. Since version 2013030901, C<< $1 >> will always be set (to the entire matched substring), regardless whether C<< {-keep} >> is used or not. =head2 C<< $RE{balanced}{-begin => "begin"}{-end => "end"} >> Returns a pattern that matches a string that is properly balanced using the I and I strings as start and end delimiters. Multiple sets of begin and end strings can be given by separating them by C<|>s (which can be escaped with a backslash). qr/$RE{balanced}{-begin => "do|if|case"}{-end => "done|fi|esac"}/ will match properly balanced strings that either start with I and end with I, start with I and end with I, or start with I and end with I. If I<-end> contains less cases than I<-begin>, the last case of I<-end> is repeated. If it contains more cases than I<-begin>, the extra cases are ignored. If either of I<-begin> or I<-end> isn't given, or is empty, I<< -begin => '(' >> and I<< -end => ')' >> are assumed. Since version 2013030901, C<< $1 >> will always be set (to the entire matched substring), regardless whether C<< {-keep} >> is used or not. =head2 Note Since version 2013030901 the pattern will make of the recursive construct C<< (?-1) >>, instead of using the problematic C<< (??{ }) >> construct. This fixes an problem that was introduced in the 5.17 development track. This also means the pattern is no longer available for Perls older than 5.010. =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2013, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut net.pm000064400000031060147635103360005700 0ustar00package Regexp::Common::net; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2013031301'; my %IPunit = ( dec => q{(?k:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})}, oct => q{(?k:[0-3]?[0-7]{1,2})}, hex => q{(?k:[0-9a-fA-F]{1,2})}, bin => q{(?k:[0-1]{1,8})}, ); my %MACunit = ( %IPunit, hex => q{(?k:[0-9a-fA-F]{1,2})}, ); my %IPv6unit = ( hex => q {(?k:[0-9a-f]{1,4})}, HEX => q {(?k:[0-9A-F]{1,4})}, HeX => q {(?k:[0-9a-fA-F]{1,4})}, ); sub dec {$_}; sub bin {oct "0b$_"} my $IPdefsep = '[.]'; my $MACdefsep = ':'; my $IPv6defsep = ':'; pattern name => [qw (net IPv4)], create => "(?k:$IPunit{dec}$IPdefsep$IPunit{dec}$IPdefsep" . "$IPunit{dec}$IPdefsep$IPunit{dec})", ; pattern name => [qw (net MAC)], create => "(?k:" . join ($MACdefsep => ($MACunit{hex}) x 6) . ")", subs => sub { $_ [1] = join ":" => map {sprintf "%02x" => hex} split /$MACdefsep/ => $_ [1] if $_ [1] =~ /$_[0]/ }, ; foreach my $type (qw /dec oct hex bin/) { pattern name => [qw (net IPv4), $type, "-sep=$IPdefsep"], create => sub {my $sep = $_ [1] -> {-sep}; "(?k:$IPunit{$type}$sep$IPunit{$type}$sep" . "$IPunit{$type}$sep$IPunit{$type})" }, ; pattern name => [qw (net MAC), $type, "-sep=$MACdefsep"], create => sub {my $sep = $_ [1] -> {-sep}; "(?k:" . join ($sep => ($MACunit{$type}) x 6) . ")", }, subs => sub { return if $] < 5.006 and $type eq 'bin'; $_ [1] = join ":" => map {sprintf "%02x" => eval $type} $2, $3, $4, $5, $6, $7 if $_ [1] =~ $RE {net} {MAC} {$type} {-sep => $_ [0] -> {flags} {-sep}} {-keep}; }, ; } my %cache6; pattern name => [qw (net IPv6), "-sep=$IPv6defsep", "-style=HeX"], create => sub { my $style = $_ [1] {-style}; my $sep = $_ [1] {-sep}; return $cache6 {$style, $sep} if $cache6 {$style, $sep}; my @re; die "Impossible style '$style'\n" unless exists $IPv6unit {$style}; # # Nothing missing # push @re => join $sep => ($IPv6unit {$style}) x 8; # # For "double colon" representations, at least 2 units must # be omitted, leaving us with at most 6 units. 0 units is also # possible. Note we can have at most one double colon. # for (my $l = 0; $l <= 6; $l ++) { # # We prefer to do longest match, so larger $r gets priority # for (my $r = 6 - $l; $r >= 0; $r --) { # # $l is the number of blocks left of the double colon, # $r is the number of blocks left of the double colon, # $m is the number of omitted blocks # my $m = 8 - $l - $r; my $patl = $l ? ($IPv6unit {$style} . $sep) x $l : $sep; my $patr = $r ? ($sep . $IPv6unit {$style}) x $r : $sep; my $patm = "(?k:)" x $m; my $pat = $patl . $patm . $patr; push @re => "(?:$pat)"; } } local $" = "|"; $cache6 {$style, $sep} = qq /(?k:(?|@re))/; }, version => 5.010 ; my $letter = "[A-Za-z]"; my $let_dig = "[A-Za-z0-9]"; my $let_dig_hyp = "[-A-Za-z0-9]"; # Domain names, from RFC 1035. pattern name => [qw (net domain -nospace= -rfc1101=)], create => sub { my $rfc1101 = exists $_ [1] {-rfc1101} && !defined $_ [1] {-rfc1101}; my $lead = $rfc1101 ? "(?!$RE{net}{IPv4}(?:[.]|\$))$let_dig" : $letter; if (exists $_ [1] {-nospace} && !defined $_ [1] {-nospace}) { return "(?k:$lead(?:(?:$let_dig_hyp){0,61}$let_dig)?" . "(?:\\.$lead(?:(?:$let_dig_hyp){0,61}$let_dig)?)*)" } else { return "(?k: |(?:$lead(?:(?:$let_dig_hyp){0,61}$let_dig)?" . "(?:\\.$lead(?:(?:$let_dig_hyp){0,61}$let_dig)?)*))" } }, ; 1; __END__ =head1 NAME Regexp::Common::net -- provide regexes for IPv4 addresses. =head1 SYNOPSIS use Regexp::Common qw /net/; while (<>) { /$RE{net}{IPv4}/ and print "Dotted decimal IP address"; /$RE{net}{IPv4}{hex}/ and print "Dotted hexadecimal IP address"; /$RE{net}{IPv4}{oct}{-sep => ':'}/ and print "Colon separated octal IP address"; /$RE{net}{IPv4}{bin}/ and print "Dotted binary IP address"; /$RE{net}{MAC}/ and print "MAC address"; /$RE{net}{MAC}{oct}{-sep => " "}/ and print "Space separated octal MAC address"; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. This modules gives you regular expressions for various style IPv4 and MAC (or ethernet) addresses. =head2 C<$RE{net}{IPv4}> Returns a pattern that matches a valid IP address in "dotted decimal". Note that while C<318.99.183.11> is not a valid IP address, it does match C, but this is because C<318.99.183.11> contains a valid IP address, namely C<18.99.183.11>. To prevent the unwanted matching, one needs to anchor the regexp: C. For this pattern and the next four, under C<-keep> (See L): =over 4 =item $1 captures the entire match =item $2 captures the first component of the address =item $3 captures the second component of the address =item $4 captures the third component of the address =item $5 captures the final component of the address =back =head2 C<$RE{net}{IPv4}{dec}{-sep}> Returns a pattern that matches a valid IP address in "dotted decimal" If C<< -sep=I

>> is specified the pattern I

is used as the separator. By default I

is C. =head2 C<$RE{net}{IPv4}{hex}{-sep}> Returns a pattern that matches a valid IP address in "dotted hexadecimal", with the letters C to C capitalized. If C<< -sep=I

>> is specified the pattern I

is used as the separator. By default I

is C. C<< -sep="" >> and C<< -sep=" " >> are useful alternatives. =head2 C<$RE{net}{IPv4}{oct}{-sep}> Returns a pattern that matches a valid IP address in "dotted octal" If C<< -sep=I

>> is specified the pattern I

is used as the separator. By default I

is C. =head2 C<$RE{net}{IPv4}{bin}{-sep}> Returns a pattern that matches a valid IP address in "dotted binary" If C<< -sep=I

>> is specified the pattern I

is used as the separator. By default I

is C. =head2 C<$RE{net}{MAC}> Returns a pattern that matches a valid MAC or ethernet address as colon separated hexadecimals. For this pattern, and the next four, under C<-keep> (See L): =over 4 =item $1 captures the entire match =item $2 captures the first component of the address =item $3 captures the second component of the address =item $4 captures the third component of the address =item $5 captures the fourth component of the address =item $6 captures the fifth component of the address =item $7 captures the sixth and final component of the address =back This pattern, and the next four, have a C method as well, which will transform a matching MAC address into so called canonical format. Canonical format means that every component of the address will be exactly two hexadecimals (with a leading zero if necessary), and the components will be separated by a colon. The C method will not work for binary MAC addresses if the Perl version predates 5.6.0. =head2 C<$RE{net}{MAC}{dec}{-sep}> Returns a pattern that matches a valid MAC address as colon separated decimals. If C<< -sep=I

>> is specified the pattern I

is used as the separator. By default I

is C. =head2 C<$RE{net}{MAC}{hex}{-sep}> Returns a pattern that matches a valid MAC address as colon separated hexadecimals, with the letters C to C in lower case. If C<< -sep=I

>> is specified the pattern I

is used as the separator. By default I

is C. =head2 C<$RE{net}{MAC}{oct}{-sep}> Returns a pattern that matches a valid MAC address as colon separated octals. If C<< -sep=I

>> is specified the pattern I

is used as the separator. By default I

is C. =head2 C<$RE{net}{MAC}{bin}{-sep}> Returns a pattern that matches a valid MAC address as colon separated binary numbers. If C<< -sep=I

>> is specified the pattern I

is used as the separator. By default I

is C. =head2 C<$RE{net}{IPv6}{-sep => ':'}{-style => 'HeX'}> Returns a pattern matching IPv6 numbers. An IPv6 address consists of eigth groups of four hexadecimal digits, separated by colons. In each group, leading zeros may be omitted. Two or more consecutive groups consisting of only zeros may be omitted (including any colons separating them), resulting into two sets of groups, separated by a double colon. (Each of the groups may be empty; C<< :: >> is a valid address, equal to C<< 0000:0000:0000:0000:0000:0000:0000:0000 >>). The hex numbers may be in either case. If the C<< -sep >> option is used, its argument is a pattern that matches the separator that separates groups. This defaults to C<< : >>. The C<< -style >> option is used to denote which case the hex numbers may be. The default style, C<< 'HeX' >> indicates both lower case letters C<< 'a' >> to C<< 'f' >> and upper case letters C<< 'A' >> to C<< 'F' >> will be matched. The style C<< 'HEX' >> restricts matching to upper case letters, and C<< 'hex' >> only matches lower case letters. If C<< {-keep} >> is used, C<< $1 >> to C<< $9 >> will be set. C<< $1 >> will be set to the matched address, while C<< $2 >> to C<< $9 >> will be set to each matched group. If a group is omitted because it contains all zeros, its matching variable will be the empty string. Example: "2001:db8:85a3::8a2e:370:7334" =~ /$RE{net}{IPv6}{-keep}/; print $2; # '2001' print $4; # '85a3' print $6; # Empty string print $8; # '370' Perl 5.10 (or later) is required for this pattern. =head2 C<$RE{net}{domain}> Returns a pattern to match domains (and hosts) as defined in RFC 1035. Under I{-keep} only the entire domain name is returned. RFC 1035 says that a single space can be a domainname too. So, the pattern returned by C<$RE{net}{domain}> recognizes a single space as well. This is not always what people want. If you want to recognize domainnames, but not a space, you can do one of two things, either use /(?! )$RE{net}{domain}/ or use the C<{-nospace}> option (without an argument). RFC 1035 does B allow host or domain names to start with a digits; however, this restriction is relaxed in RFC 1101; this RFC allows host and domain names to start with a digit, as long as the first part of a domain does not look like an IP address. If the C<< {-rfc1101} >> option is given (as in C<< $RE {net} {domain} {-rfc1101} >>), we will match using the relaxed rules. =head1 REFERENCES =over 4 =item B Mockapetris, P.: I. November 1987. =item B Mockapetris, P.: I. April 1987. =back =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway I. =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2013, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut delimited.pm000064400000010105147635103360007047 0ustar00package Regexp::Common::delimited; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; sub gen_delimited { my ($dels, $escs) = @_; # return '(?:\S*)' unless $dels =~ /\S/; if (length $escs) { $escs .= substr ($escs, -1) x (length ($dels) - length ($escs)); } my @pat = (); my $i; for ($i=0; $i < length $dels; $i++) { my $del = quotemeta substr ($dels, $i, 1); my $esc = length($escs) ? quotemeta substr ($escs, $i, 1) : ""; if ($del eq $esc) { push @pat, "(?k:$del)(?k:[^$del]*(?:(?:$del$del)[^$del]*)*)(?k:$del)"; } elsif (length $esc) { push @pat, "(?k:$del)(?k:[^$esc$del]*(?:$esc.[^$esc$del]*)*)(?k:$del)"; } else { push @pat, "(?k:$del)(?k:[^$del]*)(?k:$del)"; } } my $pat = join '|', @pat; return "(?k:$pat)"; } sub _croak { require Carp; goto &Carp::croak; } pattern name => [qw( delimited -delim= -esc=\\ )], create => sub {my $flags = $_[1]; _croak 'Must specify delimiter in $RE{delimited}' unless length $flags->{-delim}; return gen_delimited (@{$flags}{-delim, -esc}); }, ; pattern name => [qw( quoted -esc=\\ )], create => sub {my $flags = $_[1]; return gen_delimited (q{"'`}, $flags -> {-esc}); }, ; 1; __END__ =pod =head1 NAME Regexp::Common::delimited -- provides a regex for delimited strings =head1 SYNOPSIS use Regexp::Common qw /delimited/; while (<>) { /$RE{delimited}{-delim=>'"'}/ and print 'a \" delimited string'; /$RE{delimited}{-delim=>'/'}/ and print 'a \/ delimited string'; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. =head2 C<$RE{delimited}{-delim}{-esc}> Returns a pattern that matches a single-character-delimited substring, with optional internal escaping of the delimiter. When C<-delim=I> is specified, each character in the sequence I is a possible delimiter. There is no default delimiter, so this flag must always be specified. If C<-esc=I> is specified, each character in the sequence I is the delimiter for the corresponding character in the C<-delim=I> list. The default escape is backslash. For example: $RE{delimited}{-delim=>'"'} # match "a \" delimited string" $RE{delimited}{-delim=>'"'}{-esc=>'"'} # match "a "" delimited string" $RE{delimited}{-delim=>'/'} # match /a \/ delimited string/ $RE{delimited}{-delim=>q{'"}} # match "string" or 'string' Under C<-keep> (See L): =over 4 =item $1 captures the entire match =item $2 captures the opening delimiter (provided only one delimiter was specified) =item $3 captures delimited portion of the string (provided only one delimiter was specified) =item $4 captures the closing delimiter (provided only one delimiter was specified) =back =head2 $RE{quoted}{-esc} A synonym for C<$RE{delimited}{q{-delim='"`}{...}}> =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut profanity.pm000064400000010551147635103360007127 0ustar00package Regexp::Common::profanity; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $profanity = '(?:cvff(?:\\ gnxr|\\-gnxr|gnxr|r(?:ef|[feq])|vat|l)?|dhvzf?|fuvg(?:g(?:r(?:ef|[qe])|vat|l)|r(?:ef|[fqel])|vat|[fr])?|g(?:heqf?|jngf?)|jnax(?:r(?:ef|[eq])|vat|f)?|n(?:ef(?:r(?:\\ ubyr|\\-ubyr|ubyr|[fq])|vat|r)|ff(?:\\ ubyrf?|\\-ubyrf?|rq|ubyrf?|vat))|o(?:hyy(?:\\ fuvg(?:g(?:r(?:ef|[qe])|vat)|f)?|\\-fuvg(?:g(?:r(?:ef|[qe])|vat)|f)?|fuvg(?:g(?:r(?:ef|[qe])|vat)|f)?)|ybj(?:\\ wbof?|\\-wbof?|wbof?))|p(?:bpx(?:\\ fhpx(?:ref?|vat)|\\-fhpx(?:ref?|vat)|fhpx(?:ref?|vat))|enc(?:c(?:r(?:ef|[eq])|vat|l)|f)?|h(?:agf?|z(?:vat|zvat|f)))|qvpx(?:\\ urnq|\\-urnq|rq|urnq|vat|yrff|f)|s(?:hpx(?:rq|vat|f)?|neg(?:r[eq]|vat|[fl])?|rygpu(?:r(?:ef|[efq])|vat)?)|un(?:eq[\\-\\ ]?ba|ys(?:\\ n[fe]|\\-n[fe]|n[fe])frq)|z(?:bgure(?:\\ shpx(?:ref?|vat)|\\-shpx(?:ref?|vat)|shpx(?:ref?|vat))|hgu(?:n(?:\\ shpx(?:ref?|vat|[nnn])|\\-shpx(?:ref?|vat|[nnn])|shpx(?:ref?|vat|[nnn]))|re(?:\\ shpx(?:ref?|vat)|\\-shpx(?:ref?|vat)|shpx(?:ref?|vat)))|reqr?))'; my $contextual = '(?:c(?:bex|e(?:bax|vpxf?)|hff(?:vrf|l)|vff(?:\\ gnxr|\\-gnxr|gnxr|r(?:ef|[feq])|vat|l)?)|dhvzf?|ebbg(?:r(?:ef|[eq])|vat|f)?|f(?:bq(?:q(?:rq|vat)|f)?|chax|perj(?:rq|vat|f)?|u(?:nt(?:t(?:r(?:ef|[qe])|vat)|f)?|vg(?:g(?:r(?:ef|[qe])|vat|l)|r(?:ef|[fqel])|vat|[fr])?))|g(?:heqf?|jngf?|vgf?)|jnax(?:r(?:ef|[eq])|vat|f)?|n(?:ef(?:r(?:\\ ubyr|\\-ubyr|ubyr|[fq])|vat|r)|ff(?:\\ ubyrf?|\\-ubyrf?|rq|ubyrf?|vat))|o(?:ba(?:r(?:ef|[fe])|vat|r)|h(?:ttre|yy(?:\\ fuvg(?:g(?:r(?:ef|[qe])|vat)|f)?|\\-fuvg(?:g(?:r(?:ef|[qe])|vat)|f)?|fuvg(?:g(?:r(?:ef|[qe])|vat)|f)?))|n(?:fgneq|yy(?:r(?:ef|[qe])|vat|f)?)|yb(?:bql|j(?:\\ wbof?|\\-wbof?|wbof?)))|p(?:bpx(?:\\ fhpx(?:ref?|vat)|\\-fhpx(?:ref?|vat)|fhpx(?:ref?|vat)|f)?|enc(?:c(?:r(?:ef|[eq])|vat|l)|f)?|h(?:agf?|z(?:vat|zvat|f)))|q(?:batf?|vpx(?:\\ urnq|\\-urnq|rq|urnq|vat|yrff|f)?)|s(?:hpx(?:rq|vat|f)?|neg(?:r[eq]|vat|[fl])?|rygpu(?:r(?:ef|[efq])|vat)?)|u(?:hzc(?:r(?:ef|[eq])|vat|f)?|n(?:eq[\\-\\ ]?ba|ys(?:\\ n[fe]|\\-n[fe]|n[fe])frq))|z(?:bgure(?:\\ shpx(?:ref?|vat)|\\-shpx(?:ref?|vat)|shpx(?:ref?|vat))|hgu(?:n(?:\\ shpx(?:ref?|vat|[nnn])|\\-shpx(?:ref?|vat|[nnn])|shpx(?:ref?|vat|[nnn]))|re(?:\\ shpx(?:ref?|vat)|\\-shpx(?:ref?|vat)|shpx(?:ref?|vat)))|reqr?))'; tr/A-Za-z/N-ZA-Mn-za-m/ foreach $profanity, $contextual; pattern name => [qw (profanity)], create => '(?:\b(?k:' . $profanity . ')\b)', ; pattern name => [qw (profanity contextual)], create => '(?:\b(?k:' . $contextual . ')\b)', ; 1; __END__ =pod =head1 NAME Regexp::Common::profanity -- provide regexes for profanity =head1 SYNOPSIS use Regexp::Common qw /profanity/; while (<>) { /$RE{profanity}/ and print "Contains profanity\n"; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. =head2 $RE{profanity} Returns a pattern matching words -- such as Carlin's "big seven" -- that are most likely to give offense. Note that correct anatomical terms are deliberately I included in the list. Under C<-keep> (see L): =over 4 =item $1 captures the entire word =back =head2 C<$RE{profanity}{contextual}> Returns a pattern matching words that are likely to give offense when used in specific contexts, but which also have genuinely non-offensive meanings. Under C<-keep> (see L): =over 4 =item $1 captures the entire word =back =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut _support.pm000064400000004466147635103360006777 0ustar00package Regexp::Common::_support; BEGIN { # This makes sure 'use warnings' doesn't bomb out on 5.005_*; # warnings won't be enabled on those old versions though. if ($] < 5.006 && !exists $INC {"warnings.pm"}) { $INC {"warnings.pm"} = 1; no strict 'refs'; *{"warnings::unimport"} = sub {0}; } } use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; # # Returns true/false, depending whether the given the argument # satisfies the LUHN checksum. # See http://www.webopedia.com/TERM/L/Luhn_formula.html. # # Note that this function is intended to be called from regular # expression, so it should NOT use a regular expression in any way. # sub luhn { my $arg = shift; my $even = 0; my $sum = 0; while (length $arg) { my $num = chop $arg; return if $num lt '0' || $num gt '9'; if ($even && (($num *= 2) > 9)) {$num = 1 + ($num % 10)} $even = 1 - $even; $sum += $num; } !($sum % 10) } sub import { my $pack = shift; my $caller = caller; no strict 'refs'; *{$caller . "::" . $_} = \&{$pack . "::" . $_} for @_; } 1; __END__ =pod =head1 NAME Regexp::Common::support -- Support functions for Regexp::Common. =head1 SYNOPSIS use Regexp::Common::_support qw /luhn/; luhn ($number) # Returns true/false. =head1 DESCRIPTION This module contains some subroutines to be used by other C modules. It's not intended to be used directly. Subroutines from the module may disappear without any notice, or their meaning or interface may change without notice. =over 4 =item luhn This subroutine returns true if its argument passes the luhn checksum test. =back =head1 SEE ALSO L. =head1 AUTHOR Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut SEN.pm000064400000006731147635103360005546 0ustar00package Regexp::Common::SEN; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; =begin does_not_exist sub par11 { my $string = shift; my $sum = 0; for my $i (0 .. length ($string) - 1) { my $c = substr ($string, $i, 1); $sum += $c * (length ($string) - $i) } !($sum % 11) } =end does_not_exist =cut # http://www.ssa.gov/history/ssn/geocard.html pattern name => [qw /SEN USA SSN -sep=-/], create => sub { my $sep = $_ [1] {-sep}; "(?k:(?k:[1-9][0-9][0-9]|0[1-9][0-9]|00[1-9])$sep" . "(?k:[1-9][0-9]|0[1-9])$sep" . "(?k:[1-9][0-9][0-9][0-9]|0[1-9][0-9][0-9]|" . "00[1-9][0-9]|000[1-9]))" }, ; =begin does_not_exist It's not clear whether this is the right checksum. # http://www.google.nl/search?q=cache:8m1zKNYrEO0J:www.enschede.nl/nieuw/projecten/aanbesteding/integratie/pve%2520Bijlage%25207.5.doc+Sofi+nummer+formaat&hl=en&start=56&lr=lang_en|lang_nl&ie=UTF-8 pattern name => [qw /SEN Netherlands SoFi/], create => sub { # 9 digits (d1 d2 d3 d4 d5 d6 d7 d8 d9) # 9*d1 + 8*d2 + 7*d3 + 6*d4 + 5*d5 + 4*d6 + 3*d7 + 2*d8 + 1*d9 # == 0 mod 11. qr /([0-9]{9})(?(?{par11 ($^N)})|(?!))/; } ; =end does_not_exist =cut 1; __END__ =pod =head1 NAME Regexp::Common::SEN -- provide regexes for Social-Economical Numbers. =head1 SYNOPSIS use Regexp::Common qw /SEN/; while (<>) { /^$RE{SEN}{USA}{SSN}$/ and print "Social Security Number\n"; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. =head2 C<$RE{SEN}{USA}{SSN}{-sep}> Returns a pattern that matches an American Social Security Number (SSN). SSNs consist of three groups of numbers, separated by a hypen (C<->). This pattern only checks for a valid structure, that is, it validates whether a number is valid SSN, was a valid SSN, or maybe a valid SSN in the future. There are almost a billion possible SSNs, and about 400 million are in use, or have been in use. If C<-sep=I

> is specified, the pattern I

is used as the separator between the groups of numbers. Under C<-keep> (see L): =over 4 =item $1 captures the entire SSN. =item $2 captures the first group of digits (the area number). =item $3 captures the second group of digits (the group number). =item $4 captures the third group of digits (the serial number). =back =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHORS Damian Conway and Abigail. =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI.pm000064400000006540147635103360005556 0ustar00package Regexp::Common::URI; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use Exporter (); use vars qw /@EXPORT_OK @ISA/; @ISA = qw /Exporter/; @EXPORT_OK = qw /register_uri/; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; # Use 'require' here, not 'use', so we delay running them after we are compiled. # We also do it using an 'eval'; this saves us from have repeated similar # lines. The eval is further explained in 'perldoc -f require'. my @uris = qw /fax file ftp gopher http pop prospero news tel telnet tv wais/; foreach my $uri (@uris) { eval "require Regexp::Common::URI::$uri"; die $@ if $@; } my %uris; sub register_uri { my ($scheme, $uri) = @_; $uris {$scheme} = $uri; } pattern name => [qw (URI)], create => sub {my $uri = join '|' => values %uris; $uri =~ s/\(\?k:/(?:/g; "(?k:$uri)"; }, ; 1; __END__ =pod =head1 NAME Regexp::Common::URI -- provide patterns for URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{HTTP}/ and print "Contains an HTTP URI.\n"; } =head1 DESCRIPTION Patterns for the following URIs are supported: fax, file, FTP, gopher, HTTP, news, NTTP, pop, prospero, tel, telnet, tv and WAIS. Each is documented in the I>, manual page, for the appropriate scheme (in lowercase), except for I URIs which are found in I. =head2 C<$RE{URI}> Return a pattern that recognizes any of the supported URIs. With C<{-keep}>, only the entire URI is returned (in C<$1>). =head1 REFERENCES =over 4 =item B<[DRAFT-URI-TV]> Zigmond, D. and Vickers, M: I. December 2000. =item B<[DRAFT-URL-FTP]> Casey, James: I. November 1996. =item B<[RFC 1035]> Mockapetris, P.: I. November 1987. =item B<[RFC 1738]> Berners-Lee, Tim, Masinter, L., McCahill, M.: I. December 1994. =item B<[RFC 2396]> Berners-Lee, Tim, Fielding, R., and Masinter, L.: I. August 1998. =item B<[RFC 2616]> Fielding, R., Gettys, J., Mogul, J., Frystyk, H., Masinter, L., Leach, P. and Berners-Lee, Tim: I. June 1999. =item B<[RFC 2806]> Vaha-Sipila, A.: I. April 2000. =back =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut list.pm000064400000010216147635103360006065 0ustar00package Regexp::Common::list; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; sub gen_list_pattern { my ($pat, $sep, $lsep) = @_; $lsep = $sep unless defined $lsep; return "(?k:(?:(?:$pat)(?:$sep))*(?:$pat)(?k:$lsep)(?:$pat))"; } my $defpat = '.*?\S'; my $defsep = '\s*,\s*'; pattern name => ['list', "-pat=$defpat", "-sep=$defsep", '-lastsep'], create => sub {gen_list_pattern (@{$_[1]}{-pat, -sep, -lastsep})}, ; pattern name => ['list', 'conj', '-word=(?:and|or)'], create => sub {gen_list_pattern($defpat, $defsep, '\s*,?\s*'.$_[1]->{-word}.'\s*'); }, ; pattern name => ['list', 'and'], create => sub {gen_list_pattern ($defpat, $defsep, '\s*,?\s*and\s*')}, ; pattern name => ['list', 'or'], create => sub {gen_list_pattern ($defpat, $defsep, '\s*,?\s*or\s*')}, ; 1; __END__ =pod =head1 NAME Regexp::Common::list -- provide regexes for lists =head1 SYNOPSIS use Regexp::Common qw /list/; while (<>) { /$RE{list}{-pat => '\w+'}/ and print "List of words"; /$RE{list}{-pat => $RE{num}{real}}/ and print "List of numbers"; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. =head2 C<$RE{list}{-pat}{-sep}{-lastsep}> Returns a pattern matching a list of (at least two) substrings. If C<-pat=I

> is specified, it defines the pattern for each substring in the list. By default, I

is C. In Regexp::Common 0.02 or earlier, the default pattern was C. But that will match a single space, causing unintended parsing of C as a list of four elements instead of 3 (with C<-word> being C<(?:and)>). One consequence is that a list of the form "a,,b" will no longer be parsed. Use the pattern C to be able to parse this, but see the previous remark. If C<-sep=I

> is specified, it defines the pattern I

to be used as a separator between each pair of substrings in the list, except the final two. By default I

is C. If C<-lastsep=I

> is specified, it defines the pattern I

to be used as a separator between the final two substrings in the list. By default I

is the same as the pattern specified by the C<-sep> flag. For example: $RE{list}{-pat=>'\w+'} # match a list of word chars $RE{list}{-pat=>$RE{num}{real}} # match a list of numbers $RE{list}{-sep=>"\t"} # match a tab-separated list $RE{list}{-lastsep=>',\s+and\s+'} # match a proper English list Under C<-keep>: =over 4 =item $1 captures the entire list =item $2 captures the last separator =back =head2 C<$RE{list}{conj}{-word=I}> An alias for C<< $RE{list}{-lastsep=>'\s*,?\s*I\s*'} >> If C<-word> is not specified, the default pattern is C. For example: $RE{list}{conj}{-word=>'et'} # match Jean, Paul, et Satre $RE{list}{conj}{-word=>'oder'} # match Bonn, Koln oder Hamburg =head2 C<$RE{list}{and}> An alias for C<< $RE{list}{conj}{-word=>'and'} >> =head2 C<$RE{list}{or}> An alias for C<< $RE{list}{conj}{-word=>'or'} >> =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut whitespace.pm000064400000003633147635103360007253 0ustar00package Regexp::Common::whitespace; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; pattern name => [qw (ws crop)], create => '(?:^\s+|\s+$)', subs => sub {$_[1] =~ s/^\s+//; $_[1] =~ s/\s+$//;} ; 1; __END__ =pod =head1 NAME Regexp::Common::whitespace -- provides a regex for leading or trailing whitescape =head1 SYNOPSIS use Regexp::Common qw /whitespace/; while (<>) { s/$RE{ws}{crop}//g; # Delete surrounding whitespace } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. =head2 C<$RE{ws}{crop}> Returns a pattern that identifies leading or trailing whitespace. For example: $str =~ s/$RE{ws}{crop}//g; # Delete surrounding whitespace The call: $RE{ws}{crop}->subs($str); is optimized (but probably still slower than doing the s///g explicitly). This pattern does not capture under C<-keep>. =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut comment.pm000064400000075527147635103360006574 0ustar00package Regexp::Common::comment; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my @generic = ( {languages => [qw /ABC Forth/], to_eol => ['\\\\']}, # This is for just a *single* backslash. {languages => [qw /Ada Alan Eiffel lua/], to_eol => ['--']}, {languages => [qw /Advisor/], to_eol => ['#|//']}, {languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme SMITH zonefile/], to_eol => [';']}, {languages => ['Algol 60'], from_to => [[qw /comment ;/]]}, {languages => [qw {ALPACA B C C-- LPC PL/I}], from_to => [[qw {/* */}]]}, {languages => [qw /awk fvwm2 Icon m4 mutt Perl Python QML R Ruby shell Tcl/], to_eol => ['#']}, {languages => [[BASIC => 'mvEnterprise']], to_eol => ['[*!]|REM']}, {languages => [qw /Befunge-98 Funge-98 Shelta/], id => [';']}, {languages => ['beta-Juliet', 'Crystal Report', 'Portia', 'Ubercode'], to_eol => ['//']}, {languages => ['BML'], from_to => [['']], }, {languages => [qw /C++/, 'C#', qw /Cg ECMAScript FPL Java JavaScript/], to_eol => ['//'], from_to => [[qw {/* */}]]}, {languages => [qw /CLU LaTeX slrn TeX/], to_eol => ['%']}, {languages => [qw /False/], from_to => [[qw !{ }!]]}, {languages => [qw /Fortran/], to_eol => ['!']}, {languages => [qw /Haifu/], id => [',']}, {languages => [qw /ILLGOL/], to_eol => ['NB']}, {languages => [qw /INTERCAL/], to_eol => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]}, {languages => [qw /J/], to_eol => ['NB[.]']}, {languages => [qw /JavaDoc/], from_to => [[qw {/** */}]]}, {languages => [qw /Nickle/], to_eol => ['#'], from_to => [[qw {/* */}]]}, {languages => [qw /Oberon/], from_to => [[qw /(* *)/]]}, {languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]], to_eol => ['//'], from_to => [[qw !{ }!], [qw !(* *)!]]}, {languages => [[qw /Pascal Workshop/]], id => [qw /"/], from_to => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]}, {languages => [qw /PEARL/], to_eol => ['!'], from_to => [[qw {/* */}]]}, {languages => [qw /PHP/], to_eol => ['#', '//'], from_to => [[qw {/* */}]]}, {languages => [qw !PL/B!], to_eol => ['[.;]']}, {languages => [qw !PL/SQL!], to_eol => ['--'], from_to => [[qw {/* */}]]}, {languages => [qw /Q-BAL/], to_eol => ['`']}, {languages => [qw /Smalltalk/], id => ['"']}, {languages => [qw /SQL/], to_eol => ['-{2,}']}, {languages => [qw /troff/], to_eol => ['\\\"']}, {languages => [qw /vi/], to_eol => ['"']}, {languages => [qw /*W/], from_to => [[qw {|| !!}]]}, {languages => [qw /ZZT-OOP/], to_eol => ["'"]}, ); my @plain_or_nested = ( [Caml => undef, "(*" => "*)"], [Dylan => "//", "/*" => "*/"], [Haskell => "-{2,}", "{-" => "-}"], [Hugo => "!(?!\\\\)", "!\\" => "\\!"], [SLIDE => "#", "(*" => "*)"], ['Modula-2' => undef, "(*" => "*)"], ['Modula-3' => undef, "(*" => "*)"], ); # # Helper subs. # sub combine { local $_ = join "|", @_; if (@_ > 1) { s/\(\?k:/(?:/g; $_ = "(?k:$_)"; } $_ } sub to_eol ($) {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"} sub id ($) {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"} # One char only! sub from_to { my ($begin, $end) = @_; my $qb = quotemeta $begin; my $qe = quotemeta $end; my $fe = quotemeta substr $end => 0, 1; my $te = quotemeta substr $end => 1; "(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))"; } my $count = 0; sub nested { my ($begin, $end) = @_; $count ++; my $r = '(??{$Regexp::Common::comment ['. $count . ']})'; my $qb = quotemeta $begin; my $qe = quotemeta $end; my $fb = quotemeta substr $begin => 0, 1; my $fe = quotemeta substr $end => 0, 1; my $tb = quotemeta substr $begin => 1; my $te = quotemeta substr $end => 1; use re 'eval'; my $re; if ($fb eq $fe) { $re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/; } else { local $" = "|"; my @clauses = "(?>[^$fb$fe]+)"; push @clauses => "$fb(?!$tb)" if length $tb; push @clauses => "$fe(?!$te)" if length $te; push @clauses => $r; $re = qr /(?:$qb(?:@clauses)*$qe)/; } $Regexp::Common::comment [$count] = qr/$re/; } # # Process data. # foreach my $info (@plain_or_nested) { my ($language, $mark, $begin, $end) = @$info; pattern name => [comment => $language], create => sub {my $re = nested $begin => $end; my $prefix = defined $mark ? $mark . "[^\n]*\n|" : ""; exists $_ [1] -> {-keep} ? qr /($prefix$re)/ : qr /$prefix$re/ }, version => 5.006, ; } foreach my $group (@generic) { my $pattern = combine +(map {to_eol $_} @{$group -> {to_eol}}), (map {from_to @$_} @{$group -> {from_to}}), (map {id $_} @{$group -> {id}}), ; foreach my $language (@{$group -> {languages}}) { pattern name => [comment => ref $language ? @$language : $language], create => $pattern, ; } } # # Other languages. # # http://www.pascal-central.com/docs/iso10206.txt pattern name => [qw /comment Pascal/], create => '(?k:' . '(?k:[{]|[(][*])' . '(?k:[^}*]*(?:[*](?![)])[^}*]*)*)' . '(?k:[}]|[*][)])' . ')' ; # http://www.templetons.com/brad/alice/language/ pattern name => [qw /comment Pascal Alice/], create => '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))' ; # http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt pattern name => [qw (comment), 'Algol 68'], create => q {(?k:(?:#[^#]*#)|} . q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} . q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))} ; # See rules 91 and 92 of ISO 8879 (SGML). # Charles F. Goldfarb: "The SGML Handbook". # Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9. # Ch. 10.3, pp 390. pattern name => [qw (comment HTML)], create => q {(?k:(?k:))}, ; pattern name => [qw /comment SQL MySQL/], create => q {(?k:(?:#|-- )[^\n]*\n|} . q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))}, ; # Anything that isn't <>[]+-., # http://home.wxs.nl/~faase009/Ha_BF.html pattern name => [qw /comment Brainfuck/], create => '(?k:[^<>\[\]+\-.,]+)' ; # Squeak is a variant of Smalltalk-80. # http://www.squeak. # http://mucow.com/squeak-qref.html pattern name => [qw /comment Squeak/], create => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))' ; # # Scores of less than 5 or above 17.... # http://www.cliff.biffle.org/esoterica/beatnik.html @Regexp::Common::comment::scores = (1, 3, 3, 2, 1, 4, 2, 4, 1, 8, 5, 1, 3, 1, 1, 3, 10, 1, 1, 1, 1, 4, 4, 8, 4, 10); { my ($s, $x); pattern name => [qw /comment Beatnik/], create => sub { use re 'eval'; my $re = qr {\b([A-Za-z]+)\b (?(?{($s, $x) = (0, lc $^N); $s += $Regexp::Common::comment::scores [ord (chop $x) - ord ('a')] while length $x; $s >= 5 && $s < 18})XXX|)}x; $re; }, version => 5.008, ; } # http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/ # (Goto table of contents/3.3 Source Form) # Fortran, in fixed format. Comments start with a C, c or * in the first # column, or a ! anywhere, but the sixth column. Then end with a newline. pattern name => [qw /comment Fortran fixed/], create => '(?k:(?k:(?:^[Cc*]|(? [qw /comment COBOL/], create => '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))', version => '5.008', ; 1; __END__ =pod =head1 NAME Regexp::Common::comment -- provide regexes for comments. =head1 SYNOPSIS use Regexp::Common qw /comment/; while (<>) { /$RE{comment}{C}/ and print "Contains a C comment\n"; /$RE{comment}{C++}/ and print "Contains a C++ comment\n"; /$RE{comment}{PHP}/ and print "Contains a PHP comment\n"; /$RE{comment}{Java}/ and print "Contains a Java comment\n"; /$RE{comment}{Perl}/ and print "Contains a Perl comment\n"; /$RE{comment}{awk}/ and print "Contains an awk comment\n"; /$RE{comment}{HTML}/ and print "Contains an HTML comment\n"; } use Regexp::Common qw /comment RE_comment_HTML/; while (<>) { $_ =~ RE_comment_HTML() and print "Contains an HTML comment\n"; } =head1 DESCRIPTION Please consult the manual of L for a general description of the works of this interface. Do not use this module directly, but load it via I. This modules gives you regular expressions for comments in various languages. =head2 THE LANGUAGES Below, the comments of each of the languages are described. The patterns are available as C<$RE{comment}{I}>, foreach language I. Some languages have variants; it's described at the individual languages how to get the patterns for the variants. Unless mentioned otherwise, C<{-keep}> sets C<$1>, C<$2>, C<$3> and C<$4> to the entire comment, the opening marker, the content of the comment, and the closing marker (for many languages, the latter is a newline) respectively. =over 4 =item ABC Comments in I start with a backslash (C<\>), and last till the end of the line. See L. =item Ada Comments in I start with C<-->, and last till the end of the line. =item Advisor I is a language used by the HP product I. Comments for this language start with either C<#> or C, and last till the end of the line. =item Advsys Comments for the I language start with C<;> and last till the end of the line. See also L. =item Alan I comments start with C<-->, and last till the end of the line. See also L. =item Algol 60 Comments in the I language start with the keyword C, and end with a C<;>. See L. =item Algol 68 In I, comments are either delimited by C<#>, or by one of the keywords C or C. The keywords should not be part of another word. See L. With C<{-keep}>, only C<$1> will be set, returning the entire comment. =item ALPACA The I language has comments starting with C and ending with C<*/>. =item awk The I programming language uses comments that start with C<#> and end at the end of the line. =item B The I language has comments starting with C and ending with C<*/>. =item BASIC There are various forms of BASIC around. Currently, we only support the variant supported by I, whose pattern is available as C<$RE{comment}{BASIC}{mvEnterprise}>. Comments in this language start with a C, a C<*> or the keyword C, and end till the end of the line. See L. =item Beatnik The esotoric language I only uses words consisting of letters. Words are scored according to the rules of Scrabble. Words scoring less than 5 points, or 18 points or more are considered comments (although the compiler might mock at you if you score less than 5 points). Regardless whether C<{-keep}>, C<$1> will be set, and set to the entire comment. This pattern requires I or newer. =item beta-Juliet The I programming language has comments that start with C and that continue till the end of the line. See also L. =item Befunge-98 The esotoric language I uses comments that start and end with a C<;>. See L. =item BML I, or I is an HTML templating language that uses comments starting with C<< >, and ending with C<< c_?> >>. See L. =item Brainfuck The minimal language I uses only eight characters, C>, C>, C<[>, C<]>, C<+>, C<->, C<.> and C<,>. Any other characters are considered comments. With C<{-keep}>, C<$1> is set to the entire comment. =item C The I language has comments starting with C and ending with C<*/>. =item C-- The I language has comments starting with C and ending with C<*/>. See L. =item C++ The I language has two forms of comments. Comments that start with C and last till the end of the line, and comments that start with C, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be set, and set to the entire comment. =item C# The I language has two forms of comments. Comments that start with C and last till the end of the line, and comments that start with C, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be set, and set to the entire comment. See L. =item Caml Comments in I start with C<(*>, end with C<*)>, and can be nested. See L and L. =item Cg The I language has two forms of comments. Comments that start with C and last till the end of the line, and comments that start with C, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be set, and set to the entire comment. See L. =item CLU In C, a comment starts with a procent sign (C<%>), and ends with the next newline. See L and L. =item COBOL Traditionally, comments in I are indicated by an asteriks in the seventh column. This is what the pattern matches. Modern compiler may more lenient though. See L, and L. Due to a bug in the regexp engine of perl 5.6.x, this regexp is only available in version 5.8.0 and up. =item CQL Comments in the chess query language (I) start with a semi colon (C<;>) and last till the end of the line. See L. =item Crystal Report The formula editor in I uses comments that start with C, and end with the end of the line. =item Dylan There are two types of comments in I. They either start with C, or are nested comments, delimited with C and C<*/>. Under C<{-keep}>, only C<$1> will be set, returning the entire comment. This pattern requires I or newer. =item ECMAScript The I language has two forms of comments. Comments that start with C and last till the end of the line, and comments that start with C, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be set, and set to the entire comment. I is Netscapes implementation of I. See L, and L. =item Eiffel I comments start with C<-->, and last till the end of the line. =item False In I, comments start with C<{> and end with C<}>. See L =item FPL The I language has two forms of comments. Comments that start with C and last till the end of the line, and comments that start with C, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be set, and set to the entire comment. =item Forth Comments in Forth start with C<\>, and end with the end of the line. See also L. =item Fortran There are two forms of I. There's free form I, which has comments that start with C, and end at the end of the line. The pattern for this is given by C<$RE{Fortran}>. Fixed form I, which has been obsoleted, has comments that start with C, C or C<*> in the first column, or with C anywhere, but the sixth column. The pattern for this are given by C<$RE{Fortran}{fixed}>. See also L. =item Funge-98 The esotoric language I uses comments that start and end with a C<;>. =item fvwm2 Configuration files for I have comments starting with a C<#> and lasting the rest of the line. =item Haifu I, an esotoric language using haikus, has comments starting and ending with a C<,>. See L. =item Haskell There are two types of comments in I. They either start with at least two dashes, or are nested comments, delimited with C<{-> and C<-}>. Under C<{-keep}>, only C<$1> will be set, returning the entire comment. This pattern requires I or newer. =item HTML In I, comments only appear inside a I. A comment declaration starts with a C!>, and ends with a C>. Inside this declaration, we have zero or more comments. Comments starts with C<--> and end with C<-->, and are optionally followed by whitespace. The pattern C<$RE{comment}{HTML}> recognizes those comment declarations (and hence more than a comment). Note that this is not the same as something that starts with C!--> and ends with C<--E>, because the following will be matched completely: Second Comment Do not be fooled by what your favourite browser thinks is an HTML comment. If C<{-keep}> is used, the following are returned: =over 4 =item $1 captures the entire comment declaration. =item $2 captures the MDO (markup declaration open), C!>. =item $3 captures the content between the MDO and the MDC. =item $4 captures the (last) comment, without the surrounding dashes. =item $5 captures the MDC (markup declaration close), C>. =back =item Hugo There are two types of comments in I. They either start with C (which cannot be followed by a C<\>), or are nested comments, delimited with C and C<\!>. Under C<{-keep}>, only C<$1> will be set, returning the entire comment. This pattern requires I or newer. =item Icon I has comments that start with C<#> and end at the next new line. See L, L, and L. =item ILLGOL The esotoric language I uses comments starting with I and lasting till the end of the line. See L. =item INTERCAL Comments in INTERCAL are single line comments. They start with one of the keywords C or C, and can optionally be preceded by the keywords C and C. If both keywords are used, C precedes C. Keywords are separated by whitespace. =item J The language I uses comments that start with C, and that last till the end of the line. See L, and L. =item Java The I language has two forms of comments. Comments that start with C and last till the end of the line, and comments that start with C, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be set, and set to the entire comment. =item JavaDoc The I documentation syntax is demarked with a subset of ordinary Java comments to separate it from code. Comments start with C end with C<*/>. If C<{-keep}> is used, only C<$1> will be set, and set to the entire comment. See L. =item JavaScript The I language has two forms of comments. Comments that start with C and last till the end of the line, and comments that start with C, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be set, and set to the entire comment. I is Netscapes implementation of I. See L, and L. =item LaTeX The documentation language I uses comments starting with C<%> and ending at the end of the line. =item Lisp Comments in I start with a semi-colon (C<;>) and last till the end of the line. =item LPC The I language has comments starting with C and ending with C<*/>. =item LOGO Comments for the language I start with C<;>, and last till the end of the line. =item lua Comments for the I language start with C<-->, and last till the end of the line. See also L. =item M, MUMPS In C (aka C), comments start with a semi-colon, and last till the end of a line. The language specification requires the semi-colon to be preceded by one or more Is. Those characters default to a space, but that's configurable. This requirement, of preceding the comment with linestart characters is B tested for. See L, L, and L. =item m4 By default, the preprocessor language I uses single line comments, that start with a C<#> and continue to the end of the line, including the newline. The pattern C<$RE {comment} {m4}> matches such comments. In I, it is possible to change the starting token though. See L, L, and L. =item Modula-2 In C, comments start with C<(*>, and end with C<*)>. Comments may be nested. See L. =item Modula-3 In C, comments start with C<(*>, and end with C<*)>. Comments may be nested. See L. =item mutt Configuration files for I have comments starting with a C<#> and lasting the rest of the line. =item Nickle The I language has one line comments starting with C<#> (like Perl), or multiline comments delimited by C and C<*/> (like C). Under C<-keep>, only C<$1> will be set. See also L. =item Oberon Comments in I start with C<(*> and end with C<*)>. See L. =item Pascal There are many implementations of Pascal. This modules provides pattern for comments of several implementations. =over 4 =item C<$RE{comment}{Pascal}> This is the pattern that recognizes comments according to the Pascal ISO standard. This standard says that comments start with either C<{>, or C<(*>, and end with C<}> or C<*)>. This means that C<{*)> and C<(*}> are considered to be comments. Many Pascal applications don't allow this. See L =item C<$RE{comment}{Pascal}{Alice}> The I compiler accepts comments that start with C<{> and end with C<}>. Comments are not allowed to contain newlines. See L. =item C<$RE{comment}{Pascal}{Delphi}>, C<$RE{comment}{Pascal}{Free}> and C<$RE{comment}{Pascal}{GPC}> The I, I and the I implementations of Pascal all have comments that either start with C and last till the end of the line, are delimited with C<{> and C<}> or are delimited with C<(*> and C<*)>. Patterns for those comments are given by C<$RE{comment}{Pascal}{Delphi}>, C<$RE{comment}{Pascal}{Free}> and C<$RE{comment}{Pascal}{GPC}> respectively. These patterns only set C<$1> when C<{-keep}> is used, which will then include the entire comment. See L, L and L. =item C<$RE{comment}{Pascal}{Workshop}> The I compiler, from SUN Microsystems, allows comments that are delimited with either C<{> and C<}>, delimited with C<(*)> and C<*>), delimited with C, and C<*/>, or starting and ending with a double quote (C<">). When C<{-keep}> is used, only C<$1> is set, and returns the entire comment. See L. =back =item PEARL Comments in I start with a C and last till the end of the line, or start with C and end with C<*/>. With C<{-keep}>, C<$1> will be set to the entire comment. =item PHP Comments in I start with either C<#> or C and last till the end of the line, or are delimited by C and C<*/>. With C<{-keep}>, C<$1> will be set to the entire comment. =item PL/B In I, comments start with either C<.> or C<;>, and end with the next newline. See L. =item PL/I The I language has comments starting with C and ending with C<*/>. =item PL/SQL In I, comments either start with C<--> and run till the end of the line, or start with C and end with C<*/>. =item Perl I uses comments that start with a C<#>, and continue till the end of the line. =item Portia The I programming language has comments that start with C, and last till the end of the line. =item Python I uses comments that start with a C<#>, and continue till the end of the line. =item Q-BAL Comments in the I language start with C<`> (a backtick), and contine till the end of the line. =item QML In C, comments start with C<#> and last till the end of the line. See L. =item R The statistical language I uses comments that start with a C<#> and end with the following new line. See L. =item REBOL Comments for the I language start with C<;> and last till the end of the line. =item Ruby Comments in I start with C<#> and last till the end of the time. =item Scheme I comments start with C<;>, and last till the end of the line. See L. =item shell Comments in various Is start with a C<#> and end at the end of the line. =item Shelta The esotoric language I uses comments that start and end with a C<;>. See L. =item SLIDE The I language has two froms of comments. First there is the line comment, which starts with a C<#> and includes the rest of the line (just like Perl). Second, there is the multiline, nested comment, which are delimited by C<(*> and C<*)>. Under C{-keep}>, only C<$1> is set, and is set to the entire comment. This pattern needs at least Perl version 5.6.0. See L. =item slrn Configuration files for I have comments starting with a C<%> and lasting the rest of the line. =item Smalltalk I uses comments that start and end with a double quote, C<">. =item SMITH Comments in the I language start with C<;>, and last till the end of the line. =item Squeak In the Smalltalk variant I, comments start and end with C<">. Double quotes can appear inside comments by doubling them. =item SQL Standard I uses comments starting with two or more dashes, and ending at the end of the line. I does not follow the standard. Instead, it allows comments that start with a C<#> or C<-- > (that's two dashes and a space) ending with the following newline, and comments starting with C, and ending with the next C<;> or C<*/> that isn't inside single or double quotes. A pattern for this is returned by C<$RE{comment}{SQL}{MySQL}>. With C<{-keep}>, only C<$1> will be set, and it returns the entire comment. =item Tcl In I, comments start with C<#> and continue till the end of the line. =item TeX The documentation language I uses comments starting with C<%> and ending at the end of the line. =item troff The document formatting language I uses comments starting with C<\">, and continuing till the end of the line. =item Ubercode The Windows programming language I uses comments that start with C and continue to the end of the line. See L. =item vi In configuration files for the editor I, one can use comments starting with C<">, and ending at the end of the line. =item *W In the language I<*W>, comments start with C<||>, and end with C. =item zonefile Comments in DNS Is start with C<;>, and continue till the end of the line. =item ZZT-OOP The in-game language I uses comments that start with a C<'> character, and end at the following newline. See L. =back =head1 REFERENCES =over 4 =item B<[Go 90]> Charles F. Goldfarb: I. Oxford: Oxford University Press. B<1990>. ISBN 0-19-853737-9. Ch. 10.3, pp 390-391. =back =head1 SEE ALSO L for a general description of how to use this interface. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. For a start, there are many common regexes missing. Send them in to I. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/file.pm000064400000004057147635103360006476 0ustar00package Regexp::Common::URI::file; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC1738 qw /$host $fpath/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $scheme = 'file'; my $uri = "(?k:(?k:$scheme)://(?k:(?k:(?:$host|localhost)?)" . "(?k:/(?k:$fpath))))"; register_uri $scheme => $uri; pattern name => [qw (URI file)], create => $uri, ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::file -- Returns a pattern for file URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{file}/ and print "Contains a file URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{file} Returns a pattern that matches I URIs, as defined by RFC 1738. File URIs have the form: "file:" "//" [ host | "localhost" ] "/" fpath Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The scheme. =item $3 The part of the URI following "file://". =item $4 The hostname. =item $5 The path name, including the leading slash. =item $6 The path name, without the leading slash. =back =head1 REFERENCES =over 4 =item B<[RFC 1738]> Berners-Lee, Tim, Masinter, L., McCahill, M.: I. December 1994. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/http.pm000064400000005553147635103360006540 0ustar00package Regexp::Common::URI::http; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC2396 qw /$host $port $path_segments $query/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $http_uri = "(?k:(?k:http)://(?k:$host)(?::(?k:$port))?" . "(?k:/(?k:(?k:$path_segments)(?:[?](?k:$query))?))?)"; my $https_uri = $http_uri; $https_uri =~ s/http/https?/; register_uri HTTP => $https_uri; pattern name => [qw (URI HTTP), "-scheme=http"], create => sub { my $scheme = $_ [1] -> {-scheme}; my $uri = $http_uri; $uri =~ s/http/$scheme/; $uri; } ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::http -- Returns a pattern for HTTP URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{HTTP}/ and print "Contains an HTTP URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{HTTP}{-scheme} Provides a regex for an HTTP URI as defined by RFC 2396 (generic syntax) and RFC 2616 (HTTP). If C<< -scheme => I

>> is specified the pattern I

is used as the scheme. By default I

is C. C and C are reasonable alternatives. The syntax for an HTTP URI is: "http:" "//" host [ ":" port ] [ "/" path [ "?" query ]] Under C<{-keep}>, the following are returned: =over 4 =item $1 The entire URI. =item $2 The scheme. =item $3 The host (name or address). =item $4 The port (if any). =item $5 The absolute path, including the query and leading slash. =item $6 The absolute path, including the query, without the leading slash. =item $7 The absolute path, without the query or leading slash. =item $8 The query, without the question mark. =back =head1 REFERENCES =over 4 =item B<[RFC 2396]> Berners-Lee, Tim, Fielding, R., and Masinter, L.: I. August 1998. =item B<[RFC 2616]> Fielding, R., Gettys, J., Mogul, J., Frystyk, H., Masinter, L., Leach, P. and Berners-Lee, Tim: I. June 1999. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/RFC2396.pm000064400000012076147635103360006515 0ustar00package Regexp::Common::URI::RFC2396; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; use vars qw /@EXPORT @EXPORT_OK %EXPORT_TAGS @ISA/; use Exporter (); @ISA = qw /Exporter/; my %vars; BEGIN { $vars {low} = [qw /$digit $upalpha $lowalpha $alpha $alphanum $hex $escaped $mark $unreserved $reserved $pchar $uric $urics $userinfo $userinfo_no_colon $uric_no_slash/]; $vars {parts} = [qw /$query $fragment $param $segment $path_segments $ftp_segments $rel_segment $abs_path $rel_path $path/]; $vars {connect} = [qw /$port $IPv4address $toplabel $domainlabel $hostname $host $hostport $server $reg_name $authority/]; $vars {URI} = [qw /$scheme $net_path $opaque_part $hier_part $relativeURI $absoluteURI $URI_reference/]; } use vars map {@$_} values %vars; @EXPORT = (); @EXPORT_OK = map {@$_} values %vars; %EXPORT_TAGS = (%vars, ALL => [@EXPORT_OK]); # RFC 2396, base definitions. $digit = '[0-9]'; $upalpha = '[A-Z]'; $lowalpha = '[a-z]'; $alpha = '[a-zA-Z]'; # lowalpha | upalpha $alphanum = '[a-zA-Z0-9]'; # alpha | digit $hex = '[a-fA-F0-9]'; $escaped = "(?:%$hex$hex)"; $mark = "[\\-_.!~*'()]"; $unreserved = "[a-zA-Z0-9\\-_.!~*'()]"; # alphanum | mark # %61-%7A, %41-%5A, %30-%39 # a - z A - Z 0 - 9 # %21, %27, %28, %29, %2A, %2D, %2E, %5F, %7E # ! ' ( ) * - . _ ~ $reserved = "[;/?:@&=+\$,]"; $pchar = "(?:[a-zA-Z0-9\\-_.!~*'():\@&=+\$,]|$escaped)"; # unreserved | escaped | [:@&=+$,] $uric = "(?:[;/?:\@&=+\$,a-zA-Z0-9\\-_.!~*'()]|$escaped)"; # reserved | unreserved | escaped $urics = "(?:(?:[;/?:\@&=+\$,a-zA-Z0-9\\-_.!~*'()]+|" . "$escaped)*)"; $query = $urics; $fragment = $urics; $param = "(?:(?:[a-zA-Z0-9\\-_.!~*'():\@&=+\$,]+|$escaped)*)"; $segment = "(?:$param(?:;$param)*)"; $path_segments = "(?:$segment(?:/$segment)*)"; $ftp_segments = "(?:$param(?:/$param)*)"; # NOT from RFC 2396. $rel_segment = "(?:(?:[a-zA-Z0-9\\-_.!~*'();\@&=+\$,]*|$escaped)+)"; $abs_path = "(?:/$path_segments)"; $rel_path = "(?:$rel_segment(?:$abs_path)?)"; $path = "(?:(?:$abs_path|$rel_path)?)"; $port = "(?:$digit*)"; $IPv4address = "(?:$digit+[.]$digit+[.]$digit+[.]$digit+)"; $toplabel = "(?:$alpha"."[-a-zA-Z0-9]*$alphanum|$alpha)"; $domainlabel = "(?:(?:$alphanum"."[-a-zA-Z0-9]*)?$alphanum)"; $hostname = "(?:(?:$domainlabel\[.])*$toplabel\[.]?)"; $host = "(?:$hostname|$IPv4address)"; $hostport = "(?:$host(?::$port)?)"; $userinfo = "(?:(?:[a-zA-Z0-9\\-_.!~*'();:&=+\$,]+|$escaped)*)"; $userinfo_no_colon = "(?:(?:[a-zA-Z0-9\\-_.!~*'();&=+\$,]+|$escaped)*)"; $server = "(?:(?:$userinfo\@)?$hostport)"; $reg_name = "(?:(?:[a-zA-Z0-9\\-_.!~*'()\$,;:\@&=+]*|$escaped)+)"; $authority = "(?:$server|$reg_name)"; $scheme = "(?:$alpha"."[a-zA-Z0-9+\\-.]*)"; $net_path = "(?://$authority$abs_path?)"; $uric_no_slash = "(?:[a-zA-Z0-9\\-_.!~*'();?:\@&=+\$,]|$escaped)"; $opaque_part = "(?:$uric_no_slash$urics)"; $hier_part = "(?:(?:$net_path|$abs_path)(?:[?]$query)?)"; $relativeURI = "(?:(?:$net_path|$abs_path|$rel_path)(?:[?]$query)?"; $absoluteURI = "(?:$scheme:(?:$hier_part|$opaque_part))"; $URI_reference = "(?:(?:$absoluteURI|$relativeURI)?(?:#$fragment)?)"; 1; __END__ =pod =head1 NAME Regexp::Common::URI::RFC2396 -- Definitions from RFC2396; =head1 SYNOPSIS use Regexp::Common::URI::RFC2396 qw /:ALL/; =head1 DESCRIPTION This package exports definitions from RFC2396. It's intended usage is for Regexp::Common::URI submodules only. Its interface might change without notice. =head1 REFERENCES =over 4 =item B<[RFC 2396]> Berners-Lee, Tim, Fielding, R., and Masinter, L.: I. August 1998. =back =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/prospero.pm000064400000004172147635103360007426 0ustar00package Regexp::Common::URI::prospero; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC1738 qw /$host $port $ppath $fieldname $fieldvalue $fieldspec/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $scheme = 'prospero'; my $uri = "(?k:(?k:$scheme)://(?k:$host)(?::(?k:$port))?" . "/(?k:$ppath)(?k:$fieldspec*))"; register_uri $scheme => $uri; pattern name => [qw (URI prospero)], create => $uri, ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::prospero -- Returns a pattern for prospero URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{prospero}/ and print "Contains a prospero URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{prospero} Returns a pattern that matches I URIs, as defined by RFC 1738. prospero URIs have the form: "prospero:" "//" host [ ":" port ] "/" path [ fieldspec ] * Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The I. =item $3 The I. =item $4 The I, if given. =item $5 The propero path. =item $6 The field specifications, if given. There can be more field specifications; they will all be returned in C<$6>. =back =head1 REFERENCES =over 4 =item B<[RFC 1738]> Berners-Lee, Tim, Masinter, L., McCahill, M.: I. December 1994. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Abigail. (I). =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/fax.pm000064400000005324147635103360006333 0ustar00package Regexp::Common::URI::fax; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC2806 qw /$fax_subscriber $fax_subscriber_no_future/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $fax_scheme = 'fax'; my $fax_uri = "(?k:(?k:$fax_scheme):(?k:$fax_subscriber))"; my $fax_uri_nf = "(?k:(?k:$fax_scheme):(?k:$fax_subscriber_no_future))"; register_uri $fax_scheme => $fax_uri; pattern name => [qw (URI fax)], create => $fax_uri ; pattern name => [qw (URI fax nofuture)], create => $fax_uri_nf ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::fax -- Returns a pattern for fax URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{fax}/ and print "Contains a fax URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{fax} Returns a pattern that matches I URIs, as defined by RFC 2806. Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The scheme. =item $3 The phone number, including any possible add-ons like ISDN subaddress, a post dial part, area specifier, service provider, etc. =back =head2 C<$RE{URI}{fax}{nofuture}> As above (including what's returned by C<{-keep}>), with the exception that I are not allowed. Without allowing those I, it becomes much easier to check a URI if the correct syntax for post dial, service provider, phone context, etc has been used - otherwise the regex could always classify them as a I. =head1 REFERENCES =over 4 =item B<[RFC 1035]> Mockapetris, P.: I. November 1987. =item B<[RFC 2396]> Berners-Lee, Tim, Fielding, R., and Masinter, L.: I. August 1998. =item B<[RFC 2806]> Vaha-Sipila, A.: I. April 2000. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/tel.pm000064400000005370147635103360006342 0ustar00package Regexp::Common::URI::tel; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC2806 qw /$telephone_subscriber $telephone_subscriber_no_future/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $tel_scheme = 'tel'; my $tel_uri = "(?k:(?k:$tel_scheme):(?k:$telephone_subscriber))"; my $tel_uri_nf = "(?k:(?k:$tel_scheme):(?k:$telephone_subscriber_no_future))"; register_uri $tel_scheme => $tel_uri; pattern name => [qw (URI tel)], create => $tel_uri ; pattern name => [qw (URI tel nofuture)], create => $tel_uri_nf ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::tel -- Returns a pattern for telephone URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{tel}/ and print "Contains a telephone URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{tel} Returns a pattern that matches I URIs, as defined by RFC 2806. Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The scheme. =item $3 The phone number, including any possible add-ons like ISDN subaddress, a post dial part, area specifier, service provider, etc. =back =head2 C<$RE{URI}{tel}{nofuture}> As above (including what's returned by C<{-keep}>), with the exception that I are not allowed. Without allowing those I, it becomes much easier to check a URI if the correct syntax for post dial, service provider, phone context, etc has been used - otherwise the regex could always classify them as a I. =head1 REFERENCES =over 4 =item B<[RFC 1035]> Mockapetris, P.: I. November 1987. =item B<[RFC 2396]> Berners-Lee, Tim, Fielding, R., and Masinter, L.: I. August 1998. =item B<[RFC 2806]> Vaha-Sipila, A.: I. April 2000. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/RFC2806.pm000064400000017622147635103360006513 0ustar00package Regexp::Common::URI::RFC2806; use Regexp::Common::URI::RFC1035 qw /$domain/; use Regexp::Common::URI::RFC2396 qw /$unreserved $escaped $hex/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; use vars qw /@EXPORT @EXPORT_OK %EXPORT_TAGS @ISA/; use Exporter (); @ISA = qw /Exporter/; my %vars; BEGIN { $vars {low} = [qw /$dtmf_digit $wait_for_dial_tone $one_second_pause $pause_character $visual_separator $phonedigit $escaped_no_dquote $quoted_string $token_char $token_chars/]; $vars {parts} = [qw /$future_extension/]; $vars {connect} = [qw /$provider_hostname $provider_tag $service_provider $private_prefix $local_network_prefix $global_network_prefix $network_prefix/]; $vars {phone} = [qw /$phone_context_ident $phone_context_tag $area_specifier $post_dial $isdn_subaddress $t33_subaddress $local_phone_number $local_phone_number_no_future $base_phone_number $global_phone_number $global_phone_number_no_future $telephone_subscriber $telephone_subscriber_no_future/]; $vars {fax} = [qw /$fax_local_phone $fax_local_phone_no_future $fax_global_phone $fax_global_phone_no_future $fax_subscriber $fax_subscriber_no_future/]; $vars {modem} = [qw //]; } use vars map {@$_} values %vars; @EXPORT = (); @EXPORT_OK = map {@$_} values %vars; %EXPORT_TAGS = (%vars, ALL => [@EXPORT_OK]); # RFC 2806, URIs for tel, fax & modem. $dtmf_digit = "(?:[*#ABCD])"; $wait_for_dial_tone= "(?:w)"; $one_second_pause = "(?:p)"; $pause_character = "(?:[wp])"; # wait_for_dial_tone | one_second_pause. $visual_separator = "(?:[\\-.()])"; $phonedigit = "(?:[0-9\\-.()])"; # DIGIT | visual_separator $escaped_no_dquote = "(?:%(?:[01]$hex)|2[013-9A-Fa-f]|[3-9A-Fa-f]$hex)"; $quoted_string = "(?:%22(?:(?:%5C(?:$unreserved|$escaped))|" . "$unreserved+|$escaped_no_dquote)*%22)"; # It is unclear wether we can allow only unreserved # characters to unescaped, or can we also use uric # characters that are unescaped? Or pchars? $token_char = "(?:[!'*\\-.0-9A-Z_a-z~]|" . "%(?:2[13-7ABDEabde]|3[0-9]|4[1-9A-Fa-f]|" . "5[AEFaef]|6[0-9A-Fa-f]|7[0-9ACEace]))"; # Only allowing unreserved chars to be unescaped. $token_chars = "(?:(?:[!'*\\-.0-9A-Z_a-z~]+|" . "%(?:2[13-7ABDEabde]|3[0-9]|4[1-9A-Fa-f]|" . "5[AEFaef]|6[0-9A-Fa-f]|7[0-9ACEace]))*)"; $future_extension = "(?:;$token_chars" . "(?:=(?:(?:$token_chars(?:[?]$token_chars)?)|" . "$quoted_string))?)"; $provider_hostname = $domain; $provider_tag = "(?:tsp)"; $service_provider = "(?:;$provider_tag=$provider_hostname)"; $private_prefix = "(?:(?:[!'E-OQ-VX-Z_e-oq-vx-z~]|" . "(?:%(?:2[124-7CFcf]|3[AC-Fac-f]|4[05-9A-Fa-f]|" . "5[1-689A-Fa-f]|6[05-9A-Fa-f]|" . "7[1-689A-Ea-e])))" . "(?:[!'()*\\-.0-9A-Z_a-z~]+|" . "(?:%(?:2[1-9A-Fa-f]|3[AC-Fac-f]|" . "[4-6][0-9A-Fa-f]|7[0-9A-Ea-e])))*)"; $local_network_prefix = "(?:[0-9\\-.()*#ABCDwp]+)"; $global_network_prefix = "(?:[+][0-9\\-.()]+)"; $network_prefix = "(?:$global_network_prefix|$local_network_prefix)"; $phone_context_ident = "(?:$network_prefix|$private_prefix)"; $phone_context_tag = "(?:phone-context)"; $area_specifier = "(?:;$phone_context_tag=$phone_context_ident)"; $post_dial = "(?:;postd=[0-9\\-.()*#ABCDwp]+)"; $isdn_subaddress = "(?:;isub=[0-9\\-.()]+)"; $t33_subaddress = "(?:;tsub=[0-9\\-.()]+)"; $local_phone_number= "(?:[0-9\\-.()*#ABCDwp]+$isdn_subaddress?" . "$post_dial?$area_specifier" . "(?:$area_specifier|$service_provider|" . "$future_extension)*)"; $local_phone_number_no_future = "(?:[0-9\\-.()*#ABCDwp]+$isdn_subaddress?" . "$post_dial?$area_specifier" . "(?:$area_specifier|$service_provider)*)"; $fax_local_phone = "(?:[0-9\\-.()*#ABCDwp]+$isdn_subaddress?" . "$t33_subaddress?$post_dial?$area_specifier" . "(?:$area_specifier|$service_provider|" . "$future_extension)*)"; $fax_local_phone_no_future = "(?:[0-9\\-.()*#ABCDwp]+$isdn_subaddress?" . "$t33_subaddress?$post_dial?$area_specifier" . "(?:$area_specifier|$service_provider)*)"; $base_phone_number = "(?:[0-9\\-.()]+)"; $global_phone_number = "(?:[+]$base_phone_number$isdn_subaddress?" . "$post_dial?" . "(?:$area_specifier|$service_provider|" . "$future_extension)*)"; $global_phone_number_no_future = "(?:[+]$base_phone_number$isdn_subaddress?" . "$post_dial?" . "(?:$area_specifier|$service_provider)*)"; $fax_global_phone = "(?:[+]$base_phone_number$isdn_subaddress?" . "$t33_subaddress?$post_dial?" . "(?:$area_specifier|$service_provider|" . "$future_extension)*)"; $fax_global_phone_no_future = "(?:[+]$base_phone_number$isdn_subaddress?" . "$t33_subaddress?$post_dial?" . "(?:$area_specifier|$service_provider)*)"; $telephone_subscriber = "(?:$global_phone_number|$local_phone_number)"; $telephone_subscriber_no_future = "(?:$global_phone_number_no_future|" . "$local_phone_number_no_future)"; $fax_subscriber = "(?:$fax_global_phone|$fax_local_phone)"; $fax_subscriber_no_future = "(?:$fax_global_phone_no_future|" . "$fax_local_phone_no_future)"; 1; __END__ =pod =head1 NAME Regexp::Common::URI::RFC2806 -- Definitions from RFC2806; =head1 SYNOPSIS use Regexp::Common::URI::RFC2806 qw /:ALL/; =head1 DESCRIPTION This package exports definitions from RFC2806. It's intended usage is for Regexp::Common::URI submodules only. Its interface might change without notice. =head1 REFERENCES =over 4 =item B<[RFC 2616]> Fielding, R., Gettys, J., Mogul, J., Frystyk, H., Masinter, L., Leach, P. and Berners-Lee, Tim: I. June 1999. =back =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/RFC1738.pm000064400000011345147635103360006512 0ustar00package Regexp::Common::URI::RFC1738; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; use vars qw /@EXPORT @EXPORT_OK %EXPORT_TAGS @ISA/; use Exporter (); @ISA = qw /Exporter/; my %vars; BEGIN { $vars {low} = [qw /$digit $digits $hialpha $lowalpha $alpha $alphadigit $safe $extra $national $punctuation $unreserved $unreserved_range $reserved $uchar $uchars $xchar $xchars $hex $escape/]; $vars {connect} = [qw /$port $hostnumber $toplabel $domainlabel $hostname $host $hostport $user $password $login/]; $vars {parts} = [qw /$fsegment $fpath $group $article $grouppart $search $database $wtype $wpath $psegment $fieldname $fieldvalue $fieldspec $ppath/]; } use vars map {@$_} values %vars; @EXPORT = qw /$host/; @EXPORT_OK = map {@$_} values %vars; %EXPORT_TAGS = (%vars, ALL => [@EXPORT_OK]); # RFC 1738, base definitions. # Lowlevel definitions. $digit = '[0-9]'; $digits = '[0-9]+'; $hialpha = '[A-Z]'; $lowalpha = '[a-z]'; $alpha = '[a-zA-Z]'; # lowalpha | hialpha $alphadigit = '[a-zA-Z0-9]'; # alpha | digit $safe = '[-$_.+]'; $extra = "[!*'(),]"; $national = '[][{}|\\^~`]'; $punctuation = '[<>#%"]'; $unreserved_range = q [-a-zA-Z0-9$_.+!*'(),]; # alphadigit | safe | extra $unreserved = "[$unreserved_range]"; $reserved = '[;/?:@&=]'; $hex = '[a-fA-F0-9]'; $escape = "(?:%$hex$hex)"; $uchar = "(?:$unreserved|$escape)"; $uchars = "(?:(?:$unreserved|$escape)*)"; $xchar = "(?:[$unreserved_range;/?:\@&=]|$escape)"; $xchars = "(?:(?:[$unreserved_range;/?:\@&=]|$escape)*)"; # Connection related stuff. $port = "(?:$digits)"; $hostnumber = "(?:$digits\[.]$digits\[.]$digits\[.]$digits)"; $toplabel = "(?:$alpha\[-a-zA-Z0-9]*$alphadigit|$alpha)"; $domainlabel = "(?:(?:$alphadigit\[-a-zA-Z0-9]*)?$alphadigit)"; $hostname = "(?:(?:$domainlabel\[.])*$toplabel)"; $host = "(?:$hostname|$hostnumber)"; $hostport = "(?:$host(?::$port)?)"; $user = "(?:(?:[$unreserved_range;?&=]|$escape)*)"; $password = "(?:(?:[$unreserved_range;?&=]|$escape)*)"; $login = "(?:(?:$user(?::$password)?\@)?$hostport)"; # Parts (might require more if we add more URIs). # FTP/file $fsegment = "(?:(?:[$unreserved_range:\@&=]|$escape)*)"; $fpath = "(?:$fsegment(?:/$fsegment)*)"; # NNTP/news. $group = "(?:$alpha\[-A-Za-z0-9.+_]*)"; $article = "(?:(?:[$unreserved_range;/?:&=]|$escape)+" . '@' . "$host)"; $grouppart = "(?:[*]|$article|$group)"; # It's important that # $article goes before # $group. # WAIS. $search = "(?:(?:[$unreserved_range;:\@&=]|$escape)*)"; $database = $uchars; $wtype = $uchars; $wpath = $uchars; # prospero $psegment = "(?:(?:[$unreserved_range?:\@&=]|$escape)*)"; $fieldname = "(?:(?:[$unreserved_range?:\@&]|$escape)*)"; $fieldvalue = "(?:(?:[$unreserved_range?:\@&]|$escape)*)"; $fieldspec = "(?:;$fieldname=$fieldvalue)"; $ppath = "(?:$psegment(?:/$psegment)*)"; # # The various '(?:(?:[$unreserved_range ...]|$escape)*)' above need # some loop unrolling to speed up the match. # 1; __END__ =pod =head1 NAME Regexp::Common::URI::RFC1738 -- Definitions from RFC1738; =head1 SYNOPSIS use Regexp::Common::URI::RFC1738 qw /:ALL/; =head1 DESCRIPTION This package exports definitions from RFC1738. It's intended usage is for Regexp::Common::URI submodules only. Its interface might change without notice. =head1 REFERENCES =over 4 =item B<[RFC 1738]> Berners-Lee, Tim, Masinter, L., McCahill, M.: I. December 1994. =back =head1 AUTHOR Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/RFC1035.pm000064400000004005147635103360006473 0ustar00package Regexp::Common::URI::RFC1035; use Regexp::Common qw /pattern clean no_defaults/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; use vars qw /@EXPORT @EXPORT_OK %EXPORT_TAGS @ISA/; use Exporter (); @ISA = qw /Exporter/; my %vars; BEGIN { $vars {low} = [qw /$digit $letter $let_dig $let_dig_hyp $ldh_str/]; $vars {parts} = [qw /$label $subdomain/]; $vars {domain} = [qw /$domain/]; } use vars map {@$_} values %vars; @EXPORT = qw /$host/; @EXPORT_OK = map {@$_} values %vars; %EXPORT_TAGS = (%vars, ALL => [@EXPORT_OK]); # RFC 1035. $digit = "[0-9]"; $letter = "[A-Za-z]"; $let_dig = "[A-Za-z0-9]"; $let_dig_hyp = "[-A-Za-z0-9]"; $ldh_str = "(?:[-A-Za-z0-9]+)"; $label = "(?:$letter(?:(?:$ldh_str){0,61}$let_dig)?)"; $subdomain = "(?:$label(?:[.]$label)*)"; $domain = "(?: |(?:$subdomain))"; 1; __END__ =pod =head1 NAME Regexp::Common::URI::RFC1035 -- Definitions from RFC1035; =head1 SYNOPSIS use Regexp::Common::URI::RFC1035 qw /:ALL/; =head1 DESCRIPTION This package exports definitions from RFC1035. It's intended usage is for Regexp::Common::URI submodules only. Its interface might change without notice. =head1 REFERENCES =over 4 =item B<[RFC 1035]> Mockapetris, P.: I. November 1987. =back =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/RFC2384.pm000064400000004464147635103360006514 0ustar00package Regexp::Common::URI::RFC2384; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI::RFC1738 qw /$unreserved_range $escape $hostport/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; use vars qw /@EXPORT @EXPORT_OK %EXPORT_TAGS @ISA/; use Exporter (); @ISA = qw /Exporter/; my %vars; BEGIN { $vars {low} = [qw /$achar_range $achar $achars $achar_more/]; $vars {connect} = [qw /$enc_sasl $enc_user $enc_ext $enc_auth_type $auth $user_auth $server/]; $vars {parts} = [qw /$pop_url/]; } use vars map {@$_} values %vars; @EXPORT = qw /$host/; @EXPORT_OK = map {@$_} values %vars; %EXPORT_TAGS = (%vars, ALL => [@EXPORT_OK]); # RFC 2384, POP3. # Lowlevel definitions. $achar_range = "$unreserved_range&=~"; $achar = "(?:[$achar_range]|$escape)"; $achars = "(?:(?:[$achar_range]+|$escape)*)"; $achar_more = "(?:(?:[$achar_range]+|$escape)+)"; $enc_sasl = $achar_more; $enc_user = $achar_more; $enc_ext = "(?:[+](?:APOP|$achar_more))"; $enc_auth_type = "(?:$enc_sasl|$enc_ext)"; $auth = "(?:;AUTH=(?:[*]|$enc_auth_type))"; $user_auth = "(?:$enc_user$auth?)"; $server = "(?:(?:$user_auth\@)?$hostport)"; $pop_url = "(?:pop://$server)"; 1; __END__ =pod =head1 NAME Regexp::Common::URI::RFC2384 -- Definitions from RFC2384; =head1 SYNOPSIS use Regexp::Common::URI::RFC2384 qw /:ALL/; =head1 DESCRIPTION This package exports definitions from RFC2384. It's intended usage is for Regexp::Common::URI submodules only. Its interface might change without notice. =head1 REFERENCES =over 4 =item B<[RFC 2384]> Gellens, R.: I August 1998. =back =head1 AUTHOR Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/tv.pm000064400000003751147635103360006210 0ustar00# TV URLs. # Internet draft: draft-zigmond-tv-url-03.txt package Regexp::Common::URI::tv; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC2396 qw /$hostname/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $tv_scheme = 'tv'; my $tv_url = "(?k:(?k:$tv_scheme):(?k:$hostname)?)"; register_uri $tv_scheme => $tv_url; pattern name => [qw (URI tv)], create => $tv_url, ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::tv -- Returns a pattern for tv URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{tv}/ and print "Contains a tv URI.\n"; } =head1 DESCRIPTION =head2 C<$RE{URI}{tv}> Returns a pattern that recognizes TV uris as per an Internet draft [DRAFT-URI-TV]. Under C<{-keep}>, the following are returned: =over 4 =item $1 The entire URI. =item $2 The scheme. =item $3 The host. =back =head1 REFERENCES =over 4 =item B<[DRAFT-URI-TV]> Zigmond, D. and Vickers, M: I. December 2000. =item B<[RFC 2396]> Berners-Lee, Tim, Fielding, R., and Masinter, L.: I. August 1998. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/pop.pm000064400000004054147635103360006352 0ustar00package Regexp::Common::URI::pop; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC1738 qw /$host $port/; use Regexp::Common::URI::RFC2384 qw /$enc_user $enc_auth_type/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $scheme = "pop"; my $uri = "(?k:(?k:$scheme)://(?:(?k:$enc_user)" . "(?:;AUTH=(?k:[*]|$enc_auth_type))?\@)?" . "(?k:$host)(?::(?k:$port))?)"; register_uri $scheme => $uri; pattern name => [qw (URI POP)], create => $uri, ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::pop -- Returns a pattern for POP URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{POP}/ and print "Contains a POP URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{POP} Returns a pattern that matches I URIs, as defined by RFC 2384. POP URIs have the form: "pop:" "//" [ user [ ";AUTH" ( "*" | auth_type ) ] "@" ] host [ ":" port ] Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The I. =item $3 The I, if given. =item $4 The I, if given (could be a I<*>). =item $5 The I. =item $6 The I, if given. =back =head1 REFERENCES =over 4 =item B<[RFC 2384]> Gellens, R.: I. August 1998. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Abigail. (I). =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/telnet.pm000064400000004413147635103360007046 0ustar00package Regexp::Common::URI::telnet; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC1738 qw /$user $password $host $port/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $telnet_uri = "(?k:(?k:telnet)://(?:(?k:(?k:$user)(?::(?k:$password))?)\@)?" . "(?k:(?k:$host)(?::(?k:$port))?)(?k:/)?)"; register_uri telnet => $telnet_uri; pattern name => [qw (URI telnet)], create => $telnet_uri, ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::telnet -- Returns a pattern for telnet URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{telnet}/ and print "Contains a telnet URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{telnet} Returns a pattern that matches I URIs, as defined by RFC 1738. Telnet URIs have the form: "telnet:" "//" [ user [ ":" password ] "@" ] host [ ":" port ] [ "/" ] Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The scheme. =item $3 The username:password combo, or just the username if there is no password. =item $4 The username, if given. =item $5 The password, if given. =item $6 The host:port combo, or just the host if there's no port. =item $7 The host. =item $8 The port, if given. =item $9 The trailing slash, if any. =back =head1 REFERENCES =over 4 =item B<[RFC 1738]> Berners-Lee, Tim, Masinter, L., McCahill, M.: I. December 1994. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/RFC1808.pm000064400000010455147635103360006511 0ustar00package Regexp::Common::URI::RFC1808; BEGIN { # This makes sure 'use warnings' doesn't bomb out on 5.005_*; # warnings won't be enabled on those old versions though. if ($] < 5.006 && !exists $INC {"warnings.pm"}) { $INC {"warnings.pm"} = 1; no strict 'refs'; *{"warnings::unimport"} = sub {0}; } } use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; use vars qw /@EXPORT @EXPORT_OK %EXPORT_TAGS @ISA/; use Exporter (); @ISA = qw /Exporter/; my %vars; BEGIN { $vars {low} = [qw /$punctuation $reserved_range $reserved $national $extra $safe $digit $digits $hialpha $lowalpha $alpha $alphadigit $hex $escape $unreserved_range $unreserved $uchar $uchars $pchar_range $pchar $pchars/], $vars {parts} = [qw /$fragment $query $param $params $segment $fsegment $path $net_loc $scheme $rel_path $abs_path $net_path $relativeURL $generic_RL $absoluteURL $URL/], } use vars map {@$_} values %vars; @EXPORT = qw /$host/; @EXPORT_OK = map {@$_} values %vars; %EXPORT_TAGS = (%vars, ALL => [@EXPORT_OK]); # RFC 1808, base definitions. # Lowlevel definitions. $punctuation = '[<>#%"]'; $reserved_range = q [;/?:@&=]; $reserved = "[$reserved_range]"; $national = '[][{}|\\^~`]'; $extra = "[!*'(),]"; $safe = '[-$_.+]'; $digit = '[0-9]'; $digits = '[0-9]+'; $hialpha = '[A-Z]'; $lowalpha = '[a-z]'; $alpha = '[a-zA-Z]'; # lowalpha | hialpha $alphadigit = '[a-zA-Z0-9]'; # alpha | digit $hex = '[a-fA-F0-9]'; $escape = "(?:%$hex$hex)"; $unreserved_range = q [-a-zA-Z0-9$_.+!*'(),]; # alphadigit | safe | extra $unreserved = "[$unreserved_range]"; $uchar = "(?:$unreserved|$escape)"; $uchars = "(?:(?:$unreserved+|$escape)*)"; $pchar_range = qq [$unreserved_range:\@&=]; $pchar = "(?:[$pchar_range]|$escape)"; $pchars = "(?:(?:[$pchar_range]+|$escape)*)"; # Parts $fragment = "(?:(?:[$unreserved_range$reserved_range]+|$escape)*)"; $query = "(?:(?:[$unreserved_range$reserved_range]+|$escape)*)"; $param = "(?:(?:[$pchar_range/]+|$escape)*)"; $params = "(?:$param(?:;$param)*)"; $segment = "(?:(?:[$pchar_range]+|$escape)*)"; $fsegment = "(?:(?:[$pchar_range]+|$escape)+)"; $path = "(?:$fsegment(?:/$segment)*)"; $net_loc = "(?:(?:[$pchar_range;?]+|$escape)*)"; $scheme = "(?:(?:[-a-zA-Z0-9+.]+|$escape)+)"; $rel_path = "(?:$path?(?:;$params)?(?:?$query)?)"; $abs_path = "(?:/$rel_path)"; $net_path = "(?://$net_loc$abs_path?)"; $relativeURL = "(?:$net_path|$abs_path|$rel_path)"; $generic_RL = "(?:$scheme:$relativeURL)"; $absoluteURL = "(?:$generic_RL|" . "(?:$scheme:(?:[$unreserved_range$reserved_range]+|$escape)*))"; $URL = "(?:(?:$absoluteURL|$relativeURL)(?:#$fragment)?)"; 1; __END__ =pod =head1 NAME Regexp::Common::URI::RFC1808 -- Definitions from RFC1808; =head1 SYNOPSIS use Regexp::Common::URI::RFC1808 qw /:ALL/; =head1 DESCRIPTION This package exports definitions from RFC1808. It's intended usage is for Regexp::Common::URI submodules only. Its interface might change without notice. =head1 REFERENCES =over 4 =item B<[RFC 1808]> Fielding, R.: I. June 1995. =back =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/news.pm000064400000005412147635103360006527 0ustar00package Regexp::Common::URI::news; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC1738 qw /$grouppart $group $article $host $port $digits/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $news_scheme = 'news'; my $news_uri = "(?k:(?k:$news_scheme):(?k:$grouppart))"; my $nntp_scheme = 'nntp'; my $nntp_uri = "(?k:(?k:$nntp_scheme)://(?k:(?k:(?k:$host)(?::(?k:$port))?)" . "/(?k:$group)(?:/(?k:$digits))?))"; register_uri $news_scheme => $news_uri; register_uri $nntp_scheme => $nntp_uri; pattern name => [qw (URI news)], create => $news_uri, ; pattern name => [qw (URI NNTP)], create => $nntp_uri, ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::news -- Returns a pattern for file URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{news}/ and print "Contains a news URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{news} Returns a pattern that matches I URIs, as defined by RFC 1738. News URIs have the form: "news:" ( "*" | group | article "@" host ) Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The scheme. =item $3 The part of the URI following "news://". =back =head2 $RE{URI}{NNTP} Returns a pattern that matches I URIs, as defined by RFC 1738. NNTP URIs have the form: "nntp://" host [ ":" port ] "/" group [ "/" digits ] Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The scheme. =item $3 The part of the URI following "nntp://". =item $4 The host and port, separated by a colon. If no port was given, just the host. =item $5 The host. =item $6 The port, if given. =item $7 The group. =item $8 The digits, if given. =back =head1 REFERENCES =over 4 =item B<[RFC 1738]> Berners-Lee, Tim, Masinter, L., McCahill, M.: I. December 1994. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/wais.pm000064400000004642147635103360006522 0ustar00package Regexp::Common::URI::wais; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC1738 qw /$host $port $search $database $wtype $wpath/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $scheme = 'wais'; my $uri = "(?k:(?k:$scheme)://(?k:$host)(?::(?k:$port))?/(?k:(?k:$database)" . "(?k:[?](?k:$search)|/(?k:$wtype)/(?k:$wpath))?))"; register_uri $scheme => $uri; pattern name => [qw (URI WAIS)], create => $uri, ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::wais -- Returns a pattern for WAIS URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{WAIS}/ and print "Contains a WAIS URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{WAIS} Returns a pattern that matches I URIs, as defined by RFC 1738. WAIS URIs have the form: "wais:" "//" host [ ":" port ] "/" database [ ( "?" search ) | ( "/" wtype "/" wpath ) ] Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The I. =item $3 The I. =item $4 The I, if given. =item $5 The I, followed by I or I, if given. =item $6 The I. =item $7 The part following the I if given, including the question mark or slash. =item $8 The I part, if given. =item $9 The I, if given. =item $10 The I, if given. =back =head1 REFERENCES =over 4 =item B<[RFC 1738]> Berners-Lee, Tim, Masinter, L., McCahill, M.: I. December 1994. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/gopher.pm000064400000010701147635103360007034 0ustar00package Regexp::Common::URI::gopher; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC1738 qw /$host $port $uchars/; use Regexp::Common::URI::RFC1808 qw /$pchars $pchar_range/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $pchars_notab = "(?:(?:[$pchar_range]+|" . "%(?:[1-9a-fA-F][0-9a-fA-F]|0[0-8a-fA-F]))*)"; my $gopherplus_string = $pchars; my $search = $pchars; my $search_notab = $pchars_notab; my $selector = $pchars; my $selector_notab = $pchars_notab; my $gopher_type = "(?:[0-9+IgT])"; my $scheme = "gopher"; my $uri = "(?k:(?k:$scheme)://(?k:$host)(?::(?k:$port))?" . "/(?k:(?k:$gopher_type)(?k:$selector)))"; my $uri_notab = "(?k:(?k:$scheme)://(?k:$host)(?::(?k:$port))?" . "/(?k:(?k:$gopher_type)(?k:$selector_notab)" . "(?:%09(?k:$search_notab)(?:%09(?k:$gopherplus_string))?)?))"; register_uri $scheme => $uri; pattern name => [qw (URI gopher -notab=)], create => sub { exists $_ [1] {-notab} && !defined $_ [1] {-notab} ? $uri_notab : $uri}, ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::gopher -- Returns a pattern for gopher URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{gopher}/ and print "Contains a gopher URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{gopher}{-notab} Gopher URIs are poorly defined. Originally, RFC 1738 defined gopher URIs, but they were later redefined in an internet draft. One that was expired in June 1997. The internet draft for gopher URIs defines them as follows: "gopher:" "//" host [ ":" port ] "/" gopher-type selector [ "%09" search [ "%09" gopherplus_string ]] Unfortunally, a I is defined in such a way that characters may be escaped using the URI escape mechanism. This includes tabs, which escaped are C<%09>. Hence, the syntax cannot distinguish between a URI that has both a I and a I part, and an URI where the I includes an escaped tab. (The text of the draft forbids tabs to be present in the I though). C<$RE{URI}{gopher}> follows the defined syntax. To disallow escaped tabs in the I and I parts, use C<$RE{URI}{gopher}{-notab}>. There are other differences between the text and the given syntax. According to the text, selector strings cannot have tabs, linefeeds or carriage returns in them. The text also allows the entire I, (the part after the slash following the hostport) to be empty; if this is empty the slash may be omitted as well. However, this isn't reflected in the syntax. Under C<{-keep}>, the following are returned: =over 4 =item $1 The entire URI. =item $2 The scheme. =item $3 The host (name or address). =item $4 The port (if any). =item $5 The "gopher-path", the part after the / following the host and port. =item $6 The gopher-type. =item $7 The selector. (When no C<{-notab}> is used, this includes the search and gopherplus_string, including the separating escaped tabs). =item $8 The search, if given. (Only when C<{-notab}> is given). =item $9 The gopherplus_string, if given. (Only when C<{-notab}> is given). =back head1 REFERENCES =over 4 =item B<[RFC 1738]> Berners-Lee, Tim, Masinter, L., McCahill, M.: I. December 1994. =item B<[RFC 1808]> Fielding, R.: I. June 1995. =item B<[GOPHER URL]> Krishnan, Murali R., Casey, James: "A Gopher URL Format". Expired Internet draft I. December 1996. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut URI/ftp.pm000064400000012604147635103360006345 0ustar00package Regexp::Common::URI::ftp; use Regexp::Common qw /pattern clean no_defaults/; use Regexp::Common::URI qw /register_uri/; use Regexp::Common::URI::RFC2396 qw /$host $port $ftp_segments $userinfo $userinfo_no_colon/; use strict; use warnings; use vars qw /$VERSION/; $VERSION = '2010010201'; my $ftp_uri = "(?k:(?k:ftp)://(?:(?k:$userinfo)(?k:)\@)?(?k:$host)" . "(?::(?k:$port))?(?k:/(?k:(?k:$ftp_segments)" . "(?:;type=(?k:[AIai]))?))?)"; my $ftp_uri_password = "(?k:(?k:ftp)://(?:(?k:$userinfo_no_colon)" . "(?::(?k:$userinfo_no_colon))?\@)?(?k:$host)" . "(?::(?k:$port))?(?k:/(?k:(?k:$ftp_segments)" . "(?:;type=(?k:[AIai]))?))?)"; register_uri FTP => $ftp_uri; pattern name => [qw (URI FTP), "-type=[AIai]", "-password="], create => sub { my $uri = exists $_ [1] -> {-password} && !defined $_ [1] -> {-password} ? $ftp_uri_password : $ftp_uri; my $type = $_ [1] -> {-type}; $uri =~ s/\[AIai\]/$type/; $uri; } ; 1; __END__ =pod =head1 NAME Regexp::Common::URI::ftp -- Returns a pattern for FTP URIs. =head1 SYNOPSIS use Regexp::Common qw /URI/; while (<>) { /$RE{URI}{FTP}/ and print "Contains an FTP URI.\n"; } =head1 DESCRIPTION =head2 $RE{URI}{FTP}{-type}{-password}; Returns a regex for FTP URIs. Note: FTP URIs are not formally defined. RFC 1738 defines FTP URLs, but parts of that RFC have been obsoleted by RFC 2396. However, the differences between RFC 1738 and RFC 2396 are such that they aren't applicable straightforwardly to FTP URIs. There are two main problems: =over 4 =item Passwords. RFC 1738 allowed an optional username and an optional password (separated by a colon) in the FTP URL. Hence, colons were not allowed in either the username or the password. RFC 2396 strongly recommends passwords should not be used in URIs. It does allow for I instead. This userinfo part may contain colons, and hence contain more than one colon. The regexp returned follows the RFC 2396 specification, unless the I<{-password}> option is given; then the regex allows for an optional username and password, separated by a colon. =item The ;type specifier. RFC 1738 does not allow semi-colons in FTP path names, because a semi-colon is a reserved character for FTP URIs. The semi-colon is used to separate the path from the option I specifier. However, in RFC 2396, paths consist of slash separated segments, and each segment is a semi-colon separated group of parameters. Straigthforward application of RFC 2396 would mean that a trailing I specifier couldn't be distinguished from the last segment of the path having a two parameters, the last one starting with I. Therefore we have opted to disallow a semi-colon in the path part of an FTP URI. Furthermore, RFC 1738 allows three values for the type specifier, I, I and I (either upper case or lower case). However, the internet draft about FTP URIs B<[DRAFT-FTP-URL]> (which expired in May 1997) notes the lack of consistent implementation of the I parameter and drops I from the set of possible values. We follow this practise; however, RFC 1738 behaviour can be archieved by using the I<-type => "[ADIadi]"> parameter. =back FTP URIs have the following syntax: "ftp:" "//" [ userinfo "@" ] host [ ":" port ] [ "/" path [ ";type=" value ]] When using I<{-password}>, we have the syntax: "ftp:" "//" [ user [ ":" password ] "@" ] host [ ":" port ] [ "/" path [ ";type=" value ]] Under C<{-keep}>, the following are returned: =over 4 =item $1 The complete URI. =item $2 The scheme. =item $3 The userinfo, or if I<{-password}> is used, the username. =item $4 If I<{-password}> is used, the password, else C. =item $5 The hostname or IP address. =item $6 The port number. =item $7 The full path and type specification, including the leading slash. =item $8 The full path and type specification, without the leading slash. =item $9 The full path, without the type specification nor the leading slash. =item $10 The value of the type specification. =back =head1 REFERENCES =over 4 =item B<[DRAFT-URL-FTP]> Casey, James: I. November 1996. =item B<[RFC 1738]> Berners-Lee, Tim, Masinter, L., McCahill, M.: I. December 1994. =item B<[RFC 2396]> Berners-Lee, Tim, Fielding, R., and Masinter, L.: I. August 1998. =back =head1 SEE ALSO L for other supported URIs. =head1 AUTHOR Damian Conway (damian@conway.org) =head1 MAINTAINANCE This package is maintained by Abigail S<(I)>. =head1 BUGS AND IRRITATIONS Bound to be plenty. =head1 LICENSE and COPYRIGHT This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. This module is free software, and maybe used under any of the following licenses: 1) The Perl Artistic License. See the file COPYRIGHT.AL. 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. 3) The BSD Licence. See the file COPYRIGHT.BSD. 4) The MIT Licence. See the file COPYRIGHT.MIT. =cut